230 files changed, 71635 insertions, 1315 deletions
diff --git a/.bzrignore b/.bzrignore index 9f59f057f4f..297338d08dc 100644 --- a/.bzrignore +++ b/.bzrignore @@ -1,6 +1,7 @@ *-t *.Plo *.Po +*.Tpo *.a *.bb *.bbg @@ -1058,6 +1059,7 @@ libmysqld/ha_innobase.cc libmysqld/ha_innodb.cc libmysqld/ha_isam.cc libmysqld/ha_isammrg.cc +libmysqld/ha_maria.cc libmysqld/ha_myisam.cc libmysqld/ha_myisammrg.cc libmysqld/ha_ndbcluster.cc @@ -2385,6 +2387,20 @@ storage/innobase/ut/.deps/ut0rnd.Po storage/innobase/ut/.deps/ut0ut.Po storage/innobase/ut/.deps/ut0vec.Po storage/innobase/ut/.deps/ut0wqueue.Po +storage/maria/*.MAD +storage/maria/*.MAI +storage/maria/ma_rt_test +storage/maria/ma_sp_test +storage/maria/ma_test1 +storage/maria/ma_test2 +storage/maria/ma_test3 +storage/maria/ma_test_all +storage/maria/maria.log +storage/maria/maria_chk +storage/maria/maria_ftdump +storage/maria/maria_log +storage/maria/maria_pack +storage/maria/unittest/maria_control storage/myisam/.deps/ft_boolean_search.Po storage/myisam/.deps/ft_nlq_search.Po storage/myisam/.deps/ft_parser.Po @@ -2922,13 +2938,25 @@ unittest/examples/.deps/simple-t.Po unittest/examples/.deps/skip-t.Po unittest/examples/.deps/skip_all-t.Po unittest/examples/.deps/todo-t.Po +unittest/maria_control unittest/mysys/*.t unittest/mysys/.deps/base64-t.Po unittest/mysys/.deps/bitmap-t.Po unittest/mysys/.deps/my_atomic-t.Po +unittest/mysys/mf_pagecache_consist_1k-t-big +unittest/mysys/mf_pagecache_consist_1kHC-t-big +unittest/mysys/mf_pagecache_consist_1kRD-t-big +unittest/mysys/mf_pagecache_consist_1kWR-t-big +unittest/mysys/mf_pagecache_consist_64k-t-big +unittest/mysys/mf_pagecache_consist_64kHC-t-big +unittest/mysys/mf_pagecache_consist_64kRD-t-big +unittest/mysys/mf_pagecache_consist_64kWR-t-big +unittest/mysys/mf_pagecache_single_64k-t-big unittest/mytap/.deps/tap.Po unittest/mytap/t/*.t unittest/mytap/t/.deps/basic-t.Po +unittest/page_cache_test_file_1 +unittest/pagecache_debug.log unittest/unit vi.h vio/*.ds? 
diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh index 11dd67d5de0..5dca590849e 100755 --- a/BUILD/SETUP.sh +++ b/BUILD/SETUP.sh @@ -121,8 +121,9 @@ valgrind_flags="-USAFEMALLOC -UFORCE_INIT_OF_VARS -DHAVE_purify " valgrind_flags="$valgrind_flags -DMYSQL_SERVER_SUFFIX=-valgrind-max" # # Used in -debug builds -debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS " +debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS" debug_cflags="$debug_cflags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC -DSAFE_MUTEX" +debug_cflags="$debug_cflags -DMY_LF_EXTRA_DEBUG" error_inject="--with-error-inject " # # Base C++ flags for all builds @@ -154,8 +155,6 @@ then base_configs="$base_configs --with-libedit" fi -static_link="--with-mysqld-ldflags=-all-static " -static_link="$static_link --with-client-ldflags=-all-static" # we need local-infile in all binaries for rpl000001 # if you need to disable local-infile in the client, write a build script # and unset local_infile_configs diff --git a/BitKeeper/triggers/post-commit b/BitKeeper/triggers/post-commit index 86f3db012ee..0830070ccf9 100755 --- a/BitKeeper/triggers/post-commit +++ b/BitKeeper/triggers/post-commit @@ -5,7 +5,7 @@ FROM=$USER@mysql.com COMMITS=commits@lists.mysql.com DOCS=docs-commit@mysql.com LIMIT=10000 -VERSION="5.1" +VERSION="maria" BKROOT=`bk root` if [ -x /usr/sbin/sendmail ]; then @@ -76,55 +76,6 @@ EOF ) > $BKROOT/BitKeeper/tmp/dev_public.txt $SENDMAIL -t < $BKROOT/BitKeeper/tmp/dev_public.txt - -#++ -# commits@ mail -#-- - echo "Notifying commits list at $COMMITS" - ( - cat <<EOF -List-ID: <bk.mysql-$VERSION> -From: $FROM -To: $COMMITS -Subject: bk commit into $VERSION tree ($CHANGESET)$BS -X-CSetKey: <$CSETKEY> -$BH -Below is the list of changes that have just been committed into a local -$VERSION repository of $USER. When $USER does a push these changes will -be propagated to the main repository and, within 24 hours after the -push, to the public repository. -For information on how to access the public repository -see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html - -EOF - bk changes -v -r+ - bk cset -r+ -d - ) | bk sed -e ${LIMIT}q > $BKROOT/BitKeeper/tmp/commits.txt - -$SENDMAIL -t < $BKROOT/BitKeeper/tmp/commits.txt - -#++ -# docs-commit@ mail -# Picks up anything under the Docs subdirectory (relevant for docs team). -#-- - bk changes -v -r+ | grep -q " Docs/" - if [ $? -eq 0 ] - then - echo "Notifying docs list at $DOCS" - ( - cat <<EOF -List-ID: <bk.mysql-$VERSION> -From: $FROM -To: $DOCS -Subject: bk commit - $VERSION tree (Manual) ($CHANGESET)$BS - -EOF - bk changes -v -r+ - bk cset -r+ -d - ) > $BKROOT/BitKeeper/tmp/docs.txt - $SENDMAIL -t < $BKROOT/BitKeeper/tmp/docs.txt - fi - else echo "commit failed because '$BK_STATUS', you may need to re-clone..." 
fi diff --git a/Makefile.am b/Makefile.am index 6d435bff85a..31946b18987 100644 --- a/Makefile.am +++ b/Makefile.am @@ -130,7 +130,7 @@ test-binlog-statement: cd mysql-test ; \ @PERL@ ./mysql-test-run.pl $(force) --mysqld=--binlog-format=statement -test: test-unit test-ns test-pr +test: test-ns test-pr test-full: test test-nr test-ps diff --git a/configure.in b/configure.in index 9581b5ac46a..1a62c3129cb 100644 --- a/configure.in +++ b/configure.in @@ -2298,9 +2298,9 @@ AC_ARG_WITH(man, if test "$with_man" = "yes" then man_dirs="man" - man1_files=`ls -1 $srcdir/man/*.1 | sed -e 's;^.*man/;;'` + man1_files=`ls -1 $srcdir/man/*.1 2>/dev/null| sed -e 's;^.*man/;;'` man1_files=`echo $man1_files` - man8_files=`ls -1 $srcdir/man/*.8 | sed -e 's;^.*man/;;'` + man8_files=`ls -1 $srcdir/man/*.8 2>/dev/null| sed -e 's;^.*man/;;'` man8_files=`echo $man8_files` else man_dirs="" @@ -2436,7 +2436,6 @@ AC_SUBST(readline_basedir) AC_SUBST(readline_link) AC_SUBST(readline_h_ln_cmd) - # If we have threads generate some library functions and test programs sql_server_dirs= sql_server= diff --git a/include/Makefile.am b/include/Makefile.am index d40df7d8343..f92110cb4ce 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -27,8 +27,8 @@ pkginclude_HEADERS = $(HEADERS_ABI) my_dbug.h m_string.h my_sys.h \ my_getopt.h sslopt-longopts.h my_dir.h \ sslopt-vars.h sslopt-case.h sql_common.h keycache.h \ m_ctype.h mysql/plugin.h $(HEADERS_GEN) -noinst_HEADERS = config-win.h config-netware.h \ - heap.h my_bitmap.h my_uctype.h \ +noinst_HEADERS = config-win.h config-netware.h lf.h my_bit.h \ + heap.h maria.h myisamchk.h my_bitmap.h my_uctype.h \ myisam.h myisampack.h myisammrg.h ft_global.h\ mysys_err.h my_base.h help_start.h help_end.h \ my_nosys.h my_alarm.h queues.h rijndael.h sha1.h \ @@ -37,7 +37,7 @@ noinst_HEADERS = config-win.h config-netware.h \ mysql_version.h.in my_handler.h my_time.h decimal.h \ my_vle.h my_user.h my_atomic.h atomic/nolock.h \ atomic/rwlock.h atomic/x86-gcc.h atomic/x86-msvc.h \ - my_libwrap.h + my_libwrap.h pagecache.h # Remove built files and the symlinked directories CLEANFILES = $(BUILT_SOURCES) readline openssl diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h index f15c8b13b7f..9b5cbecbd0a 100644 --- a/include/atomic/nolock.h +++ b/include/atomic/nolock.h @@ -13,24 +13,25 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#if defined(__i386__) || defined(_M_IX86) - -#ifdef MY_ATOMIC_MODE_DUMMY -# define LOCK "" -#else -# define LOCK "lock" -#endif - -#ifdef __GNUC__ -#include "x86-gcc.h" -#elif defined(_MSC_VER) -#include "x86-msvc.h" -#endif +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) + +# ifdef MY_ATOMIC_MODE_DUMMY +# define LOCK_prefix "" +# else +# define LOCK_prefix "lock" +# endif + +# ifdef __GNUC__ +# include "x86-gcc.h" +# elif defined(_MSC_VER) +# error Broken! 
+# include "x86-msvc.h" +# endif #endif #ifdef make_atomic_cas_body -typedef struct { } my_atomic_rwlock_t; +typedef struct { } my_atomic_rwlock_t __attribute__ ((unused)); #define my_atomic_rwlock_destroy(name) #define my_atomic_rwlock_init(name) #define my_atomic_rwlock_rdlock(name) diff --git a/include/atomic/rwlock.h b/include/atomic/rwlock.h index 18b77e93d80..cb41952b70c 100644 --- a/include/atomic/rwlock.h +++ b/include/atomic/rwlock.h @@ -13,7 +13,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t; +typedef struct {pthread_mutex_t rw;} my_atomic_rwlock_t; #ifdef MY_ATOMIC_MODE_DUMMY /* @@ -31,17 +31,22 @@ typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t; #define my_atomic_rwlock_wrunlock(name) #define MY_ATOMIC_MODE "dummy (non-atomic)" #else -#define my_atomic_rwlock_destroy(name) pthread_rwlock_destroy(& (name)->rw) -#define my_atomic_rwlock_init(name) pthread_rwlock_init(& (name)->rw, 0) -#define my_atomic_rwlock_rdlock(name) pthread_rwlock_rdlock(& (name)->rw) -#define my_atomic_rwlock_wrlock(name) pthread_rwlock_wrlock(& (name)->rw) -#define my_atomic_rwlock_rdunlock(name) pthread_rwlock_unlock(& (name)->rw) -#define my_atomic_rwlock_wrunlock(name) pthread_rwlock_unlock(& (name)->rw) -#define MY_ATOMIC_MODE "rwlocks" +/* + we're using read-write lock macros but map them to mutex locks, and they're + faster. Still, having semantically rich API we can change the + underlying implementation, if necessary. +*/ +#define my_atomic_rwlock_destroy(name) pthread_mutex_destroy(& (name)->rw) +#define my_atomic_rwlock_init(name) pthread_mutex_init(& (name)->rw, 0) +#define my_atomic_rwlock_rdlock(name) pthread_mutex_lock(& (name)->rw) +#define my_atomic_rwlock_wrlock(name) pthread_mutex_lock(& (name)->rw) +#define my_atomic_rwlock_rdunlock(name) pthread_mutex_unlock(& (name)->rw) +#define my_atomic_rwlock_wrunlock(name) pthread_mutex_unlock(& (name)->rw) +#define MY_ATOMIC_MODE "mutex" #endif #define make_atomic_add_body(S) int ## S sav; sav= *a; *a+= v; v=sav; -#define make_atomic_swap_body(S) int ## S sav; sav= *a; *a= v; v=sav; +#define make_atomic_fas_body(S) int ## S sav; sav= *a; *a= v; v=sav; #define make_atomic_cas_body(S) if ((ret= (*a == *cmp))) *a= set; else *cmp=*a; #define make_atomic_load_body(S) ret= *a; #define make_atomic_store_body(S) *a= v; diff --git a/include/atomic/x86-gcc.h b/include/atomic/x86-gcc.h index d79dadbf05e..5a34bc22f9e 100644 --- a/include/atomic/x86-gcc.h +++ b/include/atomic/x86-gcc.h @@ -19,10 +19,18 @@ architectures support double-word (128-bit) cas. 
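The rwlock.h hunk above maps the my_atomic_rwlock_* macros onto plain pthread mutexes when no native atomic implementation is available. As a rough illustration (not code from the patch), a wrapped 32-bit atomic add then reduces to a lock / read-modify-write / unlock sequence; the function and variable names below are made up for the sketch.

  #include <pthread.h>
  #include <stdint.h>

  typedef struct { pthread_mutex_t rw; } my_atomic_rwlock_t;   /* as in the patch */

  static int32_t fallback_add32(my_atomic_rwlock_t *lk, int32_t volatile *a, int32_t v)
  {
    int32_t sav;
    pthread_mutex_lock(&lk->rw);       /* my_atomic_rwlock_wrlock(lk)   */
    sav= *a; *a+= v; v= sav;           /* make_atomic_add_body(32)      */
    pthread_mutex_unlock(&lk->rw);     /* my_atomic_rwlock_wrunlock(lk) */
    return v;                          /* the previous value of *a      */
  }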
*/ -#ifdef MY_ATOMIC_NO_XADD -#define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd" +#ifdef __x86_64__ +# ifdef MY_ATOMIC_NO_XADD +# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix "-no-xadd" +# else +# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix +# endif #else -#define MY_ATOMIC_MODE "gcc-x86" LOCK +# ifdef MY_ATOMIC_NO_XADD +# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix "-no-xadd" +# else +# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix +# endif #endif /* fix -ansi errors while maintaining readability */ @@ -32,12 +40,12 @@ #ifndef MY_ATOMIC_NO_XADD #define make_atomic_add_body(S) \ - asm volatile (LOCK "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) + asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) #endif -#define make_atomic_swap_body(S) \ - asm volatile ("; xchg %0, %1;" : "+r" (v) , "+m" (*a)) +#define make_atomic_fas_body(S) \ + asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a)) #define make_atomic_cas_body(S) \ - asm volatile (LOCK "; cmpxchg %3, %0; setz %2;" \ + asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \ : "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set)) #ifdef MY_ATOMIC_MODE_DUMMY @@ -46,13 +54,16 @@ #else /* Actually 32-bit reads/writes are always atomic on x86 - But we add LOCK here anyway to force memory barriers + But we add LOCK_prefix here anyway to force memory barriers */ #define make_atomic_load_body(S) \ ret=0; \ - asm volatile (LOCK "; cmpxchg %2, %0" \ + asm volatile (LOCK_prefix "; cmpxchg %2, %0" \ : "+m" (*a), "+a" (ret): "r" (ret)) #define make_atomic_store_body(S) \ - asm volatile ("; xchg %0, %1;" : "+m" (*a) : "r" (v)) + asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v)) #endif +/* TODO test on intel whether the below helps. on AMD it makes no difference */ +//#define LF_BACKOFF ({asm volatile ("rep; nop"); 1; }) + diff --git a/include/atomic/x86-msvc.h b/include/atomic/x86-msvc.h index c4885bb8451..2a2cfe70de9 100644 --- a/include/atomic/x86-msvc.h +++ b/include/atomic/x86-msvc.h @@ -25,24 +25,24 @@ #ifndef _atomic_h_cleanup_ #define _atomic_h_cleanup_ "atomic/x86-msvc.h" -#define MY_ATOMIC_MODE "msvc-x86" LOCK +#define MY_ATOMIC_MODE "msvc-x86" LOCK_prefix #define make_atomic_add_body(S) \ _asm { \ _asm mov reg_ ## S, v \ - _asm LOCK xadd *a, reg_ ## S \ + _asm LOCK_prefix xadd *a, reg_ ## S \ _asm movzx v, reg_ ## S \ } #define make_atomic_cas_body(S) \ _asm { \ _asm mov areg_ ## S, *cmp \ _asm mov reg2_ ## S, set \ - _asm LOCK cmpxchg *a, reg2_ ## S \ + _asm LOCK_prefix cmpxchg *a, reg2_ ## S \ _asm mov *cmp, areg_ ## S \ _asm setz al \ _asm movzx ret, al \ } -#define make_atomic_swap_body(S) \ +#define make_atomic_fas_body(S) \ _asm { \ _asm mov reg_ ## S, v \ _asm xchg *a, reg_ ## S \ @@ -55,13 +55,13 @@ #else /* Actually 32-bit reads/writes are always atomic on x86 - But we add LOCK here anyway to force memory barriers + But we add LOCK_prefix here anyway to force memory barriers */ #define make_atomic_load_body(S) \ _asm { \ _asm mov areg_ ## S, 0 \ _asm mov reg2_ ## S, areg_ ## S \ - _asm LOCK cmpxchg *a, reg2_ ## S \ + _asm LOCK_prefix cmpxchg *a, reg2_ ## S \ _asm mov ret, areg_ ## S \ } #define make_atomic_store_body(S) \ diff --git a/include/ft_global.h b/include/ft_global.h index 6c3457541a5..fd1397d1d25 100644 --- a/include/ft_global.h +++ b/include/ft_global.h @@ -65,6 +65,17 @@ void ft_free_stopwords(void); FT_INFO *ft_init_search(uint,void *, uint, byte *, uint,CHARSET_INFO *, byte *); my_bool ft_boolean_check_syntax_string(const byte *); +/* Internal symbols for fulltext between maria and MyISAM */ + +#define 
HA_FT_WTYPE HA_KEYTYPE_FLOAT +#define HA_FT_WLEN 4 +#define FT_SEGS 2 + +#define ft_sintXkorr(A) mi_sint4korr(A) +#define ft_intXstore(T,A) mi_int4store(T,A) + +extern const HA_KEYSEG ft_keysegs[FT_SEGS]; + #ifdef __cplusplus } #endif diff --git a/include/keycache.h b/include/keycache.h index dc763b8cc08..90e86f7e8f9 100644 --- a/include/keycache.h +++ b/include/keycache.h @@ -127,7 +127,8 @@ extern void end_key_cache(KEY_CACHE *keycache, my_bool cleanup); /* Functions to handle multiple key caches */ extern my_bool multi_keycache_init(void); extern void multi_keycache_free(void); -extern KEY_CACHE *multi_key_cache_search(byte *key, uint length); +extern KEY_CACHE *multi_key_cache_search(byte *key, uint length, + KEY_CACHE *def); extern my_bool multi_key_cache_set(const byte *key, uint length, KEY_CACHE *key_cache); extern void multi_key_cache_change(KEY_CACHE *old_data, diff --git a/include/lf.h b/include/lf.h new file mode 100644 index 00000000000..39a47e6568a --- /dev/null +++ b/include/lf.h @@ -0,0 +1,258 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _lf_h +#define _lf_h + +#include <my_atomic.h> + +/* + Helpers to define both func() and _func(), where + func() is a _func() protected by my_atomic_rwlock_wrlock() +*/ + +#define lock_wrap(f, t, proto_args, args, lock) \ +t _ ## f proto_args; \ +static inline t f proto_args \ +{ \ + t ret; \ + my_atomic_rwlock_wrlock(lock); \ + ret= _ ## f args; \ + my_atomic_rwlock_wrunlock(lock); \ + return ret; \ +} + +#define lock_wrap_void(f, proto_args, args, lock) \ +void _ ## f proto_args; \ +static inline void f proto_args \ +{ \ + my_atomic_rwlock_wrlock(lock); \ + _ ## f args; \ + my_atomic_rwlock_wrunlock(lock); \ +} + +#define nolock_wrap(f, t, proto_args, args) \ +t _ ## f proto_args; \ +static inline t f proto_args \ +{ \ + return _ ## f args; \ +} + +#define nolock_wrap_void(f, proto_args, args) \ +void _ ## f proto_args; \ +static inline void f proto_args \ +{ \ + _ ## f args; \ +} + +/* + wait-free dynamic array, see lf_dynarray.c + + 4 levels of 256 elements each mean 4311810304 elements in an array - it + should be enough for a while +*/ +#define LF_DYNARRAY_LEVEL_LENGTH 256 +#define LF_DYNARRAY_LEVELS 4 + +typedef struct { + void * volatile level[LF_DYNARRAY_LEVELS]; + uint size_of_element; + my_atomic_rwlock_t lock; +} LF_DYNARRAY; + +typedef int (*lf_dynarray_func)(void *, void *); + +void lf_dynarray_init(LF_DYNARRAY *array, uint element_size); +void lf_dynarray_destroy(LF_DYNARRAY *array); + +nolock_wrap(lf_dynarray_value, void *, + (LF_DYNARRAY *array, uint idx), + (array, idx)) +lock_wrap(lf_dynarray_lvalue, void *, + (LF_DYNARRAY *array, uint idx), + (array, idx), + &array->lock) +nolock_wrap(lf_dynarray_iterate, int, + (LF_DYNARRAY *array, lf_dynarray_func func, void *arg), + (array, func, arg)) + +/* + pin manager for memory allocator, 
lf_alloc-pin.c +*/ + +#define LF_PINBOX_PINS 4 +#define LF_PURGATORY_SIZE 10 + +typedef void lf_pinbox_free_func(void *, void *); + +typedef struct { + LF_DYNARRAY pinstack; + lf_pinbox_free_func *free_func; + void *free_func_arg; + uint free_ptr_offset; + uint32 volatile pinstack_top_ver; /* this is a versioned pointer */ + uint32 volatile pins_in_stack; /* number of elements in array */ +} LF_PINBOX; + +typedef struct { + void * volatile pin[LF_PINBOX_PINS]; + LF_PINBOX *pinbox; + void *purgatory; + uint32 purgatory_count; + uint32 volatile link; +/* we want sizeof(LF_PINS) to be 128 to avoid false sharing */ + char pad[128-sizeof(uint32)*2 + -sizeof(LF_PINBOX *) + -sizeof(void *)*(LF_PINBOX_PINS+1)]; +} LF_PINS; + +/* + shortcut macros to do an atomic_wrlock on a structure that uses pins + (e.g. lf_hash). +*/ +#define lf_rwlock_by_pins(PINS) \ + my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinstack.lock) +#define lf_rwunlock_by_pins(PINS) \ + my_atomic_rwlock_wrunlock(&(PINS)->pinbox->pinstack.lock) + +/* + compile-time assert, to require "no less than N" pins + it's enough if it'll fail on at least one compiler, so + we'll enable it on GCC only, which supports zero-length arrays. +*/ +#if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG) +#define LF_REQUIRE_PINS(N) \ + static const char require_pins[LF_PINBOX_PINS-N] __attribute__ ((unused)); \ + static const int LF_NUM_PINS_IN_THIS_FILE= N + +#define _lf_pin(PINS, PIN, ADDR) \ + ( \ + assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \ + my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) \ + ) +#else +#define LF_REQUIRE_PINS(N) +#define _lf_pin(PINS, PIN, ADDR) my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) +#endif + +#define _lf_unpin(PINS, PIN) _lf_pin(PINS, PIN, NULL) +#define lf_pin(PINS, PIN, ADDR) \ + do { \ + lf_rwlock_by_pins(PINS); \ + _lf_pin(PINS, PIN, ADDR); \ + lf_rwunlock_by_pins(PINS); \ + } while (0) +#define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL) +#define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0) +#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN] == 0) + +void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, + lf_pinbox_free_func *free_func, void * free_func_arg); +void lf_pinbox_destroy(LF_PINBOX *pinbox); + +lock_wrap(lf_pinbox_get_pins, LF_PINS *, + (LF_PINBOX *pinbox), + (pinbox), + &pinbox->pinstack.lock) +lock_wrap_void(lf_pinbox_put_pins, + (LF_PINS *pins), + (pins), + &pins->pinbox->pinstack.lock) +lock_wrap_void(lf_pinbox_free, + (LF_PINS *pins, void *addr), + (pins, addr), + &pins->pinbox->pinstack.lock) + +/* + memory allocator, lf_alloc-pin.c +*/ + +struct st_lf_alloc_node { + struct st_lf_alloc_node *next; +}; + +typedef struct st_lf_allocator { + LF_PINBOX pinbox; + struct st_lf_alloc_node * volatile top; + uint element_size; + uint32 volatile mallocs; +} LF_ALLOCATOR; + +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset); +void lf_alloc_destroy(LF_ALLOCATOR *allocator); +uint lf_alloc_in_pool(LF_ALLOCATOR *allocator); +/* + shortcut macros to access underlying pinbox functions from an LF_ALLOCATOR + see _lf_pinbox_get_pins() and _lf_pinbox_put_pins() +*/ +#define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR)) +#define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR)) +#define _lf_alloc_get_pins(ALLOC) _lf_pinbox_get_pins(&(ALLOC)->pinbox) +#define lf_alloc_get_pins(ALLOC) lf_pinbox_get_pins(&(ALLOC)->pinbox) +#define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS) +#define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS) +#define 
lf_alloc_real_free(ALLOC, ADDR) my_free((gptr)(ADDR), MYF(0)) + +lock_wrap(lf_alloc_new, void *, + (LF_PINS *pins), + (pins), + &pins->pinbox->pinstack.lock) + +/* + extendible hash, lf_hash.c +*/ +#include <hash.h> + +#define LF_HASH_UNIQUE 1 + +typedef struct { + LF_DYNARRAY array; /* hash itself */ + LF_ALLOCATOR alloc; /* allocator for elements */ + hash_get_key get_key; /* see HASH */ + CHARSET_INFO *charset; /* see HASH */ + uint key_offset, key_length; /* see HASH */ + uint element_size, flags; /* LF_HASH_UNIQUE, etc */ + int32 volatile size; /* size of array */ + int32 volatile count; /* number of elements in the hash */ +} LF_HASH; + +void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, + uint key_offset, uint key_length, hash_get_key get_key, + CHARSET_INFO *charset); +void lf_hash_destroy(LF_HASH *hash); +int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data); +void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); +/* + shortcut macros to access underlying pinbox functions from an LF_HASH + see _lf_pinbox_get_pins() and _lf_pinbox_put_pins() +*/ +#define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc) +#define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc) +#define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS) +#define lf_hash_put_pins(PINS) lf_pinbox_put_pins(PINS) + +/* + cleanup +*/ + +#undef lock_wrap_void +#undef lock_wrap +#undef nolock_wrap_void +#undef nolock_wrap + +#endif + diff --git a/include/m_string.h b/include/m_string.h index 4f098c3206d..407dd68c699 100644 --- a/include/m_string.h +++ b/include/m_string.h @@ -67,7 +67,7 @@ # define bcopy(s, d, n) memcpy((d), (s), (n)) # define bcmp(A,B,C) memcmp((A),(B),(C)) # define bzero(A,B) memset((A),0,(B)) -# define bmove_align(A,B,C) memcpy((A),(B),(C)) +# define bmove_align(A,B,C) memcpy((A),(B),(C)) #endif #if defined(__cplusplus) @@ -126,7 +126,10 @@ extern int bcmp(const char *s1,const char *s2,uint len); extern int my_bcmp(const char *s1,const char *s2,uint len); #undef bcmp #define bcmp(A,B,C) my_bcmp((A),(B),(C)) -#endif +#define bzero_if_purify(A,B) bzero(A,B) +#else +#define bzero_if_purify(A,B) +#endif /* HAVE_purify */ #ifndef bmove512 extern void bmove512(gptr dst,const gptr src,uint len); diff --git a/include/maria.h b/include/maria.h new file mode 100644 index 00000000000..94152ce9bfa --- /dev/null +++ b/include/maria.h @@ -0,0 +1,439 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
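For orientation, here is a minimal, hypothetical usage sketch of the LF_HASH API declared in lf.h above (lf_hash_init/insert/search/delete plus per-thread pins). The element layout and names are invented; that a NULL get_key means "use key_offset/key_length", that lf_hash_insert copies element_size bytes, and that the element returned by lf_hash_search stays on pin 2 are assumptions rather than guarantees from the patch, and error handling is omitted.

  #include <stddef.h>                /* offsetof */
  #include "lf.h"                    /* LF_HASH API from this patch */

  typedef struct { uint32 id; uint32 value; } ENTRY;   /* hypothetical element */

  static void lf_hash_demo(void)
  {
    LF_HASH entries;
    LF_PINS *pins;
    ENTRY e= { 42, 1 }, *found;
    uint32 key= 42;

    lf_hash_init(&entries, sizeof(ENTRY), LF_HASH_UNIQUE,
                 offsetof(ENTRY, id), sizeof(uint32),
                 NULL /* assumed: fall back to key_offset/key_length */,
                 &my_charset_bin);

    pins= lf_hash_get_pins(&entries);          /* pins are per-thread */
    lf_hash_insert(&entries, pins, &e);        /* assumed to copy element_size bytes */

    found= (ENTRY*) lf_hash_search(&entries, pins, &key, sizeof(key));
    if (found)
    {
      uint32 v= found->value;    /* element stays valid while pinned through 'pins' */
      (void) v;
      lf_unpin(pins, 2);         /* assumption: search leaves the element on pin 2 */
    }

    lf_hash_delete(&entries, pins, &key, sizeof(key));
    lf_hash_put_pins(pins);
    lf_hash_destroy(&entries);
  }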
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* This file should be included when using maria_funktions */ + +#ifndef _maria_h +#define _maria_h +#ifdef __cplusplus +extern "C" { +#endif +#ifndef _my_base_h +#include <my_base.h> +#endif +#ifndef _m_ctype_h +#include <m_ctype.h> +#endif +#ifndef _keycache_h +#include "keycache.h" +#endif +#include "my_handler.h" +#include "ft_global.h" +#include <myisamchk.h> +#include <mysql/plugin.h> + +/* + Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details +*/ + +#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY +#define MARIA_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */ +#else +#define MARIA_MAX_KEY MAX_INDEXES /* Max allowed keys */ +#endif + +#define MARIA_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */ +#define MARIA_NAME_IEXT ".MAI" +#define MARIA_NAME_DEXT ".MAD" +/* Max extra space to use when sorting keys */ +#define MARIA_MAX_TEMP_LENGTH 2*1024L*1024L*1024L +/* Possible values for maria_block_size (must be power of 2) */ +#define MARIA_KEY_BLOCK_LENGTH 8192 /* default key block length */ +#define MARIA_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */ +#define MARIA_MAX_KEY_BLOCK_LENGTH 32768 +#define maria_portable_sizeof_char_ptr 8 +#define MARIA_MAX_KEY_LENGTH 1000 /* Max length in bytes */ + +/* + In the following macros '_keyno_' is 0 .. keys-1. + If there can be more keys than bits in the key_map, the highest bit + is for all upper keys. They cannot be switched individually. + This means that clearing of high keys is ignored, setting one high key + sets all high keys. +*/ +#define MARIA_KEYMAP_BITS (8 * SIZEOF_LONG_LONG) +#define MARIA_KEYMAP_HIGH_MASK (ULL(1) << (MARIA_KEYMAP_BITS - 1)) +#define maria_get_mask_all_keys_active(_keys_) \ + (((_keys_) < MARIA_KEYMAP_BITS) ? \ + ((ULL(1) << (_keys_)) - ULL(1)) : \ + (~ ULL(0))) +#if MARIA_MAX_KEY > MARIA_KEYMAP_BITS +#define maria_is_key_active(_keymap_,_keyno_) \ + (((_keyno_) < MARIA_KEYMAP_BITS) ? \ + test((_keymap_) & (ULL(1) << (_keyno_))) : \ + test((_keymap_) & MARIA_KEYMAP_HIGH_MASK)) +#define maria_set_key_active(_keymap_,_keyno_) \ + (_keymap_)|= (((_keyno_) < MARIA_KEYMAP_BITS) ? \ + (ULL(1) << (_keyno_)) : \ + MARIA_KEYMAP_HIGH_MASK) +#define maria_clear_key_active(_keymap_,_keyno_) \ + (_keymap_)&= (((_keyno_) < MARIA_KEYMAP_BITS) ? 
\ + (~ (ULL(1) << (_keyno_))) : \ + (~ (ULL(0))) /*ignore*/ ) +#else +#define maria_is_key_active(_keymap_,_keyno_) \ + test((_keymap_) & (ULL(1) << (_keyno_))) +#define maria_set_key_active(_keymap_,_keyno_) \ + (_keymap_)|= (ULL(1) << (_keyno_)) +#define maria_clear_key_active(_keymap_,_keyno_) \ + (_keymap_)&= (~ (ULL(1) << (_keyno_))) +#endif +#define maria_is_any_key_active(_keymap_) \ + test((_keymap_)) +#define maria_is_all_keys_active(_keymap_,_keys_) \ + ((_keymap_) == maria_get_mask_all_keys_active(_keys_)) +#define maria_set_all_keys_active(_keymap_,_keys_) \ + (_keymap_)= maria_get_mask_all_keys_active(_keys_) +#define maria_clear_all_keys_active(_keymap_) \ + (_keymap_)= 0 +#define maria_intersect_keys_active(_to_,_from_) \ + (_to_)&= (_from_) +#define maria_is_any_intersect_keys_active(_keymap1_,_keys_,_keymap2_) \ + ((_keymap1_) & (_keymap2_) & \ + maria_get_mask_all_keys_active(_keys_)) +#define maria_copy_keys_active(_to_,_maxkeys_,_from_) \ + (_to_)= (maria_get_mask_all_keys_active(_maxkeys_) & \ + (_from_)) + + /* Param to/from maria_info */ + +typedef ulonglong MARIA_RECORD_POS; + +typedef struct st_maria_isaminfo /* Struct from h_info */ +{ + ha_rows records; /* Records in database */ + ha_rows deleted; /* Deleted records in database */ + MARIA_RECORD_POS recpos; /* Pos for last used record */ + MARIA_RECORD_POS newrecpos; /* Pos if we write new record */ + MARIA_RECORD_POS dup_key_pos; /* Position to record with dup key */ + my_off_t data_file_length; /* Length of data file */ + my_off_t max_data_file_length, index_file_length; + my_off_t max_index_file_length, delete_length; + ulong reclength; /* Recordlength */ + ulong mean_reclength; /* Mean recordlength (if packed) */ + ulonglong auto_increment; + ulonglong key_map; /* Which keys are used */ + char *data_file_name, *index_file_name; + uint keys; /* Number of keys in use */ + uint options; /* HA_OPTION_... 
used */ + int errkey, /* With key was dupplicated on err */ + sortkey; /* clustered by this key */ + File filenr; /* (uniq) filenr for datafile */ + time_t create_time; /* When table was created */ + time_t check_time; + time_t update_time; + uint reflength; + ulong record_offset; + ulong *rec_per_key; /* for sql optimizing */ +} MARIA_INFO; + + +typedef struct st_maria_create_info +{ + const char *index_file_name, *data_file_name; /* If using symlinks */ + ha_rows max_rows; + ha_rows reloc_rows; + ulonglong auto_increment; + ulonglong data_file_length; + ulonglong key_file_length; + uint null_bytes; + uint old_options; + enum data_file_type org_data_file_type; + uint8 language; + my_bool with_auto_increment, transactional; +} MARIA_CREATE_INFO; + +struct st_maria_info; /* For referense */ +struct st_maria_share; +typedef struct st_maria_info MARIA_HA; +struct st_maria_s_param; + +typedef struct st_maria_keydef /* Key definition with open & info */ +{ + struct st_maria_share *share; /* Pointer to base (set in open) */ + uint16 keysegs; /* Number of key-segment */ + uint16 flag; /* NOSAME, PACK_USED */ + + uint8 key_alg; /* BTREE, RTREE */ + uint16 block_length; /* Length of keyblock (auto) */ + uint16 underflow_block_length; /* When to execute underflow */ + uint16 keylength; /* Tot length of keyparts (auto) */ + uint16 minlength; /* min length of (packed) key (auto) */ + uint16 maxlength; /* max length of (packed) key (auto) */ + uint32 version; /* For concurrent read/write */ + uint32 ftparser_nr; /* distinct ftparser number */ + + HA_KEYSEG *seg, *end; + struct st_mysql_ftparser *parser; /* Fulltext [pre]parser */ + int (*bin_search)(struct st_maria_info *info, + struct st_maria_keydef *keyinfo, byte *page, byte *key, + uint key_len, uint comp_flag, byte **ret_pos, + byte *buff, my_bool *was_last_key); + uint(*get_key)(struct st_maria_keydef *keyinfo, uint nod_flag, + byte **page, byte *key); + int (*pack_key)(struct st_maria_keydef *keyinfo, uint nod_flag, + byte *next_key, byte *org_key, byte *prev_key, + const byte *key, struct st_maria_s_param *s_temp); + void (*store_key)(struct st_maria_keydef *keyinfo, byte *key_pos, + struct st_maria_s_param *s_temp); + int (*ck_insert)(struct st_maria_info *inf, uint k_nr, byte *k, uint klen); + int (*ck_delete)(struct st_maria_info *inf, uint k_nr, byte *k, uint klen); +} MARIA_KEYDEF; + + +#define MARIA_UNIQUE_HASH_LENGTH 4 + +typedef struct st_maria_unique_def /* Segment definition of unique */ +{ + uint16 keysegs; /* Number of key-segment */ + uint8 key; /* Mapped to which key */ + uint8 null_are_equal; + HA_KEYSEG *seg, *end; +} MARIA_UNIQUEDEF; + +typedef struct st_maria_decode_tree /* Decode huff-table */ +{ + uint16 *table; + uint quick_table_bits; + byte *intervalls; +} MARIA_DECODE_TREE; + + +struct st_maria_bit_buff; + +/* + Note that null markers should always be first in a row ! 
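As a quick illustration of the key-map macros defined earlier in this header, the hypothetical fragment below tracks which of a table's three indexes are enabled; the asserted values follow directly from the bit masks above.

  ulonglong keymap= 0;
  uint keys= 3;                                   /* hypothetical table with 3 keys */

  maria_set_all_keys_active(keymap, keys);        /* keymap == 0x7 */
  maria_clear_key_active(keymap, 1);              /* keymap == 0x5 */
  assert(!maria_is_key_active(keymap, 1));
  assert(maria_is_any_key_active(keymap));
  assert(!maria_is_all_keys_active(keymap, keys));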
+ When creating a column, one should only specify: + type, length, null_bit and null_pos +*/ + +typedef struct st_maria_columndef /* column information */ +{ + uint64 offset; /* Offset to position in row */ + enum en_fieldtype type; + uint16 length; /* length of field */ + uint16 fill_length; + uint16 null_pos; /* Position for null marker */ + uint16 empty_pos; /* Position for empty marker */ + uint8 null_bit; /* If column may be NULL */ + uint8 empty_bit; /* If column may be empty */ + +#ifndef NOT_PACKED_DATABASES + void(*unpack)(struct st_maria_columndef *rec, + struct st_maria_bit_buff *buff, + byte *start, byte *end); + enum en_fieldtype base_type; + uint space_length_bits, pack_type; + MARIA_DECODE_TREE *huff_tree; +#endif +} MARIA_COLUMNDEF; + + +extern ulong maria_block_size; +extern ulong maria_concurrent_insert; +extern my_bool maria_flush, maria_single_user; +extern my_bool maria_delay_key_write, maria_delay_rec_write; +extern my_off_t maria_max_temp_length; +extern ulong maria_bulk_insert_tree_size, maria_data_pointer_size; +extern KEY_CACHE maria_key_cache_var, *maria_key_cache; + + + /* Prototypes for maria-functions */ + +extern int maria_init(void); +extern void maria_end(void); +extern int maria_close(struct st_maria_info *file); +extern int maria_delete(struct st_maria_info *file, const byte *buff); +extern struct st_maria_info *maria_open(const char *name, int mode, + uint wait_if_locked); +extern int maria_panic(enum ha_panic_function function); +extern int maria_rfirst(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rkey(struct st_maria_info *file, byte *buf, int inx, + const byte *key, + uint key_len, enum ha_rkey_function search_flag); +extern int maria_rlast(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rnext(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rnext_same(struct st_maria_info *info, byte *buf); +extern int maria_rprev(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rrnd(struct st_maria_info *file, byte *buf, + MARIA_RECORD_POS pos); +extern int maria_scan_init(struct st_maria_info *file); +extern int maria_scan(struct st_maria_info *file, byte *buf); +extern void maria_scan_end(struct st_maria_info *file); +extern int maria_rsame(struct st_maria_info *file, byte *record, int inx); +extern int maria_rsame_with_pos(struct st_maria_info *file, byte *record, + int inx, MARIA_RECORD_POS pos); +extern int maria_update(struct st_maria_info *file, const byte *old, + byte *new_record); +extern int maria_write(struct st_maria_info *file, byte *buff); +extern MARIA_RECORD_POS maria_position(struct st_maria_info *file); +extern int maria_status(struct st_maria_info *info, MARIA_INFO *x, uint flag); +extern int maria_lock_database(struct st_maria_info *file, int lock_type); +extern int maria_create(const char *name, enum data_file_type record_type, + uint keys, MARIA_KEYDEF *keydef, + uint columns, MARIA_COLUMNDEF *columndef, + uint uniques, MARIA_UNIQUEDEF *uniquedef, + MARIA_CREATE_INFO *create_info, uint flags); +extern int maria_delete_table(const char *name); +extern int maria_rename(const char *from, const char *to); +extern int maria_extra(struct st_maria_info *file, + enum ha_extra_function function, void *extra_arg); +extern int maria_reset(struct st_maria_info *file); +extern ha_rows maria_records_in_range(struct st_maria_info *info, int inx, + key_range *min_key, key_range *max_key); +extern int maria_is_changed(struct st_maria_info *info); +extern int 
maria_delete_all_rows(struct st_maria_info *info); +extern uint maria_get_pointer_length(ulonglong file_length, uint def); + + +/* this is used to pass to mysql_mariachk_table */ + +#define MARIA_CHK_REPAIR 1 /* equivalent to mariachk -r */ +#define MARIA_CHK_VERIFY 2 /* Verify, run repair if failure */ + +typedef uint maria_bit_type; + +typedef struct st_maria_bit_buff +{ /* Used for packing of record */ + maria_bit_type current_byte; + uint bits; + uchar *pos, *end, *blob_pos, *blob_end; + uint error; +} MARIA_BIT_BUFF; + + +typedef struct st_maria_sort_info +{ +#ifdef THREAD + /* sync things */ + pthread_mutex_t mutex; + pthread_cond_t cond; +#endif + MARIA_HA *info; + HA_CHECK *param; + char *buff; + SORT_KEY_BLOCKS *key_block, *key_block_end; + SORT_FT_BUF *ft_buf; + my_off_t filelength, dupp, buff_length; + ha_rows max_records; + uint current_key, total_keys; + uint got_error, threads_running; + myf myf_rw; + enum data_file_type new_data_file_type; +} MARIA_SORT_INFO; + +typedef struct st_maria_sort_param +{ + pthread_t thr; + IO_CACHE read_cache, tempfile, tempfile_for_exceptions; + DYNAMIC_ARRAY buffpek; + MARIA_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ + + MARIA_KEYDEF *keyinfo; + MARIA_SORT_INFO *sort_info; + HA_KEYSEG *seg; + byte **sort_keys; + byte *rec_buff; + void *wordlist, *wordptr; + MEM_ROOT wordroot; + char *record; + MY_TMPDIR *tmpdir; + + /* + The next two are used to collect statistics, see maria_update_key_parts for + description. + */ + ulonglong unique[HA_MAX_KEY_SEG+1]; + ulonglong notnull[HA_MAX_KEY_SEG+1]; + + MARIA_RECORD_POS pos,max_pos,filepos,start_recpos; + uint key, key_length,real_key_length,sortbuff_size; + uint maxbuffers, keys, find_length, sort_keys_length; + my_bool fix_datafile, master; + my_bool calc_checksum; /* calculate table checksum */ + my_size_t rec_buff_size; + + int (*key_cmp)(struct st_maria_sort_param *, const void *, const void *); + int (*key_read)(struct st_maria_sort_param *, byte *); + int (*key_write)(struct st_maria_sort_param *, const byte *); + void (*lock_in_memory)(HA_CHECK *); + NEAR int (*write_keys)(struct st_maria_sort_param *, register byte **, + uint , struct st_buffpek *, IO_CACHE *); + NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); + NEAR int (*write_key)(struct st_maria_sort_param *, IO_CACHE *,char *, + uint, uint); +} MARIA_SORT_PARAM; + + +/* functions in maria_check */ +void maria_chk_init(HA_CHECK *param); +int maria_chk_status(HA_CHECK *param, MARIA_HA *info); +int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag); +int maria_chk_size(HA_CHECK *param, MARIA_HA *info); +int maria_chk_key(HA_CHECK *param, MARIA_HA *info); +int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, int extend); +int maria_repair(HA_CHECK *param, register MARIA_HA *info, + my_string name, int rep_quick); +int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, + my_string name); +int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, + const char *name, int rep_quick); +int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, + const char *name, int rep_quick); +int maria_change_to_newfile(const char *filename, const char *old_ext, + const char *new_ext, myf myflags); +void maria_lock_memory(HA_CHECK *param); +int maria_update_state_info(HA_CHECK *param, MARIA_HA *info, uint update); +void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part, + ulonglong *unique, ulonglong *notnull, + ulonglong records); +int 
maria_filecopy(HA_CHECK *param, File to, File from, my_off_t start, + my_off_t length, const char *type); +int maria_movepoint(MARIA_HA *info, byte *record, my_off_t oldpos, + my_off_t newpos, uint prot_key); +int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile); +int maria_test_if_almost_full(MARIA_HA *info); +int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename); +int maria_disable_indexes(MARIA_HA *info); +int maria_enable_indexes(MARIA_HA *info); +int maria_indexes_are_disabled(MARIA_HA *info); +void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows); +my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, ulonglong key_map, + my_bool force); + +int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows); +void maria_flush_bulk_insert(MARIA_HA *info, uint inx); +void maria_end_bulk_insert(MARIA_HA *info); +int maria_assign_to_key_cache(MARIA_HA *info, ulonglong key_map, + KEY_CACHE *key_cache); +void maria_change_key_cache(KEY_CACHE *old_key_cache, + KEY_CACHE *new_key_cache); +int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves); + +/* fulltext functions */ +FT_INFO *maria_ft_init_search(uint,void *, uint, byte *, uint, + CHARSET_INFO *, byte *); + +/* 'Almost-internal' Maria functions */ + +void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, + my_bool repair); + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/include/my_atomic.h b/include/my_atomic.h index 7b066e9b529..5f328f32f3f 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -13,6 +13,40 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + This header defines five atomic operations: + + my_atomic_add#(&var, what) + add 'what' to *var, and return the old value of *var + + my_atomic_fas#(&var, what) + 'Fetch And Store' + store 'what' in *var, and return the old value of *var + + my_atomic_cas#(&var, &old, new) + 'Compare And Swap' + if *var is equal to *old, then store 'new' in *var, and return TRUE + otherwise store *var in *old, and return FALSE + + my_atomic_load#(&var) + return *var + + my_atomic_store#(&var, what) + store 'what' in *var + + '#' is substituted by a size suffix - 8, 16, 32, or ptr + (e.g. my_atomic_add8, my_atomic_fas32, my_atomic_casptr). + + NOTE This operations are not always atomic, so they always must be + enclosed in my_atomic_rwlock_rdlock(lock)/my_atomic_rwlock_rdunlock(lock) + or my_atomic_rwlock_wrlock(lock)/my_atomic_rwlock_wrunlock(lock). + Hint: if a code block makes intensive use of atomic ops, it make sense + to take/release rwlock once for the whole block, not for every statement. + + On architectures where these operations are really atomic, rwlocks will + be optimized away. +*/ + #ifndef my_atomic_rwlock_init #define intptr void * @@ -26,70 +60,115 @@ #endif #ifndef make_atomic_add_body -#define make_atomic_add_body(S) \ +#define make_atomic_add_body(S) \ int ## S tmp=*a; \ while (!my_atomic_cas ## S(a, &tmp, tmp+v)); \ v=tmp; #endif +#ifdef __GNUC__ +/* + we want to be able to use my_atomic_xxx functions with + both signed and unsigned integers. But gcc will issue a warning + "passing arg N of `my_atomic_XXX' as [un]signed due to prototype" + if the signedness of the argument doesn't match the prototype, or + "pointer targets in passing argument N of my_atomic_XXX differ in signedness" + if int* is used where uint* is expected (or vice versa). 
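The comment block above documents the five my_atomic operations and the requirement to bracket them with my_atomic_rwlock_rdlock()/rdunlock() (or the wr variants) so the fallback implementations stay correct. A minimal usage sketch with made-up names, assuming the lock was set up with my_atomic_rwlock_init() at startup:

  static my_atomic_rwlock_t stats_lock;           /* hypothetical */
  static int32 volatile hits;

  void count_hit(void)
  {
    my_atomic_rwlock_wrlock(&stats_lock);         /* optimized away with real atomics */
    (void) my_atomic_add32(&hits, 1);             /* returns the old value */
    my_atomic_rwlock_wrunlock(&stats_lock);
  }

  int try_reset(int32 expected)
  {
    int done;
    my_atomic_rwlock_wrlock(&stats_lock);
    /* if hits == expected, store 0 and return TRUE;
       otherwise copy the current value into 'expected' and return FALSE */
    done= my_atomic_cas32(&hits, &expected, 0);
    my_atomic_rwlock_wrunlock(&stats_lock);
    return done;
  }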
+ Let's shut these warnings up +*/ +#define make_transparent_unions(S) \ + typedef union { \ + int ## S i; \ + uint ## S u; \ + } U_ ## S __attribute__ ((transparent_union)); \ + typedef union { \ + int ## S volatile *i; \ + uint ## S volatile *u; \ + } Uv_ ## S __attribute__ ((transparent_union)); +#define uintptr intptr +make_transparent_unions(8) +make_transparent_unions(16) +make_transparent_unions(32) +make_transparent_unions(ptr) +#undef uintptr +#undef make_transparent_unions +#define a U_a.i +#define cmp U_cmp.i +#define v U_v.i +#define set U_set.i +#else +#define U_8 int8 +#define U_16 int16 +#define U_32 int32 +#define U_ptr intptr +#define Uv_8 int8 +#define Uv_16 int16 +#define Uv_32 int32 +#define Uv_ptr intptr +#define U_a volatile *a +#define U_cmp *cmp +#define U_v v +#define U_set set +#endif /* __GCC__ transparent_union magic */ + #ifdef HAVE_INLINE -#define make_atomic_add(S) \ -static inline int ## S my_atomic_add ## S( \ - int ## S volatile *a, int ## S v) \ -{ \ - make_atomic_add_body(S); \ - return v; \ +#define make_atomic_add(S) \ +STATIC_INLINE int ## S my_atomic_add ## S( \ + Uv_ ## S U_a, U_ ## S U_v) \ +{ \ + make_atomic_add_body(S); \ + return v; \ } -#define make_atomic_swap(S) \ -static inline int ## S my_atomic_swap ## S( \ - int ## S volatile *a, int ## S v) \ -{ \ - make_atomic_swap_body(S); \ - return v; \ +#define make_atomic_fas(S) \ +STATIC_INLINE int ## S my_atomic_fas ## S( \ + Uv_ ## S U_a, U_ ## S U_v) \ +{ \ + make_atomic_fas_body(S); \ + return v; \ } -#define make_atomic_cas(S) \ -static inline int my_atomic_cas ## S(int ## S volatile *a, \ - int ## S *cmp, int ## S set) \ -{ \ - int8 ret; \ - make_atomic_cas_body(S); \ - return ret; \ +#define make_atomic_cas(S) \ +STATIC_INLINE int my_atomic_cas ## S(Uv_ ## S U_a, \ + Uv_ ## S U_cmp, U_ ## S U_set) \ +{ \ + int8 ret; \ + make_atomic_cas_body(S); \ + return ret; \ } -#define make_atomic_load(S) \ -static inline int ## S my_atomic_load ## S(int ## S volatile *a) \ -{ \ - int ## S ret; \ - make_atomic_load_body(S); \ - return ret; \ +#define make_atomic_load(S) \ +STATIC_INLINE int ## S my_atomic_load ## S(Uv_ ## S U_a) \ +{ \ + int ## S ret; \ + make_atomic_load_body(S); \ + return ret; \ } -#define make_atomic_store(S) \ -static inline void my_atomic_store ## S( \ - int ## S volatile *a, int ## S v) \ -{ \ - make_atomic_store_body(S); \ +#define make_atomic_store(S) \ +STATIC_INLINE void my_atomic_store ## S( \ + Uv_ ## S U_a, U_ ## S U_v) \ +{ \ + make_atomic_store_body(S); \ } #else /* no inline functions */ -#define make_atomic_add(S) \ -extern int ## S my_atomic_add ## S(int ## S volatile *a, int ## S v); +#define make_atomic_add(S) \ +extern int ## S my_atomic_add ## S(Uv_ ## S, U_ ## S); -#define make_atomic_swap(S) \ -extern int ## S my_atomic_swap ## S(int ## S volatile *a, int ## S v); +#define make_atomic_fas(S) \ +extern int ## S my_atomic_fas ## S(Uv_ ## S, U_ ## S); -#define make_atomic_cas(S) \ -extern int my_atomic_cas ## S(int ## S volatile *a, int ## S *cmp, int ## S set); +#define make_atomic_cas(S) \ +extern int my_atomic_cas ## S(Uv_ ## S, Uv_ ## S, U_ ## S); -#define make_atomic_load(S) \ -extern int ## S my_atomic_load ## S(int ## S volatile *a); +#define make_atomic_load(S) \ +extern int ## S my_atomic_load ## S(Uv_ ## S); -#define make_atomic_store(S) \ -extern void my_atomic_store ## S(int ## S volatile *a, int ## S v); +#define make_atomic_store(S) \ +extern void my_atomic_store ## S(Uv_ ## S, U_ ## S); #endif @@ -112,34 +191,42 @@ make_atomic_store(16) 
make_atomic_store(32) make_atomic_store(ptr) -make_atomic_swap( 8) -make_atomic_swap(16) -make_atomic_swap(32) -make_atomic_swap(ptr) +make_atomic_fas( 8) +make_atomic_fas(16) +make_atomic_fas(32) +make_atomic_fas(ptr) +#ifdef _atomic_h_cleanup_ +#include _atomic_h_cleanup_ +#undef _atomic_h_cleanup_ +#endif + +#undef U_8 +#undef U_16 +#undef U_32 +#undef U_ptr +#undef a +#undef cmp +#undef v +#undef set +#undef U_a +#undef U_cmp +#undef U_v +#undef U_set #undef make_atomic_add #undef make_atomic_cas #undef make_atomic_load #undef make_atomic_store -#undef make_atomic_swap +#undef make_atomic_fas #undef make_atomic_add_body #undef make_atomic_cas_body #undef make_atomic_load_body #undef make_atomic_store_body -#undef make_atomic_swap_body +#undef make_atomic_fas_body #undef intptr -#ifdef _atomic_h_cleanup_ -#include _atomic_h_cleanup_ -#undef _atomic_h_cleanup_ -#endif - -#if SIZEOF_CHARP == SIZEOF_INT -typedef int intptr; -#elif SIZEOF_CHARP == SIZEOF_LONG -typedef long intptr; -#else -#error +#ifndef LF_BACKOFF +#define LF_BACKOFF (1) #endif #define MY_ATOMIC_OK 0 diff --git a/include/my_base.h b/include/my_base.h index 26d513ba2a7..38376adfe85 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -14,7 +14,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* This file includes constants used with all databases */ -/* Author: Michael Widenius */ #ifndef _my_base_h #define _my_base_h @@ -49,7 +48,7 @@ #define HA_OPEN_FROM_SQL_LAYER 64 #define HA_OPEN_MMAP 128 /* open memory mapped */ - /* The following is parameter to ha_rkey() how to use key */ +/* The following is parameter to ha_rkey() how to use key */ /* We define a complete-field prefix of a key value as a prefix where @@ -449,7 +448,7 @@ enum en_fieldtype { }; enum data_file_type { - STATIC_RECORD,DYNAMIC_RECORD,COMPRESSED_RECORD + STATIC_RECORD, DYNAMIC_RECORD, COMPRESSED_RECORD, BLOCK_RECORD }; /* For key ranges */ @@ -500,4 +499,7 @@ typedef ulong ha_rows; #define HA_VARCHAR_PACKLENGTH(field_length) ((field_length) < 256 ? 
1 :2) +/* invalidator function reference for Query Cache */ +typedef void (* invalidator_by_filename)(const char * filename); + #endif /* _my_base_h */ diff --git a/include/my_bit.h b/include/my_bit.h new file mode 100644 index 00000000000..58e8bb39683 --- /dev/null +++ b/include/my_bit.h @@ -0,0 +1,107 @@ +/* + Some useful bit functions +*/ + +#ifdef HAVE_INLINE + +extern const char _my_bits_nbits[256]; +extern const uchar _my_bits_reverse_table[256]; + +/* + Find smallest X in 2^X >= value + This can be used to divide a number with value by doing a shift instead +*/ + +STATIC_INLINE uint my_bit_log2(ulong value) +{ + uint bit; + for (bit=0 ; value > 1 ; value>>=1, bit++) ; + return bit; +} + +STATIC_INLINE uint my_count_bits(ulonglong v) +{ +#if SIZEOF_LONG_LONG > 4 + /* The following code is a bit faster on 16 bit machines than if we would + only shift v */ + ulong v2=(ulong) (v >> 32); + return (uint) (uchar) (_my_bits_nbits[(uchar) v] + + _my_bits_nbits[(uchar) (v >> 8)] + + _my_bits_nbits[(uchar) (v >> 16)] + + _my_bits_nbits[(uchar) (v >> 24)] + + _my_bits_nbits[(uchar) (v2)] + + _my_bits_nbits[(uchar) (v2 >> 8)] + + _my_bits_nbits[(uchar) (v2 >> 16)] + + _my_bits_nbits[(uchar) (v2 >> 24)]); +#else + return (uint) (uchar) (_my_bits_nbits[(uchar) v] + + _my_bits_nbits[(uchar) (v >> 8)] + + _my_bits_nbits[(uchar) (v >> 16)] + + _my_bits_nbits[(uchar) (v >> 24)]); +#endif +} + +STATIC_INLINE uint my_count_bits_ushort(ushort v) +{ + return _my_bits_nbits[v]; +} + + +/* + Next highest power of two + + SYNOPSIS + my_round_up_to_next_power() + v Value to check + + RETURN + Next or equal power of 2 + Note: 0 will return 0 + + NOTES + Algorithm by Sean Anderson, according to: + http://graphics.stanford.edu/~seander/bithacks.html + (Orignal code public domain) + + Comments shows how this works with 01100000000000000000000000001011 +*/ + +STATIC_INLINE uint32 my_round_up_to_next_power(uint32 v) +{ + v--; /* 01100000000000000000000000001010 */ + v|= v >> 1; /* 01110000000000000000000000001111 */ + v|= v >> 2; /* 01111100000000000000000000001111 */ + v|= v >> 4; /* 01111111110000000000000000001111 */ + v|= v >> 8; /* 01111111111111111100000000001111 */ + v|= v >> 16; /* 01111111111111111111111111111111 */ + return v+1; /* 10000000000000000000000000000000 */ +} + +STATIC_INLINE uint32 my_clear_highest_bit(uint32 v) +{ + uint32 w=v >> 1; + w|= w >> 1; + w|= w >> 2; + w|= w >> 4; + w|= w >> 8; + w|= w >> 16; + return v & w; +} + +STATIC_INLINE uint32 my_reverse_bits(uint32 key) +{ + return + (_my_bits_reverse_table[ key & 255] << 24) | + (_my_bits_reverse_table[(key>> 8) & 255] << 16) | + (_my_bits_reverse_table[(key>>16) & 255] << 8) | + _my_bits_reverse_table[(key>>24) ]; +} + +#else +extern uint my_bit_log2(ulong value); +extern uint32 my_round_up_to_next_power(uint32 v); +uint32 my_clear_highest_bit(uint32 v); +uint32 my_reverse_bits(uint32 key); +extern uint my_count_bits(ulonglong v); +extern uint my_count_bits_ushort(ushort v); +#endif diff --git a/include/my_dbug.h b/include/my_dbug.h index 0541cf00c29..080f0cb7c54 100644 --- a/include/my_dbug.h +++ b/include/my_dbug.h @@ -101,7 +101,7 @@ extern FILE *_db_fp_(void); #define DBUG_LONGJMP(a1) longjmp(a1) #define DBUG_DUMP(keyword,a1,a2) #define DBUG_END() -#define DBUG_ASSERT(A) +#define DBUG_ASSERT(A) do { } while(0) #define DBUG_LOCK_FILE #define DBUG_FILE (stderr) #define DBUG_UNLOCK_FILE diff --git a/include/my_global.h b/include/my_global.h index e25752b8ed8..68f47a75e4b 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ 
-224,6 +224,8 @@ #endif #undef inline_test_2 #undef inline_test_1 +/* helper macro for "instantiating" inline functions */ +#define STATIC_INLINE static inline /* The following macros are used to control inlining a bit more than @@ -458,6 +460,14 @@ C_MODE_END */ #include <assert.h> +/* an assert that works at compile-time. only for constant expression */ +#define compile_time_assert(X) \ + do \ + { \ + char compile_time_assert[(X) ? 1 : -1] \ + __attribute__ ((unused)); \ + } while(0) + /* Go around some bugs in different OS and compilers */ #if defined (HPUX11) && defined(_LARGEFILE_SOURCE) #define _LARGEFILE64_SOURCE @@ -1016,6 +1026,14 @@ typedef unsigned __int64 my_ulonglong; typedef unsigned long long my_ulonglong; #endif +#if SIZEOF_CHARP == SIZEOF_INT +typedef int intptr; +#elif SIZEOF_CHARP == SIZEOF_LONG +typedef long intptr; +#else +#error +#endif + #ifdef USE_RAID /* The following is done with a if to not get problems with pre-processors @@ -1479,4 +1497,12 @@ do { doubleget_union _tmp; \ #define dlerror() "" #endif +/* + Only Linux is known to need an explicit sync of the directory to make sure a + file creation/deletion/renaming in(from,to) this directory durable. +*/ +#ifdef TARGET_OS_LINUX +#define NEED_EXPLICIT_SYNC_DIR 1 +#endif + #endif /* my_global_h */ diff --git a/include/my_handler.h b/include/my_handler.h index d7cd0567f9c..72caf67398f 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -18,10 +18,30 @@ #ifndef _my_handler_h #define _my_handler_h -#include "my_base.h" -#include "m_ctype.h" #include "myisampack.h" +/* + There is a hard limit for the maximum number of keys as there are only + 8 bits in the index file header for the number of keys in a table. + This means that 0..255 keys can exist for a table. The idea of + HA_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on + a MyISAM table for which one has more keys than MyISAM is normally + compiled for. If you don't have this, you will get a core dump when + running myisamchk compiled for 128 keys on a table with 255 keys. +*/ + +#define HA_MAX_POSSIBLE_KEY 255 /* For myisamchk */ +/* + The following defines can be increased if necessary. + But beware the dependency of HA_MAX_POSSIBLE_KEY_BUFF and HA_MAX_KEY_LENGTH. 
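Two of the helpers introduced above lend themselves to a short, hypothetical example: compile_time_assert() from my_global.h, which fails the build through a negative array size, and my_round_up_to_next_power()/my_bit_log2() from my_bit.h.

  static uint32 pick_buffer_size(uint32 requested)
  {
    compile_time_assert(sizeof(uint32) == 4);     /* breaks the build if false */
    /* e.g. my_round_up_to_next_power(1000) == 1024, and my_bit_log2(1024) == 10,
       so the result can double as a shift count */
    return my_round_up_to_next_power(requested);
  }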
+*/ + +#define HA_MAX_KEY_LENGTH 1000 /* Max length in bytes */ +#define HA_MAX_KEY_SEG 16 /* Max segments for key */ + +#define HA_MAX_POSSIBLE_KEY_BUFF (HA_MAX_KEY_LENGTH + 24+ 6+6) +#define HA_MAX_KEY_BUFF (HA_MAX_KEY_LENGTH+HA_MAX_KEY_SEG*6+8+8) + typedef struct st_HA_KEYSEG /* Key-portion */ { CHARSET_INFO *charset; @@ -38,33 +58,35 @@ typedef struct st_HA_KEYSEG /* Key-portion */ } HA_KEYSEG; #define get_key_length(length,key) \ -{ if ((uchar) *(key) != 255) \ - length= (uint) (uchar) *((key)++); \ +{ if (*(uchar*) (key) != 255) \ + length= (uint) *(uchar*) ((key)++); \ else \ - { length=mi_uint2korr((key)+1); (key)+=3; } \ + { length= mi_uint2korr((key)+1); (key)+=3; } \ } #define get_key_length_rdonly(length,key) \ -{ if ((uchar) *(key) != 255) \ - length= ((uint) (uchar) *((key))); \ +{ if (*(uchar*) (key) != 255) \ + length= ((uint) *(uchar*) ((key))); \ else \ - { length=mi_uint2korr((key)+1); } \ + { length= mi_uint2korr((key)+1); } \ } #define get_key_pack_length(length,length_pack,key) \ -{ if ((uchar) *(key) != 255) \ - { length= (uint) (uchar) *((key)++); length_pack=1; }\ +{ if (*(uchar*) (key) != 255) \ + { length= (uint) *(uchar*) ((key)++); length_pack= 1; }\ else \ - { length=mi_uint2korr((key)+1); (key)+=3; length_pack=3; } \ + { length=mi_uint2korr((key)+1); (key)+= 3; length_pack= 3; } \ } #define store_key_length_inc(key,length) \ { if ((length) < 255) \ - { *(key)++=(length); } \ + { *(key)++= (length); } \ else \ { *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \ } +#define size_to_store_key_length(length) ((length) < 255 ? 1 : 3) + #define get_rec_bits(bit_ptr, bit_ofs, bit_len) \ (((((uint16) (bit_ptr)[1] << 8) | (uint16) (bit_ptr)[0]) >> (bit_ofs)) & \ ((1 << (bit_len)) - 1)) @@ -81,7 +103,7 @@ typedef struct st_HA_KEYSEG /* Key-portion */ #define clr_rec_bits(bit_ptr, bit_ofs, bit_len) \ set_rec_bits(0, bit_ptr, bit_ofs, bit_len) -extern int mi_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint , +extern int ha_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint , my_bool, my_bool); extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, register uchar *b, uint key_length, uint nextflag, diff --git a/include/my_sys.h b/include/my_sys.h index 8e831b448d7..c8362455970 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -54,6 +54,7 @@ extern int NEAR my_errno; /* Last error in mysys */ #define MY_WME 16 /* Write message on error */ #define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */ #define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors */ +#define MY_SYNC_DIR 1024 /* my_create/delete/rename: sync directory */ #define MY_RAID 64 /* Support for RAID */ #define MY_FULL_IO 512 /* For my_read - loop intil I/O is complete */ #define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */ @@ -215,6 +216,7 @@ extern int (*error_handler_hook)(uint my_err, const char *str,myf MyFlags); extern int (*fatal_error_handler_hook)(uint my_err, const char *str, myf MyFlags); extern uint my_file_limit; +extern ulong my_thread_stack_size; #ifdef HAVE_LARGE_PAGES extern my_bool my_use_large_pages; @@ -628,6 +630,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags); extern int my_fclose(FILE *fd,myf MyFlags); extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags); extern int my_sync(File fd, myf my_flags); +extern int my_sync_dir(const char *dir_name, myf my_flags); +extern int my_sync_dir_by_file(const char *file_name, myf my_flags); extern int my_error 
_VARARGS((int nr,myf MyFlags, ...)); extern int my_printf_error _VARARGS((uint my_err, const char *format, myf MyFlags, ...)) @@ -767,6 +771,7 @@ extern my_bool insert_dynamic(DYNAMIC_ARRAY *array,gptr element); extern byte *alloc_dynamic(DYNAMIC_ARRAY *array); extern byte *pop_dynamic(DYNAMIC_ARRAY*); extern my_bool set_dynamic(DYNAMIC_ARRAY *array,gptr element,uint array_index); +extern my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements); extern void get_dynamic(DYNAMIC_ARRAY *array,gptr element,uint array_index); extern void delete_dynamic(DYNAMIC_ARRAY *array); extern void delete_dynamic_element(DYNAMIC_ARRAY *array, uint array_index); @@ -831,10 +836,6 @@ extern int packfrm(const void *, uint, const void **, uint *); extern int unpackfrm(const void **, uint *, const void *); extern ha_checksum my_checksum(ha_checksum crc, const byte *mem, uint count); -extern uint my_bit_log2(ulong value); -extern uint32 my_round_up_to_next_power(uint32 v); -extern uint my_count_bits(ulonglong v); -extern uint my_count_bits_ushort(ushort v); extern void my_sleep(ulong m_seconds); extern ulong crc32(ulong crc, const uchar *buf, uint len); extern uint my_set_max_open_files(uint files); @@ -850,7 +851,7 @@ extern int my_getncpus(); #ifndef MAP_NOSYNC #define MAP_NOSYNC 0 #endif -#ifndef MAP_NORESERVE +#ifndef MAP_NORESERVE #define MAP_NORESERVE 0 /* For irix and AIX */ #endif diff --git a/include/myisam.h b/include/myisam.h index f763bf07719..d6d6426adf7 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -31,33 +31,19 @@ extern "C" { #include "keycache.h" #endif #include "my_handler.h" +#include <myisamchk.h> #include <mysql/plugin.h> /* - There is a hard limit for the maximum number of keys as there are only - 8 bits in the index file header for the number of keys in a table. - This means that 0..255 keys can exist for a table. The idea of - MI_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on - a MyISAM table for which one has more keys than MyISAM is normally - compiled for. If you don't have this, you will get a core dump when - running myisamchk compiled for 128 keys on a table with 255 keys. + Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details */ -#define MI_MAX_POSSIBLE_KEY 255 /* For myisam_chk */ -#if MAX_INDEXES > MI_MAX_POSSIBLE_KEY -#define MI_MAX_KEY MI_MAX_POSSIBLE_KEY /* Max allowed keys */ + +#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY +#define MI_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */ #else #define MI_MAX_KEY MAX_INDEXES /* Max allowed keys */ #endif -#define MI_MAX_POSSIBLE_KEY_BUFF (1024+6+6) /* For myisam_chk */ -/* - The following defines can be increased if necessary. - But beware the dependency of MI_MAX_POSSIBLE_KEY_BUFF and MI_MAX_KEY_LENGTH. 
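The new directory-sync primitives declared in my_sys.h above (my_sync_dir(), my_sync_dir_by_file() and the MY_SYNC_DIR flag for my_create/my_delete/my_rename) exist because, per the my_global.h comment, at least Linux needs the containing directory synced before a file creation, deletion or rename can be considered durable. A hedged sketch of the intended call pattern, assuming the usual mysys my_create()/my_close() signatures (the helper name is made up; on platforms without NEED_EXPLICIT_SYNC_DIR the directory sync is expected to be a no-op):

  #include <my_global.h>
  #include <my_sys.h>

  /* Create a file and make both its contents and its directory entry
     durable; returns 0 on success, non-zero on any failure. */
  static int create_file_durably(const char *path)
  {
    File fd= my_create(path, 0, O_RDWR, MYF(MY_WME));
    if (fd < 0)
      return 1;
    if (my_sync(fd, MYF(MY_WME)) ||              /* flush file data       */
        my_sync_dir_by_file(path, MYF(MY_WME)))  /* flush directory entry */
    {
      (void) my_close(fd, MYF(MY_WME));
      return 1;
    }
    return my_close(fd, MYF(MY_WME)) != 0;
  }

Passing MYF(MY_WME | MY_SYNC_DIR) to my_create() directly is the flag-based way to request the same directory sync without a separate call.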
-*/ -#define MI_MAX_KEY_LENGTH 1000 /* Max length in bytes */ -#define MI_MAX_KEY_SEG 16 /* Max segments for key */ - -#define MI_MAX_KEY_BUFF (MI_MAX_KEY_LENGTH+MI_MAX_KEY_SEG*6+8+8) #define MI_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */ #define MI_NAME_IEXT ".MYI" #define MI_NAME_DEXT ".MYD" @@ -256,9 +242,6 @@ typedef struct st_columndef /* column information */ #endif } MI_COLUMNDEF; -/* invalidator function reference for Query Cache */ -typedef void (* invalidator_by_filename)(const char * filename); - extern my_string myisam_log_filename; /* Name of logfile */ extern ulong myisam_block_size; extern ulong myisam_concurrent_insert; @@ -311,195 +294,117 @@ extern int mi_delete_all_rows(struct st_myisam_info *info); extern ulong _mi_calc_blob_length(uint length , const byte *pos); extern uint mi_get_pointer_length(ulonglong file_length, uint def); -/* this is used to pass to mysql_myisamchk_table -- by Sasha Pachev */ +/* this is used to pass to mysql_myisamchk_table */ #define MYISAMCHK_REPAIR 1 /* equivalent to myisamchk -r */ #define MYISAMCHK_VERIFY 2 /* Verify, run repair if failure */ -/* - Definitions needed for myisamchk.c - - Entries marked as "QQ to be removed" are NOT used to - pass check/repair options to mi_check.c. They are used - internally by myisamchk.c or/and ha_myisam.cc and should NOT - be stored together with other flags. They should be removed - from the following list to make addition of new flags possible. -*/ - -#define T_AUTO_INC 1 -#define T_AUTO_REPAIR 2 /* QQ to be removed */ -#define T_BACKUP_DATA 4 -#define T_CALC_CHECKSUM 8 -#define T_CHECK 16 /* QQ to be removed */ -#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */ -#define T_CREATE_MISSING_KEYS 64 -#define T_DESCRIPT 128 -#define T_DONT_CHECK_CHECKSUM 256 -#define T_EXTEND 512 -#define T_FAST (1L << 10) /* QQ to be removed */ -#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */ -#define T_FORCE_UNIQUENESS (1L << 12) -#define T_INFO (1L << 13) -#define T_MEDIUM (1L << 14) -#define T_QUICK (1L << 15) /* QQ to be removed */ -#define T_READONLY (1L << 16) /* QQ to be removed */ -#define T_REP (1L << 17) -#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */ -#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */ -#define T_RETRY_WITHOUT_QUICK (1L << 20) -#define T_SAFE_REPAIR (1L << 21) -#define T_SILENT (1L << 22) -#define T_SORT_INDEX (1L << 23) /* QQ to be removed */ -#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */ -#define T_STATISTICS (1L << 25) -#define T_UNPACK (1L << 26) -#define T_UPDATE_STATE (1L << 27) -#define T_VERBOSE (1L << 28) -#define T_VERY_SILENT (1L << 29) -#define T_WAIT_FOREVER (1L << 30) -#define T_WRITE_LOOP ((ulong) 1L << 31) - -#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL) - -/* - Flags used by myisamchk.c or/and ha_myisam.cc that are NOT passed - to mi_check.c follows: -*/ - -#define TT_USEFRM 1 -#define TT_FOR_UPGRADE 2 +typedef uint mi_bit_type; -#define O_NEW_INDEX 1 /* Bits set in out_flag */ -#define O_NEW_DATA 2 -#define O_DATA_LOST 4 +typedef struct st_mi_bit_buff +{ /* Used for packing of record */ + mi_bit_type current_byte; + uint bits; + uchar *pos, *end, *blob_pos, *blob_end; + uint error; +} MI_BIT_BUFF; -/* these struct is used by my_check to tell it what to do */ -typedef struct st_sort_key_blocks /* Used when sorting */ +typedef struct st_sort_info { - uchar *buff,*end_pos; - uchar lastkey[MI_MAX_POSSIBLE_KEY_BUFF]; - uint last_length; - int inited; -} SORT_KEY_BLOCKS; - - -/* - MyISAM supports several statistics 
collection methods. Currently statistics - collection method is not stored in MyISAM file and has to be specified for - each table analyze/repair operation in MI_CHECK::stats_method. -*/ +#ifdef THREAD + /* sync things */ + pthread_mutex_t mutex; + pthread_cond_t cond; +#endif + MI_INFO *info; + HA_CHECK *param; + char *buff; + SORT_KEY_BLOCKS *key_block, *key_block_end; + SORT_FT_BUF *ft_buf; + my_off_t filelength, dupp, buff_length; + ha_rows max_records; + uint current_key, total_keys; + uint got_error, threads_running; + myf myf_rw; + enum data_file_type new_data_file_type; +} MI_SORT_INFO; -typedef enum -{ - /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */ - MI_STATS_METHOD_NULLS_NOT_EQUAL, - /* Treat NULLs as equal when collecting statistics (like 4.0 did) */ - MI_STATS_METHOD_NULLS_EQUAL, - /* Ignore NULLs - count only tuples without NULLs in the index components */ - MI_STATS_METHOD_IGNORE_NULLS -} enum_mi_stats_method; - -typedef struct st_mi_check_param +typedef struct st_mi_sort_param { - ulonglong auto_increment_value; - ulonglong max_data_file_length; - ulonglong keys_in_use; - ulonglong max_record_length; - my_off_t search_after_block; - my_off_t new_file_pos,key_file_blocks; - my_off_t keydata,totaldata,key_blocks,start_check_pos; - ha_rows total_records,total_deleted; - ha_checksum record_checksum,glob_crc; - ulong use_buffers,read_buffer_length,write_buffer_length, - sort_buffer_length,sort_key_blocks; - uint out_flag,warning_printed,error_printed,verbose; - uint opt_sort_key,total_files,max_level; - uint testflag, key_cache_block_size; - uint8 language; - my_bool using_global_keycache, opt_lock_memory, opt_follow_links; - my_bool retry_repair, force_sort; - char temp_filename[FN_REFLEN],*isam_file_name; - MY_TMPDIR *tmpdir; - int tmpfile_createflag; - myf myf_rw; - IO_CACHE read_cache; + pthread_t thr; + IO_CACHE read_cache, tempfile, tempfile_for_exceptions; + DYNAMIC_ARRAY buffpek; + MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ + MI_KEYDEF *keyinfo; + MI_SORT_INFO *sort_info; + HA_KEYSEG *seg; + uchar **sort_keys; + byte *rec_buff; + void *wordlist, *wordptr; + MEM_ROOT wordroot; + char *record; + MY_TMPDIR *tmpdir; + /* The next two are used to collect statistics, see update_key_parts for description. 
*/ - ulonglong unique_count[MI_MAX_KEY_SEG+1]; - ulonglong notnull_count[MI_MAX_KEY_SEG+1]; - - ha_checksum key_crc[MI_MAX_POSSIBLE_KEY]; - ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY]; - void *thd; - const char *db_name, *table_name; - const char *op_name; - enum_mi_stats_method stats_method; -} MI_CHECK; - -typedef struct st_sort_ft_buf -{ - uchar *buf, *end; - int count; - uchar lastkey[MI_MAX_KEY_BUFF]; -} SORT_FT_BUF; + ulonglong unique[HA_MAX_KEY_SEG+1]; + ulonglong notnull[HA_MAX_KEY_SEG+1]; + + my_off_t pos,max_pos,filepos,start_recpos; + uint key, key_length,real_key_length,sortbuff_size; + uint maxbuffers, keys, find_length, sort_keys_length; + my_bool fix_datafile, master; + my_bool calc_checksum; /* calculate table checksum */ + + int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *); + int (*key_read)(struct st_mi_sort_param *,void *); + int (*key_write)(struct st_mi_sort_param *, const void *); + void (*lock_in_memory)(HA_CHECK *); + NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **, + uint , struct st_buffpek *, IO_CACHE *); + NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); + NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,char *, + uint, uint); +} MI_SORT_PARAM; -typedef struct st_sort_info -{ - my_off_t filelength,dupp,buff_length; - ha_rows max_records; - uint current_key, total_keys; - myf myf_rw; - enum data_file_type new_data_file_type; - MI_INFO *info; - MI_CHECK *param; - char *buff; - SORT_KEY_BLOCKS *key_block,*key_block_end; - SORT_FT_BUF *ft_buf; - /* sync things */ - uint got_error, threads_running; -#ifdef THREAD - pthread_mutex_t mutex; - pthread_cond_t cond; -#endif -} SORT_INFO; /* functions in mi_check */ -void myisamchk_init(MI_CHECK *param); -int chk_status(MI_CHECK *param, MI_INFO *info); -int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag); -int chk_size(MI_CHECK *param, MI_INFO *info); -int chk_key(MI_CHECK *param, MI_INFO *info); -int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend); -int mi_repair(MI_CHECK *param, register MI_INFO *info, +void myisamchk_init(HA_CHECK *param); +int chk_status(HA_CHECK *param, MI_INFO *info); +int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag); +int chk_size(HA_CHECK *param, MI_INFO *info); +int chk_key(HA_CHECK *param, MI_INFO *info); +int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend); +int mi_repair(HA_CHECK *param, register MI_INFO *info, my_string name, int rep_quick); -int mi_sort_index(MI_CHECK *param, register MI_INFO *info, my_string name); -int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, +int mi_sort_index(HA_CHECK *param, register MI_INFO *info, my_string name); +int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick); -int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, +int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick); int change_to_newfile(const char * filename, const char * old_ext, const char * new_ext, uint raid_chunks, myf myflags); -int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, +int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, const char *filetype, const char *filename); -void lock_memory(MI_CHECK *param); -void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, +void lock_memory(HA_CHECK *param); +void update_auto_increment_key(HA_CHECK *param, MI_INFO *info, my_bool repair); -int 
update_state_info(MI_CHECK *param, MI_INFO *info,uint update); +int update_state_info(HA_CHECK *param, MI_INFO *info,uint update); void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, ulonglong *unique, ulonglong *notnull, ulonglong records); -int filecopy(MI_CHECK *param, File to,File from,my_off_t start, +int filecopy(HA_CHECK *param, File to,File from,my_off_t start, my_off_t length, const char *type); int movepoint(MI_INFO *info,byte *record,my_off_t oldpos, my_off_t newpos, uint prot_key); -int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile); +int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile); int test_if_almost_full(MI_INFO *info); -int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename); +int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename); void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows); my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows, ulonglong key_map, my_bool force); @@ -513,6 +418,13 @@ void mi_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache); int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves); +int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile); +int flush_pending_blocks(MI_SORT_PARAM *param); +int sort_ft_buf_flush(MI_SORT_PARAM *sort_param); +int thr_write_keys(MI_SORT_PARAM *sort_param); +int sort_write_record(MI_SORT_PARAM *sort_param); +int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong); + #ifdef __cplusplus } #endif diff --git a/include/myisamchk.h b/include/myisamchk.h new file mode 100644 index 00000000000..66d0a77b62f --- /dev/null +++ b/include/myisamchk.h @@ -0,0 +1,164 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Definitions needed for myisamchk/mariachk.c */ + +/* + Entries marked as "QQ to be removed" are NOT used to + pass check/repair options to xxx_check.c. They are used + internally by xxxchk.c or/and ha_xxxx.cc and should NOT + be stored together with other flags. They should be removed + from the following list to make addition of new flags possible. 
+*/ + +#ifndef _myisamchk_h +#define _myisamchk_h + +#define T_AUTO_INC 1 +#define T_AUTO_REPAIR 2 /* QQ to be removed */ +#define T_BACKUP_DATA 4 +#define T_CALC_CHECKSUM 8 +#define T_CHECK 16 /* QQ to be removed */ +#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */ +#define T_CREATE_MISSING_KEYS 64 +#define T_DESCRIPT 128 +#define T_DONT_CHECK_CHECKSUM 256 +#define T_EXTEND 512 +#define T_FAST (1L << 10) /* QQ to be removed */ +#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */ +#define T_FORCE_UNIQUENESS (1L << 12) +#define T_INFO (1L << 13) +#define T_MEDIUM (1L << 14) +#define T_QUICK (1L << 15) /* QQ to be removed */ +#define T_READONLY (1L << 16) /* QQ to be removed */ +#define T_REP (1L << 17) +#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */ +#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */ +#define T_RETRY_WITHOUT_QUICK (1L << 20) +#define T_SAFE_REPAIR (1L << 21) +#define T_SILENT (1L << 22) +#define T_SORT_INDEX (1L << 23) /* QQ to be removed */ +#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */ +#define T_STATISTICS (1L << 25) +#define T_UNPACK (1L << 26) +#define T_UPDATE_STATE (1L << 27) +#define T_VERBOSE (1L << 28) +#define T_VERY_SILENT (1L << 29) +#define T_WAIT_FOREVER (1L << 30) +#define T_WRITE_LOOP ((ulong) 1L << 31) + +#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL) + +/* + Flags used by xxxxchk.c or/and ha_xxxx.cc that are NOT passed + to xxxcheck.c follows: +*/ + +#define TT_USEFRM 1 +#define TT_FOR_UPGRADE 2 + +#define O_NEW_INDEX 1 /* Bits set in out_flag */ +#define O_NEW_DATA 2 +#define O_DATA_LOST 4 + +typedef struct st_sort_key_blocks /* Used when sorting */ +{ + byte *buff, *end_pos; + byte lastkey[HA_MAX_POSSIBLE_KEY_BUFF]; + uint last_length; + int inited; +} SORT_KEY_BLOCKS; + + +/* + MARIA/MYISAM supports several statistics collection + methods. Currently statistics collection method is not stored in + MARIA file and has to be specified for each table analyze/repair + operation in MI_CHECK::stats_method. +*/ + +typedef enum +{ + /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */ + MI_STATS_METHOD_NULLS_NOT_EQUAL, + /* Treat NULLs as equal when collecting statistics (like 4.0 did) */ + MI_STATS_METHOD_NULLS_EQUAL, + /* Ignore NULLs - count only tuples without NULLs in the index components */ + MI_STATS_METHOD_IGNORE_NULLS +} enum_handler_stats_method; + + +typedef struct st_handler_check_param +{ + char *isam_file_name; + MY_TMPDIR *tmpdir; + void *thd; + const char *db_name, *table_name, *op_name; + ulonglong auto_increment_value; + ulonglong max_data_file_length; + ulonglong keys_in_use; + ulonglong max_record_length; + /* + The next two are used to collect statistics, see update_key_parts for + description. 
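The T_* options defined at the top of this new header are independent bits that callers OR together into a single flag word (the testflag member of the HA_CHECK structure being defined here), so tools such as myisamchk test them with plain bitwise operations. A small illustrative helper, not part of the patch:

  /* Returns 0 if no repair was requested, 2 for a quick repair
     (index file only), 1 for any other repair method. */
  static int repair_mode(ulong testflag)
  {
    if (!(testflag & T_REP_ANY))   /* none of the T_REP* bits set */
      return 0;
    if (testflag & T_QUICK)
      return 2;
    return 1;
  }

  /* e.g. "myisamchk -r -q" might arrive here with
     T_REP_BY_SORT | T_QUICK set. */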
+ */ + ulonglong unique_count[HA_MAX_KEY_SEG + 1]; + ulonglong notnull_count[HA_MAX_KEY_SEG + 1]; + + my_off_t search_after_block; + my_off_t new_file_pos, key_file_blocks; + my_off_t keydata, totaldata, key_blocks, start_check_pos; + my_off_t used, empty, splits, del_length, link_used; + ha_rows total_records, total_deleted, records,del_blocks; + ha_rows full_page_count, tail_count; + ha_checksum record_checksum, glob_crc; + ha_checksum key_crc[HA_MAX_POSSIBLE_KEY]; + ha_checksum tmp_key_crc[HA_MAX_POSSIBLE_KEY]; + ha_checksum tmp_record_checksum; + ulong use_buffers, read_buffer_length, write_buffer_length; + ulong sort_buffer_length, sort_key_blocks; + ulong rec_per_key_part[HA_MAX_KEY_SEG * HA_MAX_POSSIBLE_KEY]; + uint out_flag, warning_printed, error_printed, verbose; + uint opt_sort_key, total_files, max_level; + uint testflag, key_cache_block_size; + int tmpfile_createflag, err_count; + myf myf_rw; + uint8 language; + my_bool using_global_keycache, opt_lock_memory, opt_follow_links; + my_bool retry_repair, force_sort, calc_checksum, static_row_size; + char temp_filename[FN_REFLEN]; + IO_CACHE read_cache; + enum_handler_stats_method stats_method; +} HA_CHECK; + + +typedef struct st_sort_ftbuf +{ + byte *buf, *end; + int count; + byte lastkey[HA_MAX_KEY_BUFF]; +} SORT_FT_BUF; + + +typedef struct st_buffpek { + my_off_t file_pos; /* Where we are in the sort file */ + byte *base, *key; /* Key pointers */ + ha_rows count; /* Number of rows in table */ + ulong mem_count; /* numbers of keys in memory */ + ulong max_keys; /* Max keys in buffert */ +} BUFFPEK; + +#endif /* _myisamchk_h */ diff --git a/include/pagecache.h b/include/pagecache.h new file mode 100644 index 00000000000..32e23fb54ea --- /dev/null +++ b/include/pagecache.h @@ -0,0 +1,228 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Page cache variable structures */ + +#ifndef _pagecache_h +#define _pagecache_h +C_MODE_START + +#include "../storage/maria/ma_loghandler_lsn.h" +#include <m_string.h> + +/* Type of the page */ +enum pagecache_page_type +{ +#ifndef DBUG_OFF + /* used only for control page type changing during debugging */ + PAGECACHE_EMPTY_PAGE, +#endif + /* the page does not contain LSN */ + PAGECACHE_PLAIN_PAGE, + /* the page contain LSN (maria tablespace page) */ + PAGECACHE_LSN_PAGE +}; + +/* + This enum describe lock status changing. every type of page cache will + interpret WRITE/READ lock as it need. 
+*/ +enum pagecache_page_lock +{ + PAGECACHE_LOCK_LEFT_UNLOCKED, /* free -> free */ + PAGECACHE_LOCK_LEFT_READLOCKED, /* read -> read */ + PAGECACHE_LOCK_LEFT_WRITELOCKED, /* write -> write */ + PAGECACHE_LOCK_READ, /* free -> read */ + PAGECACHE_LOCK_WRITE, /* free -> write */ + PAGECACHE_LOCK_READ_UNLOCK, /* read -> free */ + PAGECACHE_LOCK_WRITE_UNLOCK, /* write -> free */ + PAGECACHE_LOCK_WRITE_TO_READ /* write -> read */ +}; +/* + This enum describe pin status changing +*/ +enum pagecache_page_pin +{ + PAGECACHE_PIN_LEFT_PINNED, /* pinned -> pinned */ + PAGECACHE_PIN_LEFT_UNPINNED, /* unpinned -> unpinned */ + PAGECACHE_PIN, /* unpinned -> pinned */ + PAGECACHE_UNPIN /* pinned -> unpinned */ +}; +/* How to write the page */ +enum pagecache_write_mode +{ + /* do not write immediately, i.e. it will be dirty page */ + PAGECACHE_WRITE_DELAY, + /* write page to the file and put it to the cache */ + PAGECACHE_WRITE_NOW, + /* page already is in the file. (key cache insert analogue) */ + PAGECACHE_WRITE_DONE +}; + +typedef void *PAGECACHE_PAGE_LINK; + +/* file descriptor for Maria */ +typedef struct st_pagecache_file +{ + int file; /* it is for debugging purposes then it will be uint32 file_no */ +} PAGECACHE_FILE; + +/* page number for maria */ +typedef uint32 pgcache_page_no_t; + +/* declare structures that is used by st_pagecache */ + +struct st_pagecache_block_link; +typedef struct st_pagecache_block_link PAGECACHE_BLOCK_LINK; +struct st_pagecache_page; +typedef struct st_pagecache_page PAGECACHE_PAGE; +struct st_pagecache_hash_link; +typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK; + +#include <wqueue.h> + +typedef my_bool (*pagecache_disk_read_validator)(byte *page, gptr data); + +#define PAGECACHE_CHANGED_BLOCKS_HASH 128 /* must be power of 2 */ + +/* + The page cache structure + It also contains read-only statistics parameters. +*/ + +typedef struct st_pagecache +{ + my_bool inited; + my_bool resize_in_flush; /* true during flush of resize operation */ + my_bool can_be_used; /* usage of cache for read/write is allowed */ + uint shift; /* block size = 2 ^ shift */ + my_size_t mem_size; /* specified size of the cache memory */ + uint32 block_size; /* size of the page buffer of a cache block */ + ulong min_warm_blocks; /* min number of warm blocks; */ + ulong age_threshold; /* age threshold for hot blocks */ + ulonglong time; /* total number of block link operations */ + uint hash_entries; /* max number of entries in the hash table */ + int hash_links; /* max number of hash links */ + int hash_links_used; /* number of hash links taken from free links pool */ + int disk_blocks; /* max number of blocks in the cache */ + ulong blocks_used; /* maximum number of concurrently used blocks */ + ulong blocks_unused; /* number of currently unused blocks */ + ulong blocks_changed; /* number of currently dirty blocks */ + ulong warm_blocks; /* number of blocks in warm sub-chain */ + ulong cnt_for_resize_op; /* counter to block resize operation */ + ulong blocks_available; /* number of blocks available in the LRU chain */ + PAGECACHE_HASH_LINK **hash_root;/* arr. 
of entries into hash table buckets */ + PAGECACHE_HASH_LINK *hash_link_root;/* memory for hash table links */ + PAGECACHE_HASH_LINK *free_hash_list;/* list of free hash links */ + PAGECACHE_BLOCK_LINK *free_block_list;/* list of free blocks */ + PAGECACHE_BLOCK_LINK *block_root;/* memory for block links */ + byte HUGE_PTR *block_mem; /* memory for block buffers */ + PAGECACHE_BLOCK_LINK *used_last;/* ptr to the last block of the LRU chain */ + PAGECACHE_BLOCK_LINK *used_ins;/* ptr to the insertion block in LRU chain */ + pthread_mutex_t cache_lock; /* to lock access to the cache structure */ + WQUEUE resize_queue; /* threads waiting during resize operation */ + WQUEUE waiting_for_hash_link;/* waiting for a free hash link */ + WQUEUE waiting_for_block; /* requests waiting for a free block */ + /* hash for dirty file bl.*/ + PAGECACHE_BLOCK_LINK *changed_blocks[PAGECACHE_CHANGED_BLOCKS_HASH]; + /* hash for other file bl.*/ + PAGECACHE_BLOCK_LINK *file_blocks[PAGECACHE_CHANGED_BLOCKS_HASH]; + + /* + The following variables are and variables used to hold parameters for + initializing the key cache. + */ + + ulonglong param_buff_size; /* size the memory allocated for the cache */ + ulong param_block_size; /* size of the blocks in the key cache */ + ulong param_division_limit; /* min. percentage of warm blocks */ + ulong param_age_threshold; /* determines when hot block is downgraded */ + + /* Statistics variables. These are reset in reset_pagecache_counters(). */ + ulong global_blocks_changed; /* number of currently dirty blocks */ + ulonglong global_cache_w_requests;/* number of write requests (write hits) */ + ulonglong global_cache_write; /* number of writes from cache to files */ + ulonglong global_cache_r_requests;/* number of read requests (read hits) */ + ulonglong global_cache_read; /* number of reads from files to cache */ + + int blocks; /* max number of blocks in the cache */ + my_bool in_init; /* Set to 1 in MySQL during init/resize */ +} PAGECACHE; + +extern int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, + uint division_limit, uint age_threshold, + uint block_size); +extern int resize_pagecache(PAGECACHE *pagecache, + my_size_t use_mem, uint division_limit, + uint age_threshold); +extern void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, + uint age_threshold); + +#define pagecache_read(P,F,N,L,B,T,K,I) \ + pagecache_valid_read(P,F,N,L,B,T,K,I,0,0) + +extern byte *pagecache_valid_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link, + pagecache_disk_read_validator validator, + gptr validator_data); +extern my_bool pagecache_write(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + enum pagecache_write_mode write_mode, + PAGECACHE_PAGE_LINK *link); +extern void pagecache_unlock_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + LSN first_REDO_LSN_for_page); +extern void pagecache_unlock(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + LSN first_REDO_LSN_for_page); +extern void pagecache_unpin_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno); +extern void 
pagecache_unpin(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link); +extern int flush_pagecache_blocks(PAGECACHE *keycache, + PAGECACHE_FILE *file, + enum flush_type type); +extern my_bool pagecache_delete_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + enum pagecache_page_lock lock, + my_bool flush); +extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup); +extern my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, + LEX_STRING *str, + LSN *max_lsn); +extern int reset_pagecache_counters(const char *name, PAGECACHE *pagecache); + +C_MODE_END +#endif /* _keycache_h */ diff --git a/include/wqueue.h b/include/wqueue.h new file mode 100644 index 00000000000..bacabb8c401 --- /dev/null +++ b/include/wqueue.h @@ -0,0 +1,26 @@ + +#ifndef _wqueue_h +#define _wqueue_h + +#include <my_global.h> +#include <my_pthread.h> + +/* info about requests in a waiting queue */ +typedef struct st_pagecache_wqueue +{ + struct st_my_thread_var *last_thread; /* circular list of waiting + threads */ +} WQUEUE; + +#ifdef THREAD +void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread); +void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread); +void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread); +void wqueue_add_and_wait(WQUEUE *wqueue, + struct st_my_thread_var *thread, + pthread_mutex_t *lock); +void wqueue_release_queue(WQUEUE *wqueue); + +#endif + +#endif diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt index db4368a3534..68bf129551b 100644 --- a/libmysql/CMakeLists.txt +++ b/libmysql/CMakeLists.txt @@ -52,6 +52,7 @@ ADD_LIBRARY(libmysql SHARED dll.c libmysql.def ../mysys/my_open.c ../mysys/my_pread.c ../mysys/my_pthread.c ../mysys/my_read.c ../mysys/my_realloc.c ../mysys/my_rename.c ../mysys/my_seek.c ../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c + ../mysys/my_sync.c ../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c ../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c ../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared index dc6d658fcdf..bf8600e3ce7 100644 --- a/libmysql/Makefile.shared +++ b/libmysql/Makefile.shared @@ -68,7 +68,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \ mf_iocache2.lo my_seek.lo my_sleep.lo \ my_pread.lo mf_cache.lo md5.lo sha1.lo \ my_getopt.lo my_gethostbyname.lo my_port.lo \ - my_rename.lo my_chsize.lo + my_rename.lo my_chsize.lo my_sync.lo sqlobjects = net.lo sql_cmn_objects = pack.lo client.lo my_time.lo diff --git a/mysql-test/include/have_maria.inc b/mysql-test/include/have_maria.inc new file mode 100644 index 00000000000..955e2305ca5 --- /dev/null +++ b/mysql-test/include/have_maria.inc @@ -0,0 +1,4 @@ +-- require r/have_maria.require +disable_query_log; +show variables like "have_maria"; +enable_query_log; diff --git a/mysql-test/r/events_logs_tests.result b/mysql-test/r/events_logs_tests.result index 1efd3fa602e..7b80a1bc21c 100644 --- a/mysql-test/r/events_logs_tests.result +++ b/mysql-test/r/events_logs_tests.result @@ -86,7 +86,7 @@ slo_val val 20 0 1 0 "Check slow log. 
Should see 1 row because 4 is over the threshold of 3 for GLOBAL, though under SESSION which is 10" -SELECT user_host, query_time, db, sql_text FROM mysql.slow_log; +SELECT user_host, query_time, db, sql_text FROM mysql.slow_log where sql_text <> "DROP EVENT long_event"; user_host query_time db sql_text USER_HOST SLEEPVAL events_test INSERT INTO slow_event_test SELECT @@long_query_time, SLEEP(2) DROP EVENT long_event2; diff --git a/mysql-test/r/have_maria.require b/mysql-test/r/have_maria.require new file mode 100644 index 00000000000..02988af6976 --- /dev/null +++ b/mysql-test/r/have_maria.require @@ -0,0 +1,2 @@ +Variable_name Value +have_maria YES diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result new file mode 100644 index 00000000000..7812f04336a --- /dev/null +++ b/mysql-test/r/maria.result @@ -0,0 +1,1788 @@ +set global storage_engine=maria; +set session storage_engine=maria; +drop table if exists t1,t2; +SET SQL_WARNINGS=1; +CREATE TABLE t1 ( +STRING_DATA char(255) default NULL, +KEY string_data (STRING_DATA) +); +INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); +INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD'); +INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'); +INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG'); +INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH'); +INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW'); +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +repair table t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +delete from t1 where (a & 1); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +repair table t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)); +insert into t1 (b) values (1),(2),(2),(2),(2); +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +show 
index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 0 PRIMARY 1 a A 5 NULL NULL BTREE +t1 1 b 1 b A 1 NULL NULL BTREE +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status Table is already up to date +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 0 PRIMARY 1 a A 5 NULL NULL BTREE +t1 1 b 1 b A 1 NULL NULL BTREE +drop table t1; +create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)); +insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4); +explain select * from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select a from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 4 NULL 4 Using index +explain select b from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 4 NULL 4 Using index +explain select a,b from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select a,b from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +explain select a,b,c from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +drop table t1; +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (1), (2), (3); +LOCK TABLES t1 WRITE; +INSERT INTO t1 VALUES (1), (2), (3); +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +DROP TABLE t1; +create table t1 ( t1 char(255), key(t1(250))); +insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169'); +insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203'); +insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767'); +insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907'); +insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011'); +insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101'); +insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564'); +insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002'); +insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834'); +insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016'); +insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899'); +insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482'); +insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139'); +insert t1 values 
('485448544854485448544854485448544854485477847784778477847784778477847784778477'); +insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755'); +insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188'); +insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454'); +insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656'); +insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741'); +insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178'); +insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049'); +insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635'); +insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573'); +insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878'); +insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077'); +delete from t1 where t1>'2'; +insert t1 values ('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'), +('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'), +('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47'); +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 +int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17 +int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int, +i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34 +int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int, +i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51 +int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int, +i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68 +int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int, +i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85 +int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int, +i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102 +int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110 +int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118 +int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126 +int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134 +int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142 +int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150 +int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158 +int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166 +int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174 +int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182 +int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190 +int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198 +int, i199 int, i200 int, 
i201 int, i202 int, i203 int, i204 int, i205 int, i206 +int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214 +int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222 +int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230 +int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238 +int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246 +int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254 +int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262 +int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270 +int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278 +int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286 +int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294 +int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302 +int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310 +int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318 +int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326 +int, i327 int, i328 int, i329 int, i330 int, i331 int, i332 int, i333 int, i334 +int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342 +int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350 +int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358 +int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366 +int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374 +int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382 +int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390 +int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398 +int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406 +int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414 +int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422 +int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430 +int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438 +int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446 +int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454 +int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462 +int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470 +int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478 +int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486 +int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494 +int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502 +int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510 +int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518 +int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526 +int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534 +int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542 +int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550 +int, i551 int, 
i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558 +int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566 +int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574 +int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582 +int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590 +int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598 +int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606 +int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614 +int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622 +int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630 +int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638 +int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646 +int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654 +int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662 +int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670 +int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678 +int, i679 int, i680 int, i681 int, i682 int, i683 int, i684 int, i685 int, i686 +int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694 +int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702 +int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710 +int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718 +int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726 +int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734 +int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742 +int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750 +int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758 +int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766 +int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774 +int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782 +int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790 +int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798 +int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806 +int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814 +int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822 +int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830 +int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838 +int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846 +int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854 +int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862 +int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870 +int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878 +int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886 +int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894 +int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902 +int, 
i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910 +int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918 +int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926 +int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934 +int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942 +int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950 +int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958 +int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966 +int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974 +int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982 +int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990 +int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998 +int, i999 int, i1000 int, b blob) row_format=dynamic; +insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei"); +update t1 set b=repeat('a',256); +update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +delete from t1 where i8=1; +select i1,i2 from t1; +i1 i2 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +CREATE TABLE `t1` ( +`post_id` mediumint(8) unsigned NOT NULL auto_increment, +`topic_id` mediumint(8) unsigned NOT NULL default '0', +`post_time` datetime NOT NULL default '0000-00-00 00:00:00', +`post_text` text NOT NULL, +`icon_url` varchar(10) NOT NULL default '', +`sign` tinyint(1) unsigned NOT NULL default '0', +`post_edit` varchar(150) NOT NULL default '', +`poster_login` varchar(35) NOT NULL default '', +`ip` varchar(15) NOT NULL default '', +PRIMARY KEY (`post_id`), +KEY `post_time` (`post_time`), +KEY `ip` (`ip`), +KEY `poster_login` (`poster_login`), +KEY `topic_id` (`topic_id`), +FULLTEXT KEY `post_text` (`post_text`) +); +INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'); +REPAIR TABLE t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)); +ERROR 42000: Specified key was too long; max key length is 1000 bytes +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)); +ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e); +ERROR 42000: Specified key was too long; max key length is 1000 bytes +DROP TABLE t1; +CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)); +INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4); +create table t2 (a int not null, b int, c int, key(b), key(c), key(a)); +INSERT into t2 values (1,1,1), (2,2,2); +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 b 1 b A 5 NULL NULL YES BTREE +t1 1 c 1 c A 5 NULL NULL YES BTREE +t1 1 a 1 a A 1 NULL NULL BTREE +t1 1 a 2 b A 5 NULL NULL YES BTREE +t1 1 c_2 1 c A 5 NULL NULL YES BTREE +t1 1 c_2 2 a A 5 NULL NULL BTREE +explain select * from t1,t2 where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 2 +1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where +explain select * from t1,t2 force index(a) where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 2 +1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where +explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 2 +1 SIMPLE t1 ref a a 4 test.t2.a 3 
+explain select * from t1,t2 where t1.b=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 2 +1 SIMPLE t1 ref b b 5 test.t2.b 1 Using where +explain select * from t1,t2 force index(c) where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 +1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where +explain select * from t1 where a=0 or a=2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where +explain select * from t1 force index (a) where a=0 or a=2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 4 NULL 4 Using where +explain select * from t1 where c=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref c,c_2 c 5 const 1 Using where +explain select * from t1 use index() where c=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where +drop table t1,t2; +create table t1 (a int not null auto_increment primary key, b varchar(255)); +insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100)); +update t1 set b=repeat(left(b,1),200) where a=1; +delete from t1 where (a & 1)= 0; +update t1 set b=repeat('e',200) where a=1; +flush tables; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +update t1 set b=repeat(left(b,1),255) where a between 1 and 5; +update t1 set b=repeat(left(b,1),10) where a between 32 and 43; +update t1 set b=repeat(left(b,1),2) where a between 64 and 66; +update t1 set b=repeat(left(b,1),65) where a between 67 and 70; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +insert into t1 (b) values (repeat('z',100)); +update t1 set b="test" where left(b,1) > 'n'; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 ( a text not null, key a (a(20))); +insert into t1 values ('aaa '),('aaa'),('aa'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +repair table t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +select concat(a,'.') from t1 where a='aaa'; +concat(a,'.') +aaa . +aaa. +select concat(a,'.') from t1 where binary a='aaa'; +concat(a,'.') +aaa. +update t1 set a='bbb' where a='aaa'; +select concat(a,'.') from t1; +concat(a,'.') +bbb. +bbb. +aa. +drop table t1; +create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))); +insert into t1 values('807780', '477', '165'); +insert into t1 values('807780', '477', '162'); +insert into t1 values('807780', '472', '162'); +select * from t1 where a='807780' and b='477' and c='165'; +a b c +807780 477 165 +drop table t1; +DROP TABLE IF EXISTS t1; +Warnings: +Note 1051 Unknown table 't1' +CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)); +INSERT t1 VALUES ("can \tcan"); +INSERT t1 VALUES ("can can"); +INSERT t1 VALUES ("can"); +SELECT * FROM t1; +a +can can +can +can can +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +DROP TABLE t1; +create table t1 (a blob); +insert into t1 values('a '),('a'); +select concat(a,'.') from t1 where a='a'; +concat(a,'.') +a. +select concat(a,'.') from t1 where a='a '; +concat(a,'.') +a . +alter table t1 add key(a(2)); +select concat(a,'.') from t1 where a='a'; +concat(a,'.') +a. +select concat(a,'.') from t1 where a='a '; +concat(a,'.') +a . 
+drop table t1; +create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))); +insert into t1 (b) values ('a'),('b'),('c'); +select concat(b,'.') from t1; +concat(b,'.') +a. +b. +c. +update t1 set b='b ' where a=2; +update t1 set b='b ' where a > 1; +ERROR 23000: Duplicate entry 'b ' for key 'b' +insert into t1 (b) values ('b'); +ERROR 23000: Duplicate entry 'b' for key 'b' +select * from t1; +a b +1 a +2 b +3 c +delete from t1 where b='b'; +select a,concat(b,'.') from t1; +a concat(b,'.') +1 a. +3 c. +drop table t1; +create table t1 (a int not null); +create table t2 (a int not null, primary key (a)); +insert into t1 values (1); +insert into t2 values (1),(2); +select sql_big_result distinct t1.a from t1,t2 order by t2.a; +a +1 +select distinct t1.a from t1,t2 order by t2.a; +a +1 +select sql_big_result distinct t1.a from t1,t2; +a +1 +explain select sql_big_result distinct t1.a from t1,t2 order by t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary +1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct +explain select distinct t1.a from t1,t2 order by t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary +1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct +drop table t1,t2; +create table t1 ( +c1 varchar(32), +key (c1) +); +alter table t1 disable keys; +insert into t1 values ('a'), ('b'); +select c1 from t1 order by c1 limit 1; +c1 +a +drop table t1; +create table t1 (a int not null, primary key(a)); +create table t2 (a int not null, b int not null, primary key(a,b)); +insert into t1 values (1),(2),(3),(4),(5),(6); +insert into t2 values (1,1),(2,1); +lock tables t1 read local, t2 read local; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +a a b +1 1 1 +2 2 1 +insert into t2 values(2,0); +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +a a b +1 1 1 +2 2 1 +drop table t1,t2; +CREATE TABLE t1 (c1 varchar(250) NOT NULL); +CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1)); +INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003'); +INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004'); +LOCK TABLES t1 READ LOCAL, t2 READ LOCAL; +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 +WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +t1c1 t2c1 +INSERT INTO t2 VALUES ('test000001'), ('test000005'); +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 +WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +t1c1 t2c1 +DROP TABLE t1,t2; +CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)); +Got one of the listed errors +create table t1 (a int, b varchar(200), c text not null) checksum=1; +create table t2 (a int, b varchar(200), c text not null) checksum=0; +insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, ""); +insert t2 select * from t1; +checksum table t1, t2, t3 quick; +Table Checksum +test.t1 2948697075 +test.t2 NULL +test.t3 NULL +Warnings: +Error 1146 Table 'test.t3' doesn't exist +checksum table t1, t2, t3; +Table Checksum +test.t1 2948697075 +test.t2 2948697075 +test.t3 NULL +Warnings: +Error 1146 Table 'test.t3' doesn't exist +checksum table t1, t2, t3 extended; +Table Checksum +test.t1 2948697075 +test.t2 2948697075 +test.t3 NULL +Warnings: +Error 1146 Table 'test.t3' doesn't exist +drop table t1,t2; +create table t1 (a int, key 
(a)); +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE +alter table t1 disable keys; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +create table t2 (a int); +set @@rand_seed1=31415926,@@rand_seed2=2718281828; +insert t1 select * from t2; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +alter table t1 enable keys; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 1000 NULL NULL YES BTREE +alter table t1 engine=heap; +alter table t1 disable keys; +Warnings: +Note 1031 Table storage engine for 't1' doesn't have this option +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a NULL 500 NULL NULL YES HASH +drop table t1,t2; +create table t1 ( a tinytext, b char(1), index idx (a(1),b) ); +insert into t1 values (null,''), (null,''); +explain select count(*) from t1 where a is null; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref idx idx 4 const 1 Using where +select count(*) from t1 where a is null; +count(*) +2 +drop table t1; +create table t1 (c1 int, c2 varchar(4) not null default '', +key(c2(3))) default charset=utf8; +insert into t1 values (1,'A'), (2, 'B'), (3, 'A'); +update t1 set c2='A B' where c1=2; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (c1 int); +insert into t1 values (1),(2),(3),(4); +checksum table t1; +Table Checksum +test.t1 149057747 +delete from t1 where c1 = 1; +create table t2 as select * from t1; +checksum table t1; +Table Checksum +test.t1 984116287 +checksum table t2; +Table Checksum +test.t2 984116287 +drop table t1, t2; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_unequal +create table t1 (a int, key(a)); +insert into t1 values (0),(1),(2),(3),(4); +insert into t1 select NULL from t1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +set maria_stats_method=nulls_equal; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_equal +insert into t1 values (11); +delete from t1 where a=11; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 5 NULL NULL YES BTREE +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation 
Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 5 NULL NULL YES BTREE +set maria_stats_method=DEFAULT; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_unequal +insert into t1 values (11); +delete from t1 where a=11; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +drop table t1; +set maria_stats_method=nulls_ignored; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_ignored +create table t1 ( +a char(3), b char(4), c char(5), d char(6), +key(a,b,c,d) +); +insert into t1 values ('bcd','def1', NULL, 'zz'); +insert into t1 values ('bcd','def2', NULL, 'zz'); +insert into t1 values ('bce','def1', 'yuu', NULL); +insert into t1 values ('bce','def2', NULL, 'quux'); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 2 NULL NULL YES BTREE +t1 1 a 2 b A 4 NULL NULL YES BTREE +t1 1 a 3 c A 4 NULL NULL YES BTREE +t1 1 a 4 d A 4 NULL NULL YES BTREE +delete from t1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 0 NULL NULL YES BTREE +t1 1 a 2 b A 0 NULL NULL YES BTREE +t1 1 a 3 c A 0 NULL NULL YES BTREE +t1 1 a 4 d A 0 NULL NULL YES BTREE +set maria_stats_method=DEFAULT; +drop table t1; +create table t1( +cip INT NOT NULL, +time TIME NOT NULL, +score INT NOT NULL DEFAULT 0, +bob TINYBLOB +); +insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03'); +insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'), +(6, 'c', '00:06'); +select * from t1 where bob is null and cip=1; +cip time score bob +1 00:01:00 0 NULL +create index bug on t1 (bob(22), cip, time); +select * from t1 where bob is null and cip=1; +cip time score bob +1 00:01:00 0 NULL +drop table t1; +create table t1 ( +id1 int not null auto_increment, +id2 int not null default '0', +t text not null, +primary key (id1), +key x (id2, t(32)) +) engine=maria; +insert into t1 (id2, t) values +(10, 'abc'), (10, 'abc'), (10, 'abc'), +(20, 'abc'), (20, 'abc'), (20, 'def'), +(10, 'abc'), (10, 'abc'); +select count(*) from t1 where id2 = 10; +count(*) +5 +select count(id1) from t1 where id2 = 10; +count(id1) +5 +drop table t1; +CREATE TABLE t1(a TINYINT, KEY(a)); +INSERT INTO t1 VALUES(1); +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +MAX(a) +1 +ALTER TABLE t1 DISABLE KEYS; +SELECT MAX(a) FROM t1; +MAX(a) +1 +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +MAX(a) +1 +DROP TABLE t1; +CREATE TABLE t1(a CHAR(9), b VARCHAR(7)); +INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx'); +UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb'; +SELECT * FROM t1; +a b +xxxxxxxxx bbbbbb +xxxxxxxxx bbbbbb +DROP TABLE t1; +SET @@maria_repair_threads=2; +SHOW VARIABLES LIKE 'maria_repair%'; +Variable_name Value 
+maria_repair_threads 2 +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) ENGINE=maria DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Dynamic 2 # # # # 140 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Dynamic 2 # # # # 0 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) ENGINE=maria DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Dynamic 2 # # # # 140 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +REPAIR TABLE t1 QUICK; +Table Op 
Msg_type Msg_text +test.t1 repair status OK +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Dynamic 2 # # # # 140 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +SET @@maria_repair_threads=1; +SHOW VARIABLES LIKE 'maria_repair%'; +Variable_name Value +maria_repair_threads 1 +drop table if exists t1,t2,t3; +--- Testing varchar --- +--- Testing varchar --- +create table t1 (v varchar(10), c char(10), t text); +insert into t1 values('+ ', '+ ', '+ '); +set @a=repeat(' ',20); +insert into t1 values (concat('+',@a),concat('+',@a),concat('+',@a)); +Warnings: +Note 1265 Data truncated for column 'v' at row 1 +select concat('*',v,'*',c,'*',t,'*') from t1; +concat('*',v,'*',c,'*',t,'*') +*+ *+*+ * +*+ *+*+ * +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +create table t2 like t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +create table t3 select * from t1; +show create table t3; +Table Create Table +t3 CREATE TABLE `t3` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +alter table t1 modify c varchar(10); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` varchar(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +alter table t1 modify v char(10); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` char(10) DEFAULT NULL, + `c` varchar(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +alter table t1 modify t varchar(10); +Warnings: +Note 1265 Data truncated for column 't' at row 2 +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` char(10) DEFAULT NULL, + `c` varchar(10) DEFAULT NULL, + `t` varchar(10) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select concat('*',v,'*',c,'*',t,'*') from t1; +concat('*',v,'*',c,'*',t,'*') +*+*+*+ * +*+*+*+ * +drop table t1,t2,t3; +create table t1 (v varchar(10), c char(10), t text, key(v), key(c), key(t(10))); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `v` (`v`), + KEY `c` (`c`), + KEY `t` (`t`(10)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select count(*) from t1; +count(*) +270 +insert into t1 values(concat('a',char(1)),concat('a',char(1)),concat('a',char(1))); +select count(*) from t1 where v='a'; +count(*) +10 +select count(*) from t1 where c='a'; +count(*) +10 +select count(*) from t1 where t='a'; +count(*) +10 +select count(*) from t1 where v='a '; +count(*) +10 +select count(*) from t1 where c='a '; +count(*) +10 +select count(*) from t1 where t='a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +count(*) +10 +select count(*) from t1 where v like 'a%'; +count(*) +11 +select count(*) from t1 where c like 'a%'; +count(*) +11 +select count(*) from t1 where t like 'a%'; +count(*) +11 +select count(*) from 
t1 where v like 'a %'; +count(*) +9 +explain select count(*) from t1 where v='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 13 const # Using where; Using index +explain select count(*) from t1 where c='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref c c 11 const # Using where; Using index +explain select count(*) from t1 where t='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range t t 13 NULL # Using where +explain select count(*) from t1 where v like 'a%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range v v 13 NULL # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 13 const # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 13 const # Using where; Using index +alter table t1 add unique(v); +ERROR 23000: Duplicate entry '{ ' for key 'v_2' +alter table t1 add key(v); +select concat('*',v,'*',c,'*',t,'*') as qq from t1 where v='a'; +qq +*a*a*a* +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +explain select * from t1 where v='a'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v,v_2 # 13 const # Using where +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(c) from t1 group by v limit 10; +v count(c) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(c) from t1 group by v limit 10; +v count(c) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select c,count(*) from t1 group by c limit 10; +c count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select c,count(t) from t1 group by c limit 10; +c count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result c,count(t) from t1 group by c limit 10; +c count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select t,count(*) from t1 group by t limit 10; +t count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select t,count(t) from t1 group by t limit 10; +t count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result t,count(t) from t1 group by t limit 10; +t count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +alter table t1 modify v varchar(300), drop key v, drop key v_2, add key v (v); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(300) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `c` (`c`), + KEY `t` (`t`(10)), + KEY `v` (`v`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select count(*) from t1 where v='a'; +count(*) +10 +select count(*) from t1 where v='a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +count(*) +10 +select count(*) from t1 where v 
like 'a%'; +count(*) +11 +select count(*) from t1 where v like 'a %'; +count(*) +9 +explain select count(*) from t1 where v='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where; Using index +explain select count(*) from t1 where v like 'a%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range v v 303 NULL # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where; Using index +explain select * from t1 where v='a'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +alter table t1 drop key v, add key v (v(30)); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(300) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `c` (`c`), + KEY `t` (`t`(10)), + KEY `v` (`v`(30)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select count(*) from t1 where v='a'; +count(*) +10 +select count(*) from t1 where v='a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +count(*) +10 +select count(*) from t1 where v like 'a%'; +count(*) +11 +select count(*) from t1 where v like 'a %'; +count(*) +9 +explain select count(*) from t1 where v='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +explain select count(*) from t1 where v like 'a%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range v v 33 NULL # Using where +explain select count(*) from t1 where v between 'a' and 'a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +explain select * from t1 where v='a'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +alter table t1 modify v varchar(600), drop key v, add key v (v); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(600) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `c` (`c`), + KEY `t` (`t`(10)), + KEY `v` (`v`) +) ENGINE=MARIA DEFAULT 
CHARSET=latin1 +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +drop table t1; +create table t1 (a char(10), unique (a)); +insert into t1 values ('a '); +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a' for key 'a' +alter table t1 modify a varchar(10); +insert into t1 values ('a '),('a '),('a '),('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +update t1 set a='a ' where a like 'a%'; +select concat(a,'.') from t1; +concat(a,'.') +a . +update t1 set a='abc ' where a like 'a '; +select concat(a,'.') from t1; +concat(a,'.') +a . +update t1 set a='a ' where a like 'a %'; +select concat(a,'.') from t1; +concat(a,'.') +a . +update t1 set a='a ' where a like 'a '; +select concat(a,'.') from t1; +concat(a,'.') +a . +drop table t1; +create table t1 (v varchar(10), c char(10), t text, key(v(5)), key(c(5)), key(t(5))); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `v` (`v`(5)), + KEY `c` (`c`(5)), + KEY `t` (`t`(5)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v char(10) character set utf8); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` char(10) CHARACTER SET utf8 DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v varchar(10), c char(10)) row_format=fixed; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED +insert into t1 values('a','a'),('a ','a '); +select concat('*',v,'*',c,'*') from t1; +concat('*',v,'*',c,'*') +*a*a* +*a *a* +drop table t1; +create table t1 (v varchar(65530), key(v(10))); +insert into t1 values(repeat('a',65530)); +select length(v) from t1 where v=repeat('a',65530); +length(v) +65530 +drop table t1; +create table t1(a int, b varchar(12), key ba(b, a)); +insert into t1 values (1, 'A'), (20, NULL); +explain select * from t1 where a=20 and b is null; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref ba ba 20 const,const 1 Using where; Using index +select * from t1 where a=20 and b is null; +a b +20 NULL +drop table t1; +create table t1 (v varchar(65530), key(v)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +drop table if exists t1; +create table t1 (v varchar(65536)); +Warnings: +Note 1246 Converting column 'v' from VARCHAR to TEXT +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` mediumtext +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v varchar(65530) character set utf8); +Warnings: +Note 1246 Converting column 'v' from VARCHAR to TEXT +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` mediumtext CHARACTER SET utf8 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v varchar(65535)); +ERROR 42000: Row size too large. 
The maximum row size for the used table type, not counting BLOBs, is 65535. You have to change some columns to TEXT or BLOBs +set @save_concurrent_insert=@@concurrent_insert; +set global concurrent_insert=1; +create table t1 (a int); +insert into t1 values (1),(2),(3),(4),(5); +lock table t1 read local; +insert into t1 values(6),(7); +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +set global concurrent_insert=2; +insert into t1 values (8),(9); +unlock tables; +insert into t1 values (10),(11),(12); +select * from t1; +a +1 +2 +11 +10 +5 +6 +7 +8 +9 +12 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (a int, b varchar(30) default "hello"); +insert into t1 (a) values (1),(2),(3),(4),(5); +lock table t1 read local; +insert into t1 (a) values(6),(7); +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +set global concurrent_insert=2; +insert into t1 (a) values (8),(9); +unlock tables; +insert into t1 (a) values (10),(11),(12); +select a from t1; +a +1 +2 +11 +10 +5 +6 +7 +8 +9 +12 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +set global concurrent_insert=@save_concurrent_insert; +create table t1 (a int, key(a)); +insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 8 NULL NULL YES BTREE +alter table t1 disable keys; +alter table t1 enable keys; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 8 NULL NULL YES BTREE +drop table t1; +show create table t1; +show create table t1; +create table t1 (a int) select 42 a; +select * from t1; +a +9 +select * from t1; +a +99 +select * from t1; +a +42 +drop table t1; +End of 4.1 tests +create table t1 (c1 int) pack_keys=0; +create table t2 (c1 int) pack_keys=1; +create table t3 (c1 int) pack_keys=default; +create table t4 (c1 int) pack_keys=2; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '2' at line 1 +drop table t1, t2, t3; +End of 5.0 tests +create table t1 (a int not null, key `a` (a) key_block_size=1024); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, key `a` (a) key_block_size=2048); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=2048 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(2048), key `a` (a)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(2048) DEFAULT NULL, + KEY `a` (`a`(1000)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(2048), key `a` (a) key_block_size=1024); +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(2048) DEFAULT NULL, + KEY `a` (`a`(1000)) KEY_BLOCK_SIZE=4096 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table 
t1; +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1024 +alter table t1 key_block_size=2048; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048 +alter table t1 add c int, add key (c); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096, + KEY `c` (`c`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048 +alter table t1 key_block_size=0; +alter table t1 add d int, add key (d); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + `d` int(11) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096, + KEY `c` (`c`) KEY_BLOCK_SIZE=2048, + KEY `d` (`d`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`(1000)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192 +drop table t1; +create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`(1000)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192 +drop table t1; +create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024, + KEY `b` (`b`) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=16384 +drop table t1; +create table t1 (a int not null, key `a` (a) key_block_size=512); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=1024 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000); +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(2048) DEFAULT NULL, + KEY `a` (`a`(1000)) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, key `a` (a) key_block_size=1025); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) 
KEY_BLOCK_SIZE=2048 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, key key_block_size=1024 (a)); +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '=1024 (a))' at line 1 +create table t1 (a int not null, key `a` key_block_size=1024 (a)); +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'key_block_size=1024 (a))' at line 1 +CREATE TABLE t1 ( +c1 INT, +c2 VARCHAR(300), +KEY (c1) KEY_BLOCK_SIZE 1024, +KEY (c2) KEY_BLOCK_SIZE 8192 +); +INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))), +(11, REPEAT('b', CEIL(RAND() * 300))), +(12, REPEAT('c', CEIL(RAND() * 300))), +(13, REPEAT('d', CEIL(RAND() * 300))), +(14, REPEAT('e', CEIL(RAND() * 300))), +(15, REPEAT('f', CEIL(RAND() * 300))), +(16, REPEAT('g', CEIL(RAND() * 300))), +(17, REPEAT('h', CEIL(RAND() * 300))), +(18, REPEAT('i', CEIL(RAND() * 300))), +(19, REPEAT('j', CEIL(RAND() * 300))), +(20, REPEAT('k', CEIL(RAND() * 300))), +(21, REPEAT('l', CEIL(RAND() * 300))), +(22, REPEAT('m', CEIL(RAND() * 300))), +(23, REPEAT('n', CEIL(RAND() * 300))), +(24, REPEAT('o', CEIL(RAND() * 300))), +(25, REPEAT('p', CEIL(RAND() * 300))), +(26, REPEAT('q', CEIL(RAND() * 300))), +(27, REPEAT('r', CEIL(RAND() * 300))), +(28, REPEAT('s', CEIL(RAND() * 300))), +(29, REPEAT('t', CEIL(RAND() * 300))), +(30, REPEAT('u', CEIL(RAND() * 300))), +(31, REPEAT('v', CEIL(RAND() * 300))), +(32, REPEAT('w', CEIL(RAND() * 300))), +(33, REPEAT('x', CEIL(RAND() * 300))), +(34, REPEAT('y', CEIL(RAND() * 300))), +(35, REPEAT('z', CEIL(RAND() * 300))); +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT * FROM t1; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +REPAIR TABLE t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +DELETE FROM t1 WHERE c1 >= 10; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +DROP TABLE t1; +End of 5.1 tests +set global storage_engine=MyISAM; diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result new file mode 100644 index 00000000000..9268c44eecd --- /dev/null +++ b/mysql-test/r/ps_maria.result @@ -0,0 +1,3137 @@ +use test; +drop table if exists t1, t9 ; +create table t1 +( +a int, b varchar(30), +primary key(a) +) engine = 'MARIA' ; +create table t9 +( +c1 tinyint, c2 smallint, c3 mediumint, c4 int, +c5 integer, c6 bigint, c7 float, c8 double, +c9 double precision, c10 real, c11 decimal(7, 4), c12 numeric(8, 4), +c13 date, c14 datetime, c15 timestamp, c16 time, +c17 year, c18 tinyint, c19 bool, c20 char, +c21 char(10), c22 varchar(30), c23 tinyblob, c24 tinytext, +c25 blob, c26 text, c27 mediumblob, c28 mediumtext, +c29 longblob, c30 longtext, c31 enum('one', 'two', 'three'), +c32 set('monday', 'tuesday', 'wednesday'), +primary key(c1) +) engine = 'MARIA' ; +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 
'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +test_sequence +------ simple select tests ------ +prepare stmt1 from ' select * from t9 order by c1 ' ; +execute stmt1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t9 t9 c1 c1 1 4 1 N 49155 0 63 +def test t9 t9 c2 c2 2 6 1 Y 32768 0 63 +def test t9 t9 c3 c3 9 9 1 Y 32768 0 63 +def test t9 t9 c4 c4 3 11 1 Y 32768 0 63 +def test t9 t9 c5 c5 3 11 1 Y 32768 0 63 +def test t9 t9 c6 c6 8 20 1 Y 32768 0 63 +def test t9 t9 c7 c7 4 12 1 Y 32768 31 63 +def test t9 t9 c8 c8 5 22 1 Y 32768 31 63 +def test t9 t9 c9 c9 5 22 1 Y 32768 31 63 +def test t9 t9 c10 c10 5 22 1 Y 32768 31 63 +def test t9 t9 c11 c11 246 9 6 Y 0 4 63 +def test t9 t9 c12 c12 246 10 6 Y 0 4 63 +def test t9 t9 c13 c13 10 10 10 Y 128 0 63 +def test t9 t9 c14 c14 12 19 19 Y 128 0 63 +def test t9 t9 c15 c15 7 19 19 N 1249 0 63 +def test t9 t9 c16 c16 11 8 8 Y 128 0 63 +def test t9 t9 c17 c17 13 4 4 Y 32864 0 63 +def test t9 t9 c18 c18 1 4 1 Y 32768 0 63 +def test t9 t9 c19 c19 1 1 1 Y 32768 0 63 +def test t9 t9 c20 c20 254 1 1 Y 0 0 8 +def test t9 t9 c21 c21 254 10 10 Y 0 0 8 +def test t9 t9 c22 c22 253 30 30 Y 0 0 8 +def test t9 t9 c23 c23 252 255 8 Y 144 0 63 +def test t9 t9 c24 c24 252 255 8 Y 16 0 8 +def test t9 t9 c25 c25 252 65535 4 Y 144 0 63 +def test t9 t9 c26 c26 252 65535 4 Y 16 0 8 +def test t9 t9 c27 c27 252 16777215 10 Y 144 0 63 +def test t9 t9 c28 c28 252 16777215 10 Y 16 0 8 +def test t9 t9 c29 c29 252 4294967295 8 Y 144 0 63 +def test t9 t9 c30 c30 252 4294967295 8 Y 16 0 8 +def test t9 t9 c31 c31 254 5 3 Y 256 0 8 +def test t9 t9 c32 c32 254 24 7 Y 2048 0 8 +c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday +set @arg00='SELECT' ; +@arg00 a from t1 where a=1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a from t1 where a=1' at line 1 +prepare stmt1 from ' ? a from t1 where a=1 '; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a from t1 where a=1' at line 1 +set @arg00=1 ; +select @arg00, b from t1 where a=1 ; +@arg00 b +1 one +prepare stmt1 from ' select ?, b from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +? 
b +1 one +set @arg00='lion' ; +select @arg00, b from t1 where a=1 ; +@arg00 b +lion one +prepare stmt1 from ' select ?, b from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +? b +lion one +set @arg00=NULL ; +select @arg00, b from t1 where a=1 ; +@arg00 b +NULL one +prepare stmt1 from ' select ?, b from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +? b +NULL one +set @arg00=1 ; +select b, a - @arg00 from t1 where a=1 ; +b a - @arg00 +one 0 +prepare stmt1 from ' select b, a - ? from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +b a - ? +one 0 +set @arg00=null ; +select @arg00 as my_col ; +my_col +NULL +prepare stmt1 from ' select ? as my_col'; +execute stmt1 using @arg00 ; +my_col +NULL +select @arg00 + 1 as my_col ; +my_col +NULL +prepare stmt1 from ' select ? + 1 as my_col'; +execute stmt1 using @arg00 ; +my_col +NULL +select 1 + @arg00 as my_col ; +my_col +NULL +prepare stmt1 from ' select 1 + ? as my_col'; +execute stmt1 using @arg00 ; +my_col +NULL +set @arg00='MySQL' ; +select substr(@arg00,1,2) from t1 where a=1 ; +substr(@arg00,1,2) +My +prepare stmt1 from ' select substr(?,1,2) from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +substr(?,1,2) +My +set @arg00=3 ; +select substr('MySQL',@arg00,5) from t1 where a=1 ; +substr('MySQL',@arg00,5) +SQL +prepare stmt1 from ' select substr(''MySQL'',?,5) from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +substr('MySQL',?,5) +SQL +select substr('MySQL',1,@arg00) from t1 where a=1 ; +substr('MySQL',1,@arg00) +MyS +prepare stmt1 from ' select substr(''MySQL'',1,?) from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +substr('MySQL',1,?) +MyS +set @arg00='MySQL' ; +select a , concat(@arg00,b) from t1 order by a; +a concat(@arg00,b) +1 MySQLone +2 MySQLtwo +3 MySQLthree +4 MySQLfour +prepare stmt1 from ' select a , concat(?,b) from t1 order by a ' ; +execute stmt1 using @arg00; +a concat(?,b) +1 MySQLone +2 MySQLtwo +3 MySQLthree +4 MySQLfour +select a , concat(b,@arg00) from t1 order by a ; +a concat(b,@arg00) +1 oneMySQL +2 twoMySQL +3 threeMySQL +4 fourMySQL +prepare stmt1 from ' select a , concat(b,?) from t1 order by a ' ; +execute stmt1 using @arg00; +a concat(b,?) +1 oneMySQL +2 twoMySQL +3 threeMySQL +4 fourMySQL +set @arg00='MySQL' ; +select group_concat(@arg00,b order by a) from t1 +group by 'a' ; +group_concat(@arg00,b order by a) +MySQLone,MySQLtwo,MySQLthree,MySQLfour +prepare stmt1 from ' select group_concat(?,b order by a) from t1 +group by ''a'' ' ; +execute stmt1 using @arg00; +group_concat(?,b order by a) +MySQLone,MySQLtwo,MySQLthree,MySQLfour +select group_concat(b,@arg00 order by a) from t1 +group by 'a' ; +group_concat(b,@arg00 order by a) +oneMySQL,twoMySQL,threeMySQL,fourMySQL +prepare stmt1 from ' select group_concat(b,? order by a) from t1 +group by ''a'' ' ; +execute stmt1 using @arg00; +group_concat(b,? order by a) +oneMySQL,twoMySQL,threeMySQL,fourMySQL +set @arg00='first' ; +set @arg01='second' ; +set @arg02=NULL; +select @arg00, @arg01 from t1 where a=1 ; +@arg00 @arg01 +first second +prepare stmt1 from ' select ?, ? from t1 where a=1 ' ; +execute stmt1 using @arg00, @arg01 ; +? ? +first second +execute stmt1 using @arg02, @arg01 ; +? ? +NULL second +execute stmt1 using @arg00, @arg02 ; +? ? +first NULL +execute stmt1 using @arg02, @arg02 ; +? ? 
+NULL NULL +drop table if exists t5 ; +create table t5 (id1 int(11) not null default '0', +value2 varchar(100), value1 varchar(100)) ; +insert into t5 values (1,'hh','hh'),(2,'hh','hh'), +(1,'ii','ii'),(2,'ii','ii') ; +prepare stmt1 from ' select id1,value1 from t5 where id1=? or value1=? order by id1,value1 ' ; +set @arg00=1 ; +set @arg01='hh' ; +execute stmt1 using @arg00, @arg01 ; +id1 value1 +1 hh +1 ii +2 hh +drop table t5 ; +drop table if exists t5 ; +create table t5(session_id char(9) not null) ; +insert into t5 values ('abc') ; +prepare stmt1 from ' select * from t5 +where ?=''1111'' and session_id = ''abc'' ' ; +set @arg00='abc' ; +execute stmt1 using @arg00 ; +session_id +set @arg00='1111' ; +execute stmt1 using @arg00 ; +session_id +abc +set @arg00='abc' ; +execute stmt1 using @arg00 ; +session_id +drop table t5 ; +set @arg00='FROM' ; +select a @arg00 t1 where a=1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 t1 where a=1' at line 1 +prepare stmt1 from ' select a ? t1 where a=1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? t1 where a=1' at line 1 +set @arg00='t1' ; +select a from @arg00 where a=1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 where a=1' at line 1 +prepare stmt1 from ' select a from ? where a=1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? where a=1' at line 1 +set @arg00='WHERE' ; +select a from t1 @arg00 a=1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a=1' at line 1 +prepare stmt1 from ' select a from t1 ? a=1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a=1' at line 1 +set @arg00=1 ; +select a FROM t1 where a=@arg00 ; +a +1 +prepare stmt1 from ' select a FROM t1 where a=? ' ; +execute stmt1 using @arg00 ; +a +1 +set @arg00=1000 ; +execute stmt1 using @arg00 ; +a +set @arg00=NULL ; +select a FROM t1 where a=@arg00 ; +a +prepare stmt1 from ' select a FROM t1 where a=? ' ; +execute stmt1 using @arg00 ; +a +set @arg00=4 ; +select a FROM t1 where a=sqrt(@arg00) ; +a +2 +prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ; +execute stmt1 using @arg00 ; +a +2 +set @arg00=NULL ; +select a FROM t1 where a=sqrt(@arg00) ; +a +prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ; +execute stmt1 using @arg00 ; +a +set @arg00=2 ; +set @arg01=3 ; +select a FROM t1 where a in (@arg00,@arg01) order by a; +a +2 +3 +prepare stmt1 from ' select a FROM t1 where a in (?,?) order by a '; +execute stmt1 using @arg00, @arg01; +a +2 +3 +set @arg00= 'one' ; +set @arg01= 'two' ; +set @arg02= 'five' ; +prepare stmt1 from ' select b FROM t1 where b in (?,?,?) order by b ' ; +execute stmt1 using @arg00, @arg01, @arg02 ; +b +one +two +prepare stmt1 from ' select b FROM t1 where b like ? 
'; +set @arg00='two' ; +execute stmt1 using @arg00 ; +b +two +set @arg00='tw%' ; +execute stmt1 using @arg00 ; +b +two +set @arg00='%wo' ; +execute stmt1 using @arg00 ; +b +two +set @arg00=null ; +insert into t9 set c1= 0, c5 = NULL ; +select c5 from t9 where c5 > NULL ; +c5 +prepare stmt1 from ' select c5 from t9 where c5 > ? '; +execute stmt1 using @arg00 ; +c5 +select c5 from t9 where c5 < NULL ; +c5 +prepare stmt1 from ' select c5 from t9 where c5 < ? '; +execute stmt1 using @arg00 ; +c5 +select c5 from t9 where c5 = NULL ; +c5 +prepare stmt1 from ' select c5 from t9 where c5 = ? '; +execute stmt1 using @arg00 ; +c5 +select c5 from t9 where c5 <=> NULL ; +c5 +NULL +prepare stmt1 from ' select c5 from t9 where c5 <=> ? '; +execute stmt1 using @arg00 ; +c5 +NULL +delete from t9 where c1= 0 ; +set @arg00='>' ; +select a FROM t1 where a @arg00 1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 1' at line 1 +prepare stmt1 from ' select a FROM t1 where a ? 1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? 1' at line 1 +set @arg00=1 ; +select a,b FROM t1 where a is not NULL +AND b is not NULL group by a - @arg00 ; +a b +1 one +2 two +3 three +4 four +prepare stmt1 from ' select a,b FROM t1 where a is not NULL +AND b is not NULL group by a - ? ' ; +execute stmt1 using @arg00 ; +a b +1 one +2 two +3 three +4 four +set @arg00='two' ; +select a,b FROM t1 where a is not NULL +AND b is not NULL having b <> @arg00 order by a ; +a b +1 one +3 three +4 four +prepare stmt1 from ' select a,b FROM t1 where a is not NULL +AND b is not NULL having b <> ? order by a ' ; +execute stmt1 using @arg00 ; +a b +1 one +3 three +4 four +set @arg00=1 ; +select a,b FROM t1 where a is not NULL +AND b is not NULL order by a - @arg00 ; +a b +1 one +2 two +3 three +4 four +prepare stmt1 from ' select a,b FROM t1 where a is not NULL +AND b is not NULL order by a - ? ' ; +execute stmt1 using @arg00 ; +a b +1 one +2 two +3 three +4 four +set @arg00=2 ; +select a,b from t1 order by 2 ; +a b +4 four +1 one +3 three +2 two +prepare stmt1 from ' select a,b from t1 +order by ? '; +execute stmt1 using @arg00; +a b +4 four +1 one +3 three +2 two +set @arg00=1 ; +execute stmt1 using @arg00; +a b +1 one +2 two +3 three +4 four +set @arg00=0 ; +execute stmt1 using @arg00; +ERROR 42S22: Unknown column '?' in 'order clause' +set @arg00=1; +prepare stmt1 from ' select a,b from t1 order by a +limit 1 '; +execute stmt1 ; +a b +1 one +prepare stmt1 from ' select a,b from t1 order by a limit ? '; +execute stmt1 using @arg00; +a b +1 one +set @arg00='b' ; +set @arg01=0 ; +set @arg02=2 ; +set @arg03=2 ; +select sum(a), @arg00 from t1 where a > @arg01 +and b is not null group by substr(b,@arg02) +having sum(a) <> @arg03 ; +sum(a) @arg00 +3 b +1 b +4 b +prepare stmt1 from ' select sum(a), ? from t1 where a > ? +and b is not null group by substr(b,?) +having sum(a) <> ? '; +execute stmt1 using @arg00, @arg01, @arg02, @arg03; +sum(a) ? 
+3 b +1 b +4 b +test_sequence +------ join tests ------ +select first.a as a1, second.a as a2 +from t1 first, t1 second +where first.a = second.a order by a1 ; +a1 a2 +1 1 +2 2 +3 3 +4 4 +prepare stmt1 from ' select first.a as a1, second.a as a2 + from t1 first, t1 second + where first.a = second.a order by a1 '; +execute stmt1 ; +a1 a2 +1 1 +2 2 +3 3 +4 4 +set @arg00='ABC'; +set @arg01='two'; +set @arg02='one'; +select first.a, @arg00, second.a FROM t1 first, t1 second +where @arg01 = first.b or first.a = second.a or second.b = @arg02 +order by second.a, first.a; +a @arg00 a +1 ABC 1 +2 ABC 1 +3 ABC 1 +4 ABC 1 +2 ABC 2 +2 ABC 3 +3 ABC 3 +2 ABC 4 +4 ABC 4 +prepare stmt1 from ' select first.a, ?, second.a FROM t1 first, t1 second + where ? = first.b or first.a = second.a or second.b = ? + order by second.a, first.a'; +execute stmt1 using @arg00, @arg01, @arg02; +a ? a +1 ABC 1 +2 ABC 1 +3 ABC 1 +4 ABC 1 +2 ABC 2 +2 ABC 3 +3 ABC 3 +2 ABC 4 +4 ABC 4 +drop table if exists t2 ; +create table t2 as select * from t1 ; +set @query1= 'SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a ' ; +set @query2= 'SELECT * FROM t2 natural join t1 order by t2.a ' ; +set @query3= 'SELECT * FROM t2 join t1 using(a) order by t2.a ' ; +set @query4= 'SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a ' ; +set @query5= 'SELECT * FROM t2 natural left join t1 order by t2.a ' ; +set @query6= 'SELECT * FROM t2 left join t1 using(a) order by t2.a ' ; +set @query7= 'SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a ' ; +set @query8= 'SELECT * FROM t2 natural right join t1 order by t2.a ' ; +set @query9= 'SELECT * FROM t2 right join t1 using(a) order by t2.a ' ; +the join statement is: +SELECT * FROM t2 right join t1 using(a) order by t2.a +prepare stmt1 from @query9 ; +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +the join statement is: +SELECT * FROM t2 natural right join t1 order by t2.a +prepare stmt1 from @query8 ; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +the join statement is: +SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a +prepare stmt1 from @query7 ; +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +the join statement is: +SELECT * FROM t2 left join t1 using(a) order by t2.a +prepare stmt1 from @query6 ; +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +the join statement is: +SELECT * FROM t2 natural left join t1 order by t2.a +prepare stmt1 from @query5 ; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +the join statement is: +SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a +prepare stmt1 from @query4 ; +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 
one +2 two 2 two +3 three 3 three +4 four 4 four +the join statement is: +SELECT * FROM t2 join t1 using(a) order by t2.a +prepare stmt1 from @query3 ; +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +the join statement is: +SELECT * FROM t2 natural join t1 order by t2.a +prepare stmt1 from @query2 ; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +the join statement is: +SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a +prepare stmt1 from @query1 ; +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +drop table t2 ; +test_sequence +------ subquery tests ------ +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = ''two'') '; +execute stmt1 ; +a b +2 two +set @arg00='two' ; +select a, b FROM t1 outer_table where +a = (select a from t1 where b = 'two' ) and b=@arg00 ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = ''two'') and b=? '; +execute stmt1 using @arg00; +a b +2 two +set @arg00='two' ; +select a, b FROM t1 outer_table where +a = (select a from t1 where b = @arg00 ) and b='two' ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = ? ) and b=''two'' ' ; +execute stmt1 using @arg00; +a b +2 two +set @arg00=3 ; +set @arg01='three' ; +select a,b FROM t1 where (a,b) in (select 3, 'three'); +a b +3 three +select a FROM t1 where (a,b) in (select @arg00,@arg01); +a +3 +prepare stmt1 from ' select a FROM t1 where (a,b) in (select ?, ?) '; +execute stmt1 using @arg00, @arg01; +a +3 +set @arg00=1 ; +set @arg01='two' ; +set @arg02=2 ; +set @arg03='two' ; +select a, @arg00, b FROM t1 outer_table where +b=@arg01 and a = (select @arg02 from t1 where b = @arg03 ) ; +a @arg00 b +2 1 two +prepare stmt1 from ' select a, ?, b FROM t1 outer_table where + b=? and a = (select ? from t1 where b = ? ) ' ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; +a ? b +2 1 two +prepare stmt1 from 'select c4 FROM t9 where + c13 = (select MAX(b) from t1 where a = ?) and c22 = ? ' ; +execute stmt1 using @arg01, @arg02; +c4 +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = outer_table.b ) order by a '; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +prepare stmt1 from ' SELECT a as ccc from t1 where a+1= + (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) '; +execute stmt1 ; +ccc +1 +deallocate prepare stmt1 ; +prepare stmt1 from ' SELECT a as ccc from t1 where a+1= + (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) '; +execute stmt1 ; +ccc +1 +deallocate prepare stmt1 ; +prepare stmt1 from ' SELECT a as ccc from t1 where a+1= + (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) '; +execute stmt1 ; +ccc +1 +deallocate prepare stmt1 ; +set @arg00='two' ; +select a, b FROM t1 outer_table where +a = (select a from t1 where b = outer_table.b ) and b=@arg00 ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = outer_table.b) and b=? 
'; +execute stmt1 using @arg00; +a b +2 two +set @arg00=2 ; +select a, b FROM t1 outer_table where +a = (select a from t1 where a = @arg00 and b = outer_table.b) and b='two' ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where a = ? and b = outer_table.b) and b=''two'' ' ; +execute stmt1 using @arg00; +a b +2 two +set @arg00=2 ; +select a, b FROM t1 outer_table where +a = (select a from t1 where outer_table.a = @arg00 and a=2) and b='two' ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where outer_table.a = ? and a=2) and b=''two'' ' ; +execute stmt1 using @arg00; +a b +2 two +set @arg00=1 ; +set @arg01='two' ; +set @arg02=2 ; +set @arg03='two' ; +select a, @arg00, b FROM t1 outer_table where +b=@arg01 and a = (select @arg02 from t1 where outer_table.b = @arg03 +and outer_table.a=a ) ; +a @arg00 b +2 1 two +prepare stmt1 from ' select a, ?, b FROM t1 outer_table where + b=? and a = (select ? from t1 where outer_table.b = ? + and outer_table.a=a ) ' ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; +a ? b +2 1 two +set @arg00=1 ; +set @arg01=0 ; +select a, @arg00 +from ( select a - @arg00 as a from t1 where a=@arg00 ) as t2 +where a=@arg01; +a @arg00 +0 1 +prepare stmt1 from ' select a, ? + from ( select a - ? as a from t1 where a=? ) as t2 + where a=? '; +execute stmt1 using @arg00, @arg00, @arg00, @arg01 ; +a ? +0 1 +drop table if exists t2 ; +create table t2 as select * from t1; +prepare stmt1 from ' select a in (select a from t2) from t1 ' ; +execute stmt1 ; +a in (select a from t2) +1 +1 +1 +1 +drop table if exists t5, t6, t7 ; +create table t5 (a int , b int) ; +create table t6 like t5 ; +create table t7 like t5 ; +insert into t5 values (0, 100), (1, 2), (1, 3), (2, 2), (2, 7), +(2, -1), (3, 10) ; +insert into t6 values (0, 0), (1, 1), (2, 1), (3, 1), (4, 1) ; +insert into t7 values (3, 3), (2, 2), (1, 1) ; +prepare stmt1 from ' select a, (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) from t7 ' ; +execute stmt1 ; +a (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) +3 1 +2 2 +1 2 +execute stmt1 ; +a (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) +3 1 +2 2 +1 2 +execute stmt1 ; +a (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) +3 1 +2 2 +1 2 +drop table t5, t6, t7 ; +drop table if exists t2 ; +create table t2 as select * from t9; +set @stmt= ' SELECT + (SELECT SUM(c1 + c12 + 0.0) FROM t2 + where (t9.c2 - 0e-3) = t2.c2 + GROUP BY t9.c15 LIMIT 1) as scalar_s, + exists (select 1.0e+0 from t2 + where t2.c3 * 9.0000000000 = t9.c4) as exists_s, + c5 * 4 in (select c6 + 0.3e+1 from t2) as in_s, + (c7 - 4, c8 - 4) in (select c9 + 4.0, c10 + 40e-1 from t2) as in_row_s +FROM t9, +(select c25 x, c32 y from t2) tt WHERE x = c25 ' ; +prepare stmt1 from @stmt ; +execute stmt1 ; +execute stmt1 ; +set @stmt= concat('explain ',@stmt); +prepare stmt1 from @stmt ; +execute stmt1 ; +execute stmt1 ; +set @stmt= ' SELECT + (SELECT SUM(c1+c12+?) FROM t2 where (t9.c2-?)=t2.c2 + GROUP BY t9.c15 LIMIT 1) as scalar_s, + exists (select ? from t2 + where t2.c3*?=t9.c4) as exists_s, + c5*? in (select c6+? from t2) as in_s, + (c7-?, c8-?) in (select c9+?, c10+? 
from t2) as in_row_s +FROM t9, +(select c25 x, c32 y from t2) tt WHERE x =c25 ' ; +set @arg00= 0.0 ; +set @arg01= 0e-3 ; +set @arg02= 1.0e+0 ; +set @arg03= 9.0000000000 ; +set @arg04= 4 ; +set @arg05= 0.3e+1 ; +set @arg06= 4 ; +set @arg07= 4 ; +set @arg08= 4.0 ; +set @arg09= 40e-1 ; +prepare stmt1 from @stmt ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +set @stmt= concat('explain ',@stmt); +prepare stmt1 from @stmt ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +drop table t2 ; +select 1 < (select a from t1) ; +ERROR 21000: Subquery returns more than 1 row +prepare stmt1 from ' select 1 < (select a from t1) ' ; +execute stmt1 ; +ERROR 21000: Subquery returns more than 1 row +select 1 as my_col ; +my_col +1 +test_sequence +------ union tests ------ +prepare stmt1 from ' select a FROM t1 where a=1 + union distinct + select a FROM t1 where a=1 '; +execute stmt1 ; +a +1 +execute stmt1 ; +a +1 +prepare stmt1 from ' select a FROM t1 where a=1 + union all + select a FROM t1 where a=1 '; +execute stmt1 ; +a +1 +1 +prepare stmt1 from ' SELECT 1, 2 union SELECT 1 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +prepare stmt1 from ' SELECT 1 union SELECT 1, 2 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +prepare stmt1 from ' SELECT * from t1 union SELECT 1 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +prepare stmt1 from ' SELECT 1 union SELECT * from t1 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +set @arg00=1 ; +select @arg00 FROM t1 where a=1 +union distinct +select 1 FROM t1 where a=1; +@arg00 +1 +prepare stmt1 from ' select ? FROM t1 where a=1 + union distinct + select 1 FROM t1 where a=1 ' ; +execute stmt1 using @arg00; +? +1 +set @arg00=1 ; +select 1 FROM t1 where a=1 +union distinct +select @arg00 FROM t1 where a=1; +1 +1 +prepare stmt1 from ' select 1 FROM t1 where a=1 + union distinct + select ? FROM t1 where a=1 ' ; +execute stmt1 using @arg00; +1 +1 +set @arg00='a' ; +select @arg00 FROM t1 where a=1 +union distinct +select @arg00 FROM t1 where a=1; +@arg00 +a +prepare stmt1 from ' select ? FROM t1 where a=1 + union distinct + select ? FROM t1 where a=1 '; +execute stmt1 using @arg00, @arg00; +? +a +prepare stmt1 from ' select ? + union distinct + select ? '; +execute stmt1 using @arg00, @arg00; +? +a +set @arg00='a' ; +set @arg01=1 ; +set @arg02='a' ; +set @arg03=2 ; +select @arg00 FROM t1 where a=@arg01 +union distinct +select @arg02 FROM t1 where a=@arg03; +@arg00 +a +prepare stmt1 from ' select ? FROM t1 where a=? + union distinct + select ? FROM t1 where a=? ' ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03; +? +a +set @arg00=1 ; +prepare stmt1 from ' select sum(a) + 200, ? from t1 +union distinct +select sum(a) + 200, 1 from t1 +group by b ' ; +execute stmt1 using @arg00; +sum(a) + 200 ? 
+210 1 +204 1 +201 1 +203 1 +202 1 +set @Oporto='Oporto' ; +set @Lisboa='Lisboa' ; +set @0=0 ; +set @1=1 ; +set @2=2 ; +set @3=3 ; +set @4=4 ; +select @Oporto,@Lisboa,@0,@1,@2,@3,@4 ; +@Oporto @Lisboa @0 @1 @2 @3 @4 +Oporto Lisboa 0 1 2 3 4 +select sum(a) + 200 as the_sum, @Oporto as the_town from t1 +group by b +union distinct +select sum(a) + 200, @Lisboa from t1 +group by b ; +the_sum the_town +204 Oporto +201 Oporto +203 Oporto +202 Oporto +204 Lisboa +201 Lisboa +203 Lisboa +202 Lisboa +prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1 + group by b + union distinct + select sum(a) + 200, ? from t1 + group by b ' ; +execute stmt1 using @Oporto, @Lisboa; +the_sum the_town +204 Oporto +201 Oporto +203 Oporto +202 Oporto +204 Lisboa +201 Lisboa +203 Lisboa +202 Lisboa +select sum(a) + 200 as the_sum, @Oporto as the_town from t1 +where a > @1 +group by b +union distinct +select sum(a) + 200, @Lisboa from t1 +where a > @2 +group by b ; +the_sum the_town +204 Oporto +203 Oporto +202 Oporto +204 Lisboa +203 Lisboa +prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1 + where a > ? + group by b + union distinct + select sum(a) + 200, ? from t1 + where a > ? + group by b ' ; +execute stmt1 using @Oporto, @1, @Lisboa, @2; +the_sum the_town +204 Oporto +203 Oporto +202 Oporto +204 Lisboa +203 Lisboa +select sum(a) + 200 as the_sum, @Oporto as the_town from t1 +where a > @1 +group by b +having avg(a) > @2 +union distinct +select sum(a) + 200, @Lisboa from t1 +where a > @2 +group by b +having avg(a) > @3; +the_sum the_town +204 Oporto +203 Oporto +204 Lisboa +prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1 + where a > ? + group by b + having avg(a) > ? + union distinct + select sum(a) + 200, ? from t1 + where a > ? + group by b + having avg(a) > ? '; +execute stmt1 using @Oporto, @1, @2, @Lisboa, @2, @3; +the_sum the_town +204 Oporto +203 Oporto +204 Lisboa +test_sequence +------ explain select tests ------ +prepare stmt1 from ' explain select * from t9 ' ; +execute stmt1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def id 8 3 1 N 32929 0 63 +def select_type 253 19 6 N 1 31 8 +def table 253 64 2 Y 0 31 8 +def type 253 10 3 Y 0 31 8 +def possible_keys 253 4096 0 Y 0 31 8 +def key 253 64 0 Y 0 31 8 +def key_len 253 4096 0 Y 128 31 63 +def ref 253 1024 0 Y 0 31 8 +def rows 8 10 1 Y 32928 0 63 +def Extra 253 255 0 N 1 31 8 +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t9 ALL NULL NULL NULL NULL 2 +drop table if exists t2 ; +create table t2 (s varchar(25), fulltext(s)) +ENGINE = 'MARIA' ; +insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ; +commit ; +prepare stmt1 from ' select s from t2 where match (s) against (?) 
' ; +set @arg00='Dogs' ; +execute stmt1 using @arg00 ; +s +Hollow Dogs +prepare stmt1 from ' SELECT s FROM t2 +where match (s) against (concat(?,''digger'')) '; +set @arg00='Grave' ; +execute stmt1 using @arg00 ; +s +Gravedigger +drop table t2 ; +test_sequence +------ delete tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +prepare stmt1 from 'delete from t1 where a=2' ; +execute stmt1; +select a,b from t1 where a=2; +a b +execute stmt1; +insert into t1 values(0,NULL); +set @arg00=NULL; +prepare stmt1 from 'delete from t1 where b=?' ; +execute stmt1 using @arg00; +select a,b from t1 where b is NULL ; +a b +0 NULL +set @arg00='one'; +execute stmt1 using @arg00; +select a,b from t1 where b=@arg00; +a b +prepare stmt1 from 'truncate table t1' ; +test_sequence +------ update tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +prepare stmt1 from 'update t1 set b=''a=two'' where a=2' ; +execute stmt1; +select a,b from t1 where a=2; +a b +2 a=two +execute stmt1; +select a,b from t1 where a=2; +a b +2 a=two +set @arg00=NULL; +prepare stmt1 from 'update t1 set b=? where a=2' ; +execute stmt1 using @arg00; +select a,b from t1 where a=2; +a b +2 NULL +set @arg00='two'; +execute stmt1 using @arg00; +select a,b from t1 where a=2; +a b +2 two +set @arg00=2; +prepare stmt1 from 'update t1 set b=NULL where a=?' 
; +execute stmt1 using @arg00; +select a,b from t1 where a=@arg00; +a b +2 NULL +update t1 set b='two' where a=@arg00; +set @arg00=2000; +execute stmt1 using @arg00; +select a,b from t1 where a=@arg00; +a b +set @arg00=2; +set @arg01=22; +prepare stmt1 from 'update t1 set a=? where a=?' ; +execute stmt1 using @arg00, @arg00; +select a,b from t1 where a=@arg00; +a b +2 two +execute stmt1 using @arg01, @arg00; +select a,b from t1 where a=@arg01; +a b +22 two +execute stmt1 using @arg00, @arg01; +select a,b from t1 where a=@arg00; +a b +2 two +set @arg00=NULL; +set @arg01=2; +execute stmt1 using @arg00, @arg01; +Warnings: +Warning 1048 Column 'a' cannot be null +select a,b from t1 order by a; +a b +0 two +1 one +3 three +4 four +set @arg00=0; +execute stmt1 using @arg01, @arg00; +select a,b from t1 order by a; +a b +1 one +2 two +3 three +4 four +set @arg00=23; +set @arg01='two'; +set @arg02=2; +set @arg03='two'; +set @arg04=2; +drop table if exists t2; +create table t2 as select a,b from t1 ; +prepare stmt1 from 'update t1 set a=? where b=? + and a in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 where a = @arg00 ; +a b +23 two +prepare stmt1 from 'update t1 set a=? where b=? + and a not in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 order by a ; +a b +1 one +2 two +3 three +4 four +drop table t2 ; +create table t2 +( +a int, b varchar(30), +primary key(a) +) engine = 'MARIA' ; +insert into t2(a,b) select a, b from t1 ; +prepare stmt1 from 'update t1 set a=? where b=? + and a in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 where a = @arg00 ; +a b +23 two +prepare stmt1 from 'update t1 set a=? where b=? + and a not in (select ? from t2 + where b = ? 
or a = ?)'; +execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 order by a ; +a b +1 one +2 two +3 three +4 four +drop table t2 ; +set @arg00=1; +prepare stmt1 from 'update t1 set b=''bla'' +where a=2 +limit 1'; +execute stmt1 ; +select a,b from t1 where b = 'bla' ; +a b +2 bla +prepare stmt1 from 'update t1 set b=''bla'' where a=2 limit ?'; +execute stmt1 using @arg00; +test_sequence +------ insert tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +prepare stmt1 from 'insert into t1 values(5, ''five'' )'; +execute stmt1; +select a,b from t1 where a = 5; +a b +5 five +set @arg00='six' ; +prepare stmt1 from 'insert into t1 values(6, ? )'; +execute stmt1 using @arg00; +select a,b from t1 where b = @arg00; +a b +6 six +execute stmt1 using @arg00; +ERROR 23000: Duplicate entry '6' for key 'PRIMARY' +set @arg00=NULL ; +prepare stmt1 from 'insert into t1 values(0, ? )'; +execute stmt1 using @arg00; +select a,b from t1 where b is NULL; +a b +0 NULL +set @arg00=8 ; +set @arg01='eight' ; +prepare stmt1 from 'insert into t1 values(?, ? 
)'; +execute stmt1 using @arg00, @arg01 ; +select a,b from t1 where b = @arg01; +a b +8 eight +set @NULL= null ; +set @arg00= 'abc' ; +execute stmt1 using @NULL, @NULL ; +ERROR 23000: Column 'a' cannot be null +execute stmt1 using @NULL, @NULL ; +ERROR 23000: Column 'a' cannot be null +execute stmt1 using @NULL, @arg00 ; +ERROR 23000: Column 'a' cannot be null +execute stmt1 using @NULL, @arg00 ; +ERROR 23000: Column 'a' cannot be null +set @arg01= 10000 + 2 ; +execute stmt1 using @arg01, @arg00 ; +set @arg01= 10000 + 1 ; +execute stmt1 using @arg01, @arg00 ; +select * from t1 where a > 10000 order by a ; +a b +10001 abc +10002 abc +delete from t1 where a > 10000 ; +set @arg01= 10000 + 2 ; +execute stmt1 using @arg01, @NULL ; +set @arg01= 10000 + 1 ; +execute stmt1 using @arg01, @NULL ; +select * from t1 where a > 10000 order by a ; +a b +10001 NULL +10002 NULL +delete from t1 where a > 10000 ; +set @arg01= 10000 + 10 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 9 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 8 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 7 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 6 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 5 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 4 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 3 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 2 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 1 ; +execute stmt1 using @arg01, @arg01 ; +select * from t1 where a > 10000 order by a ; +a b +10001 10001 +10002 10002 +10003 10003 +10004 10004 +10005 10005 +10006 10006 +10007 10007 +10008 10008 +10009 10009 +10010 10010 +delete from t1 where a > 10000 ; +set @arg00=81 ; +set @arg01='8-1' ; +set @arg02=82 ; +set @arg03='8-2' ; +prepare stmt1 from 'insert into t1 values(?,?),(?,?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; +select a,b from t1 where a in (@arg00,@arg02) ; +a b +81 8-1 +82 8-2 +set @arg00=9 ; +set @arg01='nine' ; +prepare stmt1 from 'insert into t1 set a=?, b=? '; +execute stmt1 using @arg00, @arg01 ; +select a,b from t1 where a = @arg00 ; +a b +9 nine +set @arg00=6 ; +set @arg01=1 ; +prepare stmt1 from 'insert into t1 set a=?, b=''sechs'' + on duplicate key update a=a + ?, b=concat(b,''modified'') '; +execute stmt1 using @arg00, @arg01; +select * from t1 order by a; +a b +0 NULL +1 one +2 two +3 three +4 four +5 five +7 sixmodified +8 eight +9 nine +81 8-1 +82 8-2 +set @arg00=81 ; +set @arg01=1 ; +execute stmt1 using @arg00, @arg01; +ERROR 23000: Duplicate entry '82' for key 'PRIMARY' +drop table if exists t2 ; +create table t2 (id int auto_increment primary key) +ENGINE= 'MARIA' ; +prepare stmt1 from ' select last_insert_id() ' ; +insert into t2 values (NULL) ; +execute stmt1 ; +last_insert_id() +1 +insert into t2 values (NULL) ; +execute stmt1 ; +last_insert_id() +2 +drop table t2 ; +set @1000=1000 ; +set @x1000_2="x1000_2" ; +set @x1000_3="x1000_3" ; +set @x1000="x1000" ; +set @1100=1100 ; +set @x1100="x1100" ; +set @100=100 ; +set @updated="updated" ; +insert into t1 values(1000,'x1000_1') ; +insert into t1 values(@1000,@x1000_2),(@1000,@x1000_3) +on duplicate key update a = a + @100, b = concat(b,@updated) ; +select a,b from t1 where a >= 1000 order by a ; +a b +1000 x1000_3 +1100 x1000_1updated +delete from t1 where a >= 1000 ; +insert into t1 values(1000,'x1000_1') ; +prepare stmt1 from ' insert into t1 values(?,?),(?,?) + on duplicate key update a = a + ?, b = concat(b,?) 
'; +execute stmt1 using @1000, @x1000_2, @1000, @x1000_3, @100, @updated ; +select a,b from t1 where a >= 1000 order by a ; +a b +1000 x1000_3 +1100 x1000_1updated +delete from t1 where a >= 1000 ; +insert into t1 values(1000,'x1000_1') ; +execute stmt1 using @1000, @x1000_2, @1100, @x1000_3, @100, @updated ; +select a,b from t1 where a >= 1000 order by a ; +a b +1200 x1000_1updatedupdated +delete from t1 where a >= 1000 ; +prepare stmt1 from ' replace into t1 (a,b) select 100, ''hundred'' '; +execute stmt1; +execute stmt1; +execute stmt1; +test_sequence +------ multi table tests ------ +delete from t1 ; +delete from t9 ; +insert into t1(a,b) values (1, 'one'), (2, 'two'), (3, 'three') ; +insert into t9 (c1,c21) +values (1, 'one'), (2, 'two'), (3, 'three') ; +prepare stmt_delete from " delete t1, t9 + from t1, t9 where t1.a=t9.c1 and t1.b='updated' "; +prepare stmt_update from " update t1, t9 + set t1.b='updated', t9.c21='updated' + where t1.a=t9.c1 and t1.a=? "; +prepare stmt_select1 from " select a, b from t1 order by a" ; +prepare stmt_select2 from " select c1, c21 from t9 order by c1" ; +set @arg00= 1 ; +execute stmt_update using @arg00 ; +execute stmt_delete ; +execute stmt_select1 ; +a b +2 two +3 three +execute stmt_select2 ; +c1 c21 +2 two +3 three +set @arg00= @arg00 + 1 ; +execute stmt_update using @arg00 ; +execute stmt_delete ; +execute stmt_select1 ; +a b +3 three +execute stmt_select2 ; +c1 c21 +3 three +set @arg00= @arg00 + 1 ; +execute stmt_update using @arg00 ; +execute stmt_delete ; +execute stmt_select1 ; +a b +execute stmt_select2 ; +c1 c21 +set @arg00= @arg00 + 1 ; +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +insert into t1 values(0,NULL) ; +set @duplicate='duplicate ' ; +set @1000=1000 ; +set @5=5 ; +select a,b from t1 where a < 5 order by a ; +a b +0 NULL +1 one +2 two +3 three +4 four +insert into t1 select a + @1000, concat(@duplicate,b) from t1 +where a < @5 ; +affected rows: 5 +info: Records: 5 Duplicates: 0 Warnings: 0 +select a,b from t1 where a >= 1000 order by a ; +a b +1000 NULL +1001 duplicate one +1002 duplicate two +1003 duplicate three +1004 duplicate four +delete from t1 where a >= 1000 ; +prepare stmt1 from ' insert into t1 select a + ?, concat(?,b) from t1 +where a < ? 
' ; +execute stmt1 using @1000, @duplicate, @5; +affected rows: 5 +info: Records: 5 Duplicates: 0 Warnings: 0 +select a,b from t1 where a >= 1000 order by a ; +a b +1000 NULL +1001 duplicate one +1002 duplicate two +1003 duplicate three +1004 duplicate four +delete from t1 where a >= 1000 ; +set @1=1 ; +set @2=2 ; +set @100=100 ; +set @float=1.00; +set @five='five' ; +drop table if exists t2; +create table t2 like t1 ; +insert into t2 (b,a) +select @duplicate, sum(first.a) from t1 first, t1 second +where first.a <> @5 and second.b = first.b +and second.b <> @five +group by second.b +having sum(second.a) > @2 +union +select b, a + @100 from t1 +where (a,b) in ( select sqrt(a+@1)+CAST(@float AS signed),b +from t1); +affected rows: 3 +info: Records: 3 Duplicates: 0 Warnings: 0 +select a,b from t2 order by a ; +a b +3 duplicate +4 duplicate +103 three +delete from t2 ; +prepare stmt1 from ' insert into t2 (b,a) +select ?, sum(first.a) + from t1 first, t1 second + where first.a <> ? and second.b = first.b and second.b <> ? + group by second.b + having sum(second.a) > ? +union +select b, a + ? from t1 + where (a,b) in ( select sqrt(a+?)+CAST(? AS signed),b + from t1 ) ' ; +execute stmt1 using @duplicate, @5, @five, @2, @100, @1, @float ; +affected rows: 3 +info: Records: 3 Duplicates: 0 Warnings: 0 +select a,b from t2 order by a ; +a b +3 duplicate +4 duplicate +103 three +drop table t2; +drop table if exists t5 ; +set @arg01= 8; +set @arg02= 8.0; +set @arg03= 80.00000000000e-1; +set @arg04= 'abc' ; +set @arg05= CAST('abc' as binary) ; +set @arg06= '1991-08-05' ; +set @arg07= CAST('1991-08-05' as date); +set @arg08= '1991-08-05 01:01:01' ; +set @arg09= CAST('1991-08-05 01:01:01' as datetime) ; +set @arg10= unix_timestamp('1991-01-01 01:01:01'); +set @arg11= YEAR('1991-01-01 01:01:01'); +set @arg12= 8 ; +set @arg12= NULL ; +set @arg13= 8.0 ; +set @arg13= NULL ; +set @arg14= 'abc'; +set @arg14= NULL ; +set @arg15= CAST('abc' as binary) ; +set @arg15= NULL ; +create table t5 as select +8 as const01, @arg01 as param01, +8.0 as const02, @arg02 as param02, +80.00000000000e-1 as const03, @arg03 as param03, +'abc' as const04, @arg04 as param04, +CAST('abc' as binary) as const05, @arg05 as param05, +'1991-08-05' as const06, @arg06 as param06, +CAST('1991-08-05' as date) as const07, @arg07 as param07, +'1991-08-05 01:01:01' as const08, @arg08 as param08, +CAST('1991-08-05 01:01:01' as datetime) as const09, @arg09 as param09, +unix_timestamp('1991-01-01 01:01:01') as const10, @arg10 as param10, +YEAR('1991-01-01 01:01:01') as const11, @arg11 as param11, +NULL as const12, @arg12 as param12, +@arg13 as param13, +@arg14 as param14, +@arg15 as param15; +show create table t5 ; +Table Create Table +t5 CREATE TABLE `t5` ( + `const01` int(1) NOT NULL DEFAULT '0', + `param01` bigint(20) DEFAULT NULL, + `const02` decimal(2,1) NOT NULL DEFAULT '0.0', + `param02` decimal(65,30) DEFAULT NULL, + `const03` double NOT NULL DEFAULT '0', + `param03` double DEFAULT NULL, + `const04` varchar(3) NOT NULL DEFAULT '', + `param04` longtext, + `const05` varbinary(3) NOT NULL DEFAULT '', + `param05` longblob, + `const06` varchar(10) NOT NULL DEFAULT '', + `param06` longtext, + `const07` date DEFAULT NULL, + `param07` longblob, + `const08` varchar(19) NOT NULL DEFAULT '', + `param08` longtext, + `const09` datetime DEFAULT NULL, + `param09` longblob, + `const10` int(10) NOT NULL DEFAULT '0', + `param10` bigint(20) DEFAULT NULL, + `const11` int(4) DEFAULT NULL, + `param11` bigint(20) DEFAULT NULL, + `const12` binary(0) DEFAULT NULL, + 
`param12` bigint(20) DEFAULT NULL, + `param13` decimal(65,30) DEFAULT NULL, + `param14` longtext, + `param15` longblob +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select * from t5 ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t5 t5 const01 const01 3 1 1 N 32769 0 63 +def test t5 t5 param01 param01 8 20 1 Y 32768 0 63 +def test t5 t5 const02 const02 246 4 3 N 1 1 63 +def test t5 t5 param02 param02 246 67 32 Y 0 30 63 +def test t5 t5 const03 const03 5 17 1 N 32769 31 63 +def test t5 t5 param03 param03 5 23 1 Y 32768 31 63 +def test t5 t5 const04 const04 253 3 3 N 1 0 8 +def test t5 t5 param04 param04 252 4294967295 3 Y 16 0 8 +def test t5 t5 const05 const05 253 3 3 N 129 0 63 +def test t5 t5 param05 param05 252 4294967295 3 Y 144 0 63 +def test t5 t5 const06 const06 253 10 10 N 1 0 8 +def test t5 t5 param06 param06 252 4294967295 10 Y 16 0 8 +def test t5 t5 const07 const07 10 10 10 Y 128 0 63 +def test t5 t5 param07 param07 252 4294967295 10 Y 144 0 63 +def test t5 t5 const08 const08 253 19 19 N 1 0 8 +def test t5 t5 param08 param08 252 4294967295 19 Y 16 0 8 +def test t5 t5 const09 const09 12 19 19 Y 128 0 63 +def test t5 t5 param09 param09 252 4294967295 19 Y 144 0 63 +def test t5 t5 const10 const10 3 10 9 N 32769 0 63 +def test t5 t5 param10 param10 8 20 9 Y 32768 0 63 +def test t5 t5 const11 const11 3 4 4 Y 32768 0 63 +def test t5 t5 param11 param11 8 20 4 Y 32768 0 63 +def test t5 t5 const12 const12 254 0 0 Y 128 0 63 +def test t5 t5 param12 param12 8 20 0 Y 32768 0 63 +def test t5 t5 param13 param13 246 67 0 Y 0 30 63 +def test t5 t5 param14 param14 252 4294967295 0 Y 16 0 8 +def test t5 t5 param15 param15 252 4294967295 0 Y 144 0 63 +const01 8 +param01 8 +const02 8.0 +param02 8.000000000000000000000000000000 +const03 8 +param03 8 +const04 abc +param04 abc +const05 abc +param05 abc +const06 1991-08-05 +param06 1991-08-05 +const07 1991-08-05 +param07 1991-08-05 +const08 1991-08-05 01:01:01 +param08 1991-08-05 01:01:01 +const09 1991-08-05 01:01:01 +param09 1991-08-05 01:01:01 +const10 662680861 +param10 662680861 +const11 1991 +param11 1991 +const12 NULL +param12 NULL +param13 NULL +param14 NULL +param15 NULL +drop table t5 ; +test_sequence +------ data type conversion tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +insert into t9 set c1= 0, c15= '1991-01-01 01:01:01' ; +select * from t9 order by c1 ; +c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 
c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday +test_sequence +------ select @parameter:= column ------ +prepare full_info from "select @arg01, @arg02, @arg03, @arg04, + @arg05, @arg06, @arg07, @arg08, + @arg09, @arg10, @arg11, @arg12, + @arg13, @arg14, @arg15, @arg16, + @arg17, @arg18, @arg19, @arg20, + @arg21, @arg22, @arg23, @arg24, + @arg25, @arg26, @arg27, @arg28, + @arg29, @arg30, @arg31, @arg32" ; +select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, +@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8, +@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12, +@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16, +@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20, +@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24, +@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28, +@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32 +from t9 where c1= 1 ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 
@arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, +@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8, +@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12, +@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16, +@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20, +@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24, +@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28, +@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32 +from t9 where c1= 0 ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select + @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, + @arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8, + @arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12, + @arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16, + @arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20, + @arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24, + @arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28, + @arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32 +from t9 where c1= ?" 
; +set @my_key= 1 ; +execute stmt1 using @my_key ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +set @my_key= 0 ; +execute stmt1 using @my_key ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 
+def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select ? := c1 from t9 where c1= 1" ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ':= c1 from t9 where c1= 1' at line 1 +test_sequence +------ select column, .. into @parm,.. ------ +select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, +c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, +c25, c26, c27, c28, c29, c30, c31, c32 +into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08, +@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16, +@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24, +@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32 +from t9 where c1= 1 ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text 
mediumblob mediumtext longblob longtext one monday +select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, +c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, +c25, c26, c27, c28, c29, c30, c31, c32 +into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08, +@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16, +@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24, +@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32 +from t9 where c1= 0 ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, + c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, + c25, c26, c27, c28, c29, c30, c31, c32 +into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08, + @arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16, + @arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24, + @arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32 +from t9 where c1= ?" 
; +set @my_key= 1 ; +execute stmt1 using @my_key ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +set @my_key= 0 ; +execute stmt1 using @my_key ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select c1 into ? 
from t9 where c1= 1" ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? from t9 where c1= 1' at line 1 +test_sequence +-- insert into numeric columns -- +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 ) ; +set @arg00= 21 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22 )" ; +execute stmt1 ; +set @arg00= 23; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, +30.0, 30.0, 30.0 ) ; +set @arg00= 31.0 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, + 32.0, 32.0, 32.0 )" ; +execute stmt1 ; +set @arg00= 33.0; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( '40', '40', '40', '40', '40', '40', '40', '40', +'40', '40', '40' ) ; +set @arg00= '41' ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( '42', '42', '42', '42', '42', '42', '42', '42', + '42', '42', '42' )" ; +execute stmt1 ; +set @arg00= '43'; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
)" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( CAST('50' as binary), CAST('50' as binary), +CAST('50' as binary), CAST('50' as binary), CAST('50' as binary), +CAST('50' as binary), CAST('50' as binary), CAST('50' as binary), +CAST('50' as binary), CAST('50' as binary), CAST('50' as binary) ) ; +set @arg00= CAST('51' as binary) ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( CAST('52' as binary), CAST('52' as binary), + CAST('52' as binary), CAST('52' as binary), CAST('52' as binary), + CAST('52' as binary), CAST('52' as binary), CAST('52' as binary), + CAST('52' as binary), CAST('52' as binary), CAST('52' as binary) )" ; +execute stmt1 ; +set @arg00= CAST('53' as binary) ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +set @arg00= 2 ; +set @arg00= NULL ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +NULL, NULL, NULL ) ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 61, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 62, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL )" ; +execute stmt1 ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 63, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +set @arg00= 8.0 ; +set @arg00= NULL ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 71, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 73, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +set @arg00= 'abc' ; +set @arg00= NULL ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 81, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 83, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
)" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 +from t9 where c1 >= 20 +order by c1 ; +c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c12 +20 20 20 20 20 20 20 20 20 20 20.0000 +21 21 21 21 21 21 21 21 21 21 21.0000 +22 22 22 22 22 22 22 22 22 22 22.0000 +23 23 23 23 23 23 23 23 23 23 23.0000 +30 30 30 30 30 30 30 30 30 30 30.0000 +31 31 31 31 31 31 31 31 31 31 31.0000 +32 32 32 32 32 32 32 32 32 32 32.0000 +33 33 33 33 33 33 33 33 33 33 33.0000 +40 40 40 40 40 40 40 40 40 40 40.0000 +41 41 41 41 41 41 41 41 41 41 41.0000 +42 42 42 42 42 42 42 42 42 42 42.0000 +43 43 43 43 43 43 43 43 43 43 43.0000 +50 50 50 50 50 50 50 50 50 50 50.0000 +51 51 51 51 51 51 51 51 51 51 51.0000 +52 52 52 52 52 52 52 52 52 52 52.0000 +53 53 53 53 53 53 53 53 53 53 53.0000 +60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +test_sequence +-- select .. where numeric column = .. -- +set @arg00= 20; +select 'true' as found from t9 +where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20 +and c8= 20 and c9= 20 and c10= 20 and c12= 20; +found +true +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20 + and c8= 20 and c9= 20 and c10= 20 and c12= 20 "; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 20.0; +select 'true' as found from t9 +where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0 +and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0; +found +true +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0 + and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0 "; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? 
"; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +select 'true' as found from t9 +where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20' + and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20'; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20' + and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20' "; +execute stmt1 ; +found +true +set @arg00= '20'; +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +select 'true' as found from t9 +where c1= CAST('20' as binary) and c2= CAST('20' as binary) and +c3= CAST('20' as binary) and c4= CAST('20' as binary) and +c5= CAST('20' as binary) and c6= CAST('20' as binary) and +c7= CAST('20' as binary) and c8= CAST('20' as binary) and +c9= CAST('20' as binary) and c10= CAST('20' as binary) and +c12= CAST('20' as binary); +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= CAST('20' as binary) and c2= CAST('20' as binary) and + c3= CAST('20' as binary) and c4= CAST('20' as binary) and + c5= CAST('20' as binary) and c6= CAST('20' as binary) and + c7= CAST('20' as binary) and c8= CAST('20' as binary) and + c9= CAST('20' as binary) and c10= CAST('20' as binary) and + c12= CAST('20' as binary) "; +execute stmt1 ; +found +true +set @arg00= CAST('20' as binary) ; +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +delete from t9 ; +test_sequence +-- some numeric overflow experiments -- +prepare my_insert from "insert into t9 + ( c21, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 'O', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
)" ; +prepare my_select from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 +from t9 where c21 = 'O' "; +prepare my_delete from "delete from t9 where c21 = 'O' "; +set @arg00= 9223372036854775807 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 9.22337e+18 +c8 9.22337203685478e+18 +c9 9.22337203685478e+18 +c10 9.22337203685478e+18 +c12 9999.9999 +execute my_delete ; +set @arg00= '9223372036854775807' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 9.22337e+18 +c8 9.22337203685478e+18 +c9 9.22337203685478e+18 +c10 9.22337203685478e+18 +c12 9999.9999 +execute my_delete ; +set @arg00= -9223372036854775808 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -9.22337e+18 +c8 -9.22337203685478e+18 +c9 -9.22337203685478e+18 +c10 -9.22337203685478e+18 +c12 -9999.9999 +execute my_delete ; +set @arg00= '-9223372036854775808' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -9.22337e+18 +c8 -9.22337203685478e+18 +c9 -9.22337203685478e+18 +c10 -9.22337203685478e+18 +c12 -9999.9999 +execute my_delete ; +set @arg00= 1.11111111111111111111e+50 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 
'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 3.40282e+38 +c8 1.11111111111111e+50 +c9 1.11111111111111e+50 +c10 1.11111111111111e+50 +c12 9999.9999 +execute my_delete ; +set @arg00= '1.11111111111111111111e+50' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 3.40282e+38 +c8 1.11111111111111e+50 +c9 1.11111111111111e+50 +c10 1.11111111111111e+50 +c12 9999.9999 +execute my_delete ; +set @arg00= -1.11111111111111111111e+50 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -3.40282e+38 +c8 -1.11111111111111e+50 +c9 -1.11111111111111e+50 +c10 -1.11111111111111e+50 +c12 -9999.9999 +execute my_delete ; +set @arg00= '-1.11111111111111111111e+50' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -3.40282e+38 +c8 -1.11111111111111e+50 +c9 -1.11111111111111e+50 +c10 -1.11111111111111e+50 +c12 -9999.9999 +execute my_delete ; +test_sequence +-- insert into string columns -- +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 
+Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +select c1, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30 +from t9 where c1 >= 20 +order by c1 ; +c1 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 +20 2 20 20 20 20 20 20 20 20 20 20 +21 2 21 21 21 21 21 21 21 21 21 21 +22 2 22 22 22 22 22 22 22 22 22 22 +23 2 23 23 23 23 23 23 23 23 23 23 +30 3 30 30 30 30 30 30 30 30 30 30 +31 3 31 31 31 31 31 31 31 31 31 31 +32 3 32 32 32 32 32 32 32 32 32 32 +33 3 33 33 33 33 33 33 33 33 33 33 +40 4 40 40 40 40 40 40 40 40 40 40 +41 4 41 41 41 41 41 41 41 41 41 41 +42 4 42 42 42 42 42 42 42 42 42 42 +43 4 43 43 43 43 43 43 43 43 43 43 +50 5 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 +51 5 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 +52 5 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 +53 5 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 +54 5 54 54 54.00 54.00 54.00 54.00 54.00 54.00 54.00 54.00 +55 5 55 55 55 55 55 55 55 55 55 55 +56 6 56 56 56.00 56.00 56.00 56.00 56.00 56.00 56.00 56.00 +57 6 57 57 57.00 57.00 57.00 57.00 57.00 57.00 57.00 57.00 +60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +test_sequence +-- select .. where string column = .. -- +set @arg00= '20'; +select 'true' as found from t9 +where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and +c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and +c27= '20' and c28= '20' and c29= '20' and c30= '20' ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and + c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and + c27= '20' and c28= '20' and c29= '20' and c30= '20'" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and + c21= ? and c22= ? and c23= ? and c25= ? and + c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" 
; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= CAST('20' as binary); +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20))) += CAST('20' as binary) and c21= CAST('20' as binary) +and c22= CAST('20' as binary) and c23= CAST('20' as binary) and +c24= CAST('20' as binary) and c25= CAST('20' as binary) and +c26= CAST('20' as binary) and c27= CAST('20' as binary) and +c28= CAST('20' as binary) and c29= CAST('20' as binary) and +c30= CAST('20' as binary) ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20))) = @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and +c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20))) + = CAST('20' as binary) and c21= CAST('20' as binary) + and c22= CAST('20' as binary) and c23= CAST('20' as binary) and + c24= CAST('20' as binary) and c25= CAST('20' as binary) and + c26= CAST('20' as binary) and c27= CAST('20' as binary) and + c28= CAST('20' as binary) and c29= CAST('20' as binary) and + c30= CAST('20' as binary)" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20))) = ? and c21= ? and + c22= ? and c23= ? and c25= ? and c26= ? and c27= ? and c28= ? and + c29= ? and c30= ?"; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 20; +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and +c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and +c27= 20 and c28= 20 and c29= 20 and c30= 20 ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and + c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and + c27= 20 and c28= 20 and c29= 20 and c30= 20" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and + c21= ? and c22= ? and c23= ? and c25= ? and + c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" 
; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 20.0; +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and +c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and +c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0 ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and + c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and + c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and + c21= ? and c22= ? and c23= ? and c25= ? and + c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +delete from t9 ; +test_sequence +-- insert into date/time columns -- +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range 
value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +select c1, c13, c14, c15, c16, c17 from t9 order by c1 ; +c1 c13 c14 c15 c16 c17 +20 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +21 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +22 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +23 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +30 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +31 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +32 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +33 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +40 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +41 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +42 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +43 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +50 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +51 2010-00-00 2010-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +52 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +53 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +60 NULL NULL 1991-01-01 01:01:01 NULL NULL +61 NULL NULL 1991-01-01 01:01:01 NULL NULL +62 NULL NULL 1991-01-01 01:01:01 NULL NULL +63 NULL NULL 1991-01-01 01:01:01 NULL NULL +71 NULL NULL 1991-01-01 01:01:01 NULL NULL +73 NULL NULL 1991-01-01 01:01:01 NULL NULL +81 NULL NULL 1991-01-01 01:01:01 NULL NULL +83 NULL NULL 1991-01-01 01:01:01 NULL NULL +test_sequence +-- select .. where date/time column = .. -- +set @arg00= '1991-01-01 01:01:01' ; +select 'true' as found from t9 +where c1= 20 and c13= '1991-01-01 01:01:01' and c14= '1991-01-01 01:01:01' and +c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and +c17= '1991-01-01 01:01:01' ; +found +true +select 'true' as found from t9 +where c1= 20 and c13= @arg00 and c14= @arg00 and c15= @arg00 and c16= @arg00 +and c17= @arg00 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= '1991-01-01 01:01:01' and c14= '1991-01-01 01:01:01' and + c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and + c17= '1991-01-01 01:01:01'" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= ? and c14= ? and c15= ? and c16= ? and c17= ?" 
; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= CAST('1991-01-01 01:01:01' as datetime) ; +select 'true' as found from t9 +where c1= 20 and c13= CAST('1991-01-01 01:01:01' as datetime) and +c14= CAST('1991-01-01 01:01:01' as datetime) and +c15= CAST('1991-01-01 01:01:01' as datetime) and +c16= CAST('1991-01-01 01:01:01' as datetime) and +c17= CAST('1991-01-01 01:01:01' as datetime) ; +found +true +select 'true' as found from t9 +where c1= 20 and c13= @arg00 and c14= @arg00 and c15= @arg00 and c16= @arg00 +and c17= @arg00 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= CAST('1991-01-01 01:01:01' as datetime) and + c14= CAST('1991-01-01 01:01:01' as datetime) and + c15= CAST('1991-01-01 01:01:01' as datetime) and + c16= CAST('1991-01-01 01:01:01' as datetime) and + c17= CAST('1991-01-01 01:01:01' as datetime)" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= ? and c14= ? and c15= ? and c16= ? and c17= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 1991 ; +select 'true' as found from t9 +where c1= 20 and c17= 1991 ; +found +true +select 'true' as found from t9 +where c1= 20 and c17= @arg00 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c17= 1991" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c17= ?" ; +execute stmt1 using @arg00 ; +found +true +set @arg00= 1.991e+3 ; +select 'true' as found from t9 +where c1= 20 and abs(c17 - 1.991e+3) < 0.01 ; +found +true +select 'true' as found from t9 +where c1= 20 and abs(c17 - @arg00) < 0.01 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and abs(c17 - 1.991e+3) < 0.01" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and abs(c17 - ?) < 0.01" ; +execute stmt1 using @arg00 ; +found +true +drop table t1, t9; diff --git a/mysql-test/t/disabled.def b/mysql-test/t/disabled.def index ab3892cd5ca..ed0e665c9ce 100644 --- a/mysql-test/t/disabled.def +++ b/mysql-test/t/disabled.def @@ -35,8 +35,9 @@ synchronization : Bug#24529 Test 'synchronization' fails on Mac pushb #ndb_binlog_ddl_multi : BUG#18976 2006-04-10 kent CRBR: multiple binlog, second binlog may miss schema log events #ndb_binlog_discover : bug#21806 2006-08-24 #ndb_autodiscover3 : bug#21806 - mysql_upgrade : Bug#25074 mysql_upgrade gives inconsisten results plugin : Bug#25659 memory leak via "plugins" test rpl_ndb_dd_advance : Bug#25913 rpl_ndb_dd_advance fails randomly ndb_alter_table : Bug##25774 ndb_alter_table.test fails in DBUG_ASSERT() on Linux x64 +maria : Until maria is fully functional +ps_maria : Until maria is fully functional diff --git a/mysql-test/t/events_logs_tests.test b/mysql-test/t/events_logs_tests.test index e45fea1dfad..ba6049d0810 100644 --- a/mysql-test/t/events_logs_tests.test +++ b/mysql-test/t/events_logs_tests.test @@ -94,7 +94,7 @@ CREATE EVENT long_event2 ON SCHEDULE EVERY 1 MINUTE DO INSERT INTO slow_event_te SELECT * FROM slow_event_test; --echo "Check slow log. 
Should see 1 row because 4 is over the threshold of 3 for GLOBAL, though under SESSION which is 10" --replace_column 1 USER_HOST 2 SLEEPVAL -SELECT user_host, query_time, db, sql_text FROM mysql.slow_log; +SELECT user_host, query_time, db, sql_text FROM mysql.slow_log where sql_text <> "DROP EVENT long_event"; DROP EVENT long_event2; --echo "Make it quite long" SET SESSION long_query_time=300; diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test new file mode 100644 index 00000000000..87a38ff60b0 --- /dev/null +++ b/mysql-test/t/maria.test @@ -0,0 +1,1082 @@ +# +# Test bugs in the MARIA code +# +-- source include/have_maria.inc + +let $default=`select @@global.storage_engine`; +set global storage_engine=maria; +set session storage_engine=maria; + +# Initialise +--disable_warnings +drop table if exists t1,t2; +--enable_warnings +SET SQL_WARNINGS=1; + +# +# Test problem with CHECK TABLE; +# + +CREATE TABLE t1 ( + STRING_DATA char(255) default NULL, + KEY string_data (STRING_DATA) +); + +INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); +INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD'); +INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'); +INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG'); +INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH'); +INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW'); +CHECK TABLE t1; +drop table t1; + +# +# Test problem with rows that are 65517-65520 bytes long +# + +create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)); + +let $1=100; +disable_query_log; +--disable_warnings +SET SQL_WARNINGS=0; +while ($1) +{ + eval insert into t1 (b) values(repeat(char(65+$1),65550-$1)); + dec $1; +} +SET SQL_WARNINGS=1; +--enable_warnings +enable_query_log; +check table t1; +repair table t1; +delete from t1 where (a & 1); +check table t1; +repair table t1; +check table t1; +drop table t1; + +# +# Test bug: Two optimize in a row reset index cardinality +# + +create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)); +insert into t1 (b) values (1),(2),(2),(2),(2); +optimize table t1; +show index from t1; +optimize table t1; +show index from t1; 
+drop table t1; + +# +# Test of how ORDER BY works when doing it on the whole table +# + +create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)); +insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4); +explain select * from t1 order by a; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select a from t1 order by a; +explain select b from t1 order by b; +explain select a,b from t1 order by b; +explain select a,b from t1; +explain select a,b,c from t1; +drop table t1; + +# +# Test of OPTIMIZE of locked and modified tables +# +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (1), (2), (3); +LOCK TABLES t1 WRITE; +INSERT INTO t1 VALUES (1), (2), (3); +OPTIMIZE TABLE t1; +DROP TABLE t1; + +# +# Test of optimize, when only mi_sort_index (but not mi_repair*) is done +# in ha_maria::repair, and index size is changed (decreased). +# + +create table t1 ( t1 char(255), key(t1(250))); +insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169'); +insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203'); +insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767'); +insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907'); +insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011'); +insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101'); +insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564'); +insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002'); +insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834'); +insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016'); +insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899'); +insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482'); +insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139'); +insert t1 values ('485448544854485448544854485448544854485477847784778477847784778477847784778477'); +insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755'); +insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188'); +insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454'); +insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656'); +insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741'); +insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178'); +insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049'); +insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635'); +insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573'); +insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878'); +insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077'); +delete from t1 where t1>'2'; +insert t1 values 
('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'), +('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'), +('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47'); +optimize table t1; +check table t1; +drop table t1; + +# +# test of maria with huge number of packed fields +# + +create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 +int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17 +int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int, +i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34 +int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int, +i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51 +int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int, +i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68 +int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int, +i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85 +int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int, +i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102 +int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110 +int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118 +int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126 +int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134 +int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142 +int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150 +int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158 +int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166 +int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174 +int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182 +int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190 +int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198 +int, i199 int, i200 int, i201 int, i202 int, i203 int, i204 int, i205 int, i206 +int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214 +int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222 +int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230 +int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238 +int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246 +int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254 +int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262 +int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270 +int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278 +int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286 +int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294 +int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302 +int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310 +int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318 +int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326 +int, i327 int, i328 int, i329 
int, i330 int, i331 int, i332 int, i333 int, i334 +int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342 +int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350 +int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358 +int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366 +int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374 +int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382 +int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390 +int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398 +int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406 +int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414 +int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422 +int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430 +int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438 +int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446 +int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454 +int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462 +int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470 +int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478 +int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486 +int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494 +int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502 +int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510 +int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518 +int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526 +int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534 +int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542 +int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550 +int, i551 int, i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558 +int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566 +int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574 +int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582 +int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590 +int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598 +int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606 +int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614 +int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622 +int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630 +int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638 +int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646 +int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654 +int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662 +int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670 +int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678 +int, i679 int, i680 
int, i681 int, i682 int, i683 int, i684 int, i685 int, i686 +int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694 +int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702 +int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710 +int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718 +int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726 +int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734 +int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742 +int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750 +int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758 +int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766 +int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774 +int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782 +int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790 +int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798 +int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806 +int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814 +int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822 +int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830 +int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838 +int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846 +int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854 +int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862 +int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870 +int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878 +int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886 +int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894 +int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902 +int, i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910 +int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918 +int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926 +int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934 +int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942 +int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950 +int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958 +int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966 +int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974 +int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982 +int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990 +int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998 +int, i999 int, i1000 int, b blob) row_format=dynamic; +insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei"); +update t1 set b=repeat('a',256); +update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0; +check table t1; +delete from t1 where i8=1; +select i1,i2 from t1; +check table t1; +drop table t1; + +# +# Test of REPAIR that once failed +# +CREATE TABLE `t1` ( + `post_id` mediumint(8) unsigned NOT NULL auto_increment, + `topic_id` mediumint(8) unsigned NOT NULL default '0', + `post_time` datetime NOT NULL default '0000-00-00 00:00:00', + `post_text` text NOT NULL, + `icon_url` varchar(10) NOT NULL default '', + `sign` tinyint(1) unsigned NOT NULL default '0', + `post_edit` varchar(150) NOT NULL default '', + `poster_login` varchar(35) NOT NULL default '', + `ip` varchar(15) NOT NULL default '', + PRIMARY KEY (`post_id`), + KEY `post_time` (`post_time`), + 
KEY `ip` (`ip`), + KEY `poster_login` (`poster_login`), + KEY `topic_id` (`topic_id`), + FULLTEXT KEY `post_text` (`post_text`) +); + +INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'); + +REPAIR TABLE t1; +CHECK TABLE t1; +drop table t1; + +# +# Test of creating table with too long key +# + +--error 1071 +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)); +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)); +--error 1071 +ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e); +DROP TABLE t1; + +# +# Test of cardinality of keys with NULL +# + +CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)); +INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4); +create table t2 (a int not null, b int, c int, key(b), key(c), key(a)); +INSERT into t2 values (1,1,1), (2,2,2); +optimize table t1; +show index from t1; +explain select * from t1,t2 where t1.a=t2.a; +explain select * from t1,t2 force index(a) where t1.a=t2.a; +explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a; +explain select * from t1,t2 where t1.b=t2.b; +explain select * from t1,t2 force index(c) where t1.a=t2.a; +explain select * from t1 where a=0 or a=2; +explain select * from t1 force index (a) where a=0 or a=2; +explain select * from t1 where c=1; +explain select * from t1 use index() where c=1; +drop table t1,t2; + +# +# Test bug when updating a split dynamic row where keys are not changed +# + +create table t1 (a int not null auto_increment primary key, b varchar(255)); +insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100)); +update t1 set b=repeat(left(b,1),200) where a=1; +delete from t1 where (a & 1)= 0; +update t1 set b=repeat('e',200) where a=1; +flush tables; +check table t1; + +# +# check updating with keys +# + +disable_query_log; +let $1 = 100; +while ($1) +{ + eval insert into t1 (b) values (repeat(char(($1 & 32)+65), $1)); + dec $1; +} +enable_query_log; +update t1 set b=repeat(left(b,1),255) where a between 1 and 5; +update t1 set b=repeat(left(b,1),10) where a between 32 and 43; +update t1 set b=repeat(left(b,1),2) where a between 64 and 66; +update t1 set b=repeat(left(b,1),65) where a between 67 and 70; +check table t1; +insert into t1 (b) values (repeat('z',100)); +update t1 set b="test" where left(b,1) > 'n'; +check table t1; +drop table t1; + +# +# two bugs in maria-space-stripping feature +# +create table t1 ( a text not null, key a (a(20))); +insert into t1 values ('aaa '),('aaa'),('aa'); +check table t1; +repair table t1; +select concat(a,'.') from t1 where a='aaa'; +select concat(a,'.') from t1 where binary a='aaa'; +update t1 set a='bbb' where a='aaa'; +select concat(a,'.') from t1; +drop table t1; + +# +# Third bug in the same code (BUG#2295) +# + +create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))); +insert into t1 values('807780', '477', '165'); +insert into t1 values('807780', '477', '162'); +insert into t1 values('807780', '472', '162'); +select * from t1 where a='807780' and b='477' and c='165'; +drop table t1; + +# +# space-stripping in _mi_prefix_search: BUG#5284 +# +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)); +INSERT t1 VALUES ("can \tcan"); +INSERT t1 VALUES ("can can"); +INSERT t1 VALUES ("can"); +SELECT * FROM t1; +CHECK 
TABLE t1; +DROP TABLE t1; + +# +# Verify blob handling +# +create table t1 (a blob); +insert into t1 values('a '),('a'); +select concat(a,'.') from t1 where a='a'; +select concat(a,'.') from t1 where a='a '; +alter table t1 add key(a(2)); +select concat(a,'.') from t1 where a='a'; +select concat(a,'.') from t1 where a='a '; +drop table t1; + +# +# Test text and unique +# +create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))); +insert into t1 (b) values ('a'),('b'),('c'); +select concat(b,'.') from t1; +update t1 set b='b ' where a=2; +--error 1062 +update t1 set b='b ' where a > 1; +--error 1062 +insert into t1 (b) values ('b'); +select * from t1; +delete from t1 where b='b'; +select a,concat(b,'.') from t1; +drop table t1; + +# +# Test keys with 0 segments. (Bug #3203) +# +create table t1 (a int not null); +create table t2 (a int not null, primary key (a)); +insert into t1 values (1); +insert into t2 values (1),(2); +select sql_big_result distinct t1.a from t1,t2 order by t2.a; +select distinct t1.a from t1,t2 order by t2.a; +select sql_big_result distinct t1.a from t1,t2; +explain select sql_big_result distinct t1.a from t1,t2 order by t2.a; +explain select distinct t1.a from t1,t2 order by t2.a; +drop table t1,t2; + +# +# Bug#14616 - Freshly imported table returns error 124 when using LIMIT +# +create table t1 ( + c1 varchar(32), + key (c1) +); +alter table t1 disable keys; +insert into t1 values ('a'), ('b'); +select c1 from t1 order by c1 limit 1; +drop table t1; + +# +# Bug #14400 Join could miss concurrently inserted row +# +# Partial key. +create table t1 (a int not null, primary key(a)); +create table t2 (a int not null, b int not null, primary key(a,b)); +insert into t1 values (1),(2),(3),(4),(5),(6); +insert into t2 values (1,1),(2,1); +lock tables t1 read local, t2 read local; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +connect (root,localhost,root,,test,$MASTER_MYPORT,master.sock); +insert into t2 values(2,0); +disconnect root; +connection default; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +drop table t1,t2; +# +# Full key. 
+CREATE TABLE t1 (c1 varchar(250) NOT NULL); +CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1)); +INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003'); +INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004'); +LOCK TABLES t1 READ LOCAL, t2 READ LOCAL; +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 + WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +connect (con1,localhost,root,,); +connection con1; +INSERT INTO t2 VALUES ('test000001'), ('test000005'); +disconnect con1; +connection default; +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 + WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +DROP TABLE t1,t2; + +# End of 4.0 tests + +# +# Test RTREE index +# +--error 1235, 1289 +CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)); +# INSERT INTO t1 VALUES (1,1),(1,1); +# DELETE FROM rt WHERE a<1; +# DROP TABLE IF EXISTS t1; + +create table t1 (a int, b varchar(200), c text not null) checksum=1; +create table t2 (a int, b varchar(200), c text not null) checksum=0; +insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, ""); +insert t2 select * from t1; +checksum table t1, t2, t3 quick; +checksum table t1, t2, t3; +checksum table t1, t2, t3 extended; +#show table status; +drop table t1,t2; + +create table t1 (a int, key (a)); +show keys from t1; +alter table t1 disable keys; +show keys from t1; +create table t2 (a int); +let $i=1000; +set @@rand_seed1=31415926,@@rand_seed2=2718281828; +--disable_query_log +while ($i) +{ + dec $i; + insert t2 values (rand()*100000); +} +--enable_query_log +insert t1 select * from t2; +show keys from t1; +alter table t1 enable keys; +show keys from t1; +alter table t1 engine=heap; +alter table t1 disable keys; +show keys from t1; +drop table t1,t2; + +# +# index search for NULL in blob. Bug #4816 +# +create table t1 ( a tinytext, b char(1), index idx (a(1),b) ); +insert into t1 values (null,''), (null,''); +explain select count(*) from t1 where a is null; +select count(*) from t1 where a is null; +drop table t1; + +# +# bug9188 - Corruption Can't open file: 'table.MYI' (errno: 145) +# +create table t1 (c1 int, c2 varchar(4) not null default '', + key(c2(3))) default charset=utf8; +insert into t1 values (1,'A'), (2, 'B'), (3, 'A'); +update t1 set c2='A B' where c1=2; +check table t1; +drop table t1; + + +# +# Bug#12296 - CHECKSUM TABLE reports 0 for the table +# This happened if the first record was marked as deleted. +# +create table t1 (c1 int); +insert into t1 values (1),(2),(3),(4); +checksum table t1; +delete from t1 where c1 = 1; +create table t2 as select * from t1; +# The following returns 0 with the bug in place. +checksum table t1; +# The above should give the same number as the following. +checksum table t2; +drop table t1, t2; + +# +# BUG#12232: New maria_stats_method variable. 
+# + +show variables like 'maria_stats_method'; + +create table t1 (a int, key(a)); +insert into t1 values (0),(1),(2),(3),(4); +insert into t1 select NULL from t1; + +# default: NULLs considered inequal +analyze table t1; +show index from t1; +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +show index from t1; + +# Set nulls to be equal: +set maria_stats_method=nulls_equal; +show variables like 'maria_stats_method'; +insert into t1 values (11); +delete from t1 where a=11; + +analyze table t1; +show index from t1; + +insert into t1 values (11); +delete from t1 where a=11; + +check table t1; +show index from t1; + +# Set nulls back to be equal +set maria_stats_method=DEFAULT; +show variables like 'maria_stats_method'; +insert into t1 values (11); +delete from t1 where a=11; + +analyze table t1; +show index from t1; + +insert into t1 values (11); +delete from t1 where a=11; + +check table t1; +show index from t1; + +drop table t1; + +# WL#2609, CSC#XXXX: MARIA +set maria_stats_method=nulls_ignored; +show variables like 'maria_stats_method'; + +create table t1 ( + a char(3), b char(4), c char(5), d char(6), + key(a,b,c,d) +); +insert into t1 values ('bcd','def1', NULL, 'zz'); +insert into t1 values ('bcd','def2', NULL, 'zz'); +insert into t1 values ('bce','def1', 'yuu', NULL); +insert into t1 values ('bce','def2', NULL, 'quux'); +analyze table t1; +show index from t1; +delete from t1; +analyze table t1; +show index from t1; + +set maria_stats_method=DEFAULT; +drop table t1; + +# BUG#13814 - key value packed incorrectly for TINYBLOBs + +create table t1( + cip INT NOT NULL, + time TIME NOT NULL, + score INT NOT NULL DEFAULT 0, + bob TINYBLOB +); + +insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03'); +insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'), + (6, 'c', '00:06'); +select * from t1 where bob is null and cip=1; +create index bug on t1 (bob(22), cip, time); +select * from t1 where bob is null and cip=1; +drop table t1; + +# +# Bug#14980 - COUNT(*) incorrect on MARIA table with certain INDEX +# +create table t1 ( + id1 int not null auto_increment, + id2 int not null default '0', + t text not null, + primary key (id1), + key x (id2, t(32)) +) engine=maria; # engine clause is redundant but it's to test its parsing +insert into t1 (id2, t) values +(10, 'abc'), (10, 'abc'), (10, 'abc'), +(20, 'abc'), (20, 'abc'), (20, 'def'), +(10, 'abc'), (10, 'abc'); +select count(*) from t1 where id2 = 10; +select count(id1) from t1 where id2 = 10; +drop table t1; + +# +# BUG##20357 - Got error 124 from storage engine using MIN and MAX functions +# in queries +# +CREATE TABLE t1(a TINYINT, KEY(a)); +INSERT INTO t1 VALUES(1); +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +ALTER TABLE t1 DISABLE KEYS; +SELECT MAX(a) FROM t1; +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +DROP TABLE t1; + +# +# BUG#18036 - update of table joined to self reports table as crashed +# +CREATE TABLE t1(a CHAR(9), b VARCHAR(7)); +INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx'); +UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb'; +SELECT * FROM t1; +DROP TABLE t1; + +# +# Bug#8283 - OPTIMIZE TABLE causes data loss +# +SET @@maria_repair_threads=2; +SHOW VARIABLES LIKE 'maria_repair%'; +# +# Test OPTIMIZE. This creates a new data file. 
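+# The table is populated, most rows are deleted, and OPTIMIZE TABLE is
+# run with maria_repair_threads=2; CHECK TABLE and SHOW TABLE STATUS
+# before and after verify that the parallel repair keeps the remaining
+# rows (the data-loss scenario of Bug#8283).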
+CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) ENGINE=maria DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +OPTIMIZE TABLE t1; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +# Test REPAIR QUICK. This retains the old data file. +CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) ENGINE=maria DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +REPAIR TABLE t1 QUICK; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +SET @@maria_repair_threads=1; +SHOW VARIABLES LIKE 'maria_repair%'; +# +# Test varchar +# + +source include/varchar.inc; + +# +# Some errors/warnings on create +# + +create table t1 (v varchar(65530), key(v)); +drop table if exists t1; +create table t1 (v varchar(65536)); +show create table t1; +drop table t1; +create table t1 (v varchar(65530) character set utf8); +show create table t1; +drop table t1; + +# MARIA specific varchar tests +--error 1118 +create table t1 (v varchar(65535)); + +# +# Test concurrent insert +# First with static record length +# +set 
@save_concurrent_insert=@@concurrent_insert; +set global concurrent_insert=1; +create table t1 (a int); +insert into t1 values (1),(2),(3),(4),(5); +lock table t1 read local; +connect (con1,localhost,root,,); +connection con1; +# Insert in table without hole +insert into t1 values(6),(7); +connection default; +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +connection con1; +set global concurrent_insert=2; +# Insert in table with hole -> Should insert at end +insert into t1 values (8),(9); +connection default; +unlock tables; +# Insert into hole +insert into t1 values (10),(11),(12); +select * from t1; +check table t1; +drop table t1; +disconnect con1; + +# Same test with dynamic record length +create table t1 (a int, b varchar(30) default "hello"); +insert into t1 (a) values (1),(2),(3),(4),(5); +lock table t1 read local; +connect (con1,localhost,root,,); +connection con1; +# Insert in table without hole +insert into t1 (a) values(6),(7); +connection default; +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +connection con1; +set global concurrent_insert=2; +# Insert in table with hole -> Should insert at end +insert into t1 (a) values (8),(9); +connection default; +unlock tables; +# Insert into hole +insert into t1 (a) values (10),(11),(12); +select a from t1; +check table t1; +drop table t1; +disconnect con1; +set global concurrent_insert=@save_concurrent_insert; + + +# BUG#9622 - ANALYZE TABLE and ALTER TABLE .. ENABLE INDEX produce +# different statistics on the same table with NULL values. +create table t1 (a int, key(a)); + +insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL); +analyze table t1; +show keys from t1; + +alter table t1 disable keys; +alter table t1 enable keys; +show keys from t1; + +drop table t1; + +# +# Bug#8706 - temporary table with data directory option fails +# +connect (session1,localhost,root,,); +connect (session2,localhost,root,,); + +connection session1; +disable_query_log; +eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 9 a; +enable_query_log; +disable_result_log; +show create table t1; +enable_result_log; + +connection session2; +disable_query_log; +eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 99 a; +enable_query_log; +disable_result_log; +show create table t1; +enable_result_log; + +connection default; +create table t1 (a int) select 42 a; + +connection session1; +select * from t1; +disconnect session1; +connection session2; +select * from t1; +disconnect session2; +connection default; +select * from t1; +drop table t1; + +--echo End of 4.1 tests + +# +# Bug#10056 - PACK_KEYS option take values greater than 1 while creating table +# +create table t1 (c1 int) pack_keys=0; +create table t2 (c1 int) pack_keys=1; +create table t3 (c1 int) pack_keys=default; +--error 1064 +create table t4 (c1 int) pack_keys=2; +drop table t1, t2, t3; + +--echo End of 5.0 tests + +# +# Test of key_block_size +# + +create table t1 (a int not null, key `a` (a) key_block_size=1024); +show create table t1; +drop table t1; + +create table t1 (a int not null, key `a` (a) key_block_size=2048); +show create table t1; +drop table t1; + +create table t1 (a varchar(2048), key `a` (a)); +show create table t1; +drop table t1; + +create table t1 (a varchar(2048), key `a` (a) key_block_size=1024); +show create table t1; +drop table t1; + +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024; +show create 
table t1; +alter table t1 key_block_size=2048; +show create table t1; +alter table t1 add c int, add key (c); +show create table t1; +alter table t1 key_block_size=0; +alter table t1 add d int, add key (d); +show create table t1; +drop table t1; + +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192; +show create table t1; +drop table t1; + +create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192; +show create table t1; +drop table t1; + +create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384; +show create table t1; +drop table t1; + + +# Test limits and errors of key_block_size + +create table t1 (a int not null, key `a` (a) key_block_size=512); +show create table t1; +drop table t1; + +create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000); +show create table t1; +drop table t1; + +create table t1 (a int not null, key `a` (a) key_block_size=1025); +show create table t1; +drop table t1; + +--error 1064 +create table t1 (a int not null, key key_block_size=1024 (a)); +--error 1064 +create table t1 (a int not null, key `a` key_block_size=1024 (a)); + + +# +# Bug#22119 - Changing MI_KEY_BLOCK_LENGTH makes a wrong myisamchk +# +CREATE TABLE t1 ( + c1 INT, + c2 VARCHAR(300), + KEY (c1) KEY_BLOCK_SIZE 1024, + KEY (c2) KEY_BLOCK_SIZE 8192 + ); +INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))), + (11, REPEAT('b', CEIL(RAND() * 300))), + (12, REPEAT('c', CEIL(RAND() * 300))), + (13, REPEAT('d', CEIL(RAND() * 300))), + (14, REPEAT('e', CEIL(RAND() * 300))), + (15, REPEAT('f', CEIL(RAND() * 300))), + (16, REPEAT('g', CEIL(RAND() * 300))), + (17, REPEAT('h', CEIL(RAND() * 300))), + (18, REPEAT('i', CEIL(RAND() * 300))), + (19, REPEAT('j', CEIL(RAND() * 300))), + (20, REPEAT('k', CEIL(RAND() * 300))), + (21, REPEAT('l', CEIL(RAND() * 300))), + (22, REPEAT('m', CEIL(RAND() * 300))), + (23, REPEAT('n', CEIL(RAND() * 300))), + (24, REPEAT('o', CEIL(RAND() * 300))), + (25, REPEAT('p', CEIL(RAND() * 300))), + (26, REPEAT('q', CEIL(RAND() * 300))), + (27, REPEAT('r', CEIL(RAND() * 300))), + (28, REPEAT('s', CEIL(RAND() * 300))), + (29, REPEAT('t', CEIL(RAND() * 300))), + (30, REPEAT('u', CEIL(RAND() * 300))), + (31, REPEAT('v', CEIL(RAND() * 300))), + (32, REPEAT('w', CEIL(RAND() * 300))), + (33, REPEAT('x', CEIL(RAND() * 300))), + (34, REPEAT('y', CEIL(RAND() * 300))), + (35, REPEAT('z', CEIL(RAND() * 300))); +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT * FROM t1; +CHECK TABLE t1; +REPAIR TABLE t1; +DELETE FROM t1 WHERE c1 >= 10; +CHECK TABLE t1; +DROP TABLE t1; + +--echo End of 5.1 tests + +eval set global storage_engine=$default; + +# End of 5.2 tests diff --git a/mysql-test/t/ps_maria.test b/mysql-test/t/ps_maria.test new file mode 100644 index 00000000000..2be99842132 --- /dev/null +++ b/mysql-test/t/ps_maria.test @@ -0,0 +1,46 @@ +############################################### +# # +# Prepared Statements test on MARIA tables # +# # +############################################### + +# +# NOTE: PLEASE SEE ps_1general.test (bottom) +# BEFORE ADDING NEW TEST CASES HERE !!! + +use test; + +-- source include/have_maria.inc + +let $type= 'MARIA' ; +-- source include/ps_create.inc +-- source include/ps_renew.inc + +-- source include/ps_query.inc + +# parameter in SELECT ... 
MATCH/AGAINST +# case derived from client_test.c: test_bug1500() +--disable_warnings +drop table if exists t2 ; +--enable_warnings +eval create table t2 (s varchar(25), fulltext(s)) +ENGINE = $type ; +insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ; +commit ; + +prepare stmt1 from ' select s from t2 where match (s) against (?) ' ; +set @arg00='Dogs' ; +execute stmt1 using @arg00 ; +prepare stmt1 from ' SELECT s FROM t2 +where match (s) against (concat(?,''digger'')) '; +set @arg00='Grave' ; +execute stmt1 using @arg00 ; +drop table t2 ; + +-- source include/ps_modify.inc +-- source include/ps_modify1.inc +-- source include/ps_conv.inc + +drop table t1, t9; + +# End of 4.1 tests diff --git a/mysys/Makefile.am b/mysys/Makefile.am index fd6a0f76332..b02e0f84ec1 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -30,7 +30,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \ my_malloc.c my_realloc.c my_once.c mulalloc.c \ my_alloc.c safemalloc.c my_new.cc \ - my_vle.c my_atomic.c \ + my_vle.c my_atomic.c lf_hash.c \ + lf_dynarray.c lf_alloc-pin.c \ my_fopen.c my_fstream.c my_getsystime.c \ my_error.c errors.c my_div.c my_messnc.c \ mf_format.c mf_same.c mf_dirname.c mf_fn_ext.c \ @@ -52,7 +53,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_gethostbyname.c rijndael.c my_aes.c sha1.c \ my_handler.c my_netware.c my_largepage.c \ my_memmem.c \ - my_windac.c my_access.c base64.c my_libwrap.c + my_windac.c my_access.c base64.c my_libwrap.c \ + mf_pagecache.c wqueue.c EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ thr_mutex.c thr_rwlock.c \ CMakeLists.txt mf_soundex.c \ @@ -126,5 +128,6 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES) $(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS) $(RM) -f ./test_base64.c + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/mysys/array.c b/mysys/array.c index 8f4a6087c00..7f909999fe9 100644 --- a/mysys/array.c +++ b/mysys/array.c @@ -60,7 +60,8 @@ my_bool init_dynamic_array(DYNAMIC_ARRAY *array, uint element_size, array->max_element=init_alloc; array->alloc_increment=alloc_increment; array->size_of_element=element_size; - if (!(array->buffer=(char*) my_malloc_ci(element_size*init_alloc,MYF(MY_WME)))) + if (!(array->buffer=(char*) my_malloc_ci(element_size*init_alloc, + MYF(MY_WME)))) { array->max_element=0; DBUG_RETURN(TRUE); @@ -153,7 +154,7 @@ byte *pop_dynamic(DYNAMIC_ARRAY *array) } /* - Replace elemnent in array with given element and index + Replace element in array with given element and index SYNOPSIS set_dynamic() @@ -174,19 +175,8 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, gptr element, uint idx) { if (idx >= array->elements) { - if (idx >= array->max_element) - { - uint size; - char *new_ptr; - size=(idx+array->alloc_increment)/array->alloc_increment; - size*= array->alloc_increment; - if (!(new_ptr=(char*) my_realloc(array->buffer,size* - array->size_of_element, - MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) - return TRUE; - array->buffer=new_ptr; - array->max_element=size; - } + if (idx >= array->max_element && allocate_dynamic(array, idx)) + return TRUE; bzero((gptr) (array->buffer+array->elements*array->size_of_element), (idx - array->elements)*array->size_of_element); array->elements=idx+1; @@ -196,6 +186,42 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, gptr element, uint idx) return FALSE; } + +/* + Ensure that dynamic array has enough elements + + SYNOPSIS + allocate_dynamic() + array + max_elements 
Numbers of elements that is needed + + NOTES + Any new allocated element are NOT initialized + + RETURN VALUE + FALSE Ok + TRUE Allocation of new memory failed +*/ + +my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements) +{ + if (max_elements >= array->max_element) + { + uint size; + char *new_ptr; + size= (max_elements + array->alloc_increment)/array->alloc_increment; + size*= array->alloc_increment; + if (!(new_ptr= (char*) my_realloc(array->buffer,size* + array->size_of_element, + MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) + return TRUE; + array->buffer= new_ptr; + array->max_element= size; + } + return FALSE; +} + + /* Get an element from array by given index diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c new file mode 100644 index 00000000000..e964553a64c --- /dev/null +++ b/mysys/lf_alloc-pin.c @@ -0,0 +1,443 @@ +/* QQ: TODO multi-pinbox */ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + wait-free concurrent allocator based on pinning addresses + + It works as follows: every thread (strictly speaking - every CPU, but + it's too difficult to do) has a small array of pointers. They're called + "pins". Before using an object its address must be stored in this array + (pinned). When an object is no longer necessary its address must be + removed from this array (unpinned). When a thread wants to free() an + object it scans all pins of all threads to see if somebody has this + object pinned. If yes - the object is not freed (but stored in a + "purgatory"). To reduce the cost of a single free() pins are not scanned + on every free() but only added to (thread-local) purgatory. On every + LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects + are freed. + + Pins are used to solve ABA problem. To use pins one must obey + a pinning protocol: + 1. Let's assume that PTR is a shared pointer to an object. Shared means + that any thread may modify it anytime to point to a different object + and free the old object. Later the freed object may be potentially + allocated by another thread. If we're unlucky that another thread may + set PTR to point to this object again. This is ABA problem. + 2. Create a local pointer LOCAL_PTR. + 3. Pin the PTR in a loop: + do + { + LOCAL_PTR= PTR; + pin(PTR, PIN_NUMBER); + } while (LOCAL_PTR != PTR) + 4. It is guaranteed that after the loop has ended, LOCAL_PTR + points to an object (or NULL, if PTR may be NULL), that + will never be freed. It is not guaranteed though + that LOCAL_PTR == PTR (as PTR can change any time) + 5. When done working with the object, remove the pin: + unpin(PIN_NUMBER) + 6. When copying pins (as in the list traversing loop: + pin(CUR, 1); + while () + { + do // standard + { // pinning + NEXT=CUR->next; // loop + pin(NEXT, 0); // see #3 + } while (NEXT != CUR->next); // above + ... + ... 
+ CUR=NEXT; + pin(CUR, 1); // copy pin[0] to pin[1] + } + which keeps CUR address constantly pinned), note than pins may be + copied only upwards (!!!), that is pin[N] to pin[M], M > N. + 7. Don't keep the object pinned longer than necessary - the number of + pins you have is limited (and small), keeping an object pinned + prevents its reuse and cause unnecessary mallocs. + + Implementation details: + Pins are given away from a "pinbox". Pinbox is stack-based allocator. + It used dynarray for storing pins, new elements are allocated by dynarray + as necessary, old are pushed in the stack for reuse. ABA is solved by + versioning a pointer - because we use an array, a pointer to pins is 32 bit, + upper 32 bits are used for a version. +*/ + +#include <my_global.h> +#include <my_sys.h> +#include <lf.h> + +#define LF_PINBOX_MAX_PINS 65536 + +static void _lf_pinbox_real_free(LF_PINS *pins); + +/* + Initialize a pinbox. Normally called from lf_alloc_init. + See the latter for details. +*/ +void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, + lf_pinbox_free_func *free_func, void *free_func_arg) +{ + DBUG_ASSERT(sizeof(LF_PINS) == 128); + DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0); + lf_dynarray_init(&pinbox->pinstack, sizeof(LF_PINS)); + pinbox->pinstack_top_ver= 0; + pinbox->pins_in_stack= 0; + pinbox->free_ptr_offset= free_ptr_offset; + pinbox->free_func= free_func; + pinbox->free_func_arg= free_func_arg; +} + +void lf_pinbox_destroy(LF_PINBOX *pinbox) +{ + lf_dynarray_destroy(&pinbox->pinstack); +} + +/* + Get pins from a pinbox. Usually called via lf_alloc_get_pins() or + lf_hash_get_pins(). + + DESCRIPTION + get a new LF_PINS structure from a stack of unused pins, + or allocate a new one out of dynarray. +*/ +LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) +{ + uint32 pins, next, top_ver; + LF_PINS *el; + + top_ver= pinbox->pinstack_top_ver; + do + { + if (!(pins= top_ver % LF_PINBOX_MAX_PINS)) + { + pins= my_atomic_add32(&pinbox->pins_in_stack, 1)+1; + el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinstack, pins); + break; + } + el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinstack, pins); + next= el->link; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + top_ver-pins+next+LF_PINBOX_MAX_PINS)); + el->link= pins; + el->purgatory_count= 0; + el->pinbox= pinbox; + return el; +} + +/* + Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or + lf_hash_put_pins(). + + DESCRIPTION + empty the purgatory (XXX deadlock warning below!), + push LF_PINS structure to a stack +*/ +void _lf_pinbox_put_pins(LF_PINS *pins) +{ + LF_PINBOX *pinbox= pins->pinbox; + uint32 top_ver, nr; + nr= pins->link; +#ifdef MY_LF_EXTRA_DEBUG + { + int i; + for (i= 0; i < LF_PINBOX_PINS; i++) + DBUG_ASSERT(pins->pin[i] == 0); + } +#endif + /* + XXX this will deadlock if other threads will wait for + the caller to do something after _lf_pinbox_put_pins(), + and they would have pinned addresses that the caller wants to free. + Thus: only free pins when all work is done and nobody can wait for you!!! 
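+
+    A rough usage sketch (illustration only; real callers normally go
+    through the lf_alloc_* / lf_hash_* wrappers):
+      pins= _lf_pinbox_get_pins(&allocator.pinbox); // once per thread
+      node= _lf_alloc_new(pins);           // pops or mallocs an object
+      ...                                  // use node, obeying the
+                                           // pinning protocol above
+      _lf_pinbox_free(pins, node);         // node goes to the purgatory
+      _lf_pinbox_put_pins(pins);           // only when nobody can be
+                                           // waiting on us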
+ */ + while (pins->purgatory_count) + { + _lf_pinbox_real_free(pins); + if (pins->purgatory_count) + { + my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); + pthread_yield(); + my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); + } + } + top_ver= pinbox->pinstack_top_ver; + if (nr == pinbox->pins_in_stack) + { + int32 tmp= nr; + if (my_atomic_cas32(&pinbox->pins_in_stack, &tmp, tmp-1)) + goto ret; + } + + do + { + pins->link= top_ver % LF_PINBOX_MAX_PINS; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); +ret: + return; +} + +static int ptr_cmp(void **a, void **b) +{ + return *a < *b ? -1 : *a == *b ? 0 : 1; +} + +#define add_to_purgatory(PINS, ADDR) \ + do \ + { \ + *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \ + (PINS)->purgatory; \ + (PINS)->purgatory= (ADDR); \ + (PINS)->purgatory_count++; \ + } while (0) + +/* + Free an object allocated via pinbox allocator + + DESCRIPTION + add an object to purgatory. if necessary, call _lf_pinbox_real_free() + to actually free something. +*/ +void _lf_pinbox_free(LF_PINS *pins, void *addr) +{ + add_to_purgatory(pins, addr); + if (pins->purgatory_count % LF_PURGATORY_SIZE) + _lf_pinbox_real_free(pins); +} + +struct st_harvester { + void **granary; + int npins; +}; + +/* + callback for _lf_dynarray_iterate: + scan all pins or all threads and accumulate all pins +*/ +static int harvest_pins(LF_PINS *el, struct st_harvester *hv) +{ + int i; + LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH); + for (; el < el_end; el++) + { + for (i= 0; i < LF_PINBOX_PINS; i++) + { + void *p= el->pin[i]; + if (p) + *hv->granary++= p; + } + } + hv->npins-= LF_DYNARRAY_LEVEL_LENGTH; + return 0; +} + +/* + callback for _lf_dynarray_iterate: + scan all pins or all threads and see if addr is present there +*/ +static int match_pins(LF_PINS *el, void *addr) +{ + int i; + LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH; + for (; el < el_end; el++) + for (i= 0; i < LF_PINBOX_PINS; i++) + if (el->pin[i] == addr) + return 1; + return 0; +} + +/* + Scan the purgatory as free everything that can be freed +*/ +static void _lf_pinbox_real_free(LF_PINS *pins) +{ + int npins; + void *list; + void **addr; + LF_PINBOX *pinbox= pins->pinbox; + + npins= pinbox->pins_in_stack+1; + +#ifdef HAVE_ALLOCA + /* create a sorted list of pinned addresses, to speed up searches */ + if (sizeof(void *)*LF_PINBOX_PINS*npins < my_thread_stack_size) + { + struct st_harvester hv; + addr= (void **) alloca(sizeof(void *)*LF_PINBOX_PINS*npins); + hv.granary= addr; + hv.npins= npins; + /* scan the dynarray and accumulate all pinned addresses */ + _lf_dynarray_iterate(&pinbox->pinstack, + (lf_dynarray_func)harvest_pins, &hv); + + npins= hv.granary-addr; + /* and sort them */ + if (npins) + qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); + } + else +#endif + addr= 0; + + list= pins->purgatory; + pins->purgatory= 0; + pins->purgatory_count= 0; + while (list) + { + void *cur= list; + list= *(void **)((char *)cur+pinbox->free_ptr_offset); + if (npins) + { + if (addr) /* use binary search */ + { + void **a, **b, **c; + for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2) + if (cur == *c) + a= b= c; + else if (cur > *c) + a= c; + else + b= c; + if (cur == *a || cur == *b) + goto found; + } + else /* no alloca - no cookie. 
linear search here */ + { + if (_lf_dynarray_iterate(&pinbox->pinstack, + (lf_dynarray_func)match_pins, cur)) + goto found; + } + } + /* not pinned - freeing */ + pinbox->free_func(cur, pinbox->free_func_arg); + continue; +found: + /* pinned - keeping */ + add_to_purgatory(pins, cur); + } +} + +/* + callback for _lf_pinbox_real_free to free an unpinned object - + add it back to the allocator stack +*/ +static void alloc_free(struct st_lf_alloc_node *node, LF_ALLOCATOR *allocator) +{ + struct st_lf_alloc_node *tmp; + tmp= allocator->top; + do + { + node->next= tmp; + } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) && + LF_BACKOFF); +} + +/* lock-free memory allocator for fixed-size objects */ + +LF_REQUIRE_PINS(1); + +/* + initialize lock-free allocatod. + + SYNOPSYS + allocator - + size a size of an object to allocate + free_ptr_offset an offset inside the object to a sizeof(void *) + memory that is guaranteed to be unused after + the object is put in the purgatory. Unused by ANY + thread, not only the purgatory owner. +*/ +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) +{ + lf_pinbox_init(&allocator->pinbox, free_ptr_offset, + (lf_pinbox_free_func *)alloc_free, allocator); + allocator->top= 0; + allocator->mallocs= 0; + allocator->element_size= size; + DBUG_ASSERT(size >= (int)sizeof(void *)); + DBUG_ASSERT(free_ptr_offset < size); +} + +/* + destroy the allocator, free everything that's in it +*/ +void lf_alloc_destroy(LF_ALLOCATOR *allocator) +{ + struct st_lf_alloc_node *node= allocator->top; + while (node) + { + struct st_lf_alloc_node *tmp= node->next; + my_free((void *)node, MYF(0)); + node= tmp; + } + lf_pinbox_destroy(&allocator->pinbox); + allocator->top= 0; +} + +/* + Allocate and return an new object. + + DESCRIPTION + Pop an unused object from the stack or malloc it is the stack is empty. + pin[0] is used, it's removed on return. +*/ +void *_lf_alloc_new(LF_PINS *pins) +{ + LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); + struct st_lf_alloc_node *node; + for (;;) + { + do + { + node= allocator->top; + _lf_pin(pins, 0, node); + } while (node != allocator->top && LF_BACKOFF); + if (!node) + { + if (!(node= (void *)my_malloc(allocator->element_size, + MYF(MY_WME|MY_ZEROFILL)))) + break; +#ifdef MY_LF_EXTRA_DEBUG + my_atomic_add32(&allocator->mallocs, 1); +#endif + break; + } + if (my_atomic_casptr((void **)&allocator->top, + (void *)&node, *(void **)node)) + break; + } + _lf_unpin(pins, 0); + return node; +} + +/* + count the number of objects in a pool. + + NOTE + This is NOT thread-safe !!! +*/ +uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) +{ + uint i; + struct st_lf_alloc_node *node; + for (node= allocator->top, i= 0; node; node= node->next, i++) + /* no op */; + return i; +} + diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c new file mode 100644 index 00000000000..c6dd654bf03 --- /dev/null +++ b/mysys/lf_dynarray.c @@ -0,0 +1,204 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Analog of DYNAMIC_ARRAY that never reallocs + (so no pointer into the array may ever become invalid). + + Memory is allocated in non-contiguous chunks. + This data structure is not space efficient for sparse arrays. + + The number of elements is limited to 4311810304 + + Every element is aligned to sizeof(element) boundary + (to avoid false sharing if element is big enough). + + LF_DYNARRAY is a recursive structure. On the zero level + LF_DYNARRAY::level[0] it's an array of LF_DYNARRAY_LEVEL_LENGTH elements, + on the first level it's an array of LF_DYNARRAY_LEVEL_LENGTH pointers + to arrays of elements, on the second level it's an array of pointers + to arrays of pointers to arrays of elements. And so on. + + Actually, it's wait-free, not lock-free ;-) +*/ + +#include <my_global.h> +#include <strings.h> +#include <my_sys.h> +#include <lf.h> + +void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) +{ + bzero(array, sizeof(*array)); + array->size_of_element= element_size; + my_atomic_rwlock_init(&array->lock); +} + +static void recursive_free(void **alloc, int level) +{ + if (!alloc) + return; + + if (level) + { + int i; + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + recursive_free(alloc[i], level-1); + my_free((void *)alloc, MYF(0)); + } + else + my_free(alloc[-1], MYF(0)); +} + +void lf_dynarray_destroy(LF_DYNARRAY *array) +{ + int i; + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + recursive_free(array->level[i], i); + my_atomic_rwlock_destroy(&array->lock); +} + +static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH + + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH +}; + +static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH, +}; + +/* + Returns a valid lvalue pointer to the element number 'idx'. + Allocates memory if necessary. 
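+
+  For illustration, with LF_DYNARRAY_LEVEL_LENGTH denoted as L:
+  elements 0..L-1 live directly in the array at level[0]; element L+n
+  (for n < L*L) is reached as level[1][n / L][n % L]; each deeper level
+  adds one more indirection, as encoded in the
+  dynarray_idxes_in_prev_level* arrays above.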
+*/ +void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr= 0; + int i; + + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; + for (; i > 0; i--) + { + if (!(ptr= *ptr_ptr)) + { + void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) + return(NULL); + if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) + ptr= alloc; + else + my_free(alloc, MYF(0)); + } + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx%= dynarray_idxes_in_prev_level[i]; + } + if (!(ptr= *ptr_ptr)) + { + void *alloc, *data; + alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + + max(array->size_of_element, sizeof(void *)), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) + return(NULL); + /* reserve the space for free() address */ + data= alloc + sizeof(void *); + { /* alignment */ + intptr mod= ((intptr)data) % array->size_of_element; + if (mod) + data+= array->size_of_element - mod; + } + ((void **)data)[-1]= alloc; /* free() will need the original pointer */ + if (my_atomic_casptr(ptr_ptr, &ptr, data)) + ptr= data; + else + my_free(alloc, MYF(0)); + } + return ptr + array->size_of_element * idx; +} + +/* + Returns a pointer to the element number 'idx' + or NULL if an element does not exists +*/ +void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr= 0; + int i; + + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; + for (; i > 0; i--) + { + if (!(ptr= *ptr_ptr)) + return(NULL); + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx %= dynarray_idxes_in_prev_level[i]; + } + if (!(ptr= *ptr_ptr)) + return(NULL); + return ptr + array->size_of_element * idx; +} + +static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, + lf_dynarray_func func, void *arg) +{ + int res, i; + if (!ptr) + return 0; + if (!level) + return func(ptr, arg); + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + if ((res= recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) + return res; + return 0; +} + +/* + Calls func(array, arg) on every array of LF_DYNARRAY_LEVEL_LENGTH elements + in lf_dynarray. + + DESCRIPTION + lf_dynarray consists of a set of arrays, LF_DYNARRAY_LEVEL_LENGTH elements + each. _lf_dynarray_iterate() calls user-supplied function on every array + from the set. It is the fastest way to scan the array, faster than + for (i=0; i < N; i++) { func(_lf_dynarray_value(dynarray, i)); } +*/ +int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) +{ + int i, res; + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + if ((res= recursive_iterate(array, array->level[i], i, func, arg))) + return res; + return 0; +} + diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c new file mode 100644 index 00000000000..fb2fb88492f --- /dev/null +++ b/mysys/lf_hash.c @@ -0,0 +1,400 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + extensible hash + + TODO + try to get rid of dummy nodes ? + for non-unique hash, count only _distinct_ values + (but how to do it in lf_hash_delete ?) +*/ +#include <my_global.h> +#include <m_string.h> +#include <my_sys.h> +#include <my_bit.h> +#include <lf.h> + +LF_REQUIRE_PINS(3); + +/* An element of the list */ +typedef struct { + intptr volatile link; /* a pointer to the next element in a listand a flag */ + uint32 hashnr; /* reversed hash number, for sorting */ + const byte *key; + uint keylen; +} LF_SLIST; + +/* + a structure to pass the context (pointers two the three successive elements + in a list) from lfind to linsert/ldelete +*/ +typedef struct { + intptr volatile *prev; + LF_SLIST *curr, *next; +} CURSOR; + +/* + the last bit in LF_SLIST::link is a "deleted" flag. + the helper macros below convert it to a pure pointer or a pure flag +*/ +#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1)) +#define DELETED(V) ((V) & 1) + +/* + DESCRIPTION + Search for hashnr/key/keylen in the list starting from 'head' and + position the cursor. The list is ORDER BY hashnr, key + + RETURN + 0 - not found + 1 - found + + NOTE + cursor is positioned in either case + pins[0..2] are used, they are NOT removed on return +*/ +static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, + const byte *key, uint keylen, CURSOR *cursor, LF_PINS *pins) +{ + uint32 cur_hashnr; + const byte *cur_key; + uint cur_keylen; + intptr link; + +retry: + cursor->prev= (intptr *)head; + do { + cursor->curr= PTR(*cursor->prev); + _lf_pin(pins, 1, cursor->curr); + } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + for (;;) + { + if (!cursor->curr) + return 0; + do { + /* QQ: XXX or goto retry ? */ + link= cursor->curr->link; + cursor->next= PTR(link); + _lf_pin(pins, 0, cursor->next); + } while(link != cursor->curr->link && LF_BACKOFF); + cur_hashnr= cursor->curr->hashnr; + cur_key= cursor->curr->key; + cur_keylen= cursor->curr->keylen; + if (*cursor->prev != (intptr)cursor->curr) + { + (void)LF_BACKOFF; + goto retry; + } + if (!DELETED(link)) + { + if (cur_hashnr >= hashnr) + { + int r= 1; + if (cur_hashnr > hashnr || + (r= my_strnncoll(cs, (uchar*) cur_key, cur_keylen, (uchar*) key, + keylen)) >= 0) + return !r; + } + cursor->prev= &(cursor->curr->link); + _lf_pin(pins, 2, cursor->curr); + } + else + { + if (my_atomic_casptr((void **)cursor->prev, + (void **)&cursor->curr, cursor->next)) + _lf_alloc_free(pins, cursor->curr); + else + { + (void)LF_BACKOFF; + goto retry; + } + } + cursor->curr= cursor->next; + _lf_pin(pins, 1, cursor->curr); + } +} + +/* + DESCRIPTION + insert a 'node' in the list that starts from 'head' in the correct + position (as found by lfind) + + RETURN + 0 - inserted + not 0 - a pointer to a duplicate (not pinned and thus unusable) + + NOTE + it uses pins[0..2], on return all pins are removed. 
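+
+  Pin usage in the list code: pin[1] protects cursor.curr, pin[0]
+  protects cursor.next, and pin[2] keeps the element whose link field
+  cursor.prev points into. The insert below is a plain CAS retry loop:
+  position the cursor with lfind(), link the new node in front of
+  cursor.curr and try to swing *cursor.prev to the node; on CAS failure
+  the search is repeated.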
+*/ +static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, + LF_SLIST *node, LF_PINS *pins, uint flags) +{ + CURSOR cursor; + int res= -1; + + do + { + if (lfind(head, cs, node->hashnr, node->key, node->keylen, + &cursor, pins) && + (flags & LF_HASH_UNIQUE)) + res= 0; /* duplicate found */ + else + { + node->link= (intptr)cursor.curr; + assert(node->link != (intptr)node); + assert(cursor.prev != &node->link); + if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + res= 1; /* inserted ok */ + } + } while (res == -1); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + return res ? 0 : cursor.curr; +} + +/* + DESCRIPTION + deletes a node as identified by hashnr/keey/keylen from the list + that starts from 'head' + + RETURN + 0 - ok + 1 - not found + + NOTE + it uses pins[0..2], on return all pins are removed. +*/ +static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, + const byte *key, uint keylen, LF_PINS *pins) +{ + CURSOR cursor; + int res= -1; + + do + { + if (!lfind(head, cs, hashnr, key, keylen, &cursor, pins)) + res= 1; + else + if (my_atomic_casptr((void **)&(cursor.curr->link), + (void **)&cursor.next, 1+(char *)cursor.next)) + { + if (my_atomic_casptr((void **)cursor.prev, + (void **)&cursor.curr, cursor.next)) + _lf_alloc_free(pins, cursor.curr); + else + lfind(head, cs, hashnr, key, keylen, &cursor, pins); + res= 0; + } + } while (res == -1); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + return res; +} + +/* + DESCRIPTION + searches for a node as identified by hashnr/keey/keylen in the list + that starts from 'head' + + RETURN + 0 - not found + node - found + + NOTE + it uses pins[0..2], on return the pin[2] keeps the node found + all other pins are removed. +*/ +static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, + uint32 hashnr, const byte *key, uint keylen, + LF_PINS *pins) +{ + CURSOR cursor; + int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins); + if (res) _lf_pin(pins, 2, cursor.curr); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + return res ? cursor.curr : 0; +} + +static inline const byte* hash_key(const LF_HASH *hash, + const byte *record, uint *length) +{ + if (hash->get_key) + return (*hash->get_key)(record, length, 0); + *length= hash->key_length; + return record + hash->key_offset; +} + +static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) +{ + ulong nr1= 1, nr2= 4; + hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen, + &nr1, &nr2); + return nr1 & INT_MAX32; +} + +#define MAX_LOAD 1.0 +static void initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); + +/* + Initializes lf_hash, the arguments are compatible with hash_init +*/ +void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, + uint key_offset, uint key_length, hash_get_key get_key, + CHARSET_INFO *charset) +{ + lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, + offsetof(LF_SLIST, key)); + lf_dynarray_init(&hash->array, sizeof(LF_SLIST **)); + hash->size= 1; + hash->count= 0; + hash->element_size= element_size; + hash->flags= flags; + hash->charset= charset ? charset : &my_charset_bin; + hash->key_offset= key_offset; + hash->key_length= key_length; + hash->get_key= get_key; + DBUG_ASSERT(get_key ? 
!key_offset && !key_length : key_length); +} + +void lf_hash_destroy(LF_HASH *hash) +{ + LF_SLIST *el= *(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + lf_alloc_real_free(&hash->alloc, el); + else + my_free((void *)el, MYF(0)); + el= (LF_SLIST *)next; + } + lf_alloc_destroy(&hash->alloc); + lf_dynarray_destroy(&hash->array); +} + +/* + DESCRIPTION + inserts a new element to a hash. it will have a _copy_ of + data, not a pointer to it. + + RETURN + 0 - inserted + 1 - didn't (unique key conflict) + + NOTE + see linsert() for pin usage notes +*/ +int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) +{ + int csize, bucket, hashnr; + LF_SLIST *node, * volatile *el; + + lf_rwlock_by_pins(pins); + node= (LF_SLIST *)_lf_alloc_new(pins); + memcpy(node+1, data, hash->element_size); + node->key= hash_key(hash, (byte *)(node+1), &node->keylen); + hashnr= calc_hash(hash, node->key, node->keylen); + bucket= hashnr % hash->size; + el= _lf_dynarray_lvalue(&hash->array, bucket); + if (*el == NULL) + initialize_bucket(hash, el, bucket, pins); + node->hashnr= my_reverse_bits(hashnr) | 1; + if (linsert(el, hash->charset, node, pins, hash->flags)) + { + _lf_alloc_free(pins, node); + lf_rwunlock_by_pins(pins); + return 1; + } + csize= hash->size; + if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) + my_atomic_cas32(&hash->size, &csize, csize*2); + lf_rwunlock_by_pins(pins); + return 0; +} + +/* + RETURN + 0 - deleted + 1 - didn't (not found) + NOTE + see ldelete() for pin usage notes +*/ +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) +{ + LF_SLIST * volatile *el; + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); + + bucket= hashnr % hash->size; + lf_rwlock_by_pins(pins); + el= _lf_dynarray_lvalue(&hash->array, bucket); + if (*el == NULL) + initialize_bucket(hash, el, bucket, pins); + if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, + (byte *)key, keylen, pins)) + { + lf_rwunlock_by_pins(pins); + return 1; + } + my_atomic_add32(&hash->count, -1); + lf_rwunlock_by_pins(pins); + return 0; +} + +/* + NOTE + see lsearch() for pin usage notes +*/ +void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) +{ + LF_SLIST * volatile *el, *found; + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); + + bucket= hashnr % hash->size; + lf_rwlock_by_pins(pins); + el= _lf_dynarray_lvalue(&hash->array, bucket); + if (*el == NULL) + initialize_bucket(hash, el, bucket, pins); + found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1, + (byte *)key, keylen, pins); + lf_rwunlock_by_pins(pins); + return found ? 
found+1 : 0; +} + +static const char *dummy_key= ""; + +static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, + uint bucket, LF_PINS *pins) +{ + uint parent= my_clear_highest_bit(bucket); + LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); + LF_SLIST **tmp= 0, *cur; + LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent); + if (*el == NULL && bucket) + initialize_bucket(hash, el, parent, pins); + dummy->hashnr= my_reverse_bits(bucket); + dummy->key= (char*) dummy_key; + dummy->keylen= 0; + if ((cur= linsert(el, hash->charset, dummy, pins, 0))) + { + my_free((void *)dummy, MYF(0)); + dummy= cur; + } + my_atomic_casptr((void **)node, (void **)&tmp, dummy); +} diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 86394fec239..95a9f08a07a 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -42,6 +42,7 @@ #include <keycache.h> #include "my_static.h" #include <m_string.h> +#include <my_bit.h> #include <errno.h> #include <stdarg.h> @@ -1009,12 +1010,12 @@ static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block) KEYCACHE_THREAD_TRACE("unlink_block"); #if defined(KEYCACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0); keycache->blocks_available--; KEYCACHE_DBUG_PRINT("unlink_block", ("unlinked block %u status=%x #requests=%u #available=%u", BLOCK_NUMBER(block), block->status, block->requests, keycache->blocks_available)); - KEYCACHE_DBUG_ASSERT(keycache->blocks_available >= 0); #endif } @@ -1643,9 +1644,9 @@ restart: KEYCACHE_DBUG_ASSERT(page_status != -1); *page_st=page_status; KEYCACHE_DBUG_PRINT("find_key_block", - ("fd: %d pos: %lu block->status: %u page_status: %u", + ("fd: %d pos: %lu block->status: %u page_status: %d", file, (ulong) filepos, block->status, - (uint) page_status)); + page_status)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_keycache2", @@ -1793,8 +1794,6 @@ byte *key_cache_read(KEY_CACHE *keycache, uint offset= 0; byte *start= buff; DBUG_ENTER("key_cache_read"); - DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", - (uint) file, (ulong) filepos, length)); if (keycache->can_be_used) { @@ -1804,6 +1803,11 @@ byte *key_cache_read(KEY_CACHE *keycache, uint status; int page_st; + DBUG_PRINT("enter", ("fd: %u pos: %lu page: %lu length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length)); + offset= (uint) (filepos & (keycache->key_cache_block_size-1)); /* Read data in key_cache_block_size increments */ do @@ -2055,10 +2059,6 @@ int key_cache_write(KEY_CACHE *keycache, reg1 BLOCK_LINK *block; int error=0; DBUG_ENTER("key_cache_write"); - DBUG_PRINT("enter", - ("fd: %u pos: %lu length: %u block_length: %u key_block_length: %u", - (uint) file, (ulong) filepos, length, block_length, - keycache ? 
keycache->key_cache_block_size : 0)); if (!dont_write) { @@ -2080,6 +2080,12 @@ int key_cache_write(KEY_CACHE *keycache, int page_st; uint offset; + DBUG_PRINT("enter", + ("fd: %u pos: %lu page: %lu length: %u block_length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length, block_length)); + offset= (uint) (filepos & (keycache->key_cache_block_size-1)); do { diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c index 51ad54159e5..3f40f4f4010 100644 --- a/mysys/mf_keycaches.c +++ b/mysys/mf_keycaches.c @@ -147,7 +147,8 @@ static void safe_hash_free(SAFE_HASH *hash) Return the value stored for a key or default value if no key */ -static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length) +static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, + byte *def) { byte *result; DBUG_ENTER("safe_hash_search"); @@ -155,7 +156,7 @@ static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length) result= hash_search(&hash->hash, key, length); rw_unlock(&hash->mutex); if (!result) - result= hash->default_value; + result= def; else result= ((SAFE_HASH_ENTRY*) result)->data; DBUG_PRINT("exit",("data: 0x%lx", (long) result)); @@ -315,6 +316,7 @@ void multi_keycache_free(void) multi_key_cache_search() key key to find (usually table path) uint length Length of key. + def Default value if no key cache NOTES This function is coded in such a way that we will return the @@ -325,11 +327,13 @@ void multi_keycache_free(void) key cache to use */ -KEY_CACHE *multi_key_cache_search(byte *key, uint length) +KEY_CACHE *multi_key_cache_search(byte *key, uint length, + KEY_CACHE *def) { if (!key_cache_hash.hash.records) - return dflt_key_cache; - return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length); + return def; + return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length, + (void*) def); } diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c new file mode 100755 index 00000000000..1b9d48c80e6 --- /dev/null +++ b/mysys/mf_pagecache.c @@ -0,0 +1,4102 @@ +/* Copyright (C) 2000-2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + These functions handle page cacheing for Maria tables. + + One cache can handle many files. + It must contain buffers of the same blocksize. + init_pagecache() should be used to init cache handler. + + The free list (free_block_list) is a stack like structure. + When a block is freed by free_block(), it is pushed onto the stack. + When a new block is required it is first tried to pop one from the stack. + If the stack is empty, it is tried to get a never-used block from the pool. + If this is empty too, then a block is taken from the LRU ring, flushing it + to disk, if necessary. This is handled in find_block(). 
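+
+  In pseudo-code, the allocation order described above is roughly:
+    block= pop(free_block_list);        // a previously freed block
+    if (!block)
+      block= next_never_used_block();   // extend into the pool
+    if (!block)
+      block= evict_from_lru_ring();     // flushing it first if dirty
+  (illustration only; these helper names are made up, the real logic
+  is in find_block() below)
+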
+ With the new free list, the blocks can have three temperatures: + hot, warm and cold (which is free). This is remembered in the block header + by the enum BLOCK_TEMPERATURE temperature variable. Remembering the + temperature is necessary to correctly count the number of warm blocks, + which is required to decide when blocks are allowed to become hot. Whenever + a block is inserted to another (sub-)chain, we take the old and new + temperature into account to decide if we got one more or less warm block. + blocks_unused is the sum of never used blocks in the pool and of currently + free blocks. blocks_used is the number of blocks fetched from the pool and + as such gives the maximum number of in-use blocks at any time. +*/ + +#include "mysys_priv.h" +#include <m_string.h> +#include <pagecache.h> +#include "my_static.h" +#include <my_bit.h> +#include <errno.h> +#include <stdarg.h> + +/* + Some compilation flags have been added specifically for this module + to control the following: + - not to let a thread to yield the control when reading directly + from page cache, which might improve performance in many cases; + to enable this add: + #define SERIALIZED_READ_FROM_CACHE + - to set an upper bound for number of threads simultaneously + using the page cache; this setting helps to determine an optimal + size for hash table and improve performance when the number of + blocks in the page cache much less than the number of threads + accessing it; + to set this number equal to <N> add + #define MAX_THREADS <N> + - to substitute calls of pthread_cond_wait for calls of + pthread_cond_timedwait (wait with timeout set up); + this setting should be used only when you want to trap a deadlock + situation, which theoretically should not happen; + to set timeout equal to <T> seconds add + #define PAGECACHE_TIMEOUT <T> + - to enable the module traps and to send debug information from + page cache module to a special debug log add: + #define PAGECACHE_DEBUG + the name of this debug log file <LOG NAME> can be set through: + #define PAGECACHE_DEBUG_LOG <LOG NAME> + if the name is not defined, it's set by default; + if the PAGECACHE_DEBUG flag is not set up and we are in a debug + mode, i.e. when ! defined(DBUG_OFF), the debug information from the + module is sent to the regular debug log. + + Example of the settings: + #define SERIALIZED_READ_FROM_CACHE + #define MAX_THREADS 100 + #define PAGECACHE_TIMEOUT 1 + #define PAGECACHE_DEBUG + #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" +*/ + +/* + In key cache we have external raw locking here we use + SERIALIZED_READ_FROM_CACHE to avoid problem of reading + not consistent data from the page. + (keycache functions (key_cache_read(), key_cache_insert() and + key_cache_write()) rely on external MyISAM lock, we don't) +*/ +#define SERIALIZED_READ_FROM_CACHE yes + +#define BLOCK_INFO(B) \ + DBUG_PRINT("info", \ + ("block 0x%lx file %lu page %lu s %0x hshL 0x%lx req %u/%u " \ + "wrlock: %c", \ + (ulong)(B), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->file.file : \ + 0), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->pageno : \ + 0), \ + (B)->status, \ + (ulong)(B)->hash_link, \ + (uint) (B)->requests, \ + (uint)((B)->hash_link ? 
\ + (B)->hash_link->requests : \ + 0), \ + ((block->status & BLOCK_WRLOCK)?'Y':'N'))) + +/* TODO: put it to my_static.c */ +my_bool my_disable_flush_pagecache_blocks= 0; + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) + +/* types of condition variables */ +#define COND_FOR_REQUESTED 0 /* queue of thread waiting for read operation */ +#define COND_FOR_SAVED 1 /* queue of thread waiting for flush */ +#define COND_FOR_WRLOCK 2 /* queue of write lock */ +#define COND_SIZE 3 /* number of COND_* queues */ + +typedef pthread_cond_t KEYCACHE_CONDVAR; + +/* descriptor of the page in the page cache block buffer */ +struct st_pagecache_page +{ + PAGECACHE_FILE file; /* file to which the page belongs to */ + pgcache_page_no_t pageno; /* number of the page in the file */ +}; + +/* element in the chain of a hash table bucket */ +struct st_pagecache_hash_link +{ + struct st_pagecache_hash_link + *next, **prev; /* to connect links in the same bucket */ + struct st_pagecache_block_link + *block; /* reference to the block for the page: */ + PAGECACHE_FILE file; /* from such a file */ + pgcache_page_no_t pageno; /* this page */ + uint requests; /* number of requests for the page */ +}; + +/* simple states of a block */ +#define BLOCK_ERROR 1 /* an error occurred when performing disk i/o */ +#define BLOCK_READ 2 /* the is page in the block buffer */ +#define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */ +#define BLOCK_REASSIGNED 8 /* block does not accept requests for old page */ +#define BLOCK_IN_FLUSH 16 /* block is in flush operation */ +#define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ +#define BLOCK_WRLOCK 64 /* write locked block */ + +/* page status, returned by find_block */ +#define PAGE_READ 0 +#define PAGE_TO_BE_READ 1 +#define PAGE_WAIT_TO_BE_READ 2 + +/* block temperature determines in which (sub-)chain the block currently is */ +enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT }; + +/* debug info */ +#ifndef DBUG_OFF +static char *page_cache_page_type_str[]= +{ + (char*)"PLAIN", + (char*)"LSN" +}; +static char *page_cache_page_write_mode_str[]= +{ + (char*)"DELAY", + (char*)"NOW", + (char*)"DONE" +}; +static char *page_cache_page_lock_str[]= +{ + (char*)"free -> free ", + (char*)"read -> read ", + (char*)"write -> write", + (char*)"free -> read ", + (char*)"free -> write", + (char*)"read -> free ", + (char*)"write -> free ", + (char*)"write -> read " +}; +static char *page_cache_page_pin_str[]= +{ + (char*)"pinned -> pinned ", + (char*)"unpinned -> unpinned", + (char*)"unpinned -> pinned ", + (char*)"pinned -> unpinned" +}; +#endif +#ifdef PAGECACHE_DEBUG +typedef struct st_pagecache_pin_info +{ + struct st_pagecache_pin_info *next, **prev; + struct st_my_thread_var *thread; +} PAGECACHE_PIN_INFO; +/* + st_pagecache_lock_info structure should be kept in next, prev, thread part + compatible with st_pagecache_pin_info to be compatible in functions. 
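+  (info_find() and the other info_* helpers below take PAGECACHE_PIN_INFO
+  pointers and are reused for the lock list through a cast, which is only
+  valid as long as the two structures share this common prefix.)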
+*/ +typedef struct st_pagecache_lock_info +{ + struct st_pagecache_lock_info *next, **prev; + struct st_my_thread_var *thread; + my_bool write_lock; +} PAGECACHE_LOCK_INFO; + + +/* service functions maintain debugging info about pin & lock */ + + +/* + Links information about thread pinned/locked the block to the list + + SYNOPSIS + info_link() + list the list to link in + node the node which should be linked +*/ + +static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) +{ + if ((node->next= *list)) + node->next->prev= &(node->next); + *list= node; + node->prev= list; +} + + +/* + Unlinks information about thread pinned/locked the block from the list + + SYNOPSIS + info_unlink() + node the node which should be unlinked +*/ + +static void info_unlink(PAGECACHE_PIN_INFO *node) +{ + if ((*node->prev= node->next)) + node->next->prev= node->prev; +} + + +/* + Finds information about given thread in the list of threads which + pinned/locked this block. + + SYNOPSIS + info_find() + list the list where to find the thread + thread thread ID (reference to the st_my_thread_var + of the thread) + + RETURN + 0 - the thread was not found + pointer to the information node of the thread in the list +*/ + +static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, + struct st_my_thread_var *thread) +{ + register PAGECACHE_PIN_INFO *i= list; + for(; i != 0; i= i->next) + if (i->thread == thread) + return i; + return 0; +} +#endif + +/* page cache block */ +struct st_pagecache_block_link +{ + struct st_pagecache_block_link + *next_used, **prev_used; /* to connect links in the LRU chain (ring) */ + struct st_pagecache_block_link + *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ + struct st_pagecache_hash_link + *hash_link; /* backward ptr to referring hash_link */ + WQUEUE + wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */ + uint requests; /* number of requests for the block */ + byte *buffer; /* buffer for the block page */ + uint status; /* state of the block */ + uint pins; /* pin counter */ +#ifdef PAGECACHE_DEBUG + PAGECACHE_PIN_INFO *pin_list; + PAGECACHE_LOCK_INFO *lock_list; +#endif + enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */ + enum pagecache_page_type type; /* type of the block */ + uint hits_left; /* number of hits left until promotion */ + ulonglong last_hit_time; /* timestamp of the last hit */ + LSN rec_lsn; /* LSN when first became dirty */ + KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ +}; + +#ifdef PAGECACHE_DEBUG +/* debug checks */ +static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_pin mode) +{ + struct st_my_thread_var *thread= my_thread_var; + PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); + DBUG_ENTER("info_check_pin"); + if (info) + { + if (mode == PAGECACHE_PIN_LEFT_UNPINNED) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_UNPINNED!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + else if (mode == PAGECACHE_PIN) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: PIN!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + } + else + { + if (mode == PAGECACHE_PIN_LEFT_PINNED) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_PINNED!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + else if (mode == PAGECACHE_UNPIN) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: UNPIN!!!", + 
(ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + Debug function which checks current lock/pin state and requested changes + + SYNOPSIS + info_check_lock() + lock requested lock changes + pin requested pin changes + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) +{ + struct st_my_thread_var *thread= my_thread_var; + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list, + thread); + DBUG_ENTER("info_check_lock"); + switch(lock) + { + case PAGECACHE_LOCK_LEFT_UNLOCKED: + if (pin != PAGECACHE_PIN_LEFT_UNPINNED || + info) + goto error; + break; + case PAGECACHE_LOCK_LEFT_READLOCKED: + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_PIN_LEFT_PINNED) || + info == 0 || info->write_lock) + goto error; + break; + case PAGECACHE_LOCK_LEFT_WRITELOCKED: + if (pin != PAGECACHE_PIN_LEFT_PINNED || + info == 0 || !info->write_lock) + goto error; + break; + case PAGECACHE_LOCK_READ: + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_PIN) || + info != 0) + goto error; + break; + case PAGECACHE_LOCK_WRITE: + if (pin != PAGECACHE_PIN || + info != 0) + goto error; + break; + case PAGECACHE_LOCK_READ_UNLOCK: + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_UNPIN) || + info == 0 || info->write_lock) + goto error; + break; + case PAGECACHE_LOCK_WRITE_UNLOCK: + if (pin != PAGECACHE_UNPIN || + info == 0 || !info->write_lock) + goto error; + break; + case PAGECACHE_LOCK_WRITE_TO_READ: + if ((pin != PAGECACHE_PIN_LEFT_PINNED && + pin != PAGECACHE_UNPIN) || + info == 0 || !info->write_lock) + goto error; + break; + } + DBUG_RETURN(0); +error: + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d," + "to lock: %s, to pin: %s", + (ulong)thread, (ulong)block, test(info), + (info ? info->write_lock : 0), + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + DBUG_RETURN(1); +} +#endif + +#define FLUSH_CACHE 2000 /* sort this many blocks at once */ + +static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block); +static void test_key_cache(PAGECACHE *pagecache, + const char *where, my_bool lock); + +#define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) + \ + (ulong) (f).file) & (p->hash_entries-1)) +#define FILE_HASH(f) ((uint) (f).file & (PAGECACHE_CHANGED_BLOCKS_HASH - 1)) + +#define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log" + +#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG) +#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG +#endif + +#if defined(PAGECACHE_DEBUG_LOG) +static FILE *pagecache_debug_log= NULL; +static void pagecache_debug_print _VARARGS((const char *fmt, ...)); +#define PAGECACHE_DEBUG_OPEN \ + if (!pagecache_debug_log) \ + { \ + pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"); \ + (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \ + } + +#define PAGECACHE_DEBUG_CLOSE \ + if (pagecache_debug_log) \ + { \ + fclose(pagecache_debug_log); \ + pagecache_debug_log= 0; \ + } +#else +#define PAGECACHE_DEBUG_OPEN +#define PAGECACHE_DEBUG_CLOSE +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) +#define KEYCACHE_DBUG_PRINT(l, m) \ + { if (pagecache_debug_log) \ + fprintf(pagecache_debug_log, "%s: ", l); \ + pagecache_debug_print m; } + +#define KEYCACHE_DBUG_ASSERT(a) \ + { if (! 
(a) && pagecache_debug_log) \ + fclose(pagecache_debug_log); \ + assert(a); } +#else +#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m) +#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a) +#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */ + +#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) +#ifdef THREAD +static long pagecache_thread_id; +#define KEYCACHE_THREAD_TRACE(l) \ + KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id)) + +#define KEYCACHE_THREAD_TRACE_BEGIN(l) \ + { struct st_my_thread_var *thread_var= my_thread_var; \ + pagecache_thread_id= thread_var->id; \ + KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) } + +#define KEYCACHE_THREAD_TRACE_END(l) \ + KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id)) +#else /* THREAD */ +#define KEYCACHE_THREAD_TRACE(l) KEYCACHE_DBUG_PRINT(l,("")) +#define KEYCACHE_THREAD_TRACE_BEGIN(l) KEYCACHE_DBUG_PRINT(l,("")) +#define KEYCACHE_THREAD_TRACE_END(l) KEYCACHE_DBUG_PRINT(l,("")) +#endif /* THREAD */ +#else +#define KEYCACHE_THREAD_TRACE_BEGIN(l) +#define KEYCACHE_THREAD_TRACE_END(l) +#define KEYCACHE_THREAD_TRACE(l) +#endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */ + +#define BLOCK_NUMBER(p, b) \ + ((uint) (((char*)(b)-(char *) p->block_root)/sizeof(PAGECACHE_BLOCK_LINK))) +#define PAGECACHE_HASH_LINK_NUMBER(p, h) \ + ((uint) (((char*)(h)-(char *) p->hash_link_root)/ \ + sizeof(PAGECACHE_HASH_LINK))) + +#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG) +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex); +#else +#define pagecache_pthread_cond_wait pthread_cond_wait +#endif + +#if defined(PAGECACHE_DEBUG) +static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex); +static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex); +static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); +#define pagecache_pthread_mutex_lock(M) \ +{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_mutex_lock(M);} +#define pagecache_pthread_mutex_unlock(M) \ +{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_mutex_unlock(M);} +#define pagecache_pthread_cond_signal(M) \ +{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_cond_signal(M);} +#else +#define pagecache_pthread_mutex_lock pthread_mutex_lock +#define pagecache_pthread_mutex_unlock pthread_mutex_unlock +#define pagecache_pthread_cond_signal pthread_cond_signal +#endif /* defined(PAGECACHE_DEBUG) */ + +extern my_bool translog_flush(LSN lsn); + +/* + Write page to the disk + + SYNOPSIS + pagecache_fwrite() + pagecache - page cache pointer + filedesc - pagecache file descriptor structure + buffer - buffer which we will write + type - page type (plain or with LSN) + flags - MYF() flags + + RETURN + 0 - OK + !=0 - Error +*/ + +static uint pagecache_fwrite(PAGECACHE *pagecache, + PAGECACHE_FILE *filedesc, + byte *buffer, + pgcache_page_no_t pageno, + enum pagecache_page_type type, + myf flags) +{ + DBUG_ENTER("pagecache_fwrite"); + if (type == PAGECACHE_LSN_PAGE) + { + LSN lsn; + DBUG_PRINT("info", ("Log handler call")); + /* TODO: integrate with page format */ +#define PAGE_LSN_OFFSET 0 + lsn= lsn_korr(buffer + PAGE_LSN_OFFSET); + /* + check CONTROL_FILE_IMPOSSIBLE_FILENO & + CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET + */ + DBUG_ASSERT(lsn != 0); + translog_flush(lsn); + } + DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, + 
(pageno)<<(pagecache->shift), flags)); +} + + +/* + Read page from the disk + + SYNOPSIS + pagecache_fread() + pagecache - page cache pointer + filedesc - pagecache file descriptor structure + buffer - buffer in which we will read + pageno - page number + flags - MYF() flags +*/ +#define pagecache_fread(pagecache, filedesc, buffer, pageno, flags) \ + my_pread((filedesc)->file, buffer, pagecache->block_size, \ + (pageno)<<(pagecache->shift), flags) + + +/* + next_power(value) is 2 at the power of (1+floor(log2(value))); + e.g. next_power(2)=4, next_power(3)=4. +*/ +static inline uint next_power(uint value) +{ + return (uint) my_round_up_to_next_power((uint32) value) << 1; +} + + +/* + Initialize a page cache + + SYNOPSIS + init_pagecache() + pagecache pointer to a page cache data structure + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the key cache + division_limit division limit (may be zero) + age_threshold age threshold (may be zero) + block_size size of block (should be power of 2) + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + if pagecache->inited != 0 we assume that the key cache + is already initialized. This is for now used by myisamchk, but shouldn't + be something that a program should rely on! + + It's assumed that no two threads call this function simultaneously + referring to the same key cache handle. + +*/ + +int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, + uint division_limit, uint age_threshold, + uint block_size) +{ + uint blocks, hash_links, length; + int error; + DBUG_ENTER("init_pagecache"); + DBUG_ASSERT(block_size >= 512); + + PAGECACHE_DEBUG_OPEN; + if (pagecache->inited && pagecache->disk_blocks > 0) + { + DBUG_PRINT("warning",("key cache already in use")); + DBUG_RETURN(0); + } + + pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0; + pagecache->global_cache_read= pagecache->global_cache_write= 0; + pagecache->disk_blocks= -1; + if (! 
pagecache->inited) + { + pagecache->inited= 1; + pagecache->in_init= 0; + pthread_mutex_init(&pagecache->cache_lock, MY_MUTEX_INIT_FAST); + pagecache->resize_queue.last_thread= NULL; + } + + pagecache->mem_size= use_mem; + pagecache->block_size= block_size; + pagecache->shift= my_bit_log2(block_size); + DBUG_PRINT("info", ("block_size: %u", + block_size)); + DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size); + + blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + + 2 * sizeof(PAGECACHE_HASH_LINK) + + sizeof(PAGECACHE_HASH_LINK*) * + 5/4 + block_size)); + /* It doesn't make sense to have too few blocks (less than 8) */ + if (blocks >= 8 && pagecache->disk_blocks < 0) + { + for ( ; ; ) + { + /* Set my_hash_entries to the next bigger 2 power */ + if ((pagecache->hash_entries= next_power(blocks)) < + (blocks) * 5/4) + pagecache->hash_entries<<= 1; + hash_links= 2 * blocks; +#if defined(MAX_THREADS) + if (hash_links < MAX_THREADS + blocks - 1) + hash_links= MAX_THREADS + blocks - 1; +#endif + while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) + + ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) + + ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) * + pagecache->hash_entries))) + + (((ulong) blocks) << pagecache->shift) > use_mem) + blocks--; + /* Allocate memory for cache page buffers */ + if ((pagecache->block_mem= + my_large_malloc((ulong) blocks * pagecache->block_size, + MYF(MY_WME)))) + { + /* + Allocate memory for blocks, hash_links and hash entries; + For each block 2 hash links are allocated + */ + if ((pagecache->block_root= + (PAGECACHE_BLOCK_LINK*) my_malloc((uint) length, + MYF(0)))) + break; + my_large_free(pagecache->block_mem, MYF(0)); + pagecache->block_mem= 0; + } + if (blocks < 8) + { + my_errno= ENOMEM; + goto err; + } + blocks= blocks / 4*3; + } + pagecache->blocks_unused= (ulong) blocks; + pagecache->disk_blocks= (int) blocks; + pagecache->hash_links= hash_links; + pagecache->hash_root= + (PAGECACHE_HASH_LINK**) ((char*) pagecache->block_root + + ALIGN_SIZE(blocks*sizeof(PAGECACHE_BLOCK_LINK))); + pagecache->hash_link_root= + (PAGECACHE_HASH_LINK*) ((char*) pagecache->hash_root + + ALIGN_SIZE((sizeof(PAGECACHE_HASH_LINK*) * + pagecache->hash_entries))); + bzero((byte*) pagecache->block_root, + pagecache->disk_blocks * sizeof(PAGECACHE_BLOCK_LINK)); + bzero((byte*) pagecache->hash_root, + pagecache->hash_entries * sizeof(PAGECACHE_HASH_LINK*)); + bzero((byte*) pagecache->hash_link_root, + pagecache->hash_links * sizeof(PAGECACHE_HASH_LINK)); + pagecache->hash_links_used= 0; + pagecache->free_hash_list= NULL; + pagecache->blocks_used= pagecache->blocks_changed= 0; + + pagecache->global_blocks_changed= 0; + pagecache->blocks_available=0; /* For debugging */ + + /* The LRU chain is empty after initialization */ + pagecache->used_last= NULL; + pagecache->used_ins= NULL; + pagecache->free_block_list= NULL; + pagecache->time= 0; + pagecache->warm_blocks= 0; + pagecache->min_warm_blocks= (division_limit ? + blocks * division_limit / 100 + 1 : + blocks); + pagecache->age_threshold= (age_threshold ? 
+ blocks * age_threshold / 100 : + blocks); + + pagecache->cnt_for_resize_op= 0; + pagecache->resize_in_flush= 0; + pagecache->can_be_used= 1; + + pagecache->waiting_for_hash_link.last_thread= NULL; + pagecache->waiting_for_block.last_thread= NULL; + DBUG_PRINT("exit", + ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\ + hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx", + pagecache->disk_blocks, (long) pagecache->block_root, + pagecache->hash_entries, (long) pagecache->hash_root, + pagecache->hash_links, (long) pagecache->hash_link_root)); + bzero((gptr) pagecache->changed_blocks, + sizeof(pagecache->changed_blocks[0]) * + PAGECACHE_CHANGED_BLOCKS_HASH); + bzero((gptr) pagecache->file_blocks, + sizeof(pagecache->file_blocks[0]) * + PAGECACHE_CHANGED_BLOCKS_HASH); + } + + pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0; + DBUG_RETURN((uint) pagecache->blocks); + +err: + error= my_errno; + pagecache->disk_blocks= 0; + pagecache->blocks= 0; + if (pagecache->block_mem) + { + my_large_free((gptr) pagecache->block_mem, MYF(0)); + pagecache->block_mem= NULL; + } + if (pagecache->block_root) + { + my_free((gptr) pagecache->block_root, MYF(0)); + pagecache->block_root= NULL; + } + my_errno= error; + pagecache->can_be_used= 0; + DBUG_RETURN(0); +} + + +/* + Flush all blocks in the key cache to disk +*/ + +#ifdef NOT_USED +static int flush_all_key_blocks(PAGECACHE *pagecache) +{ +#if defined(PAGECACHE_DEBUG) + uint cnt=0; +#endif + while (pagecache->blocks_changed > 0) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->used_last->next_used ; ; block=block->next_used) + { + if (block->hash_link) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file, + FLUSH_RELEASE)) + return 1; + break; + } + if (block == pagecache->used_last) + break; + } + } + return 0; +} +#endif /* NOT_USED */ + +/* + Resize a key cache + + SYNOPSIS + resize_pagecache() + pagecache pointer to a page cache data structure + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + The function first compares the memory size parameter + with the key cache value. + + If they differ the function free the the memory allocated for the + old key cache blocks by calling the end_pagecache function and + then rebuilds the key cache with new blocks by calling + init_key_cache. + + The function starts the operation only when all other threads + performing operations with the key cache let her to proceed + (when cnt_for_resize=0). + + Before being usable, this function needs: + - to receive fixes for BUG#17332 "changing key_buffer_size on a running + server can crash under load" similar to those done to the key cache + - to have us (Sanja) look at the additional constraints placed on + resizing, due to the page locking specific to this page cache. + So we disable it for now. +*/ +#if NOT_USED /* keep disabled until code is fixed see above !! 
*/ +int resize_pagecache(PAGECACHE *pagecache, + my_size_t use_mem, uint division_limit, + uint age_threshold) +{ + int blocks; +#ifdef THREAD + struct st_my_thread_var *thread; + WQUEUE *wqueue; + +#endif + DBUG_ENTER("resize_pagecache"); + + if (!pagecache->inited) + DBUG_RETURN(pagecache->disk_blocks); + + if(use_mem == pagecache->mem_size) + { + change_pagecache_param(pagecache, division_limit, age_threshold); + DBUG_RETURN(pagecache->disk_blocks); + } + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + +#ifdef THREAD + wqueue= &pagecache->resize_queue; + thread= my_thread_var; + wqueue_link_into_queue(wqueue, thread); + + while (wqueue->last_thread->next != thread) + { + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); + } +#endif + + pagecache->resize_in_flush= 1; + if (flush_all_key_blocks(pagecache)) + { + /* TODO: if this happens, we should write a warning in the log file ! */ + pagecache->resize_in_flush= 0; + blocks= 0; + pagecache->can_be_used= 0; + goto finish; + } + pagecache->resize_in_flush= 0; + pagecache->can_be_used= 0; +#ifdef THREAD + while (pagecache->cnt_for_resize_op) + { + KEYCACHE_DBUG_PRINT("resize_pagecache: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); + } +#else + KEYCACHE_DBUG_ASSERT(pagecache->cnt_for_resize_op == 0); +#endif + + end_pagecache(pagecache, 0); /* Don't free mutex */ + /* The following will work even if use_mem is 0 */ + blocks= init_pagecache(pagecache, pagecache->block_size, use_mem, + division_limit, age_threshold); + +finish: +#ifdef THREAD + wqueue_unlink_from_queue(wqueue, thread); + /* Signal for the next resize request to proceeed if any */ + if (wqueue->last_thread) + { + KEYCACHE_DBUG_PRINT("resize_pagecache: signal", + ("thread %ld", wqueue->last_thread->next->id)); + pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend); + } +#endif + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(blocks); +} +#endif /* 0 */ + + +/* + Increment counter blocking resize key cache operation +*/ +static inline void inc_counter_for_resize_op(PAGECACHE *pagecache) +{ + pagecache->cnt_for_resize_op++; +} + + +/* + Decrement counter blocking resize key cache operation; + Signal the operation to proceed when counter becomes equal zero +*/ +static inline void dec_counter_for_resize_op(PAGECACHE *pagecache) +{ +#ifdef THREAD + struct st_my_thread_var *last_thread; + if (!--pagecache->cnt_for_resize_op && + (last_thread= pagecache->resize_queue.last_thread)) + { + KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal", + ("thread %ld", last_thread->next->id)); + pagecache_pthread_cond_signal(&last_thread->next->suspend); + } +#else + pagecache->cnt_for_resize_op--; +#endif +} + +/* + Change the page cache parameters + + SYNOPSIS + change_pagecache_param() + pagecache pointer to a page cache data structure + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + RETURN VALUE + none + + NOTES. + Presently the function resets the key cache parameters + concerning midpoint insertion strategy - division_limit and + age_threshold. 
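+
+  Worked example (the numbers are only an illustration, not defaults taken
+  from this patch): with disk_blocks= 1000, division_limit= 40 and
+  age_threshold= 300 the assignments below give
+    min_warm_blocks= 1000 * 40 / 100 + 1= 401
+    age_threshold=   1000 * 300 / 100=    3000
+  i.e. at least 401 blocks are kept in the warm sub-chain, and the block at
+  the very beginning of the hot sub-chain is moved back to the warm
+  sub-chain once it has stayed untouched for more than 3000 increments of
+  pagecache->time (see unreg_request()).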
+*/ + +void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, + uint age_threshold) +{ + DBUG_ENTER("change_pagecache_param"); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (division_limit) + pagecache->min_warm_blocks= (pagecache->disk_blocks * + division_limit / 100 + 1); + if (age_threshold) + pagecache->age_threshold= (pagecache->disk_blocks * + age_threshold / 100); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_VOID_RETURN; +} + + +/* + Removes page cache from memory. Does NOT flush pages to disk. + + SYNOPSIS + end_pagecache() + pagecache page cache handle + cleanup Complete free (Free also mutex for key cache) + + RETURN VALUE + none +*/ + +void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) +{ + DBUG_ENTER("end_pagecache"); + DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache)); + + if (!pagecache->inited) + DBUG_VOID_RETURN; + + if (pagecache->disk_blocks > 0) + { + if (pagecache->block_mem) + { + my_large_free((gptr) pagecache->block_mem, MYF(0)); + pagecache->block_mem= NULL; + my_free((gptr) pagecache->block_root, MYF(0)); + pagecache->block_root= NULL; + } + pagecache->disk_blocks= -1; + /* Reset blocks_changed to be safe if flush_all_key_blocks is called */ + pagecache->blocks_changed= 0; + } + + DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu " + "writes: %lu r_requests: %lu reads: %lu", + pagecache->blocks_used, pagecache->global_blocks_changed, + (ulong) pagecache->global_cache_w_requests, + (ulong) pagecache->global_cache_write, + (ulong) pagecache->global_cache_r_requests, + (ulong) pagecache->global_cache_read)); + + if (cleanup) + { + pthread_mutex_destroy(&pagecache->cache_lock); + pagecache->inited= pagecache->can_be_used= 0; + PAGECACHE_DEBUG_CLOSE; + } + DBUG_VOID_RETURN; +} /* end_pagecache */ + + +/* + Unlink a block from the chain of dirty/clean blocks +*/ + +static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block) +{ + if (block->next_changed) + block->next_changed->prev_changed= block->prev_changed; + *block->prev_changed= block->next_changed; +} + + +/* + Link a block into the chain of dirty/clean blocks +*/ + +static inline void link_changed(PAGECACHE_BLOCK_LINK *block, + PAGECACHE_BLOCK_LINK **phead) +{ + block->prev_changed= phead; + if ((block->next_changed= *phead)) + (*phead)->prev_changed= &block->next_changed; + *phead= block; +} + + +/* + Unlink a block from the chain of dirty/clean blocks, if it's asked for, + and link it to the chain of clean blocks for the specified file +*/ + +static void link_to_file_list(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + PAGECACHE_FILE *file, my_bool unlink) +{ + if (unlink) + unlink_changed(block); + link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]); + if (block->status & BLOCK_CHANGED) + { + block->status&= ~BLOCK_CHANGED; + block->rec_lsn= 0; + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + } +} + + +/* + Unlink a block from the chain of clean blocks for the specified + file and link it to the chain of dirty blocks for this file +*/ + +static inline void link_to_changed_list(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ + unlink_changed(block); + link_changed(block, + &pagecache->changed_blocks[FILE_HASH(block->hash_link->file)]); + block->status|=BLOCK_CHANGED; + pagecache->blocks_changed++; + pagecache->global_blocks_changed++; +} + + +/* + Link a block to the LRU chain at the beginning or at the end of + one of two parts. 
+ + SYNOPSIS + link_block() + pagecache pointer to a page cache data structure + block pointer to the block to link to the LRU chain + hot <-> to link the block into the hot subchain + at_end <-> to link the block at the end of the subchain + + RETURN VALUE + none + + NOTES. + The LRU chain is represented by a curcular list of block structures. + The list is double-linked of the type (**prev,*next) type. + The LRU chain is divided into two parts - hot and warm. + There are two pointers to access the last blocks of these two + parts. The beginning of the warm part follows right after the + end of the hot part. + Only blocks of the warm part can be used for replacement. + The first block from the beginning of this subchain is always + taken for eviction (pagecache->last_used->next) + + LRU chain: +------+ H O T +------+ + +----| end |----...<----| beg |----+ + | +------+last +------+ | + v<-link in latest hot (new end) | + | link in latest warm (new end)->^ + | +------+ W A R M +------+ | + +----| beg |---->...----| end |----+ + +------+ +------+ins + first for eviction +*/ + +static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, + my_bool hot, my_bool at_end) +{ + PAGECACHE_BLOCK_LINK *ins; + PAGECACHE_BLOCK_LINK **ptr_ins; + + BLOCK_INFO(block); + KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests)); +#ifdef THREAD + if (!hot && pagecache->waiting_for_block.last_thread) + { + /* Signal that in the LRU warm sub-chain an available block has appeared */ + struct st_my_thread_var *last_thread= + pagecache->waiting_for_block.last_thread; + struct st_my_thread_var *first_thread= last_thread->next; + struct st_my_thread_var *next_thread= first_thread; + PAGECACHE_HASH_LINK *hash_link= + (PAGECACHE_HASH_LINK *) first_thread->opt_info; + struct st_my_thread_var *thread; + do + { + thread= next_thread; + next_thread= thread->next; + /* + We notify about the event all threads that ask + for the same page as the first thread in the queue + */ + if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link) + { + KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); + wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread); + block->requests++; + } + } + while (thread != last_thread); + hash_link->block= block; + KEYCACHE_THREAD_TRACE("link_block: after signaling"); +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_PRINT("link_block", + ("linked,unlinked block %u status=%x #requests=%u #available=%u", + BLOCK_NUMBER(pagecache, block), block->status, + block->requests, pagecache->blocks_available)); +#endif + return; + } +#else /* THREAD */ + KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread)); + /* Condition not transformed using DeMorgan, to keep the text identical */ +#endif /* THREAD */ + ptr_ins= hot ? 
&pagecache->used_ins : &pagecache->used_last; + ins= *ptr_ins; + if (ins) + { + ins->next_used->prev_used= &block->next_used; + block->next_used= ins->next_used; + block->prev_used= &ins->next_used; + ins->next_used= block; + if (at_end) + *ptr_ins= block; + } + else + { + /* The LRU chain is empty */ + pagecache->used_last= pagecache->used_ins= block->next_used= block; + block->prev_used= &block->next_used; + } + KEYCACHE_THREAD_TRACE("link_block"); +#if defined(PAGECACHE_DEBUG) + pagecache->blocks_available++; + KEYCACHE_DBUG_PRINT("link_block", + ("linked block %u:%1u status=%x #requests=%u #available=%u", + BLOCK_NUMBER(pagecache, block), at_end, block->status, + block->requests, pagecache->blocks_available)); + KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <= + pagecache->blocks_used); +#endif +} + + +/* + Unlink a block from the LRU chain + + SYNOPSIS + unlink_block() + pagecache pointer to a page cache data structure + block pointer to the block to unlink from the LRU chain + + RETURN VALUE + none + + NOTES. + See NOTES for link_block +*/ + +static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("unlink_block"); + DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block)); + if (block->next_used == block) + /* The list contains only one member */ + pagecache->used_last= pagecache->used_ins= NULL; + else + { + block->next_used->prev_used= block->prev_used; + *block->prev_used= block->next_used; + if (pagecache->used_last == block) + pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK, + next_used, block->prev_used); + if (pagecache->used_ins == block) + pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK, + next_used, block->prev_used); + } + block->next_used= NULL; + + KEYCACHE_THREAD_TRACE("unlink_block"); +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0); + pagecache->blocks_available--; + KEYCACHE_DBUG_PRINT("unlink_block", + ("unlinked block 0x%lx (%u) status=%x #requests=%u #available=%u", + (ulong)block, BLOCK_NUMBER(pagecache, block), block->status, + block->requests, pagecache->blocks_available)); + BLOCK_INFO(block); +#endif + DBUG_VOID_RETURN; +} + + +/* + Register requests for a block + + SYNOPSIS + reg_requests() + pagecache this page cache reference + block the block we request reference + count how many requests we register (it is 1 everywhere) + + NOTE + Registration of request means we are going to use this block so we exclude + it from the LRU if it is first request +*/ +static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, + int count) +{ + DBUG_ENTER("reg_requests"); + DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u", + (ulong)block, BLOCK_NUMBER(pagecache, block), + block->status, block->requests)); + BLOCK_INFO(block); + if (! block->requests) + /* First request for the block unlinks it */ + unlink_block(pagecache, block); + block->requests+= count; + DBUG_VOID_RETURN; +} + + +/* + Unregister request for a block + linking it to the LRU chain if it's the last request + + SYNOPSIS + unreg_request() + pagecache pointer to a page cache data structure + block pointer to the block to link to the LRU chain + at_end <-> to link the block at the end of the LRU chain + + RETURN VALUE + none + + NOTES. + Every linking to the LRU chain decrements by one a special block + counter (if it's positive). If the at_end parameter is TRUE the block is + added either at the end of warm sub-chain or at the end of hot sub-chain. 
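+  (In the function body this is literally the test
+     hot= !block->hits_left && at_end &&
+          pagecache->warm_blocks > pagecache->min_warm_blocks;
+   which the next sentences describe in words.)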
+ It is added to the hot subchain if its counter is zero and number of + blocks in warm sub-chain is not less than some low limit (determined by + the division_limit parameter). Otherwise the block is added to the warm + sub-chain. If the at_end parameter is FALSE the block is always added + at beginning of the warm sub-chain. + Thus a warm block can be promoted to the hot sub-chain when its counter + becomes zero for the first time. + At the same time the block at the very beginning of the hot subchain + might be moved to the beginning of the warm subchain if it stays untouched + for a too long time (this time is determined by parameter age_threshold). +*/ + +static void unreg_request(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, int at_end) +{ + DBUG_ENTER("unreg_request"); + DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u", + (ulong)block, BLOCK_NUMBER(pagecache, block), + block->status, block->requests)); + BLOCK_INFO(block); + DBUG_ASSERT(block->requests > 0); + if (! --block->requests) + { + my_bool hot; + if (block->hits_left) + block->hits_left--; + hot= !block->hits_left && at_end && + pagecache->warm_blocks > pagecache->min_warm_blocks; + if (hot) + { + if (block->temperature == BLOCK_WARM) + pagecache->warm_blocks--; + block->temperature= BLOCK_HOT; + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", + pagecache->warm_blocks)); + } + link_block(pagecache, block, hot, (my_bool)at_end); + block->last_hit_time= pagecache->time; + pagecache->time++; + + block= pagecache->used_ins; + /* Check if we should link a hot block to the warm block */ + if (block && pagecache->time - block->last_hit_time > + pagecache->age_threshold) + { + unlink_block(pagecache, block); + link_block(pagecache, block, 0, 0); + if (block->temperature != BLOCK_WARM) + { + pagecache->warm_blocks++; + block->temperature= BLOCK_WARM; + } + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", + pagecache->warm_blocks)); + } + } + DBUG_VOID_RETURN; +} + +/* + Remove a reader of the page in block +*/ + +static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("remove_reader"); + BLOCK_INFO(block); + DBUG_ASSERT(block->hash_link->requests > 0); +#ifdef THREAD + if (! 
--block->hash_link->requests && block->condvar) + pagecache_pthread_cond_signal(block->condvar); +#else + --block->hash_link->requests; +#endif + DBUG_VOID_RETURN; +} + + +/* + Wait until the last reader of the page in block + signals on its termination +*/ + +static inline void wait_for_readers(PAGECACHE *pagecache + __attribute__((unused)), + PAGECACHE_BLOCK_LINK *block) +{ +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + while (block->hash_link->requests) + { + KEYCACHE_DBUG_PRINT("wait_for_readers: wait", + ("suspend thread %ld block %u", + thread->id, BLOCK_NUMBER(pagecache, block))); + block->condvar= &thread->suspend; + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); + block->condvar= NULL; + } +#else + KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0); +#endif +} + + +/* + Add a hash link to a bucket in the hash_table +*/ + +static inline void link_hash(PAGECACHE_HASH_LINK **start, + PAGECACHE_HASH_LINK *hash_link) +{ + if (*start) + (*start)->prev= &hash_link->next; + hash_link->next= *start; + hash_link->prev= start; + *start= hash_link; +} + + +/* + Remove a hash link from the hash table +*/ + +static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) +{ + KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u", + (uint) hash_link->file.file, (ulong) hash_link->pageno, + hash_link->requests)); + KEYCACHE_DBUG_ASSERT(hash_link->requests == 0); + if ((*hash_link->prev= hash_link->next)) + hash_link->next->prev= hash_link->prev; + hash_link->block= NULL; +#ifdef THREAD + if (pagecache->waiting_for_hash_link.last_thread) + { + /* Signal that a free hash link has appeared */ + struct st_my_thread_var *last_thread= + pagecache->waiting_for_hash_link.last_thread; + struct st_my_thread_var *first_thread= last_thread->next; + struct st_my_thread_var *next_thread= first_thread; + PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->opt_info); + struct st_my_thread_var *thread; + + hash_link->file= first_page->file; + hash_link->pageno= first_page->pageno; + do + { + PAGECACHE_PAGE *page; + thread= next_thread; + page= (PAGECACHE_PAGE *) thread->opt_info; + next_thread= thread->next; + /* + We notify about the event all threads that ask + for the same page as the first thread in the queue + */ + if (page->file.file == hash_link->file.file && + page->pageno == hash_link->pageno) + { + KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); + wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread); + } + } + while (thread != last_thread); + link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache, + hash_link->file, + hash_link->pageno)], + hash_link); + return; + } +#else /* THREAD */ + KEYCACHE_DBUG_ASSERT(! 
(pagecache->waiting_for_hash_link.last_thread)); +#endif /* THREAD */ + hash_link->next= pagecache->free_hash_list; + pagecache->free_hash_list= hash_link; +} + + +/* + Get the hash link for the page if it is in the cache (do not put the + page in the cache if it is absent there) + + SYNOPSIS + get_present_hash_link() + pagecache Pagecache reference + file file ID + pageno page number in the file + start where to put pointer to found hash bucket (for + direct referring it) + + RETURN + found hashlink pointer +*/ + +static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + PAGECACHE_HASH_LINK ***start) +{ + reg1 PAGECACHE_HASH_LINK *hash_link; +#if defined(PAGECACHE_DEBUG) + int cnt; +#endif + DBUG_ENTER("get_present_hash_link"); + + KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu", + (uint) file->file, (ulong) pageno)); + + /* + Find the bucket in the hash table for the pair (file, pageno); + start contains the head of the bucket list, + hash_link points to the first member of the list + */ + hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache, + *file, pageno)]); +#if defined(PAGECACHE_DEBUG) + cnt= 0; +#endif + /* Look for an element for the pair (file, pageno) in the bucket chain */ + while (hash_link && + (hash_link->pageno != pageno || + hash_link->file.file != file->file)) + { + hash_link= hash_link->next; +#if defined(PAGECACHE_DEBUG) + cnt++; + if (! (cnt <= pagecache->hash_links_used)) + { + int i; + for (i=0, hash_link= **start ; + i < cnt ; i++, hash_link= hash_link->next) + { + KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu", + (uint) hash_link->file.file, (ulong) hash_link->pageno)); + } + } + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used); +#endif + } + if (hash_link) + { + /* Register the request for the page */ + hash_link->requests++; + } + + DBUG_RETURN(hash_link); +} + + +/* + Get the hash link for a page +*/ + +static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno) +{ + reg1 PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_HASH_LINK **start; + + KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu", + (uint) file->file, (ulong) pageno)); + +restart: + /* try to find the page in the cache */ + hash_link= get_present_hash_link(pagecache, file, pageno, + &start); + if (!hash_link) + { + /* There is no hash link in the hash table for the pair (file, pageno) */ + if (pagecache->free_hash_list) + { + hash_link= pagecache->free_hash_list; + pagecache->free_hash_list= hash_link->next; + } + else if (pagecache->hash_links_used < pagecache->hash_links) + { + hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++]; + } + else + { +#ifdef THREAD + /* Wait for a free hash link */ + struct st_my_thread_var *thread= my_thread_var; + PAGECACHE_PAGE page; + KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting")); + page.file= *file; + page.pageno= pageno; + thread->opt_info= (void *) &page; + wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread); + KEYCACHE_DBUG_PRINT("get_hash_link: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + thread->opt_info= NULL; +#else + KEYCACHE_DBUG_ASSERT(0); +#endif + DBUG_PRINT("info", ("restarting...")); + goto restart; + } + hash_link->file= *file; + hash_link->pageno= pageno; + link_hash(start, hash_link); + /* Register the request for the page */ + hash_link->requests++; + } + + 
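+  /*
+    At this point hash_link is never NULL: either get_present_hash_link()
+    found an existing link and registered the request on it, or a link was
+    taken above from the free list / hash_link_root array and initialized
+    for (file, pageno) with its request counter already incremented.
+  */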
return hash_link; +} + + +/* + Get a block for the file page requested by a pagecache read/write operation; + If the page is not in the cache return a free block, if there is none + return the lru block after saving its buffer if the page is dirty. + + SYNOPSIS + + find_block() + pagecache pointer to a page cache data structure + file handler for the file to read page from + pageno number of the page in the file + init_hits_left how initialize the block counter for the page + wrmode <-> get for writing + reg_req Register request to thye page + page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ} + + RETURN VALUE + Pointer to the found block if successful, 0 - otherwise + + NOTES. + For the page from file positioned at pageno the function checks whether + the page is in the key cache specified by the first parameter. + If this is the case it immediately returns the block. + If not, the function first chooses a block for this page. If there is + no not used blocks in the key cache yet, the function takes the block + at the very beginning of the warm sub-chain. It saves the page in that + block if it's dirty before returning the pointer to it. + The function returns in the page_st parameter the following values: + PAGE_READ - if page already in the block, + PAGE_TO_BE_READ - if it is to be read yet by the current thread + WAIT_TO_BE_READ - if it is to be read by another thread + If an error occurs THE BLOCK_ERROR bit is set in the block status. + It might happen that there are no blocks in LRU chain (in warm part) - + all blocks are unlinked for some read/write operations. Then the function + waits until first of this operations links any block back. +*/ + +static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + int init_hits_left, + my_bool wrmode, + my_bool reg_req, + int *page_st) +{ + PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_BLOCK_LINK *block; + int error= 0; + int page_status; + + DBUG_ENTER("find_block"); + KEYCACHE_THREAD_TRACE("find_block:begin"); + DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d", + file->file, (ulong) pageno, wrmode)); + KEYCACHE_DBUG_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d", + file->file, (ulong) pageno, + wrmode)); +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "start of find_block", 0);); +#endif + +restart: + /* Find the hash link for the requested page (file, pageno) */ + hash_link= get_hash_link(pagecache, file, pageno); + + page_status= -1; + if ((block= hash_link->block) && + block->hash_link == hash_link && (block->status & BLOCK_READ)) + page_status= PAGE_READ; + + if (wrmode && pagecache->resize_in_flush) + { + /* This is a write request during the flush phase of a resize operation */ + + if (page_status != PAGE_READ) + { + /* We don't need the page in the cache: we are going to write on disk */ + DBUG_ASSERT(hash_link->requests > 0); + hash_link->requests--; + unlink_hash(pagecache, hash_link); + return 0; + } + if (!(block->status & BLOCK_IN_FLUSH)) + { + DBUG_ASSERT(hash_link->requests > 0); + hash_link->requests--; + /* + Remove block to invalidate the page in the block buffer + as we are going to write directly on disk. + Although we have an exclusive lock for the updated key part + the control can be yielded by the current thread as we might + have unfinished readers of other key parts in the block + buffer. 
Still we are guaranteed not to have any readers + of the key part we are writing into until the block is + removed from the cache as we set the BLOCK_REASSIGNED + flag (see the code below that handles reading requests). + */ + free_block(pagecache, block); + return 0; + } + /* Wait until the page is flushed on disk */ + DBUG_ASSERT(hash_link->requests > 0); + hash_link->requests--; + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + do + { + KEYCACHE_DBUG_PRINT("find_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* + Given the use of "resize_in_flush", it seems impossible + that this whole branch is ever entered in single-threaded case + because "(wrmode && pagecache->resize_in_flush)" cannot be true. + TODO: Check this, and then put the whole branch into the + "#ifdef THREAD" guard. + */ +#endif + } + /* Invalidate page in the block if it has not been done yet */ + if (block->status) + free_block(pagecache, block); + return 0; + } + + if (page_status == PAGE_READ && + (block->status & (BLOCK_IN_SWITCH | BLOCK_REASSIGNED))) + { + /* This is a request for a page to be removed from cache */ + + KEYCACHE_DBUG_PRINT("find_block", + ("request for old page in block %u " + "wrmode: %d block->status: %d", + BLOCK_NUMBER(pagecache, block), wrmode, + block->status)); + /* + Only reading requests can proceed until the old dirty page is flushed, + all others are to be suspended, then resubmitted + */ + if (!wrmode && !(block->status & BLOCK_REASSIGNED)) + { + if (reg_req) + reg_requests(pagecache, block, 1); + } + else + { + DBUG_ASSERT(hash_link->requests > 0); + hash_link->requests--; + KEYCACHE_DBUG_PRINT("find_block", + ("request waiting for old page to be saved")); + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + /* Put the request into the queue of those waiting for the old page */ + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + /* Wait until the request can be resubmitted */ + do + { + KEYCACHE_DBUG_PRINT("find_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } + KEYCACHE_DBUG_PRINT("find_block", + ("request for old page resubmitted")); + DBUG_PRINT("info", ("restarting...")); + /* Resubmit the request */ + goto restart; + } + block->status&= ~BLOCK_IN_SWITCH; + } + else + { + /* This is a request for a new page or for a page not to be removed */ + if (! block) + { + /* No block is assigned for the page yet */ + if (pagecache->blocks_unused) + { + if (pagecache->free_block_list) + { + /* There is a block in the free list. 
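+             Pop its head: the free list is singly linked through the
+             next_used pointer of the blocks, as the three statements
+             below show.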
*/ + block= pagecache->free_block_list; + pagecache->free_block_list= block->next_used; + block->next_used= NULL; + } + else + { + /* There are some never used blocks, take first of them */ + block= &pagecache->block_root[pagecache->blocks_used]; + block->buffer= ADD_TO_PTR(pagecache->block_mem, + ((ulong) pagecache->blocks_used* + pagecache->block_size), + byte*); + pagecache->blocks_used++; + } + pagecache->blocks_unused--; + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); + block->status= 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->requests= 1; + block->temperature= BLOCK_COLD; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + link_to_file_list(pagecache, block, file, 0); + block->hash_link= hash_link; + hash_link->block= block; + page_status= PAGE_TO_BE_READ; + DBUG_PRINT("info", ("page to be read set for page 0x%lx", + (ulong)block)); + KEYCACHE_DBUG_PRINT("find_block", + ("got free or never used block %u", + BLOCK_NUMBER(pagecache, block))); + } + else + { + /* There are no never used blocks, use a block from the LRU chain */ + + /* + Wait until a new block is added to the LRU chain; + several threads might wait here for the same page, + all of them must get the same block + */ + +#ifdef THREAD + if (! pagecache->used_last) + { + struct st_my_thread_var *thread= my_thread_var; + thread->opt_info= (void *) hash_link; + wqueue_link_into_queue(&pagecache->waiting_for_block, thread); + do + { + KEYCACHE_DBUG_PRINT("find_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); + thread->opt_info= NULL; + } +#else + KEYCACHE_DBUG_ASSERT(pagecache->used_last); +#endif + block= hash_link->block; + if (! block) + { + /* + Take the first block from the LRU chain + unlinking it from the chain + */ + block= pagecache->used_last->next_used; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + if (reg_req) + reg_requests(pagecache, block, 1); + hash_link->block= block; + } + BLOCK_INFO(block); + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); + + if (block->hash_link != hash_link && + ! 
(block->status & BLOCK_IN_SWITCH) ) + { + /* this is a primary request for a new page */ + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); + block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK); + + KEYCACHE_DBUG_PRINT("find_block", + ("got block %u for new page", + BLOCK_NUMBER(pagecache, block))); + + if (block->status & BLOCK_CHANGED) + { + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 0); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + } + + block->status|= BLOCK_REASSIGNED; + if (block->hash_link) + { + /* + Wait until all pending read requests + for this page are executed + (we could have avoided this waiting, if we had read + a page in the cache in a sweep, without yielding control) + */ + wait_for_readers(pagecache, block); + + /* Remove the hash link for this page from the hash table */ + unlink_hash(pagecache, block->hash_link); + /* All pending requests for this page must be resubmitted */ +#ifdef THREAD + if (block->wqueue[COND_FOR_SAVED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif + } + link_to_file_list(pagecache, block, file, + (my_bool)(block->hash_link ? 1 : 0)); + BLOCK_INFO(block); + block->status= error? BLOCK_ERROR : 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->hash_link= hash_link; + page_status= PAGE_TO_BE_READ; + DBUG_PRINT("info", ("page to be read set for page 0x%lx", + (ulong)block)); + + KEYCACHE_DBUG_ASSERT(block->hash_link->block == block); + KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link); + } + else + { + /* This is for secondary requests for a new page only */ + KEYCACHE_DBUG_PRINT("find_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & BLOCK_READ)) ? + PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + pagecache->global_cache_read++; + } + else + { + if (reg_req) + reg_requests(pagecache, block, 1); + KEYCACHE_DBUG_PRINT("find_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & BLOCK_READ)) ? 
+ PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + + KEYCACHE_DBUG_ASSERT(page_status != -1); + *page_st= page_status; + DBUG_PRINT("info", + ("block: 0x%lx fd: %u pos %lu block->status %u page_status %u", + (ulong) block, (uint) file->file, + (ulong) pageno, block->status, (uint) page_status)); + KEYCACHE_DBUG_PRINT("find_block", + ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d", + (ulong) block, + file->file, (ulong) pageno, block->status, + page_status)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "end of find_block",0);); +#endif + KEYCACHE_THREAD_TRACE("find_block:end"); + DBUG_RETURN(block); +} + + +static void add_pin(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("add_pin"); + DBUG_PRINT("enter", ("block 0x%lx pins: %u", + (ulong) block, + block->pins)); + BLOCK_INFO(block); + block->pins++; +#ifdef PAGECACHE_DEBUG + { + PAGECACHE_PIN_INFO *info= + (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0)); + info->thread= my_thread_var; + info_link(&block->pin_list, info); + } +#endif + DBUG_VOID_RETURN; +} + +static void remove_pin(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("remove_pin"); + DBUG_PRINT("enter", ("block 0x%lx pins: %u", + (ulong) block, + block->pins)); + BLOCK_INFO(block); + DBUG_ASSERT(block->pins > 0); + block->pins--; +#ifdef PAGECACHE_DEBUG + { + PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var); + DBUG_ASSERT(info != 0); + info_unlink(info); + my_free((gptr) info, MYF(0)); + } +#endif + DBUG_VOID_RETURN; +} +#ifdef PAGECACHE_DEBUG +static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0)); + info->thread= my_thread_var; + info->write_lock= wl; + info_link((PAGECACHE_PIN_INFO **)&block->lock_list, + (PAGECACHE_PIN_INFO *)info); +} +static void info_remove_lock(PAGECACHE_BLOCK_LINK *block) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, + my_thread_var); + DBUG_ASSERT(info != 0); + info_unlink((PAGECACHE_PIN_INFO *)info); + my_free((gptr)info, MYF(0)); +} +static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, + my_thread_var); + DBUG_ASSERT(info != 0 && info->write_lock != wl); + info->write_lock= wl; +} +#else +#define info_add_lock(B,W) +#define info_remove_lock(B) +#define info_change_lock(B,W) +#endif + +/* + Put on the block write lock + + SYNOPSIS + get_wrlock() + pagecache pointer to a page cache data structure + block the block to work with + + RETURN + 0 - OK + 1 - Can't lock this block, need retry +*/ + +static my_bool get_wrlock(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ + PAGECACHE_FILE file= block->hash_link->file; + pgcache_page_no_t pageno= block->hash_link->pageno; + DBUG_ENTER("get_wrlock"); + DBUG_PRINT("info", ("the block 0x%lx " + "files %d(%d) pages %d(%d)", + (ulong)block, + file.file, block->hash_link->file.file, + pageno, block->hash_link->pageno)); + BLOCK_INFO(block); + while (block->status & BLOCK_WRLOCK) + { + /* Lock failed we will wait */ +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + DBUG_PRINT("info", ("fail to lock, waiting... 
0x%lx", (ulong)block)); + wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); + dec_counter_for_resize_op(pagecache); + do + { + KEYCACHE_DBUG_PRINT("get_wrlock: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + DBUG_ASSERT(0); +#endif + BLOCK_INFO(block); + if ((block->status & (BLOCK_REASSIGNED | BLOCK_IN_SWITCH)) || + file.file != block->hash_link->file.file || + pageno != block->hash_link->pageno) + { + DBUG_PRINT("info", ("the block 0x%lx changed => need retry" + "status %x files %d != %d or pages %d !=%d", + (ulong)block, block->status, + file.file, block->hash_link->file.file, + pageno, block->hash_link->pageno)); + DBUG_RETURN(1); + } + } + DBUG_ASSERT(block->pins == 0); + /* we are doing it by global cache mutex protection, so it is OK */ + block->status|= BLOCK_WRLOCK; + DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block)); + DBUG_RETURN(0); +} + + +/* + Remove write lock from the block + + SYNOPSIS + release_wrlock() + pagecache pointer to a page cache data structure + block the block to work with + + RETURN + 0 - OK +*/ + +static void release_wrlock(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("release_wrlock"); + BLOCK_INFO(block); + DBUG_ASSERT(block->status & BLOCK_WRLOCK); + DBUG_ASSERT(block->pins > 0); + block->status&= ~BLOCK_WRLOCK; + DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block)); +#ifdef THREAD + /* release all threads waiting for write lock */ + if (block->wqueue[COND_FOR_WRLOCK].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_WRLOCK]); +#endif + BLOCK_INFO(block); + DBUG_VOID_RETURN; +} + + +/* + Try to lock/unlock and pin/unpin the block + + SYNOPSIS + make_lock_and_pin() + pagecache pointer to a page cache data structure + block the block to work with + lock lock change mode + pin pinchange mode + + RETURN + 0 - OK + 1 - Try to lock the block failed +*/ + +static my_bool make_lock_and_pin(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) +{ + DBUG_ENTER("make_lock_and_pin"); + DBUG_PRINT("enter", ("block: 0x%lx (%u), wrlock: %c pins: %u, lock %s, pin: %s", + (ulong)block, BLOCK_NUMBER(pagecache, block), + ((block->status & BLOCK_WRLOCK)?'Y':'N'), + block->pins, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + BLOCK_INFO(block); +#ifdef PAGECACHE_DEBUG + DBUG_ASSERT(info_check_pin(block, pin) == 0 && + info_check_lock(block, lock, pin) == 0); +#endif + switch (lock) + { + case PAGECACHE_LOCK_WRITE: /* free -> write */ + /* Writelock and pin the buffer */ + if (get_wrlock(pagecache, block)) + { + /* can't lock => need retry */ + goto retry; + } + + /* The cache is locked so nothing afraid of */ + add_pin(block); + info_add_lock(block, 1); + break; + case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */ + case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */ + /* + Removes write lock and puts read lock (which is nothing in our + implementation) + */ + release_wrlock(block); + case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */ + case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */ + if (pin == PAGECACHE_UNPIN) + { + remove_pin(block); + } + if (lock == PAGECACHE_LOCK_WRITE_TO_READ) + { + info_change_lock(block, 0); + } + else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || + lock == PAGECACHE_LOCK_READ_UNLOCK) + { + info_remove_lock(block); + } + break; + case PAGECACHE_LOCK_READ: /* free -> read */ + if (pin == PAGECACHE_PIN) 
+ { + /* The cache is locked so nothing afraid off */ + add_pin(block); + } + info_add_lock(block, 0); + break; + case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */ + case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */ + break; /* do nothing */ + default: + DBUG_ASSERT(0); /* Never should happened */ + } + + BLOCK_INFO(block); + DBUG_RETURN(0); +retry: + DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block)); + BLOCK_INFO(block); + DBUG_ASSERT(block->hash_link->requests > 0); + block->hash_link->requests--; + DBUG_ASSERT(block->requests > 0); + unreg_request(pagecache, block, 1); + BLOCK_INFO(block); + DBUG_RETURN(1); + +} + + +/* + Read into a key cache block buffer from disk. + + SYNOPSIS + + read_block() + pagecache pointer to a page cache data structure + block block to which buffer the data is to be read + primary <-> the current thread will read the data + validator validator of read from the disk data + validator_data pointer to the data need by the validator + + RETURN VALUE + None + + NOTES. + The function either reads a page data from file to the block buffer, + or waits until another thread reads it. What page to read is determined + by a block parameter - reference to a hash link for this page. + If an error occurs THE BLOCK_ERROR bit is set in the block status. +*/ + +static void read_block(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + my_bool primary, + pagecache_disk_read_validator validator, + gptr validator_data) +{ + uint got_length; + + /* On entry cache_lock is locked */ + + DBUG_ENTER("read_block"); + if (primary) + { + /* + This code is executed only by threads + that submitted primary requests + */ + + DBUG_PRINT("read_block", + ("page to be read by primary request")); + + /* Page is not in buffer yet, is to be read from disk */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + Here other threads may step in and register as secondary readers. + They will register in block->wqueue[COND_FOR_REQUESTED]. 
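The primary/secondary protocol of read_block() can be illustrated with a standalone pthread sketch (not part of the patch; all names below are invented, and a plain condition variable stands in for the block wait queue and my_thread_var suspend logic): one thread plays the primary reader that fills the buffer, the other simply waits until the primary signals completion.

#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  page_done  = PTHREAD_COND_INITIALIZER;
static int  page_ready = 0;            // set once the primary read finishes
static char page_buf[4096];

// Primary reader: performs the slow disk read (here just a sleep and a
// memset), then wakes every secondary waiter, like wqueue_release_queue().
static void *primary_reader(void *arg)
{
  (void) arg;
  usleep(10000);                       // stands in for pagecache_fread()
  memset(page_buf, 'x', sizeof(page_buf));
  pthread_mutex_lock(&cache_lock);
  page_ready = 1;
  pthread_cond_broadcast(&page_done);
  pthread_mutex_unlock(&cache_lock);
  return NULL;
}

// Secondary reader: waits until the page has been read by someone else.
static void *secondary_reader(void *arg)
{
  (void) arg;
  pthread_mutex_lock(&cache_lock);
  while (!page_ready)                  // loop guards against spurious wakeups
    pthread_cond_wait(&page_done, &cache_lock);
  pthread_mutex_unlock(&cache_lock);
  printf("secondary sees '%c'\n", page_buf[0]);
  return NULL;
}

int main(void)
{
  pthread_t t1, t2;
  pthread_create(&t1, NULL, secondary_reader, NULL);
  pthread_create(&t2, NULL, primary_reader, NULL);
  pthread_join(t1, NULL);
  pthread_join(t2, NULL);
  return 0;
}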
+ */ + got_length= pagecache_fread(pagecache, &block->hash_link->file, + block->buffer, + block->hash_link->pageno, MYF(0)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (got_length < pagecache->block_size) + block->status|= BLOCK_ERROR; + else + block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); + + if (validator != NULL && + (*validator)(block->buffer, validator_data)) + block->status|= BLOCK_ERROR; + + DBUG_PRINT("read_block", + ("primary request: new page in cache")); + /* Signal that all pending requests for this page now can be processed */ +#ifdef THREAD + if (block->wqueue[COND_FOR_REQUESTED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); +#endif + } + else + { + /* + This code is executed only by threads + that submitted secondary requests + */ + DBUG_PRINT("read_block", + ("secondary request waiting for new page to be read")); + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + /* Put the request into a queue and wait until it can be processed */ + wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread); + do + { + DBUG_PRINT("read_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } + DBUG_PRINT("read_block", + ("secondary request: new page in cache")); + } + DBUG_VOID_RETURN; +} + + +/* + Unlock/unpin page and put LSN stamp if it need + + SYNOPSIS + pagecache_unlock_page() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + lock lock change + pin pin page + first_REDO_LSN_for_page do not set it if it is zero + + NOTE + Pininig uses requests registration mechanism it works following way: + | beginnig | ending | + | of func. | of func. | + ----------------------------+-------------+---------------+ + PAGECACHE_PIN_LEFT_PINNED | - | - | + PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request | + PAGECACHE_PIN | reg request | - | + PAGECACHE_UNPIN | - | unreg request | + + +*/ + +void pagecache_unlock_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + LSN first_REDO_LSN_for_page) +{ + PAGECACHE_BLOCK_LINK *block; + int page_st; + DBUG_ENTER("pagecache_unlock_page"); + DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s", + (uint) file->file, (ulong) pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + /* we do not allow any lock/pin increasing here */ + DBUG_ASSERT(pin != PAGECACHE_PIN && + lock != PAGECACHE_LOCK_READ && + lock != PAGECACHE_LOCK_WRITE); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock because want + to unlock. 
+ */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + /* See NOTE for pagecache_unlock_page about registering requests */ + block= find_block(pagecache, file, pageno, 0, 0, + test(pin == PAGECACHE_PIN_LEFT_UNPINNED), &page_st); + BLOCK_INFO(block); + DBUG_ASSERT(block != 0 && page_st == PAGE_READ); + if (first_REDO_LSN_for_page) + { + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && + pin == PAGECACHE_UNPIN); + set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); + } + +#ifndef DBUG_OFF + if ( +#endif + make_lock_and_pin(pagecache, block, lock, pin) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. + */ + if (pin != PAGECACHE_PIN_LEFT_PINNED) + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unpin page + + SYNOPSIS + pagecache_unpin_page() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file +*/ + +void pagecache_unpin_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno) +{ + PAGECACHE_BLOCK_LINK *block; + int page_st; + DBUG_ENTER("pagecache_unpin_page"); + DBUG_PRINT("enter", ("fd: %u page: %lu", + (uint) file->file, (ulong) pageno)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + /* See NOTE for pagecache_unlock_page about registering requests */ + block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); + DBUG_ASSERT(block != 0 && page_st == PAGE_READ); + +#ifndef DBUG_OFF + if ( +#endif + /* + we can just unpin only with keeping read lock because: + a) we can't pin without any lock + b) we can't unpin keeping write lock + */ + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. 
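As a small illustration of the registration table in the NOTE above, the following standalone sketch (invented names, not part of the patch) shows the invariant the pin and request counters are meant to keep: every path that registers a request or adds a pin has exactly one matching release, so both counters return to zero once a page is no longer in use.

#include <assert.h>
#include <stdio.h>

// Tiny model of the request/pin bookkeeping: requests mirrors
// reg_requests()/unreg_request(), pins mirrors add_pin()/remove_pin().
struct toy_page
{
  unsigned requests;
  unsigned pins;
};

static void lock_and_pin(struct toy_page *p)   { p->requests++; p->pins++; }

static void unlock_and_unpin(struct toy_page *p)
{
  assert(p->pins > 0 && p->requests > 0);
  p->pins--;
  p->requests--;
}

int main(void)
{
  struct toy_page page = {0, 0};
  lock_and_pin(&page);        // like PAGECACHE_LOCK_WRITE + PAGECACHE_PIN
  lock_and_pin(&page);        // a second user of the same page
  unlock_and_unpin(&page);    // like PAGECACHE_LOCK_WRITE_UNLOCK + UNPIN
  unlock_and_unpin(&page);
  assert(page.pins == 0 && page.requests == 0);
  printf("balanced: no leaked pins or requests\n");
  return 0;
}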
+ See NOTE for pagecache_unlock_page about registering requests + */ + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unlock/unpin page and put LSN stamp if it need + (uses direct block/page pointer) + + SYNOPSIS + pagecache_unlock() + pagecache pointer to a page cache data structure + link direct link to page (returned by read or write) + lock lock change + pin pin page + first_REDO_LSN_for_page do not set it if it is zero +*/ + +void pagecache_unlock(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + LSN first_REDO_LSN_for_page) +{ + PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; + DBUG_ENTER("pagecache_unlock"); + DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu l%s p%s", + (ulong) block, + (uint) block->hash_link->file.file, + (ulong) block->hash_link->pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + /* + We do not allow any lock/pin increasing here and page can't be + unpinned because we use direct link. + */ + DBUG_ASSERT(pin != PAGECACHE_PIN && + pin != PAGECACHE_PIN_LEFT_UNPINNED && + lock != PAGECACHE_LOCK_READ && + lock != PAGECACHE_LOCK_WRITE); + if (pin == PAGECACHE_PIN_LEFT_UNPINNED && + lock == PAGECACHE_LOCK_READ_UNLOCK) + { +#ifndef DBUG_OFF + if ( +#endif + /* block do not need here so we do not provide it */ + make_lock_and_pin(pagecache, 0, lock, pin) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + DBUG_VOID_RETURN; + } + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + if (first_REDO_LSN_for_page) + { + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && + pin == PAGECACHE_UNPIN); + set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); + } + +#ifndef DBUG_OFF + if ( +#endif + make_lock_and_pin(pagecache, block, lock, pin) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. + */ + if (pin != PAGECACHE_PIN_LEFT_PINNED) + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unpin page + (uses direct block/page pointer) + + SYNOPSIS + pagecache_unpin_page() + pagecache pointer to a page cache data structure + link direct link to page (returned by read or write) +*/ + +void pagecache_unpin(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link) +{ + PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; + DBUG_ENTER("pagecache_unpin"); + DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu", + (ulong) block, + (uint) block->hash_link->file.file, + (ulong) block->hash_link->pageno)); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. 
+ */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + +#ifndef DBUG_OFF + if ( +#endif + /* + we can just unpin only with keeping read lock because: + a) we can't pin without any lock + b) we can't unpin keeping write lock + */ + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. + */ + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Read a block of data from a cached file into a buffer; + + SYNOPSIS + pagecache_valid_read() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + type type of the page + lock lock change + link link to the page if we pin it + validator validator of read from the disk data + validator_data pointer to the data need by the validator + + RETURN VALUE + Returns address from where the data is placed if sucessful, 0 - otherwise. + + Pin will be choosen according to lock parameter (see lock_to_pin) +*/ +static enum pagecache_page_pin lock_to_pin[]= +{ + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/, + PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/ +}; + +byte *pagecache_valid_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link, + pagecache_disk_read_validator validator, + gptr validator_data) +{ + int error= 0; + enum pagecache_page_pin pin= lock_to_pin[lock]; + PAGECACHE_PAGE_LINK fake_link; + DBUG_ENTER("pagecache_valid_read"); + DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s", + (uint) file->file, (ulong) pageno, level, + page_cache_page_type_str[type], + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + + if (!link) + link= &fake_link; + else + *link= 0; + +restart: + + if (pagecache->can_be_used) + { + /* Key cache is used */ + PAGECACHE_BLOCK_LINK *block; + uint status; + int page_st; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + { + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto no_key_cache; + } + + inc_counter_for_resize_op(pagecache); + pagecache->global_cache_r_requests++; + /* See NOTE for pagecache_unlock_page about registering requests. 
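The lock_to_pin[] array above derives the pin policy from the requested lock mode, so callers of the read path never pass a pin mode explicitly. A reduced, self-contained sketch of the same table-driven idea (the enums and values below are simplified stand-ins, not the real definitions from the pagecache headers):

#include <assert.h>
#include <stdio.h>

// Invented, shortened copies of the two enums.
enum lock_mode { LOCK_LEFT_UNLOCKED, LOCK_READ, LOCK_WRITE, LOCK_WRITE_UNLOCK };
enum pin_mode  { PIN_LEFT_UNPINNED, PIN, UNPIN };

// One entry per lock mode, indexed by the enum value: taking a write lock
// implies taking a pin, releasing a write lock implies dropping it, and
// read access leaves the pin count alone.
static const enum pin_mode lock_to_pin_demo[] =
{
  PIN_LEFT_UNPINNED,   // LOCK_LEFT_UNLOCKED
  PIN_LEFT_UNPINNED,   // LOCK_READ
  PIN,                 // LOCK_WRITE
  UNPIN                // LOCK_WRITE_UNLOCK
};

int main(void)
{
  assert(lock_to_pin_demo[LOCK_WRITE] == PIN);
  assert(lock_to_pin_demo[LOCK_WRITE_UNLOCK] == UNPIN);
  printf("lock->pin table is consistent\n");
  return 0;
}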
*/ + block= find_block(pagecache, file, pageno, level, + test(lock == PAGECACHE_LOCK_WRITE), + test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || + (pin == PAGECACHE_PIN)), + &page_st); + DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == type); + block->type= type; + if (block->status != BLOCK_ERROR && page_st != PAGE_READ) + { + DBUG_PRINT("info", ("read block 0x%lx", (ulong)block)); + /* The requested page is to be read into the block buffer */ + read_block(pagecache, block, + (my_bool)(page_st == PAGE_TO_BE_READ), + validator, validator_data); + DBUG_PRINT("info", ("read is done")); + } + if (make_lock_and_pin(pagecache, block, lock, pin)) + { + /* + We failed to write lock the block, cache is unlocked, + we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); + goto restart; + } + + if (! ((status= block->status) & BLOCK_ERROR)) + { +#if !defined(SERIALIZED_READ_FROM_CACHE) + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); +#endif + + DBUG_ASSERT((pagecache->block_size & 511) == 0); + /* Copy data from the cache buffer */ + bmove512(buff, block->buffer, pagecache->block_size); + +#if !defined(SERIALIZED_READ_FROM_CACHE) + pagecache_pthread_mutex_lock(&pagecache->cache_lock); +#endif + } + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. + */ + if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) + unreg_request(pagecache, block, 1); + else + *link= (PAGECACHE_PAGE_LINK)block; + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + if (status & BLOCK_ERROR) + DBUG_RETURN((byte *) 0); + + DBUG_RETURN(buff); + } + +no_key_cache: /* Key cache is not used */ + + /* We can't use mutex here as the key cache may not be initialized */ + pagecache->global_cache_r_requests++; + pagecache->global_cache_read++; + if (pagecache_fread(pagecache, file, (byte*) buff, pageno, MYF(MY_NABP))) + error= 1; + DBUG_RETURN(error ? (byte*) 0 : buff); +} + + +/* + Delete page from the buffer + + SYNOPSIS + pagecache_delete_page() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + lock lock change + flush flush page if it is dirty + + RETURN VALUE + 0 - deleted or was not present at all + 1 - error + + NOTES. 
+ lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was write locked + before) or PAGECACHE_LOCK_WRITE (delete will write lock page before delete) +*/ +my_bool pagecache_delete_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + enum pagecache_page_lock lock, + my_bool flush) +{ + int error= 0; + enum pagecache_page_pin pin= lock_to_pin[lock]; + DBUG_ENTER("pagecache_delete_page"); + DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s", + (uint) file->file, (ulong) pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE || + lock == PAGECACHE_LOCK_LEFT_WRITELOCKED); + DBUG_ASSERT(pin == PAGECACHE_PIN || + pin == PAGECACHE_PIN_LEFT_PINNED); + +restart: + + if (pagecache->can_be_used) + { + /* Key cache is used */ + reg1 PAGECACHE_BLOCK_LINK *block; + PAGECACHE_HASH_LINK **unused_start, *link; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + goto end; + + inc_counter_for_resize_op(pagecache); + link= get_present_hash_link(pagecache, file, pageno, &unused_start); + if (!link) + { + DBUG_PRINT("info", ("There is no such page in the cache")); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(0); + } + block= link->block; + /* See NOTE for pagecache_unlock_page about registering requests. */ + if (pin == PAGECACHE_PIN) + reg_requests(pagecache, block, 1); + DBUG_ASSERT(block != 0); + if (make_lock_and_pin(pagecache, block, lock, pin)) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); + goto restart; + } + + if (block->status & BLOCK_CHANGED) + { + if (flush) + { + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + + if (error) + { + block->status|= BLOCK_ERROR; + goto err; + } + } + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + /* + free_block() will change the status and rec_lsn of the block so no + need to change them here. + */ + } + /* Cache is locked, so we can relese page before freeing it */ + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + DBUG_ASSERT(link->requests > 0); + link->requests--; + /* See NOTE for pagecache_unlock_page about registering requests. */ + free_block(pagecache, block); + +err: + dec_counter_for_resize_op(pagecache); +end: + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + } + + DBUG_RETURN(error); +} + + +/* + Write a buffer into a cached file. 
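pagecache_delete_page() above must push a dirty page to disk before the block may be freed when flush is requested. A toy, self-contained model of that flush-then-free ordering (all names are invented and stdio is used in place of pagecache_fwrite):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_block
{
  char data[512];
  long pageno;
  int  dirty;
};

// Write the block out if it is dirty and flushing was requested, then free
// it; on a write error keep the block alive, like the err: path above.
static int delete_block(FILE *f, struct toy_block **blk, int flush)
{
  if ((*blk)->dirty && flush)
  {
    if (fseek(f, (*blk)->pageno * 512L, SEEK_SET) ||
        fwrite((*blk)->data, 512, 1, f) != 1)
      return 1;
    (*blk)->dirty = 0;
  }
  free(*blk);                          // free_block() analogue
  *blk = NULL;
  return 0;
}

int main(void)
{
  FILE *f = tmpfile();
  struct toy_block *b = calloc(1, sizeof(*b));
  if (!f || !b)
    return 1;
  memset(b->data, 'd', sizeof(b->data));
  b->pageno = 3;
  b->dirty  = 1;
  int err = delete_block(f, &b, 1);
  printf("delete %s\n", err ? "failed" : "ok");
  fclose(f);
  return 0;
}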
+ + SYNOPSIS + + pagecache_write() + pagecache pointer to a page cache data structure + file handler for the file to write data to + pageno number of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + type type of the page + lock lock change + pin pin page + write_mode how to write page + link link to the page if we pin it + + RETURN VALUE + 0 if a success, 1 - otherwise. +*/ + +/* description of how to change lock before and after write */ +struct write_lock_change +{ + int need_lock_change; /* need changing of lock at the end of write */ + enum pagecache_page_lock new_lock; /* lock at the beginning */ + enum pagecache_page_lock unlock_lock; /* lock at the end */ +}; + +static struct write_lock_change write_lock_change_table[]= +{ + {1, + PAGECACHE_LOCK_WRITE, + PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + {0, /*unsupported*/ + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + {1, + PAGECACHE_LOCK_WRITE, + PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/, + {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/, + {0, /*unsupported*/ + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/, + {1, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + {1, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_LOCK_WRITE_TO_READ}/*PAGECACHE_LOCK_WRITE_TO_READ*/ +}; + +/* description of how to change pin before and after write */ +struct write_pin_change +{ + enum pagecache_page_pin new_pin; /* pin status at the beginning */ + enum pagecache_page_pin unlock_pin; /* pin status at the end */ +}; + +static struct write_pin_change write_pin_change_table[]= +{ + {PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/, + {PAGECACHE_PIN, + PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/, + {PAGECACHE_PIN, + PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/, + {PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/ +}; + +my_bool pagecache_write(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + enum pagecache_write_mode write_mode, + PAGECACHE_PAGE_LINK *link) +{ + reg1 PAGECACHE_BLOCK_LINK *block= NULL; + PAGECACHE_PAGE_LINK fake_link; + int error= 0; + int need_lock_change= write_lock_change_table[lock].need_lock_change; + DBUG_ENTER("pagecache_write"); + DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s m%s", + (uint) file->file, (ulong) pageno, level, + page_cache_page_type_str[type], + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin], + page_cache_page_write_mode_str[write_mode])); + DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED && + lock != PAGECACHE_LOCK_READ_UNLOCK); + if (!link) + link= &fake_link; + else + *link= 0; + + if (write_mode == PAGECACHE_WRITE_NOW) + { + /* we allow direct write if we do not use long term lockings */ + DBUG_ASSERT(lock == PAGECACHE_LOCK_LEFT_UNLOCKED); + /* Force writing from buff into disk */ + pagecache->global_cache_write++; + if (pagecache_fwrite(pagecache, file, buff, pageno, type, + MYF(MY_NABP | MY_WAIT_IF_FULL))) + DBUG_RETURN(1); + } +restart: + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + 
test_key_cache(pagecache, "start of key_cache_write", 1);); +#endif + + if (pagecache->can_be_used) + { + /* Key cache is used */ + int page_st; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + { + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto no_key_cache; + } + + inc_counter_for_resize_op(pagecache); + pagecache->global_cache_w_requests++; + /* See NOTE for pagecache_unlock_page about registering requests. */ + block= find_block(pagecache, file, pageno, level, + test(write_mode != PAGECACHE_WRITE_DONE && + lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && + lock != PAGECACHE_LOCK_WRITE_UNLOCK && + lock != PAGECACHE_LOCK_WRITE_TO_READ), + test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || + (pin == PAGECACHE_PIN)), + &page_st); + if (!block) + { + DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); + /* It happens only for requests submitted during resize operation */ + dec_counter_for_resize_op(pagecache); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* Write to the disk key cache is in resize at the moment*/ + goto no_key_cache; + } + + DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == type); + block->type= type; + + if (make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].new_lock, + (need_lock_change ? + write_pin_change_table[pin].new_pin : + pin))) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); + goto restart; + } + + + if (write_mode == PAGECACHE_WRITE_DONE) + { + if ((block->status & BLOCK_ERROR) && page_st != PAGE_READ) + { + /* Copy data from buff */ + bmove512(block->buffer, buff, pagecache->block_size); + block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); + KEYCACHE_DBUG_PRINT("key_cache_insert", + ("primary request: new page in cache")); +#ifdef THREAD + /* Signal that all pending requests for this now can be processed. */ + if (block->wqueue[COND_FOR_REQUESTED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); +#endif + } + } + else + { + if (write_mode == PAGECACHE_WRITE_NOW) + { + /* buff has been written to disk at start */ + if ((block->status & BLOCK_CHANGED) && + !(block->status & BLOCK_ERROR)) + link_to_file_list(pagecache, block, &block->hash_link->file, 1); + } + else if (! (block->status & BLOCK_CHANGED)) + link_to_changed_list(pagecache, block); + + if (! (block->status & BLOCK_ERROR)) + { + bmove512(block->buffer, buff, pagecache->block_size); + block->status|= BLOCK_READ; + } + } + + + if (need_lock_change) + { +#ifndef DBUG_OFF + int rc= +#endif + /* + QQ: We are doing an unlock here, so need to give the page its rec_lsn + */ + make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].unlock_lock, + write_pin_change_table[pin].unlock_pin); +#ifndef DBUG_OFF + DBUG_ASSERT(rc == 0); +#endif + } + + /* Unregister the request */ + DBUG_ASSERT(block->hash_link->requests > 0); + block->hash_link->requests--; + /* See NOTE for pagecache_unlock_page about registering requests. 
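write_lock_change_table[] and write_pin_change_table[] above make pagecache_write() table-driven: the lock state the caller claims to hold determines which lock is taken for the copy into the cache and which lock is left behind afterwards. A reduced sketch of that scheme, with invented states and a check that the copy always happens under some form of write lock:

#include <assert.h>
#include <stdio.h>

enum wlock { UNLOCKED, WRITELOCKED, WRITE, WRITE_UNLOCK, N_STATES };

struct transition
{
  int        need_unlock_step;  // change the lock again after the copy?
  enum wlock lock_for_copy;     // lock taken before touching the buffer
  enum wlock lock_afterwards;   // lock state left once the copy is done
};

static const struct transition table[N_STATES] =
{
  { 1, WRITE,       WRITE_UNLOCK }, // caller held nothing: lock, copy, unlock
  { 0, WRITELOCKED, WRITELOCKED },  // caller already write-locked: keep it
  { 0, WRITE,       WRITE },        // caller asked for a lasting write lock
  { 1, WRITELOCKED, WRITE_UNLOCK }  // caller wants us to drop its lock
};

int main(void)
{
  // The copy step must always run under a write lock of some kind.
  for (int i = 0; i < N_STATES; i++)
    assert(table[i].lock_for_copy == WRITE ||
           table[i].lock_for_copy == WRITELOCKED);
  printf("every write path copies under a write lock\n");
  return 0;
}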
*/ + if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) + unreg_request(pagecache, block, 1); + else + *link= (PAGECACHE_PAGE_LINK)block; + + + if (block->status & BLOCK_ERROR) + error= 1; + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + goto end; + } + +no_key_cache: + /* Key cache is not used */ + if (write_mode == PAGECACHE_WRITE_DELAY) + { + pagecache->global_cache_w_requests++; + pagecache->global_cache_write++; + if (pagecache_fwrite(pagecache, file, (byte*) buff, pageno, type, + MYF(MY_NABP | MY_WAIT_IF_FULL))) + error=1; + } + +end: +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("exec", + test_key_cache(pagecache, "end of key_cache_write", 1);); +#endif + BLOCK_INFO(block); + DBUG_RETURN(error); +} + + +/* + Free block: remove reference to it from hash table, + remove it from the chain file of dirty/clean blocks + and add it to the free list. +*/ + +static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) +{ + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", + ("block %u to be freed, hash_link %p", + BLOCK_NUMBER(pagecache, block), block->hash_link)); + if (block->hash_link) + { + /* + While waiting for readers to finish, new readers might request the + block. But since we set block->status|= BLOCK_REASSIGNED, they + will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled + later. + */ + block->status|= BLOCK_REASSIGNED; + wait_for_readers(pagecache, block); + unlink_hash(pagecache, block->hash_link); + } + + unlink_changed(block); + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); + block->status= 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->rec_lsn= 0; + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", + ("block is freed")); + unreg_request(pagecache, block, 0); + block->hash_link= NULL; + + /* Remove the free block from the LRU ring. */ + unlink_block(pagecache, block); + if (block->temperature == BLOCK_WARM) + pagecache->warm_blocks--; + block->temperature= BLOCK_COLD; + /* Insert the free block in the free list. */ + block->next_used= pagecache->free_block_list; + pagecache->free_block_list= block; + /* Keep track of the number of currently unused blocks. */ + pagecache->blocks_unused++; + +#ifdef THREAD + /* All pending requests for this page must be resubmitted. */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif +} + + +static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b) +{ + return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 : + ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 
1 : 0); +} + + +/* + Flush a portion of changed blocks to disk, + free used blocks if requested +*/ + +static int flush_cached_blocks(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + PAGECACHE_BLOCK_LINK **cache, + PAGECACHE_BLOCK_LINK **end, + enum flush_type type) +{ + int error; + int last_errno= 0; + uint count= (uint) (end-cache); + DBUG_ENTER("flush_cached_blocks"); + + /* Don't lock the cache during the flush */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH + we are guarunteed no thread will change them + */ + qsort((byte*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + for (; cache != end; cache++) + { + PAGECACHE_BLOCK_LINK *block= *cache; + + if (block->pins) + { + KEYCACHE_DBUG_PRINT("flush_cached_blocks", + ("block %u (0x%lx) pinned", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + DBUG_PRINT("info", ("block %u (0x%lx) pinned", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + BLOCK_INFO(block); + last_errno= -1; + unreg_request(pagecache, block, 1); + continue; + } + /* if the block is not pinned then it is not write locked */ + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); +#ifndef DBUG_OFF + { + int rc= +#endif + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE, PAGECACHE_PIN); +#ifndef DBUG_OFF + DBUG_ASSERT(rc == 0); + } +#endif + + KEYCACHE_DBUG_PRINT("flush_cached_blocks", + ("block %u (0x%lx) to be flushed", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + DBUG_PRINT("info", ("block %u (0x%lx) to be flushed", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + BLOCK_INFO(block); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("block %u (0x%lx) pins: %u", + BLOCK_NUMBER(pagecache, block), (ulong)block, + block->pins)); + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + + pagecache->global_cache_write++; + if (error) + { + block->status|= BLOCK_ERROR; + if (!last_errno) + last_errno= errno ? errno : -1; + } +#ifdef THREAD + /* + Let to proceed for possible waiting requests to write to the block page. + It might happen only during an operation to resize the key cache. + */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif + /* type will never be FLUSH_IGNORE_CHANGED here */ + if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) + { + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + free_block(pagecache, block); + } + else + { + block->status&= ~BLOCK_IN_FLUSH; + link_to_file_list(pagecache, block, file, 1); + unreg_request(pagecache, block, 1); + } + } + DBUG_RETURN(last_errno); +} + + +/* + flush all key blocks for a file to disk, but don't do any mutex locks + + flush_pagecache_blocks_int() + pagecache pointer to a key cache data structure + file handler for the file to flush to + flush_type type of the flush + + NOTES + This function doesn't do any mutex locks because it needs to be called + both from flush_pagecache_blocks and flush_all_key_blocks (the later one + does the mutex lock in the resize_pagecache() function). 
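flush_cached_blocks() above sorts the collected dirty blocks by page number before writing them, so the file is hit in roughly sequential order. A minimal standalone sketch of that step (invented struct, plain qsort, printf in place of pagecache_fwrite):

#include <stdio.h>
#include <stdlib.h>

struct dirty_page
{
  unsigned long pageno;
};

// Same ordering as cmp_sec_link() above, just on a simplified struct.
static int cmp_pageno(const void *a, const void *b)
{
  const struct dirty_page *pa = a, *pb = b;
  return (pa->pageno < pb->pageno) ? -1 : (pa->pageno > pb->pageno) ? 1 : 0;
}

int main(void)
{
  struct dirty_page dirty[] = { {42}, {7}, {3000}, {8}, {41} };
  size_t n = sizeof(dirty) / sizeof(dirty[0]);

  qsort(dirty, n, sizeof(dirty[0]), cmp_pageno);

  for (size_t i = 0; i < n; i++)
    printf("flush page %lu\n", dirty[i].pageno);   // pagecache_fwrite() here
  return 0;
}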
+ + RETURN + 0 ok + 1 error +*/ + +static int flush_pagecache_blocks_int(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + enum flush_type type) +{ + PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; + int last_errno= 0; + DBUG_ENTER("flush_pagecache_blocks_int"); + DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu", + file->file, pagecache->blocks_used, pagecache->blocks_changed)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, + "start of flush_pagecache_blocks", 0);); +#endif + + cache= cache_buff; + if (pagecache->disk_blocks > 0 && + (!my_disable_flush_pagecache_blocks || type != FLUSH_KEEP)) + { + /* Key cache exists and flush is not disabled */ + int error= 0; + uint count= 0; + PAGECACHE_BLOCK_LINK **pos, **end; + PAGECACHE_BLOCK_LINK *first_in_switch= NULL; + PAGECACHE_BLOCK_LINK *block, *next; +#if defined(PAGECACHE_DEBUG) + uint cnt= 0; +#endif + + if (type != FLUSH_IGNORE_CHANGED) + { + /* + Count how many key blocks we have to cache to be able + to flush all dirty pages with minimum seek moves + */ + for (block= pagecache->changed_blocks[FILE_HASH(*file)] ; + block; + block= block->next_changed) + { + if (block->hash_link->file.file == file->file) + { + count++; + KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used); + } + } + /* Allocate a new buffer only if its bigger than the one we have */ + if (count > FLUSH_CACHE && + !(cache= + (PAGECACHE_BLOCK_LINK**) + my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0)))) + { + cache= cache_buff; + count= FLUSH_CACHE; + } + } + + /* Retrieve the blocks and write them to a buffer to be flushed */ +restart: + end= (pos= cache)+count; + for (block= pagecache->changed_blocks[FILE_HASH(*file)] ; + block; + block= next) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + next= block->next_changed; + if (block->hash_link->file.file == file->file) + { + /* + Mark the block with BLOCK_IN_FLUSH in order not to let + other threads to use it for new pages and interfere with + our sequence ot flushing dirty file pages + */ + block->status|= BLOCK_IN_FLUSH; + + if (! (block->status & BLOCK_IN_SWITCH)) + { + /* + We care only for the blocks for which flushing was not + initiated by other threads as a result of page swapping + */ + reg_requests(pagecache, block, 1); + if (type != FLUSH_IGNORE_CHANGED) + { + /* It's not a temporary file */ + if (pos == end) + { + /* + This happens only if there is not enough + memory for the big block + */ + if ((error= flush_cached_blocks(pagecache, file, cache, + end,type))) + last_errno=error; + DBUG_PRINT("info", ("restarting...")); + /* + Restart the scan as some other thread might have changed + the changed blocks chain: the blocks that were in switch + state before the flush started have to be excluded + */ + goto restart; + } + *pos++= block; + } + else + { + /* It's a temporary file */ + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + free_block(pagecache, block); + } + } + else + { + /* Link the block into a list of blocks 'in switch' */ + /* QQ: + #warning this unlink_changed() is a serious problem for + Maria's Checkpoint: it removes a page from the list of dirty + pages, while it's still dirty. A solution is to abandon + first_in_switch, just wait for this page to be + flushed by somebody else, and loop. 
TODO: check all places + where we remove a page from the list of dirty pages + */ + unlink_changed(block); + link_changed(block, &first_in_switch); + } + } + } + if (pos != cache) + { + if ((error= flush_cached_blocks(pagecache, file, cache, pos, type))) + last_errno= error; + } + /* Wait until list of blocks in switch is empty */ + while (first_in_switch) + { +#if defined(PAGECACHE_DEBUG) + cnt= 0; +#endif + block= first_in_switch; + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + do + { + KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + } + /* The following happens very seldom */ + if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) + { +#if defined(PAGECACHE_DEBUG) + cnt=0; +#endif + for (block= pagecache->file_blocks[FILE_HASH(*file)] ; + block; + block= next) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + next= block->next_changed; + if (block->hash_link->file.file == file->file && + (! (block->status & BLOCK_CHANGED) + || type == FLUSH_IGNORE_CHANGED)) + { + reg_requests(pagecache, block, 1); + free_block(pagecache, block); + } + } + } + } + +#ifndef DBUG_OFF + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "end of flush_pagecache_blocks", 0);); +#endif + if (cache != cache_buff) + my_free((gptr) cache, MYF(0)); + if (last_errno) + errno=last_errno; /* Return first error */ + DBUG_RETURN(last_errno != 0); +} + + +/* + Flush all blocks for a file to disk + + SYNOPSIS + + flush_pagecache_blocks() + pagecache pointer to a page cache data structure + file handler for the file to flush to + flush_type type of the flush + + RETURN + 0 ok + 1 error +*/ + +int flush_pagecache_blocks(PAGECACHE *pagecache, + PAGECACHE_FILE *file, enum flush_type type) +{ + int res; + DBUG_ENTER("flush_pagecache_blocks"); + DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache)); + + if (pagecache->disk_blocks <= 0) + DBUG_RETURN(0); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + inc_counter_for_resize_op(pagecache); + res= flush_pagecache_blocks_int(pagecache, file, type); + dec_counter_for_resize_op(pagecache); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(res); +} + + +/* + Reset the counters of a key cache. + + SYNOPSIS + reset_pagecache_counters() + name the name of a key cache + pagecache pointer to the pagecache to be reset + + DESCRIPTION + This procedure is used to reset the counters of all currently used key + caches, both the default one and the named ones. 
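flush_pagecache_blocks_int() above first counts the changed blocks that belong to the file and only then decides whether the on-stack array of FLUSH_CACHE pointers is large enough or a heap buffer must be allocated. A toy version of that two-pass scan (the struct and FLUSH_CACHE_DEMO are invented for the sketch):

#include <stdio.h>
#include <stdlib.h>

#define FLUSH_CACHE_DEMO 2      // invented; the real constant is FLUSH_CACHE

struct changed_block
{
  int                   file;
  unsigned long         pageno;
  struct changed_block *next_changed;
};

int main(void)
{
  struct changed_block chain[6] =
  {
    {7, 10, NULL}, {9, 11, NULL}, {7, 2, NULL},
    {7, 30, NULL}, {7, 5, NULL},  {8, 1, NULL}
  };
  for (int i = 0; i < 5; i++)          // build the next_changed chain
    chain[i].next_changed = &chain[i + 1];
  int file = 7;

  // Pass 1: count the changed blocks that belong to this file.
  unsigned count = 0;
  for (struct changed_block *b = chain; b; b = b->next_changed)
    if (b->file == file)
      count++;

  // Pass 2: collect them, falling back to the heap when the stack array is
  // too small (this mirrors the my_malloc() branch in the real code).
  struct changed_block *stack_buf[FLUSH_CACHE_DEMO];
  struct changed_block **cache = stack_buf;
  if (count > FLUSH_CACHE_DEMO &&
      !(cache = malloc(count * sizeof(*cache))))
    return 1;

  unsigned used = 0;
  for (struct changed_block *b = chain; b; b = b->next_changed)
    if (b->file == file)
      cache[used++] = b;

  printf("collected %u dirty blocks of file %d\n", used, file);
  if (cache != stack_buf)
    free(cache);
  return 0;
}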
+ + RETURN + 0 on success (always because it can't fail) +*/ + +int reset_pagecache_counters(const char *name, PAGECACHE *pagecache) +{ + DBUG_ENTER("reset_pagecache_counters"); + if (!pagecache->inited) + { + DBUG_PRINT("info", ("Key cache %s not initialized.", name)); + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("Resetting counters for key cache %s.", name)); + + pagecache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ + pagecache->global_cache_r_requests= 0; /* Key_read_requests */ + pagecache->global_cache_read= 0; /* Key_reads */ + pagecache->global_cache_w_requests= 0; /* Key_write_requests */ + pagecache->global_cache_write= 0; /* Key_writes */ + DBUG_RETURN(0); +} + + +/* + Allocates a buffer and stores in it some information about all dirty pages + of type PAGECACHE_LSN_PAGE. + + SYNOPSIS + pagecache_collect_changed_blocks_with_lsn() + pagecache pointer to the page cache + str (OUT) pointer to a LEX_STRING where the allocated buffer, and + its size, will be put + max_lsn (OUT) pointer to a LSN where the maximum rec_lsn of all + relevant dirty pages will be put + + DESCRIPTION + Does the allocation because the caller cannot know the size itself. + Memory freeing is to be done by the caller (if the "str" member of the + LEX_STRING is not NULL). + Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they + are not interesting for a checkpoint record. + The caller has the intention of doing checkpoints. + + RETURN + 0 on success + 1 on error +*/ +my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, + LEX_STRING *str, + LSN *max_lsn) +{ + my_bool error; + ulong stored_list_size= 0; + uint file_hash; + char *ptr; + DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN"); + + *max_lsn= 0; + DBUG_ASSERT(NULL == str->str); + /* + We lock the entire cache but will be quick, just reading/writing a few MBs + of memory at most. + When we enter here, we must be sure that no "first_in_switch" situation + is happening or will happen (either we have to get rid of + first_in_switch in the code or, first_in_switch has to increment a + "danger" counter for this function to know it has to wait). TODO. + */ + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + /* Count how many dirty pages are interesting */ + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + /* + Q: is there somthing subtle with block->hash_link: can it be NULL? + does it have to be == hash_link->block... ? + */ + DBUG_ASSERT(block->hash_link != NULL); + DBUG_ASSERT(block->status & BLOCK_CHANGED); + if (block->type != PAGECACHE_LSN_PAGE) + continue; /* no need to store it */ + /* + In the current pagecache, rec_lsn is not set correctly: + 1) it is set on pagecache_unlock(), too late (a page is dirty + (BLOCK_CHANGED) since the first pagecache_write()). So in this + scenario: + thread1: thread2: + write_REDO + pagecache_write() checkpoint : reclsn not known + pagecache_unlock(sets rec_lsn) + commit + crash, + at recovery we will wrongly skip the REDO. It also affects the + low-water mark's computation. + 2) sometimes the unlocking can be an implicit action of + pagecache_write(), without any call to pagecache_unlock(), then + rec_lsn is not set. + 1) and 2) are critical problems. + TODO: fix this when Monty has explained how he writes BLOB pages. 
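The code just below lays the dirty-page table out as 8 bytes of entry count followed by (4 + 4 + 8)-byte entries of file descriptor, page number and rec_lsn. A standalone sketch of that layout; store4/store8 imitate the low-byte-first int4store/int8store macros, and everything else here is invented:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void store4(unsigned char *p, uint32_t v)
{
  for (int i = 0; i < 4; i++) p[i] = (unsigned char)(v >> (8 * i));
}
static void store8(unsigned char *p, uint64_t v)
{
  for (int i = 0; i < 8; i++) p[i] = (unsigned char)(v >> (8 * i));
}

struct dirty { uint32_t file; uint32_t pageno; uint64_t rec_lsn; };

int main(void)
{
  struct dirty pages[] = { {7, 12, 10000}, {7, 13, 10050}, {9, 2, 9001} };
  uint64_t n = sizeof(pages) / sizeof(pages[0]);

  size_t len = 8 + (4 + 4 + 8) * (size_t) n;   // same formula as str->length
  unsigned char *buf = malloc(len), *ptr = buf;
  if (!buf)
    return 1;

  store8(ptr, n);            ptr += 8;
  uint64_t max_lsn = 0;
  for (uint64_t i = 0; i < n; i++)
  {
    store4(ptr, pages[i].file);    ptr += 4;
    store4(ptr, pages[i].pageno);  ptr += 4;
    store8(ptr, pages[i].rec_lsn); ptr += 8;
    if (pages[i].rec_lsn > max_lsn)
      max_lsn = pages[i].rec_lsn;  // like set_if_bigger(*max_lsn, ...)
  }
  printf("packed %llu entries, %zu bytes, max_lsn %llu\n",
         (unsigned long long) n, len, (unsigned long long) max_lsn);
  free(buf);
  return 0;
}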
+ */ + if (block->rec_lsn == 0) + { + DBUG_ASSERT(0); + goto err; + } + stored_list_size++; + } + } + + str->length= 8+(4+4+8)*stored_list_size; + if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME)))) + goto err; + ptr= str->str; + int8store(ptr, stored_list_size); + ptr+= 8; + if (0 == stored_list_size) + goto end; + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + if (block->type != PAGECACHE_LSN_PAGE) + continue; /* no need to store it in the checkpoint record */ + DBUG_ASSERT((4 == sizeof(block->hash_link->file.file)) && + (4 == sizeof(block->hash_link->pageno))); + int4store(ptr, block->hash_link->file.file); + ptr+= 4; + int4store(ptr, block->hash_link->pageno); + ptr+= 4; + int8store(ptr, (ulonglong) block->rec_lsn); + ptr+= 8; + set_if_bigger(*max_lsn, block->rec_lsn); + } + } + error= 0; + goto end; +err: + error= 1; +end: + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(error); +} + + +#ifndef DBUG_OFF +/* + Test if disk-cache is ok +*/ +static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)), + const char *where __attribute__((unused)), + my_bool lock __attribute__((unused))) +{ + /* TODO */ +} +#endif + +#if defined(PAGECACHE_TIMEOUT) + +#define KEYCACHE_DUMP_FILE "pagecache_dump.txt" +#define MAX_QUEUE_LEN 100 + + +static void pagecache_dump(PAGECACHE *pagecache) +{ + FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w"); + struct st_my_thread_var *last; + struct st_my_thread_var *thread; + PAGECACHE_BLOCK_LINK *block; + PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_PAGE *page; + uint i; + + fprintf(pagecache_dump_file, "thread:%u\n", thread->id); + + i=0; + thread=last=waiting_for_hash_link.last_thread; + fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n"); + if (thread) + do + { + thread= thread->next; + page= (PAGECACHE_PAGE *) thread->opt_info; + fprintf(pagecache_dump_file, + "thread:%u, (file,pageno)=(%u,%lu)\n", + thread->id,(uint) page->file.file,(ulong) page->pageno); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + i=0; + thread=last=waiting_for_block.last_thread; + fprintf(pagecache_dump_file, "queue of threads waiting for block\n"); + if (thread) + do + { + thread=thread->next; + hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info; + fprintf(pagecache_dump_file, + "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n", + thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link), + (uint) hash_link->file.file,(ulong) hash_link->pageno); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + for (i=0 ; i < pagecache->blocks_used ; i++) + { + int j; + block= &pagecache->block_root[i]; + hash_link= block->hash_link; + fprintf(pagecache_dump_file, + "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n", + i, (int) (hash_link ? + PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) : + -1), + block->status, block->requests, block->condvar ? 
1 : 0); + for (j=0 ; j < COND_SIZE; j++) + { + PAGECACHE_WQUEUE *wqueue=&block->wqueue[j]; + thread= last= wqueue->last_thread; + fprintf(pagecache_dump_file, "queue #%d\n", j); + if (thread) + { + do + { + thread=thread->next; + fprintf(pagecache_dump_file, + "thread:%u\n", thread->id); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + } + } + } + fprintf(pagecache_dump_file, "LRU chain:"); + block= pagecache= used_last; + if (block) + { + do + { + block= block->next_used; + fprintf(pagecache_dump_file, + "block:%u, ", BLOCK_NUMBER(pagecache, block)); + } + while (block != pagecache->used_last); + } + fprintf(pagecache_dump_file, "\n"); + + fclose(pagecache_dump_file); +} + +#endif /* defined(PAGECACHE_TIMEOUT) */ + +#if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) + + +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + int rc; + struct timeval now; /* time when we started waiting */ + struct timespec timeout; /* timeout value for the wait function */ + struct timezone tz; +#if defined(PAGECACHE_DEBUG) + int cnt=0; +#endif + + /* Get current time */ + gettimeofday(&now, &tz); + /* Prepare timeout value */ + timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT; + /* + timeval uses microseconds. + timespec uses nanoseconds. + 1 nanosecond = 1000 micro seconds + */ + timeout.tv_nsec= now.tv_usec * 1000; + KEYCACHE_THREAD_TRACE_END("started waiting"); +#if defined(PAGECACHE_DEBUG) + cnt++; + if (cnt % 100 == 0) + fprintf(pagecache_debug_log, "waiting...\n"); + fflush(pagecache_debug_log); +#endif + rc= pthread_cond_timedwait(cond, mutex, &timeout); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + if (rc == ETIMEDOUT || rc == ETIME) + { +#if defined(PAGECACHE_DEBUG) + fprintf(pagecache_debug_log,"aborted by pagecache timeout\n"); + fclose(pagecache_debug_log); + abort(); +#endif + pagecache_dump(); + } + +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT); +#else + assert(rc != ETIMEDOUT); +#endif + return rc; +} +#else +#if defined(PAGECACHE_DEBUG) +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + int rc; + KEYCACHE_THREAD_TRACE_END("started waiting"); + rc= pthread_cond_wait(cond, mutex); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + return rc; +} +#endif +#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */ + +#if defined(PAGECACHE_DEBUG) +static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex) +{ + int rc; + rc= pthread_mutex_lock(mutex); + KEYCACHE_THREAD_TRACE_BEGIN(""); + return rc; +} + + +static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + KEYCACHE_THREAD_TRACE_END(""); + pthread_mutex_unlock(mutex); +} + + +static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond) +{ + int rc; + KEYCACHE_THREAD_TRACE("signal"); + rc= pthread_cond_signal(cond); + return rc; +} + + +#if defined(PAGECACHE_DEBUG_LOG) + + +static void pagecache_debug_print(const char * fmt, ...) 
+{ + va_list args; + va_start(args,fmt); + if (pagecache_debug_log) + { + VOID(vfprintf(pagecache_debug_log, fmt, args)); + VOID(fputc('\n',pagecache_debug_log)); + } + va_end(args); +} +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#if defined(PAGECACHE_DEBUG_LOG) + + +void pagecache_debug_log_close(void) +{ + if (pagecache_debug_log) + fclose(pagecache_debug_log); +} +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#endif /* defined(PAGECACHE_DEBUG) */ diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c index 6a30267eb80..aa04d55f624 100644 --- a/mysys/my_atomic.c +++ b/mysys/my_atomic.c @@ -17,11 +17,10 @@ #include <my_pthread.h> #ifndef HAVE_INLINE -/* - the following will cause all inline functions to be instantiated -*/ +/* the following will cause all inline functions to be instantiated */ #define HAVE_INLINE -#define static extern +#undef STATIC_INLINE +#define STATIC_INLINE extern #endif #include <my_atomic.h> @@ -35,7 +34,7 @@ */ int my_atomic_initialize() { - DBUG_ASSERT(sizeof(intptr) == sizeof(void *)); + compile_time_assert(sizeof(intptr) == sizeof(void *)); /* currently the only thing worth checking is SMP/UP issue */ #ifdef MY_ATOMIC_MODE_DUMMY return my_getncpus() == 1 ? MY_ATOMIC_OK : MY_ATOMIC_NOT_1CPU; diff --git a/mysys/my_bit.c b/mysys/my_bit.c index 5a9b1187c83..2881eb1ebd2 100644 --- a/mysys/my_bit.c +++ b/mysys/my_bit.c @@ -13,23 +13,18 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* Some useful bit functions */ +#include <my_global.h> -#include "mysys_priv.h" - -/* - Find smallest X in 2^X >= value - This can be used to divide a number with value by doing a shift instead -*/ +#ifndef HAVE_INLINE +/* the following will cause all inline functions to be instantiated */ +#define HAVE_INLINE +#undef STATIC_INLINE +#define STATIC_INLINE extern +#endif -uint my_bit_log2(ulong value) -{ - uint bit; - for (bit=0 ; value > 1 ; value>>=1, bit++) ; - return bit; -} +#include <my_bit.h> -static char nbits[256] = { +const char _my_bits_nbits[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, @@ -48,60 +43,29 @@ static char nbits[256] = { 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; -uint my_count_bits(ulonglong v) -{ -#if SIZEOF_LONG_LONG > 4 - /* The following code is a bit faster on 16 bit machines than if we would - only shift v */ - ulong v2=(ulong) (v >> 32); - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)] + - nbits[(uchar) (v2)] + - nbits[(uchar) (v2 >> 8)] + - nbits[(uchar) (v2 >> 16)] + - nbits[(uchar) (v2 >> 24)]); -#else - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)]); -#endif -} - -uint my_count_bits_ushort(ushort v) -{ - return nbits[v]; -} - - /* - Next highest power of two - - SYNOPSIS - my_round_up_to_next_power() - v Value to check - - RETURN - Next or equal power of 2 - Note: 0 will return 0 - - NOTES - Algorithm by Sean Anderson, according to: - http://graphics.stanford.edu/~seander/bithacks.html - (Orignal code public domain) - - Comments shows how this works with 01100000000000000000000000001011 + perl -e 'print map{", 0x".unpack H2,pack B8,unpack b8,chr$_}(0..255)' */ +const uchar _my_bits_reverse_table[256]={ +0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, +0xB0, 0x70, 0xF0, 
0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, +0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, +0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, +0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, +0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, +0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, +0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, +0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, +0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, +0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, +0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, +0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, +0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, +0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, +0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, +0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, +0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, +0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, +0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; -uint32 my_round_up_to_next_power(uint32 v) -{ - v--; /* 01100000000000000000000000001010 */ - v|= v >> 1; /* 01110000000000000000000000001111 */ - v|= v >> 2; /* 01111100000000000000000000001111 */ - v|= v >> 4; /* 01111111110000000000000000001111 */ - v|= v >> 8; /* 01111111111111111100000000001111 */ - v|= v >> 16; /* 01111111111111111111111111111111 */ - return v+1; /* 10000000000000000000000000000000 */ -} diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index 10eff40b9ed..e127b2584ae 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -38,6 +38,7 @@ #include "mysys_priv.h" #include <my_bitmap.h> #include <m_string.h> +#include <my_bit.h> void create_last_word_mask(MY_BITMAP *map) { diff --git a/mysys/my_create.c b/mysys/my_create.c index 5639459f5a9..13e4675ee66 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -52,6 +52,13 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, fd = open(FileName, access_flags); #endif + if ((MyFlags & MY_SYNC_DIR) && (fd >=0) && + my_sync_dir_by_file(FileName, MyFlags)) + { + my_close(fd, MyFlags); + fd= -1; + } + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE, EE_CANTCREATEFILE, MyFlags)); } /* my_create */ diff --git a/mysys/my_delete.c b/mysys/my_delete.c index bac3e2513e1..14374fd3fa8 100644 --- a/mysys/my_delete.c +++ b/mysys/my_delete.c @@ -29,6 +29,9 @@ int my_delete(const char *name, myf MyFlags) my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)), name,errno); } + else if ((MyFlags & MY_SYNC_DIR) && + my_sync_dir_by_file(name, MyFlags)) + err= -1; DBUG_RETURN(err); } /* my_delete */ diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c index 2fd7eed7778..8f7cc5b7029 100644 --- a/mysys/my_getsystime.c +++ b/mysys/my_getsystime.c @@ -34,10 +34,6 @@ ulonglong my_getsystime() LARGE_INTEGER t_cnt; if (!offset) { - /* strictly speaking there should be a mutex to protect - initialization section. 
But my_getsystime() is called from - UUID() code, and UUID() calls are serialized with a mutex anyway - */ LARGE_INTEGER li; FILETIME ft; GetSystemTimeAsFileTime(&ft); diff --git a/mysys/my_handler.c b/mysys/my_handler.c index afc44cc2838..757cbe490f8 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -16,9 +16,11 @@ MA 02111-1307, USA */ #include <my_global.h> -#include "my_handler.h" +#include <m_ctype.h> +#include <my_base.h> +#include <my_handler.h> -int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, +int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, uchar *b, uint b_length, my_bool part_key, my_bool skip_end_space) { @@ -174,7 +176,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag=mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag=ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -187,7 +189,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, { uint length=(uint) (end-a), a_length=length, b_length=length; if (piks && - (flag= mi_compare_text(keyseg->charset, a, a_length, b, b_length, + (flag= ha_compare_text(keyseg->charset, a, a_length, b, b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -235,7 +237,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag= mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag= ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool) ((nextflag & (SEARCH_FIND | @@ -482,12 +484,15 @@ end: DESCRIPTION Find the first NULL value in index-suffix values tuple. - TODO Consider optimizing this fuction or its use so we don't search for - NULL values in completely NOT NULL index suffixes. + + TODO + Consider optimizing this function or its use so we don't search for + NULL values in completely NOT NULL index suffixes. RETURN - First key part that has NULL as value in values tuple, or the last key part - (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain NULLs. + First key part that has NULL as value in values tuple, or the last key + part (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain + NULLs. 
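As a quick cross-check of the _my_bits_reverse_table added to my_bit.c earlier in this patch, the sketch below rebuilds the table with a naive per-bit loop and compares a few entries against the literal values visible in the hunk:

#include <assert.h>
#include <stdio.h>

// Reverse the bit order of one byte the slow way.
static unsigned char reverse_byte(unsigned char b)
{
  unsigned char r = 0;
  for (int i = 0; i < 8; i++)
    if (b & (1u << i))
      r |= (unsigned char)(0x80u >> i);
  return r;
}

int main(void)
{
  unsigned char table[256];
  for (int i = 0; i < 256; i++)
    table[i] = reverse_byte((unsigned char) i);

  // Spot checks against values that appear in the diff.
  assert(table[0x00] == 0x00);
  assert(table[0x01] == 0x80);
  assert(table[0x02] == 0x40);
  assert(table[0x03] == 0xC0);
  assert(table[0xFF] == 0xFF);
  printf("bit-reversal table matches the generated one\n");
  return 0;
}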
*/ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) diff --git a/mysys/my_init.c b/mysys/my_init.c index 7784c09d9d6..e8a55fdc1e6 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -43,6 +43,7 @@ static void netware_init(); my_bool my_init_done= 0; uint mysys_usage_id= 0; /* Incremented for each my_init() */ +ulong my_thread_stack_size= 65536; static ulong atoi_octal(const char *str) { diff --git a/mysys/my_open.c b/mysys/my_open.c index 21bdedddc48..6fe7883b99b 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -161,6 +161,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type } pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); + fd= -1; my_errno=ENOMEM; } else diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 7a09c21e039..2b9a994299f 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -51,7 +51,7 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, if (!error) /* Seek was successful */ { if ((readbytes = (uint) read(Filedes, Buffer, Count)) == -1L) - my_errno= errno; + my_errno= errno ? errno : -1; /* We should seek back, even if read failed. If this fails, @@ -67,7 +67,7 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, #else if ((error= ((readbytes = (uint) pread(Filedes, Buffer, Count, offset)) != Count))) - my_errno= errno; + my_errno= errno ? errno : -1; #endif if (error || readbytes != Count) { @@ -87,8 +87,10 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), my_filename(Filedes),my_errno); else if (MyFlags & (MY_NABP | MY_FNABP)) + { my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), my_filename(Filedes),my_errno); + } } if ((int) readbytes == -1 || (MyFlags & (MY_FNABP | MY_NABP))) DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ @@ -158,7 +160,8 @@ uint my_pwrite(int Filedes, const byte *Buffer, uint Count, my_off_t offset, Count-=writenbytes; offset+=writenbytes; } - DBUG_PRINT("error",("Write only %d bytes",writenbytes)); + DBUG_PRINT("error",("Write only %d bytes, error: %d", + writenbytes, my_errno)); #ifndef NO_BACKGROUND #ifdef THREAD if (my_thread_var->abort) diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 6a6aa6a5796..64dbac955ea 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -16,8 +16,9 @@ #include "mysys_priv.h" #include <my_dir.h> #include "mysys_err.h" - +#include "m_string.h" #undef my_rename + /* On unix rename deletes to file if it exists */ int my_rename(const char *from, const char *to, myf MyFlags) @@ -60,5 +61,18 @@ int my_rename(const char *from, const char *to, myf MyFlags) if (MyFlags & (MY_FAE+MY_WME)) my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno); } + else if (MyFlags & MY_SYNC_DIR) + { +#ifdef NEED_EXPLICIT_SYNC_DIR + /* do only the needed amount of syncs: */ + char dir_from[FN_REFLEN], dir_to[FN_REFLEN]; + dirname_part(dir_from, from); + dirname_part(dir_to, to); + if (my_sync_dir(dir_from, MyFlags) || + (strcmp(dir_from, dir_to) && + my_sync_dir(dir_to, MyFlags))) + error= -1; +#endif + } DBUG_RETURN(error); } /* my_rename */ diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c index 810c0c72632..98059ccd508 100644 --- a/mysys/my_symlink.c +++ b/mysys/my_symlink.c @@ -84,6 +84,8 @@ int my_symlink(const char *content, const char *linkname, myf MyFlags) if (MyFlags & MY_WME) my_error(EE_CANT_SYMLINK, MYF(0), linkname, content, errno); } + else if ((MyFlags & MY_SYNC_DIR) && my_sync_dir_by_file(linkname, MyFlags)) + result= -1; DBUG_RETURN(result); 
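For illustration only (this sketch is not part of the patch): the MY_SYNC_DIR handling added to my_create(), my_delete(), my_rename() and my_symlink() above follows one durability recipe — flush the file's data, then flush the directory entry that names it, via the my_sync_dir()/my_sync_dir_by_file() helpers this patch introduces in mysys/my_sync.c further down. A minimal sketch, assuming the public mysys header; the helper name create_file_durably is hypothetical:

#include <my_global.h>
#include <my_sys.h>   /* assumed: declares my_create(), my_sync(), my_close(), MY_SYNC_DIR */

/* Sketch, not patch code: create a file so both its contents and its
   directory entry survive a crash. Error handling is abbreviated. */
static int create_file_durably(const char *name)
{
  /* With MY_SYNC_DIR, my_create() also syncs the containing directory */
  File fd= my_create(name, 0, O_RDWR | O_TRUNC, MYF(MY_WME | MY_SYNC_DIR));
  if (fd < 0)
    return 1;
  /* ... write data here ... */
  if (my_sync(fd, MYF(MY_WME)))          /* flush file contents to disk */
  {
    my_close(fd, MYF(MY_WME));
    return 1;
  }
  return my_close(fd, MYF(MY_WME)) ? 1 : 0;
}

The same two-step idea is what my_rename() does above with NEED_EXPLICIT_SYNC_DIR: it syncs the source directory and, if different, the destination directory, so the rename itself is durable.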
#endif /* HAVE_READLINK */ } diff --git a/mysys/my_sync.c b/mysys/my_sync.c index 64fce3aac21..ab3fc89e0d3 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -48,6 +48,16 @@ int my_sync(File fd, myf my_flags) do { +#if defined(F_FULLFSYNC) + /* + In Mac OS X >= 10.3 this call is safer than fsync() (it forces the + disk's cache and guarantees ordered writes). + */ + if (!(res= fcntl(fd, F_FULLFSYNC, 0))) + break; /* ok */ + /* Some file systems don't support F_FULLFSYNC and fail above: */ + DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back")); +#endif #if defined(HAVE_FDATASYNC) res= fdatasync(fd); #elif defined(HAVE_FSYNC) @@ -55,6 +65,7 @@ int my_sync(File fd, myf my_flags) #elif defined(__WIN__) res= _commit(fd); #else +#error Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ #endif } while (res == -1 && errno == EINTR); @@ -66,10 +77,78 @@ int my_sync(File fd, myf my_flags) my_errno= -1; /* Unknown error */ if ((my_flags & MY_IGNORE_BADFD) && (er == EBADF || er == EINVAL || er == EROFS)) + { + DBUG_PRINT("info", ("ignoring errno %d", er)); res= 0; + } else if (my_flags & MY_WME) my_error(EE_SYNC, MYF(ME_BELL+ME_WAITTANG), my_filename(fd), my_errno); } DBUG_RETURN(res); } /* my_sync */ + +static const char cur_dir_name[]= {FN_CURLIB, 0}; +/* + Force directory information to disk. + + SYNOPSIS + my_sync_dir() + dir_name the name of the directory + my_flags flags (MY_WME etc) + + RETURN + 0 if ok, !=0 if error +*/ +int my_sync_dir(const char *dir_name, myf my_flags) +{ +#ifdef NEED_EXPLICIT_SYNC_DIR + DBUG_ENTER("my_sync_dir"); + DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags)); + File dir_fd; + int res= 0; + const char *correct_dir_name; + /* Sometimes the path does not contain an explicit directory */ + correct_dir_name= (dir_name[0] == 0) ? cur_dir_name : dir_name; + /* + Syncing a dir may give EINVAL on tmpfs on Linux, which is ok. + EIO on the other hand is very important. Hence MY_IGNORE_BADFD. + */ + if ((dir_fd= my_open(correct_dir_name, O_RDONLY, MYF(my_flags))) >= 0) + { + if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD))) + res= 2; + if (my_close(dir_fd, MYF(my_flags))) + res= 3; + } + else + res= 1; + DBUG_RETURN(res); +#else + return 0; +#endif +} + + +/* + Force directory information to disk. + + SYNOPSIS + my_sync_dir_by_file() + file_name the name of a file in the directory + my_flags flags (MY_WME etc) + + RETURN + 0 if ok, !=0 if error +*/ +int my_sync_dir_by_file(const char *file_name, myf my_flags) +{ +#ifdef NEED_EXPLICIT_SYNC_DIR + char dir_name[FN_REFLEN]; + dirname_part(dir_name, file_name); + return my_sync_dir(dir_name, my_flags); +#else + return 0; +#endif +} + diff --git a/mysys/wqueue.c b/mysys/wqueue.c new file mode 100644 index 00000000000..28e044ff606 --- /dev/null +++ b/mysys/wqueue.c @@ -0,0 +1,167 @@ + +#include <wqueue.h> + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) +/* + Link a thread into double-linked queue of waiting threads. + + SYNOPSIS + wqueue_link_into_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is double-linked of the type (**prev,*next), accessed by + a pointer to the last element. 
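To make the queue shape described in the NOTES above concrete, here is a small standalone sketch (illustrative only, not patch code) of a circular list addressed through its tail pointer, mirroring the logic of wqueue_add_to_queue() below: an empty queue has a NULL tail, otherwise tail->next is the head.

#include <stddef.h>

/* Toy illustration of the wqueue.c representation: circular, singly-linked,
   reachable only through the tail ("last") pointer. */
struct node { struct node *next; };

static void enqueue(struct node **last, struct node *n)
{
  if (*last == NULL)
    n->next= n;                 /* single element points to itself */
  else
  {
    n->next= (*last)->next;     /* new tail points to the old head */
    (*last)->next= n;
  }
  *last= n;                     /* n becomes the new tail */
}

static struct node *dequeue(struct node **last)
{
  struct node *head;
  if (*last == NULL)
    return NULL;
  head= (*last)->next;
  if (head == *last)            /* only one element left */
    *last= NULL;
  else
    (*last)->next= head->next;  /* unlink head, tail unchanged */
  head->next= NULL;
  return head;
}

The double-linked variant used by wqueue_link_into_queue() keeps the same tail-pointer access but adds a **prev back-link so any element can be unlinked in O(1).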
+*/ + +void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + { + /* Queue is empty */ + thread->next= thread; + thread->prev= &thread->next; + } + else + { + thread->prev= last->next->prev; + last->next->prev= &thread->next; + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + + +/* + Add a thread to single-linked queue of waiting threads + + SYNOPSIS + wqueue_add_to_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is single-linked of the type (*next), accessed by a pointer + to the last element. +*/ + +void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + thread->next= thread; + else + { + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + +/* + Unlink a thread from double-linked queue of waiting threads + + SYNOPSIS + wqueue_unlink_from_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be removed from the queue + + RETURN VALUE + none + + NOTES. + See NOTES for link_into_queue +*/ + +void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + if (thread->next == thread) + /* The queue contains only one member */ + wqueue->last_thread= NULL; + else + { + thread->next->prev= thread->prev; + *thread->prev= thread->next; + if (wqueue->last_thread == thread) + wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next, + thread->prev); + } + thread->next= NULL; +} + + +/* + Remove all threads from queue, signaling them to proceed + + SYNOPSIS + wqueue_release_queue() + wqueue pointer to the queue structure + + RETURN VALUE + none + + NOTES. + See notes for add_to_queue + When removed from the queue each thread is signaled via condition + variable thread->suspend. +*/ + +void wqueue_release_queue(WQUEUE *wqueue) +{ + struct st_my_thread_var *last= wqueue->last_thread; + struct st_my_thread_var *next= last->next; + struct st_my_thread_var *thread; + do + { + thread= next; + pthread_cond_signal(&thread->suspend); + next= thread->next; + thread->next= NULL; + } + while (thread != last); + wqueue->last_thread= NULL; +} + + +/* + Add thread and wait + + SYNOPSIS + wqueue_add_and_wait() + wqueue queue to add to + thread thread which is waiting + lock mutex needed for the operation +*/ + +void wqueue_add_and_wait(WQUEUE *wqueue, + struct st_my_thread_var *thread, pthread_mutex_t *lock) +{ + DBUG_ENTER("wqueue_add_and_wait"); + DBUG_PRINT("enter", ("thread 0x%lx cond 0x%lx, mutex 0x%lx", + (ulong) thread, (ulong) &thread->suspend, (ulong) lock)); + wqueue_add_to_queue(wqueue, thread); + do + { + DBUG_PRINT("info", ("wait...
cond 0x%lx, mutex 0x%lx", + (ulong) &thread->suspend, (ulong) lock)); + pthread_cond_wait(&thread->suspend, lock); + DBUG_PRINT("info", ("wait done cond 0x%lx, mutex 0x%lx, next 0x%lx", + (ulong) &thread->suspend, (ulong) lock, + (ulong) thread->next)); + } + while (thread->next); + DBUG_VOID_RETURN; +} diff --git a/plugin/daemon_example/daemon_example.cc b/plugin/daemon_example/daemon_example.cc index 994c385796e..e73a3d336ea 100644 --- a/plugin/daemon_example/daemon_example.cc +++ b/plugin/daemon_example/daemon_example.cc @@ -81,7 +81,7 @@ pthread_handler_t mysql_heartbeat(void *p) 1 failure (cannot happen) */ -static int daemon_example_plugin_init(void *p) +static int daemon_example_plugin_init(void *p __attribute__ ((unused))) { DBUG_ENTER("daemon_example_plugin_init"); @@ -147,7 +147,7 @@ static int daemon_example_plugin_init(void *p) */ -static int daemon_example_plugin_deinit(void *p) +static int daemon_example_plugin_deinit(void *p __attribute__ ((unused))) { DBUG_ENTER("daemon_example_plugin_deinit"); char buffer[HEART_STRING_BUFFER]; diff --git a/sql/filesort.cc b/sql/filesort.cc index 2f9a96472ca..4b0170a0db0 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -1064,7 +1064,7 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek, void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length) { - uchar *reuse_end= reuse->base + reuse->max_keys * key_length; + byte *reuse_end= reuse->base + reuse->max_keys * key_length; for (uint i= 0; i < queue->elements; ++i) { BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i); @@ -1135,7 +1135,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, offset= rec_length-res_length; maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1)); to_start_filepos= my_b_tell(to_file); - strpos= (uchar*) sort_buffer; + strpos= sort_buffer; org_max_rows=max_rows= param->max_rows; /* The following will fire if there is not enough space in sort_buffer */ @@ -1147,10 +1147,10 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, DBUG_RETURN(1); /* purecov: inspected */ for (buffpek= Fb ; buffpek <= Tb ; buffpek++) { - buffpek->base= strpos; + buffpek->base= (byte*) strpos; buffpek->max_keys= maxcount; strpos+= (uint) (error= (int) read_to_buffer(from_file, buffpek, - rec_length)); + rec_length)); if (error == -1) goto err; /* purecov: inspected */ buffpek->max_keys= buffpek->mem_count; // If less data in buffers than expected @@ -1239,7 +1239,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, } } buffpek= (BUFFPEK*) queue_top(&queue); - buffpek->base= sort_buffer; + buffpek->base= (byte*) sort_buffer; buffpek->max_keys= param->keys; /* @@ -1274,7 +1274,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, else { register uchar *end; - strpos= buffpek->key+offset; + strpos= (uchar*) buffpek->key+offset; for (end= strpos+buffpek->mem_count*rec_length ; strpos != end ; strpos+= rec_length) diff --git a/sql/gen_lex_hash.cc b/sql/gen_lex_hash.cc index 2d78999017a..7a78bcf591e 100644 --- a/sql/gen_lex_hash.cc +++ b/sql/gen_lex_hash.cc @@ -481,8 +481,8 @@ int main(int argc,char **argv) printf("\nstatic unsigned int symbols_max_len=%d;\n\n", max_len2); printf("\ -static inline SYMBOL *get_hash_symbol(const char *s,\n\ - unsigned int len,bool function)\n\ +static SYMBOL *get_hash_symbol(const char *s,\n\ + unsigned int len,bool function)\n\ {\n\ register uchar *hash_map;\n\ register const char *cur_str= s;\n\ diff --git a/sql/handler.h b/sql/handler.h index 82970cc1ac6..b6fd807194e 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ 
-20,6 +20,7 @@ #pragma interface /* gcc class implementation */ #endif +#include <my_handler.h> #include <ft_global.h> #include <keycache.h> @@ -259,6 +260,7 @@ enum legacy_db_type DB_TYPE_TABLE_FUNCTION, DB_TYPE_MEMCACHE, DB_TYPE_FALCON, + DB_TYPE_MARIA, DB_TYPE_FIRST_DYNAMIC=42, DB_TYPE_DEFAULT=127 // Must be last }; diff --git a/sql/item_func.cc b/sql/item_func.cc index aa344a343de..8f3ffd00e34 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -26,6 +26,7 @@ #include <hash.h> #include <time.h> #include <ft_global.h> +#include <my_bit.h> #include "sp_head.h" #include "sp_rcontext.h" diff --git a/sql/item_func.h b/sql/item_func.h index 3306b059097..29dd524d849 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -1326,7 +1326,6 @@ public: /* for fulltext search */ -#include <ft_global.h> class Item_func_match :public Item_real_func { diff --git a/sql/log.cc b/sql/log.cc index e25f008e90c..8de6e8d7d22 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2397,6 +2397,11 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg, my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)), 0, MYF(MY_WME | MY_WAIT_IF_FULL))) { + /* + TODO: all operations creating/deleting the index file or a log, should + call my_sync_dir() or my_sync_dir_by_file() to be durable. + TODO: file creation should be done with my_create() not my_open(). + */ if (index_file_nr >= 0) my_close(index_file_nr,MYF(0)); return TRUE; diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index bf6bd374ef8..755f7c80fc2 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1592,7 +1592,7 @@ extern ulong max_connections,max_connect_errors, connect_timeout; extern ulong slave_net_timeout, slave_trans_retries; extern uint max_user_connections; extern ulong what_to_log,flush_time; -extern ulong query_buff_size, thread_stack; +extern ulong query_buff_size; extern ulong max_prepared_stmt_count, prepared_stmt_count; extern ulong binlog_cache_size, max_binlog_cache_size, open_files_limit; extern ulong max_binlog_size, max_relay_log_size; @@ -1694,7 +1694,7 @@ extern SHOW_COMP_OPTION have_innodb; extern SHOW_COMP_OPTION have_csv_db; extern SHOW_COMP_OPTION have_ndbcluster; extern SHOW_COMP_OPTION have_partition_db; - +extern SHOW_COMP_OPTION have_maria_db; extern handlerton *partition_hton; extern handlerton *myisam_hton; extern handlerton *heap_hton; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index a6525c1c78c..4b722479516 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -16,6 +16,7 @@ #include "mysql_priv.h" #include <m_ctype.h> #include <my_dir.h> +#include <my_bit.h> #include "slave.h" #include "sql_repl.h" #include "rpl_filter.h" @@ -26,6 +27,9 @@ #include "events.h" #include "../storage/myisam/ha_myisam.h" +#ifdef WITH_MARIA_STORAGE_ENGINE +#include "../storage/maria/ha_maria.h" +#endif #include "rpl_injector.h" @@ -469,7 +473,7 @@ uint volatile thread_count, thread_running; ulonglong thd_startup_options; ulong back_log, connect_timeout, concurrency, server_id; ulong table_cache_size, table_def_size; -ulong thread_stack, what_to_log; +ulong what_to_log; ulong query_buff_size, slow_launch_time, slave_open_temp_tables; ulong open_files_limit, max_binlog_size, max_relay_log_size; ulong slave_net_timeout, slave_trans_retries; @@ -531,6 +535,7 @@ char *mysqld_unix_port, *opt_mysql_tmpdir; const char **errmesg; /* Error messages */ const char *myisam_recover_options_str="OFF"; const char *myisam_stats_method_str="nulls_unequal"; +const char *maria_stats_method_str="nulls_unequal"; /* name of reference on left espression in rewritten IN 
subquery */ const char *in_left_expr_name= "<left expr>"; @@ -2224,7 +2229,7 @@ the thread stack. Please read http://www.mysql.com/doc/en/Linux.html\n\n", { fprintf(stderr,"thd: 0x%lx\n",(long) thd); print_stacktrace(thd ? (gptr) thd->thread_stack : (gptr) 0, - thread_stack); + my_thread_stack_size); } if (thd) { @@ -2368,9 +2373,9 @@ static void start_signal_handler(void) Peculiar things with ia64 platforms - it seems we only have half the stack size in reality, so we have to double it here */ - pthread_attr_setstacksize(&thr_attr,thread_stack*2); + pthread_attr_setstacksize(&thr_attr,my_thread_stack_size*2); #else - pthread_attr_setstacksize(&thr_attr,thread_stack); + pthread_attr_setstacksize(&thr_attr,my_thread_stack_size); #endif #endif @@ -3616,9 +3621,9 @@ int main(int argc, char **argv) Peculiar things with ia64 platforms - it seems we only have half the stack size in reality, so we have to double it here */ - pthread_attr_setstacksize(&connection_attrib,thread_stack*2); + pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size*2); #else - pthread_attr_setstacksize(&connection_attrib,thread_stack); + pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size); #endif #ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE { @@ -3629,15 +3634,15 @@ int main(int argc, char **argv) stack_size/= 2; #endif /* We must check if stack_size = 0 as Solaris 2.9 can return 0 here */ - if (stack_size && stack_size < thread_stack) + if (stack_size && stack_size < my_thread_stack_size) { if (global_system_variables.log_warnings) sql_print_warning("Asked for %lu thread stack, but got %ld", - thread_stack, (long) stack_size); + my_thread_stack_size, (long) stack_size); #if defined(__ia64__) || defined(__ia64) - thread_stack= stack_size*2; + my_thread_stack_size= stack_size*2; #else - thread_stack= stack_size; + my_thread_stack_size= stack_size; #endif } } @@ -4886,10 +4891,17 @@ enum options_mysqld OPT_MAX_LENGTH_FOR_SORT_DATA, OPT_MAX_WRITE_LOCK_COUNT, OPT_BULK_INSERT_BUFFER_SIZE, OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE, + OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE, OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE, - OPT_MYISAM_USE_MMAP, + OPT_MYISAM_USE_MMAP, OPT_MYISAM_REPAIR_THREADS, OPT_MYISAM_STATS_METHOD, + + OPT_MARIA_BLOCK_SIZE, + OPT_MARIA_MAX_SORT_FILE_SIZE, OPT_MARIA_SORT_BUFFER_SIZE, + OPT_MARIA_USE_MMAP, OPT_MARIA_REPAIR_THREADS, + OPT_MARIA_STATS_METHOD, + OPT_NET_BUFFER_LENGTH, OPT_NET_RETRY_COUNT, OPT_NET_READ_TIMEOUT, OPT_NET_WRITE_TIMEOUT, OPT_OPEN_FILES_LIMIT, @@ -4903,7 +4915,7 @@ enum options_mysqld OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, - OPT_WAIT_TIMEOUT, OPT_MYISAM_REPAIR_THREADS, + OPT_WAIT_TIMEOUT, OPT_INNODB_MIRRORED_LOG_GROUPS, OPT_INNODB_LOG_FILES_IN_GROUP, OPT_INNODB_LOG_FILE_SIZE, @@ -6042,6 +6054,49 @@ log and this option does nothing anymore.", 0 #endif , 0, 2, 0, 1, 0}, +#ifdef WITH_MARIA_STORAGE_ENGINE + {"maria_block_size", OPT_MARIA_BLOCK_SIZE, + "Block size to be used for MARIA index pages.", + (gptr*) &maria_block_size, + (gptr*) &maria_block_size, 0, GET_ULONG, REQUIRED_ARG, + MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, + MARIA_MAX_KEY_BLOCK_LENGTH, + 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0}, + {"maria_key_buffer_size", OPT_KEY_BUFFER_SIZE, + "The size of the buffer used for index blocks for Maria tables. 
Increase " + "this to get better index handling (for all reads and multiple writes) to " + "as much as you can afford; 64M on a 256M machine that mainly runs MySQL " + "is quite common.", + (gptr*) &maria_key_cache_var.param_buff_size, (gptr*) 0, + 0, (GET_ULL | GET_ASK_ADDR), + REQUIRED_ARG, KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~(ulong) 0, MALLOC_OVERHEAD, + IO_SIZE, 0}, + {"maria_max_sort_file_size", OPT_MARIA_MAX_SORT_FILE_SIZE, + "Don't use the fast sort index method to created index if the temporary " + "file would get bigger than this.", + (gptr*) &global_system_variables.maria_max_sort_file_size, + (gptr*) &max_system_variables.maria_max_sort_file_size, 0, + GET_ULL, REQUIRED_ARG, (longlong) LONG_MAX, 0, (ulonglong) MAX_FILE_SIZE, + 0, 1024*1024, 0}, + {"maria_repair_threads", OPT_MARIA_REPAIR_THREADS, + "Number of threads to use when repairing maria tables. The value of 1 " + "disables parallel repair.", + (gptr*) &global_system_variables.maria_repair_threads, + (gptr*) &max_system_variables.maria_repair_threads, 0, + GET_ULONG, REQUIRED_ARG, 1, 1, ~0L, 0, 1, 0}, + {"maria_sort_buffer_size", OPT_MARIA_SORT_BUFFER_SIZE, + "The buffer that is allocated when sorting the index when doing a REPAIR " + "or when creating indexes with CREATE INDEX or ALTER TABLE.", + (gptr*) &global_system_variables.maria_sort_buff_size, + (gptr*) &max_system_variables.maria_sort_buff_size, 0, + GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0}, + {"maria_stats_method", OPT_MARIA_STATS_METHOD, + "Specifies how maria index statistics collection code should threat NULLs. " + "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " + "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", + (gptr*) &maria_stats_method_str, (gptr*) &maria_stats_method_str, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif {"max_allowed_packet", OPT_MAX_ALLOWED_PACKET, "Max packetlength to send/receive from to server.", (gptr*) &global_system_variables.max_allowed_packet, @@ -6145,12 +6200,6 @@ The minimum value for this variable is 4096.", (gptr*) &myisam_data_pointer_size, (gptr*) &myisam_data_pointer_size, 0, GET_ULONG, REQUIRED_ARG, 6, 2, 7, 0, 1, 0}, - {"myisam_max_extra_sort_file_size", OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE, - "Deprecated option", - (gptr*) &global_system_variables.myisam_max_extra_sort_file_size, - (gptr*) &max_system_variables.myisam_max_extra_sort_file_size, - 0, GET_ULL, REQUIRED_ARG, (ulonglong) MI_MAX_TEMP_LENGTH, - 0, (ulonglong) MAX_FILE_SIZE, 0, 1, 0}, {"myisam_max_sort_file_size", OPT_MYISAM_MAX_SORT_FILE_SIZE, "Don't use the fast sort index method to created index if the temporary file would get bigger than this.", (gptr*) &global_system_variables.myisam_max_sort_file_size, @@ -6363,8 +6412,8 @@ The minimum value for this variable is 4096.", REQUIRED_ARG, 20, 1, 16384, 0, 1, 0}, #endif {"thread_stack", OPT_THREAD_STACK, - "The stack size for each thread.", (gptr*) &thread_stack, - (gptr*) &thread_stack, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK, + "The stack size for each thread.", (gptr*) &my_thread_stack_size, + (gptr*) &my_thread_stack_size, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK, 1024L*128L, ~0L, 0, 1024, 0}, { "time_format", OPT_TIME_FORMAT, "The TIME format (for future).", @@ -7109,15 +7158,24 @@ static void mysql_init_variables(void) global_query_id= thread_id= 1L; strmov(server_version, MYSQL_SERVER_VERSION); myisam_recover_options_str= sql_mode_str= "OFF"; - myisam_stats_method_str= "nulls_unequal"; + myisam_stats_method_str= 
maria_stats_method_str= "nulls_unequal"; my_bind_addr = htonl(INADDR_ANY); threads.empty(); thread_cache.empty(); key_caches.empty(); if (!(dflt_key_cache= get_or_create_key_cache(default_key_cache_base.str, - default_key_cache_base.length))) + default_key_cache_base.length))) exit(1); - multi_keycache_init(); /* set key_cache_hash.default_value = dflt_key_cache */ +#ifdef WITH_MARIA_STORAGE_ENGINE + if (!(maria_key_cache= get_or_create_key_cache(maria_key_cache_base.str, + maria_key_cache_base.length))) + exit(1); + maria_key_cache->param_buff_size= maria_key_cache_var.param_buff_size; + maria_key_cache->param_block_size= maria_block_size; +#endif + + /* set key_cache_hash.default_value = dflt_key_cache */ + multi_keycache_init(); /* Set directory paths */ strmake(language, LANGUAGE, sizeof(language)-1); @@ -7159,6 +7217,7 @@ static void mysql_init_variables(void) when collecting index statistics for MyISAM tables. */ global_system_variables.myisam_stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; + global_system_variables.maria_stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; /* Variables that depends on compile options */ #ifndef DBUG_OFF @@ -7176,6 +7235,11 @@ static void mysql_init_variables(void) #else have_csv_db= SHOW_OPTION_NO; #endif +#ifdef WITH_MARIA_STORAGE_ENGINE + have_maria_db= SHOW_OPTION_YES; +#else + have_maria_db= SHOW_OPTION_NO; +#endif #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE have_ndbcluster= SHOW_OPTION_DISABLED; #else @@ -7773,7 +7837,6 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), int method; LINT_INIT(method_conv); - myisam_stats_method_str= argument; if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) { fprintf(stderr, "Invalid value of myisam_stats_method: %s.\n", argument); @@ -8267,11 +8330,13 @@ void refresh_status(THD *thd) #undef have_innodb #undef have_ndbcluster #undef have_csv_db +#undef have_maria_db SHOW_COMP_OPTION have_innodb= SHOW_OPTION_NO; SHOW_COMP_OPTION have_ndbcluster= SHOW_OPTION_NO; SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO; SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_maria_db= SHOW_OPTION_NO; #ifndef WITH_INNOBASE_STORAGE_ENGINE uint innobase_flush_log_at_trx_commit; diff --git a/sql/set_var.cc b/sql/set_var.cc index ad5559165f3..2d40cb67bab 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -55,6 +55,9 @@ #include <thr_alarm.h> #include <myisam.h> #include <my_dir.h> +#ifdef WITH_MARIA_STORAGE_ENGINE +#include <maria.h> +#endif #include "events.h" @@ -141,6 +144,7 @@ static void fix_max_join_size(THD *thd, enum_var_type type); static void fix_query_cache_size(THD *thd, enum_var_type type); static void fix_query_cache_min_res_unit(THD *thd, enum_var_type type); static void fix_myisam_max_sort_file_size(THD *thd, enum_var_type type); +static void fix_maria_max_sort_file_size(THD *thd, enum_var_type type); static void fix_max_binlog_size(THD *thd, enum_var_type type); static void fix_max_relay_log_size(THD *thd, enum_var_type type); static void fix_max_connections(THD *thd, enum_var_type type); @@ -343,6 +347,14 @@ sys_var_thd_enum sys_myisam_stats_method("myisam_stats_method", &myisam_stats_method_typelib, NULL); +sys_var_thd_ulonglong sys_maria_max_sort_file_size("maria_max_sort_file_size", &SV::maria_max_sort_file_size, fix_maria_max_sort_file_size, 1); +sys_var_thd_ulong sys_maria_repair_threads("maria_repair_threads", &SV::maria_repair_threads); +sys_var_thd_ulong sys_maria_sort_buffer_size("maria_sort_buffer_size", &SV::maria_sort_buff_size); 
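As an orientation aside (illustration only, not part of the patch): each sys_var_thd_* object declared here binds a server variable name to a per-session field of struct system_variables — the maria_* fields are added to sql_class.h later in this patch — and handler code then reads the value through the owning THD, exactly as ha_maria.cc does with thd->variables.maria_stats_method. A minimal sketch; the helper name maria_session_sort_buffer is hypothetical:

#include "mysql_priv.h"   /* THD, struct system_variables */

/* Sketch, not patch code: how sql-layer code reaches the per-session value
   bound by sys_maria_sort_buffer_size above. */
static ulong maria_session_sort_buffer(THD *thd)
{
  /* Per-connection copy, initialized from global_system_variables when the
     connection starts and changed by SET SESSION maria_sort_buffer_size= ... */
  return thd->variables.maria_sort_buff_size;
}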
+sys_var_thd_enum sys_maria_stats_method("maria_stats_method", + &SV::maria_stats_method, + &myisam_stats_method_typelib, + NULL); + sys_var_thd_ulong sys_net_buffer_length("net_buffer_length", &SV::net_buffer_length); sys_var_thd_ulong sys_net_read_timeout("net_read_timeout", @@ -667,6 +679,7 @@ sys_var_have_variable sys_have_csv_db("have_csv", &have_csv_db); sys_var_have_variable sys_have_dlopen("have_dynamic_loading", &have_dlopen); sys_var_have_variable sys_have_geometry("have_geometry", &have_geometry); sys_var_have_variable sys_have_innodb("have_innodb", &have_innodb); +sys_var_have_variable sys_have_maria_db("have_maria", &have_maria_db); sys_var_have_variable sys_have_ndbcluster("have_ndbcluster", &have_ndbcluster); sys_var_have_variable sys_have_openssl("have_openssl", &have_openssl); sys_var_have_variable sys_have_partition_db("have_partitioning", @@ -792,6 +805,7 @@ SHOW_VAR init_vars[]= { {sys_have_dlopen.name, (char*) &have_dlopen, SHOW_HAVE}, {sys_have_geometry.name, (char*) &have_geometry, SHOW_HAVE}, {sys_have_innodb.name, (char*) &have_innodb, SHOW_HAVE}, + {sys_have_maria_db.name, (char*) &have_maria_db, SHOW_HAVE}, {sys_have_ndbcluster.name, (char*) &have_ndbcluster, SHOW_HAVE}, {sys_have_openssl.name, (char*) &have_openssl, SHOW_HAVE}, {sys_have_partition_db.name,(char*) &have_partition_db, SHOW_HAVE}, @@ -871,6 +885,15 @@ SHOW_VAR init_vars[]= { {sys_low_priority_updates.name, (char*) &sys_low_priority_updates, SHOW_SYS}, {"lower_case_file_system", (char*) &lower_case_file_system, SHOW_MY_BOOL}, {"lower_case_table_names", (char*) &lower_case_table_names, SHOW_INT}, + + {sys_maria_max_sort_file_size.name, (char*) &sys_maria_max_sort_file_size, + SHOW_SYS}, + {sys_maria_repair_threads.name, (char*) &sys_maria_repair_threads, + SHOW_SYS}, + {sys_maria_sort_buffer_size.name, (char*) &sys_maria_sort_buffer_size, + SHOW_SYS}, + {sys_maria_stats_method.name, (char*) &sys_maria_stats_method, SHOW_SYS}, + {sys_max_allowed_packet.name,(char*) &sys_max_allowed_packet, SHOW_SYS}, {sys_max_binlog_cache_size.name,(char*) &sys_max_binlog_cache_size, SHOW_SYS}, {sys_max_binlog_size.name, (char*) &sys_max_binlog_size, SHOW_SYS}, @@ -1019,10 +1042,10 @@ SHOW_VAR init_vars[]= { #if HAVE_POOL_OF_THREADS == 1 {sys_thread_pool_size.name, (char*) &sys_thread_pool_size, SHOW_SYS}, #endif - {"thread_stack", (char*) &thread_stack, SHOW_LONG}, + {"thread_stack", (char*) &my_thread_stack_size, SHOW_LONG}, {sys_time_format.name, (char*) &sys_time_format, SHOW_SYS}, {"time_zone", (char*) &sys_time_zone, SHOW_SYS}, - {sys_timed_mutexes.name, (char*) &sys_timed_mutexes, SHOW_SYS}, + {sys_timed_mutexes.name, (char*) &sys_timed_mutexes, SHOW_SYS}, {sys_tmp_table_size.name, (char*) &sys_tmp_table_size, SHOW_SYS}, {sys_tmpdir.name, (char*) &sys_tmpdir, SHOW_SYS}, {sys_trans_alloc_block_size.name, (char*) &sys_trans_alloc_block_size, @@ -1165,6 +1188,16 @@ fix_myisam_max_sort_file_size(THD *thd, enum_var_type type) (my_off_t) global_system_variables.myisam_max_sort_file_size; } +static void +fix_maria_max_sort_file_size(THD *thd, enum_var_type type) +{ +#ifdef WITH_MARIA_STORAGE_ENGINE + maria_max_temp_length= + (my_off_t) global_system_variables.myisam_max_sort_file_size; +#endif +} + + /* Set the OPTION_BIG_SELECTS flag if max_join_size == HA_POS_ERROR */ @@ -2364,6 +2397,7 @@ void sys_var_collation_server::set_default(THD *thd, enum_var_type type) LEX_STRING default_key_cache_base= {(char *) "default", 7 }; +LEX_STRING maria_key_cache_base= {(char *) "maria", 5 }; static KEY_CACHE zero_key_cache; @@ 
-2373,7 +2407,7 @@ KEY_CACHE *get_key_cache(LEX_STRING *cache_name) if (!cache_name || ! cache_name->length) cache_name= &default_key_cache_base; return ((KEY_CACHE*) find_named(&key_caches, - cache_name->str, cache_name->length, 0)); + cache_name->str, cache_name->length, 0)); } diff --git a/sql/set_var.h b/sql/set_var.h index 8887d91ec69..0540f9964e0 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -1120,6 +1120,7 @@ public: extern sys_var_thd_bool sys_old_alter_table; extern sys_var_thd_bool sys_old_passwords; extern LEX_STRING default_key_cache_base; +extern LEX_STRING maria_key_cache_base; /* For sql_yacc */ struct sys_var_with_base diff --git a/sql/sql_class.h b/sql/sql_class.h index 1f5f7aedbb4..cbfbc72d0aa 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -180,7 +180,7 @@ class Time_zone; struct system_variables { - ulonglong myisam_max_extra_sort_file_size; + ulonglong maria_max_sort_file_size; ulonglong myisam_max_sort_file_size; ulonglong max_heap_table_size; ulonglong tmp_table_size; @@ -196,6 +196,9 @@ struct system_variables ulong max_sort_length; ulong max_tmp_tables; ulong max_insert_delayed_threads; + ulong maria_repair_threads; + ulong maria_sort_buff_size; + ulong maria_stats_method; ulong multi_range_count; ulong myisam_repair_threads; ulong myisam_sort_buff_size; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 11eb510d6c8..ec2807750bd 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -4868,10 +4868,10 @@ bool check_stack_overrun(THD *thd, long margin, long stack_used; DBUG_ASSERT(thd == current_thd); if ((stack_used=used_stack(thd->thread_stack,(char*) &stack_used)) >= - (long) (thread_stack - margin)) + (long) (my_thread_stack_size - margin)) { sprintf(errbuff[0],ER(ER_STACK_OVERRUN_NEED_MORE), - stack_used,thread_stack,margin); + stack_used,my_thread_stack_size,margin); my_message(ER_STACK_OVERRUN_NEED_MORE,errbuff[0],MYF(0)); thd->fatal_error(); return 1; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 75b3f5ae981..f58433cd220 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -25,6 +25,7 @@ #include "sql_cursor.h" #include <m_ctype.h> +#include <my_bit.h> #include <hash.h> #include <ft_global.h> diff --git a/sql/sql_sort.h b/sql/sql_sort.h index da28ca07e2c..1572f6304e1 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -34,7 +34,9 @@ the callback function 'unpack_addon_fields'. 
*/ -typedef struct st_sort_addon_field { /* Sort addon packed field */ +typedef struct st_sort_addon_field +{ + /* Sort addon packed field */ Field *field; /* Original field */ uint offset; /* Offset from the last sorted field */ uint null_offset; /* Offset to to null bit from the last sorted field */ @@ -42,13 +44,6 @@ typedef struct st_sort_addon_field { /* Sort addon packed field */ uint8 null_bit; /* Null bit mask for the field */ } SORT_ADDON_FIELD; -typedef struct st_buffpek { /* Struktur om sorteringsbuffrarna */ - my_off_t file_pos; /* Where we are in the sort file */ - uchar *base,*key; /* key pointers */ - ha_rows count; /* Number of rows in table */ - ulong mem_count; /* numbers of keys in memory */ - ulong max_keys; /* Max keys in buffert */ -} BUFFPEK; typedef struct st_sort_param { uint rec_length; /* Length of sorted records */ diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 9e30cf5878c..07bd1390116 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -459,7 +459,7 @@ void mysql_print_status() VOID(my_getwd(current_dir, sizeof(current_dir),MYF(0))); printf("Current dir: %s\n", current_dir); printf("Running threads: %d Stack size: %ld\n", thread_count, - (long) thread_stack); + (long) my_thread_stack_size); thr_print_locks(); // Write some debug info #ifndef DBUG_OFF print_cached_tables(); @@ -534,7 +534,7 @@ Estimated memory (with thread stack): %ld\n", (int) info.uordblks, (int) info.fordblks, (int) info.keepcost, - (long) (thread_count * thread_stack + info.hblkhd + info.arena)); + (long) (thread_count * my_thread_stack_size + info.hblkhd + info.arena)); #endif Events::get_instance()->dump_internal_status(); diff --git a/sql/uniques.cc b/sql/uniques.cc index 9eb827f62a3..17c9c875659 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -449,7 +449,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, */ for (top= begin; top != end; ++top) { - top->base= merge_buffer + (top - begin) * piece_size; + top->base= (byte*) (merge_buffer + (top - begin) * piece_size); top->max_keys= max_key_count_per_piece; bytes_read= read_to_buffer(file, top, key_length); if (bytes_read == (uint) (-1)) diff --git a/sql/unireg.cc b/sql/unireg.cc index d90420313a6..c55d6db3c85 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -284,8 +284,10 @@ bool mysql_create_frm(THD *thd, const char *file_name, my_free((gptr) keybuff, MYF(0)); if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) && - my_sync(file, MYF(MY_WME))) - goto err2; + (my_sync(file, MYF(MY_WME)) || + my_sync_dir_by_file(file_name, MYF(MY_WME)))) + goto err2; + if (my_close(file,MYF(MY_WME))) goto err3; diff --git a/storage/Makefile.am b/storage/Makefile.am index b978453d29d..eec499aabf4 100644 --- a/storage/Makefile.am +++ b/storage/Makefile.am @@ -19,7 +19,12 @@ AUTOMAKE_OPTIONS = foreign # These are built from source in the Docs directory EXTRA_DIST = -SUBDIRS = @mysql_se_dirs@ +# Until we remove fulltext-related references from Maria to MyISAM +# MyISAM must be built before Maria, which is not the case by default +# because of alphabetical order +# So we put myisam first; this is very ugly regarding plugins' logic +# but it works, and we'll remove it soon. 
+SUBDIRS = myisam @mysql_se_dirs@ # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt new file mode 100644 index 00000000000..cfe23054e2f --- /dev/null +++ b/storage/maria/CMakeLists.txt @@ -0,0 +1 @@ +# empty for the moment; will fill it when we build under Windows diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am new file mode 100644 index 00000000000..b6c713571d7 --- /dev/null +++ b/storage/maria/Makefile.am @@ -0,0 +1,162 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/sql \ + -I$(srcdir) +WRAPLIBS= + +LDADD = + +DEFS = @DEFS@ + +# "." is needed first because tests in unittest need libmaria +SUBDIRS = . unittest + +EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt plug.in +pkgdata_DATA = ma_test_all ma_test_all.res +pkglib_LIBRARIES = libmaria.a +bin_PROGRAMS = maria_chk maria_pack maria_ftdump +maria_chk_DEPENDENCIES= $(LIBRARIES) +# Only reason to link with libmyisam.a here is that it's where some fulltext +# pieces are (but soon we'll remove fulltext dependencies from Maria). +# For now, it imposes that storage/myisam be built before storage/maria. 
+maria_chk_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +maria_pack_DEPENDENCIES=$(LIBRARIES) +maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test +noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ + ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \ + ma_ft_eval.h trnman.h lockman.h tablockman.h \ + ma_control_file.h ha_maria.h ma_blockrec.h +ma_test1_DEPENDENCIES= $(LIBRARIES) +ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +ma_test2_DEPENDENCIES= $(LIBRARIES) +ma_test2_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +ma_test3_DEPENDENCIES= $(LIBRARIES) +ma_test3_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +#ma_ft_test1_DEPENDENCIES= $(LIBRARIES) +#ma_ft_eval_DEPENDENCIES= $(LIBRARIES) +maria_ftdump_DEPENDENCIES= $(LIBRARIES) +maria_ftdump_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +ma_rt_test_DEPENDENCIES= $(LIBRARIES) +ma_rt_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +ma_sp_test_DEPENDENCIES= $(LIBRARIES) +ma_sp_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ + ma_rnext.c ma_rnext_same.c \ + ma_search.c ma_page.c ma_key.c ma_locking.c \ + ma_rrnd.c ma_scan.c ma_cache.c \ + ma_statrec.c ma_packrec.c ma_dynrec.c \ + ma_blockrec.c ma_bitmap.c \ + ma_update.c ma_write.c ma_unique.c \ + ma_delete.c \ + ma_rprev.c ma_rfirst.c ma_rlast.c ma_rsame.c \ + ma_rsamepos.c ma_panic.c ma_close.c ma_create.c\ + ma_range.c ma_dbug.c ma_checksum.c \ + ma_changed.c ma_static.c ma_delete_all.c \ + ma_delete_table.c ma_rename.c ma_check.c \ + ma_keycache.c ma_preload.c ma_ft_parser.c \ + ma_ft_update.c ma_ft_boolean_search.c \ + ma_ft_nlq_search.c ft_maria.c ma_sort.c \ + ha_maria.cc trnman.c lockman.c tablockman.c \ + ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ + ma_sp_key.c ma_control_file.c ma_loghandler.c +CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? 
+ +SUFFIXES = .sh + +.sh: + @RM@ -f $@ $@-t + @SED@ \ + -e 's!@''bindir''@!$(bindir)!g' \ + -e 's!@''scriptdir''@!$(bindir)!g' \ + -e 's!@''prefix''@!$(prefix)!g' \ + -e 's!@''datadir''@!$(datadir)!g' \ + -e 's!@''localstatedir''@!$(localstatedir)!g' \ + -e 's!@''libexecdir''@!$(libexecdir)!g' \ + -e 's!@''CC''@!@CC@!'\ + -e 's!@''CXX''@!@CXX@!'\ + -e 's!@''GXX''@!@GXX@!'\ + -e 's!@''PERL''@!@PERL@!' \ + -e 's!@''CFLAGS''@!@SAVE_CFLAGS@!'\ + -e 's!@''CXXFLAGS''@!@SAVE_CXXFLAGS@!'\ + -e 's!@''LDFLAGS''@!@SAVE_LDFLAGS@!'\ + -e 's!@''VERSION''@!@VERSION@!' \ + -e 's!@''MYSQL_SERVER_SUFFIX''@!@MYSQL_SERVER_SUFFIX@!' \ + -e 's!@''COMPILATION_COMMENT''@!@COMPILATION_COMMENT@!' \ + -e 's!@''MACHINE_TYPE''@!@MACHINE_TYPE@!' \ + -e 's!@''HOSTNAME''@!@HOSTNAME@!' \ + -e 's!@''SYSTEM_TYPE''@!@SYSTEM_TYPE@!' \ + -e 's!@''CHECK_PID''@!@CHECK_PID@!' \ + -e 's!@''FIND_PROC''@!@FIND_PROC@!' \ + -e 's!@''MYSQLD_DEFAULT_SWITCHES''@!@MYSQLD_DEFAULT_SWITCHES@!' \ + -e 's!@''MYSQL_UNIX_ADDR''@!@MYSQL_UNIX_ADDR@!' \ + -e 's!@''TARGET_LINUX''@!@TARGET_LINUX@!' \ + -e "s!@""CONF_COMMAND""@!@CONF_COMMAND@!" \ + -e 's!@''MYSQLD_USER''@!@MYSQLD_USER@!' \ + -e 's!@''sysconfdir''@!@sysconfdir@!' \ + -e 's!@''SHORT_MYSQL_INTRO''@!@SHORT_MYSQL_INTRO@!' \ + -e 's!@''SHARED_LIB_VERSION''@!@SHARED_LIB_VERSION@!' \ + -e 's!@''MYSQL_BASE_VERSION''@!@MYSQL_BASE_VERSION@!' \ + -e 's!@''MYSQL_NO_DASH_VERSION''@!@MYSQL_NO_DASH_VERSION@!' \ + -e 's!@''MYSQL_TCP_PORT''@!@MYSQL_TCP_PORT@!' \ + -e 's!@''PERL_DBI_VERSION''@!@PERL_DBI_VERSION@!' \ + -e 's!@''PERL_DBD_VERSION''@!@PERL_DBD_VERSION@!' \ + -e 's!@''PERL_DATA_DUMPER''@!@PERL_DATA_DUMPER@!' \ + $< > $@-t + @CHMOD@ +x $@-t + @MV@ $@-t $@ + +# Don't update the files from bitkeeper +%::SCCS/s.% diff --git a/storage/maria/ft_maria.c b/storage/maria/ft_maria.c new file mode 100644 index 00000000000..7104c6704ba --- /dev/null +++ b/storage/maria/ft_maria.c @@ -0,0 +1,49 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. 
Golubchik, who has a shared copyright to this code */ + +/* + This function is for interface functions between fulltext and maria +*/ + +#include "ma_ftdefs.h" + +FT_INFO *maria_ft_init_search(uint flags, void *info, uint keynr, + byte *query, uint query_len, CHARSET_INFO *cs, + byte *record) +{ + FT_INFO *res; + if (flags & FT_BOOL) + res= maria_ft_init_boolean_search((MARIA_HA *) info, keynr, query, + query_len, cs); + else + res= maria_ft_init_nlq_search((MARIA_HA *) info, keynr, query, query_len, + flags, record); + return res; +} + +const struct _ft_vft _ma_ft_vft_nlq = { + maria_ft_nlq_read_next, maria_ft_nlq_find_relevance, + maria_ft_nlq_close_search, maria_ft_nlq_get_relevance, + maria_ft_nlq_reinit_search +}; +const struct _ft_vft _ma_ft_vft_boolean = { + maria_ft_boolean_read_next, maria_ft_boolean_find_relevance, + maria_ft_boolean_close_search, maria_ft_boolean_get_relevance, + maria_ft_boolean_reinit_search +}; + diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc new file mode 100644 index 00000000000..102f987f01a --- /dev/null +++ b/storage/maria/ha_maria.cc @@ -0,0 +1,1896 @@ +/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#define MYSQL_SERVER 1 +#include "mysql_priv.h" +#include <mysql/plugin.h> +#include <m_ctype.h> +#include <myisampack.h> +#include <my_bit.h> +#include "ha_maria.h" + +#include "maria_def.h" +#include "ma_rt_index.h" + +ulong maria_recover_options= HA_RECOVER_NONE; + +/* bits in maria_recover_options */ +const char *maria_recover_names[]= +{ + "DEFAULT", "BACKUP", "FORCE", "QUICK", NullS +}; +TYPELIB maria_recover_typelib= +{ + array_elements(maria_recover_names) - 1, "", + maria_recover_names, NULL +}; + +const char *maria_stats_method_names[]= +{ + "nulls_unequal", "nulls_equal", + "nulls_ignored", NullS +}; +TYPELIB maria_stats_method_typelib= +{ + array_elements(maria_stats_method_names) - 1, "", + maria_stats_method_names, NULL +}; + + +/***************************************************************************** +** MARIA tables +*****************************************************************************/ + +static handler *maria_create_handler(handlerton *hton, + TABLE_SHARE * table, + MEM_ROOT *mem_root) +{ + return new (mem_root) ha_maria(hton, table); +} + + +// collect errors printed by maria_check routines + +static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type, + const char *fmt, va_list args) +{ + THD *thd= (THD *) param->thd; + Protocol *protocol= thd->protocol; + uint length, msg_length; + char msgbuf[MARIA_MAX_MSG_BUF]; + char name[NAME_LEN * 2 + 2]; + + msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args); + msgbuf[sizeof(msgbuf) - 1]= 0; // healthy paranoia + + 
DBUG_PRINT(msg_type, ("message: %s", msgbuf)); + + if (!thd->vio_ok()) + { + sql_print_error(msgbuf); + return; + } + + if (param->testflag & + (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR)) + { + my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME)); + return; + } + length= (uint) (strxmov(name, param->db_name, ".", param->table_name, + NullS) - name); + protocol->prepare_for_resend(); + protocol->store(name, length, system_charset_info); + protocol->store(param->op_name, system_charset_info); + protocol->store(msg_type, system_charset_info); + protocol->store(msgbuf, msg_length, system_charset_info); + if (protocol->write()) + sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n", + msgbuf); + return; +} + +extern "C" { + +volatile int *_ma_killed_ptr(HA_CHECK *param) +{ + /* In theory Unsafe conversion, but should be ok for now */ + return (int*) &(((THD *) (param->thd))->killed); +} + + +void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...) +{ + param->error_printed |= 1; + param->out_flag |= O_DATA_LOST; + va_list args; + va_start(args, fmt); + _ma_check_print_msg(param, "error", fmt, args); + va_end(args); +} + + +void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + _ma_check_print_msg(param, "info", fmt, args); + va_end(args); +} + + +void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) +{ + param->warning_printed= 1; + param->out_flag |= O_DATA_LOST; + va_list args; + va_start(args, fmt); + _ma_check_print_msg(param, "warning", fmt, args); + va_end(args); +} + +} + + +ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg): +handler(hton, table_arg), file(0), +int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | + HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | + HA_FILE_BASED | HA_CAN_GEOMETRY | HA_NO_TRANSACTIONS | + HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | + HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT), +can_enable_indexes(1) +{} + + +handler *ha_maria::clone(MEM_ROOT *mem_root) +{ + ha_maria *new_handler= static_cast <ha_maria *>(handler::clone(mem_root)); + if (new_handler) + new_handler->file->state= file->state; + return new_handler; +} + + +static const char *ha_maria_exts[]= +{ + MARIA_NAME_IEXT, + MARIA_NAME_DEXT, + NullS +}; + + +const char **ha_maria::bas_ext() const +{ + return ha_maria_exts; +} + + +const char *ha_maria::index_type(uint key_number) +{ + return ((table->key_info[key_number].flags & HA_FULLTEXT) ? + "FULLTEXT" : + (table->key_info[key_number].flags & HA_SPATIAL) ? + "SPATIAL" : + (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ? 
+ "RTREE" : "BTREE"); +} + + +#ifdef HAVE_REPLICATION +int ha_maria::net_read_dump(NET * net) +{ + int data_fd= file->dfile; + int error= 0; + + my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME)); + for (;;) + { + ulong packet_len= my_net_read(net); + if (!packet_len) + break; // end of file + if (packet_len == packet_error) + { + sql_print_error("ha_maria::net_read_dump - read error "); + error= -1; + goto err; + } + if (my_write(data_fd, (byte *) net->read_pos, (uint) packet_len, + MYF(MY_WME | MY_FNABP))) + { + error= errno; + goto err; + } + } +err: + return error; +} + + +int ha_maria::dump(THD * thd, int fd) +{ + MARIA_SHARE *share= file->s; + NET *net= &thd->net; + uint block_size= share->block_size; + my_off_t bytes_to_read= share->state.state.data_file_length; + int data_fd= file->dfile; + byte *buf= (byte *) my_malloc(block_size, MYF(MY_WME)); + if (!buf) + return ENOMEM; + + int error= 0; + my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME)); + for (; bytes_to_read > 0;) + { + uint bytes= my_read(data_fd, buf, block_size, MYF(MY_WME)); + if (bytes == MY_FILE_ERROR) + { + error= errno; + goto err; + } + + if (fd >= 0) + { + if (my_write(fd, buf, bytes, MYF(MY_WME | MY_FNABP))) + { + error= errno ? errno : EPIPE; + goto err; + } + } + else + { + if (my_net_write(net, (char*) buf, bytes)) + { + error= errno ? errno : EPIPE; + goto err; + } + } + bytes_to_read -= bytes; + } + + if (fd < 0) + { + if (my_net_write(net, "", 0)) + error= errno ? errno : EPIPE; + net_flush(net); + } + +err: + my_free((gptr) buf, MYF(0)); + return error; +} +#endif /* HAVE_REPLICATION */ + + +bool ha_maria::check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE *table, + uint count, + bool called_by_privileged_thread) +{ + /* + To be able to open and lock for reading system tables like 'mysql.proc', + when we already have some tables opened and locked, and avoid deadlocks + we have to disallow write-locking of these tables with any other tables. + */ + if (table->s->system_table && + table->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE && count != 1) + { + my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table->s->db.str, + table->s->table_name.str); + return FALSE; + } + + /* + Deny locking of the log tables, which is incompatible with + concurrent insert. Unless called from a logger THD (general_log_thd + or slow_log_thd) or by a privileged thread. + */ + if (!called_by_privileged_thread) + return check_if_log_table_locking_is_allowed(sql_command, type, table); + + return TRUE; +} + + + /* Name is here without an extension */ + +int ha_maria::open(const char *name, int mode, uint test_if_locked) +{ + uint i; + if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER))) + return (my_errno ? 
my_errno : -1);
+
+  if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
+    VOID(maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0));
+
+#ifdef NOT_USED
+  if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap)
+    VOID(maria_extra(file, HA_EXTRA_MMAP, 0));
+#endif
+
+  info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+  if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED))
+    VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0));
+  if (!table->s->db_record_offset)
+    int_table_flags |= HA_REC_NOT_IN_SEQ;
+  if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+    int_table_flags |= HA_HAS_CHECKSUM;
+
+  for (i= 0; i < table->s->keys; i++)
+  {
+    struct st_plugin_int *parser= table->key_info[i].parser;
+    if (table->key_info[i].flags & HA_USES_PARSER)
+      file->s->keyinfo[i].parser=
+        (struct st_mysql_ftparser *) parser->plugin->info;
+    table->key_info[i].block_size= file->s->keyinfo[i].block_length;
+  }
+  return (0);
+}
+
+
+int ha_maria::close(void)
+{
+  MARIA_HA *tmp= file;
+  file= 0;
+  return maria_close(tmp);
+}
+
+
+int ha_maria::write_row(byte * buf)
+{
+  statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status);
+
+  /* If we have a timestamp column, update it to the current time */
+  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
+    table->timestamp_field->set_time();
+
+  /*
+    If we have an auto_increment column and we are writing a changed row
+    or a new row, then update the auto_increment value in the record.
+  */
+  if (table->next_number_field && buf == table->record[0])
+  {
+    int error;
+    if ((error= update_auto_increment()))
+      return error;
+  }
+  return maria_write(file, buf);
+}
+
+
+int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
+{
+  if (!file)
+    return HA_ADMIN_INTERNAL_ERROR;
+  int error;
+  HA_CHECK param;
+  MARIA_SHARE *share= file->s;
+  const char *old_proc_info= thd->proc_info;
+
+  thd->proc_info= "Checking table";
+  maria_chk_init(&param);
+  param.thd= thd;
+  param.op_name= "check";
+  param.db_name= table->s->db.str;
+  param.table_name= table->alias;
+  param.testflag= check_opt->flags | T_CHECK | T_SILENT;
+  param.stats_method= (enum_handler_stats_method) thd->variables.
+    maria_stats_method;
+
+  if (!(table->db_stat & HA_READ_ONLY))
+    param.testflag |= T_STATISTICS;
+  param.using_global_keycache= 1;
+
+  if (!maria_is_crashed(file) &&
+      (((param.testflag & T_CHECK_ONLY_CHANGED) &&
+        !(share->state.changed & (STATE_CHANGED | STATE_CRASHED |
+                                  STATE_CRASHED_ON_REPAIR)) &&
+        share->state.open_count == 0) ||
+       ((param.testflag & T_FAST) && (share->state.open_count ==
+                                      (uint) (share->global_changed ? 1 :
+                                              0)))))
+    return HA_ADMIN_ALREADY_DONE;
+
+  error= maria_chk_status(&param, file);       // Not fatal
+  error= maria_chk_size(&param, file);
+  if (!error)
+    error |= maria_chk_del(&param, file, param.testflag);
+  if (!error)
+    error= maria_chk_key(&param, file);
+  if (!error)
+  {
+    if ((!(param.testflag & T_QUICK) &&
+         ((share->options &
+           (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ||
+          (param.testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file))
+    {
+      uint old_testflag= param.testflag;
+      param.testflag |= T_MEDIUM;
+      if (!(error= init_io_cache(&param.read_cache, file->dfile,
+                                 my_default_record_cache_size, READ_CACHE,
+                                 share->pack.header_length, 1, MYF(MY_WME))))
+      {
+        error= maria_chk_data_link(&param, file, param.testflag & T_EXTEND);
+        end_io_cache(&(param.read_cache));
+      }
+      param.testflag= old_testflag;
+    }
+  }
+  if (!error)
+  {
+    if ((share->state.changed & (STATE_CHANGED |
+                                 STATE_CRASHED_ON_REPAIR |
+                                 STATE_CRASHED | STATE_NOT_ANALYZED)) ||
+        (param.testflag & T_STATISTICS) || maria_is_crashed(file))
+    {
+      file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+      pthread_mutex_lock(&share->intern_lock);
+      share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED |
+                                STATE_CRASHED_ON_REPAIR);
+      if (!(table->db_stat & HA_READ_ONLY))
+        error= maria_update_state_info(&param, file, UPDATE_TIME | UPDATE_OPEN_COUNT |
+                                       UPDATE_STAT);
+      pthread_mutex_unlock(&share->intern_lock);
+      info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
+           HA_STATUS_CONST);
+    }
+  }
+  else if (!maria_is_crashed(file) && !thd->killed)
+  {
+    maria_mark_crashed(file);
+    file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+  }
+
+  thd->proc_info= old_proc_info;
+  return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
+}
+
+
+/*
+  Analyze the key distribution in the table
+  As the table may be only locked for read, we have to take into account that
+  two threads may do an analyze at the same time!
+*/
+
+int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
+{
+  int error= 0;
+  HA_CHECK param;
+  MARIA_SHARE *share= file->s;
+
+  maria_chk_init(&param);
+  param.thd= thd;
+  param.op_name= "analyze";
+  param.db_name= table->s->db.str;
+  param.table_name= table->alias;
+  param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
+                   T_DONT_CHECK_CHECKSUM);
+  param.using_global_keycache= 1;
+  param.stats_method= (enum_handler_stats_method) thd->variables.
+    maria_stats_method;
+
+  if (!(share->state.changed & STATE_NOT_ANALYZED))
+    return HA_ADMIN_ALREADY_DONE;
+
+  error= maria_chk_key(&param, file);
+  if (!error)
+  {
+    pthread_mutex_lock(&share->intern_lock);
+    error= maria_update_state_info(&param, file, UPDATE_STAT);
+    pthread_mutex_unlock(&share->intern_lock);
+  }
+  else if (!maria_is_crashed(file) && !thd->killed)
+    maria_mark_crashed(file);
+  return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
+}
+
+
+int ha_maria::restore(THD * thd, HA_CHECK_OPT *check_opt)
+{
+  HA_CHECK_OPT tmp_check_opt;
+  char *backup_dir= thd->lex->backup_dir;
+  char src_path[FN_REFLEN], dst_path[FN_REFLEN];
+  char table_name[FN_REFLEN];
+  int error;
+  const char *errmsg;
+  DBUG_ENTER("restore");
+
+  VOID(tablename_to_filename(table->s->table_name.str, table_name,
+                             sizeof(table_name)));
+
+  if (fn_format_relative_to_data_home(src_path, table_name, backup_dir,
+                                      MARIA_NAME_DEXT))
+    DBUG_RETURN(HA_ADMIN_INVALID);
+
+  strxmov(dst_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
+  if (my_copy(src_path, dst_path, MYF(MY_WME)))
+  {
+    error= HA_ADMIN_FAILED;
+    errmsg= "Failed in my_copy (Error %d)";
+    goto err;
+  }
+
+  tmp_check_opt.init();
+  tmp_check_opt.flags |= T_VERY_SILENT | T_CALC_CHECKSUM | T_QUICK;
+  DBUG_RETURN(repair(thd, &tmp_check_opt));
+
+err:
+  {
+    HA_CHECK param;
+    maria_chk_init(&param);
+    param.thd= thd;
+    param.op_name= "restore";
+    param.db_name= table->s->db.str;
+    param.table_name= table->s->table_name.str;
+    param.testflag= 0;
+    _ma_check_print_error(&param, errmsg, my_errno);
+    DBUG_RETURN(error);
+  }
+}
+
+
+int ha_maria::backup(THD * thd, HA_CHECK_OPT *check_opt)
+{
+  char *backup_dir= thd->lex->backup_dir;
+  char src_path[FN_REFLEN], dst_path[FN_REFLEN];
+  char table_name[FN_REFLEN];
+  int error;
+  const char *errmsg;
+  DBUG_ENTER("ha_maria::backup");
+
+  VOID(tablename_to_filename(table->s->table_name.str, table_name,
+                             sizeof(table_name)));
+
+  if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
+                                      reg_ext))
+  {
+    errmsg= "Failed in fn_format() for .frm file (errno: %d)";
+    error= HA_ADMIN_INVALID;
+    goto err;
+  }
+
+  strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS);
+  if (my_copy(src_path, dst_path,
+              MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
+  {
+    error= HA_ADMIN_FAILED;
+    errmsg= "Failed copying .frm file (errno: %d)";
+    goto err;
+  }
+
+  /* Change extension */
+  if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
+                                      MARIA_NAME_DEXT))
+  {
+    errmsg= "Failed in fn_format() for .MYD file (errno: %d)";
+    error= HA_ADMIN_INVALID;
+    goto err;
+  }
+
+  strxmov(src_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
+  if (my_copy(src_path, dst_path,
+              MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
+  {
+    errmsg= "Failed copying .MYD file (errno: %d)";
+    error= HA_ADMIN_FAILED;
+    goto err;
+  }
+  DBUG_RETURN(HA_ADMIN_OK);
+
+err:
+  {
+    HA_CHECK param;
+    maria_chk_init(&param);
+    param.thd= thd;
+    param.op_name= "backup";
+    param.db_name= table->s->db.str;
+    param.table_name= table->s->table_name.str;
+    param.testflag= 0;
+    _ma_check_print_error(&param, errmsg, my_errno);
+    DBUG_RETURN(error);
+  }
+}
+
+
+int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
+{
+  int error;
+  HA_CHECK param;
+  ha_rows start_records;
+
+  if (!file)
+    return HA_ADMIN_INTERNAL_ERROR;
+
+  maria_chk_init(&param);
+  param.thd= thd;
+  param.op_name= "repair";
+  param.testflag= ((check_opt->flags & ~(T_EXTEND)) |
+                   T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
+                   (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
+  param.sort_buffer_length= check_opt->sort_buffer_size;
+  start_records= file->state->records;
+  while ((error= repair(thd, param, 0)) && param.retry_repair)
+  {
+    param.retry_repair= 0;
+    if (test_all_bits(param.testflag,
+                      (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK)))
+    {
+      param.testflag &= ~T_RETRY_WITHOUT_QUICK;
+      sql_print_information("Retrying repair of: '%s' without quick",
+                            table->s->path.str);
+      continue;
+    }
+    param.testflag &= ~T_QUICK;
+    if ((param.testflag & T_REP_BY_SORT))
+    {
+      param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP;
+      sql_print_information("Retrying repair of: '%s' with keycache",
+                            table->s->path.str);
+      continue;
+    }
+    break;
+  }
+  if (!error && start_records != file->state->records &&
+      !(check_opt->flags & T_VERY_SILENT))
+  {
+    char llbuff[22], llbuff2[22];
+    sql_print_information("Found %s of %s rows when repairing '%s'",
+                          llstr(file->state->records, llbuff),
+                          llstr(start_records, llbuff2),
+                          table->s->path.str);
+  }
+  return error;
+}
+
+int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
+{
+  int error;
+  if (!file)
+    return HA_ADMIN_INTERNAL_ERROR;
+  HA_CHECK param;
+
+  maria_chk_init(&param);
+  param.thd= thd;
+  param.op_name= "optimize";
+  param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
+                   T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
+  param.sort_buffer_length= check_opt->sort_buffer_size;
+  if ((error= repair(thd, param, 1)) && param.retry_repair)
+  {
+    sql_print_warning("Warning: Optimize table got errno %d, retrying",
+                      my_errno);
+    param.testflag &= ~T_REP_BY_SORT;
+    error= repair(thd, param, 1);
+  }
+  return error;
+}
+
+
+int ha_maria::repair(THD *thd, HA_CHECK &param, bool optimize)
+{
+  int error= 0;
+  uint local_testflag= param.testflag;
+  bool optimize_done= !optimize, statistics_done= 0;
+  const char *old_proc_info= thd->proc_info;
+  char fixed_name[FN_REFLEN];
+  MARIA_SHARE *share= file->s;
+  ha_rows rows= file->state->records;
+  DBUG_ENTER("ha_maria::repair");
+
+  param.db_name= table->s->db.str;
+  param.table_name= table->alias;
+  param.tmpfile_createflag= O_RDWR | O_TRUNC;
+  param.using_global_keycache= 1;
+  param.thd= thd;
+  param.tmpdir= &mysql_tmpdir_list;
+  param.out_flag= 0;
+  strmov(fixed_name, file->filename);
+
+  // Don't lock tables if we have used LOCK TABLE
+  if (!thd->locked_tables &&
+      maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK))
+  {
+    _ma_check_print_error(&param, ER(ER_CANT_LOCK), my_errno);
+    DBUG_RETURN(HA_ADMIN_FAILED);
+  }
+
+  if (!optimize ||
+      ((file->state->del || share->state.split != file->state->records) &&
+       (!(param.testflag & T_QUICK) ||
+        !(share->state.changed & STATE_NOT_OPTIMIZED_KEYS))))
+  {
+    ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ?
+                        maria_get_mask_all_keys_active(share->base.keys) :
+                        share->state.key_map);
+    uint testflag= param.testflag;
+    if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) &&
+        (local_testflag & T_REP_BY_SORT))
+    {
+      local_testflag |= T_STATISTICS;
+      param.testflag |= T_STATISTICS;           // We get this for free
+      statistics_done= 1;
+      if (thd->variables.maria_repair_threads > 1)
+      {
+        char buf[40];
+        /* TODO: respect maria_repair_threads variable */
+        my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map));
+        thd->proc_info= buf;
+        error= maria_repair_parallel(&param, file, fixed_name,
+                                     param.testflag & T_QUICK);
+        thd->proc_info= "Repair done";          // to reset proc_info, as
+                                                // it was pointing to local buffer
+      }
+      else
+      {
+        thd->proc_info= "Repair by sorting";
+        error= maria_repair_by_sort(&param, file, fixed_name,
+                                    param.testflag & T_QUICK);
+      }
+    }
+    else
+    {
+      thd->proc_info= "Repair with keycache";
+      param.testflag &= ~T_REP_BY_SORT;
+      error= maria_repair(&param, file, fixed_name, param.testflag & T_QUICK);
+    }
+    param.testflag= testflag;
+    optimize_done= 1;
+  }
+  if (!error)
+  {
+    if ((local_testflag & T_SORT_INDEX) &&
+        (share->state.changed & STATE_NOT_SORTED_PAGES))
+    {
+      optimize_done= 1;
+      thd->proc_info= "Sorting index";
+      error= maria_sort_index(&param, file, fixed_name);
+    }
+    if (!statistics_done && (local_testflag & T_STATISTICS))
+    {
+      if (share->state.changed & STATE_NOT_ANALYZED)
+      {
+        optimize_done= 1;
+        thd->proc_info= "Analyzing";
+        error= maria_chk_key(&param, file);
+      }
+      else
+        local_testflag &= ~T_STATISTICS;        // Don't update statistics
+    }
+  }
+  thd->proc_info= "Saving state";
+  if (!error)
+  {
+    if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
+    {
+      share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED |
+                                STATE_CRASHED_ON_REPAIR);
+      file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+    }
+    /*
+      the following 'if', though conceptually wrong,
+      is a useful optimization nevertheless.
+    */
+    if (file->state != &file->s->state.state)
+      file->s->state.state= *file->state;
+    if (file->s->base.auto_key)
+      _ma_update_auto_increment_key(&param, file, 1);
+    if (optimize_done)
+      error= maria_update_state_info(&param, file,
+                                     UPDATE_TIME | UPDATE_OPEN_COUNT |
+                                     (local_testflag &
+                                      T_STATISTICS ? UPDATE_STAT : 0));
+    info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
+         HA_STATUS_CONST);
+    if (rows != file->state->records && !(param.testflag & T_VERY_SILENT))
+    {
+      char llbuff[22], llbuff2[22];
+      _ma_check_print_warning(&param, "Number of rows changed from %s to %s",
+                              llstr(rows, llbuff),
+                              llstr(file->state->records, llbuff2));
+    }
+  }
+  else
+  {
+    maria_mark_crashed_on_repair(file);
+    file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+    maria_update_state_info(&param, file, 0);
+  }
+  thd->proc_info= old_proc_info;
+  if (!thd->locked_tables)
+    maria_lock_database(file, F_UNLCK);
+  DBUG_RETURN(error ? HA_ADMIN_FAILED :
+              !optimize_done ? HA_ADMIN_ALREADY_DONE : HA_ADMIN_OK);
+}
+
+
+/*
+  Assign table indexes to a specific key cache.
+*/
+
+int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
+{
+  KEY_CACHE *new_key_cache= check_opt->key_cache;
+  const char *errmsg= 0;
+  int error= HA_ADMIN_OK;
+  ulonglong map= ~(ulonglong) 0;
+  TABLE_LIST *table_list= table->pos_in_table_list;
+  DBUG_ENTER("ha_maria::assign_to_keycache");
+
+  /* Check validity of the index references */
+  if (table_list->use_index)
+  {
+    /* We only come here when the user did specify an index map */
+    key_map kmap;
+    if (get_key_map_from_key_list(&kmap, table, table_list->use_index))
+    {
+      errmsg= thd->net.last_error;
+      error= HA_ADMIN_FAILED;
+      goto err;
+    }
+    map= kmap.to_ulonglong();
+  }
+
+  if ((error= maria_assign_to_key_cache(file, map, new_key_cache)))
+  {
+    char buf[STRING_BUFFER_USUAL_SIZE];
+    my_snprintf(buf, sizeof(buf),
+                "Failed to flush to index file (errno: %d)", error);
+    errmsg= buf;
+    error= HA_ADMIN_CORRUPT;
+  }
+
+err:
+  if (error != HA_ADMIN_OK)
+  {
+    /* Send error to user */
+    HA_CHECK param;
+    maria_chk_init(&param);
+    param.thd= thd;
+    param.op_name= "assign_to_keycache";
+    param.db_name= table->s->db.str;
+    param.table_name= table->s->table_name.str;
+    param.testflag= 0;
+    _ma_check_print_error(&param, errmsg);
+  }
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Preload pages of the index file for a table into the key cache.
+*/
+
+int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt)
+{
+  int error;
+  const char *errmsg;
+  ulonglong map= ~(ulonglong) 0;
+  TABLE_LIST *table_list= table->pos_in_table_list;
+  my_bool ignore_leaves= table_list->ignore_leaves;
+
+  DBUG_ENTER("ha_maria::preload_keys");
+
+  /* Check validity of the index references */
+  if (table_list->use_index)
+  {
+    key_map kmap;
+    get_key_map_from_key_list(&kmap, table, table_list->use_index);
+    if (kmap.is_set_all())
+    {
+      errmsg= thd->net.last_error;
+      error= HA_ADMIN_FAILED;
+      goto err;
+    }
+    if (!kmap.is_clear_all())
+      map= kmap.to_ulonglong();
+  }
+
+  maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
+              (void*) &thd->variables.preload_buff_size);
+
+  if ((error= maria_preload(file, map, ignore_leaves)))
+  {
+    switch (error) {
+    case HA_ERR_NON_UNIQUE_BLOCK_SIZE:
+      errmsg= "Indexes use different block sizes";
+      break;
+    case HA_ERR_OUT_OF_MEM:
+      errmsg= "Failed to allocate buffer";
+      break;
+    default:
+      char buf[ERRMSGSIZE + 20];
+      my_snprintf(buf, ERRMSGSIZE,
+                  "Failed to read from index file (errno: %d)", my_errno);
+      errmsg= buf;
+    }
+    error= HA_ADMIN_FAILED;
+    goto err;
+  }
+
+  DBUG_RETURN(HA_ADMIN_OK);
+
+err:
+  {
+    HA_CHECK param;
+    maria_chk_init(&param);
+    param.thd= thd;
+    param.op_name= "preload_keys";
+    param.db_name= table->s->db.str;
+    param.table_name= table->s->table_name.str;
+    param.testflag= 0;
+    _ma_check_print_error(&param, errmsg);
+    DBUG_RETURN(error);
+  }
+}
+
+
+/*
+  Disable indexes, making it persistent if requested.
+
+  SYNOPSIS
+    disable_indexes()
+    mode        mode of operation:
+                HA_KEY_SWITCH_NONUNIQ      disable all non-unique keys
+                HA_KEY_SWITCH_ALL          disable all keys
+                HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent
+                HA_KEY_SWITCH_ALL_SAVE     dis. all keys and make persistent
+
+  IMPLEMENTATION
+    HA_KEY_SWITCH_NONUNIQ is not implemented.
+    HA_KEY_SWITCH_ALL_SAVE is not implemented.
+
+  RETURN
+    0  ok
+    HA_ERR_WRONG_COMMAND  mode not implemented.
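+
+  NOTE
+    A typical caller is the server's bulk-load path: ALTER TABLE ...
+    DISABLE KEYS normally reaches the handler as
+    disable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE), and the later
+    ALTER TABLE ... ENABLE KEYS as enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE),
+    which rebuilds the disabled keys by repair (see enable_indexes() below).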
+*/
+
+int ha_maria::disable_indexes(uint mode)
+{
+  int error;
+
+  if (mode == HA_KEY_SWITCH_ALL)
+  {
+    /* call a storage engine function to switch the key map */
+    error= maria_disable_indexes(file);
+  }
+  else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
+  {
+    maria_extra(file, HA_EXTRA_NO_KEYS, 0);
+    info(HA_STATUS_CONST);                      // Read new key info
+    error= 0;
+  }
+  else
+  {
+    /* mode not implemented */
+    error= HA_ERR_WRONG_COMMAND;
+  }
+  return error;
+}
+
+
+/*
+  Enable indexes, making it persistent if requested.
+
+  SYNOPSIS
+    enable_indexes()
+    mode        mode of operation:
+                HA_KEY_SWITCH_NONUNIQ      enable all non-unique keys
+                HA_KEY_SWITCH_ALL          enable all keys
+                HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent
+                HA_KEY_SWITCH_ALL_SAVE     en. all keys and make persistent
+
+  DESCRIPTION
+    Enable indexes, which might have been disabled by disable_indexes()
+    before. The modes without _SAVE work only if both data and indexes are
+    empty, since the MARIA repair would enable them persistently.
+    To be sure in these cases, call handler::delete_all_rows() before.
+
+  IMPLEMENTATION
+    HA_KEY_SWITCH_NONUNIQ is not implemented.
+    HA_KEY_SWITCH_ALL_SAVE is not implemented.
+
+  RETURN
+    0  ok
+    !=0  Error, among others:
+         HA_ERR_CRASHED  data or index is non-empty. Delete all rows and retry.
+         HA_ERR_WRONG_COMMAND  mode not implemented.
+*/
+
+int ha_maria::enable_indexes(uint mode)
+{
+  int error;
+
+  if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys))
+  {
+    /* All indexes are enabled already. */
+    return 0;
+  }
+
+  if (mode == HA_KEY_SWITCH_ALL)
+  {
+    error= maria_enable_indexes(file);
+    /*
+      Do not try to repair on error,
+      as this could make the enabled state persistent,
+      but mode==HA_KEY_SWITCH_ALL forbids it.
+    */
+  }
+  else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
+  {
+    THD *thd= current_thd;
+    HA_CHECK param;
+    const char *save_proc_info= thd->proc_info;
+    thd->proc_info= "Creating index";
+    maria_chk_init(&param);
+    param.op_name= "recreating_index";
+    param.testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK |
+                     T_CREATE_MISSING_KEYS);
+    param.myf_rw &= ~MY_WAIT_IF_FULL;
+    param.sort_buffer_length= thd->variables.maria_sort_buff_size;
+    param.stats_method= (enum_handler_stats_method) thd->variables.
+      maria_stats_method;
+    param.tmpdir= &mysql_tmpdir_list;
+    if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param.retry_repair)
+    {
+      sql_print_warning("Warning: Enabling keys got errno %d, retrying",
+                        my_errno);
+      /* Repairing by sort failed. Now try standard repair method. */
+      param.testflag &= ~(T_REP_BY_SORT | T_QUICK);
+      error= (repair(thd, param, 0) != HA_ADMIN_OK);
+      /*
+        If the standard repair succeeded, clear all error messages which
+        might have been set by the first repair. They can still be seen
+        with SHOW WARNINGS then.
+      */
+      if (!error)
+        thd->clear_error();
+    }
+    info(HA_STATUS_CONST);
+    thd->proc_info= save_proc_info;
+  }
+  else
+  {
+    /* mode not implemented */
+    error= HA_ERR_WRONG_COMMAND;
+  }
+  return error;
+}
+
+
+/*
+  Test if indexes are disabled.
+
+
+  SYNOPSIS
+    indexes_are_disabled()
+      no parameters
+
+
+  RETURN
+    0  indexes are not disabled
+    1  all indexes are disabled
+    [2  non-unique indexes are disabled - NOT YET IMPLEMENTED]
+*/
+
+int ha_maria::indexes_are_disabled(void)
+{
+  return maria_indexes_are_disabled(file);
+}
+
+
+/*
+  prepare for a many-rows insert operation
+  e.g. - disable indexes (if they can be recreated fast) or
+  activate special bulk-insert optimizations
+
+  SYNOPSIS
+    start_bulk_insert(rows)
+    rows        Rows to be inserted
+                0 if we don't know
+
+  NOTICE
+    Do not forget to call end_bulk_insert() later!
+*/
+
+void ha_maria::start_bulk_insert(ha_rows rows)
+{
+  DBUG_ENTER("ha_maria::start_bulk_insert");
+  THD *thd= current_thd;
+  ulong size= min(thd->variables.read_buff_size,
+                  table->s->avg_row_length * rows);
+  DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu",
+                      (ulong) rows, size));
+
+  /* don't enable row cache if too few rows */
+  if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
+    maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size);
+
+  can_enable_indexes= maria_is_all_keys_active(file->s->state.key_map,
+                                               file->s->base.keys);
+
+  if (!(specialflag & SPECIAL_SAFE_MODE))
+  {
+    /*
+      Only disable old index if the table was empty and we are inserting
+      a lot of rows.
+      We should not do this for only a few rows as this is slower and
+      we don't want to update the key statistics based on only a few rows.
+    */
+    if (file->state->records == 0 && can_enable_indexes &&
+        (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES))
+      maria_disable_non_unique_index(file, rows);
+    else
+      if (!file->bulk_insert &&
+          (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT))
+      {
+        maria_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows);
+      }
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  end special bulk-insert optimizations,
+  which have been activated by start_bulk_insert().
+
+  SYNOPSIS
+    end_bulk_insert()
+    no arguments
+
+  RETURN
+    0     OK
+    != 0  Error
+*/
+
+int ha_maria::end_bulk_insert()
+{
+  maria_end_bulk_insert(file);
+  int err= maria_extra(file, HA_EXTRA_NO_CACHE, 0);
+  return err ? err : can_enable_indexes ?
+    enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE) : 0;
+}
+
+
+bool ha_maria::check_and_repair(THD *thd)
+{
+  int error= 0;
+  int marked_crashed;
+  char *old_query;
+  uint old_query_length;
+  HA_CHECK_OPT check_opt;
+  DBUG_ENTER("ha_maria::check_and_repair");
+
+  check_opt.init();
+  check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;
+  // Don't use quick if deleted rows
+  if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK))
+    check_opt.flags |= T_QUICK;
+  sql_print_warning("Checking table: '%s'", table->s->path.str);
+
+  old_query= thd->query;
+  old_query_length= thd->query_length;
+  pthread_mutex_lock(&LOCK_thread_count);
+  thd->query= table->s->table_name.str;
+  thd->query_length= table->s->table_name.length;
+  pthread_mutex_unlock(&LOCK_thread_count);
+
+  if ((marked_crashed= maria_is_crashed(file)) || check(thd, &check_opt))
+  {
+    sql_print_warning("Recovering table: '%s'", table->s->path.str);
+    check_opt.flags=
+      ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) |
+       (marked_crashed ? 0 : T_QUICK) |
+       (maria_recover_options & HA_RECOVER_FORCE ?
0 : T_SAFE_REPAIR) | + T_AUTO_REPAIR); + if (repair(thd, &check_opt)) + error= 1; + } + pthread_mutex_lock(&LOCK_thread_count); + thd->query= old_query; + thd->query_length= old_query_length; + pthread_mutex_unlock(&LOCK_thread_count); + DBUG_RETURN(error); +} + + +bool ha_maria::is_crashed() const +{ + return (file->s->state.changed & STATE_CRASHED || + (my_disable_locking && file->s->state.open_count)); +} + + +int ha_maria::update_row(const byte * old_data, byte * new_data) +{ + statistic_increment(table->in_use->status_var.ha_update_count, &LOCK_status); + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) + table->timestamp_field->set_time(); + return maria_update(file, old_data, new_data); +} + + +int ha_maria::delete_row(const byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_delete_count, &LOCK_status); + return maria_delete(file, buf); +} + + +int ha_maria::index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error= maria_rkey(file, buf, active_index, key, key_len, find_flag); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error= maria_rkey(file, buf, index, key, key_len, find_flag); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_read_last(byte * buf, const byte * key, uint key_len) +{ + DBUG_ENTER("ha_maria::index_read_last"); + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error= maria_rkey(file, buf, active_index, key, key_len, + HA_READ_PREFIX_LAST); + table->status= error ? STATUS_NOT_FOUND : 0; + DBUG_RETURN(error); +} + + +int ha_maria::index_next(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error= maria_rnext(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_prev(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_prev_count, + &LOCK_status); + int error= maria_rprev(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_first(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_first_count, + &LOCK_status); + int error= maria_rfirst(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_last(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_last_count, + &LOCK_status); + int error= maria_rlast(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_next_same(byte * buf, + const byte *key __attribute__ ((unused)), + uint length __attribute__ ((unused))) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error= maria_rnext_same(file, buf); + table->status= error ? 
STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::rnd_init(bool scan) +{ + if (scan) + return maria_scan_init(file); + return maria_reset(file); // Free buffers +} + + +int ha_maria::rnd_next(byte *buf) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, + &LOCK_status); + int error= maria_scan(file, buf); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::restart_rnd_next(byte *buf, byte *pos) +{ + return rnd_pos(buf, pos); +} + + +int ha_maria::rnd_pos(byte * buf, byte *pos) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_count, + &LOCK_status); + int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length)); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +void ha_maria::position(const byte * record) +{ + my_off_t position= maria_position(file); + my_store_ptr(ref, ref_length, position); +} + + +int ha_maria::info(uint flag) +{ + MARIA_INFO info; + char name_buff[FN_REFLEN]; + + (void) maria_status(file, &info, flag); + if (flag & HA_STATUS_VARIABLE) + { + stats.records= info.records; + stats.deleted= info.deleted; + stats.data_file_length= info.data_file_length; + stats.index_file_length= info.index_file_length; + stats.delete_length= info.delete_length; + stats.check_time= info.check_time; + stats.mean_rec_length= info.mean_reclength; + } + if (flag & HA_STATUS_CONST) + { + TABLE_SHARE *share= table->s; + stats.max_data_file_length= info.max_data_file_length; + stats.max_index_file_length= info.max_index_file_length; + stats.create_time= info.create_time; + ref_length= info.reflength; + share->db_options_in_use= info.options; + stats.block_size= maria_block_size; + + /* Update share */ + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_lock(&share->mutex); + share->keys_in_use.set_prefix(share->keys); + share->keys_in_use.intersect_extended(info.key_map); + share->keys_for_keyread.intersect(share->keys_in_use); + share->db_record_offset= info.record_offset; + if (share->key_parts) + memcpy((char*) table->key_info[0].rec_per_key, + (char*) info.rec_per_key, + sizeof(table->key_info[0].rec_per_key) * share->key_parts); + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_unlock(&share->mutex); + + /* + Set data_file_name and index_file_name to point at the symlink value + if table is symlinked (Ie; Real name is not same as generated name) + */ + data_file_name= index_file_name= 0; + fn_format(name_buff, file->filename, "", MARIA_NAME_DEXT, MY_APPEND_EXT); + if (strcmp(name_buff, info.data_file_name)) + data_file_name= info.data_file_name; + fn_format(name_buff, file->filename, "", MARIA_NAME_IEXT, MY_APPEND_EXT); + if (strcmp(name_buff, info.index_file_name)) + index_file_name= info.index_file_name; + } + if (flag & HA_STATUS_ERRKEY) + { + errkey= info.errkey; + my_store_ptr(dup_ref, ref_length, info.dup_key_pos); + } + /* Faster to always update, than to do it based on flag */ + stats.update_time= info.update_time; + stats.auto_increment_value= info.auto_increment; + + return 0; +} + + +int ha_maria::extra(enum ha_extra_function operation) +{ + if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD) + return 0; + return maria_extra(file, operation, 0); +} + +int ha_maria::reset(void) +{ + return maria_reset(file); +} + +/* To be used with WRITE_CACHE and EXTRA_CACHE */ + +int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size) +{ + if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE) + return 0; + return 
maria_extra(file, operation, (void*) &cache_size); +} + + +int ha_maria::delete_all_rows() +{ + return maria_delete_all_rows(file); +} + + +int ha_maria::delete_table(const char *name) +{ + return maria_delete_table(name); +} + + +int ha_maria::external_lock(THD *thd, int lock_type) +{ + return maria_lock_database(file, !table->s->tmp_table ? + lock_type : ((lock_type == F_UNLCK) ? + F_UNLCK : F_EXTRA_LCK)); +} + + +THR_LOCK_DATA **ha_maria::store_lock(THD *thd, + THR_LOCK_DATA **to, + enum thr_lock_type lock_type) +{ + if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK) + file->lock.type= lock_type; + *to++= &file->lock; + return to; +} + + +void ha_maria::update_create_info(HA_CREATE_INFO *create_info) +{ + ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST); + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + { + create_info->auto_increment_value= stats.auto_increment_value; + } + create_info->data_file_name= data_file_name; + create_info->index_file_name= index_file_name; +} + + +int ha_maria::create(const char *name, register TABLE *table_arg, + HA_CREATE_INFO *info) +{ + int error; + uint i, j, recpos, minpos, fieldpos, temp_length, length, create_flags= 0; + bool found_real_auto_increment= 0; + enum ha_base_keytype type; + char buff[FN_REFLEN]; + byte *record; + KEY *pos; + MARIA_KEYDEF *keydef; + MARIA_COLUMNDEF *recinfo, *recinfo_pos; + HA_KEYSEG *keyseg; + TABLE_SHARE *share= table_arg->s; + uint options= share->db_options_in_use; + enum data_file_type row_type; + DBUG_ENTER("ha_maria::create"); + + type= HA_KEYTYPE_BINARY; // Keep compiler happy + if (!(my_multi_malloc(MYF(MY_WME), + &recinfo, (share->fields * 2 + 2) * + sizeof(MARIA_COLUMNDEF), + &keydef, share->keys * sizeof(MARIA_KEYDEF), + &keyseg, + ((share->key_parts + share->keys) * + sizeof(HA_KEYSEG)), NullS))) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + pos= table_arg->key_info; + for (i= 0; i < share->keys; i++, pos++) + { + if (pos->flags & HA_USES_PARSER) + create_flags |= HA_CREATE_RELIES_ON_SQL_LAYER; + keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT | HA_SPATIAL)); + keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ? + (pos->flags & HA_SPATIAL ? 
HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) : + pos->algorithm; + keydef[i].block_length= pos->block_size; + + keydef[i].seg= keyseg; + keydef[i].keysegs= pos->key_parts; + for (j= 0; j < pos->key_parts; j++) + { + Field *field= pos->key_part[j].field; + type= field->key_type(); + keydef[i].seg[j].flag= pos->key_part[j].key_part_flag; + + if (options & HA_OPTION_PACK_KEYS || + (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY | + HA_SPACE_PACK_USED))) + { + if (pos->key_part[j].length > 8 && + (type == HA_KEYTYPE_TEXT || + type == HA_KEYTYPE_NUM || + (type == HA_KEYTYPE_BINARY && !field->zero_pack()))) + { + /* No blobs here */ + if (j == 0) + keydef[i].flag |= HA_PACK_KEY; + if (!(field->flags & ZEROFILL_FLAG) && + (field->type() == MYSQL_TYPE_STRING || + field->type() == MYSQL_TYPE_VAR_STRING || + ((int) (pos->key_part[j].length - field->decimals())) >= 4)) + keydef[i].seg[j].flag |= HA_SPACE_PACK; + } + else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16)) + keydef[i].flag |= HA_BINARY_PACK_KEY; + } + keydef[i].seg[j].type= (int) type; + keydef[i].seg[j].start= pos->key_part[j].offset; + keydef[i].seg[j].length= pos->key_part[j].length; + keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_end= + keydef[i].seg[j].bit_length= 0; + keydef[i].seg[j].bit_pos= 0; + keydef[i].seg[j].language= field->charset()->number; + + if (field->null_ptr) + { + keydef[i].seg[j].null_bit= field->null_bit; + keydef[i].seg[j].null_pos= (uint) (field->null_ptr - + (uchar *) table_arg->record[0]); + } + else + { + keydef[i].seg[j].null_bit= 0; + keydef[i].seg[j].null_pos= 0; + } + if (field->type() == FIELD_TYPE_BLOB || + field->type() == FIELD_TYPE_GEOMETRY) + { + keydef[i].seg[j].flag |= HA_BLOB_PART; + /* save number of bytes used to pack length */ + keydef[i].seg[j].bit_start= (uint) (field->pack_length() - + share->blob_ptr_size); + } + else if (field->type() == FIELD_TYPE_BIT) + { + keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len; + keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs; + keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr - + (uchar *) table_arg->record[0]); + } + } + keyseg += pos->key_parts; + } + + if (table_arg->found_next_number_field) + { + keydef[share->next_number_index].flag |= HA_AUTO_KEY; + found_real_auto_increment= share->next_number_key_offset == 0; + } + + record= table_arg->record[0]; + recpos= 0; + recinfo_pos= recinfo; + while (recpos < (uint) share->reclength) + { + Field **field, *found= 0; + minpos= share->reclength; + length= 0; + + for (field= table_arg->field; *field; field++) + { + if ((fieldpos=(*field)->offset(record)) >= recpos && + fieldpos <= minpos) + { + /* skip null fields */ + if (!(temp_length= (*field)->pack_length_in_rec())) + continue; /* Skip null-fields */ + if (!found || fieldpos < minpos || + (fieldpos == minpos && temp_length < length)) + { + minpos= fieldpos; + found= *field; + length= temp_length; + } + } + } + DBUG_PRINT("loop", ("found: 0x%lx recpos: %d minpos: %d length: %d", + (long) found, recpos, minpos, length)); + if (recpos != minpos) + { // Reserved space (Null bits?) 
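+      /*
+        The gap between the previous column and this one (for example the
+        null-bit bytes at the start of the record) is described to Maria as
+        an anonymous fixed-length FIELD_NORMAL filler column, so that the
+        engine's column layout matches the server's record layout.
+      */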
+ bzero((char*) recinfo_pos, sizeof(*recinfo_pos)); + recinfo_pos->type= FIELD_NORMAL; + recinfo_pos++->length= (uint16) (minpos - recpos); + } + if (!found) + break; + + if (found->flags & BLOB_FLAG) + recinfo_pos->type= FIELD_BLOB; + else if (found->type() == MYSQL_TYPE_VARCHAR) + recinfo_pos->type= FIELD_VARCHAR; + else if (!(options & HA_OPTION_PACK_RECORD)) + recinfo_pos->type= FIELD_NORMAL; + else if (found->zero_pack()) + recinfo_pos->type= FIELD_SKIP_ZERO; + else + recinfo_pos->type= ((length <= 3 || + (found->flags & ZEROFILL_FLAG)) ? + FIELD_NORMAL : + found->type() == MYSQL_TYPE_STRING || + found->type() == MYSQL_TYPE_VAR_STRING ? + FIELD_SKIP_ENDSPACE : FIELD_SKIP_PRESPACE); + if (found->null_ptr) + { + recinfo_pos->null_bit= found->null_bit; + recinfo_pos->null_pos= (uint) (found->null_ptr - + (uchar *) table_arg->record[0]); + } + else + { + recinfo_pos->null_bit= 0; + recinfo_pos->null_pos= 0; + } + (recinfo_pos++)->length= (uint16) length; + recpos= minpos + length; + DBUG_PRINT("loop", ("length: %d type: %d", + recinfo_pos[-1].length, recinfo_pos[-1].type)); + + } + MARIA_CREATE_INFO create_info; + bzero((char*) &create_info, sizeof(create_info)); + create_info.max_rows= share->max_rows; + create_info.reloc_rows= share->min_rows; + create_info.with_auto_increment= found_real_auto_increment; + create_info.auto_increment= (info->auto_increment_value ? + info->auto_increment_value - 1 : (ulonglong) 0); + create_info.data_file_length= ((ulonglong) share->max_rows * + share->avg_row_length); + create_info.data_file_name= info->data_file_name; + create_info.index_file_name= info->index_file_name; + + if (info->options & HA_LEX_CREATE_TMP_TABLE) + create_flags |= HA_CREATE_TMP_TABLE; + if (options & HA_OPTION_PACK_RECORD) + create_flags |= HA_PACK_RECORD; + if (options & HA_OPTION_CHECKSUM) + create_flags |= HA_CREATE_CHECKSUM; + if (options & HA_OPTION_DELAY_KEY_WRITE) + create_flags |= HA_CREATE_DELAY_KEY_WRITE; + + switch (info->row_type) { + case ROW_TYPE_FIXED: + row_type= STATIC_RECORD; + break; + case ROW_TYPE_DYNAMIC: + row_type= DYNAMIC_RECORD; + break; + default: + case ROW_TYPE_PAGES: + row_type= BLOCK_RECORD; + break; + } + + /* TODO: Check that the following fn_format is really needed */ + error= + maria_create(fn_format(buff, name, "", "", + MY_UNPACK_FILENAME | MY_APPEND_EXT), + row_type, share->keys, keydef, (uint) (recinfo_pos - recinfo), + recinfo, 0, (MARIA_UNIQUEDEF *) 0, &create_info, + create_flags); + + my_free((gptr) recinfo, MYF(0)); + DBUG_RETURN(error); +} + + +int ha_maria::rename_table(const char *from, const char *to) +{ + return maria_rename(from, to); +} + + +void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values) +{ + ulonglong nr; + int error; + byte key[HA_MAX_KEY_LENGTH]; + + if (!table->s->next_number_key_offset) + { // Autoincrement at key-start + ha_maria::info(HA_STATUS_AUTO); + *first_value= stats.auto_increment_value; + /* Maria has only table-level lock for now, so reserves to +inf */ + *nb_reserved_values= ULONGLONG_MAX; + return; + } + + /* it's safe to call the following if bulk_insert isn't on */ + maria_flush_bulk_insert(file, table->s->next_number_index); + + (void) extra(HA_EXTRA_KEYREAD); + key_copy(key, table->record[0], + table->key_info + table->s->next_number_index, + table->s->next_number_key_offset); + error= maria_rkey(file, table->record[1], (int) table->s->next_number_index, + key, table->s->next_number_key_offset, 
HA_READ_PREFIX_LAST); + if (error) + nr= 1; + else + { + /* Get data from record[1] */ + nr= ((ulonglong) table->next_number_field-> + val_int_offset(table->s->rec_buff_length) + 1); + } + extra(HA_EXTRA_NO_KEYREAD); + *first_value= nr; + /* + MySQL needs to call us for next row: assume we are inserting ("a",null) + here, we return 3, and next this statement will want to insert ("b",null): + there is no reason why ("b",3+1) would be the good row to insert: maybe it + already exists, maybe 3+1 is too large... + */ + *nb_reserved_values= 1; +} + + +/* + Find out how many rows there is in the given range + + SYNOPSIS + records_in_range() + inx Index to use + min_key Start of range. Null pointer if from first key + max_key End of range. Null pointer if to last key + + NOTES + min_key.flag can have one of the following values: + HA_READ_KEY_EXACT Include the key in the range + HA_READ_AFTER_KEY Don't include key in range + + max_key.flag can have one of the following values: + HA_READ_BEFORE_KEY Don't include key in range + HA_READ_AFTER_KEY Include all 'end_key' values in the range + + RETURN + HA_POS_ERROR Something is wrong with the index tree. + 0 There is no matching keys in the given range + number > 0 There is approximately 'number' matching rows in + the range. +*/ + +ha_rows ha_maria::records_in_range(uint inx, key_range *min_key, + key_range *max_key) +{ + return (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key); +} + + +int ha_maria::ft_read(byte * buf) +{ + int error; + + if (!ft_handler) + return -1; + + thread_safe_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); // why ? + + error= ft_handler->please->read_next(ft_handler, (char*) buf); + + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +uint ha_maria::checksum() const +{ + return (uint) file->state->checksum; +} + + +bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + uint options= table->s->db_options_in_use; + + if (info->auto_increment_value != stats.auto_increment_value || + info->data_file_name != data_file_name || + info->index_file_name != index_file_name || + table_changes == IS_EQUAL_NO || + table_changes & IS_EQUAL_PACK_LENGTH) // Not implemented yet + return COMPATIBLE_DATA_NO; + + if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE)) != + (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE))) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} + +extern int maria_panic(enum ha_panic_function flag); +int maria_panic(handlerton *hton, ha_panic_function flag) +{ + return maria_panic(flag); +} + +static int ha_maria_init(void *p) +{ + handlerton *maria_hton; + + maria_hton= (handlerton *)p; + maria_hton->state= SHOW_OPTION_YES; + maria_hton->db_type= DB_TYPE_MARIA; + maria_hton->create= maria_create_handler; + maria_hton->panic= maria_panic; + /* TODO: decide if we support Maria being used for log tables */ + maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES; + return test(maria_init()); +} + +struct st_mysql_storage_engine maria_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + +mysql_declare_plugin(maria) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &maria_storage_engine, + "Maria", + "MySQL AB", + "Traditional transactional MySQL tables", + PLUGIN_LICENSE_GPL, + ha_maria_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100, /* 1.0 */ + NULL, /* status variables */ + NULL, /* system variables */ + NULL /* 
config options */
+}
+mysql_declare_plugin_end;
diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h
new file mode 100644
index 00000000000..1f243f9ec59
--- /dev/null
+++ b/storage/maria/ha_maria.h
@@ -0,0 +1,145 @@
+/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface                      /* gcc class implementation */
+#endif
+
+/* class for the maria handler */
+
+#include <maria.h>
+
+#define HA_RECOVER_NONE    0    /* No automatic recover */
+#define HA_RECOVER_DEFAULT 1    /* Automatic recover active */
+#define HA_RECOVER_BACKUP  2    /* Make a backupfile on recover */
+#define HA_RECOVER_FORCE   4    /* Recover even if we lose rows */
+#define HA_RECOVER_QUICK   8    /* Don't check rows in data file */
+
+extern ulong maria_sort_buffer_size;
+extern TYPELIB maria_recover_typelib;
+extern ulong maria_recover_options;
+
+class ha_maria :public handler
+{
+  MARIA_HA *file;
+  ulonglong int_table_flags;
+  char *data_file_name, *index_file_name;
+  bool can_enable_indexes;
+  int repair(THD * thd, HA_CHECK &param, bool optimize);
+
+public:
+  ha_maria(handlerton *hton, TABLE_SHARE * table_arg);
+  ~ha_maria() {}
+  handler *clone(MEM_ROOT *mem_root);
+  const char *table_type() const
+  { return "MARIA"; }
+  const char *index_type(uint key_number);
+  const char **bas_ext() const;
+  ulonglong table_flags() const
+  { return int_table_flags; }
+  ulong index_flags(uint inx, uint part, bool all_parts) const
+  {
+    return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ?
+ 0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | + HA_READ_ORDER | HA_KEYREAD_ONLY); + } + uint max_supported_keys() const + { return MARIA_MAX_KEY; } + uint max_supported_key_length() const + { return HA_MAX_KEY_LENGTH; } + uint max_supported_key_part_length() const + { return HA_MAX_KEY_LENGTH; } + uint checksum() const; + + virtual bool check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE * table, + uint count, + bool called_by_logger_thread); + int open(const char *name, int mode, uint test_if_locked); + int close(void); + int write_row(byte * buf); + int update_row(const byte * old_data, byte * new_data); + int delete_row(const byte * buf); + int index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_idx(byte * buf, uint idx, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); + int index_next(byte * buf); + int index_prev(byte * buf); + int index_first(byte * buf); + int index_last(byte * buf); + int index_next_same(byte * buf, const byte * key, uint keylen); + int ft_init() + { + if (!ft_handler) + return 1; + ft_handler->please->reinit_search(ft_handler); + return 0; + } + FT_INFO *ft_init_ext(uint flags, uint inx, String * key) + { + return maria_ft_init_search(flags, file, inx, + (byte *) key->ptr(), key->length(), + key->charset(), table->record[0]); + } + int ft_read(byte * buf); + int rnd_init(bool scan); + int rnd_next(byte * buf); + int rnd_pos(byte * buf, byte * pos); + int restart_rnd_next(byte * buf, byte * pos); + void position(const byte * record); + int info(uint); + int extra(enum ha_extra_function operation); + int extra_opt(enum ha_extra_function operation, ulong cache_size); + int reset(void); + int external_lock(THD * thd, int lock_type); + int delete_all_rows(void); + int disable_indexes(uint mode); + int enable_indexes(uint mode); + int indexes_are_disabled(void); + void start_bulk_insert(ha_rows rows); + int end_bulk_insert(); + ha_rows records_in_range(uint inx, key_range * min_key, key_range * max_key); + void update_create_info(HA_CREATE_INFO * create_info); + int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info); + THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, + enum thr_lock_type lock_type); + virtual void get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values); + int rename_table(const char *from, const char *to); + int delete_table(const char *name); + int check(THD * thd, HA_CHECK_OPT * check_opt); + int analyze(THD * thd, HA_CHECK_OPT * check_opt); + int repair(THD * thd, HA_CHECK_OPT * check_opt); + bool check_and_repair(THD * thd); + bool is_crashed() const; + bool auto_repair() const + { return maria_recover_options != 0; } + int optimize(THD * thd, HA_CHECK_OPT * check_opt); + int restore(THD * thd, HA_CHECK_OPT * check_opt); + int backup(THD * thd, HA_CHECK_OPT * check_opt); + int assign_to_keycache(THD * thd, HA_CHECK_OPT * check_opt); + int preload_keys(THD * thd, HA_CHECK_OPT * check_opt); + bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes); +#ifdef HAVE_REPLICATION + int dump(THD * thd, int fd); + int net_read_dump(NET * net); +#endif +}; diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c new file mode 100644 index 00000000000..fdacda84875 --- /dev/null +++ b/storage/maria/lockman.c @@ -0,0 +1,787 @@ +/* QQ: TODO - allocate everything 
from dynarrays !!! (benchmark) */
+/* QQ: TODO instant duration locks */
+/* QQ: #warning automatically place S instead of LS if possible */
+
+/* Copyright (C) 2006 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+  Generic Lock Manager
+
+  Lock manager handles locks on "resources", a resource must be uniquely
+  identified by a 64-bit number. Lock manager itself does not assume
+  anything about the nature of a resource - it can be a row, a table, a
+  database, or just anything.
+
+  Locks belong to "lock owners". A lock owner is uniquely identified by a
+  16-bit number. A function loid2lo must be provided by the application
+  that takes such a number as an argument and returns a LOCK_OWNER
+  structure.
+
+  Lock levels are completely defined by three tables. Lock compatibility
+  matrix specifies which locks can be held at the same time on a resource.
+  Lock combining matrix specifies what lock level has the same behaviour as
+  a pair of two locks of given levels. getlock_result matrix simplifies
+  intention locking and lock escalation for an application, basically it
+  defines which locks are intention locks and which locks are "loose"
+  locks. It is only used to provide better diagnostics for the
+  application, lock manager itself does not differentiate between normal,
+  intention, and loose locks.
+
+  Internally lock manager is based on a lock-free hash, see lf_hash.c for
+  details. All locks are stored in a hash, with a resource id as a search
+  key, so all locks for the same resource will be considered collisions and
+  will be put in one (lock-free) linked list. The main lock-handling
+  logic is in the inner loop that searches for a lock in such a linked
+  list - lockfind().
+
+  This works as follows. Locks generally are added to the end of the list
+  (with one exception, see below). When scanning the list it is always
+  possible to determine what locks are granted (active) and what locks are
+  waiting - the first lock is obviously active, the second is active if it's
+  compatible with the first, and so on: a lock is active if it's compatible
+  with all previous locks and all locks before it are also active.
+  To calculate the "compatible with all previous locks" condition, all locks
+  are accumulated in the prev_lock variable using lock_combining_matrix.
+
+  Lock upgrades: when a thread that has a lock on a given resource
+  requests a new lock on the same resource and the old lock is not enough
+  to satisfy new lock requirements (which is defined by
+  lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock is
+  placed in the list. Depending on other locks it is immediately active or
+  it will wait for other locks. Here's an exception to the "locks are added
+  to the end" rule - upgraded locks are added after the last active lock
+  but before all waiting locks. The old lock (the one we upgraded from) is
+  not removed from the list, indeed it may be needed if the new lock was
+  in a savepoint that gets rolled back. So the old lock is marked as
+  "ignored" (IGNORE_ME flag). The new lock gets an UPGRADED flag.
+
+  Loose locks add an important exception to the above. Loose locks do not
+  always commute with other locks. In the list IX-LS both locks are active,
+  while in the LS-IX list only the first lock is active. This creates a
+  problem in lock upgrades. If the list was IX-LS and the owner of the
+  first lock wants to place an LS lock (which can be immediately granted),
+  the IX lock is upgraded to LSIX and the list becomes IX-LS-LSIX, which,
+  according to the lock compatibility matrix means that the last lock is
+  waiting - of course it all happened because IX and LS were swapped and
+  they don't commute. To work around this there's an ACTIVE flag which is
+  set in every lock that never waited (was placed active), and this flag
+  overrides the "compatible with all previous locks" rule.
+
+  When a lock is placed at the end of the list it's either compatible with
+  all locks and all locks are active - the new lock becomes active at once,
+  or it conflicts with some of the locks; in this case in the 'blocker'
+  variable a conflicting lock is returned and the calling thread waits on a
+  pthread condition in the LOCK_OWNER structure of the owner of the
+  conflicting lock. Or a new lock is compatible with all locks, but some
+  existing locks are not compatible with each other (example: request IS,
+  when the list is S-IX) - that is, not all locks are active. In this case a
+  first waiting lock is returned in the 'blocker' variable, lockman_getlock()
+  notices that a "blocker" does not conflict with the requested lock, and
+  "dereferences" it, to find the lock that it's waiting on. The calling
+  thread then begins to wait on the same lock.
+
+  To better support table-row relations where one needs to lock the table
+  with an intention lock before locking the row, extended diagnostics are
+  provided. When an intention lock (presumably on a table) is granted,
+  lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row,
+  perhaps the thread already has a normal lock on this table),
+  GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual),
+  GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check
+  whether it's possible to lock the row, but no need to lock it - perhaps
+  the thread has a loose lock on this table). This is defined by the
+  getlock_result[] table.
+*/
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_bit.h>
+#include <lf.h>
+#include "lockman.h"
+
+/*
+  Lock compatibility matrix.
+
+  It's asymmetric. Read it as "Somebody has the lock <value in the row
+  label>, can I set the lock <value in the column label> ?"
+
+  ') Though you can take LS lock while somebody has S lock, it makes no
+  sense - it's simpler to take S lock too.
+
+  1  - compatible
+  0  - incompatible
+  -1 - "impossible", so that we can assert the impossibility.
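+
+  Example (an illustrative reading of the matrix below): if one owner holds
+  IX on a resource, other owners can still be granted IS, IX, LS, LX or
+  LSIX on it, while their S, X, SIX and SLX requests have to wait; once an
+  owner holds X, every other request has to wait.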
+*/ +static int lock_compatibility_matrix[10][10]= +{ /* N S X IS IX SIX LS LX SLX LSIX */ + { -1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */ + { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */ + { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */ +}; + +/* + Lock combining matrix. + + It's symmetric. Read it as "what lock level L is identical to the + set of two locks A and B" + + One should never get N from it, we assert the impossibility +*/ +static enum lock_type lock_combining_matrix[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { N, S, X, IS, IX, SIX, S, SLX, SLX, SIX}, /* N */ + { S, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */ + { X, X, X, X, X, X, X, X, X, X}, /* X */ + { IS, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */ + { IX, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */ + { SIX, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */ + { LS, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */ + { LX, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */ + { SLX, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */ + { LSIX, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */ +}; + +#define REPEAT_ONCE_MORE 0 +#define OK_TO_PLACE_THE_LOCK 1 +#define OK_TO_PLACE_THE_REQUEST 2 +#define ALREADY_HAVE_THE_LOCK 4 +#define ALREADY_HAVE_THE_REQUEST 8 +#define PLACE_NEW_DISABLE_OLD 16 +#define REQUEST_NEW_DISABLE_OLD 32 +#define RESOURCE_WAS_UNLOCKED 64 + +#define NEED_TO_WAIT (OK_TO_PLACE_THE_REQUEST | ALREADY_HAVE_THE_REQUEST |\ + REQUEST_NEW_DISABLE_OLD) +#define ALREADY_HAVE (ALREADY_HAVE_THE_LOCK | ALREADY_HAVE_THE_REQUEST) +#define LOCK_UPGRADE (PLACE_NEW_DISABLE_OLD | REQUEST_NEW_DISABLE_OLD) + + +/* + the return codes for lockman_getlock + + It's asymmetric. Read it as "I have the lock <value in the row label>, + what value should be returned for <value in the column label> ?" + + 0 means impossible combination (assert!) + + Defines below help to preserve the table structure. 
+ I/L/A values are self explanatory + x means the combination is possible (assert should not crash) + but it cannot happen in row locks, only in table locks (S,X), + or lock escalations (LS,LX) +*/ +#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE +#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +#define A GOT_THE_LOCK +#define x GOT_THE_LOCK +static enum lockman_getlock_result getlock_result[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */ + { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */ + { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */ + { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */ + { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */ + { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */ + { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */ + { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */ + { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */ + { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */ +}; +#undef I +#undef L +#undef A +#undef x + +LF_REQUIRE_PINS(4); + +typedef struct lockman_lock { + uint64 resource; + struct lockman_lock *lonext; + intptr volatile link; + uint32 hashnr; + /* QQ: TODO - remove hashnr from LOCK */ + uint16 loid; + uchar lock; /* sizeof(uchar) <= sizeof(enum) */ + uchar flags; +} LOCK; + +#define IGNORE_ME 1 +#define UPGRADED 2 +#define ACTIVE 4 + +typedef struct { + intptr volatile *prev; + LOCK *curr, *next; + LOCK *blocker, *upgrade_from; +} CURSOR; + +#define PTR(V) (LOCK *)((V) & (~(intptr)1)) +#define DELETED(V) ((V) & 1) + +/* + NOTE + cursor is positioned in either case + pins[0..3] are used, they are NOT removed on return +*/ +static int lockfind(LOCK * volatile *head, LOCK *node, + CURSOR *cursor, LF_PINS *pins) +{ + uint32 hashnr, cur_hashnr; + uint64 resource, cur_resource; + intptr link; + my_bool cur_active, compatible, upgrading, prev_active; + enum lock_type lock, prev_lock, cur_lock; + uint16 loid, cur_loid; + int cur_flags, flags; + + hashnr= node->hashnr; + resource= node->resource; + lock= node->lock; + loid= node->loid; + flags= node->flags; + +retry: + cursor->prev= (intptr *)head; + prev_lock= N; + cur_active= TRUE; + compatible= TRUE; + upgrading= FALSE; + cursor->blocker= cursor->upgrade_from= 0; + _lf_unpin(pins, 3); + do { + cursor->curr= PTR(*cursor->prev); + _lf_pin(pins, 1, cursor->curr); + } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + for (;;) + { + if (!cursor->curr) + break; + do { + link= cursor->curr->link; + cursor->next= PTR(link); + _lf_pin(pins, 0, cursor->next); + } while(link != cursor->curr->link && LF_BACKOFF); + cur_hashnr= cursor->curr->hashnr; + cur_resource= cursor->curr->resource; + cur_lock= cursor->curr->lock; + cur_loid= cursor->curr->loid; + cur_flags= cursor->curr->flags; + if (*cursor->prev != (intptr)cursor->curr) + { + (void)LF_BACKOFF; + goto retry; + } + if (!DELETED(link)) + { + if (cur_hashnr > hashnr || + (cur_hashnr == hashnr && cur_resource >= resource)) + { + if (cur_hashnr > hashnr || cur_resource > resource) + break; + /* ok, we have a lock for this resource */ + DBUG_ASSERT(lock_compatibility_matrix[prev_lock][cur_lock] >= 0); + DBUG_ASSERT(lock_compatibility_matrix[cur_lock][lock] >= 0); + if ((cur_flags & IGNORE_ME) && ! 
(flags & IGNORE_ME)) + { + DBUG_ASSERT(cur_active); + if (cur_loid == loid) + cursor->upgrade_from= cursor->curr; + } + else + { + prev_active= cur_active; + if (cur_flags & ACTIVE) + DBUG_ASSERT(prev_active == TRUE); + else + cur_active&= lock_compatibility_matrix[prev_lock][cur_lock]; + if (upgrading && !cur_active /*&& !(cur_flags & UPGRADED)*/) + break; + if (prev_active && !cur_active) + { + cursor->blocker= cursor->curr; + _lf_pin(pins, 3, cursor->curr); + } + if (cur_loid == loid) + { + /* we already have a lock on this resource */ + DBUG_ASSERT(lock_combining_matrix[cur_lock][lock] != N); + DBUG_ASSERT(!upgrading || (flags & IGNORE_ME)); + if (lock_combining_matrix[cur_lock][lock] == cur_lock) + { + /* new lock is compatible */ + if (cur_active) + { + cursor->blocker= cursor->curr; /* loose-locks! */ + _lf_unpin(pins, 3); /* loose-locks! */ + return ALREADY_HAVE_THE_LOCK; + } + else + return ALREADY_HAVE_THE_REQUEST; + } + /* not compatible, upgrading */ + upgrading= TRUE; + cursor->upgrade_from= cursor->curr; + } + else + { + if (!lock_compatibility_matrix[cur_lock][lock]) + { + compatible= FALSE; + cursor->blocker= cursor->curr; + _lf_pin(pins, 3, cursor->curr); + } + } + prev_lock= lock_combining_matrix[prev_lock][cur_lock]; + DBUG_ASSERT(prev_lock != N); + } + } + cursor->prev= &(cursor->curr->link); + _lf_pin(pins, 2, cursor->curr); + } + else + { + if (my_atomic_casptr((void **)cursor->prev, + (void **)&cursor->curr, cursor->next)) + _lf_alloc_free(pins, cursor->curr); + else + { + (void)LF_BACKOFF; + goto retry; + } + } + cursor->curr= cursor->next; + _lf_pin(pins, 1, cursor->curr); + } + /* + either the end of lock list - no more locks for this resource, + or upgrading and the end of active lock list + */ + if (upgrading) + { + if (compatible /*&& prev_active*/) + return PLACE_NEW_DISABLE_OLD; + else + return REQUEST_NEW_DISABLE_OLD; + } + if (cur_active && compatible) + { + /* + either no locks for this resource or all are compatible. + ok to place the lock in any case. + */ + return prev_lock == N ? RESOURCE_WAS_UNLOCKED + : OK_TO_PLACE_THE_LOCK; + } + /* we have a lock conflict. ok to place a lock request. And wait */ + return OK_TO_PLACE_THE_REQUEST; +} + +/* + NOTE + it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays +*/ +static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, + LOCK **blocker) +{ + CURSOR cursor; + int res; + + do + { + res= lockfind(head, node, &cursor, pins); + DBUG_ASSERT(res != ALREADY_HAVE_THE_REQUEST); + if (!(res & ALREADY_HAVE)) + { + if (res & LOCK_UPGRADE) + { + node->flags|= UPGRADED; + node->lock= lock_combining_matrix[cursor.upgrade_from->lock][node->lock]; + } + if (!(res & NEED_TO_WAIT)) + node->flags|= ACTIVE; + node->link= (intptr)cursor.curr; + DBUG_ASSERT(node->link != (intptr)node); + DBUG_ASSERT(cursor.prev != &node->link); + if (!my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + { + res= REPEAT_ONCE_MORE; + node->flags&= ~ACTIVE; + } + if (res & LOCK_UPGRADE) + cursor.upgrade_from->flags|= IGNORE_ME; + /* + QQ: is this OK ? if a reader has already read upgrade_from, + it may find it conflicting with node :( + - see the last test from test_lockman_simple() + */ + } + + } while (res == REPEAT_ONCE_MORE); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + /* + note that blocker is not necessarily pinned here (when it's == curr). 
+ this is ok as in such a case it's either a dummy node for + initialize_bucket() and dummy nodes don't need pinning, + or it's a lock of the same transaction for lockman_getlock, + and it cannot be removed by another thread + */ + *blocker= cursor.blocker; + return res; +} + +/* + NOTE + it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays +*/ +static int lockpeek(LOCK * volatile *head, LOCK *node, LF_PINS *pins, + LOCK **blocker) +{ + CURSOR cursor; + int res; + + res= lockfind(head, node, &cursor, pins); + + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + if (blocker) + *blocker= cursor.blocker; + return res; +} + +/* + NOTE + it uses pins[0..3], on return all pins are removed. + + One _must_ have the lock (or request) to call this +*/ +static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) +{ + CURSOR cursor; + int res; + + do + { + res= lockfind(head, node, &cursor, pins); + DBUG_ASSERT(res & ALREADY_HAVE); + + if (cursor.upgrade_from) + cursor.upgrade_from->flags&= ~IGNORE_ME; + + /* + XXX this does not work with savepoints, as old lock is left ignored. + It cannot be unignored, as would basically mean moving the lock back + in the lock chain (from upgraded). And the latter is not allowed - + because it breaks list scanning. So old ignored lock must be deleted, + new - same - lock must be installed right after the lock we're deleting, + then we can delete. Good news is - this is only required when rolling + back a savepoint. + */ + if (my_atomic_casptr((void **)&(cursor.curr->link), + (void **)&cursor.next, 1+(char *)cursor.next)) + { + if (my_atomic_casptr((void **)cursor.prev, + (void **)&cursor.curr, cursor.next)) + _lf_alloc_free(pins, cursor.curr); + else + lockfind(head, node, &cursor, pins); + } + else + { + res= REPEAT_ONCE_MORE; + if (cursor.upgrade_from) + cursor.upgrade_from->flags|= IGNORE_ME; + } + } while (res == REPEAT_ONCE_MORE); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + _lf_unpin(pins, 3); + return res; +} + +void lockman_init(LOCKMAN *lm, loid_to_lo_func *func, uint timeout) +{ + lf_alloc_init(&lm->alloc, sizeof(LOCK), offsetof(LOCK, lonext)); + lf_dynarray_init(&lm->array, sizeof(LOCK **)); + lm->size= 1; + lm->count= 0; + lm->loid_to_lo= func; + lm->lock_timeout= timeout; +} + +void lockman_destroy(LOCKMAN *lm) +{ + LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0); + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + lf_alloc_real_free(&lm->alloc, el); + else + my_free((void *)el, MYF(0)); + el= (LOCK *)next; + } + lf_alloc_destroy(&lm->alloc); + lf_dynarray_destroy(&lm->array); +} + +/* TODO: optimize it */ +#define MAX_LOAD 1 + +static void initialize_bucket(LOCKMAN *lm, LOCK * volatile *node, + uint bucket, LF_PINS *pins) +{ + int res; + uint parent= my_clear_highest_bit(bucket); + LOCK *dummy= (LOCK *)my_malloc(sizeof(LOCK), MYF(MY_WME)); + LOCK **tmp= 0, *cur; + LOCK * volatile *el= _lf_dynarray_lvalue(&lm->array, parent); + + if (*el == NULL && bucket) + initialize_bucket(lm, el, parent, pins); + dummy->hashnr= my_reverse_bits(bucket); + dummy->loid= 0; + dummy->lock= X; /* doesn't matter, in fact */ + dummy->resource= 0; + dummy->flags= 0; + res= lockinsert(el, dummy, pins, &cur); + DBUG_ASSERT(res & (ALREADY_HAVE_THE_LOCK | RESOURCE_WAS_UNLOCKED)); + if (res & ALREADY_HAVE_THE_LOCK) + { + my_free((void *)dummy, MYF(0)); + dummy= cur; + } + my_atomic_casptr((void **)node, (void **)&tmp, dummy); +} + +static inline uint calc_hash(uint64 resource) +{ + const 
uchar *pos= (uchar *)&resource; + ulong nr1= 1, nr2= 4, i; + for (i= 0; i < sizeof(resource) ; i++, pos++) + { + nr1^= (ulong) ((((uint) nr1 & 63)+nr2) * ((uint)*pos)) + (nr1 << 8); + nr2+= 3; + } + return nr1 & INT_MAX32; +} + +/* + RETURN + see enum lockman_getlock_result + NOTE + uses pins[0..3], they're removed on return +*/ +enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, + uint64 resource, + enum lock_type lock) +{ + int res; + uint csize, bucket, hashnr; + LOCK *node, * volatile *el, *blocker; + LF_PINS *pins= lo->pins; + enum lock_type old_lock; + + DBUG_ASSERT(lo->loid); + lf_rwlock_by_pins(pins); + node= (LOCK *)_lf_alloc_new(pins); + node->flags= 0; + node->lock= lock; + node->loid= lo->loid; + node->resource= resource; + hashnr= calc_hash(resource); + bucket= hashnr % lm->size; + el= _lf_dynarray_lvalue(&lm->array, bucket); + if (*el == NULL) + initialize_bucket(lm, el, bucket, pins); + node->hashnr= my_reverse_bits(hashnr) | 1; + res= lockinsert(el, node, pins, &blocker); + if (res & ALREADY_HAVE) + { + int r; + old_lock= blocker->lock; + _lf_alloc_free(pins, node); + lf_rwunlock_by_pins(pins); + r= getlock_result[old_lock][lock]; + DBUG_ASSERT(r); + return r; + } + /* a new value was added to the hash */ + csize= lm->size; + if ((my_atomic_add32(&lm->count, 1)+1.0) / csize > MAX_LOAD) + my_atomic_cas32(&lm->size, &csize, csize*2); + node->lonext= lo->all_locks; + lo->all_locks= node; + for ( ; res & NEED_TO_WAIT; res= lockpeek(el, node, pins, &blocker)) + { + LOCK_OWNER *wait_for_lo; + ulonglong deadline; + struct timespec timeout; + + _lf_assert_pin(pins, 3); /* blocker must be pinned here */ + wait_for_lo= lm->loid_to_lo(blocker->loid); + + /* + now, this is tricky. blocker is not necessarily a LOCK + we're waiting for. If it's compatible with what we want, + then we're waiting for a lock that blocker is waiting for + (see two places where blocker is set in lockfind) + In the latter case, let's "dereference" it + */ + if (lock_compatibility_matrix[blocker->lock][lock]) + { + blocker= wait_for_lo->all_locks; + _lf_pin(pins, 3, blocker); + if (blocker != wait_for_lo->all_locks) + continue; + wait_for_lo= wait_for_lo->waiting_for; + } + + /* + note that the blocker transaction may have ended by now, + its LOCK_OWNER and short id were reused, so 'wait_for_lo' may point + to an unrelated - albeit valid - LOCK_OWNER + */ + if (!wait_for_lo) + continue; + + lo->waiting_for= wait_for_lo; + lf_rwunlock_by_pins(pins); + + /* + We lock a mutex - it may belong to a wrong LOCK_OWNER, but it must + belong to _some_ LOCK_OWNER. It means, we can never free() a LOCK_OWNER, + if there're other active LOCK_OWNERs. + */ + /* QQ: race condition here */ + pthread_mutex_lock(wait_for_lo->mutex); + if (DELETED(blocker->link)) + { + /* + blocker transaction was ended, or a savepoint that owned + the lock was rolled back. Either way - the lock was removed + */ + pthread_mutex_unlock(wait_for_lo->mutex); + lf_rwlock_by_pins(pins); + continue; + } + + /* yuck. waiting */ + deadline= my_getsystime() + lm->lock_timeout * 10000; + timeout.tv_sec= deadline/10000000; + timeout.tv_nsec= (deadline % 10000000) * 100; + do + { + pthread_cond_timedwait(wait_for_lo->cond, wait_for_lo->mutex, &timeout); + } while (!DELETED(blocker->link) && my_getsystime() < deadline); + pthread_mutex_unlock(wait_for_lo->mutex); + lf_rwlock_by_pins(pins); + if (!DELETED(blocker->link)) + { + /* + timeout. + note that we _don't_ release the lock request here. 
+ Instead we're relying on the caller to abort the transaction, + and release all locks at once - see lockman_release_locks() + */ + _lf_unpin(pins, 3); + lf_rwunlock_by_pins(pins); + return DIDNT_GET_THE_LOCK; + } + } + lo->waiting_for= 0; + _lf_assert_unpin(pins, 3); /* unpin should not be needed */ + lf_rwunlock_by_pins(pins); + return getlock_result[lock][lock]; +} + +/* + RETURN + 0 - deleted + 1 - didn't (not found) + NOTE + see lockdelete() for pin usage notes +*/ +int lockman_release_locks(LOCKMAN *lm, LOCK_OWNER *lo) +{ + LOCK * volatile *el, *node, *next; + uint bucket; + LF_PINS *pins= lo->pins; + + pthread_mutex_lock(lo->mutex); + lf_rwlock_by_pins(pins); + for (node= lo->all_locks; node; node= next) + { + next= node->lonext; + bucket= calc_hash(node->resource) % lm->size; + el= _lf_dynarray_lvalue(&lm->array, bucket); + if (*el == NULL) + initialize_bucket(lm, el, bucket, pins); + lockdelete(el, node, pins); + my_atomic_add32(&lm->count, -1); + } + lf_rwunlock_by_pins(pins); + lo->all_locks= 0; + /* now signal all waiters */ + pthread_cond_broadcast(lo->cond); + pthread_mutex_unlock(lo->mutex); + return 0; +} + +#ifdef MY_LF_EXTRA_DEBUG +static const char *lock2str[]= +{ "N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX" }; +/* + NOTE + the function below is NOT thread-safe !!! +*/ +void print_lockhash(LOCKMAN *lm) +{ + LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0); + printf("hash: size %u count %u\n", lm->size, lm->count); + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + { + printf("0x%08lx { resource %lu, loid %u, lock %s", + (long) el->hashnr, (ulong) el->resource, el->loid, + lock2str[el->lock]); + if (el->flags & IGNORE_ME) printf(" IGNORE_ME"); + if (el->flags & UPGRADED) printf(" UPGRADED"); + if (el->flags & ACTIVE) printf(" ACTIVE"); + if (DELETED(next)) printf(" ***DELETED***"); + printf("}\n"); + } + else + { + /*printf("0x%08x { dummy }\n", el->hashnr);*/ + DBUG_ASSERT(el->resource == 0 && el->loid == 0 && el->lock == X); + } + el= PTR(next); + } +} +#endif diff --git a/storage/maria/lockman.h b/storage/maria/lockman.h new file mode 100644 index 00000000000..fd96f8930d5 --- /dev/null +++ b/storage/maria/lockman.h @@ -0,0 +1,77 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _lockman_h +#define _lockman_h + +/* + Lock levels: + ^^^^^^^^^^^ + + N - "no lock", not a lock, used sometimes internally to simplify the code + S - Shared + X - eXclusive + IS - Intention Shared + IX - Intention eXclusive + SIX - Shared + Intention eXclusive + LS - Loose Shared + LX - Loose eXclusive + SLX - Shared + Loose eXclusive + LSIX - Loose Shared + Intention eXclusive +*/ +enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST }; + +struct lockman_lock; + +typedef struct st_lock_owner LOCK_OWNER; +struct st_lock_owner { + LF_PINS *pins; /* must be allocated from lockman's pinbox */ + struct lockman_lock *all_locks; /* a LIFO */ + LOCK_OWNER *waiting_for; + pthread_cond_t *cond; /* transactions waiting for this, wait on 'cond' */ + pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ + uint16 loid; +}; + +typedef LOCK_OWNER *loid_to_lo_func(uint16); +typedef struct { + LF_DYNARRAY array; /* hash itself */ + LF_ALLOCATOR alloc; /* allocator for elements */ + int32 volatile size; /* size of array */ + int32 volatile count; /* number of elements in the hash */ + uint lock_timeout; + loid_to_lo_func *loid_to_lo; +} LOCKMAN; +#define DIDNT_GET_THE_LOCK 0 +enum lockman_getlock_result { + NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT, + GOT_THE_LOCK, + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +}; + +void lockman_init(LOCKMAN *, loid_to_lo_func *, uint); +void lockman_destroy(LOCKMAN *); +enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, + uint64 resource, + enum lock_type lock); +int lockman_release_locks(LOCKMAN *, LOCK_OWNER *); + +#ifdef EXTRA_DEBUG +void print_lockhash(LOCKMAN *lm); +#endif + +#endif diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c new file mode 100644 index 00000000000..5ed5a776658 --- /dev/null +++ b/storage/maria/ma_bitmap.c @@ -0,0 +1,1704 @@ +/* Copyright (C) 2007 Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Bitmap handling (for records in block) + + The data file starts with a bitmap page, followed by as many data + pages as the bitmap can cover. After this there is a new bitmap page + and more data pages etc. 
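The coverage figures quoted in the comment that continues below follow from a small amount of arithmetic. The stand-alone sketch below simply repeats the calculation made by _ma_bitmap_init() further down in this file, assuming an 8K block size; it is an illustration, not code from the patch.

#include <stdio.h>

int main(void)
{
  unsigned block_size= 8192;                     /* assumed page size */
  unsigned aligned_bit_blocks= block_size / 6;   /* 6-byte groups in the map */
  unsigned pages_covered= aligned_bit_blocks * 16 + 1;  /* 16 patterns per group,
                                                           +1 for the bitmap page */
  unsigned long long mapped_bytes=
    (unsigned long long) (pages_covered - 1) * block_size;

  /* -> 21840 data pages, about 170.6 MB (cf. the 170.7M figure below) */
  printf("one bitmap page covers %u data pages (~%.1f MB of data)\n",
         pages_covered - 1, mapped_bytes / 1024.0 / 1024.0);
  return 0;
}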
+
+  The bitmap code assumes there is always an active bitmap page and thus
+  that there is at least one bitmap page in the file
+
+  Structure of bitmap page:
+
+  Fixed size records (to be implemented later):
+
+  2 bits are used to indicate:
+
+  0  Empty
+  1  50-75 % full   (at least room for 2 records)
+  2  75-100 % full  (at least room for one record)
+  3  100 % full     (no more room for records)
+
+  Assuming 8K pages, this will allow us to map:
+  8192 (bytes per page) * 4 (pages mapped per byte) * 8192 (page size)= 256M
+
+  (For Maria this will be 7*4 * 8192 = 224K smaller because of LSN)
+
+  Note that for fixed size rows, we can't add more columns without doing
+  a full reorganization of the table. The user can always force a dynamic
+  size row format by specifying ROW_FORMAT=dynamic.
+
+
+  Dynamic size records:
+
+  3 bits are used to indicate:
+
+  0  Empty page
+  1  0-30 % full    (at least room for 3 records)
+  2  30-60 % full   (at least room for 2 records)
+  3  60-90 % full   (at least room for one record)
+  4  100 % full     (no more room for records)
+  5  Tail page,  0-40 % full
+  6  Tail page, 40-80 % full
+  7  Full tail page or full blob page
+
+  Assuming 8K pages, this will allow us to map:
+  8192 (bytes per page) * 8 bits/byte / 3 bits/page * 8192 (page size)= 170.7M
+
+  Note that values 1-3 may be adjusted for each individual table based on
+  'min record length'.  Tail pages are for overflow data which can be of
+  any size and thus doesn't have to be adjusted for different tables.
+  If we add more columns to the table, some of the originally calculated
+  'cut off' points may not be optimal, but they shouldn't be 'drastically
+  wrong'.
+
+  When allocating data from the bitmap, we are trying to do it in a
+  'best fit' manner. Blobs and varchar blocks are given out in large
+  contiguous extents to allow fast access to these. Before allowing a
+  row to 'flow over' to other blocks, we will compact the page and use
+  all space on it. If there are many rows on the page, we will ensure
+  there is *LEFT_TO_GROW_ON_SPLIT* bytes left on the page to allow other
+  rows to grow.
+
+  The bitmap format allows us to extend the row file in big chunks, if needed.
+
+  When calculating the size for a packed row, we will calculate the following
+  things separately:
+  - Row header + null_bits + empty_bits fixed size segments etc.
+  - Size of all char/varchar fields
+  - Size of each blob field
+
+  The bitmap handler will get all the above information and return
+  either one page or a set of pages to put the different parts.
+
+  Bitmaps are read on demand in response to insert/delete/update operations.
+  The following bitmap pointers will be cached and stored on disk on close:
+  - Current insert_bitmap; when inserting new data we will first try to
+    fill this one.
+  - First bitmap which is not completely full.  This is updated when we
+    free data with an update or delete.
+
+  While flushing out bitmaps, we will cache the status of the bitmap in
+  memory, to avoid having to read a bitmap for an insert of new data when
+  the bitmap would not be of any use:
+  - Total empty space
+  - Largest number of contiguous pages
+
+  The bitmap ONLY goes to disk in the following scenarios:
+  - The file is closed (and we flush all changes to disk)
+  - On checkpoint
+    (I.e. when we do a checkpoint, we have to ensure that all bitmaps are
+    put on disk even if they are not in the page cache).
+ - When explicitely requested (for example on backup or after recvoery, + to simplify things) + +*/ + +#include "maria_def.h" +#include "ma_blockrec.h" + +/* Number of pages to store blob parts */ +#define BLOB_SEGMENT_MIN_SIZE 128 + +#define FULL_HEAD_PAGE 4 +#define FULL_TAIL_PAGE 7 + +static inline my_bool write_changed_bitmap(MARIA_SHARE *share, + MARIA_FILE_BITMAP *bitmap) +{ + return (key_cache_write(share->key_cache, + bitmap->file, bitmap->page * bitmap->block_size, 0, + (byte*) bitmap->map, + bitmap->block_size, bitmap->block_size, 1)); +} + +/* + Initialize bitmap. This is called the first time a file is opened +*/ + +my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) +{ + uint aligned_bit_blocks; + uint max_page_size; + MARIA_FILE_BITMAP *bitmap= &share->bitmap; + uint size= share->block_size; +#ifndef DBUG_OFF + /* We want to have a copy of the bitmap to be able to print differences */ + size*= 2; +#endif + + if (!(bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME)))) + return 1; + + bitmap->file= file; + bitmap->changed= 0; + bitmap->block_size= share->block_size; + /* Size needs to be alligned on 6 */ + aligned_bit_blocks= share->block_size / 6; + bitmap->total_size= aligned_bit_blocks * 6; + /* + In each 6 bytes, we have 6*8/3 = 16 pages covered + The +1 is to add the bitmap page, as this doesn't have to be covered + */ + bitmap->pages_covered= aligned_bit_blocks * 16 + 1; + + /* Update size for bits */ + /* TODO; Make this dependent of the row size */ + max_page_size= share->block_size - PAGE_OVERHEAD_SIZE; + bitmap->sizes[0]= max_page_size; /* Empty page */ + bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100; + bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100; + bitmap->sizes[3]= max_page_size - max_page_size * 90 / 100; + bitmap->sizes[4]= 0; /* Full page */ + bitmap->sizes[5]= max_page_size - max_page_size * 40 / 100; + bitmap->sizes[6]= max_page_size - max_page_size * 80 / 100; + bitmap->sizes[7]= 0; + + pthread_mutex_init(&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW); + + /* + Start by reading first page (assume table scan) + Later code is simpler if it can assume we always have an active bitmap. 
+ */ + if (_ma_read_bitmap_page(share, bitmap, (ulonglong) 0)) + return(1); + return 0; +} + + +/* + Free data allocated by _ma_bitmap_init +*/ + +my_bool _ma_bitmap_end(MARIA_SHARE *share) +{ + my_bool res= 0; + _ma_flush_bitmap(share); + pthread_mutex_destroy(&share->bitmap.bitmap_lock); + my_free((byte*) share->bitmap.map, MYF(MY_ALLOW_ZERO_PTR)); + return res; +} + + +/* + Flush bitmap to disk +*/ + +my_bool _ma_flush_bitmap(MARIA_SHARE *share) +{ + my_bool res= 0; + if (share->bitmap.changed) + { + pthread_mutex_lock(&share->bitmap.bitmap_lock); + if (share->bitmap.changed) + { + res= write_changed_bitmap(share, &share->bitmap); + share->bitmap.changed= 0; + } + pthread_mutex_unlock(&share->bitmap.bitmap_lock); + } + return res; +} + + +/* + Return bitmap pattern for the smallest head block that can hold 'size' + + SYNOPSIS + size_to_head_pattern() + bitmap Bitmap + size Requested size + + RETURN + 0-3 For a description of the bitmap sizes, see the header +*/ + +static uint size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size <= bitmap->sizes[3]) + return 3; + if (size <= bitmap->sizes[2]) + return 2; + if (size <= bitmap->sizes[1]) + return 1; + DBUG_ASSERT(size <= bitmap->sizes[0]); + return 0; +} + + +/* + Return bitmap pattern for block where there is size bytes free +*/ + +uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size < bitmap->sizes[3]) + return 4; + if (size < bitmap->sizes[2]) + return 3; + if (size < bitmap->sizes[1]) + return 2; + return (size < bitmap->sizes[0]) ? 1 : 0; +} + + +/* + Return bitmap pattern for the smallest tail block that can hold 'size' + + SYNOPSIS + size_to_tail_pattern() + bitmap Bitmap + size Requested size + + RETURN + 0, 5 or 6 For a description of the bitmap sizes, see the header +*/ + +static uint size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size <= bitmap->sizes[6]) + return 6; + if (size <= bitmap->sizes[5]) + return 5; + DBUG_ASSERT(size <= bitmap->sizes[0]); + return 0; +} + + +static uint free_size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size >= bitmap->sizes[0]) + return 0; /* Revert to empty page */ + if (size < bitmap->sizes[6]) + return 7; + if (size < bitmap->sizes[5]) + return 6; + return 5; +} + + +/* + Return size guranteed to be available on a page + + SYNOPSIS + pattern_to_head_size + bitmap Bitmap + pattern Pattern (0-7) + + RETURN + 0 - block_size +*/ + +static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern) +{ + DBUG_ASSERT(pattern <= 7); + return bitmap->sizes[pattern]; +} + + +/* + Print bitmap for debugging +*/ + +#ifndef DBUG_OFF + +const char *bits_to_txt[]= +{ + "empty", "00-30% full", "30-60% full", "60-90% full", "full", + "tail 00-40 % full", "tail 40-80 % full", "tail/blob full" +}; + +static void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap) +{ + uchar *pos, *end, *org_pos; + ulong page; + + end= bitmap->map+ bitmap->used_size; + DBUG_LOCK_FILE; + fprintf(DBUG_FILE,"\nBitmap page changes at page %lu\n", + (ulong) bitmap->page); + + page= (ulong) bitmap->page+1; + for (pos= bitmap->map, org_pos= bitmap->map+bitmap->block_size ; pos < end ; + pos+= 6, org_pos+= 6) + { + ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ + ulonglong org_bits= uint6korr(org_pos); + uint i; + if (bits != org_bits) + { + for (i= 0; i < 16 ; i++, bits>>= 3, org_bits>>= 3) + { + if ((bits & 7) != (org_bits & 7)) + fprintf(DBUG_FILE, "Page: %8lu %s -> %s\n", page+i, + bits_to_txt[org_bits & 7], 
bits_to_txt[bits & 7]); + } + } + page+= 16; + } + fputc('\n', DBUG_FILE); + DBUG_UNLOCK_FILE; + memcpy(bitmap->map+ bitmap->block_size, bitmap->map, bitmap->block_size); +} + +#endif /* DBUG_OFF */ + + +/*************************************************************************** + Reading & writing bitmap pages +***************************************************************************/ + +/* + Read a given bitmap page + + SYNOPSIS + read_bitmap_page() + info Maria handler + bitmap Bitmap handler + page Page to read + + TODO + Update 'bitmap->used_size' to real size of used bitmap + + RETURN + 0 ok + 1 error (Error writing old bitmap or reading bitmap page) +*/ + +my_bool _ma_read_bitmap_page(MARIA_SHARE *share, MARIA_FILE_BITMAP *bitmap, + ulonglong page) +{ + my_off_t position= page * bitmap->block_size; + my_bool res; + DBUG_ENTER("_ma_read_bitmap_page"); + DBUG_ASSERT(page % bitmap->pages_covered == 0); + + bitmap->page= page; + if (position >= share->state.state.data_file_length) + { + share->state.state.data_file_length= position + bitmap->block_size; + bzero(bitmap->map, bitmap->block_size); + bitmap->used_size= 0; + DBUG_RETURN(0); + } + bitmap->used_size= bitmap->total_size; + res= key_cache_read(share->key_cache, + bitmap->file, position, 0, + (byte*) bitmap->map, + bitmap->block_size, bitmap->block_size, 0) == 0; +#ifndef DBUG_OFF + if (!res) + memcpy(bitmap->map+ bitmap->block_size, bitmap->map, bitmap->block_size); +#endif + DBUG_RETURN(res); +} + + +/* + Change to another bitmap page + + SYNOPSIS + _ma_change_bitmap_page() + info Maria handler + bitmap Bitmap handler + page Bitmap page to read + + NOTES + If old bitmap was changed, write it out before reading new one + We return empty bitmap if page is outside of file size + + RETURN + 0 ok + 1 error (Error writing old bitmap or reading bitmap page) +*/ + +static my_bool _ma_change_bitmap_page(MARIA_HA *info, + MARIA_FILE_BITMAP *bitmap, + ulonglong page) +{ + DBUG_ENTER("_ma_change_bitmap_page"); + DBUG_ASSERT(page % bitmap->pages_covered == 0); + + if (bitmap->changed) + { + if (write_changed_bitmap(info->s, bitmap)) + DBUG_RETURN(1); + bitmap->changed= 0; + } + DBUG_RETURN(_ma_read_bitmap_page(info->s, bitmap, page)); +} + + +/* + Read next suitable bitmap + + SYNOPSIS + move_to_next_bitmap() + bitmap Bitmap handle + + TODO + Add cache of bitmaps to not read something that is not usable + + RETURN + 0 ok + 1 error (either couldn't save old bitmap or read new one +*/ + +static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap) +{ + ulonglong page= bitmap->page; + MARIA_STATE_INFO *state= &info->s->state; + DBUG_ENTER("move_to_next_bitmap"); + + if (state->first_bitmap_with_space != ~(ulonglong) 0 && + state->first_bitmap_with_space != page) + { + page= state->first_bitmap_with_space; + state->first_bitmap_with_space= ~(ulonglong) 0; + } + else + page+= bitmap->pages_covered; + DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page)); +} + + +/**************************************************************************** + Allocate data in bitmaps +****************************************************************************/ + +/* + Store data in 'block' and mark the place used in the bitmap + + SYNOPSIS + fill_block() + bitmap Bitmap handle + block Store data about what we found + best_data Pointer to best 6 byte aligned area in bitmap->map + best_pos Which bit in *best_data the area starts + 0 = first bit pattern, 1 second bit pattern etc + fill_pattern Bitmap pattern to store in best_data[best_pos] +*/ 
+ +static void fill_block(MARIA_FILE_BITMAP *bitmap, + MARIA_BITMAP_BLOCK *block, + uchar *best_data, uint best_pos, uint best_bits, + uint fill_pattern) +{ + uint page, offset, tmp; + uchar *data; + + /* For each 6 bytes we have 6*8/3= 16 patterns */ + page= (best_data - bitmap->map) / 6 * 16 + best_pos; + block->page= bitmap->page + 1 + page; + block->page_count= 1 + TAIL_BIT; + block->empty_space= pattern_to_size(bitmap, best_bits); + block->sub_blocks= 1; + block->org_bitmap_value= best_bits; + block->used= BLOCKUSED_TAIL; + + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + best_pos*= 3; + data= best_data+ best_pos / 8; + offset= best_pos & 7; + tmp= uint2korr(data); + tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); + int2store(data, tmp); + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); +} + + +/* + Allocate data for head block + + SYNOPSIS + allocate_head() + bitmap bitmap + size Size of block we need to find + block Store found information here + + RETURN + 0 ok (block is updated) + 1 error (no space in bitmap; block is not touched) +*/ + + +static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size, + MARIA_BITMAP_BLOCK *block) +{ + uint min_bits= size_to_head_pattern(bitmap, size); + uchar *data= bitmap->map, *end= data + bitmap->used_size; + uchar *best_data= 0; + uint best_bits= (uint) -1, best_pos; + DBUG_ENTER("allocate_head"); + + LINT_INIT(best_pos); + DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size)); + + for (; data < end; data += 6) + { + ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ + uint i; + + /* + Skip common patterns + We can skip empty pages (if we already found a match) or + anything matching the following pattern as this will be either + a full page or a tail page + */ + if ((!bits && best_data) || + ((bits & LL(04444444444444444)) == LL(04444444444444444))) + continue; + for (i= 0; i < 16 ; i++, bits >>= 3) + { + uint pattern= bits & 7; + if (pattern <= min_bits) + { + if (pattern == min_bits) + { + /* Found perfect match */ + best_bits= min_bits; + best_data= data; + best_pos= i; + goto found; + } + if ((int) pattern > (int) best_bits) + { + best_bits= pattern; + best_data= data; + best_pos= i; + } + } + } + } + if (!best_data) + { + if (bitmap->used_size == bitmap->total_size) + DBUG_RETURN(1); + /* Allocate data at end of bitmap */ + bitmap->used_size+= 6; + best_data= data; + best_pos= best_bits= 0; + } + +found: + fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE); + DBUG_RETURN(0); +} + + +/* + Allocate data for tail block + + SYNOPSIS + allocate_tail() + bitmap bitmap + size Size of block we need to find + block Store found information here + + RETURN + 0 ok (block is updated) + 1 error (no space in bitmap; block is not touched) +*/ + + +static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size, + MARIA_BITMAP_BLOCK *block) +{ + uint min_bits= size_to_tail_pattern(bitmap, size); + uchar *data= bitmap->map, *end= data + bitmap->used_size; + uchar *best_data= 0; + uint best_bits= (uint) -1, best_pos; + DBUG_ENTER("allocate_tail"); + DBUG_PRINT("enter", ("size: %u", size)); + + LINT_INIT(best_pos); + DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size)); + + for (; data < end; data += 6) + { + ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ + uint i; + + /* + Skip common patterns + We can skip empty pages (if we already found a match) or + the following patterns: 1-4 or 7 + */ + + 
if ((!bits && best_data) || bits == LL(0xffffffffffff)) + continue; + for (i= 0; i < 16; i++, bits >>= 3) + { + uint pattern= bits & 7; + if (pattern <= min_bits && (!pattern || pattern >= 5)) + { + if (pattern == min_bits) + { + best_bits= min_bits; + best_data= data; + best_pos= i; + goto found; + } + if ((int) pattern > (int) best_bits) + { + best_bits= pattern; + best_data= data; + best_pos= i; + } + } + } + } + if (!best_data) + { + if (bitmap->used_size == bitmap->total_size) + DBUG_RETURN(1); + /* Allocate data at end of bitmap */ + bitmap->used_size+= 6; + best_pos= best_bits= 0; + } + +found: + fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_TAIL_PAGE); + DBUG_RETURN(0); +} + + +/* + Allocate data for full blocks + + SYNOPSIS + allocate_full_pages() + bitmap bitmap + pages_needed Total size in pages (bitmap->total_size) we would like to have + block Store found information here + full_page 1 if we are not allowed to split extent + + IMPLEMENTATION + We will return the smallest area >= size. If there is no such + block, we will return the biggest area that satisfies + area_size >= min(BLOB_SEGMENT_MIN_SIZE*full_page_size, size) + + To speed up searches, we will only consider areas that has at least 16 free + pages starting on an even boundary. When finding such an area, we will + extend it with all previous and following free pages. This will ensure + we don't get holes between areas + + RETURN + # Blocks used + 0 error (no space in bitmap; block is not touched) +*/ + +static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap, + ulong pages_needed, + MARIA_BITMAP_BLOCK *block, my_bool full_page) +{ + uchar *data= bitmap->map, *data_end= data + bitmap->used_size; + uchar *page_end= data + bitmap->total_size; + uchar *best_data= 0; + uint min_size; + uint best_area_size, best_prefix_area_size, best_suffix_area_size; + uint page, size; + ulonglong best_prefix_bits; + DBUG_ENTER("allocate_full_pages"); + DBUG_PRINT("enter", ("pages_needed: %lu", pages_needed)); + + /* Following variables are only used if best_data is set */ + LINT_INIT(best_prefix_bits); + LINT_INIT(best_prefix_area_size); + LINT_INIT(best_suffix_area_size); + + min_size= pages_needed; + if (!full_page && min_size > BLOB_SEGMENT_MIN_SIZE) + min_size= BLOB_SEGMENT_MIN_SIZE; + best_area_size= ~(uint) 0; + + for (; data < page_end; data+= 6) + { + ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ + uchar *data_start; + ulonglong prefix_bits= 0; + uint area_size, prefix_area_size, suffix_area_size; + + /* Find area with at least 16 free pages */ + if (bits) + continue; + data_start= data; + /* Find size of area */ + for (data+=6 ; data < data_end ; data+= 6) + { + if ((bits= uint6korr(data))) + break; + } + area_size= (data - data_start) / 6 * 16; + if (area_size >= best_area_size) + continue; + prefix_area_size= suffix_area_size= 0; + if (!bits) + { + /* + End of page; All the rest of the bits on page are part of area + This is needed because bitmap->used_size only covers the set bits + in the bitmap. 
+ */ + area_size+= (page_end - data) / 6 * 16; + if (area_size >= best_area_size) + break; + data= page_end; + } + else + { + /* Add bits at end of page */ + for (; !(bits & 7); bits >>= 3) + suffix_area_size++; + area_size+= suffix_area_size; + } + if (data_start != bitmap->map) + { + /* Add bits before page */ + bits= prefix_bits= uint6korr(data_start - 6); + DBUG_ASSERT(bits != 0); + /* 111 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 */ + if (!(bits & LL(07000000000000000))) + { + data_start-= 6; + do + { + prefix_area_size++; + bits<<= 3; + } while (!(bits & LL(07000000000000000))); + area_size+= prefix_area_size; + /* Calculate offset to page from data_start */ + prefix_area_size= 16 - prefix_area_size; + } + } + if (area_size >= min_size && area_size <= best_area_size) + { + best_data= data_start; + best_area_size= area_size; + best_prefix_bits= prefix_bits; + best_prefix_area_size= prefix_area_size; + best_suffix_area_size= suffix_area_size; + + /* Prefer to put data in biggest possible area */ + if (area_size <= pages_needed) + min_size= area_size; + else + min_size= pages_needed; + } + } + if (!best_data) + DBUG_RETURN(0); /* No room on page */ + + /* + Now allocate min(pages_needed, area_size), starting from + best_start + best_prefix_area_size + */ + if (best_area_size > pages_needed) + best_area_size= pages_needed; + + /* For each 6 bytes we have 6*8/3= 16 patterns */ + page= ((best_data - bitmap->map) * 8) / 3 + best_prefix_area_size; + block->page= bitmap->page + 1 + page; + block->page_count= best_area_size; + block->empty_space= 0; + block->sub_blocks= 1; + block->org_bitmap_value= 0; + block->used= 0; + DBUG_PRINT("info", ("page: %lu page_count: %u", + (ulong) block->page, block->page_count)); + + if (best_prefix_area_size) + { + ulonglong tmp; + /* Convert offset back to bits */ + best_prefix_area_size= 16 - best_prefix_area_size; + if (best_area_size < best_prefix_area_size) + { + tmp= (LL(1) << best_area_size*3) - 1; + best_area_size= best_prefix_area_size; /* for easy end test */ + } + else + tmp= (LL(1) << best_prefix_area_size*3) - 1; + tmp<<= (16 - best_prefix_area_size) * 3; + DBUG_ASSERT((best_prefix_bits & tmp) == 0); + best_prefix_bits|= tmp; + int6store(best_data, best_prefix_bits); + if (!(best_area_size-= best_prefix_area_size)) + { + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + DBUG_RETURN(block->page_count); + } + best_data+= 6; + } + best_area_size*= 3; /* Bits to set */ + size= best_area_size/8; /* Bytes to set */ + bfill(best_data, size, 255); + best_data+= size; + if ((best_area_size-= size * 8)) + { + /* fill last byte */ + *best_data|= (uchar) ((1 << best_area_size) -1); + best_data++; + } + if (data_end < best_data) + bitmap->used_size= (uint) (best_data - bitmap->map); + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + DBUG_RETURN(block->page_count); +} + + +/**************************************************************************** + Find right bitmaps where to store data +****************************************************************************/ + +/* + Find right bitmap and position for head block + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_head(MARIA_HA *info, uint length, uint position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + /* There is always place for head blocks in bitmap_blocks */ + block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); + + while (allocate_head(bitmap, length, block)) + if 
(move_to_next_bitmap(info, bitmap)) + return 1; + return 0; +} + + +/* + Find right bitmap and position for tail + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_tail(MARIA_HA *info, uint length, uint position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + DBUG_ENTER("find_tail"); + + /* Needed, as there is no error checking in dynamic_element */ + if (allocate_dynamic(&info->bitmap_blocks, position)) + DBUG_RETURN(1); + block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); + + while (allocate_tail(bitmap, length, block)) + if (move_to_next_bitmap(info, bitmap)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +/* + Find right bitmap and position for full blocks in one extent + + NOTES + This is used to allocate the main extent after the 'head' block + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_mid(MARIA_HA *info, ulong pages, uint position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); + + while (allocate_full_pages(bitmap, pages, block, 1)) + { + if (move_to_next_bitmap(info, bitmap)) + return 1; + } + return 0; +} + + +/* + Find right bitmap and position for putting a blob + + NOTES + The extents are stored last in info->bitmap_blocks + + IMPLEMENTATION + Allocate all full pages for the block + optionally one tail + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_blob(MARIA_HA *info, ulong length) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + uint full_page_size= FULL_PAGE_SIZE(info->s->block_size); + ulong pages; + uint rest_length, used; + uint first_block_pos; + MARIA_BITMAP_BLOCK *first_block= 0; + DBUG_ENTER("find_blob"); + DBUG_PRINT("enter", ("length: %lu", length)); + + pages= length / full_page_size; + rest_length= (uint) (length - pages * full_page_size); + if (rest_length >= MAX_TAIL_SIZE(info->s->block_size)) + { + pages++; + rest_length= 0; + } + + if (pages) + { + MARIA_BITMAP_BLOCK *block; + if (allocate_dynamic(&info->bitmap_blocks, + info->bitmap_blocks.elements + + pages / BLOB_SEGMENT_MIN_SIZE + 2)) + DBUG_RETURN(1); + first_block_pos= info->bitmap_blocks.elements; + block= dynamic_element(&info->bitmap_blocks, info->bitmap_blocks.elements, + MARIA_BITMAP_BLOCK*); + first_block= block; + do + { + used= allocate_full_pages(bitmap, + (pages >= 65535 ? 
65535 : (uint) pages), block, + 0); + if (!used && move_to_next_bitmap(info, bitmap)) + DBUG_RETURN(1); + info->bitmap_blocks.elements++; + block++; + } while ((pages-= used) != 0); + } + if (rest_length && find_tail(info, rest_length, + info->bitmap_blocks.elements++)) + DBUG_RETURN(1); + if (first_block) + first_block->sub_blocks= info->bitmap_blocks.elements - first_block_pos; + DBUG_RETURN(0); +} + + +static my_bool allocate_blobs(MARIA_HA *info, MARIA_ROW *row) +{ + ulong *length, *end; + uint elements; + /* + Reserve size for: + head block + one extent + tail block + */ + elements= info->bitmap_blocks.elements; + for (length= row->blob_lengths, end= length + info->s->base.blobs; + length < end; length++) + { + if (*length && find_blob(info, *length)) + return 1; + } + row->extents_count= (info->bitmap_blocks.elements - elements); + return 0; +} + + +static void use_head(MARIA_HA *info, ulonglong page, uint size, + uint block_position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + uchar *data; + uint offset, tmp, offset_page; + + block= dynamic_element(&info->bitmap_blocks, block_position, + MARIA_BITMAP_BLOCK*); + block->page= page; + block->page_count= 1 + TAIL_BIT; + block->empty_space= size; + block->sub_blocks= 1; + block->used= BLOCKUSED_TAIL; + + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + offset_page= (uint) (page - bitmap->page - 1) * 3; + offset= offset_page & 7; + data= bitmap->map + offset_page / 8; + tmp= uint2korr(data); + block->org_bitmap_value= (tmp >> offset) & 7; + tmp= (tmp & ~(7 << offset)) | (FULL_HEAD_PAGE << offset); + int2store(data, tmp); + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); +} + + +/* + Find out where to split the row; +*/ + +static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row, + uint extents_length, uint split_size) +{ + uint row_length= row->base_length; + uint *lengths, *lengths_end; + + DBUG_ASSERT(row_length < split_size); + /* + Store first in all_field_lengths the different parts that are written + to the row. 
This needs to be in same order as in + ma_block_rec.c::write_block_record() + */ + row->null_field_lengths[-3]= extents_length; + row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length; + row->null_field_lengths[-1]= row->field_lengths_length; + for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS, + lengths_end= (lengths + share->base.pack_fields - share->base.blobs + + EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++) + { + if (row_length + *lengths > split_size) + break; + row_length+= *lengths; + } + return row_length; +} + + +static my_bool write_rest_of_head(MARIA_HA *info, uint position, + ulong rest_length) +{ + MARIA_SHARE *share= info->s; + uint full_page_size= FULL_PAGE_SIZE(share->block_size); + MARIA_BITMAP_BLOCK *block; + + if (position == 0) + { + /* Write out full pages */ + uint pages= rest_length / full_page_size; + + rest_length%= full_page_size; + if (rest_length >= MAX_TAIL_SIZE(share->block_size)) + { + /* Put tail on a full page */ + pages++; + rest_length= 0; + } + if (find_mid(info, rest_length / full_page_size, 1)) + return 1; + /* + Insert empty block after full pages, to allow write_block_record() to + split segment into used + free page + */ + block= dynamic_element(&info->bitmap_blocks, 2, MARIA_BITMAP_BLOCK*); + block->page_count= 0; + block->used= 0; + } + if (rest_length) + { + if (find_tail(info, rest_length, ELEMENTS_RESERVED_FOR_MAIN_PART - 1)) + return 1; + } + else + { + /* Empty tail block */ + block= dynamic_element(&info->bitmap_blocks, + ELEMENTS_RESERVED_FOR_MAIN_PART - 1, + MARIA_BITMAP_BLOCK *); + block->page_count= 0; + block->used= 0; + } + return 0; +} + + +/* + Find where to store one row + + SYNPOSIS + _ma_bitmap_find_place() + info Maria handler + row Information about row to write + blocks Store data about allocated places here + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, + MARIA_BITMAP_BLOCKS *blocks) +{ + MARIA_SHARE *share= info->s; + my_bool res= 1; + uint full_page_size, position, max_page_size; + uint head_length, row_length, rest_length, extents_length; + DBUG_ENTER("_ma_bitmap_find_place"); + + blocks->count= 0; + blocks->tail_page_skipped= blocks->page_skipped= 0; + row->extents_count= 0; + /* + Reserver place for the following blocks: + - Head block + - Full page block + - Marker block to allow write_block_record() to split full page blocks + into full and free part + - Tail block + */ + + info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; + max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE); + + pthread_mutex_lock(&share->bitmap.bitmap_lock); + + if (row->total_length <= max_page_size) + { + /* Row fits in one page */ + position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; + if (find_head(info, (uint) row->total_length, position)) + goto abort; + goto end; + } + + /* + First allocate all blobs (so that we can find out the needed size for + the main block. 
+ */ + if (row->blob_length && allocate_blobs(info, row)) + goto abort; + + extents_length= row->extents_count * ROW_EXTENT_SIZE; + if ((head_length= (row->head_length + extents_length)) <= max_page_size) + { + /* Main row part fits into one page */ + position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; + if (find_head(info, head_length, position)) + goto abort; + goto end; + } + + /* Allocate enough space */ + head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; + + /* The first segment size is stored in 'row_length' */ + row_length= find_where_to_split_row(share, row, extents_length, + max_page_size); + + full_page_size= FULL_PAGE_SIZE(share->block_size); + position= 0; + if (head_length - row_length <= full_page_size) + position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ + if (find_head(info, row_length, position)) + goto abort; + rest_length= head_length - row_length; + if (write_rest_of_head(info, position, rest_length)) + goto abort; + +end: + blocks->block= dynamic_element(&info->bitmap_blocks, position, + MARIA_BITMAP_BLOCK*); + blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; + /* First block's page_count is for all blocks */ + blocks->count= info->bitmap_blocks.elements - position; + res= 0; + +abort: + pthread_mutex_unlock(&share->bitmap.bitmap_lock); + DBUG_RETURN(res); +} + + +/* + Find where to put row on update (when head page is already defined) + + SYNPOSIS + _ma_bitmap_find_new_place() + info Maria handler + row Information about row to write + page On which page original row was stored + free_size Free size on head page + blocks Store data about allocated places here + + NOTES + This function is only called when the new row can't fit in the space of + the old row in the head page. + + This is essently same as _ma_bitmap_find_place() except that + we don't call find_head() to search in bitmaps where to put the page. + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row, + ulonglong page, uint free_size, + MARIA_BITMAP_BLOCKS *blocks) +{ + MARIA_SHARE *share= info->s; + my_bool res= 1; + uint full_page_size, position; + uint head_length, row_length, rest_length, extents_length; + DBUG_ENTER("_ma_bitmap_find_new_place"); + + blocks->count= 0; + blocks->tail_page_skipped= blocks->page_skipped= 0; + row->extents_count= 0; + info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; + + pthread_mutex_lock(&share->bitmap.bitmap_lock); + if (share->bitmap.page != page / share->bitmap.pages_covered && + _ma_change_bitmap_page(info, &share->bitmap, + page / share->bitmap.pages_covered)) + goto abort; + + /* + First allocate all blobs (so that we can find out the needed size for + the main block. 
+ */ + if (row->blob_length && allocate_blobs(info, row)) + goto abort; + + extents_length= row->extents_count * ROW_EXTENT_SIZE; + if ((head_length= (row->head_length + extents_length)) <= free_size) + { + /* Main row part fits into one page */ + position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; + use_head(info, page, head_length, position); + goto end; + } + + /* Allocate enough space */ + head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; + + /* The first segment size is stored in 'row_length' */ + row_length= find_where_to_split_row(share, row, extents_length, free_size); + + full_page_size= FULL_PAGE_SIZE(share->block_size); + position= 0; + if (head_length - row_length <= full_page_size) + position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ + use_head(info, page, row_length, position); + rest_length= head_length - row_length; + + if (write_rest_of_head(info, position, rest_length)) + goto abort; + +end: + blocks->block= dynamic_element(&info->bitmap_blocks, position, + MARIA_BITMAP_BLOCK*); + blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; + /* First block's page_count is for all blocks */ + blocks->count= info->bitmap_blocks.elements - position; + res= 0; + +abort: + pthread_mutex_unlock(&share->bitmap.bitmap_lock); + DBUG_RETURN(res); +} + + +/**************************************************************************** + Clear and reset bits +****************************************************************************/ + +static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page, uint fill_pattern) +{ + ulonglong bitmap_page; + uint offset_page, offset, tmp, org_tmp; + uchar *data; + DBUG_ENTER("set_page_bits"); + + bitmap_page= page / bitmap->pages_covered; + if (bitmap_page != bitmap->page && + _ma_change_bitmap_page(info, bitmap, bitmap_page)) + DBUG_RETURN(1); + + /* Find page number from start of bitmap */ + offset_page= page - bitmap->page - 1; + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + offset_page*= 3; + offset= offset_page & 7; + data= bitmap->map + offset_page / 8; + org_tmp= tmp= uint2korr(data); + tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); + if (tmp == org_tmp) + DBUG_RETURN(0); /* No changes */ + int2store(data, tmp); + + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + if (fill_pattern != 3 && fill_pattern != 7 && + page < info->s->state.first_bitmap_with_space) + info->s->state.first_bitmap_with_space= page; + DBUG_RETURN(0); +} + + +/* + Get bitmap pattern for a given page + + SYNOPSIS + + get_page_bits() + info Maria handler + bitmap Bitmap handler + page Page number + + RETURN + 0-7 Bitmap pattern + ~0 Error (couldn't read page) +*/ + +static uint get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page) +{ + ulonglong bitmap_page; + uint offset_page, offset, tmp; + uchar *data; + DBUG_ENTER("get_page_bits"); + + bitmap_page= page / bitmap->pages_covered; + if (bitmap_page != bitmap->page && + _ma_change_bitmap_page(info, bitmap, bitmap_page)) + DBUG_RETURN(~ (uint) 0); + + /* Find page number from start of bitmap */ + offset_page= page - bitmap->page - 1; + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + offset_page*= 3; + offset= offset_page & 7; + data= bitmap->map + offset_page / 8; + tmp= uint2korr(data); + DBUG_RETURN((tmp >> offset) & 7); +} + + +/* + Mark all pages in a region as free + + SYNOPSIS + 
reset_full_page_bits() + info Maria handler + bitmap Bitmap handler + page Start page + page_count Number of pages + + NOTES + We assume that all pages in region is covered by same bitmap + One must have a lock on info->s->bitmap.bitmap_lock + + RETURN + 0 ok + 1 Error (when reading bitmap) +*/ + +my_bool _ma_reset_full_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page, uint page_count) +{ + ulonglong bitmap_page; + uint offset, bit_start, bit_count, tmp; + uchar *data; + DBUG_ENTER("_ma_reset_full_page_bits"); + DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count)); + safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock); + + bitmap_page= page / bitmap->pages_covered; + if (bitmap_page != bitmap->page && + _ma_change_bitmap_page(info, bitmap, bitmap_page)) + DBUG_RETURN(1); + + /* Find page number from start of bitmap */ + page= page - bitmap->page - 1; + + /* Clear bits from 'page * 3' -> '(page + page_count) * 3' */ + bit_start= page * 3; + bit_count= page_count * 3; + + data= bitmap->map + bit_start / 8; + offset= bit_start & 7; + + tmp= (255 << offset); /* Bits to keep */ + if (bit_count + offset < 8) + { + /* Only clear bits between 'offset' and 'offset+bit_count-1' */ + tmp^= (255 << (offset + bit_count)); + } + *data&= ~tmp; + + if ((int) (bit_count-= (8 - offset)) > 0) + { + uint fill; + data++; + /* + -1 is here to avoid one 'if' statement and to let the following code + handle the last byte + */ + if ((fill= (bit_count - 1) / 8)) + { + bzero(data, fill); + data+= fill; + } + bit_count-= fill * 8; /* Bits left to clear */ + tmp= (1 << bit_count) - 1; + *data&= ~tmp; + } + if (bitmap->page < (ulonglong) info->s->state.first_bitmap_with_space) + info->s->state.first_bitmap_with_space= bitmap->page; + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + DBUG_RETURN(0); +} + + +/* + Correct bitmap pages to reflect the true allocation + + SYNOPSIS + _ma_bitmap_release_unused() + info Maria handle + blocks Bitmap blocks + + IMPLEMENTATION + If block->used & BLOCKUSED_TAIL is set: + If block->used & BLOCKUSED_USED is set, then the bits for the + corresponding page is set according to block->empty_space + If block->used & BLOCKUSED_USED is not set, then the bits for + the corresponding page is set to org_bitmap_value; + + If block->used & BLOCKUSED_TAIL is not set: + if block->used is not set, the bits for the corresponding page are + cleared + + For the first block (head block) the logic is same as for a tail block + + RETURN + 0 ok + 1 error (Couldn't write or read bitmap page) +*/ + +my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) +{ + MARIA_BITMAP_BLOCK *block= blocks->block, *end= block + blocks->count; + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + uint bits, current_bitmap_value; + DBUG_ENTER("_ma_bitmap_release_unused"); + + /* + We can skip FULL_HEAD_PAGE (4) as the page was marked as 'full' + when we allocated space in the page + */ + current_bitmap_value= FULL_HEAD_PAGE; + + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + + /* First handle head block */ + if (block->used & BLOCKUSED_USED) + { + DBUG_PRINT("info", ("head empty_space: %u", block->empty_space)); + bits= _ma_free_size_to_head_pattern(bitmap, block->empty_space); + if (block->used & BLOCKUSED_USE_ORG_BITMAP) + current_bitmap_value= block->org_bitmap_value; + } + else + bits= block->org_bitmap_value; + if (bits != current_bitmap_value && + set_page_bits(info, bitmap, block->page, bits)) + goto err; + + + /* Handle 
all full pages and tail pages (for head page and blob) */ + for (block++; block < end; block++) + { + if (block->used & BLOCKUSED_TAIL) + { + if (block->used & BLOCKUSED_USED) + { + DBUG_PRINT("info", ("tail empty_space: %u", block->empty_space)); + bits= free_size_to_tail_pattern(bitmap, block->empty_space); + } + else + bits= block->org_bitmap_value; + if (bits != FULL_TAIL_PAGE && + set_page_bits(info, bitmap, block->page, bits)) + goto err; + } + if (!(block->used & BLOCKUSED_USED) && + _ma_reset_full_page_bits(info, bitmap, + block->page, block->page_count)) + goto err; + } + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(0); + +err: + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(1); +} + + +/* + Free full pages from bitmap + + SYNOPSIS + _ma_bitmap_free_full_pages() + info Maria handle + extents Extents (as stored on disk) + count Number of extents + + IMPLEMENTATION + Mark all full pages (not tails) from extents as free + + RETURN + 0 ok + 1 error (Couldn't write or read bitmap page) +*/ + +my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const byte *extents, + uint count) +{ + DBUG_ENTER("_ma_bitmap_free_full_pages"); + + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + for (; count--; extents += ROW_EXTENT_SIZE) + { + ulonglong page= uint5korr(extents); + uint page_count= uint2korr(extents + ROW_EXTENT_PAGE_SIZE); + if (!(page_count & TAIL_BIT)) + { + if (_ma_reset_full_page_bits(info, &info->s->bitmap, page, page_count)) + { + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(1); + } + } + } + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(0); +} + + + +my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong pos, my_bool head, + uint empty_space) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + uint bits; + my_bool res; + DBUG_ENTER("_ma_bitmap_set"); + + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + bits= (head ? 
+ _ma_free_size_to_head_pattern(bitmap, empty_space) : + free_size_to_tail_pattern(bitmap, empty_space)); + res= set_page_bits(info, bitmap, pos, bits); + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(res); +} + + +/* + Check that bitmap pattern is correct for a page + + NOTES + Used in maria_chk + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_check_bitmap_data(MARIA_HA *info, + enum en_page_type page_type, ulonglong page, + uint empty_space, uint *bitmap_pattern) +{ + uint bits; + switch (page_type) { + case UNALLOCATED_PAGE: + case MAX_PAGE_TYPE: + bits= 0; + break; + case HEAD_PAGE: + bits= _ma_free_size_to_head_pattern(&info->s->bitmap, empty_space); + break; + case TAIL_PAGE: + bits= free_size_to_tail_pattern(&info->s->bitmap, empty_space); + break; + case BLOB_PAGE: + bits= FULL_TAIL_PAGE; + break; + } + return (*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) != + bits; +} + + +/* + Check that bitmap pattern is correct for a page + + NOTES + Used in maria_chk + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, + enum en_page_type page_type, + ulonglong page, + uint *bitmap_pattern) +{ + if ((*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) > 7) + return 1; /* Couldn't read page */ + switch (page_type) { + case HEAD_PAGE: + return *bitmap_pattern < 1 || *bitmap_pattern > 4; + case TAIL_PAGE: + return *bitmap_pattern < 5; + case BLOB_PAGE: + return *bitmap_pattern != 7; + default: + break; + } + DBUG_ASSERT(0); + return 1; +} diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c new file mode 100644 index 00000000000..f1345b2c2f3 --- /dev/null +++ b/storage/maria/ma_blockrec.c @@ -0,0 +1,2743 @@ +/* Copyright (C) 2007 Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Storage of records in block + + Maria will have a LSN at start of each page (including the bitmap page) + Maria will for each row have the additional information: + + TRANSID Transaction ID that last updated row (6 bytes) + VER_PTR Version pointer that points on the UNDO entry that + contains last version of the row versions (7 bytes) + + The different page types that are in a data file are: + + Bitmap pages Map of free pages in the next extent (8129 page size + gives us 256M of mapped pages / bitmap) + Head page Start of rows are stored on this page. + A rowid always points to a head page + Blob page This page is totally filled with data from one blob or by + a set of long VARCHAR/CHAR fields + Tail page This contains the last part from different rows, blobs + or varchar fields. + + The data file starts with a bitmap page, followed by as many data + pages as the bitmap can cover. After this there is a new bitmap page + and more data pages etc. 
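Because each data page is described by 3 bits, a bitmap pattern can straddle a byte boundary; set_page_bits() and get_page_bits() in ma_bitmap.c above therefore always read and write a 16-bit window with uint2korr()/int2store(). Below is a stand-alone sketch of the same idea on a plain byte array; the helper names are hypothetical, not taken from the patch, and the real code works on bitmap->map, whose used area is kept 6-byte aligned so the 2-byte window never runs past the buffer.

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* 'slot' is the data page's index inside the area one bitmap page covers
   (the bitmap page itself is not counted), as offset_page is used above. */
static unsigned get_pattern(const uint8_t *map, unsigned slot)
{
  unsigned bit= slot * 3;
  unsigned shift= bit & 7;
  const uint8_t *p= map + bit / 8;
  unsigned window= p[0] | ((unsigned) p[1] << 8);   /* little-endian 16-bit read */
  return (window >> shift) & 7;
}

static void set_pattern(uint8_t *map, unsigned slot, unsigned pattern)
{
  unsigned bit= slot * 3;
  unsigned shift= bit & 7;
  uint8_t *p= map + bit / 8;
  unsigned window= p[0] | ((unsigned) p[1] << 8);
  window= (window & ~(7u << shift)) | (pattern << shift);
  p[0]= (uint8_t) window;                           /* little-endian 16-bit write */
  p[1]= (uint8_t) (window >> 8);
}

int main(void)
{
  uint8_t map[32];                      /* stand-in for (part of) bitmap->map */
  memset(map, 0, sizeof(map));
  set_pattern(map, 5, 6);               /* mark data page 5 as a 40-80% full tail */
  assert(get_pattern(map, 5) == 6);
  assert(get_pattern(map, 4) == 0 && get_pattern(map, 6) == 0);
  return 0;
}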
+ + For information about the bitmap page, see ma_bitmap.c + + Structure of data and tail page: + + The page has a row directory at end of page to allow us to do deletes + without having to reorganize the page. It also allows us to store some + extra bytes after each row to allow them to grow without having to move + around other rows + + Page header: + + LSN 7 bytes Log position for last page change + PAGE_TYPE 1 byte 1 for head / 2 for tail / 3 for blob + NO 1 byte Number of row/tail entries on page + empty space 2 bytes Empty space on page + + The upmost bit in PAGE_TYPE is set to 1 if the data on the page + can be compacted to get more space. (PAGE_CAN_BE_COMPACTED) + + Row data + + Row directory of NO entires, that consist of the following for each row + (in reverse order; ie, first record is stored last): + + Position 2 bytes Position of row on page + Length 2 bytes Length of entry + + For Position and Length, the 1 upmost bit of the position and the 1 + upmost bit of the length could be used for some states of the row (in + other words, we should try to keep these reserved) + + eof flag 1 byte Reserved for full page read testing + + ---------------- + + Structure of blob pages: + + LSN 7 bytes Log position for last page change + PAGE_TYPE 1 byte 3 + + data + + ----------------- + + Row data structure: + + Flag 1 byte Marker of which header field exists + TRANSID 6 bytes TRANSID of changing transaction + (optional, added on insert and first + update/delete) + VER_PTR 7 bytes Pointer to older version in log + (undo record) + (optional, added after first + update/delete) + DELETE_TRANSID 6 bytes (optional). TRANSID of original row. + Added on delete. + Nulls_extended 1 byte To allow us to add new DEFAULT NULL + fields (optional, added after first + change of row after alter table) + Number of ROW_EXTENT's 1-3 byte Length encoded, optional + This is the number of extents the + row is split into + First row_extent 7 byte Pointer to first row extent (optional) + + Total length of length array 1-3 byte Only used if we have + char/varchar/blob fields. + Row checksum 1 byte Only if table created with checksums + Null_bits .. One bit for each NULL field + Empty_bits .. One bit for each NOT NULL field. This bit is + 0 if the value is 0 or empty string. + + field_offsets 2 byte/offset + For each 32 field, there is one offset that + points to where the field information starts + in the block. This is to provide fast access + to later field in the row when we only need + to return a small set of fields. + + Things marked above as 'optional' will only be present if the corresponding + bit is set in 'Flag' field. + + Data in the following order: + (Field order is precalculated when table is created) + + Critical fixed length, not null, fields. (Note, these can't be dropped) + Fixed length, null fields + + Length array, 1-4 byte per field for all CHAR/VARCHAR/BLOB fields. + Number of bytes used in length array per entry is depending on max length + for field. + + ROW_EXTENT's + CHAR data (space stripped) + VARCHAR data + BLOB data + + Fields marked in null_bits or empty_bits are not stored in data part or + length array. + + If row doesn't fit into the given block, then the first EXTENT will be + stored last on the row. This is done so that we don't break any field + data in the middle. + + We first try to store the full row into one block. If that's not possible + we move out each big blob into their own extents. 
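Before continuing with the overflow strategy, a short aside on the row header that carries all of the above. The sketch computes the fixed part of the header from the flag byte, using the bit assignments listed under "Flag is coded as" further down and the sizes quoted above (6-byte TRANSID, 7-byte VER_PTR, 6-byte DELETE_TRANSID, 1-byte nulls-extended count). The macro names are invented for the sketch; the engine precomputes these sums in total_header_size[] (see _ma_init_block_record_data() below), and the variable-length parts (extent count, first extent, length-array size) are ignored here.

#include <stdio.h>

#define FLAG_TRANSID        (1 << 0)  /* TRANS_ID_exists        */
#define FLAG_VER_PTR        (1 << 1)  /* VER_PTR_exists         */
#define FLAG_DELETED        (1 << 2)  /* DELETE_TRANSID follows */
#define FLAG_NULLS_EXTENDED (1 << 3)  /* nulls-extended count   */

/* Size of the fixed, flag-dependent part of the row header */
static unsigned fixed_header_size(unsigned flag)
{
  unsigned size= 1;                           /* the flag byte itself */
  if (flag & FLAG_TRANSID)        size+= 6;   /* TRANSID              */
  if (flag & FLAG_VER_PTR)        size+= 7;   /* VER_PTR              */
  if (flag & FLAG_DELETED)        size+= 6;   /* DELETE_TRANSID       */
  if (flag & FLAG_NULLS_EXTENDED) size+= 1;
  return size;
}

int main(void)
{
  /* Freshly inserted, never-updated row: only the flag byte */
  printf("%u\n", fixed_header_size(0));
  /* Row updated by a transaction: TRANSID and VER_PTR present */
  printf("%u\n", fixed_header_size(FLAG_TRANSID | FLAG_VER_PTR));
  return 0;
}

Returning to the case where the row still does not fit: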
If this is not enough we + move out a concatenation of all varchars to their own extent. + + Each blob and the concatenated char/varchar fields are stored the following + way: + - Store the parts in as many full-contiguous pages as possible. + - The last part, that doesn't fill a full page, is stored in tail page. + + When doing an insert of a new row, we don't have to have + VER_PTR in the row. This will make rows that are not changed stored + efficiently. On update and delete we would add TRANSID (if it was an old + committed row) and VER_PTR to + the row. On row page compaction we can easily detect rows where + TRANSID was committed before the the longest running transaction + started and we can then delete TRANSID and VER_PTR from the row to + gain more space. + + If a row is deleted in Maria, we change TRANSID to current transid and + change VER_PTR to point to the undo record for the delete. The undo + record must contain the original TRANSID, so that another transaction + can use this to check if they should use the found row or go to the + previous row pointed to by the VER_PTR in the undo row. + + Description of the different parts: + + Flag is coded as: + + Description bit + TRANS_ID_exists 0 + VER_PTR_exists 1 + Row is deleted 2 (Means that DELETE_TRANSID exists) + Nulls_extended_exists 3 + Row is split 7 This means that 'Number_of_row_extents' exists + + + This would be a way to get more space on a page when doing page + compaction as we don't need to store TRANSID that have committed + before the smallest running transaction we have in memory. + + Nulls_extended is the number of new DEFAULT NULL fields in the row + compared to the number of DEFAULT NULL fields when the first version + of the table was created. If Nulls_extended doesn't exist in the row, + we know it's 0 as this must be one of the original rows from when the + table was created first time. This coding allows us to add 255*8 = + 2048 new fields without requiring a full alter table. + + Empty_bits is used to allow us to store 0, 0.0, empty string, empty + varstring and empty blob efficiently. (This is very good for data + warehousing where NULL's are often regarded as evil). Having this + bitmap also allows us to drop information of a field during a future + delete if field was deleted with ALTER TABLE DROP COLUMN. To be able + to handle DROP COLUMN, we must store in the index header the fields + that has been dropped. When unpacking a row we will ignore dropped + fields. When storing a row, we will mark a dropped field either with a + null in the null bit map or in the empty_bits and not store any data + for it. + + One ROW_EXTENT is coded as: + + START_PAGE 5 bytes + PAGE_COUNT 2 bytes. High bit is used to indicate tail page/ + end of blob + With 8K pages, we can cover 256M in one extent. This coding gives us a + maximum file size of 2^40*8192 = 8192 tera + + As an example of ROW_EXTENT handling, assume a row with one integer + field (value 5), two big VARCHAR fields (size 250 and 8192*3), and 2 + big BLOB fields that we have updated. 
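Since the walk-through below leans heavily on that 7-byte ROW_EXTENT coding (5-byte little-endian start page plus a 2-byte page count whose high bit marks a tail reference), here is a minimal stand-alone sketch of packing and unpacking one extent. The real code uses the int5store/uint5korr and int2store/uint2korr macros and the TAIL_BIT constant; the hand-rolled byte shuffling and the 0x8000 mask below merely mimic them, and for tail references the count field actually carries the row's position in the tail page directory (see write_tail() later in this file).

#include <stdio.h>
#include <stdint.h>

#define EXT_TAIL_BIT 0x8000u   /* mirrors TAIL_BIT: high bit of the count */

/* Pack one ROW_EXTENT: 5-byte start page + 2-byte count, little endian */
static void extent_pack(uint8_t to[7], uint64_t start_page,
                        unsigned page_count, int is_tail)
{
  unsigned pc= (page_count & 0x7fffu) | (is_tail ? EXT_TAIL_BIT : 0);
  int i;
  for (i= 0; i < 5; i++)
    to[i]= (uint8_t) (start_page >> (8 * i));
  to[5]= (uint8_t) pc;
  to[6]= (uint8_t) (pc >> 8);
}

/* Unpack it again */
static void extent_unpack(const uint8_t from[7], uint64_t *start_page,
                          unsigned *page_count, int *is_tail)
{
  uint64_t page= 0;
  unsigned pc;
  int i;
  for (i= 4; i >= 0; i--)
    page= (page << 8) | from[i];
  pc= from[5] | ((unsigned) from[6] << 8);
  *start_page= page;
  *page_count= pc & 0x7fffu;
  *is_tail= (pc & EXT_TAIL_BIT) != 0;
}

int main(void)
{
  uint8_t buf[7];
  uint64_t page;
  unsigned count;
  int tail;
  extent_pack(buf, 134, 128, 0);   /* e.g. a blob spread over 128 full pages */
  extent_unpack(buf, &page, &count, &tail);
  printf("start_page=%llu page_count=%u tail=%d\n",
         (unsigned long long) page, count, tail);
  return 0;
}

With that coding in mind, the byte-level layout of the example row follows.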
+ + The record format for storing this into an empty file would be: + + Page 1: + + 00 00 00 00 00 00 00 LSN + 01 Only one row in page + xx xx Empty space on page + + 10 Flag: row split, VER_PTR exists + 01 00 00 00 00 00 TRANSID 1 + 00 00 00 00 00 01 00 VER_PTR to first block in LOG file 1 + 5 Number of row extents + 02 00 00 00 00 03 00 VARCHAR's are stored in full pages 2,3,4 + 0 No null fields + 0 No empty fields + 05 00 00 00 00 00 80 Tail page for VARCHAR, rowid 0 + 06 00 00 00 00 80 00 First blob, stored at page 6-133 + 05 00 00 00 00 01 80 Tail of first blob (896 bytes) at page 5 + 86 00 00 00 00 80 00 Second blob, stored at page 134-262 + 05 00 00 00 00 02 80 Tail of second blob (896 bytes) at page 5 + 05 00 5 integer + FA Length of first varchar field (size 250) + 00 60 Length of second varchar field (size 8192*3) + 00 60 10 First medium BLOB, 1M + 01 00 10 00 Second BLOB, 1M + xx xx xx xx xx xx Varchars are stored here until end of page + + ..... until end of page + + 09 00 F4 1F 00 (Start position 9, length 8180, end byte) +*/ + +#define SANITY_CHECKS + +#include "maria_def.h" +#include "ma_blockrec.h" + +typedef struct st_maria_extent_cursor +{ + byte *extent; + byte *data_start; /* For error checking */ + MARIA_RECORD_POS *tail_positions; + my_off_t page; + uint extent_count, page_count; + uint tail; /* <> 0 if current extent is a tail page */ + my_bool first_extent; +} MARIA_EXTENT_CURSOR; + + +static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails); +static my_bool delete_head_or_tail(MARIA_HA *info, + ulonglong page, uint record_number, + my_bool head); +static void _ma_print_directory(byte *buff, uint block_size); + +/**************************************************************************** + Initialization +****************************************************************************/ + +/* + Initialize data needed for block structures +*/ + + +/* Size of the different header elements for a row */ + +static uchar header_sizes[]= +{ + TRANSID_SIZE, + VERPTR_SIZE, + TRANSID_SIZE, /* Delete transid */ + 1 /* Null extends */ +}; + +/* + Calculate array of all used headers + + Used to speed up: + + size= 1; + if (flag & 1) + size+= TRANSID_SIZE; + if (flag & 2) + size+= VERPTR_SIZE; + if (flag & 4) + size+= TRANSID_SIZE + if (flag & 8) + size+= 1; + + NOTES + This is called only once at startup of Maria +*/ + +static uchar total_header_size[1 << array_elements(header_sizes)]; +#define PRECALC_HEADER_BITMASK (array_elements(total_header_size) -1) + +void _ma_init_block_record_data(void) +{ + uint i; + bzero(total_header_size, sizeof(total_header_size)); + total_header_size[0]= FLAG_SIZE; /* Flag byte */ + for (i= 1; i < array_elements(total_header_size); i++) + { + uint size= FLAG_SIZE, j, bit; + for (j= 0; (bit= (1 << j)) <= i; j++) + { + if (i & bit) + size+= header_sizes[j]; + } + total_header_size[i]= size; + } +} + + +my_bool _ma_once_init_block_row(MARIA_SHARE *share, File data_file) +{ + + share->base.max_data_file_length= + (((ulonglong) 1 << ((share->base.rec_reflength-1)*8))-1) * + share->block_size; +#if SIZEOF_OFF_T == 4 + set_if_smaller(max_data_file_length, INT_MAX32); +#endif + return _ma_bitmap_init(share, data_file); +} + + +my_bool _ma_once_end_block_row(MARIA_SHARE *share) +{ + int res= _ma_bitmap_end(share); + if (flush_key_blocks(share->key_cache, share->bitmap.file, + share->temporary ? 
FLUSH_IGNORE_CHANGED : + FLUSH_RELEASE)) + res= 1; + if (share->bitmap.file >= 0 && my_close(share->bitmap.file, MYF(MY_WME))) + res= 1; + return res; +} + + +/* Init info->cur_row structure */ + +my_bool _ma_init_block_row(MARIA_HA *info) +{ + MARIA_ROW *row= &info->cur_row, *new_row= &info->new_row; + DBUG_ENTER("_ma_init_block_row"); + + if (!my_multi_malloc(MY_WME, + &row->empty_bits_buffer, info->s->base.pack_bytes, + &row->field_lengths, info->s->base.max_field_lengths, + &row->blob_lengths, sizeof(ulong) * info->s->base.blobs, + &row->null_field_lengths, (sizeof(uint) * + (info->s->base.fields - + info->s->base.blobs)), + &row->tail_positions, (sizeof(MARIA_RECORD_POS) * + (info->s->base.blobs + 2)), + &new_row->empty_bits_buffer, info->s->base.pack_bytes, + &new_row->field_lengths, + info->s->base.max_field_lengths, + &new_row->blob_lengths, + sizeof(ulong) * info->s->base.blobs, + &new_row->null_field_lengths, (sizeof(uint) * + (info->s->base.fields - + info->s->base.blobs)), + NullS, 0)) + DBUG_RETURN(1); + if (my_init_dynamic_array(&info->bitmap_blocks, + sizeof(MARIA_BITMAP_BLOCK), + ELEMENTS_RESERVED_FOR_MAIN_PART, 16)) + my_free((char*) &info->bitmap_blocks, MYF(0)); + row->base_length= new_row->base_length= info->s->base_length; + DBUG_RETURN(0); +} + +void _ma_end_block_row(MARIA_HA *info) +{ + DBUG_ENTER("_ma_end_block_row"); + my_free((gptr) info->cur_row.empty_bits_buffer, MYF(MY_ALLOW_ZERO_PTR)); + delete_dynamic(&info->bitmap_blocks); + my_free((gptr) info->cur_row.extents, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_VOID_RETURN; +} + + +/**************************************************************************** + Helper functions +****************************************************************************/ + +static inline uint empty_pos_after_row(byte *dir) +{ + byte *prev; + /* + Find previous used entry. (There is always a previous entry as + the directory never starts with a deleted entry) + */ + for (prev= dir - DIR_ENTRY_SIZE ; + prev[0] == 0 && prev[1] == 0 ; + prev-= DIR_ENTRY_SIZE) + {} + return (uint) uint2korr(prev); +} + + +static my_bool check_if_zero(byte *pos, uint length) +{ + byte *end; + for (end= pos+ length; pos != end ; pos++) + if (pos[0] != 0) + return 1; + return 0; +} + + +/* + Find free postion in directory + + SYNOPSIS + find_free_position() + buff Page + block_size Size of page + res_rownr Store index to free position here + res_length Store length of found segment here + empty_space Store length of empty space on disk here. This is + all empty space, including the found block. + + NOTES + If there is a free directory entry (entry with postion == 0), + then use it and change it to be the size of the empty block + after the previous entry. This guarantees that all row entries + are stored on disk in inverse directory order, which makes life easier for + 'compact_page()' and to know if there is free space after any block. + + If there is no free entry (entry with postion == 0), then we create + a new one. + + We will update the offset and the length of the found dir entry to + match the position and empty space found. 
+ + buff[EMPTY_SPACE_OFFSET] is NOT updated but left up to the caller + + RETURN + 0 Error (directory full) + # Pointer to directory entry on page +*/ + +static byte *find_free_position(byte *buff, uint block_size, uint *res_rownr, + uint *res_length, uint *empty_space) +{ + uint max_entry= (uint) ((uchar*) buff)[DIR_ENTRY_OFFSET]; + uint entry, length, first_pos; + byte *dir, *end; + + dir= (buff + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE); + end= buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + + first_pos= PAGE_HEADER_SIZE; + *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + + /* Search after first empty position */ + for (entry= 0 ; dir <= end ; end-= DIR_ENTRY_SIZE, entry--) + { + if (end[0] == 0 && end[1] == 0) /* Found not used entry */ + { + length= empty_pos_after_row(end) - first_pos; + int2store(end, first_pos); /* Update dir entry */ + int2store(end + 2, length); + *res_rownr= entry; + *res_length= length; + return end; + } + first_pos= uint2korr(end) + uint2korr(end + 2); + } + /* No empty places in dir; create a new one */ + if (max_entry == MAX_ROWS_PER_PAGE) + return 0; + buff[DIR_ENTRY_OFFSET]= (byte) (uchar) max_entry+1; + dir-= DIR_ENTRY_SIZE; + length= (uint) (dir - buff - first_pos); + DBUG_ASSERT(length <= *empty_space - DIR_ENTRY_SIZE); + int2store(dir, first_pos); + int2store(dir+2, length); /* Current max length */ + *res_rownr= max_entry; + *res_length= length; + + /* Reduce directory entry size from free space size */ + (*empty_space)-= DIR_ENTRY_SIZE; + return dir; + +} + +/**************************************************************************** + Updating records +****************************************************************************/ + +/* + Calculate length of all the different field parts +*/ + +static void calc_record_size(MARIA_HA *info, const byte *record, + MARIA_ROW *row) +{ + MARIA_SHARE *share= info->s; + byte *field_length_data; + MARIA_COLUMNDEF *rec, *end_field; + uint blob_count= 0, *null_field_lengths= row->null_field_lengths; + + row->normal_length= row->char_length= row->varchar_length= + row->blob_length= row->extents_count= 0; + + /* Create empty bitmap and calculate length of each varlength/char field */ + bzero(row->empty_bits_buffer, share->base.pack_bytes); + row->empty_bits= row->empty_bits_buffer; + field_length_data= row->field_lengths; + for (rec= share->rec + share->base.fixed_not_null_fields, + end_field= share->rec + share->base.fields; + rec < end_field; rec++, null_field_lengths++) + { + if ((record[rec->null_pos] & rec->null_bit)) + { + if (rec->type != FIELD_BLOB) + *null_field_lengths= 0; + continue; + } + switch ((enum en_fieldtype) rec->type) { + case FIELD_CHECK: + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_ZERO: + DBUG_ASSERT(rec->empty_bit == 0); + /* fall through */ + case FIELD_SKIP_PRESPACE: /* Not packed */ + row->normal_length+= rec->length; + *null_field_lengths= rec->length; + break; + case FIELD_SKIP_ZERO: /* Fixed length field */ + if (memcmp(record+ rec->null_pos, maria_zero_string, + rec->length) == 0) + { + row->empty_bits[rec->empty_pos] |= rec->empty_bit; + *null_field_lengths= 0; + } + else + { + row->normal_length+= rec->length; + *null_field_lengths= rec->length; + } + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + { + const char *pos, *end; + for (pos= record + rec->offset, end= pos + rec->length; + end > pos && end[-1] == ' '; end--) + ; + if (pos == end) /* If empty string */ + { + row->empty_bits[rec->empty_pos]|= rec->empty_bit; + 
*null_field_lengths= 0; + } + else + { + uint length= (end - pos); + if (rec->length <= 255) + *field_length_data++= (byte) (uchar) length; + else + { + int2store(field_length_data, length); + field_length_data+= 2; + } + row->char_length+= length; + *null_field_lengths= length; + } + break; + } + case FIELD_VARCHAR: + { + uint length; + const byte *field_pos= record + rec->offset; + /* 256 is correct as this includes the length byte */ + if (rec->length <= 256) + { + if (!(length= (uint) (uchar) *field_pos)) + { + row->empty_bits[rec->empty_pos]|= rec->empty_bit; + *null_field_lengths= 0; + break; + } + *field_length_data++= *field_pos; + } + else + { + if (!(length= uint2korr(field_pos))) + { + row->empty_bits[rec->empty_pos]|= rec->empty_bit; + break; + } + field_length_data[0]= field_pos[0]; + field_length_data[1]= field_pos[1]; + field_length_data+= 2; + } + row->varchar_length+= length; + *null_field_lengths= length; + break; + } + case FIELD_BLOB: + { + const byte *field_pos= record + rec->offset; + uint size_length= rec->length - maria_portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length, field_pos); + if (!blob_length) + { + row->empty_bits[rec->empty_pos]|= rec->empty_bit; + row->blob_lengths[blob_count++]= 0; + break; + } + row->blob_length+= blob_length; + row->blob_lengths[blob_count++]= blob_length; + memcpy(field_length_data, field_pos, size_length); + field_length_data+= size_length; + break; + } + default: + DBUG_ASSERT(0); + } + } + row->field_lengths_length= (uint) (field_length_data - row->field_lengths); + row->head_length= (row->base_length + + share->base.fixed_not_null_fields_length + + row->field_lengths_length + + size_to_store_key_length(row->field_lengths_length) + + row->normal_length + + row->char_length + row->varchar_length); + row->total_length= (row->head_length + row->blob_length); + if (row->total_length < share->base.min_row_length) + row->total_length= share->base.min_row_length; +} + + +/* + Compact page by removing all space between rows + + IMPLEMENTATION + Move up all rows to start of page. + Move blocks that are directly after each other with one memmove. 
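Both this function and its callers keep re-deriving the address of row-directory entry N from the end of the page, so a compact illustration of that arithmetic may be useful. The sizes below are stand-ins consistent with the layout described in the file header (a 4-byte entry holding a 2-byte offset and a 2-byte length, plus a 1-byte page suffix); the authoritative constants live in ma_blockrec.h, which is not part of this excerpt.

#include <stdio.h>
#include <stdint.h>

#define DEMO_DIR_ENTRY_SIZE   4   /* assumed: 2-byte offset + 2-byte length */
#define DEMO_PAGE_SUFFIX_SIZE 1   /* assumed: trailing test/eof byte        */

/* Directory entries are stored backwards from the end of the page */
static uint8_t *dir_entry(uint8_t *page, unsigned block_size, unsigned rownr)
{
  return page + block_size - DEMO_PAGE_SUFFIX_SIZE -
         DEMO_DIR_ENTRY_SIZE * (rownr + 1);
}

static unsigned get2(const uint8_t *p) { return p[0] | ((unsigned) p[1] << 8); }
static void put2(uint8_t *p, unsigned v)
{ p[0]= (uint8_t) v; p[1]= (uint8_t) (v >> 8); }

int main(void)
{
  enum { BLOCK_SIZE= 8192 };
  static uint8_t page[BLOCK_SIZE];
  uint8_t *dir= dir_entry(page, BLOCK_SIZE, 0);   /* entry for row 0 */
  put2(dir, 24);         /* row data starts at some offset past the header */
  put2(dir + 2, 100);    /* and is 100 bytes long                          */
  printf("row 0: offset=%u length=%u\n", get2(dir), get2(dir + 2));
  /* An offset of 0 marks an unused entry, which compact_page() skips */
  return 0;
}

Entry 0 sits closest to the end of the page and higher row numbers grow towards the data, which is why the code walks the directory with negative DIR_ENTRY_SIZE steps.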
+ + TODO LATER + Remove TRANSID from rows that are visible to all transactions + + SYNOPSIS + compact_page() + buff Page to compact + block_size Size of page + recnr Put empty data after this row +*/ + + +void compact_page(byte *buff, uint block_size, uint rownr) +{ + uint max_entry= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET]; + uint page_pos, next_free_pos, start_of_found_block, diff, end_of_found_block; + byte *dir, *end; + DBUG_ENTER("compact_page"); + DBUG_PRINT("enter", ("rownr: %u", rownr)); + DBUG_ASSERT(max_entry > 0 && + max_entry < (block_size - PAGE_HEADER_SIZE - + PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE); + + /* Move all entries before and including rownr up to start of page */ + dir= buff + block_size - DIR_ENTRY_SIZE * (rownr+1) - PAGE_SUFFIX_SIZE; + end= buff + block_size - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE; + page_pos= next_free_pos= start_of_found_block= PAGE_HEADER_SIZE; + diff= 0; + for (; dir <= end ; end-= DIR_ENTRY_SIZE) + { + uint offset= uint2korr(end); + + if (offset) + { + uint row_length= uint2korr(end + 2); + DBUG_ASSERT(offset >= page_pos); + DBUG_ASSERT(buff + offset + row_length <= dir); + + if (offset != next_free_pos) + { + uint length= (next_free_pos - start_of_found_block); + /* + There was empty space before this and prev block + Check if we have to move prevous block up to page start + */ + if (page_pos != start_of_found_block) + { + /* move up previous block */ + memmove(buff + page_pos, buff + start_of_found_block, length); + } + page_pos+= length; + /* next continous block starts here */ + start_of_found_block= offset; + diff= offset - page_pos; + } + int2store(end, offset - diff); /* correct current pos */ + next_free_pos= offset + row_length; + } + } + if (page_pos != start_of_found_block) + { + uint length= (next_free_pos - start_of_found_block); + memmove(buff + page_pos, buff + start_of_found_block, length); + } + start_of_found_block= uint2korr(dir); + + if (rownr != max_entry - 1) + { + /* Move all entries after rownr to end of page */ + uint rownr_length; + next_free_pos= end_of_found_block= page_pos= + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE; + diff= 0; + /* End points to entry before 'rownr' */ + for (dir= buff + end_of_found_block ; dir <= end ; dir+= DIR_ENTRY_SIZE) + { + uint offset= uint2korr(dir); + uint row_length= uint2korr(dir + 2); + uint row_end= offset + row_length; + if (!offset) + continue; + DBUG_ASSERT(offset >= start_of_found_block && row_end <= next_free_pos); + + if (row_end != next_free_pos) + { + uint length= (end_of_found_block - next_free_pos); + if (page_pos != end_of_found_block) + { + /* move next block down */ + memmove(buff + page_pos - length, buff + next_free_pos, length); + } + page_pos-= length; + /* next continous block starts here */ + end_of_found_block= row_end; + diff= page_pos - row_end; + } + int2store(dir, offset + diff); /* correct current pos */ + next_free_pos= offset; + } + if (page_pos != end_of_found_block) + { + uint length= (end_of_found_block - next_free_pos); + memmove(buff + page_pos - length, buff + next_free_pos, length); + next_free_pos= page_pos- length; + } + /* Extend rownr block to cover hole */ + rownr_length= next_free_pos - start_of_found_block; + int2store(dir+2, rownr_length); + } + else + { + /* Extend last block cover whole page */ + uint length= (uint) (dir - buff) - start_of_found_block; + int2store(dir+2, length); + + buff[PAGE_TYPE_OFFSET]&= ~(byte) PAGE_CAN_BE_COMPACTED; + } + DBUG_EXECUTE("directory", _ma_print_directory(buff, block_size);); + 
DBUG_VOID_RETURN; +} + + + +/* + Read or initialize new head or tail page + + SYNOPSIS + get_head_or_tail_page() + info Maria handler + block Block to read + buff Suggest this buffer to key cache + length Minimum space needed + page_type HEAD_PAGE || TAIL_PAGE + res Store result position here + + NOTES + We don't decremented buff[EMPTY_SPACE_OFFSET] with the allocated data + as we don't know how much data the caller will actually use. + + RETURN + 0 ok All slots in 'res' are updated + 1 error my_errno is set +*/ + +struct st_row_pos_info +{ + byte *buff; /* page buffer */ + byte *data; /* Place for data */ + byte *dir; /* Directory */ + uint length; /* Length for data */ + uint offset; /* Offset to directory */ + uint empty_space; /* Space left on page */ +}; + +static my_bool get_head_or_tail_page(MARIA_HA *info, + MARIA_BITMAP_BLOCK *block, + byte *buff, uint length, uint page_type, + struct st_row_pos_info *res) +{ + uint block_size; + DBUG_ENTER("get_head_or_tail_page"); + + block_size= info->s->block_size; + if (block->org_bitmap_value == 0) /* Empty block */ + { + /* New page */ + bzero(buff, PAGE_HEADER_SIZE); + + /* + We zero the rest of the block to avoid getting old memory information + to disk and to allow the file to be compressed better if archived. + The rest of the code does not assume the block is zeroed above + PAGE_OVERHEAD_SIZE + */ + bzero(buff+ PAGE_HEADER_SIZE + length, + block_size - length - PAGE_HEADER_SIZE - DIR_ENTRY_SIZE - + PAGE_SUFFIX_SIZE); + buff[PAGE_TYPE_OFFSET]= (byte) page_type; + buff[DIR_ENTRY_OFFSET]= 1; + res->buff= buff; + res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE); + res->data= (buff + PAGE_HEADER_SIZE); + res->dir= res->data + res->length; + /* Store poistion to the first row */ + int2store(res->dir, PAGE_HEADER_SIZE); + res->offset= 0; + DBUG_ASSERT(length <= res->length); + } + else + { + byte *dir; + /* Read old page */ + if (!(res->buff= key_cache_read(info->s->key_cache, + info->dfile, + (my_off_t) block->page * block_size, 0, + buff, block_size, block_size, 0))) + DBUG_RETURN(1); + DBUG_ASSERT((res->buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type); + if (!(dir= find_free_position(buff, block_size, &res->offset, + &res->length, &res->empty_space))) + { + if (res->length < length) + { + if (res->empty_space + res->length < length) + { + compact_page(res->buff, block_size, res->offset); + /* All empty space are now after current position */ + res->length= res->empty_space= uint2korr(dir+2); + } + if (res->length < length) + goto crashed; /* Wrong bitmap information */ + } + } + res->dir= dir; + res->data= res->buff + uint2korr(dir); + } + DBUG_RETURN(0); + +crashed: + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_RETURN(1); +} + + +/* + Write tail of non-blob-data or blob + + SYNOPSIS + write_tail() + info Maria handler + block Block to tail page + row_part Data to write to page + length Length of data + + NOTES + block->page_count is updated to the directory offset for the tail + so that we can store the position in the row extent information + + RETURN + 0 ok + block->page_count is set to point (dir entry + TAIL_BIT) + + 1 error; In this case my_errno is set to the error +*/ + +static my_bool write_tail(MARIA_HA *info, + MARIA_BITMAP_BLOCK *block, + byte *row_part, uint length) +{ + MARIA_SHARE *share= share= info->s; + uint block_size= share->block_size, empty_space; + struct st_row_pos_info row_pos; + my_off_t position; + DBUG_ENTER("write_tail"); + DBUG_PRINT("enter", ("page: %lu length: %u", + 
(ulong) block->page, length)); + + info->keybuff_used= 1; + if (get_head_or_tail_page(info, block, info->keyread_buff, length, + TAIL_PAGE, &row_pos)) + DBUG_RETURN(1); + + memcpy(row_pos.data, row_part, length); + int2store(row_pos.dir + 2, length); + empty_space= row_pos.empty_space - length; + int2store(row_pos.buff + EMPTY_SPACE_OFFSET, empty_space); + block->page_count= row_pos.offset + TAIL_BIT; + /* + If there is less directory entries free than number of possible tails + we can write for a row, we mark the page full to ensure that we don't + during _ma_bitmap_find_place() allocate more entires on the tail page + than it can hold + */ + block->empty_space= ((uint) ((uchar*) row_pos.buff)[DIR_ENTRY_OFFSET] <= + MAX_ROWS_PER_PAGE - 1 - info->s->base.blobs ? + empty_space : 0); + block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + + /* Increase data file size, if extended */ + position= (my_off_t) block->page * block_size; + if (info->state->data_file_length <= position) + info->state->data_file_length= position + block_size; + DBUG_RETURN(key_cache_write(share->key_cache, + info->dfile, position, 0, + row_pos.buff, block_size, block_size, 1)); +} + + +/* + Write full pages + + SYNOPSIS + write_full_pages() + info Maria handler + block Where to write data + data Data to write + length Length of data + +*/ + +static my_bool write_full_pages(MARIA_HA *info, + MARIA_BITMAP_BLOCK *block, + byte *data, ulong length) +{ + my_off_t page; + MARIA_SHARE *share= share= info->s; + uint block_size= share->block_size; + uint data_size= FULL_PAGE_SIZE(block_size); + byte *buff= info->keyread_buff; + uint page_count; + my_off_t position; + DBUG_ENTER("write_full_pages"); + DBUG_PRINT("enter", ("length: %lu page: %lu page_count: %lu", + (ulong) length, (ulong) block->page, + (ulong) block->page_count)); + + info->keybuff_used= 1; + page= block->page; + page_count= block->page_count; + + position= (my_off_t) (page + page_count) * block_size; + if (info->state->data_file_length < position) + info->state->data_file_length= position; + + /* Increase data file size, if extended */ + + for (; length; data+= data_size) + { + uint copy_length; + if (!page_count--) + { + block++; + page= block->page; + page_count= block->page_count - 1; + DBUG_PRINT("info", ("page: %lu page_count: %lu", + (ulong) block->page, (ulong) block->page_count)); + + position= (page + page_count + 1) * block_size; + if (info->state->data_file_length < position) + info->state->data_file_length= position; + } + bzero(buff, LSN_SIZE); + buff[PAGE_TYPE_OFFSET]= (byte) BLOB_PAGE; + copy_length= min(data_size, length); + memcpy(buff + LSN_SIZE + PAGE_TYPE_SIZE, data, copy_length); + length-= copy_length; + + if (key_cache_write(share->key_cache, + info->dfile, (my_off_t) page * block_size, 0, + buff, block_size, block_size, 1)) + DBUG_RETURN(1); + page++; + block->used= BLOCKUSED_USED; + } + DBUG_RETURN(0); +} + + + + +/* + Store packed extent data + + SYNOPSIS + store_extent_info() + to Store first packed data here + row_extents_second_part Store rest here + first_block First block to store + count Number of blocks + + NOTES + We don't have to store the position for the head block +*/ + +static void store_extent_info(byte *to, + byte *row_extents_second_part, + MARIA_BITMAP_BLOCK *first_block, + uint count) +{ + MARIA_BITMAP_BLOCK *block, *end_block; + uint copy_length; + my_bool first_found= 0; + + for (block= first_block, end_block= first_block+count ; + block < end_block; block++) + { + /* The following is only false for marker blocks */ + 
if (likely(block->used)) + { + int5store(to, block->page); + int2store(to + 5, block->page_count); + to+= ROW_EXTENT_SIZE; + if (!first_found) + { + first_found= 1; + to= row_extents_second_part; + } + } + } + copy_length= (count -1) * ROW_EXTENT_SIZE; + /* + In some unlikely cases we have allocated to many blocks. Clear this + data. + */ + bzero(to, (my_size_t) (row_extents_second_part + copy_length - to)); +} + +/* + Write a record to a (set of) pages +*/ + +static my_bool write_block_record(MARIA_HA *info, const byte *record, + MARIA_ROW *row, + MARIA_BITMAP_BLOCKS *bitmap_blocks, + struct st_row_pos_info *row_pos) +{ + byte *data, *end_of_data, *tmp_data_used, *tmp_data; + byte *row_extents_first_part, *row_extents_second_part; + byte *field_length_data; + byte *page_buff; + MARIA_BITMAP_BLOCK *block, *head_block; + MARIA_SHARE *share; + MARIA_COLUMNDEF *rec, *end_field; + uint block_size, flag; + ulong *blob_lengths; + my_off_t position; + my_bool row_extents_in_use; + DBUG_ENTER("write_block_record"); + + LINT_INIT(row_extents_first_part); + LINT_INIT(row_extents_second_part); + + share= info->s; + head_block= bitmap_blocks->block; + block_size= share->block_size; + + info->cur_row.lastpos= ma_recordpos(head_block->page, row_pos->offset); + page_buff= row_pos->buff; + data= row_pos->data; + end_of_data= data + row_pos->length; + + /* Write header */ + flag= share->base.default_row_flag; + row_extents_in_use= 0; + if (unlikely(row->total_length > row_pos->length)) + { + /* Need extent */ + if (bitmap_blocks->count <= 1) + goto crashed; /* Wrong in bitmap */ + flag|= ROW_FLAG_EXTENTS; + row_extents_in_use= 1; + } + /* For now we have only a minimum header */ + *data++= (uchar) flag; + if (unlikely(flag & ROW_FLAG_NULLS_EXTENDED)) + *data++= (uchar) (share->base.null_bytes - + share->base.original_null_bytes); + if (row_extents_in_use) + { + /* Store first extent in header */ + store_key_length_inc(data, bitmap_blocks->count - 1); + row_extents_first_part= data; + data+= ROW_EXTENT_SIZE; + } + if (share->base.pack_fields) + store_key_length_inc(data, row->field_lengths_length); + if (share->calc_checksum) + *(data++)= (byte) info->cur_row.checksum; + memcpy(data, record, share->base.null_bytes); + data+= share->base.null_bytes; + memcpy(data, row->empty_bits, share->base.pack_bytes); + data+= share->base.pack_bytes; + + /* + Allocate a buffer of rest of data (except blobs) + + To avoid double copying of data, we copy as many columns that fits into + the page. The rest goes into info->packed_row. + + Using an extra buffer, instead of doing continous writes to different + pages, uses less code and we don't need to have to do a complex call + for every data segment we want to store. + */ + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + row->head_length)) + DBUG_RETURN(1); + + tmp_data_used= 0; /* Either 0 or last used byte in 'data' */ + tmp_data= data; + + if (row_extents_in_use) + { + uint copy_length= (bitmap_blocks->count - 2) * ROW_EXTENT_SIZE; + if (!tmp_data_used && tmp_data + copy_length > end_of_data) + { + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + row_extents_second_part= tmp_data; + /* + We will copy the extents here when we have figured out the tail + positions. 
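The copy loop that follows repeats one idiom: data is appended to the head page until the next piece would overflow, at which point writing switches to the side buffer (info->rec_buff) and tmp_data_used records how far the page was filled. A stripped-down sketch of that idiom, with invented names:

#include <stdio.h>
#include <string.h>

struct demo_writer
{
  char *pos;        /* current write position (page, then side buffer)   */
  char *page_end;   /* end of usable space on the head page              */
  char *page_used;  /* set once we overflow: fill level of the head page */
  char *overflow;   /* start of the side buffer                          */
};

static void demo_append(struct demo_writer *w, const void *piece, size_t len)
{
  if (!w->page_used && w->pos + len > w->page_end)
  {
    /* Piece no longer fits on the page: remember the fill level and
       continue in the overflow buffer */
    w->page_used= w->pos;
    w->pos= w->overflow;
  }
  memcpy(w->pos, piece, len);
  w->pos+= len;
}

int main(void)
{
  char page[16], overflow[64];
  struct demo_writer w= { page, page + sizeof(page), 0, overflow };
  demo_append(&w, "0123456789", 10);   /* still fits on the page           */
  demo_append(&w, "ABCDEFGHIJ", 10);   /* would overflow: goes to overflow */
  printf("head page bytes used: %d\n", (int) (w.page_used - page));
  return 0;
}

In write_block_record() the same switch happens for the extent array, the fixed-length columns, the length array, and each variable-length column in turn.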
+ */ + tmp_data+= copy_length; + } + + /* Copy fields that has fixed lengths (primary key etc) */ + for (rec= share->rec, end_field= rec + share->base.fixed_not_null_fields; + rec < end_field; rec++) + { + if (!tmp_data_used && tmp_data + rec->length > end_of_data) + { + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + memcpy(tmp_data, record + rec->offset, rec->length); + tmp_data+= rec->length; + } + + /* Copy length of data for variable length fields */ + if (!tmp_data_used && tmp_data + row->field_lengths_length > end_of_data) + { + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + field_length_data= row->field_lengths; + memcpy(tmp_data, field_length_data, row->field_lengths_length); + tmp_data+= row->field_lengths_length; + + /* Copy variable length fields and fields with null/zero */ + for (end_field= share->rec + share->base.fields - share->base.blobs; + rec < end_field ; + rec++) + { + const byte *field_pos; + ulong length; + if ((record[rec->null_pos] & rec->null_bit) || + (row->empty_bits[rec->empty_pos] & rec->empty_bit)) + continue; + + field_pos= record + rec->offset; + switch ((enum en_fieldtype) rec->type) { + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_SKIP_PRESPACE: + case FIELD_SKIP_ZERO: /* Fixed length field */ + length= rec->length; + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + /* Char that is space filled */ + if (rec->length <= 255) + length= (uint) (uchar) *field_length_data++; + else + { + length= uint2korr(field_length_data); + field_length_data+= 2; + } + break; + case FIELD_VARCHAR: + if (rec->length <= 256) + { + length= (uint) (uchar) *field_length_data++; + field_pos++; /* Skip length byte */ + } + else + { + length= uint2korr(field_length_data); + field_length_data+= 2; + field_pos+= 2; + } + break; + default: /* Wrong data */ + DBUG_ASSERT(0); + break; + } + if (!tmp_data_used && tmp_data + length > end_of_data) + { + /* Data didn't fit in page; Change to use tmp buffer */ + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + memcpy((char*) tmp_data, (char*) field_pos, length); + tmp_data+= length; + } + + block= head_block + head_block->sub_blocks; /* Point to first blob data */ + + end_field= rec + share->base.blobs; + blob_lengths= row->blob_lengths; + if (!tmp_data_used) + { + /* Still room on page; Copy as many blobs we can into this page */ + data= tmp_data; + for (; rec < end_field && *blob_lengths < (ulong) (end_of_data - data); + rec++, blob_lengths++) + { + byte *tmp_pos; + uint length; + if (!*blob_lengths) /* Null or "" */ + continue; + length= rec->length - maria_portable_sizeof_char_ptr; + memcpy_fixed((byte*) &tmp_pos, record + rec->offset + length, + sizeof(char*)); + memcpy(data, tmp_pos, *blob_lengths); + data+= *blob_lengths; + /* Skip over tail page that was to be used to store blob */ + block++; + bitmap_blocks->tail_page_skipped= 1; + } + if (head_block->sub_blocks > 1) + { + /* We have allocated pages that where not used */ + bitmap_blocks->page_skipped= 1; + } + } + else + data= tmp_data_used; /* Get last used on page */ + + { + /* Update page directory */ + uint length= (uint) (data - row_pos->data); + DBUG_PRINT("info", ("head length: %u", length)); + if (length < info->s->base.min_row_length) + length= info->s->base.min_row_length; + + int2store(row_pos->dir + 2, length); + /* update empty space at start of block */ + row_pos->empty_space-= length; + int2store(page_buff + EMPTY_SPACE_OFFSET, row_pos->empty_space); + /* Mark in bitmaps how the current page was actually used */ + 
head_block->empty_space= row_pos->empty_space; + if (page_buff[DIR_ENTRY_OFFSET] == (char) MAX_ROWS_PER_PAGE) + head_block->empty_space= 0; /* Page is full */ + head_block->used= BLOCKUSED_USED; + } + + /* + Now we have to write tail pages, as we need to store the position + to them in the row extent header. + + We first write out all blob tails, to be able to store them in + the current page or 'tmp_data'. + + Then we write the tail of the non-blob fields (The position to the + tail page is stored either in row header, the extents in the head + page or in the first full page of the non-blob data. It's never in + the tail page of the non-blob data) + */ + + if (row_extents_in_use) + { + if (rec != end_field) /* If blob fields */ + { + MARIA_COLUMNDEF *save_rec= rec; + MARIA_BITMAP_BLOCK *save_block= block; + MARIA_BITMAP_BLOCK *end_block; + ulong *save_blob_lengths= blob_lengths; + + for (; rec < end_field; rec++, blob_lengths++) + { + byte *blob_pos; + if (!*blob_lengths) /* Null or "" */ + continue; + if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL) + { + uint length; + length= rec->length - maria_portable_sizeof_char_ptr; + memcpy_fixed((byte *) &blob_pos, record + rec->offset + length, + sizeof(char*)); + length= *blob_lengths % FULL_PAGE_SIZE(block_size); /* tail size */ + if (write_tail(info, block + block->sub_blocks-1, + blob_pos + *blob_lengths - length, + length)) + goto disk_err; + } + for (end_block= block + block->sub_blocks; block < end_block; block++) + { + /* + Set only a bit, to not cause bitmap code to belive a block is full + when there is still a lot of entries in it + */ + block->used|= BLOCKUSED_USED; + } + } + rec= save_rec; + block= save_block; + blob_lengths= save_blob_lengths; + } + + if (tmp_data_used) /* non blob data overflows */ + { + MARIA_BITMAP_BLOCK *cur_block, *end_block; + MARIA_BITMAP_BLOCK *head_tail_block= 0; + ulong length; + ulong data_length= (tmp_data - info->rec_buff); + +#ifdef SANITY_CHECK + if (cur_block->sub_blocks == 1) + goto crashed; /* no reserved full or tails */ +#endif + + /* + Find out where to write tail for non-blob fields. + + Problem here is that the bitmap code may have allocated more + space than we need. We have to handle the following cases: + + - Bitmap code allocated a tail page we don't need. + - The last full page allocated needs to be changed to a tail page + (Because we put more data than we thought on the head page) + + The reserved pages in bitmap_blocks for the main page has one of + the following allocations: + - Full pages, with following blocks: + # * full pages + empty page ; To be used if we change last full to tail page. This + has 'count' = 0. 
+ tail page (optional, if last full page was part full) + - One tail page + */ + + cur_block= head_block + 1; + end_block= head_block + head_block->sub_blocks; + while (data_length >= (length= (cur_block->page_count * + FULL_PAGE_SIZE(block_size)))) + { +#ifdef SANITY_CHECK + if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_BIT)) + goto crashed; +#endif + data_length-= length; + (cur_block++)->used= BLOCKUSED_USED; + } + if (data_length) + { +#ifdef SANITY_CHECK + if ((cur_block == end_block)) + goto crashed; +#endif + if (cur_block->used & BLOCKUSED_TAIL) + { + DBUG_ASSERT(data_length < MAX_TAIL_SIZE(block_size)); + /* tail written to full tail page */ + cur_block->used= BLOCKUSED_USED; + head_tail_block= cur_block; + } + else if (data_length > length - MAX_TAIL_SIZE(block_size)) + { + /* tail written to full page */ + cur_block->used= BLOCKUSED_USED; + if ((cur_block != end_block - 1) && + (end_block[-1].used & BLOCKUSED_TAIL)) + bitmap_blocks->tail_page_skipped= 1; + } + else + { + /* + cur_block is a full block, followed by an empty and optional + tail block. Change cur_block to a tail block or split it + into full blocks and tail blocks. + */ + DBUG_ASSERT(cur_block[1].page_count == 0); + if (cur_block->page_count == 1) + { + /* convert full block to tail block */ + cur_block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + head_tail_block= cur_block; + } + else + { + DBUG_ASSERT(data_length < length - FULL_PAGE_SIZE(block_size)); + DBUG_PRINT("info", ("Splitting blocks into full and tail")); + cur_block[1].page= (cur_block->page + cur_block->page_count - 1); + cur_block[1].page_count= 1; + cur_block[1].used= 1; + cur_block->page_count--; + cur_block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + head_tail_block= cur_block + 1; + } + if (end_block[-1].used & BLOCKUSED_TAIL) + bitmap_blocks->tail_page_skipped= 1; + } + } + else + { + /* Must be an empty or tail page */ + DBUG_ASSERT(cur_block->page_count == 0 || + cur_block->used & BLOCKUSED_TAIL); + if (end_block[-1].used & BLOCKUSED_TAIL) + bitmap_blocks->tail_page_skipped= 1; + } + + /* + Write all extents into page or tmp_buff + + Note that we still don't have a correct position for the tail + of the non-blob fields. 
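The tail length computed just below is simply the remainder after filling whole pages. A tiny worked sketch; full_page_size() is an assumed stand-in for the FULL_PAGE_SIZE() macro (roughly the block size minus the per-page LSN and page-type bytes):

#include <stdio.h>

/* Assumed stand-in for FULL_PAGE_SIZE(block_size) */
static unsigned long full_page_size(unsigned long block_size)
{
  return block_size - 8;        /* 7-byte LSN + 1-byte page type */
}

int main(void)
{
  unsigned long block_size= 8192;
  unsigned long data_length= 20000;    /* bytes that overflowed the head page */
  unsigned long payload= full_page_size(block_size);
  unsigned long full_pages= data_length / payload;
  unsigned long tail= data_length % payload;
  printf("%lu full pages + a %lu byte tail\n", full_pages, tail);
  return 0;
}

The same remainder computation is used for blob tails a little earlier in this function.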
+ */ + store_extent_info(row_extents_first_part, + row_extents_second_part, + head_block+1, bitmap_blocks->count - 1); + if (head_tail_block) + { + ulong data_length= (tmp_data - info->rec_buff); + uint length; + byte *extent_data; + + length= (uint) (data_length % FULL_PAGE_SIZE(block_size)); + if (write_tail(info, head_tail_block, data + data_length - length, + length)) + goto disk_err; + tmp_data-= length; /* Remove the tail */ + + /* Store the tail position for the non-blob fields */ + if (head_tail_block == head_block + 1) + extent_data= row_extents_first_part; + else + extent_data= row_extents_second_part + + ((head_tail_block - head_block) - 2) * ROW_EXTENT_SIZE; + int5store(extent_data, head_tail_block->page); + int2store(extent_data + 5, head_tail_block->page_count); + } + } + else + store_extent_info(row_extents_first_part, + row_extents_second_part, + head_block+1, bitmap_blocks->count - 1); + } + /* Increase data file size, if extended */ + position= (my_off_t) head_block->page * block_size; + if (info->state->data_file_length <= position) + info->state->data_file_length= position + block_size; + if (key_cache_write(share->key_cache, + info->dfile, position, 0, + page_buff, share->block_size, share->block_size, 1)) + goto disk_err; + + if (tmp_data_used) + { + /* Write data stored in info->rec_buff to pages */ + DBUG_ASSERT(bitmap_blocks->count != 0); + if (write_full_pages(info, bitmap_blocks->block + 1, info->rec_buff, + (ulong) (tmp_data - info->rec_buff))) + goto disk_err; + } + + /* Write rest of blobs (data, but no tails as they are already written) */ + for (; rec < end_field; rec++, blob_lengths++) + { + byte *blob_pos; + uint length; + ulong blob_length; + if (!*blob_lengths) /* Null or "" */ + continue; + length= rec->length - maria_portable_sizeof_char_ptr; + memcpy_fixed((byte*) &blob_pos, record + rec->offset + length, + sizeof(char*)); + /* remove tail part */ + blob_length= *blob_lengths; + if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL) + blob_length-= (blob_length % FULL_PAGE_SIZE(block_size)); + + if (write_full_pages(info, block, blob_pos, blob_length)) + goto disk_err; + block+= block->sub_blocks; + } + /* Release not used space in used pages */ + if (_ma_bitmap_release_unused(info, bitmap_blocks)) + goto disk_err; + DBUG_RETURN(0); + +crashed: + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ +disk_err: + /* Something was wrong with data on record */ + DBUG_RETURN(1); +} + + +/* + Write a record (to get the row id for it) + + SYNOPSIS + _ma_write_init_block_record() + info Maria handler + record Record to write + + NOTES + This is done BEFORE we write the keys to the row! 
+ + RETURN + HA_OFFSET_ERROR Something went wrong + # Rowid for row +*/ + +MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info, + const byte *record) +{ + MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; + struct st_row_pos_info row_pos; + DBUG_ENTER("_ma_write_init_block_record"); + + calc_record_size(info, record, &info->cur_row); + if (_ma_bitmap_find_place(info, &info->cur_row, blocks)) + DBUG_RETURN(HA_OFFSET_ERROR); /* Error reding bitmap */ + if (get_head_or_tail_page(info, blocks->block, info->buff, + info->s->base.min_row_length, HEAD_PAGE, &row_pos)) + DBUG_RETURN(HA_OFFSET_ERROR); + info->cur_row.lastpos= ma_recordpos(blocks->block->page, row_pos.offset); + if (info->s->calc_checksum) + info->cur_row.checksum= (info->s->calc_checksum)(info,record); + if (write_block_record(info, record, &info->cur_row, blocks, &row_pos)) + DBUG_RETURN(HA_OFFSET_ERROR); /* Error reading bitmap */ + DBUG_PRINT("exit", ("Rowid: %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(info->cur_row.lastpos); +} + + +/* + Dummy function for (*info->s->write_record)() + + Nothing to do here, as we already wrote the record in + _ma_write_init_block_record() +*/ + +my_bool _ma_write_block_record(MARIA_HA *info __attribute__ ((unused)), + const byte *record __attribute__ ((unused)) +) +{ + return 0; /* Row already written */ +} + + +/* + Remove row written by _ma_write_block_record + + SYNOPSIS + _ma_abort_write_block_record() + info Maria handler + + INFORMATION + This is called in case we got a duplicate unique key while + writing keys. + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_write_abort_block_record(MARIA_HA *info) +{ + my_bool res= 0; + MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; + MARIA_BITMAP_BLOCK *block, *end; + DBUG_ENTER("_ma_abort_write_block_record"); + + if (delete_head_or_tail(info, + ma_recordpos_to_page(info->cur_row.lastpos), + ma_recordpos_to_offset(info->cur_row.lastpos), 1)) + res= 1; + for (block= blocks->block + 1, end= block + blocks->count - 1; block < end; + block++) + { + if (block->used & BLOCKUSED_TAIL) + { + /* + block->page_count is set to the tail directory entry number in + write_block_record() + */ + if (delete_head_or_tail(info, block->page, block->page_count & ~TAIL_BIT, + 0)) + res= 1; + } + else + { + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + if (_ma_reset_full_page_bits(info, &info->s->bitmap, block->page, + block->page_count)) + res= 1; + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + } + } + DBUG_RETURN(res); +} + + +/* + Update a record + + NOTES + For the moment, we assume that info->curr_row.extents is always updated + when a row is read. In the future we may decide to read this on demand + for rows split into many extents. 
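The first decision the function makes is whether the new version of the row still fits on the old head page: the old head bytes are reclaimed by the rewrite, so the space available is the page's recorded empty space plus the length of the old head part (compacting the page first if that space is fragmented). A condensed sketch of that test, with invented names:

#include <stdio.h>

struct demo_fit
{
  unsigned page_empty_space;   /* empty space recorded on the head page */
  unsigned old_head_length;    /* bytes the old head part occupies      */
  unsigned new_total_length;   /* total size of the new row version     */
};

static int fits_in_place(const struct demo_fit *f)
{
  /* Mirrors: org_empty_size + cur_row->head_length >= new_row->total_length */
  return f->page_empty_space + f->old_head_length >= f->new_total_length;
}

int main(void)
{
  struct demo_fit small_update= { 120, 200, 250 };
  struct demo_fit grown_row=    {  10, 200, 600 };
  printf("small update in place: %d\n", fits_in_place(&small_update));
  printf("grown row in place:    %d\n", fits_in_place(&grown_row));
  return 0;
}

If the test fails, the old tails and full-page extents are freed and a completely new set of blocks is requested from the bitmap.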
+*/ + +my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS record_pos, + const byte *record) +{ + MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; + byte *buff; + MARIA_ROW *cur_row= &info->cur_row, *new_row= &info->new_row; + uint rownr, org_empty_size, head_length; + uint block_size= info->s->block_size; + byte *dir; + ulonglong page; + struct st_row_pos_info row_pos; + DBUG_ENTER("_ma_update_block_record"); + DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos)); + + calc_record_size(info, record, new_row); + page= ma_recordpos_to_page(record_pos); + + if (!(buff= key_cache_read(info->s->key_cache, + info->dfile, (my_off_t) page * block_size, 0, + info->buff, block_size, block_size, 0))) + DBUG_RETURN(1); + org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET); + rownr= ma_recordpos_to_offset(record_pos); + dir= (buff + block_size - DIR_ENTRY_SIZE * rownr - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + + if ((org_empty_size + cur_row->head_length) >= new_row->total_length) + { + uint empty, offset, length; + MARIA_BITMAP_BLOCK block; + + /* + We can fit the new row in the same page as the original head part + of the row + */ + block.org_bitmap_value= _ma_free_size_to_head_pattern(&info->s->bitmap, + org_empty_size); + offset= uint2korr(dir); + length= uint2korr(dir + 2); + empty= 0; + if (new_row->total_length > length) + { + /* See if there is empty space after */ + if (rownr != (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET] - 1) + empty= empty_pos_after_row(dir) - (offset + length); + if (new_row->total_length > length + empty) + { + compact_page(buff, info->s->block_size, rownr); + org_empty_size= 0; + length= uint2korr(dir + 2); + } + } + row_pos.buff= buff; + row_pos.offset= rownr; + row_pos.empty_space= org_empty_size + length; + row_pos.dir= dir; + row_pos.data= buff + uint2korr(dir); + row_pos.length= length + empty; + blocks->block= █ + blocks->count= 1; + block.page= page; + block.sub_blocks= 1; + block.used= BLOCKUSED_USED | BLOCKUSED_USE_ORG_BITMAP; + block.empty_space= row_pos.empty_space; + /* Update cur_row, if someone calls update at once again */ + cur_row->head_length= new_row->total_length; + if (_ma_bitmap_free_full_pages(info, cur_row->extents, + cur_row->extents_count)) + DBUG_RETURN(1); + DBUG_RETURN(write_block_record(info, record, new_row, blocks, &row_pos)); + } + /* + Allocate all size in block for record + QQ: Need to improve this to do compact if we can fit one more blob into + the head page + */ + head_length= uint2korr(dir + 2); + if (buff[PAGE_TYPE_OFFSET] & PAGE_CAN_BE_COMPACTED && org_empty_size && + (head_length < new_row->head_length || + (new_row->total_length <= head_length && + org_empty_size + head_length >= new_row->total_length))) + { + compact_page(buff, info->s->block_size, rownr); + org_empty_size= 0; + head_length= uint2korr(dir + 2); + } + + /* Delete old row */ + if (delete_tails(info, cur_row->tail_positions)) + DBUG_RETURN(1); + if (_ma_bitmap_free_full_pages(info, cur_row->extents, + cur_row->extents_count)) + DBUG_RETURN(1); + if (_ma_bitmap_find_new_place(info, new_row, page, head_length, blocks)) + DBUG_RETURN(1); + + row_pos.buff= buff; + row_pos.offset= rownr; + row_pos.empty_space= org_empty_size + head_length; + row_pos.dir= dir; + row_pos.data= buff + uint2korr(dir); + row_pos.length= head_length; + DBUG_RETURN(write_block_record(info, record, new_row, blocks, &row_pos)); +} + + +/* + Delete a head a tail part + + SYNOPSIS + delete_head_or_tail() + info Maria handler + page Page (not file offset!) 
on which the row is + head 1 if this is a head page + + NOTES + Uses info->keyread_buff + + RETURN + 0 ok + 1 error +*/ + +static my_bool delete_head_or_tail(MARIA_HA *info, + ulonglong page, uint record_number, + my_bool head) +{ + MARIA_SHARE *share= info->s; + uint number_of_records, empty_space, length; + uint block_size= share->block_size; + byte *buff, *dir; + my_off_t position; + DBUG_ENTER("delete_head_or_tail"); + + info->keybuff_used= 1; + if (!(buff= key_cache_read(share->key_cache, + info->dfile, page * block_size, 0, + info->keyread_buff, + block_size, block_size, 0))) + DBUG_RETURN(1); + + number_of_records= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET]; +#ifdef SANITY_CHECKS + if (record_number >= number_of_records || + record_number > MAX_ROWS_PER_PAGE || + record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 - + PAGE_SUFFIX_SIZE) / (DIR_ENTRY_SIZE + MIN_TAIL_SIZE))) + { + DBUG_PRINT("error", ("record_number: %u number_of_records: %u", + record_number, number_of_records)); + DBUG_RETURN(1); + } +#endif + + dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + dir[0]= dir[1]= 0; /* Delete entry */ + length= uint2korr(dir + 2); + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + + if (record_number == number_of_records - 1) + { + /* Delete this entry and all following empty directory entries */ + byte *end= buff + block_size - PAGE_SUFFIX_SIZE; + do + { + number_of_records--; + dir+= DIR_ENTRY_SIZE; + empty_space+= DIR_ENTRY_SIZE; + } while (dir < end && dir[0] == 0 && dir[1] == 0); + buff[DIR_ENTRY_OFFSET]= (byte) (uchar) number_of_records; + } + empty_space+= length; + if (number_of_records != 0) + { + int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + buff[PAGE_TYPE_OFFSET]|= (byte) PAGE_CAN_BE_COMPACTED; + position= (my_off_t) page * block_size; + if (key_cache_write(share->key_cache, + info->dfile, position, 0, + buff, block_size, block_size, 1)) + DBUG_RETURN(1); + } + else + { + DBUG_ASSERT(empty_space >= info->s->bitmap.sizes[0]); + } + DBUG_PRINT("info", ("empty_space: %u", empty_space)); + DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space)); +} + + +/* + delete all tails + + SYNOPSIS + delete_tails() + info Handler + tails Pointer to vector of tail positions, ending with 0 + + NOTES + Uses info->keyread_buff + + RETURN + 0 ok + 1 error +*/ + +static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails) +{ + my_bool res= 0; + DBUG_ENTER("delete_tails"); + for (; *tails; tails++) + { + if (delete_head_or_tail(info, + ma_recordpos_to_page(*tails), + ma_recordpos_to_offset(*tails), 0)) + res= 1; + } + DBUG_RETURN(res); +} + + +/* + Delete a record + + NOTES + For the moment, we assume that info->cur_row.extents is always updated + when a row is read. In the future we may decide to read this on demand + for rows with many splits. 
+*/ + +my_bool _ma_delete_block_record(MARIA_HA *info) +{ + DBUG_ENTER("_ma_delete_block_record"); + if (delete_head_or_tail(info, + ma_recordpos_to_page(info->cur_row.lastpos), + ma_recordpos_to_offset(info->cur_row.lastpos), + 1) || + delete_tails(info, info->cur_row.tail_positions)) + DBUG_RETURN(1); + DBUG_RETURN(_ma_bitmap_free_full_pages(info, info->cur_row.extents, + info->cur_row.extents_count)); +} + + +/**************************************************************************** + Reading of records +****************************************************************************/ + +/* + Read position to record from record directory at end of page + + SYNOPSIS + get_record_position() + buff page buffer + block_size block size for page + record_number Record number in index + end_of_data pointer to end of data for record + + RETURN + 0 Error in data + # Pointer to start of record. + In this case *end_of_data is set. +*/ + +static byte *get_record_position(byte *buff, uint block_size, + uint record_number, byte **end_of_data) +{ + uint number_of_records= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET]; + byte *dir; + byte *data; + uint offset, length; + +#ifdef SANITY_CHECKS + if (record_number >= number_of_records || + record_number > MAX_ROWS_PER_PAGE || + record_number > ((block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE) / + (DIR_ENTRY_SIZE + MIN_TAIL_SIZE))) + { + DBUG_PRINT("error", + ("Wrong row number: record_number: %u number_of_records: %u", + record_number, number_of_records)); + return 0; + } +#endif + + dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + offset= uint2korr(dir); + length= uint2korr(dir + 2); +#ifdef SANITY_CHECKS + if (offset < PAGE_HEADER_SIZE || + offset + length > (block_size - + number_of_records * DIR_ENTRY_SIZE - + PAGE_SUFFIX_SIZE)) + { + DBUG_PRINT("error", + ("Wrong row position: record_number: %u offset: %u " + "length: %u number_of_records: %u", + record_number, offset, length, number_of_records)); + return 0; + } +#endif + data= buff + offset; + *end_of_data= data + length; + return data; +} + + +/* + Init extent + + NOTES + extent is a cursor over which pages to read +*/ + +static void init_extent(MARIA_EXTENT_CURSOR *extent, byte *extent_info, + uint extents, MARIA_RECORD_POS *tail_positions) +{ + uint page_count; + extent->extent= extent_info; + extent->extent_count= extents; + extent->page= uint5korr(extent_info); /* First extent */ + page_count= uint2korr(extent_info+5); + extent->page_count= page_count & ~TAIL_BIT; + extent->tail= page_count & TAIL_BIT; + extent->tail_positions= tail_positions; +} + + +/* + Read next extent + + SYNOPSIS + read_next_extent() + info Maria handler + extent Pointer to current extent (this is updated to point + to next) + end_of_data Pointer to end of data in read block (out) + + NOTES + New block is read into info->buff + + RETURN + 0 Error; my_errno is set + # Pointer to start of data in read block + In this case end_of_data is updated to point to end of data. 
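In outline, the cursor hands out one page per call: it consumes pages from the current extent and, when that extent is exhausted, steps to the next 7-byte entry. The sketch below shows only that stepping for full-page extents; tail extents, where the count field is really a position in the tail page's directory, are handled specially by the real function. All names are invented for the sketch.

#include <stdio.h>
#include <stdint.h>

#define DEMO_TAIL_BIT 0x8000u

struct demo_cursor
{
  const uint8_t *extent;   /* current 7-byte extent entry      */
  unsigned extents_left;   /* entries not yet fully consumed   */
  uint64_t page;           /* next page to hand out            */
  unsigned page_count;     /* pages left in the current extent */
};

static void demo_load(struct demo_cursor *c)
{
  unsigned pc;
  int i;
  c->page= 0;
  for (i= 4; i >= 0; i--)                       /* 5-byte start page */
    c->page= (c->page << 8) | c->extent[i];
  pc= c->extent[5] | ((unsigned) c->extent[6] << 8);
  c->page_count= pc & ~DEMO_TAIL_BIT;  /* NB: a directory position for tails */
}

/* Next page to read, or -1 once every extent is consumed */
static long long demo_next_page(struct demo_cursor *c)
{
  if (!c->page_count)
  {
    if (!--c->extents_left)
      return -1;
    c->extent+= 7;
    demo_load(c);
  }
  c->page_count--;
  return (long long) c->page++;
}

int main(void)
{
  /* Two extents: pages 10-11 (count 2) and page 40 (count 1), no tail bits */
  static const uint8_t extents[14]=
  { 10, 0, 0, 0, 0,  2, 0,   40, 0, 0, 0, 0,  1, 0 };
  struct demo_cursor c= { extents, 2, 0, 0 };
  long long p;
  demo_load(&c);
  while ((p= demo_next_page(&c)) >= 0)
    printf("read page %lld\n", p);
  return 0;
}

The real cursor additionally records tail positions in cur_row.tail_positions and checks that it never reads past the end of the data file.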
+*/ + +static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, + byte **end_of_data) +{ + MARIA_SHARE *share= info->s; + byte *buff, *data; + DBUG_ENTER("read_next_extent"); + + if (!extent->page_count) + { + uint page_count; + if (!--extent->extent_count) + goto crashed; + extent->extent+= ROW_EXTENT_SIZE; + extent->page= uint5korr(extent->extent); + page_count= uint2korr(extent->extent+ROW_EXTENT_PAGE_SIZE); + extent->tail= page_count & TAIL_BIT; + extent->page_count= (page_count & ~TAIL_BIT); + extent->first_extent= 0; + DBUG_PRINT("info",("New extent. Page: %lu page_count: %u tail_flag: %d", + (ulong) extent->page, extent->page_count, + extent->tail != 0)); + } + + if (info->cur_row.empty_bits != info->cur_row.empty_bits_buffer) + { + /* + First read of extents: Move data from info->buff to + internals buffers. + */ + memcpy(info->cur_row.empty_bits_buffer, info->cur_row.empty_bits, + share->base.pack_bytes); + info->cur_row.empty_bits= info->cur_row.empty_bits_buffer; + } + + if (!(buff= key_cache_read(share->key_cache, + info->dfile, extent->page * share->block_size, 0, + info->buff, + share->block_size, share->block_size, 0))) + { + /* check if we tried to read over end of file (ie: bad data in record) */ + if ((extent->page + 1) * share->block_size > info->state->data_file_length) + goto crashed; + DBUG_RETURN(0); + } + if (!extent->tail) + { + /* Full data page */ + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == BLOB_PAGE); + extent->page++; /* point to next page */ + extent->page_count--; + *end_of_data= buff + share->block_size; + info->cur_row.full_page_count++; /* For maria_chk */ + DBUG_RETURN(extent->data_start= buff + LSN_SIZE + PAGE_TYPE_SIZE); + } + /* Found tail. page_count is in this case the position in the tail page */ + + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == TAIL_PAGE); + *(extent->tail_positions++)= ma_recordpos(extent->page, + extent->page_count); + info->cur_row.tail_count++; /* For maria_chk */ + + if (!(data= get_record_position(buff, share->block_size, + extent->page_count, + end_of_data))) + goto crashed; + extent->data_start= data; + extent->page_count= 0; /* No more data in extent */ + DBUG_RETURN(data); + + +crashed: + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_PRINT("error", ("wrong extent information")); + DBUG_RETURN(0); +} + + +/* + Read data that may be split over many blocks + + SYNOPSIS + read_long_data() + info Maria handler + to Store result string here (this is allocated) + extent Pointer to current extent position + data Current position in buffer + end_of_data End of data in buffer + + NOTES + When we have to read a new buffer, it's read into info->buff + + This loop is implemented by goto's instead of a for() loop as + the code is notable smaller and faster this way (and it's not nice + to jump into a for loop() or into a 'then' clause) + + RETURN + 0 ok + 1 error +*/ + +static my_bool read_long_data(MARIA_HA *info, byte *to, ulong length, + MARIA_EXTENT_CURSOR *extent, + byte **data, byte **end_of_data) +{ + DBUG_ENTER("read_long_data"); + DBUG_PRINT("enter", ("length: %lu", length)); + DBUG_ASSERT(*data <= *end_of_data); + + for(;;) + { + uint left_length; + left_length= (uint) (*end_of_data - *data); + if (likely(left_length >= length)) + { + memcpy(to, *data, length); + (*data)+= length; + DBUG_RETURN(0); + } + memcpy(to, *data, left_length); + to+= left_length; + length-= left_length; + if (!(*data= read_next_extent(info, extent, end_of_data))) + break; + } + DBUG_RETURN(1); 
+} + + +/* + Read a record from page (helper function for _ma_read_block_record()) + + SYNOPSIS + _ma_read_block_record2() + info Maria handler + record Store record here + data Start of head data for row + end_of_data End of data for row + + NOTES + The head page is already read by caller + The following data is updated in info->cur_row: + + cur_row.head_length is set to size of entry in head block + cur_row.tail_positions is set to point to all tail blocks + cur_row.extents points to extents data + cur_row.extents_count contains number of extents + cur_row.empty_bits points to empty bits part in read record + cur_row.field_lengths contains packed length of all fields + + RETURN + 0 ok + # Error code +*/ + +int _ma_read_block_record2(MARIA_HA *info, byte *record, + byte *data, byte *end_of_data) +{ + MARIA_SHARE *share= info->s; + byte *field_length_data, *blob_buffer, *start_of_data; + uint flag, null_bytes, cur_null_bytes, row_extents, field_lengths; + my_bool found_blob= 0; + MARIA_EXTENT_CURSOR extent; + MARIA_COLUMNDEF *rec, *end_field; + DBUG_ENTER("_ma_read_block_record2"); + + LINT_INIT(field_lengths); + LINT_INIT(field_length_data); + LINT_INIT(blob_buffer); + + start_of_data= data; + flag= (uint) (uchar) data[0]; + cur_null_bytes= share->base.original_null_bytes; + null_bytes= share->base.null_bytes; + info->cur_row.head_length= (uint) (end_of_data - data); + info->cur_row.full_page_count= info->cur_row.tail_count= 0; + + /* Skip trans header (for now, until we have MVCC support) */ + data+= total_header_size[(flag & PRECALC_HEADER_BITMASK)]; + if (flag & ROW_FLAG_NULLS_EXTENDED) + cur_null_bytes+= data[-1]; + + row_extents= 0; + if (flag & ROW_FLAG_EXTENTS) + { + uint row_extent_size; + /* + Record is split over many data pages. + Get number of extents and first extent + */ + get_key_length(row_extents, data); + info->cur_row.extents_count= row_extents; + row_extent_size= row_extents * ROW_EXTENT_SIZE; + if (info->cur_row.extents_buffer_length < row_extent_size && + _ma_alloc_buffer(&info->cur_row.extents, + &info->cur_row.extents_buffer_length, + row_extent_size)) + DBUG_RETURN(my_errno); + memcpy(info->cur_row.extents, data, ROW_EXTENT_SIZE); + data+= ROW_EXTENT_SIZE; + init_extent(&extent, info->cur_row.extents, row_extents, + info->cur_row.tail_positions); + } + else + { + info->cur_row.extents_count= 0; + (*info->cur_row.tail_positions)= 0; + extent.page_count= 0; + extent.extent_count= 1; + } + extent.first_extent= 1; + + if (share->base.max_field_lengths) + { + get_key_length(field_lengths, data); +#ifdef SANITY_CHECKS + if (field_lengths > share->base.max_field_lengths) + goto err; +#endif + } + + if (share->calc_checksum) + info->cur_row.checksum= (uint) (uchar) *data++; + /* data now points to null bits */ + memcpy(record, data, cur_null_bytes); + if (unlikely(cur_null_bytes != null_bytes)) + { + /* + This only happens if we have added more NULL columns with + ALTER TABLE and are fetching an old, not yet modified row + */ + bzero(record + cur_null_bytes, (uint) (null_bytes - cur_null_bytes)); + } + data+= null_bytes; + info->cur_row.empty_bits= (byte*) data; /* Pointer to empty bitmask */ + data+= share->base.pack_bytes; + + /* TODO: Use field offsets, instead of just skipping them */ + data+= share->base.field_offsets * FIELD_OFFSET_SIZE; + + /* + Read row extents (note that first extent was already read into + info->cur_row.extents above) + */ + if (row_extents) + { + if (read_long_data(info, info->cur_row.extents + ROW_EXTENT_SIZE, + (row_extents - 1) *
ROW_EXTENT_SIZE, + &extent, &data, &end_of_data)) + DBUG_RETURN(my_errno); + } + + /* + Data now points to start of fixed length field data that can't be null + or 'empty'. Note that these fields can't be split over blocks + */ + for (rec= share->rec, end_field= rec + share->base.fixed_not_null_fields; + rec < end_field; rec++) + { + uint rec_length= rec->length; + if (data >= end_of_data && + !(data= read_next_extent(info, &extent, &end_of_data))) + goto err; + memcpy(record + rec->offset, data, rec_length); + data+= rec_length; + } + + /* Read array of field lengths. This may be stored in several extents */ + if (share->base.max_field_lengths) + { + field_length_data= info->cur_row.field_lengths; + if (read_long_data(info, field_length_data, field_lengths, &extent, + &data, &end_of_data)) + DBUG_RETURN(my_errno); + } + + /* Read variable length data. Each of these may be split over many extents */ + for (end_field= share->rec + share->base.fields; rec < end_field; rec++) + { + enum en_fieldtype type= (enum en_fieldtype) rec->type; + byte *field_pos= record + rec->offset; + /* First check if field is present in record */ + if (record[rec->null_pos] & rec->null_bit) + continue; + else if (info->cur_row.empty_bits[rec->empty_pos] & rec->empty_bit) + { + if (type == FIELD_SKIP_ENDSPACE) + bfill(record + rec->offset, rec->length, ' '); + else + bzero(record + rec->offset, rec->fill_length); + continue; + } + switch (type) { + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_SKIP_PRESPACE: + case FIELD_SKIP_ZERO: /* Fixed length field */ + if (data >= end_of_data && + !(data= read_next_extent(info, &extent, &end_of_data))) + goto err; + memcpy(field_pos, data, rec->length); + data+= rec->length; + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + { + /* Char that is space filled */ + uint length; + if (rec->length <= 255) + length= (uint) (uchar) *field_length_data++; + else + { + length= uint2korr(field_length_data); + field_length_data+= 2; + } +#ifdef SANITY_CHECKS + if (length > rec->length) + goto err; +#endif + if (read_long_data(info, field_pos, length, &extent, &data, + &end_of_data)) + DBUG_RETURN(my_errno); + bfill(field_pos + length, rec->length - length, ' '); + break; + } + case FIELD_VARCHAR: + { + ulong length; + if (rec->length <= 256) + { + length= (uint) (uchar) (*field_pos++= *field_length_data++); + } + else + { + length= uint2korr(field_length_data); + field_pos[0]= field_length_data[0]; + field_pos[1]= field_length_data[1]; + field_pos+= 2; + field_length_data+= 2; + } + if (read_long_data(info, field_pos, length, &extent, &data, + &end_of_data)) + DBUG_RETURN(my_errno); + break; + } + case FIELD_BLOB: + { + uint size_length= rec->length - maria_portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length, field_length_data); + + if (!found_blob) + { + /* Calculate total length for all blobs */ + ulong blob_lengths= 0; + byte *length_data= field_length_data; + MARIA_COLUMNDEF *blob_field= rec; + + found_blob= 1; + for (; blob_field < end_field; blob_field++) + { + uint size_length; + if ((record[blob_field->null_pos] & blob_field->null_bit) || + (info->cur_row.empty_bits[blob_field->empty_pos] & + blob_field->empty_bit)) + continue; + size_length= blob_field->length - maria_portable_sizeof_char_ptr; + blob_lengths+= _ma_calc_blob_length(size_length, length_data); + length_data+= size_length; + } + DBUG_PRINT("info", ("Total blob length: %lu", blob_lengths)); + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + blob_lengths)) + 
DBUG_RETURN(my_errno); + blob_buffer= info->rec_buff; + } + + memcpy(field_pos, field_length_data, size_length); + memcpy_fixed(field_pos + size_length, (byte *) & blob_buffer, + sizeof(char*)); + field_length_data+= size_length; + + /* + After we have read one extent, each blob is in its own extent + */ + if (extent.first_extent && (ulong) (end_of_data - data) < blob_length) + end_of_data= data; /* Force read of next extent */ + + if (read_long_data(info, blob_buffer, blob_length, &extent, &data, + &end_of_data)) + DBUG_RETURN(my_errno); + blob_buffer+= blob_length; + break; + } + default: +#ifdef EXTRA_DEBUG + DBUG_ASSERT(0); /* purecov: deadcode */ +#endif + goto err; + } + continue; + } + + if (row_extents) + { + DBUG_PRINT("info", ("Row read: page_count: %u extent_count: %u", + extent.page_count, extent.extent_count)); + *extent.tail_positions= 0; /* End marker */ + if (extent.page_count) + goto err; + if (extent.extent_count > 1) + if (check_if_zero(extent.extent, + (extent.extent_count-1) * ROW_EXTENT_SIZE)) + goto err; + } + else + { + DBUG_PRINT("info", ("Row read")); + if (data != end_of_data && (uint) (end_of_data - start_of_data) >= + info->s->base.min_row_length) + goto err; + } + + info->update|= HA_STATE_AKTIV; /* We have an active record */ + DBUG_RETURN(0); + +err: + /* Something was wrong with data on record */ + DBUG_PRINT("error", ("Found record with wrong data")); + DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); +} + + +/* + Read a record based on record position + + SYNOPSIS + _ma_read_block_record() + info Maria handler + record Store record here + record_pos Record position +*/ + +int _ma_read_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS record_pos) +{ + byte *data, *end_of_data, *buff; + my_off_t page; + uint offset; + uint block_size= info->s->block_size; + DBUG_ENTER("_ma_read_block_record"); + DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos)); + + page= ma_recordpos_to_page(record_pos) * block_size; + offset= ma_recordpos_to_offset(record_pos); + + if (!(buff= key_cache_read(info->s->key_cache, + info->dfile, page, 0, info->buff, + block_size, block_size, 1))) + DBUG_RETURN(1); + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == HEAD_PAGE); + if (!(data= get_record_position(buff, block_size, offset, &end_of_data))) + { + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_PRINT("error", ("Wrong directory entry in data block")); + DBUG_RETURN(1); + } + DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data)); +} + + +/* compare unique constraint between stored rows */ + +my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos) +{ + byte *org_rec_buff, *old_record; + my_size_t org_rec_buff_size; + int error; + DBUG_ENTER("_ma_cmp_block_unique"); + + if (!(old_record= my_alloca(info->s->base.reclength))) + DBUG_RETURN(1); + + /* Don't let the compare destroy blobs that may be in use */ + org_rec_buff= info->rec_buff; + org_rec_buff_size= info->rec_buff_size; + if (info->s->base.blobs) + { + /* Force realloc of record buffer */ + info->rec_buff= 0; + info->rec_buff_size= 0; + } + error= _ma_read_block_record(info, old_record, pos); + if (!error) + error= _ma_unique_comp(def, record, old_record, def->null_are_equal); + if (info->s->base.blobs) + { + my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + info->rec_buff= org_rec_buff; + info->rec_buff_size= org_rec_buff_size; + } + DBUG_PRINT("exit", ("result: %d", error)); + my_afree(old_record); +
DBUG_RETURN(error != 0); +} + + +/**************************************************************************** + Table scan +****************************************************************************/ + +/* + Allocate buffers for table scan + + SYNOPSIS + _ma_scan_init_block_record(MARIA_HA *info) + + IMPLEMENTATION + We allocate one buffer for the current bitmap and one buffer for the + current page +*/ + +my_bool _ma_scan_init_block_record(MARIA_HA *info) +{ + byte *ptr; + if (!(ptr= (byte *) my_malloc(info->s->block_size * 2, MYF(MY_WME)))) + return (1); + info->scan.bitmap_buff= ptr; + info->scan.page_buff= ptr + info->s->block_size; + info->scan.bitmap_end= info->scan.bitmap_buff + info->s->bitmap.total_size; + + /* Set scan variables to get _ma_scan_block_record() to start with reading bitmap */ + info->scan.number_of_rows= 0; + info->scan.bitmap_pos= info->scan.bitmap_end; + info->scan.bitmap_page= (ulong) - (long) info->s->bitmap.pages_covered; + /* + We have to flush bitmap as we will read the bitmap from the page cache + while scanning rows + */ + return _ma_flush_bitmap(info->s); +} + + +/* Free buffers allocated by _ma_scan_init_block_record() */ + +void _ma_scan_end_block_record(MARIA_HA *info) +{ + my_free(info->scan.bitmap_buff, MYF(0)); + info->scan.bitmap_buff= 0; +} + + +/* + Read next record while scanning table + + SYNOPSIS + _ma_scan_block_record() + info Maria handler + record Store found here + record_pos Value stored in info->cur_row.nextpos after last call + skip_deleted + + NOTES + - One must have called maria_scan() before this + - In this version, we don't actually need record_pos, we could as easily + use a variable in info->scan + + IMPLEMENTATION + Current code uses a lot of goto's to separate the different kinds of + states we may be in. This gives us a minimum of executed if's for + the normal cases. I tried several different ways to code this, but + the current one was in the end the most readable and fastest.
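+
+    Bitmap entries are scanned six bytes (16 pages, 3 bits per page) at a
+    time; a 3-bit pattern in the range 1..4 marks a head page that may
+    contain rows, which is then read and its row directory walked in the
+    restart_record_read part of the code.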
+ + RETURN + 0 ok + # Error code +*/ + +int _ma_scan_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS record_pos, + my_bool skip_deleted __attribute__ ((unused))) +{ + uint block_size; + my_off_t filepos; + DBUG_ENTER("_ma_scan_block_record"); + +restart_record_read: + /* Find next row in current page */ + if (likely(record_pos < info->scan.number_of_rows)) + { + uint length, offset; + byte *data, *end_of_data; + + while (!(offset= uint2korr(info->scan.dir))) + { + info->scan.dir-= DIR_ENTRY_SIZE; + record_pos++; +#ifdef SANITY_CHECKS + if (info->scan.dir < info->scan.dir_end) + goto err; +#endif + } + /* found row */ + info->cur_row.lastpos= info->scan.row_base_page + record_pos; + info->cur_row.nextpos= record_pos + 1; + data= info->scan.page_buff + offset; + length= uint2korr(info->scan.dir + 2); + end_of_data= data + length; + info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */ +#ifdef SANITY_CHECKS + if (end_of_data > info->scan.dir_end || + offset < PAGE_HEADER_SIZE || length < info->s->base.min_block_length) + goto err; +#endif + DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data)); + } + + /* Find next head page in current bitmap */ +restart_bitmap_scan: + block_size= info->s->block_size; + if (likely(info->scan.bitmap_pos < info->scan.bitmap_end)) + { + byte *data= info->scan.bitmap_pos; + longlong bits= info->scan.bits; + uint bit_pos= info->scan.bit_pos; + + do + { + while (likely(bits)) + { + uint pattern= bits & 7; + bits >>= 3; + bit_pos++; + if (pattern > 0 && pattern <= 4) + { + /* Found head page; Read it */ + ulong page; + info->scan.bitmap_pos= data; + info->scan.bits= bits; + info->scan.bit_pos= bit_pos; + page= (info->scan.bitmap_page + 1 + + (data - info->scan.bitmap_buff) / 6 * 16 + bit_pos - 1); + info->scan.row_base_page= ma_recordpos(page, 0); + if (!(key_cache_read(info->s->key_cache, + info->dfile, + (my_off_t) page * block_size, + 0, info->scan.page_buff, + block_size, block_size, 0))) + DBUG_RETURN(my_errno); + if (((info->scan.page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != + HEAD_PAGE) || + (info->scan.number_of_rows= + (uint) (uchar) info->scan.page_buff[DIR_ENTRY_OFFSET]) == 0) + { + DBUG_PRINT("error", ("Wrong page header")); + DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); + } + info->scan.dir= (info->scan.page_buff + block_size - + PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE); + info->scan.dir_end= (info->scan.dir - + (info->scan.number_of_rows - 1) * + DIR_ENTRY_SIZE); + record_pos= 0; + goto restart_record_read; + } + } + for (data+= 6; data < info->scan.bitmap_end; data+= 6) + { + bits= uint6korr(data); + if (bits && ((bits & LL(04444444444444444)) != LL(04444444444444444))) + break; + } + bit_pos= 0; + } while (data < info->scan.bitmap_end); + } + + /* Read next bitmap */ + info->scan.bitmap_page+= info->s->bitmap.pages_covered; + filepos= (my_off_t) info->scan.bitmap_page * block_size; + if (unlikely(filepos >= info->state->data_file_length)) + { + DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE)); + } + if (!(key_cache_read(info->s->key_cache, info->dfile, filepos, + 0, info->scan.bitmap_buff, block_size, block_size, 0))) + DBUG_RETURN(my_errno); + /* Skip scanning 'bits' in bitmap scan code */ + info->scan.bitmap_pos= info->scan.bitmap_buff - 6; + info->scan.bits= 0; + goto restart_bitmap_scan; + +err: + DBUG_PRINT("error", ("Wrong data on page")); + DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); +} + + +/* + Compare a row against a stored one + + 
NOTES + Not implemented, as block record is not supposed to be used in a shared + global environment +*/ + +my_bool _ma_compare_block_record(MARIA_HA *info __attribute__ ((unused)), + const byte *record __attribute__ ((unused))) +{ + return 0; +} + + +#ifndef DBUG_OFF + +static void _ma_print_directory(byte *buff, uint block_size) +{ + uint max_entry= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET], row= 0; + uint end_of_prev_row= PAGE_HEADER_SIZE; + byte *dir, *end; + + dir= buff + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE; + end= buff + block_size - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE; + + DBUG_LOCK_FILE; + fprintf(DBUG_FILE,"Directory dump (pos:length):\n"); + + for (row= 1; dir <= end ; end-= DIR_ENTRY_SIZE, row++) + { + uint offset= uint2korr(end); + uint length= uint2korr(end+2); + fprintf(DBUG_FILE, " %4u:%4u", offset, offset ? length : 0); + if (!(row % (80/12))) + fputc('\n', DBUG_FILE); + if (offset) + { + DBUG_ASSERT(offset >= end_of_prev_row); + end_of_prev_row= offset + length; + } + } + fputc('\n', DBUG_FILE); + fflush(DBUG_FILE); + DBUG_UNLOCK_FILE; +} +#endif /* DBUG_OFF */ + diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h new file mode 100644 index 00000000000..ec99dbfcae2 --- /dev/null +++ b/storage/maria/ma_blockrec.h @@ -0,0 +1,160 @@ +/* Copyright (C) 2007 Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Storage of records in block +*/ + +#define LSN_SIZE 7 +#define DIRCOUNT_SIZE 1 /* Stores number of rows on page */ +#define EMPTY_SPACE_SIZE 2 /* Stores empty space on page */ +#define PAGE_TYPE_SIZE 1 +#define PAGE_SUFFIX_SIZE 0 /* Bytes for page suffix */ +#define PAGE_HEADER_SIZE (LSN_SIZE + DIRCOUNT_SIZE + EMPTY_SPACE_SIZE +\ + PAGE_TYPE_SIZE) +#define PAGE_OVERHEAD_SIZE (PAGE_HEADER_SIZE + DIR_ENTRY_SIZE + \ + PAGE_SUFFIX_SIZE) +#define BLOCK_RECORD_POINTER_SIZE 6 + +#define FULL_PAGE_SIZE(block_size) ((block_size) - LSN_SIZE - PAGE_TYPE_SIZE) + +#define ROW_EXTENT_PAGE_SIZE 5 +#define ROW_EXTENT_COUNT_SIZE 2 +#define ROW_EXTENT_SIZE (ROW_EXTENT_PAGE_SIZE + ROW_EXTENT_COUNT_SIZE) +#define TAIL_BIT 0x8000 /* Bit in page_count to signify tail */ +#define ELEMENTS_RESERVED_FOR_MAIN_PART 4 +#define EXTRA_LENGTH_FIELDS 3 + +#define FLAG_SIZE 1 +#define TRANSID_SIZE 6 +#define VERPTR_SIZE 7 +#define DIR_ENTRY_SIZE 4 +#define FIELD_OFFSET_SIZE 2 + +/* Minimum header size needed for a new row */ +#define BASE_ROW_HEADER_SIZE FLAG_SIZE +#define TRANS_ROW_EXTRA_HEADER_SIZE TRANSID_SIZE + +#define PAGE_TYPE_MASK 127 +enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_TYPE }; + +#define PAGE_TYPE_OFFSET LSN_SIZE +#define DIR_ENTRY_OFFSET LSN_SIZE+PAGE_TYPE_SIZE +#define EMPTY_SPACE_OFFSET (DIR_ENTRY_OFFSET + DIRCOUNT_SIZE) + +#define PAGE_CAN_BE_COMPACTED 128 /* Bit in PAGE_TYPE */ + +/* Bits used for flag byte (one byte, first in record) */ +#define ROW_FLAG_TRANSID 1 +#define ROW_FLAG_VER_PTR 2 +#define ROW_FLAG_DELETE_TRANSID 4 +#define ROW_FLAG_NULLS_EXTENDED 8 +#define ROW_FLAG_EXTENTS 128 +#define ROW_FLAG_ALL (1+2+4+8+128) + +/* Variables that affects how data pages are utilized */ +#define MIN_TAIL_SIZE 32 + +/* Fixed part of Max possible header size; See table in ma_blockrec.c */ +#define MAX_FIXED_HEADER_SIZE (FLAG_SIZE + 3 + ROW_EXTENT_SIZE + 3) +#define TRANS_MAX_FIXED_HEADER_SIZE (MAX_FIXED_HEADER_SIZE + \ + FLAG_SIZE + TRANSID_SIZE + VERPTR_SIZE + \ + TRANSID_SIZE) + +/* We use 1 byte in record header to store number of directory entries */ +#define MAX_ROWS_PER_PAGE 255 + +/* Bits for MARIA_BITMAP_BLOCKS->used */ +#define BLOCKUSED_USED 1 +#define BLOCKUSED_USE_ORG_BITMAP 2 +#define BLOCKUSED_TAIL 4 + +/* defines that affects allocation (density) of data */ + +/* If we fill up a block to 75 %, don't create a new tail page for it */ +#define MAX_TAIL_SIZE(block_size) ((block_size) *3 / 4) + +/* Functions to convert MARIA_RECORD_POS to/from page:offset */ + +static inline MARIA_RECORD_POS ma_recordpos(ulonglong page, uint offset) +{ + return (MARIA_RECORD_POS) ((page << 8) | offset); +} + +static inline my_off_t ma_recordpos_to_page(MARIA_RECORD_POS record_pos) +{ + return record_pos >> 8; +} + +static inline my_off_t ma_recordpos_to_offset(MARIA_RECORD_POS record_pos) +{ + return record_pos & 255; +} + +/* ma_blockrec.c */ +void _ma_init_block_record_data(void); +my_bool _ma_once_init_block_row(MARIA_SHARE *share, File dfile); +my_bool _ma_once_end_block_row(MARIA_SHARE *share); +my_bool _ma_init_block_row(MARIA_HA *info); +void _ma_end_block_row(MARIA_HA *info); + +my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *record); +my_bool _ma_delete_block_record(MARIA_HA *info); +int _ma_read_block_record(MARIA_HA 
*info, byte *record, + MARIA_RECORD_POS record_pos); +int _ma_read_block_record2(MARIA_HA *info, byte *record, + byte *data, byte *end_of_data); +int _ma_scan_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS, my_bool); +my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos); +my_bool _ma_scan_init_block_record(MARIA_HA *info); +void _ma_scan_end_block_record(MARIA_HA *info); + +MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info, + const byte *record); +my_bool _ma_write_block_record(MARIA_HA *info, const byte *record); +my_bool _ma_write_abort_block_record(MARIA_HA *info); +my_bool _ma_compare_block_record(register MARIA_HA *info, + register const byte *record); + +/* ma_bitmap.c */ +my_bool _ma_bitmap_init(MARIA_SHARE *share, File file); +my_bool _ma_bitmap_end(MARIA_SHARE *share); +my_bool _ma_flush_bitmap(MARIA_SHARE *share); +my_bool _ma_read_bitmap_page(MARIA_SHARE *share, MARIA_FILE_BITMAP *bitmap, + ulonglong page); +my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, + MARIA_BITMAP_BLOCKS *result_blocks); +my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks); +my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const byte *extents, + uint count); +my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong pos, my_bool head, + uint empty_space); +my_bool _ma_reset_full_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page, uint page_count); +uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size); +my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *new_row, + ulonglong page, uint free_size, + MARIA_BITMAP_BLOCKS *result_blocks); +my_bool _ma_check_bitmap_data(MARIA_HA *info, + enum en_page_type page_type, ulonglong page, + uint empty_space, uint *bitmap_pattern); +my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, + enum en_page_type page_type, + ulonglong page, + uint *bitmap_pattern); diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c new file mode 100644 index 00000000000..d6061c647ec --- /dev/null +++ b/storage/maria/ma_cache.c @@ -0,0 +1,108 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Functions for read record cacheing with maria + Used for reading dynamic/compressed records from datafile. + + Can fetch data directly from file (outside cache), + if reading a small chunk straight before the cached part (with possible + overlap). + + Can be explicitly asked not to use cache (by not setting READING_NEXT in + flag) - useful for occasional out-of-cache reads, when the next read is + expected to hit the cache again. 
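+
+  (A typical caller is check_dynamic_record() in ma_check.c, which combines
+  READING_NEXT with READING_HEADER while walking the datafile.)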
+ + Allows "partial read" errors in the record header (when READING_HEADER flag + is set) - unread part is bzero'ed + + Note: out-of-cache reads are enabled for shared IO_CACHE's too, + as these reads will be cached by OS cache (and my_pread is always atomic) +*/ + + +#include "maria_def.h" + +int _ma_read_cache(IO_CACHE *info, byte *buff, my_off_t pos, uint length, + int flag) +{ + uint read_length,in_buff_length; + my_off_t offset; + char *in_buff_pos; + DBUG_ENTER("_ma_read_cache"); + + if (pos < info->pos_in_file) + { + read_length=length; + if ((my_off_t) read_length > (my_off_t) (info->pos_in_file-pos)) + read_length=(uint) (info->pos_in_file-pos); + info->seek_not_done=1; + if (my_pread(info->file,buff,read_length,pos,MYF(MY_NABP))) + DBUG_RETURN(1); + if (!(length-=read_length)) + DBUG_RETURN(0); + pos+=read_length; + buff+=read_length; + } + if (pos >= info->pos_in_file && + (offset= (my_off_t) (pos - info->pos_in_file)) < + (my_off_t) (info->read_end - info->request_pos)) + { + in_buff_pos=info->request_pos+(uint) offset; + in_buff_length= min(length,(uint) (info->read_end-in_buff_pos)); + memcpy(buff,info->request_pos+(uint) offset,(size_t) in_buff_length); + if (!(length-=in_buff_length)) + DBUG_RETURN(0); + pos+=in_buff_length; + buff+=in_buff_length; + } + else + in_buff_length=0; + if (flag & READING_NEXT) + { + if (pos != (info->pos_in_file + + (uint) (info->read_end - info->request_pos))) + { + info->pos_in_file=pos; /* Force start here */ + info->read_pos=info->read_end=info->request_pos; /* Everything used */ + info->seek_not_done=1; + } + else + info->read_pos=info->read_end; /* All block used */ + if (!(*info->read_function)(info,buff,length)) + DBUG_RETURN(0); + read_length=info->error; + } + else + { + info->seek_not_done=1; + if ((read_length=my_pread(info->file,buff,length,pos,MYF(0))) == length) + DBUG_RETURN(0); + } + if (!(flag & READING_HEADER) || (int) read_length == -1 || + read_length+in_buff_length < 3) + { + DBUG_PRINT("error", + ("Error %d reading next-multi-part block (Got %d bytes)", + my_errno, (int) read_length)); + if (!my_errno || my_errno == -1) + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(1); + } + bzero(buff+read_length,MARIA_BLOCK_INFO_HEADER_LENGTH - in_buff_length - + read_length); + DBUG_RETURN(0); +} /* _ma_read_cache */ diff --git a/storage/maria/ma_changed.c b/storage/maria/ma_changed.c new file mode 100644 index 00000000000..9e86212baa6 --- /dev/null +++ b/storage/maria/ma_changed.c @@ -0,0 +1,34 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Check if somebody has changed table since last check. 
*/ + +#include "maria_def.h" + + /* Return 0 if table isn't changed */ + +int maria_is_changed(MARIA_HA *info) +{ + int result; + DBUG_ENTER("maria_is_changed"); + if (fast_ma_readinfo(info)) + DBUG_RETURN(-1); + VOID(_ma_writeinfo(info,0)); + result=(int) info->data_changed; + info->data_changed=0; + DBUG_PRINT("exit",("result: %d",result)); + DBUG_RETURN(result); +} diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c new file mode 100644 index 00000000000..ccce19de994 --- /dev/null +++ b/storage/maria/ma_check.c @@ -0,0 +1,5096 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Describe, check and repair of MARIA tables */ + +/* + About checksum calculation. + + There are two types of checksums. Table checksum and row checksum. + + Row checksum is an additional byte at the end of dynamic length + records. It must be calculated if the table is configured for them. + Otherwise they must not be used. The variable + MYISAM_SHARE::calc_checksum determines if row checksums are used. + MI_INFO::checksum is used as temporary storage during row handling. + For parallel repair we must assure that only one thread can use this + variable. There is no problem on the write side as this is done by one + thread only. But when checking a record after read this could go + wrong. But since all threads read through a common read buffer, it is + sufficient if only one thread checks it. + + Table checksum is an eight byte value in the header of the index file. + It can be calculated even if row checksums are not used. The variable + MI_CHECK::glob_crc is calculated over all records. + MI_SORT_PARAM::calc_checksum determines if this should be done. This + variable is not part of MI_CHECK because it must be set per thread for + parallel repair. The global glob_crc must be changed by one thread + only. And it is sufficient to calculate the checksum once only. 
+*/ + +#include "ma_ftdefs.h" +#include <myisamchk.h> +#include <m_ctype.h> +#include <stdarg.h> +#include <my_getopt.h> +#ifdef HAVE_SYS_VADVISE_H +#include <sys/vadvise.h> +#endif +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif +#include "ma_rt_index.h" +#include "ma_blockrec.h" + +/* Functions defined in this file */ + +static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link); +static int chk_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level); +static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo); +static ha_checksum calc_checksum(ha_rows count); +static int writekeys(MARIA_SORT_PARAM *sort_param); +static int sort_one_index(HA_CHECK *param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t pagepos, File new_file); +static int sort_key_read(MARIA_SORT_PARAM *sort_param, byte *key); +static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key); +static int sort_get_next_record(MARIA_SORT_PARAM *sort_param); +static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, + const void *b); +static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, + const byte *a); +static int sort_key_write(MARIA_SORT_PARAM *sort_param, const byte *a); +static my_off_t get_record_for_key(MARIA_HA *info,MARIA_KEYDEF *keyinfo, + const byte *key); +static int sort_insert_key(MARIA_SORT_PARAM *sort_param, + reg1 SORT_KEY_BLOCKS *key_block, + const byte *key, my_off_t prev_block); +static int sort_delete_record(MARIA_SORT_PARAM *sort_param); +/*static int _ma_flush_pending_blocks(HA_CHECK *param);*/ +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, + uint buffer_length); +static ha_checksum maria_byte_checksum(const byte *buf, uint length); +static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share); +static void restore_data_file_type(MARIA_SHARE *share); + +void maria_chk_init(HA_CHECK *param) +{ + bzero((gptr) param,sizeof(*param)); + param->opt_follow_links=1; + param->keys_in_use= ~(ulonglong) 0; + param->search_after_block=HA_OFFSET_ERROR; + param->auto_increment_value= 0; + param->use_buffers=USE_BUFFER_INIT; + param->read_buffer_length=READ_BUFFER_INIT; + param->write_buffer_length=READ_BUFFER_INIT; + param->sort_buffer_length=SORT_BUFFER_INIT; + param->sort_key_blocks=BUFFERS_WHEN_SORTING; + param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; + param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL); + param->start_check_pos=0; + param->max_record_length= LONGLONG_MAX; + param->key_cache_block_size= KEY_CACHE_BLOCK_SIZE; + param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; +} + + /* Check the status flags for the table */ + +int maria_chk_status(HA_CHECK *param, register MARIA_HA *info) +{ + MARIA_SHARE *share=info->s; + + if (maria_is_crashed_on_repair(info)) + _ma_check_print_warning(param, + "Table is marked as crashed and last repair failed"); + else if (maria_is_crashed(info)) + _ma_check_print_warning(param, + "Table is marked as crashed"); + if (share->state.open_count != (uint) (info->s->global_changed ? 1 : 0)) + { + /* Don't count this as a real warning, as check can correct this ! */ + uint save=param->warning_printed; + _ma_check_print_warning(param, + share->state.open_count==1 ? 
+ "%d client is using or hasn't closed the table properly" : + "%d clients are using or haven't closed the table properly", + share->state.open_count); + /* If this will be fixed by the check, forget the warning */ + if (param->testflag & T_UPDATE_STATE) + param->warning_printed=save; + } + return 0; +} + + /* Check delete links */ + +int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag) +{ + reg2 ha_rows i; + uint delete_link_length; + my_off_t empty,next_link,old_link; + char buff[22],buff2[22]; + DBUG_ENTER("maria_chk_del"); + + LINT_INIT(old_link); + param->record_checksum=0; + delete_link_length=((info->s->options & HA_OPTION_PACK_RECORD) ? 20 : + info->s->rec_reflength+1); + + if (!(test_flag & T_SILENT)) + puts("- check record delete-chain"); + + next_link=info->s->state.dellink; + if (info->state->del == 0) + { + if (test_flag & T_VERBOSE) + { + puts("No recordlinks"); + } + } + else + { + if (test_flag & T_VERBOSE) + printf("Recordlinks: "); + empty=0; + for (i= info->state->del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--) + { + if (*_ma_killed_ptr(param)) + DBUG_RETURN(1); + if (test_flag & T_VERBOSE) + printf(" %9s",llstr(next_link,buff)); + if (next_link >= info->state->data_file_length) + goto wrong; + if (my_pread(info->dfile,(char*) buff,delete_link_length, + next_link,MYF(MY_NABP))) + { + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"Can't read delete-link at filepos: %s", + llstr(next_link,buff)); + DBUG_RETURN(1); + } + if (*buff != '\0') + { + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"Record at pos: %s is not remove-marked", + llstr(next_link,buff)); + goto wrong; + } + if (info->s->options & HA_OPTION_PACK_RECORD) + { + my_off_t prev_link=mi_sizekorr(buff+12); + if (empty && prev_link != old_link) + { + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"Deleted block at %s doesn't point back at previous delete link",llstr(next_link,buff2)); + goto wrong; + } + old_link=next_link; + next_link=mi_sizekorr(buff+4); + empty+=mi_uint3korr(buff+1); + } + else + { + param->record_checksum+=(ha_checksum) next_link; + next_link= _ma_rec_pos(info->s, buff+1); + empty+=info->s->base.pack_reclength; + } + } + if (test_flag & T_VERBOSE) + puts("\n"); + if (empty != info->state->empty) + { + _ma_check_print_warning(param, + "Found %s deleted space in delete link chain. Should be %s", + llstr(empty,buff2), + llstr(info->state->empty,buff)); + } + if (next_link != HA_OFFSET_ERROR) + { + _ma_check_print_error(param, + "Found more than the expected %s deleted rows in delete link chain", + llstr(info->state->del, buff)); + goto wrong; + } + if (i != 0) + { + _ma_check_print_error(param, + "Found %s deleted rows in delete link chain. 
Should be %s", + llstr(info->state->del - i, buff2), + llstr(info->state->del, buff)); + goto wrong; + } + } + DBUG_RETURN(0); + +wrong: + param->testflag|=T_RETRY_WITHOUT_QUICK; + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"record delete-link-chain corrupted"); + DBUG_RETURN(1); +} /* maria_chk_del */ + + + /* Check delete links in index file */ + +static int check_k_link(HA_CHECK *param, register MARIA_HA *info, + my_off_t next_link) +{ + uint block_size= info->s->block_size; + ha_rows records; + char llbuff[21], llbuff2[21], *buff; + DBUG_ENTER("check_k_link"); + + records= (ha_rows) (info->state->key_file_length / block_size); + while (next_link != HA_OFFSET_ERROR && records > 0) + { + if (*_ma_killed_ptr(param)) + DBUG_RETURN(1); + if (param->testflag & T_VERBOSE) + printf("%16s",llstr(next_link,llbuff)); + + /* Key blocks must lay within the key file length entirely. */ + if (next_link + block_size > info->state->key_file_length) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "Invalid key block position: %s " + "key block size: %u file_length: %s", + llstr(next_link, llbuff), block_size, + llstr(info->state->key_file_length, llbuff2)); + DBUG_RETURN(1); + /* purecov: end */ + } + + /* Key blocks must be aligned at block_size */ + if (next_link & (block_size -1)) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "Mis-aligned key block: %s " + "minimum key block length: %u", + llstr(next_link, llbuff), + block_size); + DBUG_RETURN(1); + /* purecov: end */ + } + + if (!(buff=key_cache_read(info->s->key_cache, + info->s->kfile, next_link, DFLT_INIT_HITS, + (byte*) info->buff, + block_size, block_size, 1))) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "key cache read error for block: %s", + llstr(next_link,llbuff)); + DBUG_RETURN(1); + /* purecov: end */ + } + next_link=mi_sizekorr(buff); + records--; + param->key_file_blocks+=block_size; + } + if (param->testflag & T_VERBOSE) + { + if (next_link != HA_OFFSET_ERROR) + printf("%16s\n",llstr(next_link,llbuff)); + else + puts(""); + } + DBUG_RETURN (next_link != HA_OFFSET_ERROR); +} /* check_k_link */ + + + /* Check sizes of files */ + +int maria_chk_size(HA_CHECK *param, register MARIA_HA *info) +{ + int error=0; + register my_off_t skr,size; + char buff[22],buff2[22]; + DBUG_ENTER("maria_chk_size"); + + if (!(param->testflag & T_SILENT)) + puts("- check file-size"); + + /* The following is needed if called externally (not from maria_chk) */ + flush_key_blocks(info->s->key_cache, + info->s->kfile, FLUSH_FORCE_WRITE); + + size=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0)); + if ((skr=(my_off_t) info->state->key_file_length) != size) + { + /* Don't give error if file generated by mariapack */ + if (skr > size && maria_is_any_key_active(info->s->state.key_map)) + { + error=1; + _ma_check_print_error(param, + "Size of indexfile is: %-8s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + } + else if (!(param->testflag & T_VERY_SILENT)) + _ma_check_print_warning(param, + "Size of indexfile is: %-8s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + } + if (!(param->testflag & T_VERY_SILENT) && + ! 
(info->s->options & HA_OPTION_COMPRESS_RECORD) && + ulonglong2double(info->state->key_file_length) > + ulonglong2double(info->s->base.margin_key_file_length)*0.9) + _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used", + llstr(info->state->key_file_length,buff), + llstr(info->s->base.max_key_file_length-1,buff2)); + + size=my_seek(info->dfile,0L,MY_SEEK_END,MYF(0)); + skr=(my_off_t) info->state->data_file_length; + if (info->s->options & HA_OPTION_COMPRESS_RECORD) + skr+= MEMMAP_EXTRA_MARGIN; +#ifdef USE_RELOC + if (info->data_file_type == STATIC_RECORD && + skr < (my_off_t) info->s->base.reloc*info->s->base.min_pack_length) + skr=(my_off_t) info->s->base.reloc*info->s->base.min_pack_length; +#endif + if (skr != size) + { + info->state->data_file_length=size; /* Skip other errors */ + if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN) + { + error=1; + _ma_check_print_error(param,"Size of datafile is: %-9s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + param->testflag|=T_RETRY_WITHOUT_QUICK; + } + else + { + _ma_check_print_warning(param, + "Size of datafile is: %-9s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + } + } + if (!(param->testflag & T_VERY_SILENT) && + !(info->s->options & HA_OPTION_COMPRESS_RECORD) && + ulonglong2double(info->state->data_file_length) > + (ulonglong2double(info->s->base.max_data_file_length)*0.9)) + _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used", + llstr(info->state->data_file_length,buff), + llstr(info->s->base.max_data_file_length-1,buff2)); + DBUG_RETURN(error); +} /* maria_chk_size */ + + +/* Check keys */ + +int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) +{ + uint key,found_keys=0,full_text_keys=0,result=0; + ha_rows keys; + ha_checksum old_record_checksum,init_checksum; + my_off_t all_keydata,all_totaldata,key_totlength,length; + ulong *rec_per_key_part; + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *keyinfo; + char buff[22],buff2[22]; + DBUG_ENTER("maria_chk_key"); + + if (!(param->testflag & T_SILENT)) + puts("- check key delete-chain"); + + param->key_file_blocks=info->s->base.keystart; + if (check_k_link(param, info, info->s->state.key_del)) + { + if (param->testflag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"key delete-link-chain corrupted"); + DBUG_RETURN(-1); + } + + if (!(param->testflag & T_SILENT)) puts("- check index reference"); + + all_keydata=all_totaldata=key_totlength=0; + old_record_checksum=0; + init_checksum=param->record_checksum; + if (share->data_file_type == STATIC_RECORD) + old_record_checksum= (calc_checksum(info->state->records + + info->state->del-1) * + share->base.pack_reclength); + rec_per_key_part= param->rec_per_key_part; + for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ; + rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++) + { + param->key_crc[key]=0; + if (!
maria_is_key_active(share->state.key_map, key)) + { + /* Remember old statistics for key */ + memcpy((char*) rec_per_key_part, + (char*) (share->state.rec_per_key_part + + (uint) (rec_per_key_part - param->rec_per_key_part)), + keyinfo->keysegs*sizeof(*rec_per_key_part)); + continue; + } + found_keys++; + + param->record_checksum=init_checksum; + + bzero((char*) ¶m->unique_count,sizeof(param->unique_count)); + bzero((char*) ¶m->notnull_count,sizeof(param->notnull_count)); + + if ((!(param->testflag & T_SILENT))) + printf ("- check data record references index: %d\n",key+1); + if (keyinfo->flag & HA_FULLTEXT) + full_text_keys++; + if (share->state.key_root[key] == HA_OFFSET_ERROR && + (info->state->records == 0 || keyinfo->flag & HA_FULLTEXT)) + goto do_stat; + if (!_ma_fetch_keypage(info,keyinfo,share->state.key_root[key], + DFLT_INIT_HITS,info->buff,0)) + { + _ma_check_print_error(param,"Can't read indexpage from filepos: %s", + llstr(share->state.key_root[key],buff)); + if (!(param->testflag & T_INFO)) + DBUG_RETURN(-1); + result= -1; + continue; + } + param->key_file_blocks+=keyinfo->block_length; + keys=0; + param->keydata=param->totaldata=0; + param->key_blocks=0; + param->max_level=0; + if (chk_index(param,info,keyinfo,share->state.key_root[key],info->buff, + &keys, param->key_crc+key,1)) + DBUG_RETURN(-1); + if(!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL))) + { + if (keys != info->state->records) + { + _ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff), + llstr(info->state->records,buff2)); + if (!(param->testflag & T_INFO)) + DBUG_RETURN(-1); + result= -1; + continue; + } + if ((found_keys - full_text_keys == 1 && + !(share->data_file_type == STATIC_RECORD)) || + (param->testflag & T_DONT_CHECK_CHECKSUM)) + old_record_checksum= param->record_checksum; + else if (old_record_checksum != param->record_checksum) + { + if (key) + _ma_check_print_error(param,"Key %u doesn't point at same records that key 1", + key+1); + else + _ma_check_print_error(param,"Key 1 doesn't point at all records"); + if (!(param->testflag & T_INFO)) + DBUG_RETURN(-1); + result= -1; + continue; + } + } + if ((uint) share->base.auto_key -1 == key) + { + /* Check that auto_increment key is bigger than max key value */ + ulonglong auto_increment; + info->lastinx=key; + _ma_read_key_record(info, info->rec_buff, 0); + auto_increment= ma_retrieve_auto_increment(info, info->rec_buff); + if (auto_increment > info->s->state.auto_increment) + { + _ma_check_print_warning(param, "Auto-increment value: %s is smaller " + "than max used value: %s", + llstr(info->s->state.auto_increment,buff2), + llstr(auto_increment, buff)); + } + if (param->testflag & T_AUTO_INC) + { + set_if_bigger(info->s->state.auto_increment, + auto_increment); + set_if_bigger(info->s->state.auto_increment, + param->auto_increment_value); + } + + /* Check that there isn't a row with auto_increment = 0 in the table */ + maria_extra(info,HA_EXTRA_KEYREAD,0); + bzero(info->lastkey,keyinfo->seg->length); + if (!maria_rkey(info, info->rec_buff, key, (const byte*) info->lastkey, + keyinfo->seg->length, HA_READ_KEY_EXACT)) + { + /* Don't count this as a real warning, as maria_chk can't correct it */ + uint save=param->warning_printed; + _ma_check_print_warning(param, "Found row where the auto_increment " + "column has the value 0"); + param->warning_printed=save; + } + maria_extra(info,HA_EXTRA_NO_KEYREAD,0); + } + + length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2; + if (param->testflag & T_INFO && param->totaldata != 
0L && keys != 0L) + printf("Key: %2d: Keyblocks used: %3d%% Packed: %4d%% Max levels: %2d\n", + key+1, + (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)), + (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/ + my_off_t2double(length)), + param->max_level); + all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length; + +do_stat: + if (param->testflag & T_STATISTICS) + maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + param->notnull_count: NULL, + (ulonglong)info->state->records); + } + if (param->testflag & T_INFO) + { + if (all_totaldata != 0L && found_keys > 0) + printf("Total: Keyblocks used: %3d%% Packed: %4d%%\n\n", + (int) (my_off_t2double(all_keydata)*100.0/ + my_off_t2double(all_totaldata)), + (int) ((my_off_t2double(key_totlength) - + my_off_t2double(all_keydata))*100.0/ + my_off_t2double(key_totlength))); + else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map)) + puts(""); + } + if (param->key_file_blocks != info->state->key_file_length && + param->keys_in_use != ~(ulonglong) 0) + _ma_check_print_warning(param, "Some data are unreferenced in keyfile"); + if (found_keys != full_text_keys) + param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */ + else + param->record_checksum=0; + DBUG_RETURN(result); +} /* maria_chk_key */ + + +static int chk_index_down(HA_CHECK *param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level) +{ + char llbuff[22],llbuff2[22]; + DBUG_ENTER("chk_index_down"); + + /* Key blocks must lay within the key file length entirely. */ + if (page + keyinfo->block_length > info->state->key_file_length) + { + /* purecov: begin tested */ + /* Give it a chance to fit in the real file size. */ + my_off_t max_length= my_seek(info->s->kfile, 0L, MY_SEEK_END, MYF(0)); + _ma_check_print_error(param, "Invalid key block position: %s " + "key block size: %u file_length: %s", + llstr(page, llbuff), keyinfo->block_length, + llstr(info->state->key_file_length, llbuff2)); + if (page + keyinfo->block_length > max_length) + goto err; + /* Fix the remembered key file length. */ + info->state->key_file_length= (max_length & + ~ (my_off_t) (keyinfo->block_length - 1)); + /* purecov: end */ + } + + /* Key blocks must be aligned at block length */ + if (page & (info->s->block_size -1)) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "Mis-aligned key block: %s " + "minimum key block length: %u", + llstr(page, llbuff), info->s->block_size); + goto err; + /* purecov: end */ + } + + if (!_ma_fetch_keypage(info,keyinfo,page, DFLT_INIT_HITS,buff,0)) + { + _ma_check_print_error(param,"Can't read key from filepos: %s", + llstr(page,llbuff)); + goto err; + } + param->key_file_blocks+=keyinfo->block_length; + if (chk_index(param,info,keyinfo,page,buff,keys,key_checksum,level)) + goto err; + + DBUG_RETURN(0); + + /* purecov: begin tested */ +err: + DBUG_RETURN(1); + /* purecov: end */ +} + + +/* + "Ignore NULLs" statistics collection method: process first index tuple. 
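+  For example, for a three-part key holding the values (1, NULL, 7) only
+  notnull[0] is incremented, as every longer prefix contains the NULL value.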
+ + SYNOPSIS + maria_collect_stats_nonulls_first() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + key IN Key values tuple + + DESCRIPTION + Process the first index tuple - find out which prefix tuples don't + contain NULLs, and update the array of notnull counters accordingly. +*/ + +static +void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull, + const byte *key) +{ + uint first_null, kp; + first_null= ha_find_null(keyseg, (uchar*) key) - keyseg; + /* + All prefix tuples that don't include keypart_{first_null} are not-null + tuples (and all others aren't), increment counters for them. + */ + for (kp= 0; kp < first_null; kp++) + notnull[kp]++; +} + + +/* + "Ignore NULLs" statistics collection method: process next index tuple. + + SYNOPSIS + maria_collect_stats_nonulls_next() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + prev_key IN Previous key values tuple + last_key IN Next key values tuple + + DESCRIPTION + Process the next index tuple: + 1. Find out which prefix tuples of last_key don't contain NULLs, and + update the array of notnull counters accordingly. + 2. Find the first keypart number where the prev_key and last_key tuples + are different(A), or last_key has NULL value(B), and return it, so the + caller can count number of unique tuples for each key prefix. We don't + need (B) to be counted, and that is compensated back in + maria_update_key_parts(). + + RETURN + 1 + number of first keypart where values differ or last_key tuple has NULL +*/ + +static +int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, + const byte *prev_key, + const byte *last_key) +{ + uint diffs[2]; + uint first_null_seg, kp; + HA_KEYSEG *seg; + + /* + Find the first keypart where values are different or either of them is + NULL. We get results in diffs array: + diffs[0]= 1 + number of first different keypart + diffs[1]=offset: (last_key + diffs[1]) points to first value in + last_key that is NULL or different from corresponding + value in prev_key. + */ + ha_key_cmp(keyseg, (uchar*) prev_key, (uchar*) last_key, USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs); + seg= keyseg + diffs[0] - 1; + + /* Find first NULL in last_key */ + first_null_seg= ha_find_null(seg, (uchar*) last_key + diffs[1]) - keyseg; + for (kp= 0; kp < first_null_seg; kp++) + notnull[kp]++; + + /* + Return 1+ number of first key part where values differ. Don't care if + these were NULLs and not .... We compensate for that in + maria_update_key_parts. 
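+    The caller (chk_index) uses the returned value R to increment
+    param->unique_count[R - 1].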
+ */ + return diffs[0]; +} + + + /* Check if index is ok */ + +static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level) +{ + int flag; + uint used_length,comp_flag,nod_flag,key_length=0; + byte key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; + my_off_t next_page,record; + char llbuff[22]; + uint diff_pos[2]; + DBUG_ENTER("chk_index"); + DBUG_DUMP("buff",(byte*) buff,maria_getint(buff)); + + /* TODO: implement appropriate check for RTree keys */ + if (keyinfo->flag & HA_SPATIAL) + DBUG_RETURN(0); + + if (!(temp_buff=(byte*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not enough memory for keyblock"); + DBUG_RETURN(-1); + } + + if (keyinfo->flag & HA_NOSAME) + comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* Not real duplicates */ + else + comp_flag=SEARCH_SAME; /* Keys in positionorder */ + nod_flag=_ma_test_if_nod(buff); + used_length=maria_getint(buff); + keypos=buff+2+nod_flag; + endpos=buff+used_length; + + param->keydata+=used_length; param->totaldata+=keyinfo->block_length; /* INFO */ + param->key_blocks++; + if (level > param->max_level) + param->max_level=level; + + if (used_length > keyinfo->block_length) + { + _ma_check_print_error(param,"Wrong pageinfo at page: %s", + llstr(page,llbuff)); + goto err; + } + for ( ;; ) + { + if (*_ma_killed_ptr(param)) + goto err; + memcpy(info->lastkey, key, key_length); + info->lastkey_length= key_length; + if (nod_flag) + { + next_page= _ma_kpos(nod_flag,keypos); + if (chk_index_down(param,info,keyinfo,next_page, + temp_buff,keys,key_checksum,level+1)) + goto err; + } + old_keypos=keypos; + if (keypos >= endpos || + (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0) + break; + if (keypos > endpos) + { + _ma_check_print_error(param,"Wrong key block length at page: %s", + llstr(page,llbuff)); + goto err; + } + if ((*keys)++ && + (flag=ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, + key_length, comp_flag, diff_pos)) >=0) + { + DBUG_DUMP("old", info->lastkey, info->lastkey_length); + DBUG_DUMP("new", key, key_length); + DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos)); + + if (comp_flag & SEARCH_FIND && flag == 0) + _ma_check_print_error(param,"Found duplicated key at page %s", + llstr(page,llbuff)); + else + _ma_check_print_error(param,"Key in wrong position at page %s", + llstr(page,llbuff)); + goto err; + } + if (param->testflag & T_STATISTICS) + { + if (*keys != 1L) /* not first_key */ + { + if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) + ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, + USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, + diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg, + param->notnull_count, + info->lastkey, key); + } + param->unique_count[diff_pos[0]-1]++; + } + else + { + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count, + key); + } + } + (*key_checksum)+= maria_byte_checksum((byte*) key, + key_length- info->s->rec_reflength); + record= _ma_dpos(info,0,key+key_length); + if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */ + { + uint off; + int subkeys; + get_key_full_length_rdonly(off, key); + subkeys=ft_sintXkorr(key+off); + if (subkeys < 0) + { + ha_rows tmp_keys=0; + if 
(chk_index_down(param,info,&info->s->ft2_keyinfo,record, + temp_buff,&tmp_keys,key_checksum,1)) + goto err; + if (tmp_keys + subkeys) + { + _ma_check_print_error(param, + "Number of words in the 2nd level tree " + "does not match the number in the header. " + "Parent word is on the page %s, offset %u", + llstr(page,llbuff), (uint) (old_keypos-buff)); + goto err; + } + (*keys)+=tmp_keys-1; + continue; + } + /* fall through */ + } + if (record >= info->state->data_file_length) + { +#ifndef DBUG_OFF + char llbuff2[22], llbuff3[22]; +#endif + _ma_check_print_error(param,"Found key at page %s that points to record outside datafile",llstr(page,llbuff)); + DBUG_PRINT("test",("page: %s record: %s filelength: %s", + llstr(page,llbuff),llstr(record,llbuff2), + llstr(info->state->data_file_length,llbuff3))); + DBUG_DUMP("key",(byte*) key,key_length); + DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos)); + goto err; + } + param->record_checksum+= (ha_checksum) record; + } + if (keypos != endpos) + { + _ma_check_print_error(param,"Keyblock size at page %s is not correct. Block length: %d key length: %d", + llstr(page,llbuff), used_length, (keypos - buff)); + goto err; + } + my_afree((byte*) temp_buff); + DBUG_RETURN(0); + err: + my_afree((byte*) temp_buff); + DBUG_RETURN(1); +} /* chk_index */ + + + /* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */ + +static ha_checksum calc_checksum(ha_rows count) +{ + ulonglong sum,a,b; + DBUG_ENTER("calc_checksum"); + + sum=0; + a=count; b=count+1; + if (a & 1) + b>>=1; + else + a>>=1; + while (b) + { + if (b & 1) + sum+=a; + a<<=1; b>>=1; + } + DBUG_PRINT("exit",("sum: %lx",(ulong) sum)); + DBUG_RETURN((ha_checksum) sum); +} /* calc_checksum */ + + + /* Calc length of key in normal isam */ + +static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo) +{ + uint length; + HA_KEYSEG *keyseg; + DBUG_ENTER("isam_key_length"); + + length= info->s->rec_reflength; + for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++) + length+= keyseg->length; + + DBUG_PRINT("exit",("length: %d",length)); + DBUG_RETURN(length); +} /* isam_key_length */ + + + +static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos, + char *buff) +{ + if (info->s->data_file_type != BLOCK_RECORD) + llstr(recpos, buff); + else + { + my_off_t page= ma_recordpos_to_page(recpos); + uint row= ma_recordpos_to_offset(recpos); + char *end= longlong10_to_str(page, buff, 10); + *(end++)= ':'; + longlong10_to_str(row, end, 10); + } +} + + +/* + Check that keys in records exist in index tree + + SYNOPSIS + check_keys_in_record() + param Check parameter + info Maria handler + extend Type of check (extended or normal) + start_recpos Position to row + record Record buffer + + NOTES + This function also calculates record checksum & number of rows +*/ + +static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend, + my_off_t start_recpos, byte *record) +{ + MARIA_KEYDEF *keyinfo; + char llbuff[22+4]; + uint key; + + param->tmp_record_checksum+= (ha_checksum) start_recpos; + param->records++; + if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0) + { + printf("%s\r", llstr(param->records, llbuff)); + VOID(fflush(stdout)); + } + + /* Check if keys match the record */ + for (key=0, keyinfo= info->s->keyinfo; key < info->s->base.keys; + key++,keyinfo++) + { + if (maria_is_key_active(info->s->state.key_map, key)) + { + if(!(keyinfo->flag & HA_FULLTEXT)) + { + uint key_length= _ma_make_key(info,key,info->lastkey,record, + start_recpos);
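+        /*
+          With an extended check we verify below that the key built from
+          the row can be found in the index; otherwise we only accumulate
+          the key checksum for this index (tmp_key_crc).
+        */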
+ if (extend) + { + /* We don't need to lock the key tree here as we don't allow + concurrent threads when running maria_chk + */ + int search_result= +#ifdef HAVE_RTREE_KEYS + (keyinfo->flag & HA_SPATIAL) ? + maria_rtree_find_first(info, key, info->lastkey, key_length, + MBR_EQUAL | MBR_DATA) : +#endif + _ma_search(info,keyinfo,info->lastkey,key_length, + SEARCH_SAME, info->s->state.key_root[key]); + if (search_result) + { + record_pos_to_txt(info, start_recpos, llbuff); + _ma_check_print_error(param, + "Record at: %14s " + "Can't find key for index: %2d", + llbuff, key+1); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + return -1; + } + } + else + param->tmp_key_crc[key]+= + maria_byte_checksum((byte*) info->lastkey, key_length); + } + } + } + return 0; +} + + +/* + Functions to loop through all rows and check if they are ok + + NOTES + One function for each record format + + RESULT + 0 ok + -1 Interrupted by user + 1 Error +*/ + +static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + my_off_t start_recpos, pos; + char llbuff[22]; + + pos= 0; + while (pos < info->state->data_file_length) + { + if (*_ma_killed_ptr(param)) + return -1; + if (my_b_read(¶m->read_cache,(byte*) record, + info->s->base.pack_reclength)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(pos, llbuff)); + return 1; + } + start_recpos= pos; + pos+= info->s->base.pack_reclength; + param->splits++; + if (*record == '\0') + { + param->del_blocks++; + param->del_length+= info->s->base.pack_reclength; + continue; /* Record removed */ + } + param->glob_crc+= _ma_static_checksum(info,record); + param->used+= info->s->base.pack_reclength; + if (check_keys_in_record(param, info, extend, start_recpos, record)) + return 1; + } + return 0; +} + + +static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + MARIA_BLOCK_INFO block_info; + my_off_t start_recpos, start_block, pos; + byte *to; + ulong left_length; + uint b_type; + char llbuff[22],llbuff2[22],llbuff3[22]; + DBUG_ENTER("check_dynamic_record"); + + LINT_INIT(left_length); + LINT_INIT(start_recpos); + LINT_INIT(to); + + pos= 0; + while (pos < info->state->data_file_length) + { + my_bool got_error= 0; + int flag; + if (*_ma_killed_ptr(param)) + DBUG_RETURN(-1); + + flag= block_info.second_read=0; + block_info.next_filepos=pos; + do + { + if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, + (start_block=block_info.next_filepos), + sizeof(block_info.header), + (flag ? 
0 : READING_NEXT) | READING_HEADER)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at " + "position: %s", + my_errno, llstr(start_block, llbuff)); + DBUG_RETURN(1); + } + + if (start_block & (MARIA_DYN_ALIGN_SIZE-1)) + { + _ma_check_print_error(param,"Wrong aligned block at %s", + llstr(start_block,llbuff)); + DBUG_RETURN(1); + } + b_type= _ma_get_block_info(&block_info,-1,start_block); + if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & BLOCK_SYNC_ERROR) + { + if (flag) + { + _ma_check_print_error(param,"Unexpected byte: %d at link: %s", + (int) block_info.header[0], + llstr(start_block,llbuff)); + DBUG_RETURN(1); + } + pos=block_info.filepos+block_info.block_len; + goto next; + } + if (b_type & BLOCK_DELETED) + { + if (block_info.block_len < info->s->base.min_block_length) + { + _ma_check_print_error(param, + "Deleted block with impossible length %lu at %s", + block_info.block_len,llstr(pos,llbuff)); + DBUG_RETURN(1); + } + if ((block_info.next_filepos != HA_OFFSET_ERROR && + block_info.next_filepos >= info->state->data_file_length) || + (block_info.prev_filepos != HA_OFFSET_ERROR && + block_info.prev_filepos >= info->state->data_file_length)) + { + _ma_check_print_error(param,"Delete link points outside datafile at %s", + llstr(pos,llbuff)); + DBUG_RETURN(1); + } + param->del_blocks++; + param->del_length+= block_info.block_len; + param->splits++; + pos= block_info.filepos+block_info.block_len; + goto next; + } + _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s", + block_info.header[0],block_info.header[1], + block_info.header[2], + llstr(start_block,llbuff)); + DBUG_RETURN(1); + } + if (info->state->data_file_length < block_info.filepos+ + block_info.block_len) + { + _ma_check_print_error(param, + "Recordlink that points outside datafile at %s", + llstr(pos,llbuff)); + got_error=1; + break; + } + param->splits++; + if (!flag++) /* First block */ + { + start_recpos=pos; + pos=block_info.filepos+block_info.block_len; + if (block_info.rec_len > (uint) info->s->base.max_pack_length) + { + _ma_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + if (info->s->base.blobs) + { + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + + { + _ma_check_print_error(param, + "Not enough memory (%lu) for blob at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + } + to= info->rec_buff; + left_length= block_info.rec_len; + } + if (left_length < block_info.data_len) + { + _ma_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.data_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + if (_ma_read_cache(¶m->read_cache,(byte*) to,block_info.filepos, + (uint) block_info.data_len, + flag == 1 ? 
READING_NEXT : 0)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", my_errno, llstr(block_info.filepos, llbuff)); + + DBUG_RETURN(1); + } + to+=block_info.data_len; + param->link_used+= block_info.filepos-start_block; + param->used+= block_info.filepos - start_block + block_info.data_len; + param->empty+= block_info.block_len-block_info.data_len; + left_length-= block_info.data_len; + if (left_length) + { + if (b_type & BLOCK_LAST) + { + _ma_check_print_error(param, + "Wrong record length %s of %s at %s", + llstr(block_info.rec_len-left_length,llbuff), + llstr(block_info.rec_len, llbuff2), + llstr(start_recpos,llbuff3)); + got_error=1; + break; + } + if (info->state->data_file_length < block_info.next_filepos) + { + _ma_check_print_error(param, + "Found next-recordlink that points outside datafile at %s", + llstr(block_info.filepos,llbuff)); + got_error=1; + break; + } + } + } while (left_length); + + if (! got_error) + { + if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) == + MY_FILE_ERROR) + { + _ma_check_print_error(param,"Found wrong record at %s", + llstr(start_recpos,llbuff)); + got_error=1; + } + else + { + info->cur_row.checksum= _ma_checksum(info,record); + if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE)) + { + if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len, + test(info->s->calc_checksum))) + { + _ma_check_print_error(param,"Found wrong packed record at %s", + llstr(start_recpos,llbuff)); + got_error= 1; + } + } + param->glob_crc+= info->cur_row.checksum; + } + + if (! got_error) + { + if (check_keys_in_record(param, info, extend, start_recpos, record)) + DBUG_RETURN(1); + } + else + { + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + } + else if (!flag) + pos= block_info.filepos+block_info.block_len; +next:; + } + DBUG_RETURN(0); +} + + +static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + my_off_t start_recpos, pos; + char llbuff[22]; + bool got_error= 0; + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("check_compressed_record"); + + pos= info->s->pack.header_length; /* Skip header */ + while (pos < info->state->data_file_length) + { + if (*_ma_killed_ptr(param)) + DBUG_RETURN(-1); + + if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, pos, + info->s->pack.ref_length, READING_NEXT)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(pos, llbuff)); + DBUG_RETURN(1); + } + + start_recpos= pos; + param->splits++; + VOID(_ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, &info->rec_buff_size, -1, + start_recpos)); + pos=block_info.filepos+block_info.rec_len; + if (block_info.rec_len < (uint) info->s->min_pack_length || + block_info.rec_len > (uint) info->s->max_pack_length) + { + _ma_check_print_error(param, + "Found block with wrong recordlength: %d at %s", + block_info.rec_len, llstr(start_recpos,llbuff)); + got_error=1; + goto end; + } + if (_ma_read_cache(¶m->read_cache,(byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, READING_NEXT)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(block_info.filepos, llbuff)); + DBUG_RETURN(1); + } + if (_ma_pack_rec_unpack(info, &info->bit_buff, record, + info->rec_buff, block_info.rec_len)) + { + _ma_check_print_error(param,"Found wrong record at %s", + llstr(start_recpos,llbuff)); + got_error=1; + 
goto end; + } + param->glob_crc+= (*info->s->calc_checksum)(info,record); + param->link_used+= (block_info.filepos - start_recpos); + param->used+= (pos-start_recpos); + +end: + if (! got_error) + { + if (check_keys_in_record(param, info, extend, start_recpos, record)) + DBUG_RETURN(1); + } + else + { + got_error= 0; /* Reset for next loop */ + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + Check if layout on a page is ok +*/ + +static int check_page_layout(HA_CHECK *param, MARIA_HA *info, + my_off_t page_pos, byte *page, + uint row_count, uint head_empty, + uint *real_rows_found) +{ + uint empty, last_row_end, row, first_dir_entry; + byte *dir_entry; + char llbuff[22]; + DBUG_ENTER("check_page_layout"); + + empty= 0; + last_row_end= PAGE_HEADER_SIZE; + *real_rows_found= 0; + + dir_entry= page+ info->s->block_size - PAGE_SUFFIX_SIZE; + first_dir_entry= info->s->block_size - row_count* DIR_ENTRY_SIZE; + for (row= 0 ; row < row_count ; row++) + { + uint pos, length; + dir_entry-= DIR_ENTRY_SIZE; + pos= uint2korr(dir_entry); + if (!pos) + { + if (row == row_count -1) + { + _ma_check_print_error(param, + "Page %9s: First entry in directory is 0", + llstr(page_pos, llbuff)); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + continue; /* Deleted row */ + } + (*real_rows_found)++; + length= uint2korr(dir_entry+2); + param->used+= length; + if (pos < last_row_end) + { + _ma_check_print_error(param, + "Page %9s: Row %3u overlapps with previous row", + llstr(page_pos, llbuff), row); + DBUG_RETURN(1); + } + empty+= (pos - last_row_end); + last_row_end= pos + length; + if (last_row_end > first_dir_entry) + { + _ma_check_print_error(param, + "Page %9s: Row %3u overlapps with directory", + llstr(page_pos, llbuff), row); + DBUG_RETURN(1); + } + } + empty+= (first_dir_entry - last_row_end); + + if (empty != head_empty) + { + _ma_check_print_error(param, + "Page %9s: Wrong empty size. 
Stored: %5u Actual: %5u", + llstr(page_pos, llbuff), head_empty, empty); + DBUG_RETURN(param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)); + } + DBUG_RETURN(0); +} + + +/* + Check all rows on head page + + NOTES + Before this, we have already called check_page_layout(), so + we know the block is logicaly correct (even if the rows may not be that) + + RETURN + 0 ok + 1 error +*/ + + +static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, byte *record, + int extend, my_off_t page_pos, byte *page_buff, + uint row_count) +{ + byte *dir_entry; + uint row; + char llbuff[22], llbuff2[22]; + DBUG_ENTER("check_head_page"); + + dir_entry= page_buff+ info->s->block_size - PAGE_SUFFIX_SIZE; + for (row= 0 ; row < row_count ; row++) + { + uint pos, length, flag; + dir_entry-= DIR_ENTRY_SIZE; + pos= uint2korr(dir_entry); + if (!pos) + continue; + length= uint2korr(dir_entry+2); + if (length < info->s->base.min_block_length) + { + _ma_check_print_error(param, + "Page %9s: Row %3u is too short (%d bytes)", + llstr(page_pos, llbuff), row, length); + DBUG_RETURN(1); + } + flag= (uint) (uchar) page_buff[pos]; + if (flag & ~(ROW_FLAG_ALL)) + _ma_check_print_error(param, + "Page %9s: Row %3u has wrong flag: %d", + llstr(page_pos, llbuff), row, flag); + + DBUG_PRINT("info", ("rowid: %s page: %lu row: %u", + llstr(ma_recordpos(page_pos/info->s->block_size, row), + llbuff), + (ulong) (page_pos / info->s->block_size), row)); + if (_ma_read_block_record2(info, record, page_buff+pos, + page_buff+pos+length)) + { + _ma_check_print_error(param, + "Page %9s: Row %3d is crashed", + llstr(page_pos, llbuff), row); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + continue; + } + if (info->s->calc_checksum) + { + info->cur_row.checksum= _ma_checksum(info, record); + param->glob_crc+= info->cur_row.checksum; + } + if (info->cur_row.extents_count) + { + byte *extents= info->cur_row.extents; + uint i; + /* Check that bitmap has the right marker for the found extents */ + for (i= 0 ; i < info->cur_row.extents_count ; i++) + { + uint page, page_count, page_type; + page= uint5korr(extents); + page_count= uint2korr(extents+5); + extents+= ROW_EXTENT_SIZE; + page_type= BLOB_PAGE; + if (page_count & TAIL_BIT) + { + page_count= 1; + page_type= TAIL_PAGE; + } + for ( ; page_count--; page++) + { + uint bitmap_pattern; + if (_ma_check_if_right_bitmap_type(info, page_type, page, + &bitmap_pattern)) + { + _ma_check_print_error(param, + "Page %9s: Row: %3d has an extent with wrong information in bitmap: Page %9s Page_type: %d Bitmap: %d", + llstr(page_pos, llbuff), row, + llstr(page * info->s->bitmap.block_size, + llbuff2), + page_type, + bitmap_pattern); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + } + } + } + param->full_page_count+= info->cur_row.full_page_count; + param->tail_count+= info->cur_row.tail_count; + if (check_keys_in_record(param, info, extend, + ma_recordpos(page_pos/info->s->block_size, row), + record)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + + +static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + my_off_t pos; + byte *page_buff, *bitmap_buff, *data; + char llbuff[22], llbuff2[22]; + uint block_size= info->s->block_size; + ha_rows full_page_count, tail_count; + my_bool full_dir; + uint offset_page, offset; + + if (_ma_scan_init_block_record(info)) + { + _ma_check_print_error(param, "got error %d when initializing scan", + my_errno); + return 1; + } + bitmap_buff= 
info->scan.bitmap_buff; + page_buff= info->scan.page_buff; + full_page_count= tail_count= 0; + param->full_page_count= param->tail_count= 0; + param->used= param->link_used= 0; + + for (pos= 0; + pos < info->state->data_file_length; + pos+= block_size) + { + uint row_count, real_row_count, empty_space, page_type, bitmap_pattern; + LINT_INIT(row_count); + LINT_INIT(empty_space); + + if (*_ma_killed_ptr(param)) + { + _ma_scan_end_block_record(info); + return -1; + } + if (((pos / block_size) % info->s->bitmap.pages_covered) == 0) + { + /* Bitmap page */ + if (_ma_read_cache(¶m->read_cache, bitmap_buff, pos, + block_size, READING_NEXT)) + { + _ma_check_print_error(param, + "Page %9s: Got error: %d when reading datafile", + my_errno, llstr(pos, llbuff)); + goto err; + } + param->used+= block_size; + param->link_used+= block_size; + continue; + } + /* Skip pages marked as empty in bitmap */ + offset_page= (((pos / block_size) % info->s->bitmap.pages_covered) -1) * 3; + offset= offset_page & 7; + data= bitmap_buff + offset_page / 8; + bitmap_pattern= uint2korr(data); + param->splits++; + if (!((bitmap_pattern >> offset) & 7)) + { + param->empty+= block_size; + param->del_blocks++; + continue; + } + + if (_ma_read_cache(¶m->read_cache, page_buff, pos, + block_size, READING_NEXT)) + { + _ma_check_print_error(param, + "Page %9s: Got error: %d when reading datafile", + my_errno, llstr(pos, llbuff)); + goto err; + } + page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK; + if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE) + { + _ma_check_print_error(param, + "Page %9s: Found wrong page type %d\n", + llstr(pos, llbuff), page_type); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + goto err; + } + switch ((enum en_page_type) page_type) { + case UNALLOCATED_PAGE: + case MAX_PAGE_TYPE: + DBUG_PRINT("warning", + ("Found page with wrong page type: %d", page_type)); + DBUG_ASSERT(0); + break; + case HEAD_PAGE: + row_count= ((uchar*) page_buff)[DIR_ENTRY_OFFSET]; + empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET); + param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + full_dir= row_count == MAX_ROWS_PER_PAGE; + break; + case TAIL_PAGE: + row_count= ((uchar*) page_buff)[DIR_ENTRY_OFFSET]; + empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET); + param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + full_dir= row_count == MAX_ROWS_PER_PAGE; + break; + case BLOB_PAGE: + full_page_count++; + full_dir= 0; + empty_space= block_size; /* for error reporting */ + param->link_used+= (LSN_SIZE + PAGE_TYPE_SIZE); + param->used+= block_size; + break; + } + if (_ma_check_bitmap_data(info, page_type, pos / block_size, + full_dir ? 0 : empty_space, + &bitmap_pattern)) + { + _ma_check_print_error(param, + "Page %9s: Wrong data in bitmap. 
Page_type: %d empty_space: %u Bitmap: %d", + llstr(pos, llbuff), page_type, empty_space, + bitmap_pattern); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + goto err; + } + if ((enum en_page_type) page_type == BLOB_PAGE) + continue; + param->empty+= empty_space; + if (check_page_layout(param, info, pos, page_buff, row_count, + empty_space, &real_row_count)) + goto err; + if ((enum en_page_type) page_type == TAIL_PAGE) + { + tail_count+= real_row_count; + continue; + } + if (check_head_page(param, info, record, extend, pos, page_buff, + row_count)) + goto err; + } + + _ma_scan_end_block_record(info); + + if (full_page_count != param->full_page_count) + _ma_check_print_error(param, "Full page count read through records was %s but we found %s pages while scanning table", + llstr(param->full_page_count, llbuff), + llstr(full_page_count, llbuff2)); + if (tail_count != param->tail_count) + _ma_check_print_error(param, "Tail count read through records was %s but we found %s tails while scanning table", + llstr(param->tail_count, llbuff), + llstr(tail_count, llbuff2)); + + /* Update splits to avoid warning */ + info->s->state.split= param->splits; + info->state->del= param->del_blocks; + return param->error_printed != 0; + +err: + _ma_scan_end_block_record(info); + return 1; +} + + + /* Check that record-link is ok */ + +int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) +{ + int error; + byte *record; + char llbuff[22],llbuff2[22],llbuff3[22]; + DBUG_ENTER("maria_chk_data_link"); + + if (!(param->testflag & T_SILENT)) + { + if (extend) + puts("- check records and index references"); + else + puts("- check record links"); + } + + if (!(record= (byte*) my_malloc(info->s->base.pack_reclength,MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for record"); + DBUG_RETURN(-1); + } + param->records= param->del_blocks= 0; + param->used= param->link_used= param->splits= param->del_length= 0; + param->tmp_record_checksum= param->glob_crc= 0; + param->err_count= 0; + + error= 0; + param->empty= info->s->pack.header_length; + + bzero((char*) param->tmp_key_crc, + info->s->base.keys * sizeof(param->tmp_key_crc[0])); + + switch (info->s->data_file_type) { + case BLOCK_RECORD: + error= check_block_record(param, info, extend, record); + break; + case STATIC_RECORD: + error= check_static_record(param, info, extend, record); + break; + case DYNAMIC_RECORD: + error= check_dynamic_record(param, info, extend, record); + break; + case COMPRESSED_RECORD: + error= check_compressed_record(param, info, extend, record); + break; + } /* switch */ + + if (error) + goto err; + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + if (param->records != info->state->records) + { + _ma_check_print_error(param, + "Record-count is not ok; found %-10s Should be: %s", + llstr(param->records,llbuff), + llstr(info->state->records,llbuff2)); + error=1; + } + else if (param->record_checksum && + param->record_checksum != param->tmp_record_checksum) + { + _ma_check_print_error(param, + "Key pointers and record positions doesn't match"); + error=1; + } + else if (param->glob_crc != info->state->checksum && + (info->s->options & + (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))) + { + _ma_check_print_warning(param, + "Record checksum is not the same as checksum stored in the index file\n"); + error=1; + } + else if (!extend) + { + uint key; + for (key=0 ; key < info->s->base.keys; key++) + { + if (param->tmp_key_crc[key] != param->key_crc[key] 
&& + !(info->s->keyinfo[key].flag & (HA_FULLTEXT | HA_SPATIAL))) + { + _ma_check_print_error(param,"Checksum for key: %2d doesn't match checksum for records", + key+1); + error=1; + } + } + } + + if (param->del_length != info->state->empty) + { + _ma_check_print_warning(param, + "Found %s deleted space. Should be %s", + llstr(param->del_length,llbuff2), + llstr(info->state->empty,llbuff)); + } + if (param->used + param->empty + param->del_length != + info->state->data_file_length) + { + _ma_check_print_warning(param, + "Found %s record data and %s unused data and %s deleted data", + llstr(param->used, llbuff), + llstr(param->empty,llbuff2), + llstr(param->del_length,llbuff3)); + _ma_check_print_warning(param, + "Total %s Should be: %s", + llstr((param->used+param->empty+param->del_length), + llbuff), + llstr(info->state->data_file_length,llbuff2)); + } + if (param->del_blocks != info->state->del) + { + _ma_check_print_warning(param, + "Found %10s deleted blocks Should be: %s", + llstr(param->del_blocks,llbuff), + llstr(info->state->del,llbuff2)); + } + if (param->splits != info->s->state.split) + { + _ma_check_print_warning(param, + "Found %10s parts Should be: %s parts", + llstr(param->splits, llbuff), + llstr(info->s->state.split,llbuff2)); + } + if (param->testflag & T_INFO) + { + if (param->warning_printed || param->error_printed) + puts(""); + if (param->used != 0 && ! param->error_printed) + { + if (param->records) + { + printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n", + llstr(param->records,llbuff), + (long)((param->used - param->link_used)/param->records), + (info->s->base.blobs ? 0.0 : + (ulonglong2double((ulonglong) info->s->base.reclength * + param->records)- + my_off_t2double(param->used))/ + ulonglong2double((ulonglong) info->s->base.reclength * + param->records)*100.0)); + printf("Recordspace used:%9.0f%% Empty space:%12d%% Blocks/Record: %6.2f\n", + (ulonglong2double(param->used - param->link_used)/ + ulonglong2double(param->used-param->link_used+param->empty)*100.0), + (!param->records ? 100 : + (int) (ulonglong2double(param->del_length+param->empty)/ + my_off_t2double(param->used)*100.0)), + ulonglong2double(param->splits - param->del_blocks) / + param->records); + } + else + printf("Records:%18s\n", "0"); + } + printf("Record blocks:%12s Delete blocks:%10s\n", + llstr(param->splits - param->del_blocks, llbuff), + llstr(param->del_blocks, llbuff2)); + printf("Record data: %12s Deleted data: %10s\n", + llstr(param->used - param->link_used,llbuff), + llstr(param->del_length, llbuff2)); + printf("Lost space: %12s Linkdata: %10s\n", + llstr(param->empty, llbuff),llstr(param->link_used, llbuff2)); + } + my_free((gptr) record,MYF(0)); + DBUG_RETURN (error); + + err: + my_free((gptr) record,MYF(0)); + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(1); +} /* maria_chk_data_link */ + + + /* Recover old table by reading each record and writing all keys */ + /* Save new datafile-name in temp_filename */ + +int maria_repair(HA_CHECK *param, register MARIA_HA *info, + my_string name, int rep_quick) +{ + int error,got_error; + uint i; + ha_rows start_records,new_header_length; + my_off_t del; + File new_file; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + MARIA_SORT_INFO sort_info; + MARIA_SORT_PARAM sort_param; + DBUG_ENTER("maria_repair"); + + bzero((char *)&sort_info, sizeof(sort_info)); + bzero((char *)&sort_param, sizeof(sort_param)); + start_records=info->state->records; + new_header_length=(param->testflag & T_UNPACK) ? 
0L : + share->pack.header_length; + got_error=1; + new_file= -1; + sort_param.sort_info=&sort_info; + + if (!(param->testflag & T_SILENT)) + { + printf("- recovering (with keycache) MARIA-table '%s'\n",name); + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + } + param->testflag|=T_REP; /* for easy checking */ + + if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + param->testflag|=T_CALC_CHECKSUM; + + if (!param->using_global_keycache) + VOID(init_key_cache(maria_key_cache, param->key_cache_block_size, + param->use_buffers, 0, 0)); + + if (init_io_cache(¶m->read_cache,info->dfile, + (uint) param->read_buffer_length, + READ_CACHE,share->pack.header_length,1,MYF(MY_WME))) + { + bzero(&info->rec_cache,sizeof(info->rec_cache)); + goto err; + } + if (!rep_quick) + if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL))) + goto err; + info->opt_flag|=WRITE_CACHE_USED; + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0))) || + _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, + info->s->base.default_rec_buff_size)) + { + _ma_check_print_error(param, "Not enough memory for extra record"); + goto err; + } + + if (!rep_quick) + { + /* Get real path for data file */ + if ((new_file= my_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (new_header_length && + maria_filecopy(param,new_file,info->dfile,0L,new_header_length, + "datafile-header")) + goto err; + info->s->state.dellink= HA_OFFSET_ERROR; + info->rec_cache.file=new_file; + if (param->testflag & T_UNPACK) + restore_data_file_type(share); + } + sort_info.info=info; + sort_info.param = param; + sort_param.read_cache=param->read_cache; + sort_param.pos=sort_param.max_pos=share->pack.header_length; + sort_param.filepos=new_header_length; + param->read_cache.end_of_file=sort_info.filelength= + my_seek(info->dfile,0L,MY_SEEK_END,MYF(0)); + sort_info.dupp=0; + sort_param.fix_datafile= (my_bool) (! rep_quick); + sort_param.master=1; + sort_info.max_records= ~(ha_rows) 0; + + set_data_file_type(&sort_info, share); + del=info->state->del; + info->state->records=info->state->del=share->state.split=0; + info->state->empty=0; + param->glob_crc=0; + if (param->testflag & T_CALC_CHECKSUM) + sort_param.calc_checksum= 1; + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + /* + Clear all keys. Note that all key blocks allocated until now remain + "dead" parts of the key file. (Bug #4692) + */ + for (i=0 ; i < info->s->base.keys ; i++) + share->state.key_root[i]= HA_OFFSET_ERROR; + + /* Drop the delete chain. */ + share->state.key_del= HA_OFFSET_ERROR; + + /* + If requested, activate (enable) all keys in key_map. In this case, + all indexes will be (re-)built. + */ + if (param->testflag & T_CREATE_MISSING_KEYS) + maria_set_all_keys_active(share->state.key_map, share->base.keys); + + info->state->key_file_length=share->base.keystart; + + maria_lock_memory(param); /* Everything is alloced */ + + /* Re-create all keys, which are set in key_map. 
*/ + while (!(error=sort_get_next_record(&sort_param))) + { + if (writekeys(&sort_param)) + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY) + goto err; + DBUG_DUMP("record",(byte*) sort_param.record,share->base.pack_reclength); + _ma_check_print_info(param,"Duplicate key %2d for record at %10s against new record at %10s", + info->errkey+1, + llstr(sort_param.start_recpos,llbuff), + llstr(info->dup_key_pos,llbuff2)); + if (param->testflag & T_VERBOSE) + { + VOID(_ma_make_key(info,(uint) info->errkey,info->lastkey, + sort_param.record,0L)); + _ma_print_key(stdout,share->keyinfo[info->errkey].seg,info->lastkey, + USE_WHOLE_KEY); + } + sort_info.dupp++; + if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK) + { + param->testflag|=T_RETRY_WITHOUT_QUICK; + param->error_printed=1; + goto err; + } + continue; + } + if (_ma_sort_write_record(&sort_param)) + goto err; + } + if (error > 0 || maria_write_data_suffix(&sort_info, (my_bool)!rep_quick) || + flush_io_cache(&info->rec_cache) || param->read_cache.error < 0) + goto err; + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + if (my_chsize(share->kfile,info->state->key_file_length,0,MYF(0))) + { + _ma_check_print_warning(param, + "Can't change size of indexfile, error: %d", + my_errno); + goto err; + } + + if (rep_quick && del+sort_info.dupp != info->state->del) + { + _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); + _ma_check_print_error(param,"Run recovery again without -q"); + got_error=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + goto err; + } + if (param->testflag & T_SAFE_REPAIR) + { + /* Don't repair if we loosed more than one row */ + if (info->state->records+1 < start_records) + { + info->state->records=start_records; + got_error=1; + goto err; + } + } + + if (!rep_quick) + { + my_close(info->dfile,MYF(0)); + info->dfile=new_file; + info->state->data_file_length=sort_param.filepos; + share->state.version=(ulong) time((time_t*) 0); /* Force reopen */ + } + else + { + info->state->data_file_length=sort_param.max_pos; + } + if (param->testflag & T_CALC_CHECKSUM) + info->state->checksum=param->glob_crc; + + if (!(param->testflag & T_SILENT)) + { + if (start_records != info->state->records) + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + if (sort_info.dupp) + _ma_check_print_warning(param, + "%s records have been removed", + llstr(sort_info.dupp,llbuff)); + } + + got_error=0; + /* If invoked by external program that uses thr_lock */ + if (&share->state.state != info->state) + memcpy( &share->state.state, info->state, sizeof(*info->state)); + +err: + if (!got_error) + { + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + { + my_close(new_file,MYF(0)); + info->dfile=new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + _ma_open_datafile(info,share,-1)) + got_error=1; + } + } + if (got_error) + { + if (! 
param->error_printed) + _ma_check_print_error(param,"%d for record at pos %s",my_errno, + llstr(sort_param.start_recpos,llbuff)); + if (new_file >= 0) + { + VOID(my_close(new_file,MYF(0))); + VOID(my_delete(param->temp_filename, MYF(MY_WME))); + info->rec_cache.file=-1; /* don't flush data to new_file, it's closed */ + } + maria_mark_crashed_on_repair(info); + } + my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + VOID(end_io_cache(¶m->read_cache)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + VOID(end_io_cache(&info->rec_cache)); + got_error|=_ma_flush_blocks(param, share->key_cache, share->kfile); + if (!got_error && (param->testflag & T_UNPACK)) + restore_data_file_type(share); + share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES | + STATE_NOT_ANALYZED); + DBUG_RETURN(got_error); +} + + +/* Uppdate keyfile when doing repair */ + +static int writekeys(MARIA_SORT_PARAM *sort_param) +{ + register uint i; + byte *key; + MARIA_HA *info= sort_param->sort_info->info; + byte *buff= sort_param->record; + my_off_t filepos= sort_param->filepos; + DBUG_ENTER("writekeys"); + + key= info->lastkey+info->s->base.max_key_length; + for (i=0 ; i < info->s->base.keys ; i++) + { + if (maria_is_key_active(info->s->state.key_map, i)) + { + if (info->s->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_add(info,i,(char*) key,buff,filepos)) + goto err; + } +#ifdef HAVE_SPATIAL + else if (info->s->keyinfo[i].flag & HA_SPATIAL) + { + uint key_length= _ma_make_key(info,i,key,buff,filepos); + if (maria_rtree_insert(info, i, key, key_length)) + goto err; + } +#endif /*HAVE_SPATIAL*/ + else + { + uint key_length= _ma_make_key(info,i,key,buff,filepos); + if (_ma_ck_write(info,i,key,key_length)) + goto err; + } + } + } + DBUG_RETURN(0); + + err: + if (my_errno == HA_ERR_FOUND_DUPP_KEY) + { + info->errkey=(int) i; /* This key was found */ + while ( i-- > 0 ) + { + if (maria_is_key_active(info->s->state.key_map, i)) + { + if (info->s->keyinfo[i].flag & HA_FULLTEXT) + { + if (_ma_ft_del(info,i,(char*) key,buff,filepos)) + break; + } + else + { + uint key_length= _ma_make_key(info,i,key,buff,filepos); + if (_ma_ck_delete(info,i,key,key_length)) + break; + } + } + } + } + /* Remove checksum that was added to glob_crc in sort_get_next_record */ + if (sort_param->calc_checksum) + sort_param->sort_info->param->glob_crc-= info->cur_row.checksum; + DBUG_PRINT("error",("errno: %d",my_errno)); + DBUG_RETURN(-1); +} /* writekeys */ + + + /* Change all key-pointers that points to a records */ + +int maria_movepoint(register MARIA_HA *info, byte *record, + MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos, + uint prot_key) +{ + register uint i; + byte *key; + uint key_length; + DBUG_ENTER("maria_movepoint"); + + key= info->lastkey+info->s->base.max_key_length; + for (i=0 ; i < info->s->base.keys; i++) + { + if (i != prot_key && maria_is_key_active(info->s->state.key_map, i)) + { + key_length= _ma_make_key(info,i,key,record,oldpos); + if (info->s->keyinfo[i].flag & HA_NOSAME) + { /* Change pointer direct */ + uint nod_flag; + MARIA_KEYDEF *keyinfo; + keyinfo=info->s->keyinfo+i; + if (_ma_search(info,keyinfo,key,USE_WHOLE_KEY, + (uint) (SEARCH_SAME | SEARCH_SAVE_BUFF), + info->s->state.key_root[i])) + DBUG_RETURN(-1); + nod_flag=_ma_test_if_nod(info->buff); + _ma_dpointer(info,info->int_keypos-nod_flag- + info->s->rec_reflength,newpos); + if (_ma_write_keypage(info,keyinfo,info->last_keypage, + 
DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + } + else + { /* Change old key to new */ + if (_ma_ck_delete(info,i,key,key_length)) + DBUG_RETURN(-1); + key_length= _ma_make_key(info,i,key,record,newpos); + if (_ma_ck_write(info,i,key,key_length)) + DBUG_RETURN(-1); + } + } + } + DBUG_RETURN(0); +} /* maria_movepoint */ + + + /* Tell system that we want all memory for our cache */ + +void maria_lock_memory(HA_CHECK *param __attribute__((unused))) +{ +#ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */ + if (param->opt_maria_lock_memory) + { + int success = mlockall(MCL_CURRENT); /* or plock(DATLOCK); */ + if (geteuid() == 0 && success != 0) + _ma_check_print_warning(param, + "Failed to lock memory. errno %d",my_errno); + } +#endif +} /* maria_lock_memory */ + + + /* Flush all changed blocks to disk */ + +int _ma_flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file) +{ + if (flush_key_blocks(key_cache, file, FLUSH_RELEASE)) + { + _ma_check_print_error(param,"%d when trying to write bufferts",my_errno); + return(1); + } + if (!param->using_global_keycache) + end_key_cache(key_cache,1); + return 0; +} /* _ma_flush_blocks */ + + + /* Sort index for more efficent reads */ + +int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) +{ + reg2 uint key; + reg1 MARIA_KEYDEF *keyinfo; + File new_file; + my_off_t index_pos[HA_MAX_POSSIBLE_KEY]; + uint r_locks,w_locks; + int old_lock; + MARIA_SHARE *share=info->s; + MARIA_STATE_INFO old_state; + DBUG_ENTER("maria_sort_index"); + + if (!(param->testflag & T_SILENT)) + printf("- Sorting index for MARIA-table '%s'\n",name); + + /* Get real path for index file */ + fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32); + if ((new_file=my_create(fn_format(param->temp_filename,param->temp_filename, + "", INDEX_TMP_EXT,2+4), + 0,param->tmpfile_createflag,MYF(0))) <= 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + DBUG_RETURN(-1); + } + if (maria_filecopy(param, new_file,share->kfile,0L, + (ulong) share->base.keystart, "headerblock")) + goto err; + + param->new_file_pos=share->base.keystart; + for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ; + key++,keyinfo++) + { + if (! 
maria_is_key_active(info->s->state.key_map, key)) + continue; + + if (share->state.key_root[key] != HA_OFFSET_ERROR) + { + index_pos[key]=param->new_file_pos; /* Write first block here */ + if (sort_one_index(param,info,keyinfo,share->state.key_root[key], + new_file)) + goto err; + } + else + index_pos[key]= HA_OFFSET_ERROR; /* No blocks */ + } + + /* Flush key cache for this file if we are calling this outside maria_chk */ + flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); + + share->state.version=(ulong) time((time_t*) 0); + old_state= share->state; /* save state if not stored */ + r_locks= share->r_locks; + w_locks= share->w_locks; + old_lock= info->lock_type; + + /* Put same locks as old file */ + share->r_locks= share->w_locks= share->tot_locks= 0; + (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE); + VOID(my_close(share->kfile,MYF(MY_WME))); + share->kfile = -1; + VOID(my_close(new_file,MYF(MY_WME))); + if (maria_change_to_newfile(share->index_file_name, MARIA_NAME_IEXT, + INDEX_TMP_EXT, MYF(0)) || + _ma_open_keyfile(share)) + goto err2; + info->lock_type= F_UNLCK; /* Force maria_readinfo to lock */ + _ma_readinfo(info,F_WRLCK,0); /* Will lock the table */ + info->lock_type= old_lock; + share->r_locks= r_locks; + share->w_locks= w_locks; + share->tot_locks= r_locks+w_locks; + share->state= old_state; /* Restore old state */ + + info->state->key_file_length=param->new_file_pos; + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + for (key=0 ; key < info->s->base.keys ; key++) + info->s->state.key_root[key]=index_pos[key]; + info->s->state.key_del= HA_OFFSET_ERROR; + + info->s->state.changed&= ~STATE_NOT_SORTED_PAGES; + DBUG_RETURN(0); + +err: + VOID(my_close(new_file,MYF(MY_WME))); +err2: + VOID(my_delete(param->temp_filename,MYF(MY_WME))); + DBUG_RETURN(-1); +} /* maria_sort_index */ + + + /* Sort records recursive using one index */ + +static int sort_one_index(HA_CHECK *param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t pagepos, File new_file) +{ + uint length,nod_flag,used_length, key_length; + byte *buff,*keypos,*endpos; + byte key[HA_MAX_POSSIBLE_KEY_BUFF]; + my_off_t new_page_pos,next_page; + char llbuff[22]; + DBUG_ENTER("sort_one_index"); + + new_page_pos=param->new_file_pos; + param->new_file_pos+=keyinfo->block_length; + + if (!(buff= (byte*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not enough memory for key block"); + DBUG_RETURN(-1); + } + if (!_ma_fetch_keypage(info,keyinfo,pagepos,DFLT_INIT_HITS,buff,0)) + { + _ma_check_print_error(param,"Can't read key block from filepos: %s", + llstr(pagepos,llbuff)); + goto err; + } + if ((nod_flag=_ma_test_if_nod(buff)) || keyinfo->flag & HA_FULLTEXT) + { + used_length=maria_getint(buff); + keypos=buff+2+nod_flag; + endpos=buff+used_length; + for ( ;; ) + { + if (nod_flag) + { + next_page= _ma_kpos(nod_flag,keypos); + /* Save new pos */ + _ma_kpointer(info,keypos-nod_flag,param->new_file_pos); + if (sort_one_index(param,info,keyinfo,next_page,new_file)) + { + DBUG_PRINT("error", + ("From page: %ld, keyoffset: %lu used_length: %d", + (ulong) pagepos, (ulong) (keypos - buff), + (int) used_length)); + DBUG_DUMP("buff",(byte*) buff,used_length); + goto err; + } + } + if (keypos >= endpos || + (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0) + break; + DBUG_ASSERT(keypos <= endpos); + if (keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + get_key_full_length_rdonly(off, key); + subkeys=ft_sintXkorr(key+off); + if (subkeys < 
0) + { + next_page= _ma_dpos(info,0,key+key_length); + _ma_dpointer(info,keypos-nod_flag-info->s->rec_reflength, + param->new_file_pos); /* Save new pos */ + if (sort_one_index(param,info,&info->s->ft2_keyinfo, + next_page,new_file)) + goto err; + } + } + } + } + + /* Fill block with zero and write it to the new index file */ + length=maria_getint(buff); + bzero((byte*) buff+length,keyinfo->block_length-length); + if (my_pwrite(new_file,(byte*) buff,(uint) keyinfo->block_length, + new_page_pos,MYF(MY_NABP | MY_WAIT_IF_FULL))) + { + _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno); + goto err; + } + my_afree((gptr) buff); + DBUG_RETURN(0); +err: + my_afree((gptr) buff); + DBUG_RETURN(1); +} /* sort_one_index */ + + + /* + Let temporary file replace old file. + This assumes that the new file was created in the same + directory as given by realpath(filename). + This will ensure that any symlinks that are used will still work. + Copy stats from old file to new file, deletes orignal and + changes new file name to old file name + */ + +int maria_change_to_newfile(const char * filename, const char * old_ext, + const char * new_ext, myf MyFlags) +{ + char old_filename[FN_REFLEN],new_filename[FN_REFLEN]; +#ifdef USE_RAID + if (raid_chunks) + return my_raid_redel(fn_format(old_filename,filename,"",old_ext,2+4), + fn_format(new_filename,filename,"",new_ext,2+4), + raid_chunks, + MYF(MY_WME | MY_LINK_WARNING | MyFlags)); +#endif + /* Get real path to filename */ + (void) fn_format(old_filename,filename,"",old_ext,2+4+32); + return my_redel(old_filename, + fn_format(new_filename,old_filename,"",new_ext,2+4), + MYF(MY_WME | MY_LINK_WARNING | MyFlags)); +} /* maria_change_to_newfile */ + + +/* Copy a block between two files */ + +int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start, + my_off_t length, const char *type) +{ + char tmp_buff[IO_SIZE],*buff; + ulong buff_length; + DBUG_ENTER("maria_filecopy"); + + buff_length=(ulong) min(param->write_buffer_length,length); + if (!(buff=my_malloc(buff_length,MYF(0)))) + { + buff=tmp_buff; buff_length=IO_SIZE; + } + + VOID(my_seek(from,start,MY_SEEK_SET,MYF(0))); + while (length > buff_length) + { + if (my_read(from,(byte*) buff,buff_length,MYF(MY_NABP)) || + my_write(to,(byte*) buff,buff_length,param->myf_rw)) + goto err; + length-= buff_length; + } + if (my_read(from,(byte*) buff,(uint) length,MYF(MY_NABP)) || + my_write(to,(byte*) buff,(uint) length,param->myf_rw)) + goto err; + if (buff != tmp_buff) + my_free(buff,MYF(0)); + DBUG_RETURN(0); +err: + if (buff != tmp_buff) + my_free(buff,MYF(0)); + _ma_check_print_error(param,"Can't copy %s to tempfile, error %d", + type,my_errno); + DBUG_RETURN(1); +} + + +/* + Repair table or given index using sorting + + SYNOPSIS + maria_repair_by_sort() + param Repair parameters + info MARIA handler to repair + name Name of table (for warnings) + rep_quick set to <> 0 if we should not change data file + + RESULT + 0 ok + <>0 Error +*/ + +int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, + const char * name, int rep_quick) +{ + int got_error; + uint i; + ulong length; + ha_rows start_records; + my_off_t new_header_length,del; + File new_file; + MARIA_SORT_PARAM sort_param; + MARIA_SHARE *share=info->s; + HA_KEYSEG *keyseg; + ulong *rec_per_key_part; + char llbuff[22]; + MARIA_SORT_INFO sort_info; + ulonglong key_map=share->state.key_map; + DBUG_ENTER("maria_repair_by_sort"); + + start_records=info->state->records; + got_error=1; + new_file= -1; + 
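A standalone sketch, separate from the changeset itself: maria_filecopy() above streams a byte range between two descriptors through a bounded buffer. The same chunked-copy pattern with plain POSIX read()/write() in place of the my_* wrappers; copy_chunked is an illustrative name and error handling is trimmed to the minimum:

#include <sys/types.h>
#include <unistd.h>

/* Copy 'length' bytes from 'from' (already positioned) to 'to';
   returns 0 on success, 1 on any short read or write. */
static int copy_chunked(int to, int from, off_t length)
{
  char buff[8192];
  while (length > 0)
  {
    size_t chunk= (length > (off_t) sizeof(buff)) ?
                  sizeof(buff) : (size_t) length;
    ssize_t got= read(from, buff, chunk);
    if (got <= 0)
      return 1;                        /* read error or premature EOF */
    if (write(to, buff, (size_t) got) != got)
      return 1;                        /* write error or disk full    */
    length-= got;
  }
  return 0;
}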
new_header_length=(param->testflag & T_UNPACK) ? 0 : + share->pack.header_length; + if (!(param->testflag & T_SILENT)) + { + printf("- recovering (with sort) MARIA-table '%s'\n",name); + printf("Data records: %s\n", llstr(start_records,llbuff)); + } + param->testflag|=T_REP; /* for easy checking */ + + if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + param->testflag|=T_CALC_CHECKSUM; + + bzero((char*)&sort_info,sizeof(sort_info)); + bzero((char *)&sort_param, sizeof(sort_param)); + if (!(sort_info.key_block= + alloc_key_blocks(param, + (uint) param->sort_key_blocks, + share->base.max_key_block_length)) + || init_io_cache(¶m->read_cache,info->dfile, + (uint) param->read_buffer_length, + READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || + (! rep_quick && + init_io_cache(&info->rec_cache,info->dfile, + (uint) param->write_buffer_length, + WRITE_CACHE,new_header_length,1, + MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))) + goto err; + sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; + info->opt_flag|=WRITE_CACHE_USED; + info->rec_cache.file=info->dfile; /* for sort_delete_record */ + + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0))) || + _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, + info->s->base.default_rec_buff_size)) + { + _ma_check_print_error(param, "Not enough memory for extra record"); + goto err; + } + if (!rep_quick) + { + /* Get real path for data file */ + if ((new_file=my_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (new_header_length && + maria_filecopy(param, new_file,info->dfile,0L,new_header_length, + "datafile-header")) + goto err; + if (param->testflag & T_UNPACK) + restore_data_file_type(share); + share->state.dellink= HA_OFFSET_ERROR; + info->rec_cache.file=new_file; + } + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if (!(param->testflag & T_CREATE_MISSING_KEYS)) + { + /* + Flush key cache for this file if we are calling this outside + maria_chk + */ + flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); + /* Clear the pointers to the given rows */ + for (i=0 ; i < share->base.keys ; i++) + share->state.key_root[i]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; + info->state->key_file_length=share->base.keystart; + } + else + { + if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_FORCE_WRITE)) + goto err; + key_map= ~key_map; /* Create the missing keys */ + } + + sort_info.info=info; + sort_info.param = param; + + set_data_file_type(&sort_info, share); + sort_param.filepos=new_header_length; + sort_info.dupp=0; + sort_info.buff=0; + param->read_cache.end_of_file=sort_info.filelength= + my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0)); + + sort_param.wordlist=NULL; + init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); + + if (share->data_file_type == DYNAMIC_RECORD) + length=max(share->base.min_pack_length+1,share->base.min_block_length); + else if (share->data_file_type == COMPRESSED_RECORD) + length=share->base.min_block_length; + else + length=share->base.pack_reclength; + sort_info.max_records= + ((param->testflag & T_CREATE_MISSING_KEYS) ? 
info->state->records : + (ha_rows) (sort_info.filelength/length+1)); + sort_param.key_cmp=sort_key_cmp; + sort_param.lock_in_memory=maria_lock_memory; + sort_param.tmpdir=param->tmpdir; + sort_param.sort_info=&sort_info; + sort_param.fix_datafile= (my_bool) (! rep_quick); + sort_param.master =1; + + del=info->state->del; + param->glob_crc=0; + if (param->testflag & T_CALC_CHECKSUM) + sort_param.calc_checksum= 1; + + rec_per_key_part= param->rec_per_key_part; + for (sort_param.key=0 ; sort_param.key < share->base.keys ; + rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++) + { + sort_param.read_cache=param->read_cache; + sort_param.keyinfo=share->keyinfo+sort_param.key; + sort_param.seg=sort_param.keyinfo->seg; + if (! maria_is_key_active(key_map, sort_param.key)) + { + /* Remember old statistics for key */ + memcpy((char*) rec_per_key_part, + (char*) (share->state.rec_per_key_part + + (uint) (rec_per_key_part - param->rec_per_key_part)), + sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part)); + continue; + } + + if ((!(param->testflag & T_SILENT))) + printf ("- Fixing index %d\n",sort_param.key+1); + sort_param.max_pos=sort_param.pos=share->pack.header_length; + keyseg=sort_param.seg; + bzero((char*) sort_param.unique,sizeof(sort_param.unique)); + sort_param.key_length=share->rec_reflength; + for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++) + { + sort_param.key_length+=keyseg[i].length; + if (keyseg[i].flag & HA_SPACE_PACK) + sort_param.key_length+=get_pack_length(keyseg[i].length); + if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) + sort_param.key_length+=2 + test(keyseg[i].length >= 127); + if (keyseg[i].flag & HA_NULL_PART) + sort_param.key_length++; + } + info->state->records=info->state->del=share->state.split=0; + info->state->empty=0; + + if (sort_param.keyinfo->flag & HA_FULLTEXT) + { + uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* + sort_param.keyinfo->seg->charset->mbmaxlen; + sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + /* + fulltext indexes may have much more entries than the + number of rows in the table. We estimate the number here. + + Note, built-in parser is always nr. 0 - see ftparser_call_initializer() + */ + if (sort_param.keyinfo->ftparser_nr == 0) + { + /* + for built-in parser the number of generated index entries + cannot be larger than the size of the data file divided + by the minimal word's length + */ + sort_info.max_records= + (ha_rows) (sort_info.filelength/ft_min_word_len+1); + } + else + { + /* + for external plugin parser we cannot tell anything at all :( + so, we'll use all the sort memory and start from ~10 buffpeks. + (see _create_index_by_sort) + */ + sort_info.max_records= + 10*param->sort_buffer_length/sort_param.key_length; + } + + sort_param.key_read= sort_maria_ft_key_read; + sort_param.key_write= sort_maria_ft_key_write; + } + else + { + sort_param.key_read= sort_key_read; + sort_param.key_write= sort_key_write; + } + + if (_ma_create_index_by_sort(&sort_param, + (my_bool) (!(param->testflag & T_VERBOSE)), + (uint) param->sort_buffer_length)) + { + param->retry_repair=1; + goto err; + } + /* No need to calculate checksum again. */ + sort_param.calc_checksum= 0; + free_root(&sort_param.wordroot, MYF(0)); + + /* Set for next loop */ + sort_info.max_records= (ha_rows) info->state->records; + + if (param->testflag & T_STATISTICS) + maria_update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? 
+ sort_param.notnull: NULL,(ulonglong) info->state->records); + maria_set_key_active(share->state.key_map, sort_param.key); + + if (sort_param.fix_datafile) + { + param->read_cache.end_of_file=sort_param.filepos; + if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache)) + goto err; + if (param->testflag & T_SAFE_REPAIR) + { + /* Don't repair if we loosed more than one row */ + if (info->state->records+1 < start_records) + { + info->state->records=start_records; + goto err; + } + } + share->state.state.data_file_length = info->state->data_file_length= + sort_param.filepos; + /* Only whole records */ + share->state.version=(ulong) time((time_t*) 0); + my_close(info->dfile,MYF(0)); + info->dfile=new_file; + share->data_file_type=sort_info.new_data_file_type; + share->pack.header_length=(ulong) new_header_length; + sort_param.fix_datafile=0; + } + else + info->state->data_file_length=sort_param.max_pos; + + param->read_cache.file=info->dfile; /* re-init read cache */ + reinit_io_cache(¶m->read_cache,READ_CACHE,share->pack.header_length, + 1,1); + } + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + + if (rep_quick && del+sort_info.dupp != info->state->del) + { + _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); + _ma_check_print_error(param,"Run recovery again without -q"); + got_error=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + goto err; + } + + if (rep_quick & T_FORCE_UNIQUENESS) + { + my_off_t skr=info->state->data_file_length+ + (share->options & HA_OPTION_COMPRESS_RECORD ? + MEMMAP_EXTRA_MARGIN : 0); +#ifdef USE_RELOC + if (share->data_file_type == STATIC_RECORD && + skr < share->base.reloc*share->base.min_pack_length) + skr=share->base.reloc*share->base.min_pack_length; +#endif + if (skr != sort_info.filelength) + if (my_chsize(info->dfile,skr,0,MYF(0))) + _ma_check_print_warning(param, + "Can't change size of datafile, error: %d", + my_errno); + } + if (param->testflag & T_CALC_CHECKSUM) + info->state->checksum=param->glob_crc; + + if (my_chsize(share->kfile,info->state->key_file_length,0,MYF(0))) + _ma_check_print_warning(param, + "Can't change size of indexfile, error: %d", + my_errno); + + if (!(param->testflag & T_SILENT)) + { + if (start_records != info->state->records) + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + if (sort_info.dupp) + _ma_check_print_warning(param, + "%s records have been removed", + llstr(sort_info.dupp,llbuff)); + } + got_error=0; + + if (&share->state.state != info->state) + memcpy( &share->state.state, info->state, sizeof(*info->state)); + +err: + got_error|= _ma_flush_blocks(param, share->key_cache, share->kfile); + VOID(end_io_cache(&info->rec_cache)); + if (!got_error) + { + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + { + my_close(new_file,MYF(0)); + info->dfile=new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, + (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + _ma_open_datafile(info,share,-1)) + got_error=1; + } + } + if (got_error) + { + if (! 
param->error_printed) + _ma_check_print_error(param,"%d when fixing table",my_errno); + if (new_file >= 0) + { + VOID(my_close(new_file,MYF(0))); + VOID(my_delete(param->temp_filename, MYF(MY_WME))); + if (info->dfile == new_file) + info->dfile= -1; + } + maria_mark_crashed_on_repair(info); + } + else if (key_map == share->state.key_map) + share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS; + share->state.changed|=STATE_NOT_SORTED_PAGES; + + my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + VOID(end_io_cache(¶m->read_cache)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + if (!got_error && (param->testflag & T_UNPACK)) + restore_data_file_type(share); + DBUG_RETURN(got_error); +} + +/* + Threaded repair of table using sorting + + SYNOPSIS + maria_repair_parallel() + param Repair parameters + info MARIA handler to repair + name Name of table (for warnings) + rep_quick set to <> 0 if we should not change data file + + DESCRIPTION + Same as maria_repair_by_sort but do it multithreaded + Each key is handled by a separate thread. + TODO: make a number of threads a parameter + + In parallel repair we use one thread per index. There are two modes: + + Quick + + Only the indexes are rebuilt. All threads share a read buffer. + Every thread that needs fresh data in the buffer enters the shared + cache lock. The last thread joining the lock reads the buffer from + the data file and wakes all other threads. + + Non-quick + + The data file is rebuilt and all indexes are rebuilt to point to + the new record positions. One thread is the master thread. It + reads from the old data file and writes to the new data file. It + also creates one of the indexes. The other threads read from a + buffer which is filled by the master. If they need fresh data, + they enter the shared cache lock. If the masters write buffer is + full, it flushes it to the new data file and enters the shared + cache lock too. When all threads joined in the lock, the master + copies its write buffer to the read buffer for the other threads + and wakes them. + + RESULT + 0 ok + <>0 Error +*/ + +int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, + const char * name, int rep_quick) +{ +#ifndef THREAD + return maria_repair_by_sort(param, info, name, rep_quick); +#else + int got_error; + uint i,key, total_key_length, istep; + ulong rec_length; + ha_rows start_records; + my_off_t new_header_length,del; + File new_file; + MARIA_SORT_PARAM *sort_param=0; + MARIA_SHARE *share=info->s; + ulong *rec_per_key_part; + HA_KEYSEG *keyseg; + char llbuff[22]; + IO_CACHE new_data_cache; /* For non-quick repair. */ + IO_CACHE_SHARE io_share; + MARIA_SORT_INFO sort_info; + ulonglong key_map=share->state.key_map; + pthread_attr_t thr_attr; + DBUG_ENTER("maria_repair_parallel"); + + start_records=info->state->records; + got_error=1; + new_file= -1; + new_header_length=(param->testflag & T_UNPACK) ? 
0 : + share->pack.header_length; + if (!(param->testflag & T_SILENT)) + { + printf("- parallel recovering (with sort) MARIA-table '%s'\n",name); + printf("Data records: %s\n", llstr(start_records,llbuff)); + } + param->testflag|=T_REP; /* for easy checking */ + + if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + param->testflag|=T_CALC_CHECKSUM; + + /* + Quick repair (not touching data file, rebuilding indexes): + { + Read cache is (MI_CHECK *param)->read_cache using info->dfile. + } + + Non-quick repair (rebuilding data file and indexes): + { + Master thread: + + Read cache is (MI_CHECK *param)->read_cache using info->dfile. + Write cache is (MI_INFO *info)->rec_cache using new_file. + + Slave threads: + + Read cache is new_data_cache synced to master rec_cache. + + The final assignment of the filedescriptor for rec_cache is done + after the cache creation. + + Don't check file size on new_data_cache, as the resulting file size + is not known yet. + + As rec_cache and new_data_cache are synced, write_buffer_length is + used for the read cache 'new_data_cache'. Both start at the same + position 'new_header_length'. + } + */ + DBUG_PRINT("info", ("is quick repair: %d", rep_quick)); + bzero((char*)&sort_info,sizeof(sort_info)); + /* Initialize pthread structures before goto err. */ + pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&sort_info.cond, 0); + + if (!(sort_info.key_block= + alloc_key_blocks(param, (uint) param->sort_key_blocks, + share->base.max_key_block_length)) || + init_io_cache(¶m->read_cache, info->dfile, + (uint) param->read_buffer_length, + READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) || + (!rep_quick && + (init_io_cache(&info->rec_cache, info->dfile, + (uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) || + init_io_cache(&new_data_cache, -1, + (uint) param->write_buffer_length, + READ_CACHE, new_header_length, 1, + MYF(MY_WME | MY_DONT_CHECK_FILESIZE))))) + goto err; + sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; + info->opt_flag|=WRITE_CACHE_USED; + info->rec_cache.file=info->dfile; /* for sort_delete_record */ + + if (!rep_quick) + { + /* Get real path for data file */ + if ((new_file= my_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, + 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (new_header_length && + maria_filecopy(param, new_file,info->dfile,0L,new_header_length, + "datafile-header")) + goto err; + if (param->testflag & T_UNPACK) + restore_data_file_type(share); + share->state.dellink= HA_OFFSET_ERROR; + info->rec_cache.file=new_file; + } + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if (!(param->testflag & T_CREATE_MISSING_KEYS)) + { + /* + Flush key cache for this file if we are calling this outside + maria_chk + */ + flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED); + /* Clear the pointers to the given rows */ + for (i=0 ; i < share->base.keys ; i++) + share->state.key_root[i]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; + info->state->key_file_length=share->base.keystart; + } + else + { + if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_FORCE_WRITE)) + goto err; + key_map= ~key_map; /* Create the missing keys */ + } + + sort_info.info=info; + sort_info.param = param; + + 
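/* Decide the data file type for the rebuilt file and get the old data file length for the max_records estimate below. */
+  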
set_data_file_type(&sort_info, share);
+  sort_info.dupp=0;
+  sort_info.buff=0;
+  param->read_cache.end_of_file=sort_info.filelength=
+    my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0));
+
+  if (share->data_file_type == DYNAMIC_RECORD)
+    rec_length=max(share->base.min_pack_length+1,share->base.min_block_length);
+  else if (share->data_file_type == COMPRESSED_RECORD)
+    rec_length=share->base.min_block_length;
+  else
+    rec_length=share->base.pack_reclength;
+  /*
+    The +1 below is a required hack for parallel repair mode.
+    The info->state->records value, which is compared later
+    to sort_info.max_records and cannot exceed it, is
+    increased in sort_key_write. In maria_repair_by_sort, sort_key_write
+    is called after sort_key_read, where the comparison is performed,
+    but in parallel mode the master thread can call sort_key_write
+    before some other repair thread calls sort_key_read.
+    Furthermore, I'm not even sure +1 is enough.
+    Maybe sort_info.max_records should always be set to the maximum
+    value in parallel mode.
+  */
+  sort_info.max_records=
+    ((param->testflag & T_CREATE_MISSING_KEYS) ? info->state->records + 1:
+     (ha_rows) (sort_info.filelength/rec_length+1));
+
+  del=info->state->del;
+  param->glob_crc=0;
+
+  if (!(sort_param=(MARIA_SORT_PARAM *)
+        my_malloc((uint) share->base.keys *
+                  (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
+                  MYF(MY_ZEROFILL))))
+  {
+    _ma_check_print_error(param,"Not enough memory for key!");
+    goto err;
+  }
+  total_key_length=0;
+  rec_per_key_part= param->rec_per_key_part;
+  info->state->records=info->state->del=share->state.split=0;
+  info->state->empty=0;
+
+  for (i=key=0, istep=1 ; key < share->base.keys ;
+       rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
+  {
+    sort_param[i].key=key;
+    sort_param[i].keyinfo=share->keyinfo+key;
+    sort_param[i].seg=sort_param[i].keyinfo->seg;
+    if (!
maria_is_key_active(key_map, key)) + { + /* Remember old statistics for key */ + memcpy((char*) rec_per_key_part, + (char*) (share->state.rec_per_key_part+ + (uint) (rec_per_key_part - param->rec_per_key_part)), + sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part)); + istep=0; + continue; + } + istep=1; + if ((!(param->testflag & T_SILENT))) + printf ("- Fixing index %d\n",key+1); + if (sort_param[i].keyinfo->flag & HA_FULLTEXT) + { + sort_param[i].key_read=sort_maria_ft_key_read; + sort_param[i].key_write=sort_maria_ft_key_write; + } + else + { + sort_param[i].key_read=sort_key_read; + sort_param[i].key_write=sort_key_write; + } + sort_param[i].key_cmp=sort_key_cmp; + sort_param[i].lock_in_memory=maria_lock_memory; + sort_param[i].tmpdir=param->tmpdir; + sort_param[i].sort_info=&sort_info; + sort_param[i].master=0; + sort_param[i].fix_datafile=0; + sort_param[i].calc_checksum= 0; + + sort_param[i].filepos=new_header_length; + sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length; + + sort_param[i].record= (((char *)(sort_param+share->base.keys))+ + (share->base.pack_reclength * i)); + if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size, + share->base.default_rec_buff_size)) + { + _ma_check_print_error(param,"Not enough memory!"); + goto err; + } + sort_param[i].key_length=share->rec_reflength; + for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END; + keyseg++) + { + sort_param[i].key_length+=keyseg->length; + if (keyseg->flag & HA_SPACE_PACK) + sort_param[i].key_length+=get_pack_length(keyseg->length); + if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) + sort_param[i].key_length+=2 + test(keyseg->length >= 127); + if (keyseg->flag & HA_NULL_PART) + sort_param[i].key_length++; + } + total_key_length+=sort_param[i].key_length; + + if (sort_param[i].keyinfo->flag & HA_FULLTEXT) + { + uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* + sort_param[i].keyinfo->seg->charset->mbmaxlen; + sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + init_alloc_root(&sort_param[i].wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); + } + } + sort_info.total_keys=i; + sort_param[0].master= 1; + sort_param[0].fix_datafile= (my_bool)(! rep_quick); + sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM); + + sort_info.got_error=0; + pthread_mutex_lock(&sort_info.mutex); + + /* + Initialize the I/O cache share for use with the read caches and, in + case of non-quick repair, the write cache. When all threads join on + the cache lock, the writer copies the write cache contents to the + read caches. + */ + if (i > 1) + { + if (rep_quick) + init_io_cache_share(¶m->read_cache, &io_share, NULL, i); + else + init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i); + } + else + io_share.total_threads= 0; /* share not used */ + + (void) pthread_attr_init(&thr_attr); + (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); + + for (i=0 ; i < sort_info.total_keys ; i++) + { + /* + Copy the properly initialized IO_CACHE structure so that every + thread has its own copy. In quick mode param->read_cache is shared + for use by all threads. In non-quick mode all threads but the + first copy the shared new_data_cache, which is synchronized to the + write cache of the first thread. The first thread copies + param->read_cache, which is not shared. + */ + sort_param[i].read_cache= ((rep_quick || !i) ? 
param->read_cache : + new_data_cache); + DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx", + i, (long) &sort_param[i].read_cache)); + + /* + two approaches: the same amount of memory for each thread + or the memory for the same number of keys for each thread... + In the second one all the threads will fill their sort_buffers + (and call write_keys) at the same time, putting more stress on i/o. + */ + sort_param[i].sortbuff_size= +#ifndef USING_SECOND_APPROACH + param->sort_buffer_length/sort_info.total_keys; +#else + param->sort_buffer_length*sort_param[i].key_length/total_key_length; +#endif + if (pthread_create(&sort_param[i].thr, &thr_attr, + _ma_thr_find_all_keys, + (void *) (sort_param+i))) + { + _ma_check_print_error(param,"Cannot start a repair thread"); + /* Cleanup: Detach from the share. Avoid others to be blocked. */ + if (io_share.total_threads) + remove_io_thread(&sort_param[i].read_cache); + DBUG_PRINT("error", ("Cannot start a repair thread")); + sort_info.got_error=1; + } + else + sort_info.threads_running++; + } + (void) pthread_attr_destroy(&thr_attr); + + /* waiting for all threads to finish */ + while (sort_info.threads_running) + pthread_cond_wait(&sort_info.cond, &sort_info.mutex); + pthread_mutex_unlock(&sort_info.mutex); + + if ((got_error= _ma_thr_write_keys(sort_param))) + { + param->retry_repair=1; + goto err; + } + got_error=1; /* Assume the following may go wrong */ + + if (sort_param[0].fix_datafile) + { + /* + Append some nuls to the end of a memory mapped file. Destroy the + write cache. The master thread did already detach from the share + by remove_io_thread() in sort.c:thr_find_all_keys(). + */ + if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache)) + goto err; + if (param->testflag & T_SAFE_REPAIR) + { + /* Don't repair if we loosed more than one row */ + if (info->state->records+1 < start_records) + { + info->state->records=start_records; + goto err; + } + } + share->state.state.data_file_length= info->state->data_file_length= + sort_param->filepos; + /* Only whole records */ + share->state.version=(ulong) time((time_t*) 0); + /* + Exchange the data file descriptor of the table, so that we use the + new file from now on. + */ + my_close(info->dfile,MYF(0)); + info->dfile=new_file; + + share->data_file_type=sort_info.new_data_file_type; + share->pack.header_length=(ulong) new_header_length; + } + else + info->state->data_file_length=sort_param->max_pos; + + if (rep_quick && del+sort_info.dupp != info->state->del) + { + _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); + _ma_check_print_error(param,"Run recovery again without -q"); + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + goto err; + } + + if (rep_quick & T_FORCE_UNIQUENESS) + { + my_off_t skr=info->state->data_file_length+ + (share->options & HA_OPTION_COMPRESS_RECORD ? 
+ MEMMAP_EXTRA_MARGIN : 0); +#ifdef USE_RELOC + if (share->data_file_type == STATIC_RECORD && + skr < share->base.reloc*share->base.min_pack_length) + skr=share->base.reloc*share->base.min_pack_length; +#endif + if (skr != sort_info.filelength) + if (my_chsize(info->dfile,skr,0,MYF(0))) + _ma_check_print_warning(param, + "Can't change size of datafile, error: %d", + my_errno); + } + if (param->testflag & T_CALC_CHECKSUM) + info->state->checksum=param->glob_crc; + + if (my_chsize(share->kfile,info->state->key_file_length,0,MYF(0))) + _ma_check_print_warning(param, + "Can't change size of indexfile, error: %d", my_errno); + + if (!(param->testflag & T_SILENT)) + { + if (start_records != info->state->records) + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + if (sort_info.dupp) + _ma_check_print_warning(param, + "%s records have been removed", + llstr(sort_info.dupp,llbuff)); + } + got_error=0; + + if (&share->state.state != info->state) + memcpy(&share->state.state, info->state, sizeof(*info->state)); + +err: + got_error|= _ma_flush_blocks(param, share->key_cache, share->kfile); + /* + Destroy the write cache. The master thread did already detach from + the share by remove_io_thread() or it was not yet started (if the + error happend before creating the thread). + */ + VOID(end_io_cache(&info->rec_cache)); + /* + Destroy the new data cache in case of non-quick repair. All slave + threads did either detach from the share by remove_io_thread() + already or they were not yet started (if the error happend before + creating the threads). + */ + if (!rep_quick) + VOID(end_io_cache(&new_data_cache)); + if (!got_error) + { + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + { + my_close(new_file,MYF(0)); + info->dfile=new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, + (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + _ma_open_datafile(info,share,-1)) + got_error=1; + } + } + if (got_error) + { + if (! 
param->error_printed) + _ma_check_print_error(param,"%d when fixing table",my_errno); + if (new_file >= 0) + { + VOID(my_close(new_file,MYF(0))); + VOID(my_delete(param->temp_filename, MYF(MY_WME))); + if (info->dfile == new_file) + info->dfile= -1; + } + maria_mark_crashed_on_repair(info); + } + else if (key_map == share->state.key_map) + share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS; + share->state.changed|=STATE_NOT_SORTED_PAGES; + + pthread_cond_destroy (&sort_info.cond); + pthread_mutex_destroy(&sort_info.mutex); + + my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_param,MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + VOID(end_io_cache(¶m->read_cache)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + if (!got_error && (param->testflag & T_UNPACK)) + restore_data_file_type(share); + DBUG_RETURN(got_error); +#endif /* THREAD */ +} + + /* Read next record and return next key */ + +static int sort_key_read(MARIA_SORT_PARAM *sort_param, byte *key) +{ + int error; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + MARIA_HA *info= sort_info->info; + DBUG_ENTER("sort_key_read"); + + if ((error=sort_get_next_record(sort_param))) + DBUG_RETURN(error); + if (info->state->records == sort_info->max_records) + { + _ma_check_print_error(sort_info->param, + "Key %d - Found too many records; Can't continue", + sort_param->key+1); + DBUG_RETURN(1); + } + sort_param->real_key_length= + (info->s->rec_reflength+ + _ma_make_key(info, sort_param->key, key, + sort_param->record, sort_param->filepos)); +#ifdef HAVE_purify + bzero(key+sort_param->real_key_length, + (sort_param->key_length-sort_param->real_key_length)); +#endif + DBUG_RETURN(_ma_sort_write_record(sort_param)); +} /* sort_key_read */ + + +static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key) +{ + int error; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + MARIA_HA *info=sort_info->info; + FT_WORD *wptr=0; + DBUG_ENTER("sort_maria_ft_key_read"); + + if (!sort_param->wordlist) + { + for (;;) + { + free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE)); + if ((error=sort_get_next_record(sort_param))) + DBUG_RETURN(error); + if (!(wptr= _ma_ft_parserecord(info,sort_param->key,sort_param->record, + &sort_param->wordroot))) + + DBUG_RETURN(1); + if (wptr->pos) + break; + error=_ma_sort_write_record(sort_param); + } + sort_param->wordptr=sort_param->wordlist=wptr; + } + else + { + error=0; + wptr=(FT_WORD*)(sort_param->wordptr); + } + + sort_param->real_key_length=(info->s->rec_reflength+ + _ma_ft_make_key(info, sort_param->key, + key, wptr++, + sort_param->filepos)); +#ifdef HAVE_purify + if (sort_param->key_length > sort_param->real_key_length) + bzero(key+sort_param->real_key_length, + (sort_param->key_length-sort_param->real_key_length)); +#endif + if (!wptr->pos) + { + free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE)); + sort_param->wordlist=0; + error=_ma_sort_write_record(sort_param); + } + else + sort_param->wordptr=(void*)wptr; + + DBUG_RETURN(error); +} /* sort_maria_ft_key_read */ + + +/* + Read next record from file using parameters in sort_info. + + SYNOPSIS + sort_get_next_record() + sort_param Information about and for the sort process + + NOTE + + Dynamic Records With Non-Quick Parallel Repair + + For non-quick parallel repair we use a synchronized read/write + cache. 
This means that one thread is the master who fixes the data + file by reading each record from the old data file and writing it + to the new data file. By doing this the records in the new data + file are written contiguously. Whenever the write buffer is full, + it is copied to the read buffer. The slaves read from the read + buffer, which is not associated with a file. Thus read_cache.file + is -1. When using _mi_read_cache(), the slaves must always set + flag to READING_NEXT so that the function never tries to read from + file. This is safe because the records are contiguous. There is no + need to read outside the cache. This condition is evaluated in the + variable 'parallel_flag' for quick reference. read_cache.file must + be >= 0 in every other case. + + RETURN + -1 end of file + 0 ok + > 0 error +*/ + +static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) +{ + int searching; + int parallel_flag; + uint found_record,b_type,left_length; + my_off_t pos; + byte *to; + MARIA_BLOCK_INFO block_info; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + MARIA_HA *info=sort_info->info; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + DBUG_ENTER("sort_get_next_record"); + + if (*_ma_killed_ptr(param)) + DBUG_RETURN(1); + + switch (share->data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); + break; + case STATIC_RECORD: + for (;;) + { + if (my_b_read(&sort_param->read_cache,sort_param->record, + share->base.pack_reclength)) + { + if (sort_param->read_cache.error) + param->out_flag |= O_DATA_LOST; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(-1); + } + sort_param->start_recpos=sort_param->pos; + if (!sort_param->fix_datafile) + { + sort_param->filepos=sort_param->pos; + if (sort_param->master) + share->state.split++; + } + sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength); + if (*sort_param->record) + { + if (sort_param->calc_checksum) + param->glob_crc+= (info->cur_row.checksum= + _ma_static_checksum(info,sort_param->record)); + DBUG_RETURN(0); + } + if (!sort_param->fix_datafile && sort_param->master) + { + info->state->del++; + info->state->empty+=share->base.pack_reclength; + } + } + case DYNAMIC_RECORD: + LINT_INIT(to); + pos=sort_param->pos; + searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); + parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0; + for (;;) + { + found_record=block_info.second_read= 0; + left_length=1; + if (searching) + { + pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE); + param->testflag|=T_RETRY_WITHOUT_QUICK; + sort_param->start_recpos=pos; + } + do + { + if (pos > sort_param->max_pos) + sort_param->max_pos=pos; + if (pos & (MARIA_DYN_ALIGN_SIZE-1)) + { + if ((param->testflag & T_VERBOSE) || searching == 0) + _ma_check_print_info(param,"Wrong aligned block at %s", + llstr(pos,llbuff)); + if (searching) + goto try_next; + } + if (found_record && pos == param->search_after_block) + _ma_check_print_info(param,"Block: %s used by record at %s", + llstr(param->search_after_block,llbuff), + llstr(sort_param->start_recpos,llbuff2)); + if (_ma_read_cache(&sort_param->read_cache, + (byte*) block_info.header,pos, + MARIA_BLOCK_INFO_HEADER_LENGTH, + (! found_record ? READING_NEXT : 0) | + parallel_flag | READING_HEADER)) + { + if (found_record) + { + _ma_check_print_info(param, + "Can't read whole record at %s (errno: %d)", + llstr(sort_param->start_recpos,llbuff),errno); + goto try_next; + } + DBUG_RETURN(-1); + } + if (searching && ! 
sort_param->fix_datafile) + { + param->error_printed=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(1); /* Something wrong with data */ + } + b_type= _ma_get_block_info(&block_info,-1,pos); + if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) || + ((b_type & BLOCK_FIRST) && + (block_info.rec_len < (uint) share->base.min_pack_length || + block_info.rec_len > (uint) share->base.max_pack_length))) + { + uint i; + if (param->testflag & T_VERBOSE || searching == 0) + _ma_check_print_info(param, + "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped", + block_info.header[0],block_info.header[1], + block_info.header[2],llstr(pos,llbuff)); + if (found_record) + goto try_next; + block_info.second_read=0; + searching=1; + /* Search after block in read header string */ + for (i=MARIA_DYN_ALIGN_SIZE ; + i < MARIA_BLOCK_INFO_HEADER_LENGTH ; + i+= MARIA_DYN_ALIGN_SIZE) + if (block_info.header[i] >= 1 && + block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE) + break; + pos+=(ulong) i; + sort_param->start_recpos=pos; + continue; + } + if (b_type & BLOCK_DELETED) + { + bool error=0; + if (block_info.block_len+ (uint) (block_info.filepos-pos) < + share->base.min_block_length) + { + if (!searching) + _ma_check_print_info(param, + "Deleted block with impossible length %u at %s", + block_info.block_len,llstr(pos,llbuff)); + error=1; + } + else + { + if ((block_info.next_filepos != HA_OFFSET_ERROR && + block_info.next_filepos >= + info->state->data_file_length) || + (block_info.prev_filepos != HA_OFFSET_ERROR && + block_info.prev_filepos >= info->state->data_file_length)) + { + if (!searching) + _ma_check_print_info(param, + "Delete link points outside datafile at %s", + llstr(pos,llbuff)); + error=1; + } + } + if (error) + { + if (found_record) + goto try_next; + searching=1; + pos+= MARIA_DYN_ALIGN_SIZE; + sort_param->start_recpos=pos; + block_info.second_read=0; + continue; + } + } + else + { + if (block_info.block_len+ (uint) (block_info.filepos-pos) < + share->base.min_block_length || + block_info.block_len > (uint) share->base.max_pack_length+ + MARIA_SPLIT_LENGTH) + { + if (!searching) + _ma_check_print_info(param, + "Found block with impossible length %u at %s; Skipped", + block_info.block_len+ (uint) (block_info.filepos-pos), + llstr(pos,llbuff)); + if (found_record) + goto try_next; + searching=1; + pos+= MARIA_DYN_ALIGN_SIZE; + sort_param->start_recpos=pos; + block_info.second_read=0; + continue; + } + } + if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) + { + if (!sort_param->fix_datafile && sort_param->master && + (b_type & BLOCK_DELETED)) + { + info->state->empty+=block_info.block_len; + info->state->del++; + share->state.split++; + } + if (found_record) + goto try_next; + if (searching) + { + pos+=MARIA_DYN_ALIGN_SIZE; + sort_param->start_recpos=pos; + } + else + pos=block_info.filepos+block_info.block_len; + block_info.second_read=0; + continue; + } + + if (!sort_param->fix_datafile && sort_param->master) + share->state.split++; + if (! 
found_record++) + { + sort_param->find_length=left_length=block_info.rec_len; + sort_param->start_recpos=pos; + if (!sort_param->fix_datafile) + sort_param->filepos=sort_param->start_recpos; + if (sort_param->fix_datafile && (param->testflag & T_EXTEND)) + sort_param->pos=block_info.filepos+1; + else + sort_param->pos=block_info.filepos+block_info.block_len; + if (share->base.blobs) + { + if (_ma_alloc_buffer(&sort_param->rec_buff, + &sort_param->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + + { + if (param->max_record_length >= block_info.rec_len) + { + _ma_check_print_error(param,"Not enough memory for blob at %s (need %lu)", + llstr(sort_param->start_recpos,llbuff), + (ulong) block_info.rec_len); + DBUG_RETURN(1); + } + else + { + _ma_check_print_info(param,"Not enough memory for blob at %s (need %lu); Row skipped", + llstr(sort_param->start_recpos,llbuff), + (ulong) block_info.rec_len); + goto try_next; + } + } + } + to= sort_param->rec_buff; + } + if (left_length < block_info.data_len || ! block_info.data_len) + { + _ma_check_print_info(param, + "Found block with too small length at %s; " + "Skipped", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + if (block_info.filepos + block_info.data_len > + sort_param->read_cache.end_of_file) + { + _ma_check_print_info(param, + "Found block that points outside data file " + "at %s", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + /* + Copy information that is already read. Avoid accessing data + below the cache start. This could happen if the header + streched over the end of the previous buffer contents. + */ + { + uint header_len= (uint) (block_info.filepos - pos); + uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len); + + if (prefetch_len > block_info.data_len) + prefetch_len= block_info.data_len; + if (prefetch_len) + { + memcpy(to, block_info.header + header_len, prefetch_len); + block_info.filepos+= prefetch_len; + block_info.data_len-= prefetch_len; + left_length-= prefetch_len; + to+= prefetch_len; + } + } + if (block_info.data_len && + _ma_read_cache(&sort_param->read_cache,to,block_info.filepos, + block_info.data_len, + (found_record == 1 ? 
READING_NEXT : 0) | + parallel_flag)) + { + _ma_check_print_info(param, + "Read error for block at: %s (error: %d); Skipped", + llstr(block_info.filepos,llbuff),my_errno); + goto try_next; + } + left_length-=block_info.data_len; + to+=block_info.data_len; + pos=block_info.next_filepos; + if (pos == HA_OFFSET_ERROR && left_length) + { + _ma_check_print_info(param,"Wrong block with wrong total length starting at %s", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH > sort_param->read_cache.end_of_file) + { + _ma_check_print_info(param,"Found link that points at %s (outside data file) at %s", + llstr(pos,llbuff2), + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + } while (left_length); + + if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff, + sort_param->find_length) != MY_FILE_ERROR) + { + if (sort_param->read_cache.error < 0) + DBUG_RETURN(1); + if (sort_param->calc_checksum) + info->cur_row.checksum= _ma_checksum(info, sort_param->record); + if ((param->testflag & (T_EXTEND | T_REP)) || searching) + { + if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff, + sort_param->find_length, + (param->testflag & T_QUICK) && + sort_param->calc_checksum && + test(info->s->calc_checksum))) + { + _ma_check_print_info(param,"Found wrong packed record at %s", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + } + if (sort_param->calc_checksum) + param->glob_crc+= info->cur_row.checksum; + DBUG_RETURN(0); + } + if (!searching) + _ma_check_print_info(param,"Key %d - Found wrong stored record at %s", + sort_param->key+1, + llstr(sort_param->start_recpos,llbuff)); + try_next: + pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE); + searching=1; + } + case COMPRESSED_RECORD: + for (searching=0 ;; searching=1, sort_param->pos++) + { + if (_ma_read_cache(&sort_param->read_cache,(byte*) block_info.header, + sort_param->pos, + share->pack.ref_length,READING_NEXT)) + DBUG_RETURN(-1); + if (searching && ! sort_param->fix_datafile) + { + param->error_printed=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(1); /* Something wrong with data */ + } + sort_param->start_recpos=sort_param->pos; + if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info, + &sort_param->rec_buff, + &sort_param->rec_buff_size, -1, + sort_param->pos)) + DBUG_RETURN(-1); + if (!block_info.rec_len && + sort_param->pos + MEMMAP_EXTRA_MARGIN == + sort_param->read_cache.end_of_file) + DBUG_RETURN(-1); + if (block_info.rec_len < (uint) share->min_pack_length || + block_info.rec_len > (uint) share->max_pack_length) + { + if (! searching) + _ma_check_print_info(param,"Found block with wrong recordlength: %d at %s\n", + block_info.rec_len, + llstr(sort_param->pos,llbuff)); + continue; + } + if (_ma_read_cache(&sort_param->read_cache,(byte*) sort_param->rec_buff, + block_info.filepos, block_info.rec_len, + READING_NEXT)) + { + if (! searching) + _ma_check_print_info(param,"Couldn't read whole record from %s", + llstr(sort_param->pos,llbuff)); + continue; + } + if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record, + sort_param->rec_buff, block_info.rec_len)) + { + if (! 
searching) + _ma_check_print_info(param,"Found wrong record at %s", + llstr(sort_param->pos,llbuff)); + continue; + } + if (!sort_param->fix_datafile) + { + sort_param->filepos=sort_param->pos; + if (sort_param->master) + share->state.split++; + } + sort_param->max_pos=(sort_param->pos=block_info.filepos+ + block_info.rec_len); + info->packed_length=block_info.rec_len; + + if (sort_param->calc_checksum) + { + info->cur_row.checksum= (*info->s->calc_checksum)(info, + sort_param->record); + param->glob_crc+= info->cur_row.checksum; + } + DBUG_RETURN(0); + } + } + DBUG_RETURN(1); /* Impossible */ +} + + +/* + Write record to new file. + + SYNOPSIS + _ma_sort_write_record() + sort_param Sort parameters. + + NOTE + This is only called by a master thread if parallel repair is used. + + RETURN + 0 OK + 1 Error +*/ + +int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) +{ + int flag; + uint length; + ulong block_length,reclength; + byte *from; + byte block_buff[8]; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + MARIA_HA *info=sort_info->info; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_sort_write_record"); + + if (sort_param->fix_datafile) + { + switch (sort_info->new_data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); + break; + case STATIC_RECORD: + if (my_b_write(&info->rec_cache,sort_param->record, + share->base.pack_reclength)) + { + _ma_check_print_error(param,"%d when writing to datafile",my_errno); + DBUG_RETURN(1); + } + sort_param->filepos+=share->base.pack_reclength; + info->s->state.split++; + break; + case DYNAMIC_RECORD: + if (! info->blobs) + from=sort_param->rec_buff; + else + { + /* must be sure that local buffer is big enough */ + reclength=info->s->base.pack_reclength+ + _ma_calc_total_blob_length(info,sort_param->record)+ + ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_DYN_DELETE_BLOCK_HEADER; + if (sort_info->buff_length < reclength) + { + if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength, + MYF(MY_FREE_ON_ERROR | + MY_ALLOW_ZERO_PTR)))) + DBUG_RETURN(1); + sort_info->buff_length=reclength; + } + from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER); + } + /* We can use info->checksum here as only one thread calls this */ + info->cur_row.checksum= _ma_checksum(info,sort_param->record); + reclength= _ma_rec_pack(info,from,sort_param->record); + flag=0; + + do + { + block_length=reclength+ 3 + test(reclength >= (65520-3)); + if (block_length < share->base.min_block_length) + block_length=share->base.min_block_length; + info->update|=HA_STATE_WRITE_AT_END; + block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE); + if (block_length > MARIA_MAX_BLOCK_LENGTH) + block_length=MARIA_MAX_BLOCK_LENGTH; + if (_ma_write_part_record(info,0L,block_length, + sort_param->filepos+block_length, + &from,&reclength,&flag)) + { + _ma_check_print_error(param,"%d when writing to datafile",my_errno); + DBUG_RETURN(1); + } + sort_param->filepos+=block_length; + info->s->state.split++; + } while (reclength); + break; + case COMPRESSED_RECORD: + reclength=info->packed_length; + length= _ma_save_pack_length((uint) share->pack.version, block_buff, + reclength); + if (info->s->base.blobs) + length+= _ma_save_pack_length((uint) share->pack.version, + block_buff + length, info->blob_length); + if (my_b_write(&info->rec_cache,block_buff,length) || + my_b_write(&info->rec_cache,(byte*) sort_param->rec_buff,reclength)) + { + _ma_check_print_error(param,"%d when writing to datafile",my_errno); + DBUG_RETURN(1); 
+ } + sort_param->filepos+=reclength+length; + info->s->state.split++; + break; + } + } + if (sort_param->master) + { + info->state->records++; + if ((param->testflag & T_WRITE_LOOP) && + (info->state->records % WRITE_COUNT) == 0) + { + char llbuff[22]; + printf("%s\r", llstr(info->state->records,llbuff)); + VOID(fflush(stdout)); + } + } + DBUG_RETURN(0); +} /* _ma_sort_write_record */ + + + /* Compare two keys from _ma_create_index_by_sort */ + +static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, + const void *b) +{ + uint not_used[2]; + return (ha_key_cmp(sort_param->seg, *((uchar**) a), *((uchar**) b), + USE_WHOLE_KEY, SEARCH_SAME, not_used)); +} /* sort_key_cmp */ + + +static int sort_key_write(MARIA_SORT_PARAM *sort_param, const byte *a) +{ + uint diff_pos[2]; + char llbuff[22],llbuff2[22]; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param= sort_info->param; + int cmp; + + if (sort_info->key_block->inited) + { + cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, + (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE, + diff_pos); + if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) + ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, + (uchar*) a, USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg, + sort_param->notnull, + sort_info->key_block->lastkey, + a); + } + sort_param->unique[diff_pos[0]-1]++; + } + else + { + cmp= -1; + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull, + a); + } + if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) + { + sort_info->dupp++; + sort_info->info->cur_row.lastpos= get_record_for_key(sort_info->info, + sort_param->keyinfo, + a); + _ma_check_print_warning(param, + "Duplicate key for record at %10s against record at %10s", + llstr(sort_info->info->cur_row.lastpos, llbuff), + llstr(get_record_for_key(sort_info->info, + sort_param->keyinfo, + sort_info->key_block-> + lastkey), + llbuff2)); + param->testflag|=T_RETRY_WITHOUT_QUICK; + if (sort_info->param->testflag & T_VERBOSE) + _ma_print_key(stdout,sort_param->seg, a, USE_WHOLE_KEY); + return (sort_delete_record(sort_param)); + } +#ifndef DBUG_OFF + if (cmp > 0) + { + _ma_check_print_error(param, + "Internal error: Keys are not in order from sort"); + return(1); + } +#endif + return (sort_insert_key(sort_param, sort_info->key_block, + a, HA_OFFSET_ERROR)); +} /* sort_key_write */ + + +int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param) +{ + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + SORT_KEY_BLOCKS *key_block=sort_info->key_block; + MARIA_SHARE *share=sort_info->info->s; + uint val_off, val_len; + int error; + SORT_FT_BUF *maria_ft_buf=sort_info->ft_buf; + byte *from, *to; + + val_len=share->ft2_keyinfo.keylength; + get_key_full_length_rdonly(val_off, maria_ft_buf->lastkey); + to= maria_ft_buf->lastkey+val_off; + + if (maria_ft_buf->buf) + { + /* flushing first-level tree */ + error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, + HA_OFFSET_ERROR); + for (from=to+val_len; + !error && from < maria_ft_buf->buf; + from+= val_len) + { + memcpy(to, from, val_len); + error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, + HA_OFFSET_ERROR); + } + return error; + } + /* flushing second-level tree keyblocks */ + error=_ma_flush_pending_blocks(sort_param); + 
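/* _ma_flush_pending_blocks() wrote the second-level tree; its root is now in share->state.key_root[sort_param->key]. */
+  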
/* updating lastkey with second-level tree info */ + ft_intXstore(maria_ft_buf->lastkey+val_off, -maria_ft_buf->count); + _ma_dpointer(sort_info->info, maria_ft_buf->lastkey+val_off+HA_FT_WLEN, + share->state.key_root[sort_param->key]); + /* restoring first level tree data in sort_info/sort_param */ + sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks; + sort_param->keyinfo=share->keyinfo+sort_param->key; + share->state.key_root[sort_param->key]=HA_OFFSET_ERROR; + /* writing lastkey in first-level tree */ + return error ? error : + sort_insert_key(sort_param,sort_info->key_block, + maria_ft_buf->lastkey,HA_OFFSET_ERROR); +} + + +static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, + const byte *a) +{ + uint a_len, val_off, val_len, error; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + SORT_FT_BUF *ft_buf= sort_info->ft_buf; + SORT_KEY_BLOCKS *key_block= sort_info->key_block; + + val_len=HA_FT_WLEN+sort_info->info->s->base.rec_reflength; + get_key_full_length_rdonly(a_len, (uchar *)a); + + if (!ft_buf) + { + /* + use two-level tree only if key_reflength fits in rec_reflength place + and row format is NOT static - for _ma_dpointer not to garble offsets + */ + if ((sort_info->info->s->base.key_reflength <= + sort_info->info->s->base.rec_reflength) && + (sort_info->info->s->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))) + ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length + + sizeof(SORT_FT_BUF), MYF(MY_WME)); + + if (!ft_buf) + { + sort_param->key_write=sort_key_write; + return sort_key_write(sort_param, a); + } + sort_info->ft_buf= ft_buf; + goto word_init_ft_buf; /* no need to duplicate the code */ + } + get_key_full_length_rdonly(val_off, ft_buf->lastkey); + + if (ha_compare_text(sort_param->seg->charset, + ((uchar *)a)+1,a_len-1, + (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0) + { + byte *p; + if (!ft_buf->buf) /* store in second-level tree */ + { + ft_buf->count++; + return sort_insert_key(sort_param,key_block, + a + a_len, HA_OFFSET_ERROR); + } + + /* storing the key in the buffer. */ + memcpy (ft_buf->buf, (char *)a+a_len, val_len); + ft_buf->buf+=val_len; + if (ft_buf->buf < ft_buf->end) + return 0; + + /* converting to two-level tree */ + p=ft_buf->lastkey+val_off; + + while (key_block->inited) + key_block++; + sort_info->key_block=key_block; + sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo; + ft_buf->count=(ft_buf->buf - p)/val_len; + + /* flushing buffer to second-level tree */ + for (error=0; !error && p < ft_buf->buf; p+= val_len) + error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR); + ft_buf->buf=0; + return error; + } + + /* flushing buffer */ + if ((error=_ma_sort_ft_buf_flush(sort_param))) + return error; + +word_init_ft_buf: + a_len+=val_len; + memcpy(ft_buf->lastkey, a, a_len); + ft_buf->buf=ft_buf->lastkey+a_len; + /* + 32 is just a safety margin here + (at least max(val_len, sizeof(nod_flag)) should be there). + May be better performance could be achieved if we'd put + (sort_info->keyinfo->block_length-32)/XXX + instead. + TODO: benchmark the best value for XXX. 
+ */ + ft_buf->end= ft_buf->lastkey+ (sort_param->keyinfo->block_length-32); + return 0; +} /* sort_maria_ft_key_write */ + + + /* get pointer to record from a key */ + +static my_off_t get_record_for_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + const byte *key) +{ + return _ma_dpos(info,0, key + _ma_keylength(keyinfo, key)); +} /* get_record_for_key */ + + + /* Insert a key in sort-key-blocks */ + +static int sort_insert_key(MARIA_SORT_PARAM *sort_param, + register SORT_KEY_BLOCKS *key_block, + const byte *key, + my_off_t prev_block) +{ + uint a_length,t_length,nod_flag; + my_off_t filepos,key_file_length; + byte *anc_buff,*lastkey; + MARIA_KEY_PARAM s_temp; + MARIA_HA *info; + MARIA_KEYDEF *keyinfo=sort_param->keyinfo; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; + DBUG_ENTER("sort_insert_key"); + + anc_buff= key_block->buff; + info=sort_info->info; + lastkey=key_block->lastkey; + nod_flag= (key_block == sort_info->key_block ? 0 : + info->s->base.key_reflength); + + if (!key_block->inited) + { + key_block->inited=1; + if (key_block == sort_info->key_block_end) + { + _ma_check_print_error(param,"To many key-block-levels; Try increasing sort_key_blocks"); + DBUG_RETURN(1); + } + a_length=2+nod_flag; + key_block->end_pos=anc_buff+2; + lastkey=0; /* No previous key in block */ + } + else + a_length=maria_getint(anc_buff); + + /* Save pointer to previous block */ + if (nod_flag) + _ma_kpointer(info,key_block->end_pos,prev_block); + + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, + (byte*) 0,lastkey,lastkey,key, + &s_temp); + (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp); + a_length+=t_length; + maria_putint(anc_buff,a_length,nod_flag); + key_block->end_pos+=t_length; + if (a_length <= keyinfo->block_length) + { + VOID(_ma_move_key(keyinfo, key_block->lastkey, key)); + key_block->last_length=a_length-t_length; + DBUG_RETURN(0); + } + + /* Fill block with end-zero and write filled block */ + maria_putint(anc_buff,key_block->last_length,nod_flag); + bzero(anc_buff+key_block->last_length, + keyinfo->block_length- key_block->last_length); + key_file_length=info->state->key_file_length; + if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + DBUG_RETURN(1); + + /* If we read the page from the key cache, we have to write it back to it */ + if (key_file_length == info->state->key_file_length) + { + if (_ma_write_keypage(info, keyinfo, filepos, DFLT_INIT_HITS, anc_buff)) + DBUG_RETURN(1); + } + else if (my_pwrite(info->s->kfile,anc_buff, + (uint) keyinfo->block_length,filepos, param->myf_rw)) + DBUG_RETURN(1); + DBUG_DUMP("buff",anc_buff,maria_getint(anc_buff)); + + /* Write separator-key to block in next level */ + if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos)) + DBUG_RETURN(1); + + /* clear old block and write new key in it */ + key_block->inited=0; + DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block)); +} /* sort_insert_key */ + + + /* Delete record when we found a duplicated key */ + +static int sort_delete_record(MARIA_SORT_PARAM *sort_param) +{ + uint i; + int old_file,error; + byte *key; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + MARIA_HA *info=sort_info->info; + DBUG_ENTER("sort_delete_record"); + + if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK) + { + _ma_check_print_error(param, + "Quick-recover aborted; Run recovery without switch -q or with switch -qq"); + DBUG_RETURN(1); + } + if (info->s->options & 
HA_OPTION_COMPRESS_RECORD) + { + _ma_check_print_error(param, + "Recover aborted; Can't run standard recovery on compressed tables with errors in data-file. Use switch 'maria_chk --safe-recover' to fix it\n",stderr);; + DBUG_RETURN(1); + } + + old_file=info->dfile; + info->dfile=info->rec_cache.file; + if (sort_info->current_key) + { + key= info->lastkey+info->s->base.max_key_length; + if ((error=(*info->s->read_record)(info,sort_param->record, + info->cur_row.lastpos)) && + error != HA_ERR_RECORD_DELETED) + { + _ma_check_print_error(param,"Can't read record to be removed"); + info->dfile=old_file; + DBUG_RETURN(1); + } + + for (i=0 ; i < sort_info->current_key ; i++) + { + uint key_length= _ma_make_key(info, i, key, sort_param->record, + info->cur_row.lastpos); + if (_ma_ck_delete(info, i, key, key_length)) + { + _ma_check_print_error(param, + "Can't delete key %d from record to be removed", + i+1); + info->dfile=old_file; + DBUG_RETURN(1); + } + } + if (sort_param->calc_checksum) + param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record); + } + error=flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info); + info->dfile=old_file; /* restore actual value */ + info->state->records--; + DBUG_RETURN(error); +} /* sort_delete_record */ + + /* Fix all pending blocks and flush everything to disk */ + +int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) +{ + uint nod_flag,length; + my_off_t filepos,key_file_length; + SORT_KEY_BLOCKS *key_block; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + myf myf_rw=sort_info->param->myf_rw; + MARIA_HA *info=sort_info->info; + MARIA_KEYDEF *keyinfo=sort_param->keyinfo; + DBUG_ENTER("_ma_flush_pending_blocks"); + + filepos= HA_OFFSET_ERROR; /* if empty file */ + nod_flag=0; + for (key_block=sort_info->key_block ; key_block->inited ; key_block++) + { + key_block->inited=0; + length=maria_getint(key_block->buff); + if (nod_flag) + _ma_kpointer(info,key_block->end_pos,filepos); + key_file_length=info->state->key_file_length; + bzero(key_block->buff+length, keyinfo->block_length-length); + if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + DBUG_RETURN(1); + + /* If we read the page from the key cache, we have to write it back */ + if (key_file_length == info->state->key_file_length) + { + if (_ma_write_keypage(info, keyinfo, filepos, + DFLT_INIT_HITS, key_block->buff)) + DBUG_RETURN(1); + } + else if (my_pwrite(info->s->kfile,key_block->buff, + (uint) keyinfo->block_length,filepos, myf_rw)) + DBUG_RETURN(1); + DBUG_DUMP("buff",key_block->buff,length); + nod_flag=1; + } + info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */ + DBUG_RETURN(0); +} /* _ma_flush_pending_blocks */ + + /* alloc space and pointers for key_blocks */ + +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, + uint buffer_length) +{ + reg1 uint i; + SORT_KEY_BLOCKS *block; + DBUG_ENTER("alloc_key_blocks"); + + if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+ + buffer_length+IO_SIZE)*blocks, + MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for sort-key-blocks"); + return(0); + } + for (i=0 ; i < blocks ; i++) + { + block[i].inited=0; + block[i].buff= (byte*) (block+blocks)+(buffer_length+IO_SIZE)*i; + } + DBUG_RETURN(block); +} /* alloc_key_blocks */ + + + /* Check if file is almost full */ + +int maria_test_if_almost_full(MARIA_HA *info) +{ + if (info->s->options & HA_OPTION_COMPRESS_RECORD) + return 0; + return 
(my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0))/10*9 > + (my_off_t) (info->s->base.max_key_file_length) || + my_seek(info->dfile,0L,MY_SEEK_END,MYF(0))/10*9 > + (my_off_t) info->s->base.max_data_file_length); +} + + /* Recreate table with bigger more alloced record-data */ + +int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) +{ + int error; + MARIA_HA info; + MARIA_SHARE share; + MARIA_KEYDEF *keyinfo,*key,*key_end; + HA_KEYSEG *keysegs,*keyseg; + MARIA_COLUMNDEF *recdef,*rec,*end; + MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end; + MARIA_STATUS_INFO status_info; + uint unpack,key_parts; + ha_rows max_records; + ulonglong file_length,tmp_length; + MARIA_CREATE_INFO create_info; + DBUG_ENTER("maria_recreate_table"); + + error=1; /* Default error */ + info= **org_info; + status_info= (*org_info)->state[0]; + info.state= &status_info; + share= *(*org_info)->s; + unpack= (share.options & HA_OPTION_COMPRESS_RECORD) && + (param->testflag & T_UNPACK); + if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) * + share.base.keys))) + DBUG_RETURN(0); + memcpy((byte*) keyinfo,(byte*) share.keyinfo, + (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys)); + + key_parts= share.base.all_key_parts; + if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)* + (key_parts+share.base.keys)))) + { + my_afree((gptr) keyinfo); + DBUG_RETURN(1); + } + if (!(recdef=(MARIA_COLUMNDEF*) + my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1)))) + { + my_afree((gptr) keyinfo); + my_afree((gptr) keysegs); + DBUG_RETURN(1); + } + if (!(uniquedef=(MARIA_UNIQUEDEF*) + my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1)))) + { + my_afree((gptr) recdef); + my_afree((gptr) keyinfo); + my_afree((gptr) keysegs); + DBUG_RETURN(1); + } + + /* Copy the column definitions */ + memcpy((byte*) recdef,(byte*) share.rec, + (size_t) (sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))); + for (rec=recdef,end=recdef+share.base.fields; rec != end ; rec++) + { + if (unpack && !(share.options & HA_OPTION_PACK_RECORD) && + rec->type != FIELD_BLOB && + rec->type != FIELD_VARCHAR && + rec->type != FIELD_CHECK) + rec->type=(int) FIELD_NORMAL; + } + + /* Change the new key to point at the saved key segments */ + memcpy((byte*) keysegs,(byte*) share.keyparts, + (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+ + share.state.header.uniques))); + keyseg=keysegs; + for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++) + { + key->seg=keyseg; + for (; keyseg->type ; keyseg++) + { + if (param->language) + keyseg->language=param->language; /* change language */ + } + keyseg++; /* Skip end pointer */ + } + + /* + Copy the unique definitions and change them to point at the new key + segments + */ + memcpy((byte*) uniquedef,(byte*) share.uniqueinfo, + (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques))); + for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques; + u_ptr != u_end ; u_ptr++) + { + u_ptr->seg=keyseg; + keyseg+=u_ptr->keysegs+1; + } + if (share.options & HA_OPTION_COMPRESS_RECORD) + share.base.records=max_records=info.state->records; + else if (share.base.min_pack_length) + max_records=(ha_rows) (my_seek(info.dfile,0L,MY_SEEK_END,MYF(0)) / + (ulong) share.base.min_pack_length); + else + max_records=0; + unpack= (share.data_file_type == COMPRESSED_RECORD) && + (param->testflag & T_UNPACK); + share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD; + + file_length=(ulonglong) my_seek(info.dfile,0L,MY_SEEK_END,MYF(0)); + tmp_length= file_length+file_length/10; + 
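/* Reserve growth room for the re-created data file: at least 10% above the current size and not less than the configured maximum lengths. */
+  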
set_if_bigger(file_length,param->max_data_file_length); + set_if_bigger(file_length,tmp_length); + set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length); + + VOID(maria_close(*org_info)); + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=max(max_records,share.base.records); + create_info.reloc_rows=share.base.reloc; + create_info.old_options=(share.options | + (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0)); + + create_info.data_file_length=file_length; + create_info.auto_increment=share.state.auto_increment; + create_info.language = (param->language ? param->language : + share.state.header.language); + create_info.key_file_length= status_info.key_file_length; + create_info.org_data_file_type= ((enum data_file_type) + share.state.header.org_data_file_type); + + /* + Allow for creating an auto_increment key. This has an effect only if + an auto_increment key exists in the original table. + */ + create_info.with_auto_increment= TRUE; + create_info.null_bytes= share.base.null_bytes; + /* + We don't have to handle symlinks here because we are using + HA_DONT_TOUCH_DATA + */ + if (maria_create(filename, share.data_file_type, + share.base.keys - share.state.header.uniques, + keyinfo, share.base.fields, recdef, + share.state.header.uniques, uniquedef, + &create_info, + HA_DONT_TOUCH_DATA)) + { + _ma_check_print_error(param, + "Got error %d when trying to recreate indexfile", + my_errno); + goto end; + } + *org_info=maria_open(filename,O_RDWR, + (param->testflag & T_WAIT_FOREVER) ? HA_OPEN_WAIT_IF_LOCKED : + (param->testflag & T_DESCRIPT) ? HA_OPEN_IGNORE_IF_LOCKED : + HA_OPEN_ABORT_IF_LOCKED); + if (!*org_info) + { + _ma_check_print_error(param, + "Got error %d when trying to open re-created indexfile", + my_errno); + goto end; + } + /* We are modifing */ + (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA; + VOID(_ma_readinfo(*org_info,F_WRLCK,0)); + (*org_info)->state->records=info.state->records; + if (share.state.create_time) + (*org_info)->s->state.create_time=share.state.create_time; + (*org_info)->s->state.unique=(*org_info)->this_unique= + share.state.unique; + (*org_info)->state->checksum=info.state->checksum; + (*org_info)->state->del=info.state->del; + (*org_info)->s->state.dellink=share.state.dellink; + (*org_info)->state->empty=info.state->empty; + (*org_info)->state->data_file_length=info.state->data_file_length; + if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT | + UPDATE_OPEN_COUNT)) + goto end; + error=0; +end: + my_afree((gptr) uniquedef); + my_afree((gptr) keyinfo); + my_afree((gptr) recdef); + my_afree((gptr) keysegs); + DBUG_RETURN(error); +} + + + /* write suffix to data file if neaded */ + +int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile) +{ + MARIA_HA *info=sort_info->info; + + if (info->s->options & HA_OPTION_COMPRESS_RECORD && fix_datafile) + { + char buff[MEMMAP_EXTRA_MARGIN]; + bzero(buff,sizeof(buff)); + if (my_b_write(&info->rec_cache,buff,sizeof(buff))) + { + _ma_check_print_error(sort_info->param, + "%d when writing to datafile",my_errno); + return 1; + } + sort_info->param->read_cache.end_of_file+=sizeof(buff); + } + return 0; +} + + +/* Update state and maria_chk time of indexfile */ + +int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update) +{ + MARIA_SHARE *share=info->s; + + if (update & UPDATE_OPEN_COUNT) + { + share->state.open_count=0; + share->global_changed=0; + } + if (update & UPDATE_STAT) + { + uint i, key_parts= 
mi_uint2korr(share->state.header.key_parts);
+    share->state.rec_per_key_rows=info->state->records;
+    share->state.changed&= ~STATE_NOT_ANALYZED;
+    if (info->state->records)
+    {
+      for (i=0; i<key_parts; i++)
+      {
+        if (!(share->state.rec_per_key_part[i]=param->rec_per_key_part[i]))
+          share->state.changed|= STATE_NOT_ANALYZED;
+      }
+    }
+  }
+  if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
+  {
+    if (update & UPDATE_TIME)
+    {
+      share->state.check_time= (long) time((time_t*) 0);
+      if (!share->state.create_time)
+        share->state.create_time=share->state.check_time;
+    }
+    /*
+      When tables are locked we haven't synched the share state and the
+      real state for a while, so we better do it here before synching
+      the share state to disk. Only when the table is write locked is it
+      necessary to perform this synch.
+    */
+    if (info->lock_type == F_WRLCK)
+      share->state.state= *info->state;
+    if (_ma_state_info_write(share->kfile,&share->state,1+2))
+      goto err;
+    share->changed=0;
+  }
+  {                                             /* Force update of status */
+    int error;
+    uint r_locks=share->r_locks,w_locks=share->w_locks;
+    share->r_locks= share->w_locks= share->tot_locks= 0;
+    error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK);
+    share->r_locks=r_locks;
+    share->w_locks=w_locks;
+    share->tot_locks=r_locks+w_locks;
+    if (!error)
+      return 0;
+  }
+err:
+  _ma_check_print_error(param,"%d when updating keyfile",my_errno);
+  return 1;
+}
+
+  /*
+    Update the auto increment value for a table.
+    When setting the 'repair_only' flag we only want to change the
+    old auto_increment value if it's wrong (smaller than some given key).
+    The reason is that we shouldn't change the auto_increment value
+    for a table without good reason when only doing a repair; if the
+    user has inserted and deleted rows, the auto_increment value
+    may be bigger than the biggest current row and this is ok.
+
+    If repair_only is not set, we will update the auto_increment value
+    to param->auto_increment_value if that is bigger than the biggest key.
+  */
+
+void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
+                                   my_bool repair_only)
+{
+  byte *record;
+  DBUG_ENTER("update_auto_increment_key");
+
+  if (!info->s->base.auto_key ||
+      !
maria_is_key_active(info->s->state.key_map, info->s->base.auto_key - 1)) + { + if (!(param->testflag & T_VERY_SILENT)) + _ma_check_print_info(param, + "Table: %s doesn't have an auto increment key\n", + param->isam_file_name); + DBUG_VOID_RETURN; + } + if (!(param->testflag & T_SILENT) && + !(param->testflag & T_REP)) + printf("Updating MARIA file: %s\n", param->isam_file_name); + /* + We have to use an allocated buffer instead of info->rec_buff as + _ma_put_key_in_record() may use info->rec_buff + */ + if (!(record= (byte*) my_malloc((uint) info->s->base.pack_reclength, + MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for extra record"); + DBUG_VOID_RETURN; + } + + maria_extra(info,HA_EXTRA_KEYREAD,0); + if (maria_rlast(info, record, info->s->base.auto_key-1)) + { + if (my_errno != HA_ERR_END_OF_FILE) + { + maria_extra(info,HA_EXTRA_NO_KEYREAD,0); + my_free((char*) record, MYF(0)); + _ma_check_print_error(param,"%d when reading last record",my_errno); + DBUG_VOID_RETURN; + } + if (!repair_only) + info->s->state.auto_increment=param->auto_increment_value; + } + else + { + ulonglong auto_increment= ma_retrieve_auto_increment(info, record); + set_if_bigger(info->s->state.auto_increment,auto_increment); + if (!repair_only) + set_if_bigger(info->s->state.auto_increment, param->auto_increment_value); + } + maria_extra(info,HA_EXTRA_NO_KEYREAD,0); + my_free((char*) record, MYF(0)); + maria_update_state_info(param, info, UPDATE_AUTO_INC); + DBUG_VOID_RETURN; +} + + +/* + Update statistics for each part of an index + + SYNOPSIS + maria_update_key_parts() + keyinfo IN Index information (only key->keysegs used) + rec_per_key_part OUT Store statistics here + unique IN Array of (#distinct tuples) + notnull_tuples IN Array of (#tuples), or NULL + records Number of records in the table + + DESCRIPTION + This function is called produce index statistics values from unique and + notnull_tuples arrays after these arrays were produced with sequential + index scan (the scan is done in two places: chk_index() and + sort_key_write()). + + This function handles all 3 index statistics collection methods. + + Unique is an array: + unique[0]= (#different values of {keypart1}) - 1 + unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1 + ... + + For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too: + notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL) + notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all + keypart{i} are not NULL) + ... + For all other statistics collection methods notnull_tuples==NULL. + + Output is an array: + rec_per_key_part[k] = + = E(#records in the table such that keypart_1=c_1 AND ... AND + keypart_k=c_k for arbitrary constants c_1 ... c_k) + + = {assuming that values have uniform distribution and index contains all + tuples from the domain (or that {c_1, ..., c_k} tuple is choosen from + index tuples} + + = #tuples-in-the-index / #distinct-tuples-in-the-index. + + The #tuples-in-the-index and #distinct-tuples-in-the-index have different + meaning depending on which statistics collection method is used: + + MI_STATS_METHOD_* how are nulls compared? which tuples are counted? 
+ NULLS_EQUAL NULL == NULL all tuples in table + NULLS_NOT_EQUAL NULL != NULL all tuples in table + IGNORE_NULLS n/a tuples that don't have NULLs +*/ + +void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part, + ulonglong *unique, ulonglong *notnull, + ulonglong records) +{ + ulonglong count=0,tmp, unique_tuples; + ulonglong tuples= records; + uint parts; + for (parts=0 ; parts < keyinfo->keysegs ; parts++) + { + count+=unique[parts]; + unique_tuples= count + 1; + if (notnull) + { + tuples= notnull[parts]; + /* + #(unique_tuples not counting tuples with NULLs) = + #(unique_tuples counting tuples with NULLs as different) - + #(tuples with NULLs) + */ + unique_tuples -= (records - notnull[parts]); + } + + if (unique_tuples == 0) + tmp= 1; + else if (count == 0) + tmp= tuples; /* 1 unique tuple */ + else + tmp= (tuples + unique_tuples/2) / unique_tuples; + + /* + for some weird keys (e.g. FULLTEXT) tmp can be <1 here. + let's ensure it is not + */ + set_if_bigger(tmp,1); + if (tmp >= (ulonglong) ~(ulong) 0) + tmp=(ulonglong) ~(ulong) 0; + + *rec_per_key_part=(ulong) tmp; + rec_per_key_part++; + } +} + + +static ha_checksum maria_byte_checksum(const byte *buf, uint length) +{ + ha_checksum crc; + const byte *end=buf+length; + for (crc=0; buf != end; buf++) + crc=((crc << 1) + *((uchar*) buf)) + + test(crc & (((ha_checksum) 1) << (8*sizeof(ha_checksum)-1))); + return crc; +} + +static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows) +{ + uint key_maxlength=key->maxlength; + if (key->flag & HA_FULLTEXT) + { + uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* + key->seg->charset->mbmaxlen; + key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + } + return (key->flag & HA_SPATIAL) || + (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) && + ((ulonglong) rows * key_maxlength > + (ulonglong) maria_max_temp_length)); +} + +/* + Deactivate all not unique index that can be recreated fast + These include packed keys on which sorting will use more temporary + space than the max allowed file length or for which the unpacked keys + will take much more space than packed keys. + Note that 'rows' may be zero for the case when we don't know how many + rows we will put into the file. + */ + +void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows) +{ + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *key=share->keyinfo; + uint i; + + DBUG_ASSERT(info->state->records == 0 && + (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES)); + for (i=0 ; i < share->base.keys ; i++,key++) + { + if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) && + ! maria_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1) + { + maria_clear_key_active(share->state.key_map, i); + info->update|= HA_STATE_CHANGED; + } + } +} + + +/* + Return TRUE if we can use repair by sorting + One can set the force argument to force to use sorting + even if the temporary file would be quite big! +*/ + +my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, + ulonglong key_map, my_bool force) +{ + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *key=share->keyinfo; + uint i; + + /* + maria_repair_by_sort only works if we have at least one key. If we don't + have any keys, we should use the normal repair. + */ + if (! 
maria_is_any_key_active(key_map)) + return FALSE; /* Can't use sort */ + for (i=0 ; i < share->base.keys ; i++,key++) + { + if (!force && maria_too_big_key_for_sort(key,rows)) + return FALSE; + } + return TRUE; +} + + +static void +set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share) +{ + if ((sort_info->new_data_file_type=share->data_file_type) == + COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK) + { + MARIA_SHARE tmp; + sort_info->new_data_file_type= share->state.header.org_data_file_type; + /* Set delete_function for sort_delete_record() */ + memcpy((char*) &tmp, share, sizeof(*share)); + tmp.options= ~HA_OPTION_COMPRESS_RECORD; + _ma_setup_functions(&tmp); + share->delete_record=tmp.delete_record; + } +} + +static void restore_data_file_type(MARIA_SHARE *share) +{ + share->options&= ~HA_OPTION_COMPRESS_RECORD; + mi_int2store(share->state.header.options,share->options); + share->state.header.data_file_type= + share->state.header.org_data_file_type; + share->data_file_type= share->state.header.data_file_type= + share->pack.header_length= 0; +} diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c new file mode 100644 index 00000000000..02d887f758a --- /dev/null +++ b/storage/maria/ma_checkpoint.c @@ -0,0 +1,429 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3071 Maria checkpoint + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* Here is the implementation of this module */ + +/* + Summary: + - there are asynchronous checkpoints (a writer to the log notices that it's + been a long time since we last checkpoint-ed, so posts a request for a + background thread to do a checkpoint; does not care about the success of the + checkpoint). Then the checkpoint is done by the checkpoint thread, at an + unspecified moment ("later") (==soon, of course). + - there are synchronous checkpoints: a thread requests a checkpoint to + happen now and wants to know when it finishes and if it succeeded; then the + checkpoint is done by that same thread. +*/ + +#include "page_cache.h" +#include "least_recently_dirtied.h" +#include "transaction.h" +#include "share.h" +#include "log.h" + +#define LSN_IMPOSSIBLE ((LSN)0) /* could also be called LSN_ERROR */ +#define LSN_MAX ((LSN)ULONGLONG_MAX) + +/* + this transaction is used for any system work (purge, checkpoint writing + etc), that is, background threads. It will not be declared/initialized here + in the final version. +*/ +st_transaction system_trans= {0 /* long trans id */, 0 /* short trans id */,0,...}; + +/* those three are protected by the log's mutex */ +/* + The maximum rec_lsn in the LRD when last checkpoint was run, serves for the + MEDIUM checkpoint. 
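+
+  As a minimal illustration of how a MEDIUM checkpoint is expected to use
+  this value (the LRD walk and the flush_page() helper below are
+  illustrative assumptions, not the real code; see flush_all_LRD_to_lsn()
+  further down):
+
+    for (page= LRD->first ; page ; page= page->next)
+    {
+      if (page->rec_lsn <= max_rec_lsn_at_last_checkpoint)
+        flush_page(page);            /* already dirty at last checkpoint */
+    }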
+*/ +LSN max_rec_lsn_at_last_checkpoint= 0; +/* last submitted checkpoint request; cleared when starts */ +CHECKPOINT_LEVEL next_asynchronous_checkpoint_to_do= NONE; +CHECKPOINT_LEVEL checkpoint_in_progress= NONE; + +static inline ulonglong read_non_atomic(ulonglong volatile *x); + +/* + Used by MySQL client threads requesting a checkpoint (like "ALTER MARIA + ENGINE DO CHECKPOINT"), and probably by maria_panic(), and at the end of the + UNDO recovery phase. +*/ +my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level) +{ + my_bool result; + DBUG_ENTER("execute_synchronous_checkpoint"); + DBUG_ASSERT(level > NONE); + + lock(log_mutex); + while (checkpoint_in_progress != NONE) + wait_on_checkpoint_done_cond(); + + result= execute_checkpoint(level); + DBUG_RETURN(result); +} + +/* + If no checkpoint is running, and there is a pending asynchronous checkpoint + request, executes it. + Is safe if multiple threads call it, though in first version only one will. + It's intended to be used by a thread which regularly calls this function; + this is why, if there is a request, it does not wait in a loop for + synchronous checkpoints to be finished, but just exits (because the thread + may want to do something useful meanwhile (flushing dirty pages for example) + instead of waiting). +*/ +my_bool execute_asynchronous_checkpoint_if_any() +{ + my_bool result; + CHECKPOINT_LEVEL level; + DBUG_ENTER("execute_asynchronous_checkpoint"); + + /* first check without mutex, ok to see old data */ + if (likely((next_asynchronous_checkpoint_to_do == NONE) || + (checkpoint_in_progress != NONE))) + DBUG_RETURN(FALSE); + + lock(log_mutex); + if (likely((next_asynchronous_checkpoint_to_do == NONE) || + (checkpoint_in_progress != NONE))) + { + unlock(log_mutex); + DBUG_RETURN(FALSE); + } + + result= execute_checkpoint(next_asynchronous_checkpoint_to_do); + DBUG_RETURN(result); +} + + +/* + Does the actual checkpointing. Called by + execute_synchronous_checkpoint() and + execute_asynchronous_checkpoint_if_any(). +*/ +my_bool execute_checkpoint(CHECKPOINT_LEVEL level) +{ + my_bool result; + DBUG_ENTER("execute_checkpoint"); + + safemutex_assert_owner(log_mutex); + if (next_asynchronous_checkpoint_to_do <= level) + next_asynchronous_checkpoint_to_do= NONE; + checkpoint_in_progress= level; + + if (unlikely(level > INDIRECT)) + { + LSN copy_of_max_rec_lsn_at_last_checkpoint= + max_rec_lsn_at_last_checkpoint; + /* much I/O work to do, release log mutex */ + unlock(log_mutex); + + switch (level) + { + case FULL: + /* flush all pages up to the current end of the LRD */ + flush_all_LRD_to_lsn(LSN_MAX); + /* this will go full speed (normal scheduling, no sleep) */ + break; + case MEDIUM: + /* + flush all pages which were already dirty at last checkpoint: + ensures that recovery will never start from before the next-to-last + checkpoint (two-checkpoint rule). + */ + flush_all_LRD_to_lsn(copy_of_max_rec_lsn_at_last_checkpoint); + /* this will go full speed (normal scheduling, no sleep) */ + break; + } + lock(log_mutex); + } + + result= execute_checkpoint_indirect(); + checkpoint_in_progress= NONE; + unlock(log_mutex); + broadcast(checkpoint_done_cond); + DBUG_RETURN(result); +} + + +/* + Does an indirect checpoint (collects data from data structures, writes into + a checkpoint log record). + Starts and ends while having log's mutex (released in the middle). 
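+
+  As a reading aid, the code below assembles the checkpoint record from an
+  array of six LEX_STRINGs, roughly as follows (this is only a summary of
+  the steps coded below, not an additional specification):
+    strings[0]    checkpoint start LSN (end of log when the checkpoint began)
+    strings[1]    dirty pages, from pagecache_collect_changed_blocks_with_LSN()
+    strings[2-3]  transaction information, from trnman_collect_transactions()
+    strings[4]    open table files (count, then short id and unique file name)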
+*/ +my_bool execute_checkpoint_indirect() +{ + int error= 0, i; + /* checkpoint record data: */ + LSN checkpoint_start_lsn; + char checkpoint_start_lsn_char[8]; + LEX_STRING strings[6]= + {checkpoint_start_lsn_char, 8}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0} }; + char *ptr; + LSN checkpoint_lsn; + LSN candidate_max_rec_lsn_at_last_checkpoint; + DBUG_ENTER("execute_checkpoint_indirect"); + + DBUG_ASSERT(sizeof(byte *) <= 8); + DBUG_ASSERT(sizeof(LSN) <= 8); + + safemutex_assert_owner(log_mutex); + + /* STEP 1: record current end-of-log LSN */ + checkpoint_start_lsn= log_read_end_lsn(); + if (LSN_IMPOSSIBLE == checkpoint_start_lsn) /* error */ + DBUG_RETURN(TRUE); + unlock(log_mutex); + + DBUG_PRINT("info",("checkpoint_start_lsn %lu", checkpoint_start_lsn)); + int8store(strings[0].str, checkpoint_start_lsn); + + /* STEP 2: fetch information about dirty pages */ + + if (pagecache_collect_changed_blocks_with_LSN(pagecache, &strings[1], + &candidate_max_rec_lsn_at_last_checkpoint)) + goto err; + + /* STEP 3: fetch information about transactions */ + if (trnman_collect_transactions(&strings[2], &strings[3])) + goto err; + + /* STEP 4: fetch information about table files */ + + { + /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */ + lock(global_share_list_mutex); + strings[4].length= 8+(8+8)*share_list->count; + if (NULL == (strings[4].str= my_malloc(strings[4].length))) + goto err; + ptr= string3.str; + /* + Note that maria_open_list is a list of MARIA_HA*, while we would prefer + a list of MARIA_SHARE* here (we are interested in the short id, + unique file name, members of MARIA_SHARE*, and in file descriptors, + which will in the end be in MARIA_SHARE*). + */ + for (iterate on the maria_open_list) + { + /* latch each MARIA_SHARE, one by one, like this: */ + pthread_mutex_lock(&share->intern_lock); + /* + TODO: + we need to prevent the share from going away while we later flush and + force it without holding THR_LOCK_maria. For example if the share is + free()d by maria_close() we'll have a problem. Or if the share's file + descriptor is closed by maria_close() we will not be able to my_sync() + it. + */ + pthread_mutex_unlock(&share->intern_lock); + store the share pointer into a private array; + } + unlock(global_share_list_mutex); + + /* work on copy */ + int8store(ptr, elements_in_array); + ptr+= 8; + for (el in array) + { + int8store(ptr, array[...].short_id); + ptr+= 8; + memcpy(ptr, array[...].unique_file_name[_length], ...); + ptr+= ...; + /* maybe we need to lock share->intern_lock here */ + /* + these two are long ops (involving disk I/O) that's why we copied the + list, to not keep the list locked for long: + */ + flush_bitmap_pages(el); + /* TODO: and also autoinc counter, logical file end, free page list */ + + /* + fsyncs the fd, that's the loooong operation (e.g. max 150 fsync per + second, so if you have touched 1000 files it's 7 seconds). + */ + force_file(el); + } + } + + /* LAST STEP: now write the checkpoint log record */ + + checkpoint_lsn= log_write_record(LOGREC_CHECKPOINT, + &system_trans, strings); + + /* + Do nothing between the log write and the control file write, for the + "repair control file" tool to be possible one day. + */ + + if (LSN_IMPOSSIBLE == checkpoint_lsn) + goto err; + + if (0 != control_file_write_and_force(checkpoint_lsn, NULL)) + goto err; + + /* + Note that we should not alter memory structures until we have successfully + written the checkpoint record and control file. 
+ Btw, a log write failure is serious: + - if we know how many bytes we managed to write, we should try to write + more, keeping the log's mutex (MY_FULL_IO) + - if we don't know, this log record is corrupted and we have no way to + "de-corrupt" it, so it will stay corrupted, and as the log is sequential, + any log record written after it will not be reachable (for example if we + would write UNDOs and crash, we would not be able to read the log and so + not be able to rollback), so we should stop the engine now (holding the + log's mutex) and do a recovery. + */ + goto end; + +err: + print_error_to_error_log(the_error_message); + candidate_max_rec_lsn_at_last_checkpoint= LSN_IMPOSSIBLE; + +end: + + for (i= 1; i<6; i++) + my_free(strings[i].str, MYF(MY_ALLOW_ZERO_PTR)); + + /* + this portion cannot be done as a hook in write_log_record() for the + LOGREC_CHECKPOINT type because: + - at that moment we still have not written to the control file so cannot + mark the request as done; this could be solved by writing to the control + file in the hook but that would be an I/O under the log's mutex, bad. + - it would not be nice organisation of code (I tried it :). + */ + if (candidate_max_rec_lsn_at_last_checkpoint != LSN_IMPOSSIBLE) + { + /* checkpoint succeeded */ + /* + TODO: compute log's low water mark (how to do that with our fuzzy + ARIES-like reads of data structures? TODO think about it :). + */ + lock(log_mutex); + /* That LSN is used for the "two-checkpoint rule" (MEDIUM checkpoints) */ + maximum_rec_lsn_last_checkpoint= candidate_max_rec_lsn_at_last_checkpoint; + DBUG_RETURN(FALSE); + } + lock(log_mutex); + DBUG_RETURN(TRUE); + /* + keep mutex locked upon exit because callers will want to clear + mutex-protected status variables + */ +} + + + +/* + Here's what should be put in log_write_record() in the log handler: +*/ +log_write_record(...) +{ + ...; + lock(log_mutex); + ...; + write_to_log(length); + written_since_last_checkpoint+= length; + if (written_since_last_checkpoint > + MAX_LOG_BYTES_WRITTEN_BETWEEN_CHECKPOINTS) + { + /* + ask one system thread (the "LRD background flusher and checkpointer + thread" WL#3261) to do a checkpoint + */ + request_asynchronous_checkpoint(INDIRECT); + /* prevent similar redundant requests */ + written_since_last_checkpoint= (my_off_t)0; + } + ...; + unlock(log_mutex); + ...; +} + +/* + Requests a checkpoint from the background thread, *asynchronously* + (requestor does not wait for completion, and does not even later check the + result). + In real life it will be called by log_write_record(). +*/ +void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); +{ + safemutex_assert_owner(log_mutex); + + DBUG_ASSERT(level > NONE); + if ((next_asynchronous_checkpoint_to_do < level) && + (checkpoint_in_progress < level)) + { + /* no equal or stronger running or to run, we post request */ + /* + We just don't broacast a cond, the checkpoint thread + (see ma_least_recently_dirtied.c) will notice our request in max a few + seconds. + */ + next_asynchronous_checkpoint_to_do= level; /* post request */ + } + + /* + If there was an error, only an error + message to the error log will say it; normal, for a checkpoint triggered + by a log write, we probably don't want the client's log write to throw an + error, as the log write succeeded and a checkpoint failure is not + critical: the failure in this case is more for the DBA to know than for + the end user. 
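+
+  For orientation, the background thread mentioned above (the "LRD
+  background flusher and checkpointer thread", WL#3261) is expected to poll
+  roughly like this; the flush-and-sleep helper is hypothetical, only
+  execute_asynchronous_checkpoint_if_any() is defined in this file:
+
+    for (;;)
+    {
+      execute_asynchronous_checkpoint_if_any();
+      flush_a_few_LRD_pages_and_sleep();   /* hypothetical helper */
+    }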
+ */
+}
+
+
+/*
+  If a 64-bit variable transitions from both halves being zero to both halves
+  being non-zero, and never changes after that (like the transaction's
+  first_undo_lsn), this function can be used to do a read of it (without
+  mutex, without atomic load) which always produces a correct (though maybe
+  slightly old) value (even on 32-bit CPUs).
+  The prototype will change with Sanja's new LSN type.
+*/
+static inline ulonglong read_non_atomic(ulonglong volatile *x)
+{
+#if ( SIZEOF_CHARP >= 8 )
+  /* 64-bit CPU (right?), 64-bit reads are atomic */
+  return *x;
+#else
+  /*
+    32-bit CPU, 64-bit reads may give a mix of the old half and the new half
+    (old low bits and new high bits, or the contrary).
+    As the variable we read transitions from both halves being zero to both
+    halves being non-zero, and never changes then, we can detect atomicity
+    problems:
+  */
+  ulonglong y;
+  for (;;) /* loop until no atomicity problems */
+  {
+    y= *x;
+    if (likely((0 == y) ||
+               ((0 != (y >> 32)) && (0 != (y << 32)))))
+      return y;
+    /* Worth seeing it! */
+    DBUG_PRINT("info",("atomicity problem"));
+  }
+#endif
+}
diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h
new file mode 100644
index 00000000000..1b8064fa755
--- /dev/null
+++ b/storage/maria/ma_checkpoint.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+  WL#3071 Maria checkpoint
+  First version written by Guilhem Bichot on 2006-04-27.
+  Does not compile yet.
+*/
+
+/* This is the interface of this module. */
+
+typedef enum enum_checkpoint_level {
+  NONE=-1,
+  INDIRECT, /* just write dirty_pages, transactions table and sync files */
+  MEDIUM, /* also flush all dirty pages which were already dirty at prev checkpoint */
+  FULL /* also flush all dirty pages */
+} CHECKPOINT_LEVEL;
+
+void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level);
+my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level);
+my_bool execute_asynchronous_checkpoint_if_any();
+/* that's all that's needed in the interface */
diff --git a/storage/maria/ma_checksum.c b/storage/maria/ma_checksum.c
new file mode 100644
index 00000000000..1b0f683fe63
--- /dev/null
+++ b/storage/maria/ma_checksum.c
@@ -0,0 +1,68 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Calculate a checksum for a row */ + +#include "maria_def.h" + +ha_checksum _ma_checksum(MARIA_HA *info, const byte *record) +{ + uint i; + ha_checksum crc=0; + MARIA_COLUMNDEF *rec=info->s->rec; + + if (info->s->base.null_bytes) + crc= my_checksum(crc, record, info->s->base.null_bytes); + + for (i=info->s->base.fields ; i-- ; ) + { + const byte *pos= record + rec->offset; + ulong length; + + switch (rec->type) { + case FIELD_BLOB: + { + length= _ma_calc_blob_length(rec->length- + maria_portable_sizeof_char_ptr, + pos); + memcpy((char*) &pos, pos+rec->length- maria_portable_sizeof_char_ptr, + sizeof(char*)); + break; + } + case FIELD_VARCHAR: + { + uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length-1); + if (pack_length == 1) + length= (ulong) *(uchar*) pos; + else + length= uint2korr(pos); + pos+= pack_length; + break; + } + default: + length= rec->length; + break; + } + crc= my_checksum(crc, pos ? pos : "", length); + } + return crc; +} + + +ha_checksum _ma_static_checksum(MARIA_HA *info, const byte *pos) +{ + return my_checksum(0, pos, info->s->base.reclength); +} diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c new file mode 100644 index 00000000000..2a874079961 --- /dev/null +++ b/storage/maria/ma_close.c @@ -0,0 +1,133 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* close a isam-database */ +/* + TODO: + We need to have a separate mutex on the closed file to allow other threads + to open other files during the time we flush the cache and close this file +*/ + +#include "maria_def.h" + +int maria_close(register MARIA_HA *info) +{ + int error=0,flag; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_close"); + DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u", + (long) info, (uint) share->reopen, + (uint) share->tot_locks)); + + pthread_mutex_lock(&THR_LOCK_maria); + if (info->lock_type == F_EXTRA_LCK) + info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */ + + if (share->reopen == 1 && share->kfile >= 0) + _ma_decrement_open_count(info); + + if (info->lock_type != F_UNLCK) + { + if (maria_lock_database(info,F_UNLCK)) + error=my_errno; + } + pthread_mutex_lock(&share->intern_lock); + + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + share->r_locks--; + share->tot_locks--; + } + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + if (end_io_cache(&info->rec_cache)) + error=my_errno; + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + } + flag= !--share->reopen; + /* + RECOVERYTODO: + If "flag" is TRUE, in the line below we are going to make the table + unknown to future checkpoints, so it needs to have fsync'ed itself + entirely (bitmap, pages, etc) at this point. + The flushing is currently done a few lines further (which is ok, as we + still hold THR_LOCK_maria), but syncing is missing. + */ + maria_open_list=list_delete(maria_open_list,&info->open_list); + pthread_mutex_unlock(&share->intern_lock); + + my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + (share->end)(info); + + if (info->s->data_file_type == BLOCK_RECORD) + info->dfile= -1; /* Closed in ma_end_once_block_row */ + if (flag) + { + if (share->kfile >= 0) + { + if ((*share->once_end)(share)) + error= my_errno; + if (flush_key_blocks(share->key_cache, share->kfile, + share->temporary ? FLUSH_IGNORE_CHANGED : + FLUSH_RELEASE)) + error= my_errno; + + /* + If we are crashed, we can safely flush the current state as it will + not change the crashed state. 
+ We can NOT write the state in other cases as other threads + may be using the file at this point + */ + if (share->mode != O_RDONLY && maria_is_crashed(info)) + _ma_state_info_write(share->kfile, &share->state, 1); + if (my_close(share->kfile, MYF(0))) + error= my_errno; + } +#ifdef HAVE_MMAP + if (share->file_map) + _ma_unmap_file(info); +#endif +#ifdef THREAD + thr_lock_delete(&share->lock); + VOID(pthread_mutex_destroy(&share->intern_lock)); + { + int i,keys; + keys = share->state.header.keys; + VOID(rwlock_destroy(&share->mmap_lock)); + for(i=0; i<keys; i++) { + VOID(rwlock_destroy(&share->key_root_lock[i])); + } + } +#endif + my_free((gptr) info->s,MYF(0)); + } + pthread_mutex_unlock(&THR_LOCK_maria); + if (info->ftparser_param) + { + my_free((gptr)info->ftparser_param, MYF(0)); + info->ftparser_param= 0; + } + if (info->dfile >= 0 && my_close(info->dfile,MYF(0))) + error = my_errno; + + my_free((gptr) info,MYF(0)); + + if (error) + { + DBUG_RETURN(my_errno=error); + } + DBUG_RETURN(0); +} /* maria_close */ diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c new file mode 100644 index 00000000000..07eddb956a2 --- /dev/null +++ b/storage/maria/ma_control_file.c @@ -0,0 +1,320 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3234 Maria control file + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +#include "maria_def.h" + +/* Here is the implementation of this module */ + +/* + a control file contains 3 objects: magic string, LSN of last checkpoint, + number of last log. +*/ + +/* total size should be < sector size for atomic write operation */ +#define CONTROL_FILE_MAGIC_STRING "\xfe\xfe\xc\1MACF" +#define CONTROL_FILE_MAGIC_STRING_OFFSET 0 +#define CONTROL_FILE_MAGIC_STRING_SIZE (sizeof(CONTROL_FILE_MAGIC_STRING)-1) +#define CONTROL_FILE_CHECKSUM_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE) +#define CONTROL_FILE_CHECKSUM_SIZE 1 +#define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_CHECKSUM_OFFSET + CONTROL_FILE_CHECKSUM_SIZE) +#define CONTROL_FILE_LSN_SIZE LSN_STORE_SIZE +#define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE) +#define CONTROL_FILE_FILENO_SIZE 4 +#define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) + +/* + This module owns these two vars. + uint32 is always atomically updated, but LSN is 8 bytes, we will need + provisions to ensure that it's updated atomically in + ma_control_file_write_and_force(). Probably the log mutex could be + used. TODO. 
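+
+  If the log's mutex is indeed used for that, the update in
+  ma_control_file_write_and_force() would presumably look like this
+  (illustrative only; the locking design is still the TODO above):
+
+    lock(log_mutex);
+    last_checkpoint_lsn= checkpoint_lsn;
+    last_logno= logno;
+    unlock(log_mutex);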
+*/
+LSN last_checkpoint_lsn;
+uint32 last_logno;
+
+
+/*
+  The control file is less than 512 bytes (a disk sector),
+  to be as atomic as possible
+*/
+static int control_file_fd= -1;
+
+static char simple_checksum(char *buffer, uint size)
+{
+  /* TODO: improve this sum if we want */
+  char s= 0;
+  uint i;
+  for (i= 0; i<size; i++)
+    s+= buffer[i];
+  return s;
+}
+
+/*
+  Initialize control file subsystem
+
+  SYNOPSIS
+    ma_control_file_create_or_open()
+
+  Looks for the control file. If absent, it's a fresh start, so it creates
+  the file. If present, reads it to find out last checkpoint's LSN and last
+  log, and updates the last_checkpoint_lsn and last_logno global variables.
+  Called at engine's start.
+
+  The format of the control file is:
+  8 bytes: magic string
+  1 byte:  checksum of the following bytes
+  7 bytes: LSN of the last checkpoint (log number, then offset in that log)
+  4 bytes: number of last log
+
+  RETURN
+    0 - OK
+    1 - Error (in which case the file is left closed)
+*/
+CONTROL_FILE_ERROR ma_control_file_create_or_open()
+{
+  char buffer[CONTROL_FILE_SIZE];
+  char name[FN_REFLEN];
+  MY_STAT stat_buff;
+  my_bool create_file;
+  int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR;
+  int error= CONTROL_FILE_UNKNOWN_ERROR;
+  DBUG_ENTER("ma_control_file_create_or_open");
+
+  /*
+    If you change sizes in the #defines, you at least have to change the
+    "*store" and "*korr" calls in this file, and can even create backward
+    compatibility problems. Beware!
+  */
+  DBUG_ASSERT(CONTROL_FILE_LSN_SIZE == (3+4));
+  DBUG_ASSERT(CONTROL_FILE_FILENO_SIZE == 4);
+
+  if (control_file_fd >= 0) /* already open */
+    DBUG_RETURN(0);
+
+  if (fn_format(name, CONTROL_FILE_BASE_NAME,
+                maria_data_root, "", MYF(MY_WME)) == NullS)
+    DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
+
+  create_file= test(my_access(name,F_OK));
+
+  if (create_file)
+  {
+    if ((control_file_fd= my_create(name, 0,
+                                    open_flags, MYF(MY_SYNC_DIR))) < 0)
+      DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
+
+    /*
+      To be safer we should make sure that there are no logs or data/index
+      files around (indeed it could be that the control file alone was deleted
+      or not restored, and we should not go on with life at this point).
+
+      TODO: For now we trust (this is alpha version), but for beta it would
+      be great to verify.
+
+      We could have a tool which can rebuild the control file, by reading the
+      directory of logs, finding the newest log, reading it to find last
+      checkpoint... Slow but can save your db. For this to be possible, we
+      must always write to the control file right after writing the checkpoint
+      log record, and do nothing in between (i.e. the checkpoint must be
+      usable as soon as it has been written to the log).
+    */
+
+    /* init the file with these "undefined" values */
+    DBUG_RETURN(ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN,
+                                                CONTROL_FILE_IMPOSSIBLE_FILENO,
+                                                CONTROL_FILE_UPDATE_ALL));
+  }
+
+  /* Otherwise, file exists */
+
+  if ((control_file_fd= my_open(name, open_flags, MYF(MY_WME))) < 0)
+    goto err;
+
+  if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL)
+    goto err;
+
+  if ((uint)stat_buff.st_size < CONTROL_FILE_SIZE)
+  {
+    /*
+      Given that normally we write only a sector and it's atomic, the only
+      possibility for a file to be of too short size is if we crashed at the
+      very first startup, between file creation and file write. Quite unlikely
+      (and can be made even more unlikely by doing this: create a temp file,
+      write it, and then rename it to be the control file).
+ What's more likely is if someone forgot to restore the control file, + just did a "touch control" to try to get Maria to start, or if the + disk/filesystem has a problem. + So let's be rigid. + */ + /* + TODO: store a message "too small file" somewhere, so that it goes to + MySQL's error log at startup. + */ + error= CONTROL_FILE_TOO_SMALL; + goto err; + } + + if ((uint)stat_buff.st_size > CONTROL_FILE_SIZE) + { + /* TODO: store "too big file" message */ + error= CONTROL_FILE_TOO_BIG; + goto err; + } + + if (my_read(control_file_fd, buffer, CONTROL_FILE_SIZE, + MYF(MY_FNABP | MY_WME))) + goto err; + if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, + CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE)) + { + /* TODO: store message "bad magic string" somewhere */ + error= CONTROL_FILE_BAD_MAGIC_STRING; + goto err; + } + if (simple_checksum(buffer + CONTROL_FILE_LSN_OFFSET, + CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET) != + buffer[CONTROL_FILE_CHECKSUM_OFFSET]) + { + /* TODO: store message "checksum mismatch" somewhere */ + error= CONTROL_FILE_BAD_CHECKSUM; + goto err; + } + last_checkpoint_lsn= lsn_korr(buffer + CONTROL_FILE_LSN_OFFSET); + last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET); + + DBUG_RETURN(0); +err: + ma_control_file_end(); + DBUG_RETURN(error); +} + + +/* + Write information durably to the control file; stores this information into + the last_checkpoint_lsn and last_logno global variables. + Called when we have created a new log (after syncing this log's creation) + and when we have written a checkpoint (after syncing this log record). + + SYNOPSIS + ma_control_file_write_and_force() + checkpoint_lsn LSN of last checkpoint + logno last log file number + objs_to_write which of the arguments should be used as new values + (for example, CONTROL_FILE_UPDATE_ONLY_LSN will not + write the logno argument to the control file and will + not update the last_logno global variable); can be: + CONTROL_FILE_UPDATE_ALL + CONTROL_FILE_UPDATE_ONLY_LSN + CONTROL_FILE_UPDATE_ONLY_LOGNO. + + NOTE + We always want to do one single my_pwrite() here to be as atomic as + possible. 
+ + RETURN + 0 - OK + 1 - Error +*/ + +int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno, + uint objs_to_write) +{ + char buffer[CONTROL_FILE_SIZE]; + my_bool update_checkpoint_lsn= FALSE, update_logno= FALSE; + DBUG_ENTER("ma_control_file_write_and_force"); + + DBUG_ASSERT(control_file_fd >= 0); /* must be open */ + + memcpy(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, + CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE); + + /* TODO: you need some protection to be able to read last_* global vars */ + + if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LSN) + update_checkpoint_lsn= TRUE; + else if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LOGNO) + update_logno= TRUE; + else if (objs_to_write == CONTROL_FILE_UPDATE_ALL) + update_checkpoint_lsn= update_logno= TRUE; + else /* incorrect value of objs_to_write */ + DBUG_ASSERT(0); + + if (update_checkpoint_lsn) + lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn); + else /* store old value == change nothing */ + lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, last_checkpoint_lsn); + + if (update_logno) + int4store(buffer + CONTROL_FILE_FILENO_OFFSET, logno); + else + int4store(buffer + CONTROL_FILE_FILENO_OFFSET, last_logno); + + buffer[CONTROL_FILE_CHECKSUM_OFFSET]= + simple_checksum(buffer + CONTROL_FILE_LSN_OFFSET, + CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET); + + if (my_pwrite(control_file_fd, buffer, sizeof(buffer), + 0, MYF(MY_FNABP | MY_WME)) || + my_sync(control_file_fd, MYF(MY_WME))) + DBUG_RETURN(1); + + /* TODO: you need some protection to be able to write last_* global vars */ + if (update_checkpoint_lsn) + last_checkpoint_lsn= checkpoint_lsn; + if (update_logno) + last_logno= logno; + + DBUG_RETURN(0); +} + + +/* + Free resources taken by control file subsystem + + SYNOPSIS + ma_control_file_end() +*/ + +int ma_control_file_end() +{ + int close_error; + DBUG_ENTER("ma_control_file_end"); + + if (control_file_fd < 0) /* already closed */ + DBUG_RETURN(0); + + close_error= my_close(control_file_fd, MYF(MY_WME)); + /* + As my_close() frees structures even if close() fails, we do the same, + i.e. we mark the file as closed in all cases. + */ + control_file_fd= -1; + /* + As this module owns these variables, closing the module forbids access to + them (just a safety): + */ + last_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + last_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + + DBUG_RETURN(close_error); +} diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h new file mode 100644 index 00000000000..616babc3bb2 --- /dev/null +++ b/storage/maria/ma_control_file.h @@ -0,0 +1,77 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3234 Maria control file + First version written by Guilhem Bichot on 2006-04-27. 
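+
+  A minimal usage sketch of this module (where exactly these calls happen
+  in the server is an assumption based on the descriptions below; error
+  handling shortened):
+
+    if (ma_control_file_create_or_open())        /* at engine startup */
+      return 1;
+    ...
+    /* after writing and syncing a checkpoint record with LSN checkpoint_lsn */
+    if (ma_control_file_write_and_force(checkpoint_lsn, last_logno,
+                                        CONTROL_FILE_UPDATE_ONLY_LSN))
+      return 1;
+    ...
+    ma_control_file_end();                       /* at engine shutdown */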
+*/ + +#define CONTROL_FILE_BASE_NAME "maria_control" +/* + indicate absence of the log file number; first log is always number 1, 0 is + impossible. +*/ +#define CONTROL_FILE_IMPOSSIBLE_FILENO 0 +/* logs always have a header */ +#define CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET 0 +/* indicate absence of LSN. */ +#define CONTROL_FILE_IMPOSSIBLE_LSN ((LSN)0) + +/* Here is the interface of this module */ + +/* + LSN of the last checkoint + (if last_checkpoint_lsn == CONTROL_FILE_IMPOSSIBLE_LSN + then there was never a checkpoint) +*/ +extern LSN last_checkpoint_lsn; +/* + Last log number (if last_logno == + CONTROL_FILE_IMPOSSIBLE_FILENO then there is no log file yet) +*/ +extern uint32 last_logno; + +typedef enum enum_control_file_error { + CONTROL_FILE_OK= 0, + CONTROL_FILE_TOO_SMALL, + CONTROL_FILE_TOO_BIG, + CONTROL_FILE_BAD_MAGIC_STRING, + CONTROL_FILE_BAD_CHECKSUM, + CONTROL_FILE_UNKNOWN_ERROR /* any other error */ +} CONTROL_FILE_ERROR; + +#define CONTROL_FILE_UPDATE_ALL 0 +#define CONTROL_FILE_UPDATE_ONLY_LSN 1 +#define CONTROL_FILE_UPDATE_ONLY_LOGNO 2 + + +/* + Looks for the control file. If absent, it's a fresh start, create file. + If present, read it to find out last checkpoint's LSN and last log. + Called at engine's start. +*/ +CONTROL_FILE_ERROR ma_control_file_create_or_open(); +/* + Write information durably to the control file. + Called when we have created a new log (after syncing this log's creation) + and when we have written a checkpoint (after syncing this log record). +*/ +int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno, + uint objs_to_write); + + +/* Free resources taken by control file subsystem */ +int ma_control_file_end(); diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c new file mode 100644 index 00000000000..59f5f7d1a4d --- /dev/null +++ b/storage/maria/ma_create.c @@ -0,0 +1,1043 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Create a MARIA table */ + +#include "ma_ftdefs.h" +#include "ma_sp_defs.h" +#include <my_bit.h> +#include "ma_blockrec.h" + +#if defined(MSDOS) || defined(__WIN__) +#ifdef __WIN__ +#include <fcntl.h> +#else +#include <process.h> /* Prototype for getpid */ +#endif +#endif +#include <m_ctype.h> + +static int compare_columns(MARIA_COLUMNDEF **a, MARIA_COLUMNDEF **b); + +/* + Old options is used when recreating database, from maria_chk +*/ + +int maria_create(const char *name, enum data_file_type record_type, + uint keys,MARIA_KEYDEF *keydefs, + uint columns, MARIA_COLUMNDEF *recinfo, + uint uniques, MARIA_UNIQUEDEF *uniquedefs, + MARIA_CREATE_INFO *ci,uint flags) +{ + register uint i,j; + File dfile,file; + int errpos,save_errno, create_mode= O_RDWR | O_TRUNC; + myf create_flag; + uint length,max_key_length,packed,pack_bytes,pointer,real_length_diff, + key_length,info_length,key_segs,options,min_key_length_skip, + base_pos,long_varchar_count,varchar_length, + unique_key_parts,fulltext_keys,offset; + uint max_field_lengths, extra_header_size; + ulong reclength, real_reclength,min_pack_length; + char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr; + ulong pack_reclength; + ulonglong tot_length,max_rows, tmp; + enum en_fieldtype type; + enum data_file_type org_record_type= record_type; + MARIA_SHARE share; + MARIA_KEYDEF *keydef,tmp_keydef; + MARIA_UNIQUEDEF *uniquedef; + HA_KEYSEG *keyseg,tmp_keyseg; + MARIA_COLUMNDEF *rec, *rec_end; + ulong *rec_per_key_part; + my_off_t key_root[HA_MAX_POSSIBLE_KEY]; + MARIA_CREATE_INFO tmp_create_info; + my_bool tmp_table= FALSE; /* cache for presence of HA_OPTION_TMP_TABLE */ + myf sync_dir= MY_SYNC_DIR; + DBUG_ENTER("maria_create"); + DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u", + keys, columns, uniques, flags)); + + LINT_INIT(dfile); + LINT_INIT(file); + + if (!ci) + { + bzero((char*) &tmp_create_info,sizeof(tmp_create_info)); + ci=&tmp_create_info; + } + + if (keys + uniques > MARIA_MAX_KEY || columns == 0) + { + DBUG_RETURN(my_errno=HA_WRONG_CREATE_OPTION); + } + errpos=0; + options=0; + bzero((byte*) &share,sizeof(share)); + + if (flags & HA_DONT_TOUCH_DATA) + { + org_record_type= ci->org_data_file_type; + if (!(ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD)) + options=ci->old_options & + (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD | + HA_OPTION_READ_ONLY_DATA | HA_OPTION_CHECKSUM | + HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE); + else + { + /* Uncompressing rows */ + options=ci->old_options & + (HA_OPTION_CHECKSUM | HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE); + } + } + + if (ci->reloc_rows > ci->max_rows) + ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */ + + if (!(rec_per_key_part= + (ulong*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(long), + MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(my_errno); + + /* Start by checking fields and field-types used */ + + varchar_length=long_varchar_count=packed= + pack_reclength= max_field_lengths= 0; + reclength= min_pack_length= ci->null_bytes; + + for (rec= recinfo, rec_end= rec + columns ; rec != rec_end ; rec++) + { + /* Fill in not used struct parts */ + rec->offset= reclength; + rec->empty_pos= 0; + rec->empty_bit= 0; + rec->fill_length= rec->length; + + reclength+= rec->length; + type= rec->type; + if (type == FIELD_SKIP_PRESPACE 
&& record_type == BLOCK_RECORD) + type= FIELD_NORMAL; /* SKIP_PRESPACE not supported */ + + if (type != FIELD_NORMAL && type != FIELD_CHECK) + { + rec->empty_pos= packed/8; + rec->empty_bit= (1 << (packed & 7)); + if (type == FIELD_BLOB) + { + packed++; + share.base.blobs++; + if (pack_reclength != INT_MAX32) + { + if (rec->length == 4+maria_portable_sizeof_char_ptr) + pack_reclength= INT_MAX32; + else + { + /* Add max possible blob length */ + pack_reclength+= (1 << ((rec->length- + maria_portable_sizeof_char_ptr)*8)); + } + } + max_field_lengths+= (rec->length - maria_portable_sizeof_char_ptr); + } + else if (type == FIELD_SKIP_PRESPACE || + type == FIELD_SKIP_ENDSPACE) + { + max_field_lengths+= rec->length > 255 ? 2 : 1; + min_pack_length++; + packed++; + } + else if (type == FIELD_VARCHAR) + { + varchar_length+= rec->length-1; /* Used for min_pack_length */ + pack_reclength++; + min_pack_length++; + max_field_lengths++; + packed++; + /* We must test for 257 as length includes pack-length */ + if (test(rec->length >= 257)) + { + long_varchar_count++; + max_field_lengths++; + } + } + else if (type != FIELD_SKIP_ZERO) + { + min_pack_length+=rec->length; + rec->empty_pos= 0; + rec->empty_bit= 0; + } + else + packed++; + } + else /* FIELD_NORMAL */ + { + min_pack_length+=rec->length; + if (!rec->null_bit) + { + share.base.fixed_not_null_fields++; + share.base.fixed_not_null_fields_length+= rec->length; + } + } + } + if ((packed & 7) == 1) + { + /* + Not optimal packing, try to remove a 1 byte length zero-field as + this will get same record length, but smaller pack overhead + */ + while (rec != recinfo) + { + rec--; + if (rec->type == (int) FIELD_SKIP_ZERO && rec->length == 1) + { + rec->type=(int) FIELD_NORMAL; + packed--; + min_pack_length++; + break; + } + } + } + share.base.null_bytes= ci->null_bytes; + share.base.original_null_bytes= ci->null_bytes; + share.base.transactional= ci->transactional; + share.base.max_field_lengths= max_field_lengths; + share.base.field_offsets= 0; /* for future */ + + if (pack_reclength != INT_MAX32) + pack_reclength+= max_field_lengths + long_varchar_count; + + if (packed && record_type == STATIC_RECORD) + record_type= BLOCK_RECORD; + if (record_type == DYNAMIC_RECORD) + options|= HA_OPTION_PACK_RECORD; /* Must use packed records */ + + if (record_type == STATIC_RECORD) + { + /* We can't use checksum with static length rows */ + flags&= ~HA_CREATE_CHECKSUM; + options&= ~HA_OPTION_CHECKSUM; + min_pack_length+= varchar_length; + } + if (flags & HA_CREATE_TMP_TABLE) + { + options|= HA_OPTION_TMP_TABLE; + create_mode|= O_EXCL | O_NOFOLLOW; + } + if (flags & HA_CREATE_CHECKSUM || (options & HA_OPTION_CHECKSUM)) + { + options|= HA_OPTION_CHECKSUM; + min_pack_length++; + pack_reclength++; + } + if (flags & HA_CREATE_DELAY_KEY_WRITE) + options|= HA_OPTION_DELAY_KEY_WRITE; + if (flags & HA_CREATE_RELIES_ON_SQL_LAYER) + options|= HA_OPTION_RELIES_ON_SQL_LAYER; + + pack_bytes= (packed + 7) / 8; + if (pack_reclength != INT_MAX32) + pack_reclength+= reclength+pack_bytes + + test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD)); + min_pack_length+= pack_bytes; + /* Calculate min possible row length for rows-in-block */ + extra_header_size= MAX_FIXED_HEADER_SIZE; + if (ci->transactional) + extra_header_size= TRANS_MAX_FIXED_HEADER_SIZE; + share.base.min_row_length= (extra_header_size + share.base.null_bytes + + pack_bytes); + if (!ci->data_file_length && ci->max_rows) + { + if (pack_reclength == INT_MAX32 || + (~(ulonglong) 0)/ci->max_rows < (ulonglong) 
pack_reclength) + ci->data_file_length= ~(ulonglong) 0; + else + ci->data_file_length=(ulonglong) ci->max_rows*pack_reclength; + } + else if (!ci->max_rows) + { + if (record_type == BLOCK_RECORD) + { + uint rows_per_page= ((maria_block_size - PAGE_OVERHEAD_SIZE) / + (min_pack_length + extra_header_size + + DIR_ENTRY_SIZE)); + ulonglong data_file_length= ci->data_file_length; + if (data_file_length) + data_file_length= ((((ulonglong) 1 << ((BLOCK_RECORD_POINTER_SIZE-1) * + 8)) -1)); + if (rows_per_page > 0) + { + set_if_smaller(rows_per_page, MAX_ROWS_PER_PAGE); + ci->max_rows= ci->data_file_length / maria_block_size * rows_per_page; + } + else + ci->max_rows= ci->data_file_length / (min_pack_length + + extra_header_size + + DIR_ENTRY_SIZE); + } + else + ci->max_rows=(ha_rows) (ci->data_file_length/(min_pack_length + + ((options & + HA_OPTION_PACK_RECORD) ? + 3 : 0))); + } + max_rows= (ulonglong) ci->max_rows; + if (record_type == BLOCK_RECORD) + { + /* The + 1 is for record position withing page */ + pointer= maria_get_pointer_length((ci->data_file_length / + maria_block_size), 3) + 1; + set_if_smaller(pointer, BLOCK_RECORD_POINTER_SIZE); + + if (!max_rows) + max_rows= (((((ulonglong) 1 << ((pointer-1)*8)) -1) * maria_block_size) / + min_pack_length); + } + else + { + if (record_type != STATIC_RECORD) + pointer= maria_get_pointer_length(ci->data_file_length, + maria_data_pointer_size); + else + pointer= maria_get_pointer_length(ci->max_rows, maria_data_pointer_size); + if (!max_rows) + max_rows= ((((ulonglong) 1 << (pointer*8)) -1) / min_pack_length); + } + + real_reclength=reclength; + if (record_type == STATIC_RECORD) + { + if (reclength <= pointer) + reclength=pointer+1; /* reserve place for delete link */ + } + else + reclength+= long_varchar_count; /* We need space for varchar! */ + + max_key_length=0; tot_length=0 ; key_segs=0; + fulltext_keys=0; + share.state.rec_per_key_part=rec_per_key_part; + share.state.key_root=key_root; + share.state.key_del= HA_OFFSET_ERROR; + if (uniques) + max_key_length= MARIA_UNIQUE_HASH_LENGTH + pointer; + + for (i=0, keydef=keydefs ; i < keys ; i++ , keydef++) + { + share.state.key_root[i]= HA_OFFSET_ERROR; + min_key_length_skip=length=real_length_diff=0; + key_length=pointer; + if (keydef->flag & HA_SPATIAL) + { +#ifdef HAVE_SPATIAL + /* BAR TODO to support 3D and more dimensions in the future */ + uint sp_segs=SPDIMS*2; + keydef->flag=HA_SPATIAL; + + if (flags & HA_DONT_TOUCH_DATA) + { + /* + Called by maria_chk - i.e. table structure was taken from + MYI file and SPATIAL key *does have* additional sp_segs keysegs. + keydef->seg here points right at the GEOMETRY segment, + so we only need to decrease keydef->keysegs. 
+ (see maria_recreate_table() in _ma_check.c) + */ + keydef->keysegs-=sp_segs-1; + } + + for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ; + j++, keyseg++) + { + if (keyseg->type != HA_KEYTYPE_BINARY && + keyseg->type != HA_KEYTYPE_VARBINARY1 && + keyseg->type != HA_KEYTYPE_VARBINARY2) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err; + } + } + keydef->keysegs+=sp_segs; + key_length+=SPLEN*sp_segs; + length++; /* At least one length byte */ + min_key_length_skip+=SPLEN*2*SPDIMS; +#else + my_errno= HA_ERR_UNSUPPORTED; + goto err; +#endif /*HAVE_SPATIAL*/ + } + else if (keydef->flag & HA_FULLTEXT) + { + keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + + for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ; + j++, keyseg++) + { + if (keyseg->type != HA_KEYTYPE_TEXT && + keyseg->type != HA_KEYTYPE_VARTEXT1 && + keyseg->type != HA_KEYTYPE_VARTEXT2) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err; + } + if (!(keyseg->flag & HA_BLOB_PART) && + (keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARTEXT2)) + { + /* Make a flag that this is a VARCHAR */ + keyseg->flag|= HA_VAR_LENGTH_PART; + /* Store in bit_start number of bytes used to pack the length */ + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1)? + 1 : 2); + } + } + + fulltext_keys++; + key_length+= HA_FT_MAXBYTELEN+HA_FT_WLEN; + length++; /* At least one length byte */ + min_key_length_skip+=HA_FT_MAXBYTELEN; + real_length_diff=HA_FT_MAXBYTELEN-FT_MAX_WORD_LEN_FOR_SORT; + } + else + { + /* Test if prefix compression */ + if (keydef->flag & HA_PACK_KEY) + { + /* Can't use space_compression on number keys */ + if ((keydef->seg[0].flag & HA_SPACE_PACK) && + keydef->seg[0].type == (int) HA_KEYTYPE_NUM) + keydef->seg[0].flag&= ~HA_SPACE_PACK; + + /* Only use HA_PACK_KEY when first segment is a variable length key */ + if (!(keydef->seg[0].flag & (HA_SPACE_PACK | HA_BLOB_PART | + HA_VAR_LENGTH_PART))) + { + /* pack relative to previous key */ + keydef->flag&= ~HA_PACK_KEY; + keydef->flag|= HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY; + } + else + { + keydef->seg[0].flag|=HA_PACK_KEY; /* for easyer intern test */ + keydef->flag|=HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + } + } + if (keydef->flag & HA_BINARY_PACK_KEY) + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + + if (keydef->flag & HA_AUTO_KEY && ci->with_auto_increment) + share.base.auto_key=i+1; + for (j=0, keyseg=keydef->seg ; j < keydef->keysegs ; j++, keyseg++) + { + /* numbers are stored with high by first to make compression easier */ + switch (keyseg->type) { + case HA_KEYTYPE_SHORT_INT: + case HA_KEYTYPE_LONG_INT: + case HA_KEYTYPE_FLOAT: + case HA_KEYTYPE_DOUBLE: + case HA_KEYTYPE_USHORT_INT: + case HA_KEYTYPE_ULONG_INT: + case HA_KEYTYPE_LONGLONG: + case HA_KEYTYPE_ULONGLONG: + case HA_KEYTYPE_INT24: + case HA_KEYTYPE_UINT24: + case HA_KEYTYPE_INT8: + keyseg->flag|= HA_SWAP_KEY; + break; + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + if (!(keyseg->flag & HA_BLOB_PART)) + { + /* Make a flag that this is a VARCHAR */ + keyseg->flag|= HA_VAR_LENGTH_PART; + /* Store in bit_start number of bytes used to pack the length */ + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARBINARY1) ? 
+ 1 : 2); + } + break; + default: + break; + } + if (keyseg->flag & HA_SPACE_PACK) + { + DBUG_ASSERT(!(keyseg->flag & HA_VAR_LENGTH_PART)); + keydef->flag |= HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + length++; /* At least one length byte */ + min_key_length_skip+=keyseg->length; + if (keyseg->length >= 255) + { /* prefix may be 3 bytes */ + min_key_length_skip+=2; + length+=2; + } + } + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { + DBUG_ASSERT(!test_all_bits(keyseg->flag, + (HA_VAR_LENGTH_PART | HA_BLOB_PART))); + keydef->flag|=HA_VAR_LENGTH_KEY; + length++; /* At least one length byte */ + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + min_key_length_skip+=keyseg->length; + if (keyseg->length >= 255) + { /* prefix may be 3 bytes */ + min_key_length_skip+=2; + length+=2; + } + } + key_length+= keyseg->length; + if (keyseg->null_bit) + { + key_length++; + options|=HA_OPTION_PACK_KEYS; + keyseg->flag|=HA_NULL_PART; + keydef->flag|=HA_VAR_LENGTH_KEY | HA_NULL_PART_KEY; + } + } + } /* if HA_FULLTEXT */ + key_segs+=keydef->keysegs; + if (keydef->keysegs > HA_MAX_KEY_SEG) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err; + } + /* + key_segs may be 0 in the case when we only want to be able to + add on row into the table. This can happen with some DISTINCT queries + in MySQL + */ + if ((keydef->flag & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME && + key_segs) + share.state.rec_per_key_part[key_segs-1]=1L; + length+=key_length; + if (length >= min(HA_MAX_KEY_BUFF, MARIA_MAX_KEY_LENGTH)) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err; + } + keydef->block_length= maria_block_size; + keydef->keylength= (uint16) key_length; + keydef->minlength= (uint16) (length-min_key_length_skip); + keydef->maxlength= (uint16) length; + + if (length > max_key_length) + max_key_length= length; + tot_length+= ((max_rows/(ulong) (((uint) maria_block_size-5)/ + (length*2))) * + maria_block_size); + } + + unique_key_parts=0; + offset=reclength-uniques*MARIA_UNIQUE_HASH_LENGTH; + for (i=0, uniquedef=uniquedefs ; i < uniques ; i++ , uniquedef++) + { + uniquedef->key=keys+i; + unique_key_parts+=uniquedef->keysegs; + share.state.key_root[keys+i]= HA_OFFSET_ERROR; + tot_length+= (max_rows/(ulong) (((uint) maria_block_size-5)/ + ((MARIA_UNIQUE_HASH_LENGTH + pointer)*2)))* + (ulong) maria_block_size; + } + keys+=uniques; /* Each unique has 1 key */ + key_segs+=uniques; /* Each unique has 1 key seg */ + + base_pos=(MARIA_STATE_INFO_SIZE + keys * MARIA_STATE_KEY_SIZE + + key_segs * MARIA_STATE_KEYSEG_SIZE); + info_length= base_pos+(uint) (MARIA_BASE_INFO_SIZE+ + keys * MARIA_KEYDEF_SIZE+ + uniques * MARIA_UNIQUEDEF_SIZE + + (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+ + columns*MARIA_COLUMNDEF_SIZE); + + DBUG_PRINT("info", ("info_length: %u", info_length)); + /* There are only 16 bits for the total header length. */ + if (info_length > 65535) + { + my_printf_error(0, "Maria table '%s' has too many columns and/or " + "indexes and/or unique constraints.", + MYF(0), name + dirname_length(name)); + my_errno= HA_WRONG_CREATE_OPTION; + goto err; + } + + bmove(share.state.header.file_version,(byte*) maria_file_magic,4); + ci->old_options=options| (ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD ? 
+ HA_OPTION_COMPRESS_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD: 0); + mi_int2store(share.state.header.options,ci->old_options); + mi_int2store(share.state.header.header_length,info_length); + mi_int2store(share.state.header.state_info_length,MARIA_STATE_INFO_SIZE); + mi_int2store(share.state.header.base_info_length,MARIA_BASE_INFO_SIZE); + mi_int2store(share.state.header.base_pos,base_pos); + share.state.header.data_file_type= record_type; + share.state.header.org_data_file_type= org_record_type; + share.state.header.language= (ci->language ? + ci->language : default_charset_info->number); + + share.state.dellink = HA_OFFSET_ERROR; + share.state.first_bitmap_with_space= 0; + share.state.process= (ulong) getpid(); + share.state.unique= (ulong) 0; + share.state.update_count=(ulong) 0; + share.state.version= (ulong) time((time_t*) 0); + share.state.sortkey= (ushort) ~0; + share.state.auto_increment=ci->auto_increment; + share.options=options; + share.base.rec_reflength=pointer; + share.base.block_size= maria_block_size; + + /* Get estimate for index file length (this may be wrong for FT keys) */ + tmp= (tot_length + maria_block_size * keys * + MARIA_INDEX_BLOCK_MARGIN) / maria_block_size; + /* + use maximum of key_file_length we calculated and key_file_length value we + got from MYI file header (see also mariapack.c:save_state) + */ + share.base.key_reflength= + maria_get_pointer_length(max(ci->key_file_length,tmp),3); + share.base.keys= share.state.header.keys= keys; + share.state.header.uniques= uniques; + share.state.header.fulltext_keys= fulltext_keys; + mi_int2store(share.state.header.key_parts,key_segs); + mi_int2store(share.state.header.unique_key_parts,unique_key_parts); + + maria_set_all_keys_active(share.state.key_map, keys); + + share.base.keystart = share.state.state.key_file_length= + MY_ALIGN(info_length, maria_block_size); + share.base.max_key_block_length= maria_block_size; + share.base.max_key_length=ALIGN_SIZE(max_key_length+4); + share.base.records=ci->max_rows; + share.base.reloc= ci->reloc_rows; + share.base.reclength=real_reclength; + share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM); + share.base.max_pack_length=pack_reclength; + share.base.min_pack_length=min_pack_length; + share.base.pack_bytes= pack_bytes; + share.base.fields= columns; + share.base.pack_fields= packed; +#ifdef USE_RAID + share.base.raid_type=ci->raid_type; + share.base.raid_chunks=ci->raid_chunks; + share.base.raid_chunksize=ci->raid_chunksize; +#endif + + /* max_data_file_length and max_key_file_length are recalculated on open */ + if (options & HA_OPTION_TMP_TABLE) + { + tmp_table= TRUE; + sync_dir= 0; + share.base.max_data_file_length= (my_off_t) ci->data_file_length; + } + + share.base.min_block_length= + (share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH && + ! share.base.blobs) ? + max(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) : + MARIA_EXTEND_BLOCK_LENGTH; + if (! 
(flags & HA_DONT_TOUCH_DATA)) + share.state.create_time= (long) time((time_t*) 0); + + pthread_mutex_lock(&THR_LOCK_maria); + + if (ci->index_file_name) + { + char *iext= strrchr(ci->index_file_name, '.'); + int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT); + if (tmp_table) + { + char *path; + /* chop off the table name, tempory tables use generated name */ + if ((path= strrchr(ci->index_file_name, FN_LIBCHAR))) + *path= '\0'; + fn_format(filename, name, ci->index_file_name, MARIA_NAME_IEXT, + MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); + } + else + { + fn_format(filename, ci->index_file_name, "", MARIA_NAME_IEXT, + MY_UNPACK_FILENAME | (have_iext ? MY_REPLACE_EXT : + MY_APPEND_EXT)); + } + fn_format(linkname, name, "", MARIA_NAME_IEXT, + MY_UNPACK_FILENAME|MY_APPEND_EXT); + linkname_ptr=linkname; + /* + Don't create the table if the link or file exists to ensure that one + doesn't accidently destroy another table. + Don't sync dir now if the data file has the same path. + */ + create_flag= + (ci->data_file_name && + !strcmp(ci->index_file_name, ci->data_file_name)) ? 0 : sync_dir; + } + else + { + fn_format(filename, name, "", MARIA_NAME_IEXT, + (MY_UNPACK_FILENAME | + (flags & HA_DONT_TOUCH_DATA) ? MY_RETURN_REAL_PATH : 0) | + MY_APPEND_EXT); + linkname_ptr=0; + /* + Replace the current file. + Don't sync dir now if the data file has the same path. + */ + create_flag= MY_DELETE_OLD | (!ci->data_file_name ? 0 : sync_dir); + } + + /* + If a MRG_MARIA table is in use, the mapped MARIA tables are open, + but no entry is made in the table cache for them. + A TRUNCATE command checks for the table in the cache only and could + be fooled to believe, the table is not open. + Pull the emergency brake in this situation. (Bug #8306) + */ + if (_ma_test_if_reopen(filename)) + { + my_printf_error(0, "MARIA table '%s' is in use " + "(most likely by a MERGE table). Try FLUSH TABLES.", + MYF(0), name + dirname_length(name)); + goto err; + } + + if ((file= my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME|create_flag))) < 0) + goto err; + errpos=1; + + if (!(flags & HA_DONT_TOUCH_DATA)) + { + if (ci->data_file_name) + { + char *dext= strrchr(ci->data_file_name, '.'); + int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); + + if (tmp_table) + { + char *path; + /* chop off the table name, tempory tables use generated name */ + if ((path= strrchr(ci->data_file_name, FN_LIBCHAR))) + *path= '\0'; + fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT, + MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); + } + else + { + fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | + (have_dext ? 
MY_REPLACE_EXT : MY_APPEND_EXT)); + } + fn_format(linkname, name, "",MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr=linkname; + create_flag=0; + } + else + { + fn_format(filename,name,"", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr=0; + create_flag=MY_DELETE_OLD; + } + if ((dfile= + my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME | create_flag | sync_dir))) < 0) + goto err; + errpos=3; + + if (record_type == BLOCK_RECORD) + { + /* Write one bitmap page */ + char buff[IO_SIZE]; + uint i; + bzero(buff, sizeof(buff)); + for (i= 0 ; i < maria_block_size ; i+= IO_SIZE) + if (my_write(dfile, (byte*) buff, sizeof(buff), MYF(MY_NABP))) + goto err; + share.state.state.data_file_length= maria_block_size; + } + } + DBUG_PRINT("info", ("write state info and base info")); + if (_ma_state_info_write(file, &share.state, 2) || + _ma_base_info_write(file, &share.base)) + goto err; + DBUG_PRINT("info", ("base_pos: %d base_info_size: %d", + base_pos, MARIA_BASE_INFO_SIZE)); + DBUG_ASSERT(my_tell(file,MYF(0)) == base_pos+ MARIA_BASE_INFO_SIZE); + + /* Write key and keyseg definitions */ + DBUG_PRINT("info", ("write key and keyseg definitions")); + for (i=0 ; i < share.base.keys - uniques; i++) + { + uint sp_segs=(keydefs[i].flag & HA_SPATIAL) ? 2*SPDIMS : 0; + + if (_ma_keydef_write(file, &keydefs[i])) + goto err; + for (j=0 ; j < keydefs[i].keysegs-sp_segs ; j++) + if (_ma_keyseg_write(file, &keydefs[i].seg[j])) + goto err; +#ifdef HAVE_SPATIAL + for (j=0 ; j < sp_segs ; j++) + { + HA_KEYSEG sseg; + sseg.type=SPTYPE; + sseg.language= 7; /* Binary */ + sseg.null_bit=0; + sseg.bit_start=0; + sseg.bit_end=0; + sseg.bit_length= 0; + sseg.bit_pos= 0; + sseg.length=SPLEN; + sseg.null_pos=0; + sseg.start=j*SPLEN; + sseg.flag= HA_SWAP_KEY; + if (_ma_keyseg_write(file, &sseg)) + goto err; + } +#endif + } + /* Create extra keys for unique definitions */ + offset=reclength-uniques*MARIA_UNIQUE_HASH_LENGTH; + bzero((char*) &tmp_keydef,sizeof(tmp_keydef)); + bzero((char*) &tmp_keyseg,sizeof(tmp_keyseg)); + for (i=0; i < uniques ; i++) + { + tmp_keydef.keysegs=1; + tmp_keydef.flag= HA_UNIQUE_CHECK; + tmp_keydef.block_length= (uint16) maria_block_size; + tmp_keydef.keylength= MARIA_UNIQUE_HASH_LENGTH + pointer; + tmp_keydef.minlength=tmp_keydef.maxlength=tmp_keydef.keylength; + tmp_keyseg.type= MARIA_UNIQUE_HASH_TYPE; + tmp_keyseg.length= MARIA_UNIQUE_HASH_LENGTH; + tmp_keyseg.start= offset; + offset+= MARIA_UNIQUE_HASH_LENGTH; + if (_ma_keydef_write(file,&tmp_keydef) || + _ma_keyseg_write(file,(&tmp_keyseg))) + goto err; + } + + /* Save unique definition */ + DBUG_PRINT("info", ("write unique definitions")); + for (i=0 ; i < share.state.header.uniques ; i++) + { + HA_KEYSEG *keyseg_end; + keyseg= uniquedefs[i].seg; + if (_ma_uniquedef_write(file, &uniquedefs[i])) + goto err; + for (keyseg= uniquedefs[i].seg, keyseg_end= keyseg+ uniquedefs[i].keysegs; + keyseg < keyseg_end; + keyseg++) + { + switch (keyseg->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + if (!(keyseg->flag & HA_BLOB_PART)) + { + keyseg->flag|= HA_VAR_LENGTH_PART; + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARBINARY1) ? 
+ 1 : 2); + } + break; + default: + DBUG_ASSERT((keyseg->flag & HA_VAR_LENGTH_PART) == 0); + break; + } + if (_ma_keyseg_write(file, keyseg)) + goto err; + } + } + DBUG_PRINT("info", ("write field definitions")); + if (record_type == BLOCK_RECORD) + { + /* Store columns in a more efficent order */ + MARIA_COLUMNDEF **tmp, **pos; + if (!(tmp= (MARIA_COLUMNDEF**) my_malloc(share.base.fields * + sizeof(MARIA_COLUMNDEF*), + MYF(MY_WME)))) + goto err; + for (rec= recinfo, pos= tmp ; rec != rec_end ; rec++, pos++) + *pos= rec; + qsort(tmp, share.base.fields, sizeof(*tmp), (qsort_cmp) compare_columns); + for (i=0 ; i < share.base.fields ; i++) + if (_ma_recinfo_write(file, tmp[i])) + goto err; + } + else + { + for (i=0 ; i < share.base.fields ; i++) + if (_ma_recinfo_write(file, &recinfo[i])) + goto err; + } + +#ifndef DBUG_OFF + if ((uint) my_tell(file,MYF(0)) != info_length) + { + uint pos= (uint) my_tell(file,MYF(0)); + DBUG_PRINT("warning",("info_length: %d != used_length: %d", + info_length, pos)); + } +#endif + + /* Enlarge files */ + DBUG_PRINT("info", ("enlarge to keystart: %lu", + (ulong) share.base.keystart)); + if (my_chsize(file,(ulong) share.base.keystart,0,MYF(0))) + goto err; + + if (! (flags & HA_DONT_TOUCH_DATA)) + { +#ifdef USE_RELOC + if (my_chsize(dfile,share.base.min_pack_length*ci->reloc_rows,0,MYF(0))) + goto err; + if (!tmp_table && my_sync(file, MYF(0))) + goto err; +#endif + /* if !USE_RELOC, there was no write to the file, no need to sync it */ + errpos=2; + if (my_close(dfile,MYF(0))) + goto err; + } + errpos=0; + pthread_mutex_unlock(&THR_LOCK_maria); + if (my_close(file,MYF(0))) + goto err; + /* + RECOVERYTODO + Write a log record describing the CREATE operation (just the file + names, link names, and the full header's content). + For this record to be of any use for Recovery, we need the upper + MySQL layer to be crash-safe, which it is not now (that would require work + using the ddl_log of sql/sql_table.cc); when is is, we should reconsider + the moment of writing this log record (before or after op, under + THR_LOCK_maria or not...), how to use it in Recovery, and force the log. + For now this record is just informative. + If operation failed earlier, we clean up in "err:" and the MySQL layer + will clean up the frm, so we needn't write anything to the log. + */ + my_free((char*) rec_per_key_part,MYF(0)); + DBUG_RETURN(0); + +err: + pthread_mutex_unlock(&THR_LOCK_maria); + save_errno=my_errno; + switch (errpos) { + case 3: + VOID(my_close(dfile,MYF(0))); + /* fall through */ + case 2: + if (! (flags & HA_DONT_TOUCH_DATA)) + my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT), + sync_dir); + /* fall through */ + case 1: + VOID(my_close(file,MYF(0))); + if (! 
(flags & HA_DONT_TOUCH_DATA)) + my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_IEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT), + sync_dir); + } + my_free((char*) rec_per_key_part, MYF(0)); + DBUG_RETURN(my_errno=save_errno); /* return the fatal errno */ +} + + +uint maria_get_pointer_length(ulonglong file_length, uint def) +{ + DBUG_ASSERT(def >= 2 && def <= 7); + if (file_length) /* If not default */ + { +#ifdef NOT_YET_READY_FOR_8_BYTE_POINTERS + if (file_length >= (longlong) 1 << 56) + def=8; +#endif + if (file_length >= (longlong) 1 << 48) + def=7; + if (file_length >= (longlong) 1 << 40) + def=6; + else if (file_length >= (longlong) 1 << 32) + def=5; + else if (file_length >= (1L << 24)) + def=4; + else if (file_length >= (1L << 16)) + def=3; + else + def=2; + } + return def; +} + + +/* + Sort columns for records-in-block + + IMPLEMENTATION + Sort columns in following order: + + Fixed size, not null columns + Fixed length, null fields + Variable length fields (CHAR, VARCHAR) + Blobs + + For same kind of fields, keep fields in original order +*/ + +static inline int sign(longlong a) +{ + return a < 0 ? -1 : (a > 0 ? 1 : 0); +} + + +int compare_columns(MARIA_COLUMNDEF **a_ptr, MARIA_COLUMNDEF **b_ptr) +{ + MARIA_COLUMNDEF *a= *a_ptr, *b= *b_ptr; + enum en_fieldtype a_type, b_type; + + a_type= (a->type == FIELD_NORMAL || a->type == FIELD_CHECK ? + FIELD_NORMAL : a->type); + b_type= (b->type == FIELD_NORMAL || b->type == FIELD_CHECK ? + FIELD_NORMAL : b->type); + + if (a_type == FIELD_NORMAL && !a->null_bit) + { + if (b_type != FIELD_NORMAL || b->null_bit) + return -1; + return sign((long) (a->offset - b->offset)); + } + if (b_type == FIELD_NORMAL && !b->null_bit) + return 1; + if (a_type == b_type) + return sign((long) (a->offset - b->offset)); + if (a_type == FIELD_NORMAL) + return -1; + if (b_type == FIELD_NORMAL) + return 1; + if (a_type == FIELD_BLOB) + return 1; + if (b_type == FIELD_BLOB) + return -1; + return sign((long) (a->offset - b->offset)); +} + + + + diff --git a/storage/maria/ma_dbug.c b/storage/maria/ma_dbug.c new file mode 100644 index 00000000000..596bf18bfe5 --- /dev/null +++ b/storage/maria/ma_dbug.c @@ -0,0 +1,193 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Support rutiner with are using with dbug */ + +#include "maria_def.h" + + /* Print a key in user understandable format */ + +void _ma_print_key(FILE *stream, register HA_KEYSEG *keyseg, + const byte *key, uint length) +{ + int flag; + short int s_1; + long int l_1; + float f_1; + double d_1; + const byte *end; + const byte *key_end= key + length; + + VOID(fputs("Key: \"",stream)); + flag=0; + for (; keyseg->type && key < key_end ;keyseg++) + { + if (flag++) + VOID(putc('-',stream)); + end= key+ keyseg->length; + if (keyseg->flag & HA_NULL_PART) + { + /* A NULL value is encoded by a 1-byte flag. Zero means NULL. */ + if (! *(key++)) + { + fprintf(stream,"NULL"); + continue; + } + } + + switch (keyseg->type) { + case HA_KEYTYPE_BINARY: + if (!(keyseg->flag & HA_SPACE_PACK) && keyseg->length == 1) + { /* packed binary digit */ + VOID(fprintf(stream,"%d",(uint) *key++)); + break; + } + /* fall through */ + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_NUM: + if (keyseg->flag & HA_SPACE_PACK) + { + VOID(fprintf(stream,"%.*s",(int) *key,key+1)); + key+= (int) *key+1; + } + else + { + VOID(fprintf(stream,"%.*s",(int) keyseg->length,key)); + key=end; + } + break; + case HA_KEYTYPE_INT8: + VOID(fprintf(stream,"%d",(int) *((signed char*) key))); + key=end; + break; + case HA_KEYTYPE_SHORT_INT: + s_1= mi_sint2korr(key); + VOID(fprintf(stream,"%d",(int) s_1)); + key=end; + break; + case HA_KEYTYPE_USHORT_INT: + { + ushort u_1; + u_1= mi_uint2korr(key); + VOID(fprintf(stream,"%u",(uint) u_1)); + key=end; + break; + } + case HA_KEYTYPE_LONG_INT: + l_1=mi_sint4korr(key); + VOID(fprintf(stream,"%ld",l_1)); + key=end; + break; + case HA_KEYTYPE_ULONG_INT: + l_1=mi_sint4korr(key); + VOID(fprintf(stream,"%lu",(ulong) l_1)); + key=end; + break; + case HA_KEYTYPE_INT24: + VOID(fprintf(stream,"%ld",(long) mi_sint3korr(key))); + key=end; + break; + case HA_KEYTYPE_UINT24: + VOID(fprintf(stream,"%lu",(ulong) mi_uint3korr(key))); + key=end; + break; + case HA_KEYTYPE_FLOAT: + mi_float4get(f_1,key); + VOID(fprintf(stream,"%g",(double) f_1)); + key=end; + break; + case HA_KEYTYPE_DOUBLE: + mi_float8get(d_1,key); + VOID(fprintf(stream,"%g",d_1)); + key=end; + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + { + char buff[21]; + longlong2str(mi_sint8korr(key),buff,-10); + VOID(fprintf(stream,"%s",buff)); + key=end; + break; + } + case HA_KEYTYPE_ULONGLONG: + { + char buff[21]; + longlong2str(mi_sint8korr(key),buff,10); + VOID(fprintf(stream,"%s",buff)); + key=end; + break; + } + case HA_KEYTYPE_BIT: + { + uint i; + fputs("0x",stream); + for (i=0 ; i < keyseg->length ; i++) + fprintf(stream, "%02x", (uint) *key++); + key= end; + break; + } + +#endif + case HA_KEYTYPE_VARTEXT1: /* VARCHAR and TEXT */ + case HA_KEYTYPE_VARTEXT2: /* VARCHAR and TEXT */ + case HA_KEYTYPE_VARBINARY1: /* VARBINARY and BLOB */ + case HA_KEYTYPE_VARBINARY2: /* VARBINARY and BLOB */ + { + uint tmp_length; + get_key_length(tmp_length,key); + /* + The following command sometimes gives a warning from valgrind. 
+ Not yet sure if the bug is in valgrind, glibc or mysqld + */ + VOID(fprintf(stream,"%.*s",(int) tmp_length,key)); + key+=tmp_length; + break; + } + default: break; /* This never happens */ + } + } + VOID(fputs("\"\n",stream)); + return; +} /* print_key */ + + +#ifdef EXTRA_DEBUG + +my_bool _ma_check_table_is_closed(const char *name, const char *where) +{ + char filename[FN_REFLEN]; + LIST *pos; + DBUG_ENTER("_ma_check_table_is_closed"); + + (void) fn_format(filename,name,"",MARIA_NAME_IEXT,4+16+32); + for (pos=maria_open_list ; pos ; pos=pos->next) + { + MARIA_HA *info=(MARIA_HA*) pos->data; + MARIA_SHARE *share=info->s; + if (!strcmp(share->unique_file_name,filename)) + { + if (share->last_version) + { + fprintf(stderr,"Warning: Table: %s is open on %s\n", name,where); + DBUG_PRINT("warning",("Table: %s is open on %s", name,where)); + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(0); +} +#endif /* EXTRA_DEBUG */ diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c new file mode 100644 index 00000000000..9198989dcb7 --- /dev/null +++ b/storage/maria/ma_delete.c @@ -0,0 +1,891 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Remove a row from a MARIA table */ + +#include "ma_fulltext.h" +#include "ma_rt_index.h" + +static int d_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uint comp_flag, + byte *key,uint key_length,my_off_t page,byte *anc_buff); +static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *key,byte *anc_buff, + my_off_t leaf_page,byte *leaf_buff,byte *keypos, + my_off_t next_block,byte *ret_key); +static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *anc_buff, + my_off_t leaf_page,byte *leaf_buff,byte *keypos); +static uint remove_key(MARIA_KEYDEF *keyinfo,uint nod_flag,byte *keypos, + byte *lastkey,byte *page_end, + my_off_t *next_block); +static int _ma_ck_real_delete(register MARIA_HA *info,MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, my_off_t *root); + + +int maria_delete(MARIA_HA *info,const byte *record) +{ + uint i; + byte *old_key; + int save_errno; + char lastpos[8]; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_delete"); + + /* Test if record is in datafile */ + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + DBUG_EXECUTE_IF("my_error_test_undefined_error", + maria_print_error(info->s, INT_MAX); + DBUG_RETURN(my_errno= INT_MAX);); + if (!(info->update & HA_STATE_AKTIV)) + { + DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No database read */ + } + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + if ((*share->compare_record)(info,record)) + goto err; /* Error on read-check */ + + if (_ma_mark_file_changed(info)) + 
goto err; + + /* Remove all keys from the index file */ + + old_key= info->lastkey2; + for (i=0 ; i < share->base.keys ; i++ ) + { + if (maria_is_key_active(info->s->state.key_map, i)) + { + info->s->keyinfo[i].version++; + if (info->s->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_del(info,i,(char*) old_key,record,info->cur_row.lastpos)) + goto err; + } + else + { + if (info->s->keyinfo[i].ck_delete(info,i,old_key, + _ma_make_key(info,i,old_key,record,info->cur_row.lastpos))) + goto err; + } + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + } + } + + if ((*share->delete_record)(info)) + goto err; /* Remove record from database */ + + /* + We can't use the row based checksum as this doesn't have enough + precision. + */ + if (info->s->calc_checksum) + { + info->cur_row.checksum= (*info->s->calc_checksum)(info,record); + info->state->checksum-= info->cur_row.checksum; + } + + info->update= HA_STATE_CHANGED+HA_STATE_DELETED+HA_STATE_ROW_CHANGED; + info->state->records--; + + mi_sizestore(lastpos, info->cur_row.lastpos); + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + if (info->invalidator != 0) + { + DBUG_PRINT("info", ("invalidator... '%s' (delete)", info->filename)); + (*info->invalidator)(info->filename); + info->invalidator=0; + } + DBUG_RETURN(0); + +err: + save_errno=my_errno; + mi_sizestore(lastpos, info->cur_row.lastpos); + if (save_errno != HA_ERR_RECORD_CHANGED) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* mark table crashed */ + } + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + info->update|=HA_STATE_WRITTEN; /* Buffer changed */ + allow_break(); /* Allow SIGHUP & SIGINT */ + my_errno=save_errno; + if (save_errno == HA_ERR_KEY_NOT_FOUND) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + } + + DBUG_RETURN(my_errno); +} /* maria_delete */ + + + /* Remove a key from the btree index */ + +int _ma_ck_delete(register MARIA_HA *info, uint keynr, byte *key, + uint key_length) +{ + return _ma_ck_real_delete(info, info->s->keyinfo+keynr, key, key_length, + &info->s->state.key_root[keynr]); +} /* _ma_ck_delete */ + + +static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, my_off_t *root) +{ + int error; + uint nod_flag; + my_off_t old_root; + byte *root_buff; + DBUG_ENTER("_ma_ck_real_delete"); + + if ((old_root=*root) == HA_OFFSET_ERROR) + { + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno=HA_ERR_CRASHED); + } + if (!(root_buff= (byte*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + { + DBUG_PRINT("error",("Couldn't allocate memory")); + DBUG_RETURN(my_errno=ENOMEM); + } + DBUG_PRINT("info",("root_page: %ld", (long) old_root)); + if (!_ma_fetch_keypage(info,keyinfo,old_root,DFLT_INIT_HITS,root_buff,0)) + { + error= -1; + goto err; + } + if ((error=d_search(info,keyinfo, + (keyinfo->flag & HA_FULLTEXT ? 
SEARCH_FIND | SEARCH_UPDATE + : SEARCH_SAME), + key,key_length,old_root,root_buff)) >0) + { + if (error == 2) + { + DBUG_PRINT("test",("Enlarging of root when deleting")); + error= _ma_enlarge_root(info,keyinfo,key,root); + } + else /* error == 1 */ + { + if (maria_getint(root_buff) <= (nod_flag=_ma_test_if_nod(root_buff))+3) + { + error=0; + if (nod_flag) + *root= _ma_kpos(nod_flag,root_buff+2+nod_flag); + else + *root=HA_OFFSET_ERROR; + if (_ma_dispose(info,keyinfo,old_root,DFLT_INIT_HITS)) + error= -1; + } + else + error= _ma_write_keypage(info,keyinfo,old_root, + DFLT_INIT_HITS,root_buff); + } + } +err: + my_afree((gptr) root_buff); + DBUG_PRINT("exit",("Return: %d",error)); + DBUG_RETURN(error); +} /* _ma_ck_real_delete */ + + + /* + ** Remove key below key root + ** Return values: + ** 1 if there are less buffers; In this case anc_buff is not saved + ** 2 if there are more buffers + ** -1 on errors + */ + +static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uint comp_flag, byte *key, uint key_length, + my_off_t page, byte *anc_buff) +{ + int flag,ret_value,save_flag; + uint length,nod_flag,search_key_length; + my_bool last_key; + byte *leaf_buff,*keypos; + my_off_t leaf_page,next_block; + byte lastkey[HA_MAX_KEY_BUFF]; + DBUG_ENTER("d_search"); + DBUG_DUMP("page",anc_buff,maria_getint(anc_buff)); + + search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; + flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key, search_key_length, + comp_flag, &keypos, lastkey, &last_key); + if (flag == MARIA_FOUND_WRONG_KEY) + { + DBUG_PRINT("error",("Found wrong key")); + DBUG_RETURN(-1); + } + nod_flag=_ma_test_if_nod(anc_buff); + + if (!flag && keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + + get_key_full_length_rdonly(off, lastkey); + subkeys=ft_sintXkorr(lastkey+off); + DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0); + comp_flag=SEARCH_SAME; + if (subkeys >= 0) + { + /* normal word, one-level tree structure */ + if (info->ft1_to_ft2) + { + /* we're in ft1->ft2 conversion mode. Saving key data */ + insert_dynamic(info->ft1_to_ft2, (char*) (lastkey+off)); + } + else + { + /* we need exact match only if not in ft1->ft2 conversion mode */ + flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY, + comp_flag, &keypos, lastkey, &last_key); + } + /* fall through to normal delete */ + } + else + { + /* popular word. two-level tree. 
going down */ + uint tmp_key_length; + my_off_t root; + byte *kpos=keypos; + + if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey))) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno= HA_ERR_CRASHED; + DBUG_RETURN(-1); + } + root= _ma_dpos(info,nod_flag,kpos); + if (subkeys == -1) + { + /* the last entry in sub-tree */ + if (_ma_dispose(info, keyinfo, root,DFLT_INIT_HITS)) + DBUG_RETURN(-1); + /* fall through to normal delete */ + } + else + { + keyinfo=&info->s->ft2_keyinfo; + kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */ + get_key_full_length_rdonly(off, key); + key+=off; + ret_value= _ma_ck_real_delete(info, &info->s->ft2_keyinfo, + key, HA_FT_WLEN, &root); + _ma_dpointer(info, kpos+HA_FT_WLEN, root); + subkeys++; + ft_intXstore(kpos, subkeys); + if (!ret_value) + ret_value= _ma_write_keypage(info,keyinfo,page, + DFLT_INIT_HITS,anc_buff); + DBUG_PRINT("exit",("Return: %d",ret_value)); + DBUG_RETURN(ret_value); + } + } + } + leaf_buff=0; + LINT_INIT(leaf_page); + if (nod_flag) + { + leaf_page= _ma_kpos(nod_flag,keypos); + if (!(leaf_buff= (byte*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + { + DBUG_PRINT("error",("Couldn't allocate memory")); + my_errno=ENOMEM; + DBUG_PRINT("exit",("Return: %d",-1)); + DBUG_RETURN(-1); + } + if (!_ma_fetch_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff,0)) + goto err; + } + + if (flag != 0) + { + if (!nod_flag) + { + DBUG_PRINT("error",("Didn't find key")); + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; /* This should newer happend */ + goto err; + } + save_flag=0; + ret_value=d_search(info,keyinfo,comp_flag,key,key_length, + leaf_page,leaf_buff); + } + else + { /* Found key */ + uint tmp; + length=maria_getint(anc_buff); + if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length, + &next_block))) + goto err; + + length-= tmp; + + maria_putint(anc_buff,length,nod_flag); + if (!nod_flag) + { /* On leaf page */ + if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff)) + { + DBUG_PRINT("exit",("Return: %d",-1)); + DBUG_RETURN(-1); + } + /* Page will be update later if we return 1 */ + DBUG_RETURN(test(length <= (info->quick_mode ? 
MARIA_MIN_KEYBLOCK_LENGTH : + (uint) keyinfo->underflow_block_length))); + } + save_flag=1; + ret_value=del(info,keyinfo,key,anc_buff,leaf_page,leaf_buff,keypos, + next_block,lastkey); + } + if (ret_value >0) + { + save_flag=1; + if (ret_value == 1) + ret_value= underflow(info,keyinfo,anc_buff,leaf_page,leaf_buff,keypos); + else + { /* This happens only with packed keys */ + DBUG_PRINT("test",("Enlarging of key when deleting")); + if (!_ma_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length)) + goto err; + ret_value= _ma_insert(info,keyinfo,key,anc_buff,keypos,lastkey, + (byte*) 0,(byte*) 0,(my_off_t) 0,(my_bool) 0); + } + } + if (ret_value == 0 && maria_getint(anc_buff) > keyinfo->block_length) + { + save_flag=1; + ret_value= _ma_split_page(info,keyinfo,key,anc_buff,lastkey,0) | 2; + } + if (save_flag && ret_value != 1) + ret_value|= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff); + else + { + DBUG_DUMP("page",anc_buff,maria_getint(anc_buff)); + } + my_afree(leaf_buff); + DBUG_PRINT("exit",("Return: %d",ret_value)); + DBUG_RETURN(ret_value); + +err: + my_afree(leaf_buff); + DBUG_PRINT("exit",("Error: %d",my_errno)); + DBUG_RETURN (-1); +} /* d_search */ + + + /* Remove a key that has a page-reference */ + +static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, byte *anc_buff, my_off_t leaf_page, + byte *leaf_buff, + byte *keypos, /* Pos to where deleted key was */ + my_off_t next_block, + byte *ret_key) /* key before keypos in anc_buff */ +{ + int ret_value,length; + uint a_length,nod_flag,tmp; + my_off_t next_page; + byte keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; + MARIA_SHARE *share=info->s; + MARIA_KEY_PARAM s_temp; + DBUG_ENTER("del"); + DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page, + (ulong) keypos)); + DBUG_DUMP("leaf_buff",leaf_buff,maria_getint(leaf_buff)); + + endpos= leaf_buff+ maria_getint(leaf_buff); + if (!(key_start= _ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos, + &tmp))) + DBUG_RETURN(-1); + + if ((nod_flag=_ma_test_if_nod(leaf_buff))) + { + next_page= _ma_kpos(nod_flag,endpos); + if (!(next_buff= (byte*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + DBUG_RETURN(-1); + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0)) + ret_value= -1; + else + { + DBUG_DUMP("next_page",next_buff,maria_getint(next_buff)); + if ((ret_value=del(info,keyinfo,key,anc_buff,next_page,next_buff, + keypos,next_block,ret_key)) >0) + { + endpos=leaf_buff+maria_getint(leaf_buff); + if (ret_value == 1) + { + ret_value=underflow(info,keyinfo,leaf_buff,next_page, + next_buff,endpos); + if (ret_value == 0 && maria_getint(leaf_buff) > keyinfo->block_length) + { + ret_value= _ma_split_page(info,keyinfo,key,leaf_buff,ret_key,0) | 2; + } + } + else + { + DBUG_PRINT("test",("Inserting of key when deleting")); + if (!_ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos, + &tmp)) + goto err; + ret_value= _ma_insert(info,keyinfo,key,leaf_buff,endpos,keybuff, + (byte*) 0,(byte*) 0,(my_off_t) 0,0); + } + } + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + } + my_afree(next_buff); + DBUG_RETURN(ret_value); + } + + /* Remove last key from leaf page */ + + maria_putint(leaf_buff,key_start-leaf_buff,nod_flag); + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + + /* Place last key in ancestor page on deleted key position */ + + a_length=maria_getint(anc_buff); + endpos=anc_buff+a_length; + if 
(keypos != anc_buff+2+share->base.key_reflength && + !_ma_get_last_key(info,keyinfo,anc_buff,ret_key,keypos,&tmp)) + goto err; + prev_key=(keypos == anc_buff+2+share->base.key_reflength ? + 0 : ret_key); + length=(*keyinfo->pack_key)(keyinfo,share->base.key_reflength, + keypos == endpos ? (byte*) 0 : keypos, + prev_key, prev_key, + keybuff,&s_temp); + if (length > 0) + bmove_upp(endpos+length,endpos,(uint) (endpos-keypos)); + else + bmove(keypos,keypos-length, (int) (endpos-keypos)+length); + (*keyinfo->store_key)(keyinfo,keypos,&s_temp); + /* Save pointer to next leaf */ + if (!(*keyinfo->get_key)(keyinfo,share->base.key_reflength,&keypos,ret_key)) + goto err; + _ma_kpointer(info,keypos - share->base.key_reflength,next_block); + maria_putint(anc_buff,a_length+length,share->base.key_reflength); + + DBUG_RETURN( maria_getint(leaf_buff) <= + (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH : + (uint) keyinfo->underflow_block_length)); +err: + DBUG_RETURN(-1); +} /* del */ + + + /* Balances adjacent pages if underflow occours */ + +static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *anc_buff, + my_off_t leaf_page,/* Ancestor page and underflow page */ + byte *leaf_buff, + byte *keypos) /* Position to pos after key */ +{ + int t_length; + uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag, + key_reflength,key_length; + my_off_t next_page; + byte anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF]; + byte *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key; + byte *after_key; + MARIA_KEY_PARAM s_temp; + MARIA_SHARE *share=info->s; + DBUG_ENTER("underflow"); + DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page, + (ulong) keypos)); + DBUG_DUMP("anc_buff",anc_buff,maria_getint(anc_buff)); + DBUG_DUMP("leaf_buff",leaf_buff,maria_getint(leaf_buff)); + + buff=info->buff; + info->keybuff_used=1; + next_keypos=keypos; + nod_flag=_ma_test_if_nod(leaf_buff); + p_length=nod_flag+2; + anc_length=maria_getint(anc_buff); + leaf_length=maria_getint(leaf_buff); + key_reflength=share->base.key_reflength; + if (info->s->keyinfo+info->lastinx == keyinfo) + info->page_changed=1; + + if ((keypos < anc_buff+anc_length && (info->state->records & 1)) || + keypos == anc_buff+2+key_reflength) + { /* Use page right of anc-page */ + DBUG_PRINT("test",("use right page")); + + if (keyinfo->flag & HA_BINARY_PACK_KEY) + { + if (!(next_keypos= _ma_get_key(info, keyinfo, + anc_buff, buff, keypos, &length))) + goto err; + } + else + { + /* Got to end of found key */ + buff[0]=buff[1]=0; /* Avoid length error check if packed key */ + if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos, + buff)) + goto err; + } + next_page= _ma_kpos(key_reflength,next_keypos); + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0)) + goto err; + buff_length=maria_getint(buff); + DBUG_DUMP("next",buff,buff_length); + + /* find keys to make a big key-page */ + bmove(next_keypos-key_reflength, buff+2, key_reflength); + if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_key,next_keypos,&length) + || !_ma_get_last_key(info,keyinfo,leaf_buff,leaf_key, + leaf_buff+leaf_length,&length)) + goto err; + + /* merge pages and put parting key from anc_buff between */ + prev_key=(leaf_length == p_length ? 
(byte*) 0 : leaf_key); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,buff+p_length, + prev_key, prev_key, + anc_key, &s_temp); + length=buff_length-p_length; + endpos=buff+length+leaf_length+t_length; + /* buff will always be larger than before !*/ + bmove_upp(endpos, buff+buff_length,length); + memcpy(buff, leaf_buff,(size_t) leaf_length); + (*keyinfo->store_key)(keyinfo,buff+leaf_length,&s_temp); + buff_length=(uint) (endpos-buff); + maria_putint(buff,buff_length,nod_flag); + + /* remove key from anc_buff */ + + if (!(s_length=remove_key(keyinfo,key_reflength,keypos,anc_key, + anc_buff+anc_length,(my_off_t *) 0))) + goto err; + + anc_length-=s_length; + maria_putint(anc_buff,anc_length,key_reflength); + + if (buff_length <= keyinfo->block_length) + { /* Keys in one page */ + memcpy(leaf_buff,buff,(size_t) buff_length); + if (_ma_dispose(info,keyinfo,next_page,DFLT_INIT_HITS)) + goto err; + } + else + { /* Page is full */ + endpos=anc_buff+anc_length; + DBUG_PRINT("test",("anc_buff: 0x%lx endpos: 0x%lx", + (long) anc_buff, (long) endpos)); + if (keypos != anc_buff+2+key_reflength && + !_ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length)) + goto err; + if (!(half_pos= _ma_find_half_pos(nod_flag, keyinfo, buff, leaf_key, + &key_length, &after_key))) + goto err; + length=(uint) (half_pos-buff); + memcpy(leaf_buff,buff,(size_t) length); + maria_putint(leaf_buff,length,nod_flag); + + /* Correct new keypointer to leaf_page */ + half_pos=after_key; + _ma_kpointer(info,leaf_key+key_length,next_page); + /* Save key in anc_buff */ + prev_key=(keypos == anc_buff+2+key_reflength ? (byte*) 0 : anc_key), + t_length=(*keyinfo->pack_key)(keyinfo,key_reflength, + (keypos == endpos ? (byte*) 0 : + keypos), + prev_key, prev_key, + leaf_key, &s_temp); + if (t_length >= 0) + bmove_upp(endpos+t_length, endpos, (uint) (endpos-keypos)); + else + bmove(keypos,keypos-t_length,(uint) (endpos-keypos)+t_length); + (*keyinfo->store_key)(keyinfo,keypos,&s_temp); + maria_putint(anc_buff,(anc_length+=t_length),key_reflength); + + /* Store key first in new page */ + if (nod_flag) + bmove(buff+2,half_pos-nod_flag,(size_t) nod_flag); + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key)) + goto err; + t_length=(int) (*keyinfo->pack_key)(keyinfo, nod_flag, (byte*) 0, + (byte*) 0, (byte*) 0, + leaf_key, &s_temp); + /* t_length will always be > 0 for a new page !*/ + length=(uint) ((buff+maria_getint(buff))-half_pos); + bmove(buff+p_length+t_length, half_pos, (size_t) length); + (*keyinfo->store_key)(keyinfo,buff+p_length,&s_temp); + maria_putint(buff,length+t_length+p_length,nod_flag); + + if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff)) + goto err; + } + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + DBUG_RETURN(anc_length <= ((info->quick_mode ? 
MARIA_MIN_BLOCK_LENGTH : + (uint) keyinfo->underflow_block_length))); + } + + DBUG_PRINT("test",("use left page")); + + keypos= _ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length); + if (!keypos) + goto err; + next_page= _ma_kpos(key_reflength,keypos); + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0)) + goto err; + buff_length=maria_getint(buff); + endpos=buff+buff_length; + DBUG_DUMP("prev",buff,buff_length); + + /* find keys to make a big key-page */ + bmove(next_keypos - key_reflength, leaf_buff+2, key_reflength); + next_keypos=keypos; + if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos, + anc_key)) + goto err; + if (!_ma_get_last_key(info,keyinfo,buff,leaf_key,endpos,&length)) + goto err; + + /* merge pages and put parting key from anc_buff between */ + prev_key=(leaf_length == p_length ? (byte*) 0 : leaf_key); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, + (leaf_length == p_length ? + (byte*) 0 : leaf_buff+p_length), + prev_key, prev_key, + anc_key, &s_temp); + if (t_length >= 0) + bmove(endpos+t_length, leaf_buff+p_length, + (size_t) (leaf_length-p_length)); + else /* We gained space */ + bmove(endpos,leaf_buff+((int) p_length-t_length), + (size_t) (leaf_length-p_length+t_length)); + + (*keyinfo->store_key)(keyinfo,endpos,&s_temp); + buff_length=buff_length+leaf_length-p_length+t_length; + maria_putint(buff,buff_length,nod_flag); + + /* remove key from anc_buff */ + if (!(s_length= remove_key(keyinfo,key_reflength,keypos,anc_key, + anc_buff+anc_length,(my_off_t *) 0))) + goto err; + + anc_length-=s_length; + maria_putint(anc_buff,anc_length,key_reflength); + + if (buff_length <= keyinfo->block_length) + { /* Keys in one page */ + if (_ma_dispose(info,keyinfo,leaf_page,DFLT_INIT_HITS)) + goto err; + } + else + { /* Page is full */ + if (keypos == anc_buff+2+key_reflength) + anc_pos=0; /* First key */ + else if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_pos=anc_key,keypos, + &length)) + goto err; + endpos= _ma_find_half_pos(nod_flag,keyinfo,buff,leaf_key, + &key_length, &half_pos); + if (!endpos) + goto err; + _ma_kpointer(info,leaf_key+key_length,leaf_page); + /* Save key in anc_buff */ + DBUG_DUMP("anc_buff",anc_buff,anc_length); + DBUG_DUMP("key_to_anc",leaf_key,key_length); + + temp_pos=anc_buff+anc_length; + t_length=(*keyinfo->pack_key)(keyinfo,key_reflength, + keypos == temp_pos ? 
(byte*) 0 + : keypos, + anc_pos, anc_pos, + leaf_key,&s_temp); + if (t_length > 0) + bmove_upp(temp_pos+t_length, temp_pos, (uint) (temp_pos-keypos)); + else + bmove(keypos,keypos-t_length,(uint) (temp_pos-keypos)+t_length); + (*keyinfo->store_key)(keyinfo,keypos,&s_temp); + maria_putint(anc_buff,(anc_length+=t_length),key_reflength); + + /* Store first key on new page */ + if (nod_flag) + bmove(leaf_buff+2,half_pos-nod_flag,(size_t) nod_flag); + if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key))) + goto err; + DBUG_DUMP("key_to_leaf",leaf_key,length); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (byte*) 0, + (byte*) 0, (byte*) 0, leaf_key, &s_temp); + length=(uint) ((buff+buff_length)-half_pos); + DBUG_PRINT("info",("t_length: %d length: %d",t_length,(int) length)); + bmove(leaf_buff+p_length+t_length,half_pos, + (size_t) length); + (*keyinfo->store_key)(keyinfo,leaf_buff+p_length,&s_temp); + maria_putint(leaf_buff,length+t_length+p_length,nod_flag); + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + maria_putint(buff,endpos-buff,nod_flag); + } + if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff)) + goto err; + DBUG_RETURN(anc_length <= (uint) keyinfo->block_length/2); + +err: + DBUG_RETURN(-1); +} /* underflow */ + + + /* + remove a key from packed buffert + The current code doesn't handle the case that the next key may be + packed better against the previous key if there is a case difference + returns how many chars was removed or 0 on error + */ + +static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *keypos, /* Where key starts */ + byte *lastkey, /* key to be removed */ + byte *page_end, /* End of page */ + my_off_t *next_block) /* ptr to next block */ +{ + int s_length; + byte *start; + DBUG_ENTER("remove_key"); + DBUG_PRINT("enter",("keypos: 0x%lx page_end: 0x%lx",(long) keypos, (long) page_end)); + + start=keypos; + if (!(keyinfo->flag & + (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY | + HA_BINARY_PACK_KEY))) + { + s_length=(int) (keyinfo->keylength+nod_flag); + if (next_block && nod_flag) + *next_block= _ma_kpos(nod_flag,keypos+s_length); + } + else + { /* Let keypos point at next key */ + /* Calculate length of key */ + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey)) + DBUG_RETURN(0); /* Error */ + + if (next_block && nod_flag) + *next_block= _ma_kpos(nod_flag,keypos); + s_length=(int) (keypos-start); + if (keypos != page_end) + { + if (keyinfo->flag & HA_BINARY_PACK_KEY) + { + byte *old_key=start; + uint next_length,prev_length,prev_pack_length; + get_key_length(next_length,keypos); + get_key_pack_length(prev_length,prev_pack_length,old_key); + if (next_length > prev_length) + { + /* We have to copy data from the current key to the next key */ + bmove_upp((char*) keypos,(char*) (lastkey+next_length), + (next_length-prev_length)); + keypos-=(next_length-prev_length)+prev_pack_length; + store_key_length(keypos,prev_length); + s_length=(int) (keypos-start); + } + } + else + { + /* Check if a variable length first key part */ + if ((keyinfo->seg->flag & HA_PACK_KEY) && *keypos & 128) + { + /* Next key is packed against the current one */ + uint next_length,prev_length,prev_pack_length,lastkey_length, + rest_length; + if (keyinfo->seg[0].length >= 127) + { + if (!(prev_length=mi_uint2korr(start) & 32767)) + goto end; + next_length=mi_uint2korr(keypos) & 32767; + keypos+=2; + prev_pack_length=2; + } + else + { + if (!(prev_length= *start & 127)) + goto end; /* Same key as 
previous*/ + next_length= *keypos & 127; + keypos++; + prev_pack_length=1; + } + if (!(*start & 128)) + prev_length=0; /* prev key not packed */ + if (keyinfo->seg[0].flag & HA_NULL_PART) + lastkey++; /* Skip null marker */ + get_key_length(lastkey_length,lastkey); + if (!next_length) /* Same key after */ + { + next_length=lastkey_length; + rest_length=0; + } + else + get_key_length(rest_length,keypos); + + if (next_length >= prev_length) + { /* Key after is based on deleted key */ + uint pack_length,tmp; + bmove_upp((char*) keypos,(char*) (lastkey+next_length), + tmp=(next_length-prev_length)); + rest_length+=tmp; + pack_length= prev_length ? get_pack_length(rest_length): 0; + keypos-=tmp+pack_length+prev_pack_length; + s_length=(int) (keypos-start); + if (prev_length) /* Pack against prev key */ + { + *keypos++= start[0]; + if (prev_pack_length == 2) + *keypos++= start[1]; + store_key_length(keypos,rest_length); + } + else + { + /* Next key is not packed anymore */ + if (keyinfo->seg[0].flag & HA_NULL_PART) + { + rest_length++; /* Mark not null */ + } + if (prev_pack_length == 2) + { + mi_int2store(keypos,rest_length); + } + else + *keypos= rest_length; + } + } + } + } + } + } + end: + bmove(start, start+s_length, (uint) (page_end-start-s_length)); + DBUG_RETURN((uint) s_length); +} /* remove_key */ diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c new file mode 100644 index 00000000000..616147c1067 --- /dev/null +++ b/storage/maria/ma_delete_all.c @@ -0,0 +1,102 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Remove all rows from a MARIA table */ +/* This clears the status information and truncates files */ + +#include "maria_def.h" + +int maria_delete_all_rows(MARIA_HA *info) +{ + uint i; + MARIA_SHARE *share=info->s; + MARIA_STATE_INFO *state=&share->state; + DBUG_ENTER("maria_delete_all_rows"); + + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + /* LOCKTODO take X-lock on table here */ + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + if (_ma_mark_file_changed(info)) + goto err; + + info->state->records=info->state->del=state->split=0; + state->dellink = HA_OFFSET_ERROR; + state->sortkey= (ushort) ~0; + info->state->key_file_length=share->base.keystart; + info->state->data_file_length=0; + info->state->empty=info->state->key_empty=0; + info->state->checksum=0; + + state->key_del= HA_OFFSET_ERROR; + for (i=0 ; i < share->base.keys ; i++) + state->key_root[i]= HA_OFFSET_ERROR; + + /* + If we are using delayed keys or if the user has done changes to the tables + since it was locked then there may be key blocks in the key cache + */ + flush_key_blocks(share->key_cache, share->kfile, FLUSH_IGNORE_CHANGED); + /* + RECOVERYTODO Log the two chsize and header modifications and force the + log. 
So that if crash between the two chsize, we finish the work at + Recovery. For this scenario: + "TRUNCATE TABLE t1; DROP TABLE t1; RENAME TABLE t2 to t1; crash;" + Recovery mustn't truncate the new t1, so the log records of TRUNCATE + should be applied only if t1 exists and its ZeroDirtyPagesLSN is smaller + than the records'. See more comments below. + */ + if (my_chsize(info->dfile, 0, 0, MYF(MY_WME)) || + my_chsize(share->kfile, share->base.keystart, 0, MYF(MY_WME)) ) + goto err; + /* + RECOVERYTODO Consider updating ZeroDirtyPagesLSN here. It is + not a necessity (it is one only in RENAME commands) but an optional + optimization which will allow some REDO skipping at Recovery. + */ + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); +#ifdef HAVE_MMAP + /* Resize mmaped area */ + rw_wrlock(&info->s->mmap_lock); + _ma_remap_file(info, (my_off_t)0); + rw_unlock(&info->s->mmap_lock); +#endif + /* + RECOVERYTODO Until we have the TRUNCATE log record and take it into + account for log-low-water-mark calculation and use it in Recovery, we need + to sync. + */ + if (_ma_sync_table_files(info)) + goto err; + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(0); + +err: + { + int save_errno=my_errno; + /* RECOVERYTODO log the header modifications */ + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + info->update|=HA_STATE_WRITTEN; /* Buffer changed */ + /* RECOVERYTODO until we log above we have to sync */ + if (_ma_sync_table_files(info) && !save_errno) + save_errno= my_errno; + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(my_errno=save_errno); + } +} /* maria_delete */ diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c new file mode 100644 index 00000000000..5c7b4337b20 --- /dev/null +++ b/storage/maria/ma_delete_table.c @@ -0,0 +1,81 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + deletes a table +*/ + +#include "ma_fulltext.h" + +int maria_delete_table(const char *name) +{ + char from[FN_REFLEN]; +#ifdef USE_RAID + uint raid_type=0,raid_chunks=0; +#endif + DBUG_ENTER("maria_delete_table"); + +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(name,"delete"); +#endif + /* LOCKTODO take X-lock on table here */ +#ifdef USE_RAID + { + MARIA_HA *info; + /* + When built with RAID support, we need to determine if this table + makes use of the raid feature. If yes, we need to remove all raid + chunks. This is done with my_raid_delete(). Unfortunately it is + necessary to open the table just to check this. We use + 'open_for_repair' to be able to open even a crashed table. If even + this open fails, we assume no raid configuration for this table + and try to remove the normal data file only. This may however + leave the raid chunks behind. 
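Editor's note (illustration only, not part of the patch): maria_delete_table() removes a table by deleting its two physical files; the fn_format() calls later in this function build those names from the table path. A minimal sketch of the expected result, assuming MARIA_NAME_IEXT and MARIA_NAME_DEXT are the usual ".MAI" (index) and ".MAD" (data) suffixes, and ignoring the symlink and path handling that fn_format() really performs:

#include <stdio.h>

/* Hypothetical helper: derive the two file names from a table path. */
static void maria_file_names(const char *table, char *kfile, char *dfile)
{
  sprintf(kfile, "%s.MAI", table);   /* index file, cf. MARIA_NAME_IEXT */
  sprintf(dfile, "%s.MAD", table);   /* data file,  cf. MARIA_NAME_DEXT */
}

int main(void)
{
  char k[256], d[256];
  maria_file_names("./test/t1", k, d);
  printf("%s\n%s\n", k, d);          /* ./test/t1.MAI and ./test/t1.MAD */
  return 0;
}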
+ */ + if (!(info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR))) + raid_type= 0; + else + { + raid_type= info->s->base.raid_type; + raid_chunks= info->s->base.raid_chunks; + maria_close(info); + } + } +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(name,"delete"); +#endif +#endif /* USE_RAID */ + + fn_format(from,name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + /* + RECOVERYTODO log the two deletes below. + Then do the file deletions. + For this log record to be of any use for Recovery, we need the upper MySQL + layer to be crash-safe in DDLs; when it is we should reconsider the moment + of writing this log record, how to use it in Recovery, and force the log. + For now this record is only informative. + */ + if (my_delete_with_symlink(from, MYF(MY_WME | MY_SYNC_DIR))) + DBUG_RETURN(my_errno); + fn_format(from,name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); +#ifdef USE_RAID + if (raid_type) + DBUG_RETURN(my_raid_delete(from, raid_chunks, MYF(MY_WME | MY_SYNC_DIR)) ? + my_errno : 0); +#endif + DBUG_RETURN(my_delete_with_symlink(from, MYF(MY_WME | MY_SYNC_DIR)) ? + my_errno : 0); +} diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c new file mode 100644 index 00000000000..c3d0bf1fb0a --- /dev/null +++ b/storage/maria/ma_dynrec.c @@ -0,0 +1,1954 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Functions to handle space-packed-records and blobs + + A row may be stored in one or more linked blocks. + The block size is between MARIA_MIN_BLOCK_LENGTH and MARIA_MAX_BLOCK_LENGTH. + Each block is aligned on MARIA_DYN_ALIGN_SIZE. + The reson for the max block size is to not have too many different types + of blocks. For the differnet block types, look at _ma_get_block_info() +*/ + +#include "maria_def.h" + +static my_bool write_dynamic_record(MARIA_HA *info,const byte *record, + ulong reclength); +static int _ma_find_writepos(MARIA_HA *info,ulong reclength,my_off_t *filepos, + ulong *length); +static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, + byte *record, ulong reclength); +static my_bool delete_dynamic_record(MARIA_HA *info,MARIA_RECORD_POS filepos, + uint second_read); +static my_bool _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, + uint length); + +#ifdef THREAD +/* Play it safe; We have a small stack when using threads */ +#undef my_alloca +#undef my_afree +#define my_alloca(A) my_malloc((A),MYF(0)) +#define my_afree(A) my_free((A),MYF(0)) +#endif + + /* Interface function from MARIA_HA */ + +#ifdef HAVE_MMAP + +/* + Create mmaped area for MARIA handler + + SYNOPSIS + _ma_dynmap_file() + info MARIA handler + + RETURN + 0 ok + 1 error. 
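Editor's note (illustration only, not part of the patch): the first check in _ma_dynmap_file() below refuses to mmap a data file whose length, plus MEMMAP_EXTRA_MARGIN, cannot be expressed as a size_t, i.e. a file larger than the process address space; in that case the caller keeps using pread/pwrite. A minimal standalone sketch of that guard (the margin value here is a placeholder, not the real constant):

#include <stddef.h>
#include <stdio.h>

#define MEMMAP_EXTRA_MARGIN 7   /* placeholder for the real Maria constant */

/* Returns 1 when size + margin still fits in a size_t, so a mapping of the
   whole file is at least addressable on this platform. */
static int mmap_size_ok(unsigned long long size)
{
  return size <= (unsigned long long) (~(size_t) 0) - MEMMAP_EXTRA_MARGIN;
}

int main(void)
{
  printf("%d\n", mmap_size_ok(1ULL << 20));   /* a 1MB file always fits */
  return 0;
}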
+*/ + +my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size) +{ + DBUG_ENTER("_ma_dynmap_file"); + if (size > (my_off_t) (~((size_t) 0)) - MEMMAP_EXTRA_MARGIN) + { + DBUG_PRINT("warning", ("File is too large for mmap")); + DBUG_RETURN(1); + } + info->s->file_map= (byte*) + my_mmap(0, (size_t)(size + MEMMAP_EXTRA_MARGIN), + info->s->mode==O_RDONLY ? PROT_READ : + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_NORESERVE, + info->dfile, 0L); + if (info->s->file_map == (byte*) MAP_FAILED) + { + info->s->file_map= NULL; + DBUG_RETURN(1); + } +#if defined(HAVE_MADVISE) + madvise(info->s->file_map, size, MADV_RANDOM); +#endif + info->s->mmaped_length= size; + DBUG_RETURN(0); +} + + +/* + Resize mmaped area for MARIA handler + + SYNOPSIS + _ma_remap_file() + info MARIA handler + + RETURN +*/ + +void _ma_remap_file(MARIA_HA *info, my_off_t size) +{ + if (info->s->file_map) + { + VOID(my_munmap(info->s->file_map, + (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN)); + _ma_dynmap_file(info, size); + } +} +#endif + + +/* + Read bytes from MySAM handler, using mmap or pread + + SYNOPSIS + _ma_mmap_pread() + info MARIA handler + Buffer Input buffer + Count Count of bytes for read + offset Start position + MyFlags + + RETURN + 0 ok +*/ + +uint _ma_mmap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + DBUG_PRINT("info", ("maria_read with mmap %d\n", info->dfile)); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->mmap_lock); + + /* + The following test may fail in the following cases: + - We failed to remap a memory area (fragmented memory?) + - This thread has done some writes, but not yet extended the + memory mapped area. + */ + + if (info->s->mmaped_length >= offset + Count) + { + memcpy(Buffer, info->s->file_map + offset, Count); + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return 0; + } + else + { + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return my_pread(info->dfile, Buffer, Count, offset, MyFlags); + } +} + + + /* wrapper for my_pread in case if mmap isn't used */ + +uint _ma_nommap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + return my_pread(info->dfile, Buffer, Count, offset, MyFlags); +} + + +/* + Write bytes to MySAM handler, using mmap or pwrite + + SYNOPSIS + _ma_mmap_pwrite() + info MARIA handler + Buffer Output buffer + Count Count of bytes for write + offset Start position + MyFlags + + RETURN + 0 ok + !=0 error. In this case return error from pwrite +*/ + +uint _ma_mmap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + DBUG_PRINT("info", ("maria_write with mmap %d\n", info->dfile)); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->mmap_lock); + + /* + The following test may fail in the following cases: + - We failed to remap a memory area (fragmented memory?) + - This thread has done some writes, but not yet extended the + memory mapped area. 
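Editor's note (illustration only, not part of the patch): _ma_mmap_pread() here, and _ma_mmap_pwrite() further below, share the same pattern: serve the request from the mapping when it covers the whole range, otherwise fall back to my_pread()/my_pwrite(). A minimal sketch of that decision, with the handler fields replaced by a hypothetical struct:

#include <stddef.h>
#include <string.h>

/* Hypothetical stand-in for the two handler fields the wrappers consult. */
struct map_state { const char *file_map; size_t mmaped_length; };

/* Returns 1 when the read was served from the mapping, 0 when the caller
   must fall back to my_pread() because the mapping does not (yet) cover
   the requested range. */
static int read_via_map(const struct map_state *s, char *buf,
                        size_t count, size_t offset)
{
  if (s->mmaped_length >= offset + count)
  {
    memcpy(buf, s->file_map + offset, count);
    return 1;
  }
  return 0;
}

int main(void)
{
  char file[32] = "0123456789abcdef";
  char out[4];
  struct map_state s = { file, sizeof(file) };
  return read_via_map(&s, out, 4, 10) ? 0 : 1;   /* 0: served from the map */
}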
+ */ + + if (info->s->mmaped_length >= offset + Count) + { + memcpy(info->s->file_map + offset, Buffer, Count); + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return 0; + } + else + { + info->s->nonmmaped_inserts++; + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return my_pwrite(info->dfile, Buffer, Count, offset, MyFlags); + } + +} + + + /* wrapper for my_pwrite in case if mmap isn't used */ + +uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + return my_pwrite(info->dfile, Buffer, Count, offset, MyFlags); +} + + +my_bool _ma_write_dynamic_record(MARIA_HA *info, const byte *record) +{ + ulong reclength= _ma_rec_pack(info,info->rec_buff + MARIA_REC_BUFF_OFFSET, + record); + return (write_dynamic_record(info,info->rec_buff + MARIA_REC_BUFF_OFFSET, + reclength)); +} + +my_bool _ma_update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *record) +{ + uint length= _ma_rec_pack(info, info->rec_buff + MARIA_REC_BUFF_OFFSET, + record); + return (update_dynamic_record(info, pos, + info->rec_buff + MARIA_REC_BUFF_OFFSET, + length)); +} + + +my_bool _ma_write_blob_record(MARIA_HA *info, const byte *record) +{ + byte *rec_buff; + int error; + ulong reclength,reclength2,extra; + + extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_DYN_DELETE_BLOCK_HEADER+1); + reclength= (info->s->base.pack_reclength + + _ma_calc_total_blob_length(info,record)+ extra); + if (!(rec_buff=(byte*) my_alloca(reclength))) + { + my_errno=ENOMEM; + return(1); + } + reclength2= _ma_rec_pack(info, + rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + record); + DBUG_PRINT("info",("reclength: %lu reclength2: %lu", + reclength, reclength2)); + DBUG_ASSERT(reclength2 <= reclength); + error= write_dynamic_record(info, + rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + reclength2); + my_afree(rec_buff); + return(error != 0); +} + + +my_bool _ma_update_blob_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *record) +{ + byte *rec_buff; + int error; + ulong reclength,extra; + + extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_DYN_DELETE_BLOCK_HEADER); + reclength= (info->s->base.pack_reclength+ + _ma_calc_total_blob_length(info,record)+ extra); +#ifdef NOT_USED /* We now support big rows */ + if (reclength > MARIA_DYN_MAX_ROW_LENGTH) + { + my_errno=HA_ERR_TO_BIG_ROW; + return 1; + } +#endif + if (!(rec_buff=(byte*) my_alloca(reclength))) + { + my_errno=ENOMEM; + return(1); + } + reclength= _ma_rec_pack(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + record); + error=update_dynamic_record(info,pos, + rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + reclength); + my_afree(rec_buff); + return(error != 0); +} + + +my_bool _ma_delete_dynamic_record(MARIA_HA *info) +{ + return delete_dynamic_record(info, info->cur_row.lastpos, 0); +} + + + /* Write record to data-file */ + +static my_bool write_dynamic_record(MARIA_HA *info, const byte *record, + ulong reclength) +{ + int flag; + ulong length; + my_off_t filepos; + DBUG_ENTER("write_dynamic_record"); + + flag=0; + do + { + if (_ma_find_writepos(info,reclength,&filepos,&length)) + goto err; + if (_ma_write_part_record(info,filepos,length, + (info->append_insert_at_end ? + HA_OFFSET_ERROR : info->s->state.dellink), + (byte**) &record,&reclength,&flag)) + goto err; + } while (reclength); + + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + + + /* Get a block for data ; The given data-area must be used !! 
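Editor's note (illustration only, not part of the patch): when no deleted block can be reused, _ma_find_writepos() below sizes a new block as the record length plus a 3 byte header (one extra byte for records close to the 64K limit), rounds that up to a multiple of MARIA_DYN_ALIGN_SIZE, and keeps the result between base.min_block_length and MARIA_MAX_BLOCK_LENGTH. The round-up is the usual power-of-two alignment idiom; a minimal sketch (4 is only a stand-in for the real MARIA_DYN_ALIGN_SIZE):

#include <stdio.h>

#define DYN_ALIGN 4UL   /* stand-in; assumed to be a power of two */

/* Round a wanted block size up to the next multiple of DYN_ALIGN. */
static unsigned long align_block(unsigned long want)
{
  return (want + DYN_ALIGN - 1) & ~(DYN_ALIGN - 1);
}

int main(void)
{
  /* 21 byte record: 21 + 3 = 24, already aligned; 22 byte record: 25 -> 28. */
  printf("%lu %lu\n", align_block(21 + 3), align_block(22 + 3));
  return 0;
}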
*/ + +static int _ma_find_writepos(MARIA_HA *info, + ulong reclength, /* record length */ + my_off_t *filepos, /* Return file pos */ + ulong *length) /* length of block at filepos */ +{ + MARIA_BLOCK_INFO block_info; + ulong tmp; + DBUG_ENTER("_ma_find_writepos"); + + if (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) + { + /* Deleted blocks exists; Get last used block */ + *filepos=info->s->state.dellink; + block_info.second_read=0; + info->rec_cache.seek_not_done=1; + if (!(_ma_get_block_info(&block_info,info->dfile,info->s->state.dellink) & + BLOCK_DELETED)) + { + DBUG_PRINT("error",("Delete link crashed")); + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(-1); + } + info->s->state.dellink=block_info.next_filepos; + info->state->del--; + info->state->empty-= block_info.block_len; + *length= block_info.block_len; + } + else + { + /* No deleted blocks; Allocate a new block */ + *filepos=info->state->data_file_length; + if ((tmp=reclength+3 + test(reclength >= (65520-3))) < + info->s->base.min_block_length) + tmp= info->s->base.min_block_length; + else + tmp= ((tmp+MARIA_DYN_ALIGN_SIZE-1) & + (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1))); + if (info->state->data_file_length > + (info->s->base.max_data_file_length - tmp)) + { + my_errno=HA_ERR_RECORD_FILE_FULL; + DBUG_RETURN(-1); + } + if (tmp > MARIA_MAX_BLOCK_LENGTH) + tmp=MARIA_MAX_BLOCK_LENGTH; + *length= tmp; + info->state->data_file_length+= tmp; + info->s->state.split++; + info->update|=HA_STATE_WRITE_AT_END; + } + DBUG_RETURN(0); +} /* _ma_find_writepos */ + + + +/* + Unlink a deleted block from the deleted list. + This block will be combined with the preceding or next block to form + a big block. +*/ + +static bool unlink_deleted_block(MARIA_HA *info, MARIA_BLOCK_INFO *block_info) +{ + DBUG_ENTER("unlink_deleted_block"); + if (block_info->filepos == info->s->state.dellink) + { + /* First deleted block; We can just use this ! */ + info->s->state.dellink=block_info->next_filepos; + } + else + { + MARIA_BLOCK_INFO tmp; + tmp.second_read=0; + /* Unlink block from the previous block */ + if (!(_ma_get_block_info(&tmp,info->dfile,block_info->prev_filepos) + & BLOCK_DELETED)) + DBUG_RETURN(1); /* Something is wrong */ + mi_sizestore(tmp.header+4,block_info->next_filepos); + if (info->s->file_write(info,(char*) tmp.header+4,8, + block_info->prev_filepos+4, MYF(MY_NABP))) + DBUG_RETURN(1); + /* Unlink block from next block */ + if (block_info->next_filepos != HA_OFFSET_ERROR) + { + if (!(_ma_get_block_info(&tmp,info->dfile,block_info->next_filepos) + & BLOCK_DELETED)) + DBUG_RETURN(1); /* Something is wrong */ + mi_sizestore(tmp.header+12,block_info->prev_filepos); + if (info->s->file_write(info,(char*) tmp.header+12,8, + block_info->next_filepos+12, + MYF(MY_NABP))) + DBUG_RETURN(1); + } + } + /* We now have one less deleted block */ + info->state->del--; + info->state->empty-= block_info->block_len; + info->s->state.split--; + + /* + If this was a block that we where accessing through table scan + (maria_rrnd() or maria_scan(), then ensure that we skip over this block + when doing next maria_rrnd() or maria_scan(). + */ + if (info->cur_row.nextpos == block_info->filepos) + info->cur_row.nextpos+= block_info->block_len; + DBUG_RETURN(0); +} + + +/* + Add a backward link to delete block + + SYNOPSIS + update_backward_delete_link() + info MARIA handler + delete_block Position to delete block to update. 
+ If this is 'HA_OFFSET_ERROR', nothing will be done + filepos Position to block that 'delete_block' should point to + + RETURN + 0 ok + 1 error. In this case my_error is set. +*/ + +static my_bool update_backward_delete_link(MARIA_HA *info, + my_off_t delete_block, + MARIA_RECORD_POS filepos) +{ + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("update_backward_delete_link"); + + if (delete_block != HA_OFFSET_ERROR) + { + block_info.second_read=0; + if (_ma_get_block_info(&block_info,info->dfile,delete_block) + & BLOCK_DELETED) + { + char buff[8]; + mi_sizestore(buff,filepos); + if (info->s->file_write(info,buff, 8, delete_block+12, MYF(MY_NABP))) + DBUG_RETURN(1); /* Error on write */ + } + else + { + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(1); /* Wrong delete link */ + } + } + DBUG_RETURN(0); +} + +/* Delete datarecord from database */ +/* info->rec_cache.seek_not_done is updated in cmp_record */ + +static my_bool delete_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, + uint second_read) +{ + uint length,b_type; + MARIA_BLOCK_INFO block_info,del_block; + int error; + my_bool remove_next_block; + DBUG_ENTER("delete_dynamic_record"); + + /* First add a link from the last block to the new one */ + error= update_backward_delete_link(info, info->s->state.dellink, filepos); + + block_info.second_read=second_read; + do + { + /* Remove block at 'filepos' */ + if ((b_type= _ma_get_block_info(&block_info,info->dfile,filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR) || + (length=(uint) (block_info.filepos-filepos) +block_info.block_len) < + MARIA_MIN_BLOCK_LENGTH) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(1); + } + /* Check if next block is a delete block */ + del_block.second_read=0; + remove_next_block=0; + if (_ma_get_block_info(&del_block,info->dfile,filepos+length) & + BLOCK_DELETED && del_block.block_len+length < + MARIA_DYN_MAX_BLOCK_LENGTH) + { + /* We can't remove this yet as this block may be the head block */ + remove_next_block=1; + length+=del_block.block_len; + } + + block_info.header[0]=0; + mi_int3store(block_info.header+1,length); + mi_sizestore(block_info.header+4,info->s->state.dellink); + if (b_type & BLOCK_LAST) + bfill(block_info.header+12,8,255); + else + mi_sizestore(block_info.header+12,block_info.next_filepos); + if (info->s->file_write(info,(byte*) block_info.header,20,filepos, + MYF(MY_NABP))) + DBUG_RETURN(1); + info->s->state.dellink = filepos; + info->state->del++; + info->state->empty+=length; + filepos=block_info.next_filepos; + + /* Now it's safe to unlink the deleted block directly after this one */ + if (remove_next_block && unlink_deleted_block(info,&del_block)) + error=1; + } while (!(b_type & BLOCK_LAST)); + + DBUG_RETURN(error); +} + + + /* Write a block to datafile */ + +int _ma_write_part_record(MARIA_HA *info, + my_off_t filepos, /* points at empty block */ + ulong length, /* length of block */ + my_off_t next_filepos,/* Next empty block */ + byte **record, /* pointer to record ptr */ + ulong *reclength, /* length of *record */ + int *flag) /* *flag == 0 if header */ +{ + ulong head_length,res_length,extra_length,long_block,del_length; + byte *pos,*record_end; + my_off_t next_delete_block; + uchar temp[MARIA_SPLIT_LENGTH+MARIA_DYN_DELETE_BLOCK_HEADER]; + DBUG_ENTER("_ma_write_part_record"); + + next_delete_block=HA_OFFSET_ERROR; + + res_length=extra_length=0; + if (length > *reclength + MARIA_SPLIT_LENGTH) + { /* Splitt big block */ + res_length=MY_ALIGN(length- *reclength - 
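+ /*
+   Illustrative sketch, not part of the committed code: the 20-byte
+   header written for a deleted block by delete_dynamic_record() above
+   and read back by _ma_get_block_info() as block type 0:
+
+     byte  0         0                marks the block as deleted
+     bytes 1..3      block length     mi_int3store() / mi_uint3korr()
+     bytes 4..11     next_filepos     mi_sizestore() / mi_sizekorr()
+     bytes 12..19    prev_filepos     bfill(...,255) when there is none
+
+   update_backward_delete_link() rewrites only bytes 12..19, while
+   unlink_deleted_block() patches bytes 4..11 of the previous deleted
+   block and bytes 12..19 of the next one to splice a block out.
+ */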
MARIA_EXTEND_BLOCK_LENGTH, + MARIA_DYN_ALIGN_SIZE); + length-= res_length; /* Use this for first part */ + } + long_block= (length < 65520L && *reclength < 65520L) ? 0 : 1; + if (length == *reclength+ 3 + long_block) + { + /* Block is exactly of the right length */ + temp[0]=(uchar) (1+ *flag)+(uchar) long_block; /* Flag is 0 or 6 */ + if (long_block) + { + mi_int3store(temp+1,*reclength); + head_length=4; + } + else + { + mi_int2store(temp+1,*reclength); + head_length=3; + } + } + else if (length-long_block < *reclength+4) + { /* To short block */ + if (next_filepos == HA_OFFSET_ERROR) + next_filepos= (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end ? + info->s->state.dellink : info->state->data_file_length); + if (*flag == 0) /* First block */ + { + if (*reclength > MARIA_MAX_BLOCK_LENGTH) + { + head_length= 16; + temp[0]=13; + mi_int4store(temp+1,*reclength); + mi_int3store(temp+5,length-head_length); + mi_sizestore((byte*) temp+8,next_filepos); + } + else + { + head_length=5+8+long_block*2; + temp[0]=5+(uchar) long_block; + if (long_block) + { + mi_int3store(temp+1,*reclength); + mi_int3store(temp+4,length-head_length); + mi_sizestore((byte*) temp+7,next_filepos); + } + else + { + mi_int2store(temp+1,*reclength); + mi_int2store(temp+3,length-head_length); + mi_sizestore((byte*) temp+5,next_filepos); + } + } + } + else + { + head_length=3+8+long_block; + temp[0]=11+(uchar) long_block; + if (long_block) + { + mi_int3store(temp+1,length-head_length); + mi_sizestore((byte*) temp+4,next_filepos); + } + else + { + mi_int2store(temp+1,length-head_length); + mi_sizestore((byte*) temp+3,next_filepos); + } + } + } + else + { /* Block with empty info last */ + head_length=4+long_block; + extra_length= length- *reclength-head_length; + temp[0]= (uchar) (3+ *flag)+(uchar) long_block; /* 3,4 or 9,10 */ + if (long_block) + { + mi_int3store(temp+1,*reclength); + temp[4]= (uchar) (extra_length); + } + else + { + mi_int2store(temp+1,*reclength); + temp[3]= (uchar) (extra_length); + } + length= *reclength+head_length; /* Write only what is needed */ + } + DBUG_DUMP("header",(byte*) temp,head_length); + + /* Make a long block for one write */ + record_end= *record+length-head_length; + del_length=(res_length ? 
MARIA_DYN_DELETE_BLOCK_HEADER : 0); + bmove((byte*) (*record-head_length),(byte*) temp,head_length); + memcpy(temp,record_end,(size_t) (extra_length+del_length)); + bzero((byte*) record_end,extra_length); + + if (res_length) + { + /* Check first if we can join this block with the next one */ + MARIA_BLOCK_INFO del_block; + my_off_t next_block=filepos+length+extra_length+res_length; + + del_block.second_read=0; + if (next_block < info->state->data_file_length && + info->s->state.dellink != HA_OFFSET_ERROR) + { + if ((_ma_get_block_info(&del_block,info->dfile,next_block) + & BLOCK_DELETED) && + res_length + del_block.block_len < MARIA_DYN_MAX_BLOCK_LENGTH) + { + if (unlink_deleted_block(info,&del_block)) + goto err; + res_length+=del_block.block_len; + } + } + + /* Create a delete link of the last part of the block */ + pos=record_end+extra_length; + pos[0]= '\0'; + mi_int3store(pos+1,res_length); + mi_sizestore(pos+4,info->s->state.dellink); + bfill(pos+12,8,255); /* End link */ + next_delete_block=info->s->state.dellink; + info->s->state.dellink= filepos+length+extra_length; + info->state->del++; + info->state->empty+=res_length; + info->s->state.split++; + } + if (info->opt_flag & WRITE_CACHE_USED && + info->update & HA_STATE_WRITE_AT_END) + { + if (info->update & HA_STATE_EXTEND_BLOCK) + { + info->update&= ~HA_STATE_EXTEND_BLOCK; + if (my_block_write(&info->rec_cache,(byte*) *record-head_length, + length+extra_length+del_length,filepos)) + goto err; + } + else if (my_b_write(&info->rec_cache,(byte*) *record-head_length, + length+extra_length+del_length)) + goto err; + } + else + { + info->rec_cache.seek_not_done=1; + if (info->s->file_write(info,(byte*) *record-head_length, + length+extra_length+ + del_length,filepos,info->s->write_flag)) + goto err; + } + memcpy(record_end,temp,(size_t) (extra_length+del_length)); + *record=record_end; + *reclength-=(length-head_length); + *flag=6; + + if (del_length) + { + /* link the next delete block to this */ + if (update_backward_delete_link(info, next_delete_block, + info->s->state.dellink)) + goto err; + } + + DBUG_RETURN(0); +err: + DBUG_PRINT("exit",("errno: %d",my_errno)); + DBUG_RETURN(1); +} /* _ma_write_part_record */ + + + /* update record from datafile */ + +static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, + byte *record, ulong reclength) +{ + int flag; + uint error; + ulong length; + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("update_dynamic_record"); + + flag=block_info.second_read=0; + while (reclength > 0) + { + if (filepos != info->s->state.dellink) + { + block_info.next_filepos= HA_OFFSET_ERROR; + if ((error= _ma_get_block_info(&block_info,info->dfile,filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + DBUG_PRINT("error",("Got wrong block info")); + if (!(error & BLOCK_FATAL_ERROR)) + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } + length=(ulong) (block_info.filepos-filepos) + block_info.block_len; + if (length < reclength) + { + uint tmp=MY_ALIGN(reclength - length + 3 + + test(reclength >= 65520L),MARIA_DYN_ALIGN_SIZE); + /* Don't create a block bigger than MARIA_MAX_BLOCK_LENGTH */ + tmp= min(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length; + /* Check if we can extend this block */ + if (block_info.filepos + block_info.block_len == + info->state->data_file_length && + info->state->data_file_length < + info->s->base.max_data_file_length-tmp) + { + /* extend file */ + DBUG_PRINT("info",("Extending file with %d bytes",tmp)); + if (info->cur_row.nextpos == 
info->state->data_file_length) + info->cur_row.nextpos+= tmp; + info->state->data_file_length+= tmp; + info->update|= HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK; + length+=tmp; + } + else if (length < MARIA_MAX_BLOCK_LENGTH - MARIA_MIN_BLOCK_LENGTH) + { + /* + Check if next block is a deleted block + Above we have MARIA_MIN_BLOCK_LENGTH to avoid the problem where + the next block is so small it can't be splited which could + casue problems + */ + + MARIA_BLOCK_INFO del_block; + del_block.second_read=0; + if (_ma_get_block_info(&del_block,info->dfile, + block_info.filepos + block_info.block_len) & + BLOCK_DELETED) + { + /* Use; Unlink it and extend the current block */ + DBUG_PRINT("info",("Extending current block")); + if (unlink_deleted_block(info,&del_block)) + goto err; + if ((length+=del_block.block_len) > MARIA_MAX_BLOCK_LENGTH) + { + /* + New block was too big, link overflow part back to + delete list + */ + my_off_t next_pos; + ulong rest_length= length-MARIA_MAX_BLOCK_LENGTH; + set_if_bigger(rest_length, MARIA_MIN_BLOCK_LENGTH); + next_pos= del_block.filepos+ del_block.block_len - rest_length; + + if (update_backward_delete_link(info, info->s->state.dellink, + next_pos)) + DBUG_RETURN(1); + + /* create delete link for data that didn't fit into the page */ + del_block.header[0]=0; + mi_int3store(del_block.header+1, rest_length); + mi_sizestore(del_block.header+4,info->s->state.dellink); + bfill(del_block.header+12,8,255); + if (info->s->file_write(info,(byte*) del_block.header, 20, + next_pos, MYF(MY_NABP))) + DBUG_RETURN(1); + info->s->state.dellink= next_pos; + info->s->state.split++; + info->state->del++; + info->state->empty+= rest_length; + length-= rest_length; + } + } + } + } + } + else + { + if (_ma_find_writepos(info,reclength,&filepos,&length)) + goto err; + } + if (_ma_write_part_record(info,filepos,length,block_info.next_filepos, + &record,&reclength,&flag)) + goto err; + if ((filepos=block_info.next_filepos) == HA_OFFSET_ERROR) + { + /* Start writing data on deleted blocks */ + filepos=info->s->state.dellink; + } + } + + if (block_info.next_filepos != HA_OFFSET_ERROR) + if (delete_dynamic_record(info,block_info.next_filepos,1)) + goto err; + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + + + /* Pack a record. 
Return new reclength */ + +uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from) +{ + uint length,new_length,flag,bit,i; + char *pos,*end,*startpos,*packpos; + enum en_fieldtype type; + reg3 MARIA_COLUMNDEF *rec; + MARIA_BLOB *blob; + DBUG_ENTER("_ma_rec_pack"); + + flag= 0; + bit= 1; + startpos= packpos=to; + to+= info->s->base.pack_bytes; + blob= info->blobs; + rec= info->s->rec; + if (info->s->base.null_bytes) + { + memcpy(to, from, info->s->base.null_bytes); + from+= info->s->base.null_bytes; + to+= info->s->base.null_bytes; + } + + for (i=info->s->base.fields ; i-- > 0; from+= length,rec++) + { + length=(uint) rec->length; + if ((type = (enum en_fieldtype) rec->type) != FIELD_NORMAL) + { + if (type == FIELD_BLOB) + { + if (!blob->length) + flag|=bit; + else + { + char *temp_pos; + size_t tmp_length=length-maria_portable_sizeof_char_ptr; + memcpy((byte*) to,from,tmp_length); + memcpy_fixed(&temp_pos,from+tmp_length,sizeof(char*)); + memcpy(to+tmp_length,temp_pos,(size_t) blob->length); + to+=tmp_length+blob->length; + } + blob++; + } + else if (type == FIELD_SKIP_ZERO) + { + if (memcmp((byte*) from, maria_zero_string, length) == 0) + flag|=bit; + else + { + memcpy((byte*) to,from,(size_t) length); to+=length; + } + } + else if (type == FIELD_SKIP_ENDSPACE || + type == FIELD_SKIP_PRESPACE) + { + pos= (byte*) from; end= (byte*) from + length; + if (type == FIELD_SKIP_ENDSPACE) + { /* Pack trailing spaces */ + while (end > from && *(end-1) == ' ') + end--; + } + else + { /* Pack pref-spaces */ + while (pos < end && *pos == ' ') + pos++; + } + new_length=(uint) (end-pos); + if (new_length +1 + test(rec->length > 255 && new_length > 127) + < length) + { + if (rec->length > 255 && new_length > 127) + { + to[0]=(char) ((new_length & 127)+128); + to[1]=(char) (new_length >> 7); + to+=2; + } + else + *to++= (char) new_length; + memcpy((byte*) to,pos,(size_t) new_length); to+=new_length; + flag|=bit; + } + else + { + memcpy(to,from,(size_t) length); to+=length; + } + } + else if (type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1); + uint tmp_length; + if (pack_length == 1) + { + tmp_length= (uint) *(uchar*) from; + *to++= *from; + } + else + { + tmp_length= uint2korr(from); + store_key_length_inc(to,tmp_length); + } + memcpy(to, from+pack_length,tmp_length); + to+= tmp_length; + continue; + } + else + { + memcpy(to,from,(size_t) length); to+=length; + continue; /* Normal field */ + } + if ((bit= bit << 1) >= 256) + { + *packpos++ = (char) (uchar) flag; + bit=1; flag=0; + } + } + else + { + memcpy(to,from,(size_t) length); to+=length; + } + } + if (bit != 1) + *packpos= (char) (uchar) flag; + if (info->s->calc_checksum) + *to++= (byte) info->cur_row.checksum; + DBUG_PRINT("exit",("packed length: %d",(int) (to-startpos))); + DBUG_RETURN((uint) (to-startpos)); +} /* _ma_rec_pack */ + + + +/* + Check if a record was correctly packed. Used only by maria_chk + Returns 0 if record is ok. 
+*/ + +my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, + ulong packed_length, my_bool with_checksum) +{ + uint length,new_length,flag,bit,i; + char *pos,*end,*packpos,*to; + enum en_fieldtype type; + reg3 MARIA_COLUMNDEF *rec; + DBUG_ENTER("_ma_rec_check"); + + packpos=rec_buff; to= rec_buff+info->s->base.pack_bytes; + rec=info->s->rec; + flag= *packpos; bit=1; + record+= info->s->base.null_bytes; + to+= info->s->base.null_bytes; + + for (i=info->s->base.fields ; i-- > 0; record+= length, rec++) + { + length=(uint) rec->length; + if ((type = (enum en_fieldtype) rec->type) != FIELD_NORMAL) + { + if (type == FIELD_BLOB) + { + uint blob_length= + _ma_calc_blob_length(length-maria_portable_sizeof_char_ptr,record); + if (!blob_length && !(flag & bit)) + goto err; + if (blob_length) + to+=length - maria_portable_sizeof_char_ptr+ blob_length; + } + else if (type == FIELD_SKIP_ZERO) + { + if (memcmp((byte*) record, maria_zero_string, length) == 0) + { + if (!(flag & bit)) + goto err; + } + else + to+=length; + } + else if (type == FIELD_SKIP_ENDSPACE || + type == FIELD_SKIP_PRESPACE) + { + pos= (byte*) record; end= (byte*) record + length; + if (type == FIELD_SKIP_ENDSPACE) + { /* Pack trailing spaces */ + while (end > record && *(end-1) == ' ') + end--; + } + else + { /* Pack pre-spaces */ + while (pos < end && *pos == ' ') + pos++; + } + new_length=(uint) (end-pos); + if (new_length +1 + test(rec->length > 255 && new_length > 127) + < length) + { + if (!(flag & bit)) + goto err; + if (rec->length > 255 && new_length > 127) + { + if (to[0] != (char) ((new_length & 127)+128) || + to[1] != (char) (new_length >> 7)) + goto err; + to+=2; + } + else if (*to++ != (char) new_length) + goto err; + to+=new_length; + } + else + to+=length; + } + else if (type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1); + uint tmp_length; + if (pack_length == 1) + { + tmp_length= (uint) *(uchar*) record; + to+= 1+ tmp_length; + continue; + } + else + { + tmp_length= uint2korr(record); + to+= get_pack_length(tmp_length)+tmp_length; + } + continue; + } + else + { + to+=length; + continue; /* Normal field */ + } + if ((bit= bit << 1) >= 256) + { + flag= *++packpos; + bit=1; + } + } + else + to+= length; + } + if (packed_length != (uint) (to - rec_buff) + test(info->s->calc_checksum) || + (bit != 1 && (flag & ~(bit - 1)))) + goto err; + if (with_checksum && ((uchar) info->cur_row.checksum != (uchar) *to)) + { + DBUG_PRINT("error",("wrong checksum for row")); + goto err; + } + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); +} + + + + /* Unpacks a record */ + /* Returns -1 and my_errno =HA_ERR_RECORD_DELETED if reclength isn't */ + /* right. 
Returns reclength (>0) if ok */ + +ulong _ma_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, + ulong found_length) +{ + uint flag,bit,length,rec_length,min_pack_length; + enum en_fieldtype type; + byte *from_end,*to_end,*packpos; + reg3 MARIA_COLUMNDEF *rec,*end_field; + DBUG_ENTER("_ma_rec_unpack"); + + to_end=to + info->s->base.reclength; + from_end=from+found_length; + flag= (uchar) *from; bit=1; packpos=from; + if (found_length < info->s->base.min_pack_length) + goto err; + from+= info->s->base.pack_bytes; + min_pack_length= info->s->base.min_pack_length - info->s->base.pack_bytes; + + if ((length= info->s->base.null_bytes)) + { + memcpy(to, from, length); + from+= length; + to+= length; + min_pack_length-= length; + } + + for (rec=info->s->rec , end_field=rec+info->s->base.fields ; + rec < end_field ; to+= rec_length, rec++) + { + rec_length=rec->length; + if ((type = (enum en_fieldtype) rec->type) != FIELD_NORMAL && + (type != FIELD_CHECK)) + { + if (type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(rec_length-1); + if (pack_length == 1) + { + length= (uint) *(uchar*) from; + if (length > rec_length-1) + goto err; + *to= *from++; + } + else + { + get_key_length(length, from); + if (length > rec_length-2) + goto err; + int2store(to,length); + } + if (from+length > from_end) + goto err; + memcpy(to+pack_length, from, length); + from+= length; + min_pack_length--; + continue; + } + if (flag & bit) + { + if (type == FIELD_BLOB || type == FIELD_SKIP_ZERO) + bzero((byte*) to,rec_length); + else if (type == FIELD_SKIP_ENDSPACE || + type == FIELD_SKIP_PRESPACE) + { + if (rec->length > 255 && *from & 128) + { + if (from + 1 >= from_end) + goto err; + length= (*from & 127)+ ((uint) (uchar) *(from+1) << 7); from+=2; + } + else + { + if (from == from_end) + goto err; + length= (uchar) *from++; + } + min_pack_length--; + if (length >= rec_length || + min_pack_length + length > (uint) (from_end - from)) + goto err; + if (type == FIELD_SKIP_ENDSPACE) + { + memcpy(to,(byte*) from,(size_t) length); + bfill((byte*) to+length,rec_length-length,' '); + } + else + { + bfill((byte*) to,rec_length-length,' '); + memcpy(to+rec_length-length,(byte*) from,(size_t) length); + } + from+=length; + } + } + else if (type == FIELD_BLOB) + { + uint size_length=rec_length- maria_portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length,from); + if ((ulong) (from_end-from) - size_length < blob_length || + min_pack_length > (uint) (from_end -(from+size_length+blob_length))) + goto err; + memcpy((byte*) to,(byte*) from,(size_t) size_length); + from+=size_length; + memcpy_fixed((byte*) to+size_length,(byte*) &from,sizeof(char*)); + from+=blob_length; + } + else + { + if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE) + min_pack_length--; + if (min_pack_length + rec_length > (uint) (from_end - from)) + goto err; + memcpy(to,(byte*) from,(size_t) rec_length); from+=rec_length; + } + if ((bit= bit << 1) >= 256) + { + flag= (uchar) *++packpos; bit=1; + } + } + else + { + if (min_pack_length > (uint) (from_end - from)) + goto err; + min_pack_length-=rec_length; + memcpy(to, (byte*) from, (size_t) rec_length); + from+=rec_length; + } + } + if (info->s->calc_checksum) + info->cur_row.checksum= (uint) (uchar) *from++; + if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit-1)))) + DBUG_RETURN(found_length); + +err: + my_errno= HA_ERR_WRONG_IN_RECORD; + DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx", + (long) to, (long) 
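+ /*
+   Illustrative sketch, not part of the committed code: the packed row
+   format that _ma_rec_pack() and _ma_rec_unpack() above agree on is
+
+     [pack_bytes of flag bits][null_bytes][field 0]...[field N][checksum?]
+
+   where a set flag bit means the field was stored in shortened or empty
+   form (empty blob, all-zero FIELD_SKIP_ZERO, trimmed SKIP_ENDSPACE /
+   SKIP_PRESPACE).  The trimmed length is one byte, or two bytes when
+   rec->length > 255 and the kept length > 127:
+
+     to[0]= (char) ((new_length & 127) + 128);     low 7 bits, high bit set
+     to[1]= (char) (new_length >> 7);              remaining bits
+
+   which the unpack side reverses as
+
+     length= (*from & 127) + ((uint) (uchar) from[1] << 7);
+ */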
to_end, (long) from, (long) from_end)); + DBUG_DUMP("from",(byte*) info->rec_buff,info->s->base.min_pack_length); + DBUG_RETURN(MY_FILE_ERROR); +} /* _ma_rec_unpack */ + + + /* Calc length of blob. Update info in blobs->length */ + +ulong _ma_calc_total_blob_length(MARIA_HA *info, const byte *record) +{ + ulong length; + MARIA_BLOB *blob,*end; + + for (length=0, blob= info->blobs, end=blob+info->s->base.blobs ; + blob != end; + blob++) + { + blob->length= _ma_calc_blob_length(blob->pack_length,record + blob->offset); + length+=blob->length; + } + return length; +} + + +ulong _ma_calc_blob_length(uint length, const byte *pos) +{ + switch (length) { + case 1: + return (uint) (uchar) *pos; + case 2: + return (uint) uint2korr(pos); + case 3: + return uint3korr(pos); + case 4: + return uint4korr(pos); + default: + break; + } + return 0; /* Impossible */ +} + + +void _ma_store_blob_length(byte *pos,uint pack_length,uint length) +{ + switch (pack_length) { + case 1: + *pos= (uchar) length; + break; + case 2: + int2store(pos,length); + break; + case 3: + int3store(pos,length); + break; + case 4: + int4store(pos,length); + default: + break; + } + return; +} + + +/* + Read record from datafile. + + SYNOPSIS + _ma_read_dynamic_record() + info MARIA_HA pointer to table. + filepos From where to read the record. + buf Destination for record. + + NOTE + If a write buffer is active, it needs to be flushed if its contents + intersects with the record to read. We always check if the position + of the first byte of the write buffer is lower than the position + past the last byte to read. In theory this is also true if the write + buffer is completely below the read segment. That is, if there is no + intersection. But this case is unusual. We flush anyway. Only if the + first byte in the write buffer is above the last byte to read, we do + not flush. + + A dynamic record may need several reads. So this check must be done + before every read. Reading a dynamic record starts with reading the + block header. If the record does not fit into the free space of the + header, the block may be longer than the header. In this case a + second read is necessary. These one or two reads repeat for every + part of the record. + + RETURN + 0 OK + 1 Error +*/ +int _ma_read_dynamic_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos) +{ + int block_of_record; + uint b_type,left_length; + byte *to; + MARIA_BLOCK_INFO block_info; + File file; + DBUG_ENTER("_ma_read_dynamic_record"); + + if (filepos != HA_OFFSET_ERROR) + { + LINT_INIT(to); + LINT_INIT(left_length); + file=info->dfile; + block_of_record= 0; /* First block of record is numbered as zero. */ + block_info.second_read= 0; + do + { + /* A corrupted table can have wrong pointers. 
(Bug# 19835) */ + if (filepos == HA_OFFSET_ERROR) + goto panic; + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file < filepos + MARIA_BLOCK_INFO_HEADER_LENGTH && + flush_io_cache(&info->rec_cache)) + goto err; + info->rec_cache.seek_not_done=1; + if ((b_type= _ma_get_block_info(&block_info, file, filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED)) + my_errno=HA_ERR_RECORD_DELETED; + goto err; + } + if (block_of_record++ == 0) /* First block */ + { + if (block_info.rec_len > (uint) info->s->base.max_pack_length) + goto panic; + if (info->s->base.blobs) + { + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + goto err; + } + to= info->rec_buff; + left_length=block_info.rec_len; + } + if (left_length < block_info.data_len || ! block_info.data_len) + goto panic; /* Wrong linked record */ + /* copy information that is already read */ + { + uint offset= (uint) (block_info.filepos - filepos); + uint prefetch_len= (sizeof(block_info.header) - offset); + filepos+= sizeof(block_info.header); + + if (prefetch_len > block_info.data_len) + prefetch_len= block_info.data_len; + if (prefetch_len) + { + memcpy((byte*) to, block_info.header + offset, prefetch_len); + block_info.data_len-= prefetch_len; + left_length-= prefetch_len; + to+= prefetch_len; + } + } + /* read rest of record from file */ + if (block_info.data_len) + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file < filepos + block_info.data_len && + flush_io_cache(&info->rec_cache)) + goto err; + /* + What a pity that this method is not called 'file_pread' and that + there is no equivalent without seeking. We are at the right + position already. :( + */ + if (info->s->file_read(info, (byte*) to, block_info.data_len, + filepos, MYF(MY_NABP))) + goto panic; + left_length-=block_info.data_len; + to+=block_info.data_len; + } + filepos= block_info.next_filepos; + } while (left_length); + + info->update|= HA_STATE_AKTIV; /* We have a aktive record */ + fast_ma_writeinfo(info); + DBUG_RETURN(_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) != + MY_FILE_ERROR ? 
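+ /*
+   Minimal sketch of the loop above, not part of the committed code:
+   a dynamic row is a chain of blocks, each header carrying data_len
+   and next_filepos, read until the whole rec_len has been collected:
+
+     left= block.rec_len;  to= info->rec_buff;
+     while (left)
+     {
+       _ma_get_block_info(&block, file, pos);
+       copy the bytes already fetched together with the header,
+       then file_read() the remaining block.data_len bytes into 'to';
+       left-= block.data_len;  to+= block.data_len;
+       pos=   block.next_filepos;
+     }
+     _ma_rec_unpack(info, buf, info->rec_buff, block.rec_len);
+ */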
0 : 1); + } + fast_ma_writeinfo(info); + DBUG_RETURN(1); /* Wrong data to read */ + +panic: + my_errno=HA_ERR_WRONG_IN_RECORD; +err: + VOID(_ma_writeinfo(info,0)); + DBUG_RETURN(1); +} + + /* compare unique constraint between stored rows */ + +my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos) +{ + byte *old_rec_buff,*old_record; + my_off_t old_rec_buff_size; + my_bool error; + DBUG_ENTER("_ma_cmp_dynamic_unique"); + + if (!(old_record=my_alloca(info->s->base.reclength))) + DBUG_RETURN(1); + + /* Don't let the compare destroy blobs that may be in use */ + old_rec_buff= info->rec_buff; + old_rec_buff_size= info->rec_buff_size; + + if (info->s->base.blobs) + info->rec_buff= 0; + error= _ma_read_dynamic_record(info, old_record, pos) != 0; + if (!error) + error=_ma_unique_comp(def, record, old_record, def->null_are_equal) != 0; + if (info->s->base.blobs) + { + my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + info->rec_buff= old_rec_buff; + info->rec_buff_size= old_rec_buff_size; + } + my_afree(old_record); + DBUG_RETURN(error); +} + + + /* Compare of record on disk with packed record in memory */ + +my_bool _ma_cmp_dynamic_record(register MARIA_HA *info, + register const byte *record) +{ + uint flag, reclength, b_type,cmp_length; + my_off_t filepos; + byte *buffer; + MARIA_BLOCK_INFO block_info; + my_bool error= 1; + DBUG_ENTER("_ma_cmp_dynamic_record"); + + /* We are going to do changes; dont let anybody disturb */ + dont_break(); /* Dont allow SIGHUP or SIGINT */ + + if (info->opt_flag & WRITE_CACHE_USED) + { + info->update&= ~(HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK); + if (flush_io_cache(&info->rec_cache)) + DBUG_RETURN(1); + } + info->rec_cache.seek_not_done=1; + + /* If nobody have touched the database we don't have to test rec */ + + buffer=info->rec_buff; + if ((info->opt_flag & READ_CHECK_USED)) + { /* If check isn't disabled */ + if (info->s->base.blobs) + { + if (!(buffer=(byte*) my_alloca(info->s->base.pack_reclength+ + _ma_calc_total_blob_length(info,record)))) + DBUG_RETURN(1); + } + reclength= _ma_rec_pack(info,buffer,record); + record= buffer; + + filepos= info->cur_row.lastpos; + flag=block_info.second_read=0; + block_info.next_filepos=filepos; + while (reclength > 0) + { + if ((b_type= _ma_get_block_info(&block_info,info->dfile, + block_info.next_filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED)) + my_errno=HA_ERR_RECORD_CHANGED; + goto err; + } + if (flag == 0) /* First block */ + { + flag=1; + if (reclength != block_info.rec_len) + { + my_errno=HA_ERR_RECORD_CHANGED; + goto err; + } + } else if (reclength < block_info.data_len) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } + reclength-= block_info.data_len; + cmp_length= block_info.data_len; + if (!reclength && info->s->calc_checksum) + cmp_length--; /* 'record' may not contain checksum */ + + if (_ma_cmp_buffer(info->dfile,record,block_info.filepos, + cmp_length)) + { + my_errno=HA_ERR_RECORD_CHANGED; + goto err; + } + flag=1; + record+=block_info.data_len; + } + } + my_errno=0; + error= 0; +err: + if (buffer != info->rec_buff) + my_afree((gptr) buffer); + DBUG_PRINT("exit", ("result: %d", error)); + DBUG_RETURN(error); +} + + + /* Compare file to buffert */ + +static my_bool _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, + uint length) +{ + uint next_length; + char temp_buff[IO_SIZE*2]; + DBUG_ENTER("_ma_cmp_buffer"); + + next_length= 
IO_SIZE*2 - (uint) (filepos & (IO_SIZE-1)); + + while (length > IO_SIZE*2) + { + if (my_pread(file,temp_buff,next_length,filepos, MYF(MY_NABP)) || + memcmp((byte*) buff,temp_buff,next_length)) + goto err; + filepos+=next_length; + buff+=next_length; + length-= next_length; + next_length=IO_SIZE*2; + } + if (my_pread(file,temp_buff,length,filepos,MYF(MY_NABP))) + goto err; + DBUG_RETURN(memcmp((byte*) buff,temp_buff,length) != 0); +err: + DBUG_RETURN(1); +} + + +/* + Read record from datafile. + + SYNOPSIS + _ma_read_rnd_dynamic_record() + info MARIA_HA pointer to table. + buf Destination for record. + filepos From where to read the record. + skip_deleted_blocks If to repeat reading until a non-deleted + record is found. + + NOTE + + If a write buffer is active, it needs to be flushed if its contents + intersects with the record to read. We always check if the position + of the first byte of the write buffer is lower than the position + past the last byte to read. In theory this is also true if the write + buffer is completely below the read segment. That is, if there is no + intersection. But this case is unusual. We flush anyway. Only if the + first byte in the write buffer is above the last byte to read, we do + not flush. + + A dynamic record may need several reads. So this check must be done + before every read. Reading a dynamic record starts with reading the + block header. If the record does not fit into the free space of the + header, the block may be longer than the header. In this case a + second read is necessary. These one or two reads repeat for every + part of the record. + + RETURN + 0 OK + != 0 Error +*/ + +int _ma_read_rnd_dynamic_record(MARIA_HA *info, + byte *buf, + MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks) +{ + int block_of_record, info_read, save_errno; + uint left_len,b_type; + byte *to; + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_read_rnd_dynamic_record"); + + info_read=0; + LINT_INIT(to); + + if (info->lock_type == F_UNLCK) + { +#ifndef UNSAFE_LOCKING +#else + info->tmp_lock_type=F_RDLCK; +#endif + } + else + info_read=1; /* memory-keyinfoblock is ok */ + + block_of_record= 0; /* First block of record is numbered as zero. */ + block_info.second_read= 0; + left_len=1; + do + { + if (filepos >= info->state->data_file_length) + { + if (!info_read) + { /* Check if changed */ + info_read=1; + info->rec_cache.seek_not_done=1; + if (_ma_state_info_read_dsk(share->kfile,&share->state,1)) + goto panic; + } + if (filepos >= info->state->data_file_length) + { + my_errno= HA_ERR_END_OF_FILE; + goto err; + } + } + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache,(byte*) block_info.header,filepos, + sizeof(block_info.header), + (!block_of_record && skip_deleted_blocks ? 
+ READING_NEXT : 0) | READING_HEADER)) + goto panic; + b_type= _ma_get_block_info(&block_info,-1,filepos); + } + else + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file < filepos + MARIA_BLOCK_INFO_HEADER_LENGTH && + flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + info->rec_cache.seek_not_done=1; + b_type= _ma_get_block_info(&block_info,info->dfile,filepos); + } + + if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) + && skip_deleted_blocks) + { + filepos=block_info.filepos+block_info.block_len; + block_info.second_read=0; + continue; /* Search after next_record */ + } + if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) + { + my_errno= HA_ERR_RECORD_DELETED; + info->cur_row.lastpos= block_info.filepos; + info->cur_row.nextpos= block_info.filepos+block_info.block_len; + } + goto err; + } + if (block_of_record == 0) /* First block */ + { + if (block_info.rec_len > (uint) share->base.max_pack_length) + goto panic; + info->cur_row.lastpos= filepos; + if (share->base.blobs) + { + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + goto err; + } + to= info->rec_buff; + left_len=block_info.rec_len; + } + if (left_len < block_info.data_len) + goto panic; /* Wrong linked record */ + + /* copy information that is already read */ + { + uint offset=(uint) (block_info.filepos - filepos); + uint tmp_length= (sizeof(block_info.header) - offset); + filepos=block_info.filepos; + + if (tmp_length > block_info.data_len) + tmp_length= block_info.data_len; + if (tmp_length) + { + memcpy((byte*) to, block_info.header+offset,tmp_length); + block_info.data_len-=tmp_length; + left_len-=tmp_length; + to+=tmp_length; + filepos+=tmp_length; + } + } + /* read rest of record from file */ + if (block_info.data_len) + { + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache,(byte*) to,filepos, + block_info.data_len, + (!block_of_record && skip_deleted_blocks) ? + READING_NEXT : 0)) + goto panic; + } + else + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file < + block_info.filepos + block_info.data_len && + flush_io_cache(&info->rec_cache)) + goto err; + /* VOID(my_seek(info->dfile,filepos,MY_SEEK_SET,MYF(0))); */ + if (my_read(info->dfile,(byte*) to,block_info.data_len,MYF(MY_NABP))) + { + if (my_errno == -1) + my_errno= HA_ERR_WRONG_IN_RECORD; /* Unexpected end of file */ + goto err; + } + } + } + /* + Increment block-of-record counter. If it was the first block, + remember the position behind the block for the next call. 
+ */ + if (block_of_record++ == 0) + { + info->cur_row.nextpos= block_info.filepos+block_info.block_len; + skip_deleted_blocks=0; + } + left_len-=block_info.data_len; + to+=block_info.data_len; + filepos=block_info.next_filepos; + } while (left_len); + + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + fast_ma_writeinfo(info); + if (_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) != + MY_FILE_ERROR) + DBUG_RETURN(0); + DBUG_RETURN(my_errno); /* Wrong record */ + +panic: + my_errno=HA_ERR_WRONG_IN_RECORD; /* Something is fatal wrong */ +err: + save_errno=my_errno; + VOID(_ma_writeinfo(info,0)); + DBUG_RETURN(my_errno=save_errno); +} + + + /* Read and process header from a dynamic-record-file */ + +uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos) +{ + uint return_val=0; + uchar *header=info->header; + + if (file >= 0) + { + /* + We do not use my_pread() here because we want to have the file + pointer set to the end of the header after this function. + my_pread() may leave the file pointer untouched. + */ + VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0))); + if (my_read(file,(char*) header,sizeof(info->header),MYF(0)) != + sizeof(info->header)) + goto err; + } + DBUG_DUMP("header",(byte*) header,MARIA_BLOCK_INFO_HEADER_LENGTH); + if (info->second_read) + { + if (info->header[0] <= 6 || info->header[0] == 13) + return_val=BLOCK_SYNC_ERROR; + } + else + { + if (info->header[0] > 6 && info->header[0] != 13) + return_val=BLOCK_SYNC_ERROR; + } + info->next_filepos= HA_OFFSET_ERROR; /* Dummy if no next block */ + + switch (info->header[0]) { + case 0: + if ((info->block_len=(uint) mi_uint3korr(header+1)) < + MARIA_MIN_BLOCK_LENGTH || + (info->block_len & (MARIA_DYN_ALIGN_SIZE -1))) + goto err; + info->filepos=filepos; + info->next_filepos=mi_sizekorr(header+4); + info->prev_filepos=mi_sizekorr(header+12); +#if SIZEOF_OFF_T == 4 + if ((mi_uint4korr(header+4) != 0 && + (mi_uint4korr(header+4) != (ulong) ~0 || + info->next_filepos != (ulong) ~0)) || + (mi_uint4korr(header+12) != 0 && + (mi_uint4korr(header+12) != (ulong) ~0 || + info->prev_filepos != (ulong) ~0))) + goto err; +#endif + return return_val | BLOCK_DELETED; /* Deleted block */ + + case 1: + info->rec_len=info->data_len=info->block_len=mi_uint2korr(header+1); + info->filepos=filepos+3; + return return_val | BLOCK_FIRST | BLOCK_LAST; + case 2: + info->rec_len=info->data_len=info->block_len=mi_uint3korr(header+1); + info->filepos=filepos+4; + return return_val | BLOCK_FIRST | BLOCK_LAST; + + case 13: + info->rec_len=mi_uint4korr(header+1); + info->block_len=info->data_len=mi_uint3korr(header+5); + info->next_filepos=mi_sizekorr(header+8); + info->second_read=1; + info->filepos=filepos+16; + return return_val | BLOCK_FIRST; + + case 3: + info->rec_len=info->data_len=mi_uint2korr(header+1); + info->block_len=info->rec_len+ (uint) header[3]; + info->filepos=filepos+4; + return return_val | BLOCK_FIRST | BLOCK_LAST; + case 4: + info->rec_len=info->data_len=mi_uint3korr(header+1); + info->block_len=info->rec_len+ (uint) header[4]; + info->filepos=filepos+5; + return return_val | BLOCK_FIRST | BLOCK_LAST; + + case 5: + info->rec_len=mi_uint2korr(header+1); + info->block_len=info->data_len=mi_uint2korr(header+3); + info->next_filepos=mi_sizekorr(header+5); + info->second_read=1; + info->filepos=filepos+13; + return return_val | BLOCK_FIRST; + case 6: + info->rec_len=mi_uint3korr(header+1); + info->block_len=info->data_len=mi_uint3korr(header+4); + info->next_filepos=mi_sizekorr(header+7); + 
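+ /*
+   Illustrative summary, not part of the committed code, of the block
+   type byte (header[0]) decoded by this switch:
+
+     0        deleted block: 3-byte length plus next/prev delete links
+     1, 2     whole record in one block, 2- resp. 3-byte length
+     3, 4     whole record plus a count of unused tail bytes
+     5, 6     first part of a split record, rec_len + next_filepos
+     7..12    same layouts as 1..6 but without rec_len (later parts)
+     13       first part of a very long record, 4-byte rec_len
+
+   Values 1..6 and 13 set BLOCK_FIRST; 1..4 and 7..10 set BLOCK_LAST.
+ */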
info->second_read=1; + info->filepos=filepos+15; + return return_val | BLOCK_FIRST; + + /* The following blocks are identical to 1-6 without rec_len */ + case 7: + info->data_len=info->block_len=mi_uint2korr(header+1); + info->filepos=filepos+3; + return return_val | BLOCK_LAST; + case 8: + info->data_len=info->block_len=mi_uint3korr(header+1); + info->filepos=filepos+4; + return return_val | BLOCK_LAST; + + case 9: + info->data_len=mi_uint2korr(header+1); + info->block_len=info->data_len+ (uint) header[3]; + info->filepos=filepos+4; + return return_val | BLOCK_LAST; + case 10: + info->data_len=mi_uint3korr(header+1); + info->block_len=info->data_len+ (uint) header[4]; + info->filepos=filepos+5; + return return_val | BLOCK_LAST; + + case 11: + info->data_len=info->block_len=mi_uint2korr(header+1); + info->next_filepos=mi_sizekorr(header+3); + info->second_read=1; + info->filepos=filepos+11; + return return_val; + case 12: + info->data_len=info->block_len=mi_uint3korr(header+1); + info->next_filepos=mi_sizekorr(header+4); + info->second_read=1; + info->filepos=filepos+12; + return return_val; + } + +err: + my_errno=HA_ERR_WRONG_IN_RECORD; /* Garbage */ + return BLOCK_ERROR; +} diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c new file mode 100644 index 00000000000..90e79362442 --- /dev/null +++ b/storage/maria/ma_extra.c @@ -0,0 +1,458 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif + +static void maria_extra_keyflag(MARIA_HA *info, + enum ha_extra_function function); + + +/* + Set options and buffers to optimize table handling + + SYNOPSIS + maria_extra() + info open table + function operation + extra_arg Pointer to extra argument (normally pointer to ulong) + Used when function is one of: + HA_EXTRA_WRITE_CACHE + HA_EXTRA_CACHE + RETURN VALUES + 0 ok + # error +*/ + +int maria_extra(MARIA_HA *info, enum ha_extra_function function, + void *extra_arg) +{ + int error=0; + ulong cache_size; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_extra"); + DBUG_PRINT("enter",("function: %d",(int) function)); + + switch (function) { + case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */ + info->lastinx= 0; /* Use first index as def */ + info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR; + info->page_changed=1; + /* Next/prev gives first/last */ + if (info->opt_flag & READ_CACHE_USED) + { + reinit_io_cache(&info->rec_cache,READ_CACHE,0, + (pbool) (info->lock_type != F_UNLCK), + (pbool) test(info->update & HA_STATE_ROW_CHANGED) + ); + } + info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | + HA_STATE_PREV_FOUND); + break; + case HA_EXTRA_CACHE: + if (info->lock_type == F_UNLCK && + (share->options & HA_OPTION_PACK_RECORD)) + { + error=1; /* Not possibly if not locked */ + my_errno=EACCES; + break; + } + if (info->s->file_map) /* Don't use cache if mmap */ + break; +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if ((share->options & HA_OPTION_COMPRESS_RECORD)) + { + pthread_mutex_lock(&share->intern_lock); + if (_ma_memmap_file(info)) + { + /* We don't nead MADV_SEQUENTIAL if small file */ + madvise(share->file_map,share->state.state.data_file_length, + share->state.state.data_file_length <= RECORD_CACHE_SIZE*16 ? + MADV_RANDOM : MADV_SEQUENTIAL); + pthread_mutex_unlock(&share->intern_lock); + break; + } + pthread_mutex_unlock(&share->intern_lock); + } +#endif + if (info->opt_flag & WRITE_CACHE_USED) + { + info->opt_flag&= ~WRITE_CACHE_USED; + if ((error=end_io_cache(&info->rec_cache))) + break; + } + if (!(info->opt_flag & + (READ_CACHE_USED | WRITE_CACHE_USED | MEMMAP_USED))) + { + cache_size= (extra_arg ? *(ulong*) extra_arg : + my_default_record_cache_size); + if (!(init_io_cache(&info->rec_cache,info->dfile, + (uint) min(info->state->data_file_length+1, + cache_size), + READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK), + MYF(share->write_flag & MY_WAIT_IF_FULL)))) + { + info->opt_flag|=READ_CACHE_USED; + info->update&= ~HA_STATE_ROW_CHANGED; + } + if (share->concurrent_insert) + info->rec_cache.end_of_file=info->state->data_file_length; + } + break; + case HA_EXTRA_REINIT_CACHE: + if (info->opt_flag & READ_CACHE_USED) + { + reinit_io_cache(&info->rec_cache, READ_CACHE, info->cur_row.nextpos, + (pbool) (info->lock_type != F_UNLCK), + (pbool) test(info->update & HA_STATE_ROW_CHANGED)); + info->update&= ~HA_STATE_ROW_CHANGED; + if (share->concurrent_insert) + info->rec_cache.end_of_file=info->state->data_file_length; + } + break; + case HA_EXTRA_WRITE_CACHE: + if (info->lock_type == F_UNLCK) + { + error=1; /* Not possibly if not locked */ + break; + } + + cache_size= (extra_arg ? 
*(ulong*) extra_arg : + my_default_record_cache_size); + if (!(info->opt_flag & + (READ_CACHE_USED | WRITE_CACHE_USED | OPT_NO_ROWS)) && + !share->state.header.uniques) + if (!(init_io_cache(&info->rec_cache,info->dfile, cache_size, + WRITE_CACHE,info->state->data_file_length, + (pbool) (info->lock_type != F_UNLCK), + MYF(share->write_flag & MY_WAIT_IF_FULL)))) + { + info->opt_flag|=WRITE_CACHE_USED; + info->update&= ~(HA_STATE_ROW_CHANGED | + HA_STATE_WRITE_AT_END | + HA_STATE_EXTEND_BLOCK); + } + break; + case HA_EXTRA_PREPARE_FOR_UPDATE: + if (info->s->data_file_type != DYNAMIC_RECORD) + break; + /* Remove read/write cache if dynamic rows */ + case HA_EXTRA_NO_CACHE: + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error=end_io_cache(&info->rec_cache); + /* Sergei will insert full text index caching here */ + } +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if (info->opt_flag & MEMMAP_USED) + madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM); +#endif + break; + case HA_EXTRA_FLUSH_CACHE: + if (info->opt_flag & WRITE_CACHE_USED) + { + if ((error=flush_io_cache(&info->rec_cache))) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Fatal error found */ + } + } + break; + case HA_EXTRA_NO_READCHECK: + info->opt_flag&= ~READ_CHECK_USED; /* No readcheck */ + break; + case HA_EXTRA_READCHECK: + info->opt_flag|= READ_CHECK_USED; + break; + case HA_EXTRA_KEYREAD: /* Read only keys to record */ + case HA_EXTRA_REMEMBER_POS: + info->opt_flag |= REMEMBER_OLD_POS; + bmove((byte*) info->lastkey+share->base.max_key_length*2, + (byte*) info->lastkey,info->lastkey_length); + info->save_update= info->update; + info->save_lastinx= info->lastinx; + info->save_lastpos= info->cur_row.lastpos; + info->save_lastkey_length=info->lastkey_length; + if (function == HA_EXTRA_REMEMBER_POS) + break; + /* fall through */ + case HA_EXTRA_KEYREAD_CHANGE_POS: + info->opt_flag |= KEY_READ_USED; + info->read_record= _ma_read_key_record; + break; + case HA_EXTRA_NO_KEYREAD: + case HA_EXTRA_RESTORE_POS: + if (info->opt_flag & REMEMBER_OLD_POS) + { + bmove((byte*) info->lastkey, + (byte*) info->lastkey+share->base.max_key_length*2, + info->save_lastkey_length); + info->update= info->save_update | HA_STATE_WRITTEN; + info->lastinx= info->save_lastinx; + info->cur_row.lastpos= info->save_lastpos; + info->lastkey_length=info->save_lastkey_length; + } + info->read_record= share->read_record; + info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); + break; + case HA_EXTRA_NO_USER_CHANGE: /* Database is somehow locked agains changes */ + info->lock_type= F_EXTRA_LCK; /* Simulate as locked */ + break; + case HA_EXTRA_WAIT_LOCK: + info->lock_wait=0; + break; + case HA_EXTRA_NO_WAIT_LOCK: + info->lock_wait=MY_DONT_WAIT; + break; + case HA_EXTRA_NO_KEYS: + if (info->lock_type == F_UNLCK) + { + error=1; /* Not possibly if not lock */ + break; + } + if (maria_is_any_key_active(share->state.key_map)) + { + MARIA_KEYDEF *key=share->keyinfo; + uint i; + for (i=0 ; i < share->base.keys ; i++,key++) + { + if (!(key->flag & HA_NOSAME) && info->s->base.auto_key != i+1) + { + maria_clear_key_active(share->state.key_map, i); + info->update|= HA_STATE_CHANGED; + } + } + + if (!share->changed) + { + share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED; + share->changed=1; /* Update on close */ + if (!share->global_changed) + { + share->global_changed=1; + share->state.open_count++; + } + } + share->state.state= 
*info->state; + error=_ma_state_info_write(share->kfile,&share->state,1 | 2); + } + break; + case HA_EXTRA_FORCE_REOPEN: + pthread_mutex_lock(&THR_LOCK_maria); + share->last_version= 0L; /* Impossible version */ + pthread_mutex_unlock(&THR_LOCK_maria); + break; + case HA_EXTRA_PREPARE_FOR_DELETE: + pthread_mutex_lock(&THR_LOCK_maria); + share->last_version= 0L; /* Impossible version */ +#ifdef __WIN__ + /* Close the isam and data files as Win32 can't drop an open table */ + pthread_mutex_lock(&share->intern_lock); + if (flush_key_blocks(share->key_cache, share->kfile, + (function == HA_EXTRA_FORCE_REOPEN ? + FLUSH_RELEASE : FLUSH_IGNORE_CHANGED))) + { + error=my_errno; + share->changed=1; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Fatal error found */ + } + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error=end_io_cache(&info->rec_cache); + } + if (info->lock_type != F_UNLCK && ! info->was_locked) + { + info->was_locked=info->lock_type; + if (maria_lock_database(info,F_UNLCK)) + error=my_errno; + info->lock_type = F_UNLCK; + } + if (share->kfile >= 0) + _ma_decrement_open_count(info); + if (share->kfile >= 0 && my_close(share->kfile,MYF(0))) + error=my_errno; + { + LIST *list_element ; + for (list_element=maria_open_list ; + list_element ; + list_element=list_element->next) + { + MARIA_HA *tmpinfo=(MARIA_HA*) list_element->data; + if (tmpinfo->s == info->s) + { + if (tmpinfo->dfile >= 0 && my_close(tmpinfo->dfile,MYF(0))) + error = my_errno; + tmpinfo->dfile= -1; + } + } + } + share->kfile= -1; /* Files aren't open anymore */ + pthread_mutex_unlock(&share->intern_lock); +#endif + pthread_mutex_unlock(&THR_LOCK_maria); + break; + case HA_EXTRA_FLUSH: + if (!share->temporary) + flush_key_blocks(share->key_cache, share->kfile, FLUSH_KEEP); +#ifdef HAVE_PWRITE + _ma_decrement_open_count(info); +#endif + if (share->not_flushed) + { + share->not_flushed=0; + if (_ma_sync_table_files(info)) + error= my_errno; + if (error) + { + share->changed=1; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Fatal error found */ + } + } + if (share->base.blobs && info->rec_buff_size > + share->base.default_rec_buff_size) + { + info->rec_buff_size= 1; /* Force realloc */ + _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + share->base.default_rec_buff_size); + } + break; + case HA_EXTRA_NORMAL: /* Theese isn't in use */ + info->quick_mode=0; + break; + case HA_EXTRA_QUICK: + info->quick_mode=1; + break; + case HA_EXTRA_NO_ROWS: + if (!share->state.header.uniques) + info->opt_flag|= OPT_NO_ROWS; + break; + case HA_EXTRA_PRELOAD_BUFFER_SIZE: + info->preload_buff_size= *((ulong *) extra_arg); + break; + case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: + case HA_EXTRA_CHANGE_KEY_TO_DUP: + maria_extra_keyflag(info, function); + break; + case HA_EXTRA_MMAP: +#ifdef HAVE_MMAP + pthread_mutex_lock(&share->intern_lock); + if (!share->file_map) + { + if (_ma_dynmap_file(info, share->state.state.data_file_length)) + { + DBUG_PRINT("warning",("mmap failed: errno: %d",errno)); + error= my_errno= errno; + } + else + { + share->file_read= _ma_mmap_pread; + share->file_write= _ma_mmap_pwrite; + } + } + pthread_mutex_unlock(&share->intern_lock); +#endif + break; + case HA_EXTRA_MARK_AS_LOG_TABLE: + pthread_mutex_lock(&share->intern_lock); + share->is_log_table= TRUE; + pthread_mutex_unlock(&share->intern_lock); + break; + case HA_EXTRA_KEY_CACHE: + case HA_EXTRA_NO_KEY_CACHE: + default: + break; + 
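+ /*
+   Hypothetical caller sketch, not part of the committed code: per the
+   SYNOPSIS above, extra_arg is read as a ulong* only for
+   HA_EXTRA_CACHE, HA_EXTRA_WRITE_CACHE and HA_EXTRA_PRELOAD_BUFFER_SIZE;
+   for the other functions it may be NULL.  Typical use around a bulk
+   load, while the table is locked, might look like:
+
+     ulong cache_size= 256*1024UL;
+     maria_extra(info, HA_EXTRA_WRITE_CACHE, &cache_size);
+     ... write rows ...
+     maria_extra(info, HA_EXTRA_NO_CACHE, 0);   ends the record cache
+ */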
} + { + char tmp[1]; + tmp[0]=function; + } + DBUG_RETURN(error); +} /* maria_extra */ + + +/* + Start/Stop Inserting Duplicates Into a Table, WL#1648. + */ +static void maria_extra_keyflag(MARIA_HA *info, enum ha_extra_function function) +{ + uint idx; + + for (idx= 0; idx< info->s->base.keys; idx++) + { + switch (function) { + case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: + info->s->keyinfo[idx].flag|= HA_NOSAME; + break; + case HA_EXTRA_CHANGE_KEY_TO_DUP: + info->s->keyinfo[idx].flag&= ~(HA_NOSAME); + break; + default: + break; + } + } +} + + +int maria_reset(MARIA_HA *info) +{ + int error= 0; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_reset"); + /* + Free buffers and reset the following flags: + EXTRA_CACHE, EXTRA_WRITE_CACHE, EXTRA_KEYREAD, EXTRA_QUICK + + If the row buffer cache is large (for dynamic tables), reduce it + to save memory. + */ + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error= end_io_cache(&info->rec_cache); + } + if (share->base.blobs && info->rec_buff_size > + share->base.default_rec_buff_size) + { + info->rec_buff_size= 1; /* Force realloc */ + _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + share->base.default_rec_buff_size); + } +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if (info->opt_flag & MEMMAP_USED) + madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM); +#endif + info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); + info->quick_mode=0; + info->lastinx= 0; /* Use first index as def */ + info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR; + info->page_changed= 1; + info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | + HA_STATE_PREV_FOUND); + DBUG_RETURN(error); +} + + +int _ma_sync_table_files(const MARIA_HA *info) +{ + return (my_sync(info->dfile, MYF(0)) || + my_sync(info->s->kfile, MYF(0))); +} diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c new file mode 100644 index 00000000000..06b044e9e38 --- /dev/null +++ b/storage/maria/ma_ft_boolean_search.c @@ -0,0 +1,964 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* TODO: add caching - pre-read several index entries at once */ + +/* + Added optimization for full-text queries with plus-words. It was + implemented by sharing maximal document id (max_docid) variable + inside plus subtree. max_docid could be used by any word in plus + subtree, but it could be updated by plus-word only. + + The idea is: there is no need to search for docid smaller than + biggest docid inside current plus subtree. 
+ + Examples: + +word1 word2 + share same max_docid + max_docid updated by word1 + +word1 +(word2 word3) + share same max_docid + max_docid updated by word1 + +(word1 -word2) +(+word3 word4) + share same max_docid + max_docid updated by word3 +*/ + +#define FT_CORE +#include "ma_ftdefs.h" + +/* search with boolean queries */ + +static double _wghts[11]= +{ + 0.131687242798354, + 0.197530864197531, + 0.296296296296296, + 0.444444444444444, + 0.666666666666667, + 1.000000000000000, + 1.500000000000000, + 2.250000000000000, + 3.375000000000000, + 5.062500000000000, + 7.593750000000000}; +static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */ + +static double _nwghts[11]= +{ + -0.065843621399177, + -0.098765432098766, + -0.148148148148148, + -0.222222222222222, + -0.333333333333334, + -0.500000000000000, + -0.750000000000000, + -1.125000000000000, + -1.687500000000000, + -2.531250000000000, + -3.796875000000000}; +static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ + +#define FTB_FLAG_TRUNC 1 +/* At most one of the following flags can be set */ +#define FTB_FLAG_YES 2 +#define FTB_FLAG_NO 4 +#define FTB_FLAG_WONLY 8 + +typedef struct st_ftb_expr FTB_EXPR; +struct st_ftb_expr +{ + FTB_EXPR *up; + uint flags; +/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */ + my_off_t docid[2]; + my_off_t max_docid; + float weight; + float cur_weight; + LIST *phrase; /* phrase words */ + LIST *document; /* for phrase search */ + uint yesses; /* number of "yes" words matched */ + uint nos; /* number of "no" words matched */ + uint ythresh; /* number of "yes" words in expr */ + uint yweaks; /* number of "yes" words for scan only */ +}; + +typedef struct st_ftb_word +{ + FTB_EXPR *up; + uint flags; +/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */ + my_off_t docid[2]; /* for index search and for scan */ + my_off_t key_root; + my_off_t *max_docid; + MARIA_KEYDEF *keyinfo; + struct st_ftb_word *prev; + float weight; + uint ndepth; + uint len; + uchar off; + byte word[1]; +} FTB_WORD; + +typedef struct st_ft_info +{ + struct _ft_vft *please; + MARIA_HA *info; + CHARSET_INFO *charset; + FTB_EXPR *root; + FTB_WORD **list; + FTB_WORD *last_word; + MEM_ROOT mem_root; + QUEUE queue; + TREE no_dupes; + my_off_t lastpos; + uint keynr; + uchar with_scan; + enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state; +} FTB; + +static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) +{ + int i; + + /* if a==curdoc, take it as a < b */ + if (v && a->docid[0] == *v) + return -1; + + /* ORDER BY docid, ndepth DESC */ + i=CMP_NUM(a->docid[0], b->docid[0]); + if (!i) + i=CMP_NUM(b->ndepth,a->ndepth); + return i; +} + +static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) +{ + /* ORDER BY word DESC, ndepth DESC */ + int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, + (uchar*) (*a)->word+1,(*a)->len-1,0,0); + if (!i) + i=CMP_NUM((*b)->ndepth,(*a)->ndepth); + return i; +} + + +typedef struct st_my_ftb_param +{ + FTB *ftb; + FTB_EXPR *ftbe; + byte *up_quot; + uint depth; +} MY_FTB_PARAM; + + +static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int word_len, + MYSQL_FTPARSER_BOOLEAN_INFO *info) +{ + MY_FTB_PARAM *ftb_param= param->mysql_ftparam; + FTB_WORD *ftbw; + FTB_EXPR *ftbe, *tmp_expr; + FT_WORD *phrase_word; + LIST *tmp_element; + int r= info->weight_adjust; + float weight= (float) + (info->wasign ? 
nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; + + switch (info->type) { + case FT_TOKEN_WORD: + ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root, + sizeof(FTB_WORD) + + (info->trunc ? HA_MAX_KEY_BUFF : + word_len * ftb_param->ftb->charset->mbmaxlen + + HA_FT_WLEN + + ftb_param->ftb->info->s->rec_reflength)); + ftbw->len= word_len + 1; + ftbw->flags= 0; + ftbw->off= 0; + if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES; + if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO; + if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC; + ftbw->weight= weight; + ftbw->up= ftb_param->ftbe; + ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR; + ftbw->ndepth= (info->yesno < 0) + ftb_param->depth; + ftbw->key_root= HA_OFFSET_ERROR; + memcpy(ftbw->word + 1, word, word_len); + ftbw->word[0]= word_len; + if (info->yesno > 0) ftbw->up->ythresh++; + ftb_param->ftb->queue.max_elements++; + ftbw->prev= ftb_param->ftb->last_word; + ftb_param->ftb->last_word= ftbw; + ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC); + for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up) + if (! (tmp_expr->flags & FTB_FLAG_YES)) + break; + ftbw->max_docid= &tmp_expr->max_docid; + /* fall through */ + case FT_TOKEN_STOPWORD: + if (! ftb_param->up_quot) break; + phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD)); + tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST)); + phrase_word->pos= word; + phrase_word->len= word_len; + tmp_element->data= (void *)phrase_word; + ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element); + /* Allocate document list at this point. + It allows to avoid huge amount of allocs/frees for each row.*/ + tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST)); + tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD)); + ftb_param->ftbe->document= + list_add(ftb_param->ftbe->document, tmp_element); + break; + case FT_TOKEN_LEFT_PAREN: + ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR)); + ftbe->flags= 0; + if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES; + if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO; + ftbe->weight= weight; + ftbe->up= ftb_param->ftbe; + ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0; + ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR; + ftbe->phrase= NULL; + ftbe->document= 0; + if (info->quot) ftb_param->ftb->with_scan|= 2; + if (info->yesno > 0) ftbe->up->ythresh++; + ftb_param->ftbe= ftbe; + ftb_param->depth++; + ftb_param->up_quot= info->quot; + break; + case FT_TOKEN_RIGHT_PAREN: + if (ftb_param->ftbe->document) + { + /* Circuit document list */ + for (tmp_element= ftb_param->ftbe->document; + tmp_element->next; tmp_element= tmp_element->next) /* no-op */; + tmp_element->next= ftb_param->ftbe->document; + ftb_param->ftbe->document->prev= tmp_element; + } + info->quot= 0; + if (ftb_param->ftbe->up) + { + DBUG_ASSERT(ftb_param->depth); + ftb_param->ftbe= ftb_param->ftbe->up; + ftb_param->depth--; + ftb_param->up_quot= 0; + } + break; + case FT_TOKEN_EOF: + default: + break; + } + return(0); +} + + +static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param, + char *query, int len) +{ + MY_FTB_PARAM *ftb_param= param->mysql_ftparam; + MYSQL_FTPARSER_BOOLEAN_INFO info; + CHARSET_INFO *cs= ftb_param->ftb->charset; + char **start= &query; + char *end= query + len; + FT_WORD w; + + info.prev= ' '; + info.quot= 0; + while (maria_ft_get_word(cs, start, end, &w, &info)) + param->mysql_add_word(param, w.pos, w.len, &info); + return(0); +} + + +static void 
_ftb_parse_query(FTB *ftb, byte *query, uint len, + struct st_mysql_ftparser *parser) +{ + MYSQL_FTPARSER_PARAM *param; + MY_FTB_PARAM ftb_param; + DBUG_ENTER("_ftb_parse_query"); + DBUG_ASSERT(parser); + + if (ftb->state != UNINITIALIZED) + DBUG_VOID_RETURN; + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0))) + DBUG_VOID_RETURN; + + ftb_param.ftb= ftb; + ftb_param.depth= 0; + ftb_param.ftbe= ftb->root; + ftb_param.up_quot= 0; + + param->mysql_parse= ftb_parse_query_internal; + param->mysql_add_word= ftb_query_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->cs= ftb->charset; + param->doc= query; + param->length= len; + param->flags= 0; + param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO; + parser->parse(param); + DBUG_VOID_RETURN; +} + + +static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)), + const void *a,const void *b) +{ + return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b))); +} + +/* returns 1 if the search was finished (must-word wasn't found) */ +static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) +{ + int r; + int subkeys=1; + my_bool can_go_down; + MARIA_HA *info=ftb->info; + uint off= 0, extra=HA_FT_WLEN+info->s->base.rec_reflength; + byte *lastkey_buf= ftbw->word+ftbw->off; + + if (ftbw->flags & FTB_FLAG_TRUNC) + lastkey_buf+=ftbw->len; + + if (init_search) + { + ftbw->key_root=info->s->state.key_root[ftb->keynr]; + ftbw->keyinfo=info->s->keyinfo+ftb->keynr; + + r= _ma_search(info, ftbw->keyinfo, ftbw->word, ftbw->len, + SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root); + } + else + { + uint sflag= SEARCH_BIGGER; + if (ftbw->docid[0] < *ftbw->max_docid) + { + sflag|= SEARCH_SAME; + _ma_dpointer(info, (ftbw->word + ftbw->len + HA_FT_WLEN), + *ftbw->max_docid); + } + r= _ma_search(info, ftbw->keyinfo, lastkey_buf, + USE_WHOLE_KEY, sflag, ftbw->key_root); + } + + can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC))); + /* Skip rows inserted by concurrent insert */ + while (!r) + { + if (can_go_down) + { + /* going down ? */ + off=info->lastkey_length-extra; + subkeys=ft_sintXkorr(info->lastkey+off); + } + if (subkeys<0 || info->cur_row.lastpos < info->state->data_file_length) + break; + r= _ma_search_next(info, ftbw->keyinfo, info->lastkey, + info->lastkey_length, + SEARCH_BIGGER, ftbw->key_root); + } + + if (!r && !ftbw->off) + { + r= ha_compare_text(ftb->charset, + (uchar*) info->lastkey+1, + info->lastkey_length-extra-1, + (uchar*) ftbw->word+1, + ftbw->len-1, + (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0); + } + + if (r) /* not found */ + { + if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC)) + { + ftbw->docid[0]=HA_OFFSET_ERROR; + if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0) + { + /* + This word MUST BE present in every document returned, + so we can stop the search right now + */ + ftb->state=INDEX_DONE; + return 1; /* search is done */ + } + else + return 0; + } + + /* going up to the first-level tree to continue search there */ + _ma_dpointer(info, (lastkey_buf+HA_FT_WLEN), ftbw->key_root); + ftbw->key_root=info->s->state.key_root[ftb->keynr]; + ftbw->keyinfo=info->s->keyinfo+ftb->keynr; + ftbw->off=0; + return _ft2_search(ftb, ftbw, 0); + } + + /* matching key found */ + memcpy(lastkey_buf, info->lastkey, info->lastkey_length); + if (lastkey_buf == ftbw->word) + ftbw->len=info->lastkey_length-extra; + + /* going down ? 
*/ + if (subkeys<0) + { + /* + yep, going down, to the second-level tree + TODO here: subkey-based optimization + */ + ftbw->off=off; + ftbw->key_root= info->cur_row.lastpos; + ftbw->keyinfo=& info->s->ft2_keyinfo; + r= _ma_search_first(info, ftbw->keyinfo, ftbw->key_root); + DBUG_ASSERT(r==0); /* found something */ + memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length); + } + ftbw->docid[0]= info->cur_row.lastpos; + if (ftbw->flags & FTB_FLAG_YES) + *ftbw->max_docid= info->cur_row.lastpos; + return 0; +} + +static void _ftb_init_index_search(FT_INFO *ftb) +{ + int i; + FTB_WORD *ftbw; + + if ((ftb->state != READY && ftb->state !=INDEX_DONE) || + ftb->keynr == NO_SUCH_KEY) + return; + ftb->state=INDEX_SEARCH; + + for (i=ftb->queue.elements; i; i--) + { + ftbw=(FTB_WORD *)(ftb->queue.root[i]); + + if (ftbw->flags & FTB_FLAG_TRUNC) + { + /* + special treatment for truncation operator + 1. there are some (besides this) +words + | no need to search in the index, it can never ADD new rows + | to the result, and to remove half-matched rows we do scan anyway + 2. -trunc* + | same as 1. + 3. in 1 and 2, +/- need not be on the same expr. level, + but can be on any upper level, as in +word +(trunc1* trunc2*) + 4. otherwise + | We have to index-search for this prefix. + | It may cause duplicates, as in the index (sorted by <word,docid>) + | <aaaa,row1> + | <aabb,row2> + | <aacc,row1> + | Searching for "aa*" will find row1 twice... + */ + FTB_EXPR *ftbe; + for (ftbe=(FTB_EXPR*)ftbw; + ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC); + ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up) + { + if (ftbe->flags & FTB_FLAG_NO || /* 2 */ + ftbe->up->ythresh - ftbe->up->yweaks >1) /* 1 */ + { + FTB_EXPR *top_ftbe=ftbe->up; + ftbw->docid[0]=HA_OFFSET_ERROR; + for (ftbe=(FTB_EXPR *)ftbw; + ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO); + ftbe=ftbe->up) + ftbe->up->yweaks++; + ftbe=0; + break; + } + } + if (!ftbe) + continue; + /* 4 */ + if (!is_tree_inited(& ftb->no_dupes)) + init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t), + _ftb_no_dupes_cmp,0,0,0); + else + reset_tree(& ftb->no_dupes); + } + + ftbw->off=0; /* in case of reinit */ + if (_ft2_search(ftb, ftbw, 1)) + return; + } + queue_fix(& ftb->queue); +} + + +FT_INFO * maria_ft_init_boolean_search(MARIA_HA *info, uint keynr, byte *query, + uint query_len, CHARSET_INFO *cs) +{ + FTB *ftb; + FTB_EXPR *ftbe; + FTB_WORD *ftbw; + + if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME)))) + return 0; + ftb->please= (struct _ft_vft *) & _ma_ft_vft_boolean; + ftb->state=UNINITIALIZED; + ftb->info=info; + ftb->keynr=keynr; + ftb->charset=cs; + DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset); + ftb->with_scan=0; + ftb->lastpos=HA_OFFSET_ERROR; + bzero(& ftb->no_dupes, sizeof(TREE)); + ftb->last_word= 0; + + init_alloc_root(&ftb->mem_root, 1024, 1024); + ftb->queue.max_elements= 0; + if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)))) + goto err; + ftbe->weight=1; + ftbe->flags=FTB_FLAG_YES; + ftbe->nos=1; + ftbe->up=0; + ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0; + ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR; + ftbe->phrase= NULL; + ftbe->document= 0; + ftb->root=ftbe; + _ftb_parse_query(ftb, query, query_len, keynr == NO_SUCH_KEY ? + &ft_default_parser : + info->s->keyinfo[keynr].parser); + /* + Hack: instead of init_queue, we'll use reinit queue to be able + to alloc queue with alloc_root() + */ + if (! 
(ftb->queue.root= (byte **)alloc_root(&ftb->mem_root, + (ftb->queue.max_elements + 1) * + sizeof(void *)))) + goto err; + reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0, + (int (*)(void*, byte*, byte*))FTB_WORD_cmp, 0); + for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev) + queue_insert(&ftb->queue, (byte *)ftbw); + ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root, + sizeof(FTB_WORD *)*ftb->queue.elements); + memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements); + qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *), + (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset); + if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC; + ftb->state=READY; + return ftb; +err: + free_root(& ftb->mem_root, MYF(0)); + my_free((gptr)ftb,MYF(0)); + return 0; +} + + +typedef struct st_my_ftb_phrase_param +{ + LIST *phrase; + LIST *document; + CHARSET_INFO *cs; + uint phrase_length; + uint document_length; + uint match; +} MY_FTB_PHRASE_PARAM; + + +static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int word_len, + MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) +{ + MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam; + FT_WORD *w= (FT_WORD *)phrase_param->document->data; + LIST *phrase, *document; + w->pos= word; + w->len= word_len; + phrase_param->document= phrase_param->document->prev; + if (phrase_param->phrase_length > phrase_param->document_length) + { + phrase_param->document_length++; + return 0; + } + /* TODO: rewrite phrase search to avoid + comparing the same word twice. */ + for (phrase= phrase_param->phrase, document= phrase_param->document->next; + phrase; phrase= phrase->next, document= document->next) + { + FT_WORD *phrase_word= (FT_WORD *)phrase->data; + FT_WORD *document_word= (FT_WORD *)document->data; + if (my_strnncoll(phrase_param->cs, + (uchar*) phrase_word->pos, phrase_word->len, + (uchar*) document_word->pos, document_word->len)) + return 0; + } + phrase_param->match++; + return 0; +} + + +static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param, + char *document, int len) +{ + FT_WORD word; + MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam; + const char *docend= document + len; + while (maria_ft_simple_get_word(phrase_param->cs, &document, docend, &word, FALSE)) + { + param->mysql_add_word(param, word.pos, word.len, 0); + if (phrase_param->match) + return 1; + } + return 0; +} + + +/* + Checks if given buffer matches phrase list. + + SYNOPSIS + _ftb_check_phrase() + s0 start of buffer + e0 end of buffer + phrase broken into list phrase + cs charset info + + RETURN VALUE + 1 is returned if phrase found, 0 else. +*/ + +static int _ftb_check_phrase(FTB *ftb, const byte *document, uint len, + FTB_EXPR *ftbe, struct st_mysql_ftparser *parser) +{ + MY_FTB_PHRASE_PARAM ftb_param; + MYSQL_FTPARSER_PARAM *param; + DBUG_ENTER("_ftb_check_phrase"); + DBUG_ASSERT(parser); + + if (! 
(param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 1))) + DBUG_RETURN(0); + ftb_param.phrase= ftbe->phrase; + ftb_param.document= ftbe->document; + ftb_param.cs= ftb->charset; + ftb_param.phrase_length= list_length(ftbe->phrase); + ftb_param.document_length= 1; + ftb_param.match= 0; + + param->mysql_parse= ftb_check_phrase_internal; + param->mysql_add_word= ftb_phrase_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->cs= ftb->charset; + param->doc= (byte *)document; + param->length= len; + param->flags= 0; + param->mode= MYSQL_FTPARSER_WITH_STOPWORDS; + parser->parse(param); + DBUG_RETURN(ftb_param.match ? 1 : 0); +} + + +static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) +{ + FT_SEG_ITERATOR ftsi; + FTB_EXPR *ftbe; + float weight=ftbw->weight; + int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0); + my_off_t curdoc=ftbw->docid[mode]; + struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ? + &ft_default_parser : + ftb->info->s->keyinfo[ftb->keynr].parser; + + for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) + { + ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks); + if (ftbe->docid[mode] != curdoc) + { + ftbe->cur_weight=0; + ftbe->yesses=ftbe->nos=0; + ftbe->docid[mode]=curdoc; + } + if (ftbe->nos) + break; + if (yn & FTB_FLAG_YES) + { + weight /= ftbe->ythresh; + ftbe->cur_weight += weight; + if ((int) ++ftbe->yesses == ythresh) + { + yn=ftbe->flags; + weight=ftbe->cur_weight*ftbe->weight; + if (mode && ftbe->phrase) + { + int not_found=1; + + memcpy(&ftsi, ftsi_orig, sizeof(ftsi)); + while (_ma_ft_segiterator(&ftsi) && not_found) + { + if (!ftsi.pos) + continue; + not_found = ! _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, + ftbe, parser); + } + if (not_found) break; + } /* ftbe->quot */ + } + else + break; + } + else + if (yn & FTB_FLAG_NO) + { + /* + NOTE: special sort function of queue assures that all + (yn & FTB_FLAG_NO) != 0 + events for every particular subexpression will + "auto-magically" happen BEFORE all the + (yn & FTB_FLAG_YES) != 0 events. So no + already matched expression can become not-matched again. + */ + ++ftbe->nos; + break; + } + else + { + if (ftbe->ythresh) + weight/=3; + ftbe->cur_weight += weight; + if ((int) ftbe->yesses < ythresh) + break; + if (!(yn & FTB_FLAG_WONLY)) + yn= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ; + weight*= ftbe->weight; + } + } +} + + +int maria_ft_boolean_read_next(FT_INFO *ftb, char *record) +{ + FTB_EXPR *ftbe; + FTB_WORD *ftbw; + MARIA_HA *info=ftb->info; + my_off_t curdoc; + + if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE) + return -1; + + /* black magic ON */ + if ((int) _ma_check_index(info, ftb->keynr) < 0) + return my_errno; + if (_ma_readinfo(info, F_RDLCK, 1)) + return my_errno; + /* black magic OFF */ + + if (!ftb->queue.elements) + return my_errno=HA_ERR_END_OF_FILE; + + /* Attention!!! Address of a local variable is used here! See err: label */ + ftb->queue.first_cmp_arg=(void *)&curdoc; + + while (ftb->state == INDEX_SEARCH && + (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) != + HA_OFFSET_ERROR) + { + while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) + { + _ftb_climb_the_tree(ftb, ftbw, 0); + + /* update queue */ + _ft2_search(ftb, ftbw, 0); + queue_replaced(& ftb->queue); + } + + ftbe=ftb->root; + if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 && + ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos) + { + /* curdoc matched ! 
*/ + if (is_tree_inited(&ftb->no_dupes) && + tree_insert(&ftb->no_dupes, &curdoc, 0, + ftb->no_dupes.custom_arg)->count >1) + /* but it managed already to get past this line once */ + continue; + + info->cur_row.lastpos= curdoc; + /* Clear all states, except that the table was updated */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + if (!(*info->read_record)(info, record, curdoc)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + if (ftb->with_scan && maria_ft_boolean_find_relevance(ftb,record,0)==0) + continue; /* no match */ + my_errno=0; + goto err; + } + goto err; + } + } + ftb->state=INDEX_DONE; + my_errno=HA_ERR_END_OF_FILE; +err: + ftb->queue.first_cmp_arg=(void *)0; + return my_errno; +} + + +typedef struct st_my_ftb_find_param +{ + FT_INFO *ftb; + FT_SEG_ITERATOR *ftsi; +} MY_FTB_FIND_PARAM; + + +static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int len, + MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) +{ + MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam; + FT_INFO *ftb= ftb_param->ftb; + FTB_WORD *ftbw; + int a, b, c; + for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2) + { + ftbw= ftb->list[c]; + if (ha_compare_text(ftb->charset, (uchar*)word, len, + (uchar*)ftbw->word+1, ftbw->len-1, + (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0) + b= c; + else + a= c; + } + for (; c >= 0; c--) + { + ftbw= ftb->list[c]; + if (ha_compare_text(ftb->charset, (uchar*)word, len, + (uchar*)ftbw->word + 1,ftbw->len - 1, + (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0)) + break; + if (ftbw->docid[1] == ftb->info->cur_row.lastpos) + continue; + ftbw->docid[1]= ftb->info->cur_row.lastpos; + _ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi); + } + return(0); +} + + +static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param, + char *doc, int len) +{ + MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam; + FT_INFO *ftb= ftb_param->ftb; + char *end= doc + len; + FT_WORD w; + while (maria_ft_simple_get_word(ftb->charset, &doc, end, &w, TRUE)) + param->mysql_add_word(param, w.pos, w.len, 0); + return(0); +} + + +float maria_ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) +{ + FTB_EXPR *ftbe; + FT_SEG_ITERATOR ftsi, ftsi2; + MARIA_RECORD_POS docid= ftb->info->cur_row.lastpos; + MY_FTB_FIND_PARAM ftb_param; + MYSQL_FTPARSER_PARAM *param; + struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ? + &ft_default_parser : + ftb->info->s->keyinfo[ftb->keynr].parser; + + if (docid == HA_OFFSET_ERROR) + return -2.0; + if (!ftb->queue.elements) + return 0; + if (! 
(param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0))) + return 0; + + if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos) + { + FTB_EXPR *x; + uint i; + + for (i=0; i < ftb->queue.elements; i++) + { + ftb->list[i]->docid[1]=HA_OFFSET_ERROR; + for (x=ftb->list[i]->up; x; x=x->up) + x->docid[1]=HA_OFFSET_ERROR; + } + } + + ftb->lastpos=docid; + + if (ftb->keynr==NO_SUCH_KEY) + _ma_ft_segiterator_dummy_init(record, length, &ftsi); + else + _ma_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi); + memcpy(&ftsi2, &ftsi, sizeof(ftsi)); + + ftb_param.ftb= ftb; + ftb_param.ftsi= &ftsi2; + param->mysql_parse= ftb_find_relevance_parse; + param->mysql_add_word= ftb_find_relevance_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->flags= 0; + param->cs= ftb->charset; + param->mode= MYSQL_FTPARSER_SIMPLE_MODE; + + while (_ma_ft_segiterator(&ftsi)) + { + if (!ftsi.pos) + continue; + param->doc= (byte *)ftsi.pos; + param->length= ftsi.len; + parser->parse(param); + } + ftbe=ftb->root; + if (ftbe->docid[1]==docid && ftbe->cur_weight>0 && + ftbe->yesses>=ftbe->ythresh && !ftbe->nos) + { /* row matched ! */ + return ftbe->cur_weight; + } + else + { /* match failed ! */ + return 0.0; + } +} + + +void maria_ft_boolean_close_search(FT_INFO *ftb) +{ + if (is_tree_inited(& ftb->no_dupes)) + { + delete_tree(& ftb->no_dupes); + } + free_root(& ftb->mem_root, MYF(0)); + my_free((gptr)ftb,MYF(0)); +} + + +float maria_ft_boolean_get_relevance(FT_INFO *ftb) +{ + return ftb->root->cur_weight; +} + + +void maria_ft_boolean_reinit_search(FT_INFO *ftb) +{ + _ftb_init_index_search(ftb); +} diff --git a/storage/maria/ma_ft_eval.c b/storage/maria/ma_ft_eval.c new file mode 100644 index 00000000000..fe4900aeb64 --- /dev/null +++ b/storage/maria/ma_ft_eval.c @@ -0,0 +1,255 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. 
Golubchik, who has a shared copyright to this code + added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ + +#include "ma_ftdefs.h" +#include "maria_ft_eval.h" +#include <stdarg.h> +#include <my_getopt.h> + +static void print_error(int exit_code, const char *fmt,...); +static void get_options(int argc, char *argv[]); +static int create_record(char *pos, FILE *file); +static void usage(); + +static struct my_option my_long_options[] = +{ + {"", 's', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'q', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '#', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +int main(int argc, char *argv[]) +{ + MARIA_HA *file; + int i,j; + + MY_INIT(argv[0]); + get_options(argc,argv); + bzero((char*)recinfo,sizeof(recinfo)); + + maria_init(); + /* First define 2 columns */ + recinfo[0].type=FIELD_SKIP_ENDSPACE; + recinfo[0].length=docid_length; + recinfo[1].type=FIELD_BLOB; + recinfo[1].length= 4+maria_portable_sizeof_char_ptr; + + /* Define a key over the first column */ + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[0].seg[0].type= HA_KEYTYPE_TEXT; + keyinfo[0].seg[0].flag= HA_BLOB_PART; + keyinfo[0].seg[0].start=recinfo[0].length; + keyinfo[0].seg[0].length=key_length; + keyinfo[0].seg[0].null_bit=0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].bit_start=4; + keyinfo[0].seg[0].language=MY_CHARSET_CURRENT; + keyinfo[0].flag = HA_FULLTEXT; + + if (!silent) + printf("- Creating isam-file\n"); + if (maria_create(filename,1,keyinfo,2,recinfo,0,NULL,(MARIA_CREATE_INFO*) 0,0)) + goto err; + if (!(file=maria_open(filename,2,0))) + goto err; + if (!silent) + printf("Initializing stopwords\n"); + maria_ft_init_stopwords(stopwordlist); + + if (!silent) + printf("- Writing key:s\n"); + + my_errno=0; + i=0; + while (create_record(record,df)) + { + error=maria_write(file,record); + if (error) + printf("I= %2d maria_write: %d errno: %d\n",i,error,my_errno); + i++; + } + fclose(df); + + if (maria_close(file)) goto err; + if (!silent) + printf("- Reopening file\n"); + if (!(file=maria_open(filename,2,0))) goto err; + if (!silent) + printf("- Reading rows with key\n"); + for (i=1;create_record(record,qf);i++) + { + FT_DOCLIST *result; + double w; + int t, err; + + result=maria_ft_nlq_init_search(file,0,blob_record,(uint) strlen(blob_record),1); + if (!result) + { + printf("Query %d failed with errno %3d\n",i,my_errno); + goto err; + } + if (!silent) + printf("Query %d. 
Found: %d.\n",i,result->ndocs); + for (j=0;(err=maria_ft_nlq_read_next(result, read_record))==0;j++) + { + t=uint2korr(read_record); + w=maria_ft_nlq_get_relevance(result); + printf("%d %.*s %f\n",i,t,read_record+2,w); + } + if (err != HA_ERR_END_OF_FILE) + { + printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno); + goto err; + } + maria_ft_nlq_close_search(result); + } + + if (maria_close(file)) goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return (0); + + err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ + +} + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch (optid) { + case 's': + if (stopwordlist && stopwordlist != maria_ft_precompiled_stopwords) + break; + { + FILE *f; char s[HA_FT_MAXLEN]; int i=0,n=SWL_INIT; + + if (!(stopwordlist=(const char**) malloc(n*sizeof(char *)))) + print_error(1,"malloc(%d)",n*sizeof(char *)); + if (!(f=fopen(argument,"r"))) + print_error(1,"fopen(%s)",argument); + while (!feof(f)) + { + if (!(fgets(s,HA_FT_MAXLEN,f))) + print_error(1,"fgets(s,%d,%s)",HA_FT_MAXLEN,argument); + if (!(stopwordlist[i++]=strdup(s))) + print_error(1,"strdup(%s)",s); + if (i >= n) + { + n+=SWL_PLUS; + if (!(stopwordlist=(const char**) realloc((char*) stopwordlist, + n*sizeof(char *)))) + print_error(1,"realloc(%d)",n*sizeof(char *)); + } + } + fclose(f); + stopwordlist[i]=NULL; + break; + } + case 'q': silent=1; break; + case 'S': if (stopwordlist==maria_ft_precompiled_stopwords) stopwordlist=NULL; break; + case '#': + DBUG_PUSH (argument); + break; + case 'V': + case '?': + case 'h': + usage(); + exit(1); + } + return 0; +} + + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + + if (!(d_file=argv[optind])) print_error(1,"No d_file"); + if (!(df=fopen(d_file,"r"))) + print_error(1,"fopen(%s)",d_file); + if (!(q_file=argv[optind+1])) print_error(1,"No q_file"); + if (!(qf=fopen(q_file,"r"))) + print_error(1,"fopen(%s)",q_file); + return; +} /* get options */ + + +static int create_record(char *pos, FILE *file) +{ + uint tmp; char *ptr; + + bzero((char *)pos,MAX_REC_LENGTH); + + /* column 1 - VARCHAR */ + if (!(fgets(pos+2,MAX_REC_LENGTH-32,file))) + { + if (feof(file)) + return 0; + else + print_error(1,"fgets(docid) - 1"); + } + tmp=(uint) strlen(pos+2)-1; + int2store(pos,tmp); + pos+=recinfo[0].length; + + /* column 2 - BLOB */ + + if (!(fgets(blob_record,MAX_BLOB_LENGTH,file))) + print_error(1,"fgets(docid) - 2"); + tmp=(uint) strlen(blob_record); + int4store(pos,tmp); + ptr=blob_record; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + return 1; +} + +/* VARARGS */ + +static void print_error(int exit_code, const char *fmt,...) +{ + va_list args; + + va_start(args,fmt); + fprintf(stderr,"%s: error: ",my_progname); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + exit(exit_code); +} + + +static void usage() +{ + printf("%s [options]\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/ma_ft_eval.h b/storage/maria/ma_ft_eval.h new file mode 100644 index 00000000000..d9b5c51642c --- /dev/null +++ b/storage/maria/ma_ft_eval.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2006 MySQL AB & Sergei A. 
Golubchik + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +const char **stopwordlist=maria_ft_precompiled_stopwords; + +#define MAX_REC_LENGTH 128 +#define MAX_BLOB_LENGTH 60000 +char record[MAX_REC_LENGTH], read_record[MAX_REC_LENGTH+MAX_BLOB_LENGTH]; +char blob_record[MAX_BLOB_LENGTH+20*20]; + +char *filename= (char*) "EVAL"; + +int silent=0, error=0; + +uint key_length=MAX_BLOB_LENGTH,docid_length=32; +char *d_file, *q_file; +FILE *df,*qf; + +MARIA_COLUMNDEF recinfo[3]; +MARIA_KEYDEF keyinfo[2]; +HA_KEYSEG keyseg[10]; + +#define SWL_INIT 500 +#define SWL_PLUS 50 + +#define MAX_LINE_LENGTH 128 +char line[MAX_LINE_LENGTH]; diff --git a/storage/maria/ma_ft_nlq_search.c b/storage/maria/ma_ft_nlq_search.c new file mode 100644 index 00000000000..4c922516455 --- /dev/null +++ b/storage/maria/ma_ft_nlq_search.c @@ -0,0 +1,371 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. 
Golubchik, who has a shared copyright to this code */ + +#define FT_CORE +#include "ma_ftdefs.h" + +/* search with natural language queries */ + +typedef struct ft_doc_rec +{ + my_off_t dpos; + double weight; +} FT_DOC; + +struct st_ft_info +{ + struct _ft_vft *please; + MARIA_HA *info; + int ndocs; + int curdoc; + FT_DOC doc[1]; +}; + +typedef struct st_all_in_one +{ + MARIA_HA *info; + uint keynr; + CHARSET_INFO *charset; + uchar *keybuff; + TREE dtree; +} ALL_IN_ONE; + +typedef struct st_ft_superdoc +{ + FT_DOC doc; + FT_WORD *word_ptr; + double tmp_weight; +} FT_SUPERDOC; + +static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)), + FT_SUPERDOC *p1, FT_SUPERDOC *p2) +{ + if (p1->doc.dpos < p2->doc.dpos) + return -1; + if (p1->doc.dpos == p2->doc.dpos) + return 0; + return 1; +} + +static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) +{ + int subkeys, r; + uint keylen, doc_cnt; + FT_SUPERDOC sdoc, *sptr; + TREE_ELEMENT *selem; + double gweight=1; + MARIA_HA *info= aio->info; + byte *keybuff= (byte*) aio->keybuff; + MARIA_KEYDEF *keyinfo=info->s->keyinfo+aio->keynr; + my_off_t key_root=info->s->state.key_root[aio->keynr]; + uint extra=HA_FT_WLEN+info->s->base.rec_reflength; +#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT + float tmp_weight; +#else +#error +#endif + + DBUG_ENTER("walk_and_match"); + + word->weight=LWS_FOR_QUERY; + + keylen= _ma_ft_make_key(info,aio->keynr,(char*) keybuff,word,0); + keylen-=HA_FT_WLEN; + doc_cnt=0; + + /* Skip rows inserted by current inserted */ + for (r= _ma_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root) ; + !r && + (subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 && + info->cur_row.lastpos >= info->state->data_file_length ; + r= _ma_search_next(info, keyinfo, info->lastkey, + info->lastkey_length, SEARCH_BIGGER, key_root)) + ; + + info->update|= HA_STATE_AKTIV; /* for _ma_test_if_changed() */ + + /* The following should be safe, even if we compare doubles */ + while (!r && gweight) + { + + if (keylen && + ha_compare_text(aio->charset, + (uchar*) info->lastkey+1, info->lastkey_length-extra-1, + (uchar*) keybuff+1, keylen-1, 0, 0)) + break; + + if (subkeys<0) + { + if (doc_cnt) + DBUG_RETURN(1); /* index is corrupted */ + /* + TODO here: unsafe optimization, should this word + be skipped (based on subkeys) ? 
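(Aside, not part of the patch.) Around this point walk_and_match treats a negative subkeys value as "this frequent word has its own second-level tree keyed by docid" and redirects keyinfo/key_root to that subtree, while a positive value is reinterpreted as the per-document float weight. A toy standalone sketch of such a two-level posting layout, with made-up names (word_entry, posting) and flat arrays standing in for key pages:

#include <stdio.h>

typedef struct { unsigned long docid; float weight; } posting;

typedef struct {
  const char *word;
  int n_inline;                 /* >= 0: that many postings stored inline         */
  posting inline_postings[2];   /* room for a couple of rare-word postings        */
  int n_level2;                 /* used when n_inline < 0: "go down" to level two */
  const posting *level2;        /* second-level list, sorted by docid             */
} word_entry;

static void dump(const word_entry *w)
{
  const posting *p;
  int i, n;
  if (w->n_inline >= 0) { p= w->inline_postings; n= w->n_inline; }
  else                  { p= w->level2;          n= w->n_level2; }
  printf("%s:", w->word);
  for (i= 0; i < n; i++)
    printf(" (doc %lu, weight %.2f)", p[i].docid, p[i].weight);
  printf("\n");
}

int main(void)
{
  static const posting big_list[]= { {1, 0.1f}, {4, 0.3f}, {9, 0.2f}, {12, 0.5f} };
  word_entry rare=   { "rare",    1, { {7, 0.9f} }, 0, 0 };
  word_entry common= { "common", -1, { {0, 0.0f} }, 4, big_list };

  dump(&rare);      /* postings read directly from the first level  */
  dump(&common);    /* reader descends into the second-level list   */
  return 0;
}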
+ */ + keybuff+=keylen; + keyinfo=& info->s->ft2_keyinfo; + key_root= info->cur_row.lastpos; + keylen=0; + r= _ma_search_first(info, keyinfo, key_root); + goto do_skip; + } +#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT + tmp_weight=*(float*)&subkeys; +#else +#error +#endif + /* The following should be safe, even if we compare doubles */ + if (tmp_weight==0) + DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */ + + sdoc.doc.dpos= info->cur_row.lastpos; + + /* saving document matched into dtree */ + if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg))) + DBUG_RETURN(1); + + sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem); + + if (selem->count==1) /* document's first match */ + sptr->doc.weight=0; + else + sptr->doc.weight+=sptr->tmp_weight*sptr->word_ptr->weight; + + sptr->word_ptr=word; + sptr->tmp_weight=tmp_weight; + + doc_cnt++; + + gweight=word->weight*GWS_IN_USE; + if (gweight < 0 || doc_cnt > 2000000) + gweight=0; + + if (_ma_test_if_changed(info) == 0) + r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); + else + r= _ma_search(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); +do_skip: + while ((subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 && + !r && info->cur_row.lastpos >= info->state->data_file_length) + r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); + + } + word->weight=gweight; + + DBUG_RETURN(0); +} + + +static int walk_and_copy(FT_SUPERDOC *from, + uint32 count __attribute__((unused)), FT_DOC **to) +{ + DBUG_ENTER("walk_and_copy"); + from->doc.weight+=from->tmp_weight*from->word_ptr->weight; + (*to)->dpos=from->doc.dpos; + (*to)->weight=from->doc.weight; + (*to)++; + DBUG_RETURN(0); +} + +static int walk_and_push(FT_SUPERDOC *from, + uint32 count __attribute__((unused)), QUEUE *best) +{ + DBUG_ENTER("walk_and_copy"); + from->doc.weight+=from->tmp_weight*from->word_ptr->weight; + set_if_smaller(best->elements, ft_query_expansion_limit-1); + queue_insert(best, (byte *)& from->doc); + DBUG_RETURN(0); +} + + +static int FT_DOC_cmp(void *unused __attribute__((unused)), + FT_DOC *a, FT_DOC *b) +{ + return sgn(b->weight - a->weight); +} + + +FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query, + uint query_len, uint flags, byte *record) +{ + TREE wtree; + ALL_IN_ONE aio; + FT_DOC *dptr; + FT_INFO *dlist=NULL; + MARIA_RECORD_POS saved_lastpos= info->cur_row.lastpos; + struct st_mysql_ftparser *parser; + MYSQL_FTPARSER_PARAM *ftparser_param; + DBUG_ENTER("maria_ft_init_nlq_search"); + + /* black magic ON */ + if ((int) (keynr = _ma_check_index(info,keynr)) < 0) + DBUG_RETURN(NULL); + if (_ma_readinfo(info,F_RDLCK,1)) + DBUG_RETURN(NULL); + /* black magic OFF */ + + aio.info=info; + aio.keynr=keynr; + aio.charset=info->s->keyinfo[keynr].seg->charset; + aio.keybuff= (uchar*) info->lastkey+info->s->base.max_key_length; + parser= info->s->keyinfo[keynr].parser; + if (! 
(ftparser_param= maria_ftparser_call_initializer(info, keynr, 0))) + goto err; + + bzero(&wtree,sizeof(wtree)); + + init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, + NULL, NULL); + + maria_ft_parse_init(&wtree, aio.charset); + ftparser_param->flags= 0; + if (maria_ft_parse(&wtree, query, query_len, parser, ftparser_param, + &wtree.mem_root)) + goto err; + + if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio, + left_root_right)) + goto err; + + if (flags & FT_EXPAND && ft_query_expansion_limit) + { + QUEUE best; + init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp, + 0); + tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push, + &best, left_root_right); + while (best.elements) + { + my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos; + if (!(*info->read_record)(info, record, docid)) + { + info->update|= HA_STATE_AKTIV; + ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY; + _ma_ft_parse(&wtree, info, keynr, record, ftparser_param, + &wtree.mem_root); + } + } + delete_queue(&best); + reset_tree(&aio.dtree); + if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio, + left_root_right)) + goto err; + + } + + /* + If ndocs == 0, this will not allocate RAM for FT_INFO.doc[], + so if ndocs == 0, FT_INFO.doc[] must not be accessed. + */ + dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+ + sizeof(FT_DOC)* + (int)(aio.dtree.elements_in_tree-1), + MYF(0)); + if (!dlist) + goto err; + + dlist->please= (struct _ft_vft *) & _ma_ft_vft_nlq; + dlist->ndocs=aio.dtree.elements_in_tree; + dlist->curdoc=-1; + dlist->info=aio.info; + dptr=dlist->doc; + + tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy, + &dptr, left_root_right); + + if (flags & FT_SORTED) + qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort2_cmp)&FT_DOC_cmp, 0); + +err: + delete_tree(&aio.dtree); + delete_tree(&wtree); + info->cur_row.lastpos= saved_lastpos; + DBUG_RETURN(dlist); +} + + +int maria_ft_nlq_read_next(FT_INFO *handler, char *record) +{ + MARIA_HA *info= (MARIA_HA *) handler->info; + + if (++handler->curdoc >= handler->ndocs) + { + --handler->curdoc; + return HA_ERR_END_OF_FILE; + } + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + info->cur_row.lastpos= handler->doc[handler->curdoc].dpos; + if (!(*info->read_record)(info, record, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + return 0; + } + return my_errno; +} + + +float maria_ft_nlq_find_relevance(FT_INFO *handler, + byte *record __attribute__((unused)), + uint length __attribute__((unused))) +{ + int a,b,c; + FT_DOC *docs=handler->doc; + MARIA_RECORD_POS docid= handler->info->cur_row.lastpos; + + if (docid == HA_POS_ERROR) + return -5.0; + + /* Assuming docs[] is sorted by dpos... 
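(Aside, not part of the patch.) The loop that follows is a plain binary search over the dpos-sorted docs[] array built by maria_ft_init_nlq_search: [a, b) is narrowed until only index a remains, then a single equality compare, together with the a < ndocs guard below, decides whether the current row was among the matches. A standalone sketch of the same idiom with made-up names:

#include <stdio.h>

/* Narrow [a, b) until b - a == 1, then one compare decides. */
static int find_docid(const unsigned long *docs, int n, unsigned long target)
{
  int a= 0, b= n, c;

  for (c= (a + b) / 2; b - a > 1; c= (a + b) / 2)
  {
    if (docs[c] > target)
      b= c;
    else
      a= c;
  }
  return (n > 0 && docs[a] == target) ? a : -1;   /* -1 means "row not in result" */
}

int main(void)
{
  static const unsigned long docs[]= { 3, 8, 15, 27, 42 };    /* sorted by docid */

  printf("27 found at index %d\n", find_docid(docs, 5, 27));  /* prints 3  */
  printf("7 found at index %d\n",  find_docid(docs, 5, 7));   /* prints -1 */
  return 0;
}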
*/ + + for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) + { + if (docs[c].dpos > docid) + b=c; + else + a=c; + } + /* bounds check to avoid accessing unallocated handler->doc */ + if (a < handler->ndocs && docs[a].dpos == docid) + return (float) docs[a].weight; + else + return 0.0; +} + + +void maria_ft_nlq_close_search(FT_INFO *handler) +{ + my_free((gptr)handler,MYF(0)); +} + + +float maria_ft_nlq_get_relevance(FT_INFO *handler) +{ + return (float) handler->doc[handler->curdoc].weight; +} + + +void maria_ft_nlq_reinit_search(FT_INFO *handler) +{ + handler->curdoc=-1; +} + diff --git a/storage/maria/ma_ft_parser.c b/storage/maria/ma_ft_parser.c new file mode 100644 index 00000000000..24713c1344f --- /dev/null +++ b/storage/maria/ma_ft_parser.c @@ -0,0 +1,425 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +#include "ma_ftdefs.h" + +typedef struct st_maria_ft_docstat { + FT_WORD *list; + uint uniq; + double sum; +} FT_DOCSTAT; + + +typedef struct st_my_maria_ft_parser_param +{ + TREE *wtree; + MEM_ROOT *mem_root; +} MY_FT_PARSER_PARAM; + + +static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) +{ + return ha_compare_text(cs, (uchar*) w1->pos, w1->len, + (uchar*) w2->pos, w2->len, 0, 0); +} + +static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) +{ + word->weight=LWS_IN_USE; + docstat->sum+=word->weight; + memcpy_fixed((docstat->list)++,word,sizeof(FT_WORD)); + return 0; +} + +/* transforms tree of words into the array, applying normalization */ + +FT_WORD * maria_ft_linearize(TREE *wtree, MEM_ROOT *mem_root) +{ + FT_WORD *wlist,*p; + FT_DOCSTAT docstat; + DBUG_ENTER("maria_ft_linearize"); + + if ((wlist=(FT_WORD *) alloc_root(mem_root, sizeof(FT_WORD)* + (1+wtree->elements_in_tree)))) + { + docstat.list=wlist; + docstat.uniq=wtree->elements_in_tree; + docstat.sum=0; + tree_walk(wtree,(tree_walk_action)&walk_and_copy,&docstat,left_root_right); + } + delete_tree(wtree); + if (!wlist) + DBUG_RETURN(NULL); + + docstat.list->pos=NULL; + + for (p=wlist;p->pos;p++) + { + p->weight=PRENORM_IN_USE; + } + + for (p=wlist;p->pos;p++) + { + p->weight/=NORM_IN_USE; + } + + DBUG_RETURN(wlist); +} + +my_bool maria_ft_boolean_check_syntax_string(const byte *str) +{ + uint i, j; + + if (!str || + (strlen(str)+1 != sizeof(ft_boolean_syntax)) || + (str[0] != ' ' && str[1] != ' ')) + return 1; + for (i=0; i<sizeof(ft_boolean_syntax); i++) + { + /* limiting to 7-bit ascii only */ + if ((unsigned char)(str[i]) > 127 || + my_isalnum(default_charset_info, str[i])) + return 1; + for (j=0; j<i; j++) + if (str[i] == str[j] && (i != 11 || j != 10)) + return 1; + } + return 0; +} + +/* + RETURN VALUE + 0 - eof + 1 - word found + 2 - left bracket + 3 - right bracket + 4 - stopword 
found +*/ +byte maria_ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, + FT_WORD *word, MYSQL_FTPARSER_BOOLEAN_INFO *param) +{ + byte *doc=*start; + int ctype; + uint mwc, length, mbl; + + param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); + param->weight_adjust= param->wasign= 0; + param->type= FT_TOKEN_EOF; + + while (doc<end) + { + for (; doc < end; doc+= (mbl > 0 ? mbl : 1)) + { + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + break; + if (*doc == FTB_RQUOT && param->quot) + { + param->quot=doc; + *start=doc+1; + param->type= FT_TOKEN_RIGHT_PAREN; + goto ret; + } + if (!param->quot) + { + if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) + { + /* param->prev=' '; */ + *start=doc+1; + if (*doc == FTB_LQUOT) param->quot=*start; + param->type= (*doc == FTB_RBR ? FT_TOKEN_RIGHT_PAREN : FT_TOKEN_LEFT_PAREN); + goto ret; + } + if (param->prev == ' ') + { + if (*doc == FTB_YES ) { param->yesno=+1; continue; } else + if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else + if (*doc == FTB_NO ) { param->yesno=-1; continue; } else + if (*doc == FTB_INC ) { param->weight_adjust++; continue; } else + if (*doc == FTB_DEC ) { param->weight_adjust--; continue; } else + if (*doc == FTB_NEG ) { param->wasign= !param->wasign; continue; } + } + } + param->prev=*doc; + param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); + param->weight_adjust= param->wasign= 0; + } + + mwc=length=0; + for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) + { + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + mwc=0; + else if (!misc_word_char(*doc) || mwc) + break; + else + mwc++; + } + param->prev='A'; /* be sure *prev is true_word_char */ + word->len= (uint)(doc-word->pos) - mwc; + if ((param->trunc=(doc<end && *doc == FTB_TRUNC))) + doc++; + + if (((length >= ft_min_word_len && !is_stopword(word->pos, word->len)) + || param->trunc) && length < ft_max_word_len) + { + *start=doc; + param->type= FT_TOKEN_WORD; + goto ret; + } + else if (length) /* make sure length > 0 (if start contains spaces only) */ + { + *start= doc; + param->type= FT_TOKEN_STOPWORD; + goto ret; + } + } + if (param->quot) + { + param->quot=*start=doc; + param->type= 3; /* FT_RBR */ + goto ret; + } +ret: + return param->type; +} + +byte maria_ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end, + FT_WORD *word, my_bool skip_stopwords) +{ + byte *doc= *start; + uint mwc, length, mbl; + int ctype; + DBUG_ENTER("maria_ft_simple_get_word"); + + do + { + for (;; doc+= (mbl > 0 ? mbl : 1)) + { + if (doc >= end) + DBUG_RETURN(0); + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + break; + } + + mwc= length= 0; + for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? 
mbl : 1)) + { + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + mwc= 0; + else if (!misc_word_char(*doc) || mwc) + break; + else + mwc++; + } + + word->len= (uint)(doc-word->pos) - mwc; + + if (skip_stopwords == FALSE || + (length >= ft_min_word_len && length < ft_max_word_len && + !is_stopword(word->pos, word->len))) + { + *start= doc; + DBUG_RETURN(1); + } + } while (doc < end); + DBUG_RETURN(0); +} + +void maria_ft_parse_init(TREE *wtree, CHARSET_INFO *cs) +{ + DBUG_ENTER("maria_ft_parse_init"); + if (!is_tree_inited(wtree)) + init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, cs); + DBUG_VOID_RETURN; +} + + +static int maria_ft_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int word_len, + MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) +{ + TREE *wtree; + FT_WORD w; + MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam; + DBUG_ENTER("maria_ft_add_word"); + wtree= ft_param->wtree; + if (param->flags & MYSQL_FTFLAGS_NEED_COPY) + { + byte *ptr; + DBUG_ASSERT(wtree->with_delete == 0); + ptr= (byte *)alloc_root(ft_param->mem_root, word_len); + memcpy(ptr, word, word_len); + w.pos= ptr; + } + else + w.pos= word; + w.len= word_len; + if (!tree_insert(wtree, &w, 0, wtree->custom_arg)) + { + delete_tree(wtree); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +static int maria_ft_parse_internal(MYSQL_FTPARSER_PARAM *param, + char *doc, int doc_len) +{ + byte *end=doc+doc_len; + MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam; + TREE *wtree= ft_param->wtree; + FT_WORD w; + DBUG_ENTER("maria_ft_parse_internal"); + + while (maria_ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE)) + if (param->mysql_add_word(param, w.pos, w.len, 0)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +int maria_ft_parse(TREE *wtree, byte *doc, int doclen, + struct st_mysql_ftparser *parser, + MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root) +{ + MY_FT_PARSER_PARAM my_param; + DBUG_ENTER("maria_ft_parse"); + DBUG_ASSERT(parser); + my_param.wtree= wtree; + my_param.mem_root= mem_root; + + param->mysql_parse= maria_ft_parse_internal; + param->mysql_add_word= maria_ft_add_word; + param->mysql_ftparam= &my_param; + param->cs= wtree->custom_arg; + param->doc= doc; + param->length= doclen; + param->mode= MYSQL_FTPARSER_SIMPLE_MODE; + DBUG_RETURN(parser->parse(param)); +} + + +#define MAX_PARAM_NR 2 +MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, + uint keynr, uint paramnr) +{ + uint32 ftparser_nr; + struct st_mysql_ftparser *parser; + if (! info->ftparser_param) + { + /* info->ftparser_param can not be zero after the initialization, + because it always includes built-in fulltext parser. And built-in + parser can be called even if the table has no fulltext indexes and + no varchar/text fields. */ + if (! info->s->ftparsers) + { + /* It's ok that modification to shared structure is done w/o mutex + locks, because all threads would set the same variables to the + same values. 
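(Aside, not part of the patch.) The loops right after this comment give every distinct fulltext parser one number, shared by all keys that use the same parser, and each call then uses slot parser_nr * MAX_PARAM_NR + paramnr so that the recursive phrase check gets its own MYSQL_FTPARSER_PARAM; number 0 appears reserved for the NO_SUCH_KEY/built-in parser path. A toy standalone sketch of that numbering, with made-up keys:

#include <stdio.h>

#define MAX_PARAM_NR 2   /* one slot for the outer parse, one for the nested phrase check */

int main(void)
{
  int pa, pb, pc;                                      /* stand-ins for parser plugins  */
  const void *key_parser[5]= { &pa, &pb, &pa, &pc, &pb };
  unsigned parser_nr[5];
  unsigned i, j, next_nr= 1;                           /* 0 kept for the default parser */

  for (i= 0; i < 5; i++)
  {
    for (j= 0; ; j++)
    {
      if (j == i)                                      /* first key using this parser   */
      { parser_nr[i]= next_nr++; break; }
      if (key_parser[j] == key_parser[i])              /* reuse an earlier key's number */
      { parser_nr[i]= parser_nr[j]; break; }
    }
  }

  for (i= 0; i < 5; i++)
    printf("key %u: parser_nr=%u -> param slots %u and %u\n",
           i, parser_nr[i],
           parser_nr[i] * MAX_PARAM_NR, parser_nr[i] * MAX_PARAM_NR + 1);
  return 0;
}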
*/ + uint i, j, keys= info->s->state.header.keys, ftparsers= 1; + for (i= 0; i < keys; i++) + { + MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i]; + if (keyinfo->flag & HA_FULLTEXT) + { + for (j= 0;; j++) + { + if (j == i) + { + keyinfo->ftparser_nr= ftparsers++; + break; + } + if (info->s->keyinfo[j].flag & HA_FULLTEXT && + keyinfo->parser == info->s->keyinfo[j].parser) + { + keyinfo->ftparser_nr= info->s->keyinfo[j].ftparser_nr; + break; + } + } + } + } + info->s->ftparsers= ftparsers; + } + /* + We have to allocate two MYSQL_FTPARSER_PARAM structures per plugin + because in a boolean search a parser is called recursively + ftb_find_relevance* calls ftb_check_phrase* + (MAX_PARAM_NR=2) + */ + info->ftparser_param= (MYSQL_FTPARSER_PARAM *) + my_malloc(MAX_PARAM_NR * sizeof(MYSQL_FTPARSER_PARAM) * + info->s->ftparsers, MYF(MY_WME|MY_ZEROFILL)); + init_alloc_root(&info->ft_memroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); + if (! info->ftparser_param) + return 0; + } + if (keynr == NO_SUCH_KEY) + { + ftparser_nr= 0; + parser= &ft_default_parser; + } + else + { + ftparser_nr= info->s->keyinfo[keynr].ftparser_nr; + parser= info->s->keyinfo[keynr].parser; + } + DBUG_ASSERT(paramnr < MAX_PARAM_NR); + ftparser_nr= ftparser_nr*MAX_PARAM_NR + paramnr; + if (! info->ftparser_param[ftparser_nr].mysql_add_word) + { + /* Note, that mysql_add_word is used here as a flag: + mysql_add_word == 0 - parser is not initialized + mysql_add_word != 0 - parser is initialized, or no + initialization needed. */ + info->ftparser_param[ftparser_nr].mysql_add_word= (void *)1; + if (parser->init && parser->init(&info->ftparser_param[ftparser_nr])) + return 0; + } + return &info->ftparser_param[ftparser_nr]; +} + + +void maria_ftparser_call_deinitializer(MARIA_HA *info) +{ + uint i, j, keys= info->s->state.header.keys; + free_root(&info->ft_memroot, MYF(0)); + if (! info->ftparser_param) + return; + for (i= 0; i < keys; i++) + { + MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i]; + for (j=0; j < MAX_PARAM_NR; j++) + { + MYSQL_FTPARSER_PARAM *ftparser_param= + &info->ftparser_param[keyinfo->ftparser_nr*MAX_PARAM_NR + j]; + if (keyinfo->flag & HA_FULLTEXT && ftparser_param->mysql_add_word) + { + if (keyinfo->parser->deinit) + keyinfo->parser->deinit(ftparser_param); + ftparser_param->mysql_add_word= 0; + } + else + break; + } + } +} diff --git a/storage/maria/ma_ft_stem.c b/storage/maria/ma_ft_stem.c new file mode 100644 index 00000000000..7a2f8cfd7c5 --- /dev/null +++ b/storage/maria/ma_ft_stem.c @@ -0,0 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. 
Golubchik, who has a shared copyright to this code */ + +/* mulitingual stem */ diff --git a/storage/maria/ma_ft_test1.c b/storage/maria/ma_ft_test1.c new file mode 100644 index 00000000000..595c31e774c --- /dev/null +++ b/storage/maria/ma_ft_test1.c @@ -0,0 +1,318 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code + added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ + +#include "ma_ftdefs.h" +#include "maria_ft_test1.h" +#include <my_getopt.h> + +static int key_field=FIELD_VARCHAR,extra_field=FIELD_SKIP_ENDSPACE; +static uint key_length=200,extra_length=50; +static int key_type=HA_KEYTYPE_TEXT; +static int verbose=0,silent=0,skip_update=0, + no_keys=0,no_stopwords=0,no_search=0,no_fulltext=0; +static int create_flag=0,error=0; + +#define MAX_REC_LENGTH 300 +static char record[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH]; + +static int run_test(const char *filename); +static void get_options(int argc, char *argv[]); +static void create_record(char *, int); +static void usage(); + +static struct my_option my_long_options[] = +{ + {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 's', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'N', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'K', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'F', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'U', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '#', "", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +int main(int argc, char *argv[]) +{ + MY_INIT(argv[0]); + + get_options(argc,argv); + maria_init(); + + exit(run_test("FT1")); +} + +static MARIA_COLUMNDEF recinfo[3]; +static MARIA_KEYDEF keyinfo[2]; +static HA_KEYSEG keyseg[10]; + +static int run_test(const char *filename) +{ + MARIA_HA *file; + int i,j; + my_off_t pos; + + bzero((char*) recinfo,sizeof(recinfo)); + + /* First define 2 columns */ + recinfo[0].type=extra_field; + recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + maria_portable_sizeof_char_ptr : + extra_length); + if (extra_field == FIELD_VARCHAR) + recinfo[0].length+= HA_VARCHAR_PACKLENGTH(extra_length); + recinfo[1].type=key_field; + recinfo[1].length= (key_field == FIELD_BLOB ? 
4+maria_portable_sizeof_char_ptr : + key_length); + if (key_field == FIELD_VARCHAR) + recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length); + + /* Define a key over the first column */ + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[0].seg[0].type= key_type; + keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB) ? HA_BLOB_PART: + (key_field == FIELD_VARCHAR) ? HA_VAR_LENGTH_PART:0; + keyinfo[0].seg[0].start=recinfo[0].length; + keyinfo[0].seg[0].length=key_length; + keyinfo[0].seg[0].null_bit= 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language= default_charset_info->number; + keyinfo[0].flag = (no_fulltext?HA_PACK_KEY:HA_FULLTEXT); + + if (!silent) + printf("- Creating isam-file\n"); + if (maria_create(filename,(no_keys?0:1),keyinfo,2,recinfo,0,NULL, + (MARIA_CREATE_INFO*) 0, create_flag)) + goto err; + if (!(file=maria_open(filename,2,0))) + goto err; + + if (!silent) + printf("- %s stopwords\n",no_stopwords?"Skipping":"Initializing"); + maria_ft_init_stopwords(no_stopwords?NULL:maria_ft_precompiled_stopwords); + + if (!silent) + printf("- Writing key:s\n"); + + my_errno=0; + for (i=NUPD ; i<NDATAS; i++ ) + { + create_record(record,i); + error=maria_write(file,record); + if (verbose || error) + printf("I= %2d maria_write: %d errno: %d, record: %s\n", + i,error,my_errno,data[i].f0); + } + + if (!skip_update) + { + if (!silent) + printf("- Updating rows\n"); + + /* Read through all rows and update them */ + pos=(ha_rows) 0; + i=0; + while ((error=maria_rrnd(file,read_record,pos)) == 0) + { + create_record(record,NUPD-i-1); + if (maria_update(file,read_record,record)) + { + printf("Can't update row: %.*s, error: %d\n", + keyinfo[0].seg[0].length,record,my_errno); + } + if(++i == NUPD) break; + pos=HA_OFFSET_ERROR; + } + if (i != NUPD) + printf("Found %d of %d rows\n", i,NUPD); + } + + if (maria_close(file)) goto err; + if(no_search) return 0; + if (!silent) + printf("- Reopening file\n"); + if (!(file=maria_open(filename,2,0))) goto err; + if (!silent) + printf("- Reading rows with key\n"); + for (i=0 ; i < NQUERIES ; i++) + { + FT_DOCLIST *result; + result=maria_ft_nlq_init_search(file,0,(char*) query[i],strlen(query[i]),1); + if(!result) + { + printf("Query %d: `%s' failed with errno %3d\n",i,query[i],my_errno); + continue; + } + printf("Query %d: `%s'. Found: %d. 
Top five documents:\n", + i,query[i],result->ndocs); + for (j=0;j<5;j++) + { + double w; int err; + err= maria_ft_nlq_read_next(result, read_record); + if (err==HA_ERR_END_OF_FILE) + { + printf("No more matches!\n"); + break; + } + else if (err) + { + printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno); + break; + } + w=maria_ft_nlq_get_relevance(result); + if (key_field == FIELD_VARCHAR) + { + uint l; + char *p; + p=recinfo[0].length+read_record; + l=uint2korr(p); + printf("%10.7f: %.*s\n",w,(int) l,p+2); + } + else + printf("%10.7f: %.*s\n",w,recinfo[1].length, + recinfo[0].length+read_record); + } + maria_ft_nlq_close_search(result); + } + + if (maria_close(file)) goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return (0); +err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ +} + +static char blob_key[MAX_REC_LENGTH]; +/* static char blob_record[MAX_REC_LENGTH+20*20]; */ + +void create_record(char *pos, int n) +{ + bzero((char*) pos,MAX_REC_LENGTH); + if (recinfo[0].type == FIELD_BLOB) + { + uint tmp; + char *ptr; + strnmov(blob_key,data[n].f0,keyinfo[0].seg[0].length); + tmp=strlen(blob_key); + int4store(pos,tmp); + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + pos+=recinfo[0].length; + } + else if (recinfo[0].type == FIELD_VARCHAR) + { + uint tmp; + /* -1 is here because pack_length is stored in seg->length */ + uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1); + strnmov(pos+pack_length,data[n].f0,keyinfo[0].seg[0].length); + tmp=strlen(pos+pack_length); + if (pack_length == 1) + *pos= (char) tmp; + else + int2store(pos,tmp); + pos+=recinfo[0].length; + } + else + { + strnmov(pos,data[n].f0,keyinfo[0].seg[0].length); + pos+=recinfo[0].length; + } + if (recinfo[1].type == FIELD_BLOB) + { + uint tmp; + char *ptr; + strnmov(blob_key,data[n].f2,keyinfo[0].seg[0].length); + tmp=strlen(blob_key); + int4store(pos,tmp); + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + pos+=recinfo[1].length; + } + else if (recinfo[1].type == FIELD_VARCHAR) + { + uint tmp; + /* -1 is here because pack_length is stored in seg->length */ + uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1); + strnmov(pos+pack_length,data[n].f2,keyinfo[0].seg[0].length); + tmp=strlen(pos+1); + if (pack_length == 1) + *pos= (char) tmp; + else + int2store(pos,tmp); + pos+=recinfo[1].length; + } + else + { + strnmov(pos,data[n].f2,keyinfo[0].seg[0].length); + pos+=recinfo[1].length; + } +} + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch(optid) { + case 'v': verbose=1; break; + case 's': silent=1; break; + case 'F': no_fulltext=1; no_search=1; + case 'U': skip_update=1; break; + case 'K': no_keys=no_search=1; break; + case 'N': no_search=1; break; + case 'S': no_stopwords=1; break; + case '#': + DBUG_PUSH (argument); + break; + case 'V': + case '?': + case 'h': + usage(); + exit(1); + } + return 0; +} + +/* Read options */ + +static void get_options(int argc,char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + return; +} /* get options */ + + +static void usage() +{ + printf("%s [options]\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/ma_ft_test1.h b/storage/maria/ma_ft_test1.h new file mode 100644 index 00000000000..9449f063125 --- /dev/null +++ 
b/storage/maria/ma_ft_test1.h @@ -0,0 +1,421 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +#define NUPD 20 +#define NDATAS 389 +struct { const char *f0, *f2; } data[NDATAS] = { + {"1", "General Information about MySQL"}, + {"1.1", "What is MySQL?"}, + {"1.2", "About this manual"}, + {"1.3", "History of MySQL"}, + {"1.4", "The main features of MySQL"}, + {"1.5", "General SQL information and tutorials"}, + {"1.6", "Useful MySQL-related links"}, + {"1.7", "What are stored procedures and triggers and so on?"}, + {"2", "MySQL mailing lists and how to ask questions/give error (bug) reports"}, + {"2.1", "Subscribing to/un-subscribing from the MySQL mailing list"}, + {"2.2", "Asking questions or reporting bugs"}, + {"2.3", "I think I have found a bug. What information do you need to help me?"}, + {"2.3.1", "MySQL keeps crashing"}, + {"2.4", "Guidelines for answering questions on the mailing list"}, + {"3", "Licensing or When do I have/want to pay for MySQL?"}, + {"3.1", "How much does MySQL cost?"}, + {"3.2", "How do I get commercial support?"}, + {"3.2.1", "Types of commercial support"}, + {"3.2.1.1", "Basic email support"}, + {"3.2.1.2", "Extended email support"}, +/*------------------------------- NUPD=20 -------------------------------*/ + {"3.2.1.3", "Asking: Login support"}, + {"3.2.1.4", "Extended login support"}, + {"3.3", "How do I pay for licenses/support?"}, + {"3.4", "Who do I contact when I want more information about licensing/support?"}, + {"3.5", "What Copyright does MySQL use?"}, + {"3.6", "When may I distribute MySQL commercially without a fee?"}, + {"3.7", "I want to sell a product that can be configured to use MySQL"}, + {"3.8", "I am running a commercial web server using MySQL"}, + {"3.9", "Do I need a license to sell commercial Perl/tcl/PHP/Web+ etc applications?"}, + {"3.10", "Possible future changes in the licensing"}, + {"4", "Compiling and installing MySQL"}, + {"4.1", "How do I get MySQL?"}, + {"4.2", "Which MySQL version should I use?"}, + {"4.3", "How/when will you release updates?"}, + {"4.4", "What operating systems does MySQL support?"}, + {"4.5", "Compiling MySQL from source code"}, + {"4.5.1", "Quick installation overview"}, + {"4.5.2", "Usual configure switches"}, + {"4.5.3", "Applying a patch"}, + {"4.6", "Problems compiling?"}, + {"4.7", "General compilation notes"}, + {"4.8", "MIT-pthreads notes (FreeBSD)"}, + {"4.9", "Perl installation comments"}, + {"4.10", "Special things to consider for some machine/OS combinations"}, + {"4.10.1", "Solaris notes"}, + {"4.10.2", "SunOS 4 notes"}, + {"4.10.3", "Linux notes for all versions"}, + {"4.10.3.1", "Linux-x86 notes"}, + {"4.10.3.2", "RedHat 5.0"}, + {"4.10.3.3", "RedHat 5.1"}, + {"4.10.3.4", "Linux-Sparc notes"}, + 
{"4.10.3.5", "Linux-Alpha notes"}, + {"4.10.3.6", "MkLinux notes"}, + {"4.10.4", "Alpha-DEC-Unix notes"}, + {"4.10.5", "Alpha-DEC-OSF1 notes"}, + {"4.10.6", "SGI-IRIX notes"}, + {"4.10.7", "FreeBSD notes"}, + {"4.10.7.1", "FreeBSD-3.0 notes"}, + {"4.10.8", "BSD/OS 2.# notes"}, + {"4.10.8.1", "BSD/OS 3.# notes"}, + {"4.10.9", "SCO notes"}, + {"4.10.10", "SCO Unixware 7.0 notes"}, + {"4.10.11", "IBM-AIX notes"}, + {"4.10.12", "HP-UX notes"}, + {"4.11", "TcX binaries"}, + {"4.12", "Win32 notes"}, + {"4.13", "Installation instructions for MySQL binary releases"}, + {"4.13.1", "How to get MySQL Perl support working"}, + {"4.13.2", "Linux notes"}, + {"4.13.3", "HP-UX notes"}, + {"4.13.4", "Linking client libraries"}, + {"4.14", "Problems running mysql_install_db"}, + {"4.15", "Problems starting MySQL"}, + {"4.16", "Automatic start/stop of MySQL"}, + {"4.17", "Option files"}, + {"5", "How standards-compatible is MySQL?"}, + {"5.1", "What extensions has MySQL to ANSI SQL92?"}, + {"5.2", "What functionality is missing in MySQL?"}, + {"5.2.1", "Sub-selects"}, + {"5.2.2", "SELECT INTO TABLE"}, + {"5.2.3", "Transactions"}, + {"5.2.4", "Triggers"}, + {"5.2.5", "Foreign Keys"}, + {"5.2.5.1", "Some reasons NOT to use FOREIGN KEYS"}, + {"5.2.6", "Views"}, + {"5.2.7", "-- as start of a comment"}, + {"5.3", "What standards does MySQL follow?"}, + {"5.4", "What functions exist only for compatibility?"}, + {"5.5", "Limitations of BLOB and TEXT types"}, + {"5.6", "How to cope without COMMIT-ROLLBACK"}, + {"6", "The MySQL access privilege system"}, + {"6.1", "What the privilege system does"}, + {"6.2", "Connecting to the MySQL server"}, + {"6.2.1", "Keeping your password secure"}, + {"6.3", "Privileges provided by MySQL"}, + {"6.4", "How the privilege system works"}, + {"6.5", "The privilege tables"}, + {"6.6", "Setting up the initial MySQL privileges"}, + {"6.7", "Adding new user privileges to MySQL"}, + {"6.8", "An example permission setup"}, + {"6.9", "Causes of Access denied errors"}, + {"6.10", "How to make MySQL secure against crackers"}, + {"7", "MySQL language reference"}, + {"7.1", "Literals: how to write strings and numbers"}, + {"7.1.1", "Strings"}, + {"7.1.2", "Numbers"}, + {"7.1.3", "NULL values"}, + {"7.1.4", "Database, table, index, column and alias names"}, + {"7.1.4.1", "Case sensitivity in names"}, + {"7.2", "Column types"}, + {"7.2.1", "Column type storage requirements"}, + {"7.2.5", "Numeric types"}, + {"7.2.6", "Date and time types"}, + {"7.2.6.1", "The DATE type"}, + {"7.2.6.2", "The TIME type"}, + {"7.2.6.3", "The DATETIME type"}, + {"7.2.6.4", "The TIMESTAMP type"}, + {"7.2.6.5", "The YEAR type"}, + {"7.2.6.6", "Miscellaneous date and time properties"}, + {"7.2.7", "String types"}, + {"7.2.7.1", "The CHAR and VARCHAR types"}, + {"7.2.7.2", "The BLOB and TEXT types"}, + {"7.2.7.3", "The ENUM type"}, + {"7.2.7.4", "The SET type"}, + {"7.2.8", "Choosing the right type for a column"}, + {"7.2.9", "Column indexes"}, + {"7.2.10", "Multiple-column indexes"}, + {"7.2.11", "Using column types from other database engines"}, + {"7.3", "Functions for use in SELECT and WHERE clauses"}, + {"7.3.1", "Grouping functions"}, + {"7.3.2", "Normal arithmetic operations"}, + {"7.3.3", "Bit functions"}, + {"7.3.4", "Logical operations"}, + {"7.3.5", "Comparison operators"}, + {"7.3.6", "String comparison functions"}, + {"7.3.7", "Control flow functions"}, + {"7.3.8", "Mathematical functions"}, + {"7.3.9", "String functions"}, + {"7.3.10", "Date and time functions"}, + {"7.3.11", "Miscellaneous functions"}, + 
{"7.3.12", "Functions for use with GROUP BY clauses"}, + {"7.4", "CREATE DATABASE syntax"}, + {"7.5", "DROP DATABASE syntax"}, + {"7.6", "CREATE TABLE syntax"}, + {"7.7", "ALTER TABLE syntax"}, + {"7.8", "OPTIMIZE TABLE syntax"}, + {"7.9", "DROP TABLE syntax"}, + {"7.10", "DELETE syntax"}, + {"7.11", "SELECT syntax"}, + {"7.12", "JOIN syntax"}, + {"7.13", "INSERT syntax"}, + {"7.14", "REPLACE syntax"}, + {"7.15", "LOAD DATA INFILE syntax"}, + {"7.16", "UPDATE syntax"}, + {"7.17", "USE syntax"}, + {"7.18", "SHOW syntax (Get information about tables, columns...)"}, + {"7.19", "EXPLAIN syntax (Get information about a SELECT)"}, + {"7.20", "DESCRIBE syntax (Get information about columns)"}, + {"7.21", "LOCK TABLES/UNLOCK TABLES syntax"}, + {"7.22", "SET OPTION syntax"}, + {"7.23", "GRANT syntax (Compatibility function)"}, + {"7.24", "CREATE INDEX syntax (Compatibility function)"}, + {"7.25", "DROP INDEX syntax (Compatibility function)"}, + {"7.26", "Comment syntax"}, + {"7.27", "CREATE FUNCTION/DROP FUNCTION syntax"}, + {"7.28", "Is MySQL picky about reserved words?"}, + {"8", "Example SQL queries"}, + {"8.1", "Queries from twin project"}, + {"8.1.1", "Find all non-distributed twins"}, + {"8.1.2", "Show a table on twin pair status"}, + {"9", "How safe/stable is MySQL?"}, + {"9.1", "How stable is MySQL?"}, + {"9.2", "Why are there is so many releases of MySQL?"}, + {"9.3", "Checking a table for errors"}, + {"9.4", "How to repair tables"}, + {"9.5", "Is there anything special to do when upgrading/downgrading MySQL?"}, + {"9.5.1", "Upgrading from a 3.21 version to 3.22"}, + {"9.5.2", "Upgrading from a 3.20 version to 3.21"}, + {"9.5.3", "Upgrading to another architecture"}, + {"9.6", "Year 2000 compliance"}, + {"10", "MySQL Server functions"}, + {"10.1", "What languages are supported by MySQL?"}, + {"10.1.1", "Character set used for data & sorting"}, + {"10.2", "The update log"}, + {"10.3", "How big can MySQL tables be?"}, + {"11", "Getting maximum performance from MySQL"}, + {"11.1", "How does one change the size of MySQL buffers?"}, + {"11.2", "How compiling and linking affects the speed of MySQL"}, + {"11.3", "How does MySQL use memory?"}, + {"11.4", "How does MySQL use indexes?"}, + {"11.5", "What optimizations are done on WHERE clauses?"}, + {"11.6", "How does MySQL open & close tables?"}, + {"11.6.0.1", "What are the drawbacks of creating possibly thousands of tables in a database?"}, + {"11.7", "How does MySQL lock tables?"}, + {"11.8", "How should I arrange my table to be as fast/small as possible?"}, + {"11.9", "What affects the speed of INSERT statements?"}, + {"11.10", "What affects the speed DELETE statements?"}, + {"11.11", "How do I get MySQL to run at full speed?"}, + {"11.12", "What are the different row formats? 
Or, when should VARCHAR/CHAR be used?"}, + {"11.13", "Why so many open tables?"}, + {"12", "MySQL benchmark suite"}, + {"13", "MySQL Utilites"}, + {"13.1", "Overview of the different MySQL programs"}, + {"13.2", "The MySQL table check, optimize and repair program"}, + {"13.2.1", "isamchk memory use"}, + {"13.2.2", "Getting low-level table information"}, + {"13.3", "The MySQL compressed read-only table generator"}, + {"14", "Adding new functions to MySQL"}, + {"15", "MySQL ODBC Support"}, + {"15.1", "Operating systems supported by MyODBC"}, + {"15.2", "How to report problems with MyODBC"}, + {"15.3", "Programs known to work with MyODBC"}, + {"15.4", "How to fill in the various fields in the ODBC administrator program"}, + {"15.5", "How to get the value of an AUTO_INCREMENT column in ODBC"}, + {"16", "Problems and common errors"}, + {"16.1", "Some common errors when using MySQL"}, + {"16.1.1", "MySQL server has gone away error"}, + {"16.1.2", "Can't connect to local MySQL server error"}, + {"16.1.3", "Out of memory error"}, + {"16.1.4", "Packet too large error"}, + {"16.1.5", "The table is full error"}, + {"16.1.6", "Commands out of sync error in client"}, + {"16.1.7", "Removing user error"}, + {"16.2", "How MySQL handles a full disk"}, + {"16.3", "How to run SQL commands from a text file"}, + {"16.4", "Where MySQL stores temporary files"}, + {"16.5", "Access denied error"}, + {"16.6", "How to run MySQL as a normal user"}, + {"16.7", "Problems with file permissions"}, + {"16.8", "File not found"}, + {"16.9", "Problems using DATE columns"}, + {"16.10", "Case sensitivity in searches"}, + {"16.11", "Problems with NULL values"}, + {"17", "Solving some common problems with MySQL"}, + {"17.1", "Database replication"}, + {"17.2", "Database backups"}, + {"18", "MySQL client tools and API's"}, + {"18.1", "MySQL C API"}, + {"18.2", "C API datatypes"}, + {"18.3", "C API function overview"}, + {"18.4", "C API function descriptions"}, + {"18.4.1", "mysql_affected_rows()"}, + {"18.4.2", "mysql_close()"}, + {"18.4.3", "mysql_connect()"}, + {"18.4.4", "mysql_create_db()"}, + {"18.4.5", "mysql_data_seek()"}, + {"18.4.6", "mysql_debug()"}, + {"18.4.7", "mysql_drop_db()"}, + {"18.4.8", "mysql_dump_debug_info()"}, + {"18.4.9", "mysql_eof()"}, + {"18.4.10", "mysql_errno()"}, + {"18.4.11", "mysql_error()"}, + {"18.4.12", "mysql_escape_string()"}, + {"18.4.13", "mysql_fetch_field()"}, + {"18.4.14", "mysql_fetch_fields()"}, + {"18.4.15", "mysql_fetch_field_direct()"}, + {"18.4.16", "mysql_fetch_lengths()"}, + {"18.4.17", "mysql_fetch_row()"}, + {"18.4.18", "mysql_field_seek()"}, + {"18.4.19", "mysql_field_tell()"}, + {"18.4.20", "mysql_free_result()"}, + {"18.4.21", "mysql_get_client_info()"}, + {"18.4.22", "mysql_get_host_info()"}, + {"18.4.23", "mysql_get_proto_info()"}, + {"18.4.24", "mysql_get_server_info()"}, + {"18.4.25", "mysql_info()"}, + {"18.4.26", "mysql_init()"}, + {"18.4.27", "mysql_insert_id()"}, + {"18.4.28", "mysql_kill()"}, + {"18.4.29", "mysql_list_dbs()"}, + {"18.4.30", "mysql_list_fields()"}, + {"18.4.31", "mysql_list_processes()"}, + {"18.4.32", "mysql_list_tables()"}, + {"18.4.33", "mysql_num_fields()"}, + {"18.4.34", "mysql_num_rows()"}, + {"18.4.35", "mysql_query()"}, + {"18.4.36", "mysql_real_connect()"}, + {"18.4.37", "mysql_real_query()"}, + {"18.4.38", "mysql_reload()"}, + {"18.4.39", "mysql_row_tell()"}, + {"18.4.40", "mysql_select_db()"}, + {"18.4.41", "mysql_shutdown()"}, + {"18.4.42", "mysql_stat()"}, + {"18.4.43", "mysql_store_result()"}, + {"18.4.44", "mysql_thread_id()"}, + 
{"18.4.45", "mysql_use_result()"}, + {"18.4.46", "Why is it that after mysql_query() returns success, mysql_store_result() sometimes returns NULL?"}, + {"18.4.47", "What results can I get from a query?"}, + {"18.4.48", "How can I get the unique ID for the last inserted row?"}, + {"18.4.49", "Problems linking with the C API"}, + {"18.4.50", "How to make a thread-safe client"}, + {"18.5", "MySQL Perl API's"}, + {"18.5.1", "DBI with DBD::mysql"}, + {"18.5.1.1", "The DBI interface"}, + {"18.5.1.2", "More DBI/DBD information"}, + {"18.6", "MySQL Java connectivity (JDBC)"}, + {"18.7", "MySQL PHP API's"}, + {"18.8", "MySQL C++ API's"}, + {"18.9", "MySQL Python API's"}, + {"18.10", "MySQL TCL API's"}, + {"19", "How MySQL compares to other databases"}, + {"19.1", "How MySQL compares to mSQL"}, + {"19.1.1", "How to convert mSQL tools for MySQL"}, + {"19.1.2", "How mSQL and MySQL client/server communications protocols differ"}, + {"19.1.3", "How mSQL 2.0 SQL syntax differs from MySQL"}, + {"19.2", "How MySQL compares to PostgreSQL"}, + {"A", "Some users of MySQL"}, + {"B", "Contributed programs"}, + {"C", "Contributors to MySQL"}, + {"D", "MySQL change history"}, + {"19.3", "Changes in release 3.22.x (Alpha version)"}, + {"19.3.1", "Changes in release 3.22.7"}, + {"19.3.2", "Changes in release 3.22.6"}, + {"19.3.3", "Changes in release 3.22.5"}, + {"19.3.4", "Changes in release 3.22.4"}, + {"19.3.5", "Changes in release 3.22.3"}, + {"19.3.6", "Changes in release 3.22.2"}, + {"19.3.7", "Changes in release 3.22.1"}, + {"19.3.8", "Changes in release 3.22.0"}, + {"19.4", "Changes in release 3.21.x"}, + {"19.4.1", "Changes in release 3.21.33"}, + {"19.4.2", "Changes in release 3.21.32"}, + {"19.4.3", "Changes in release 3.21.31"}, + {"19.4.4", "Changes in release 3.21.30"}, + {"19.4.5", "Changes in release 3.21.29"}, + {"19.4.6", "Changes in release 3.21.28"}, + {"19.4.7", "Changes in release 3.21.27"}, + {"19.4.8", "Changes in release 3.21.26"}, + {"19.4.9", "Changes in release 3.21.25"}, + {"19.4.10", "Changes in release 3.21.24"}, + {"19.4.11", "Changes in release 3.21.23"}, + {"19.4.12", "Changes in release 3.21.22"}, + {"19.4.13", "Changes in release 3.21.21a"}, + {"19.4.14", "Changes in release 3.21.21"}, + {"19.4.15", "Changes in release 3.21.20"}, + {"19.4.16", "Changes in release 3.21.19"}, + {"19.4.17", "Changes in release 3.21.18"}, + {"19.4.18", "Changes in release 3.21.17"}, + {"19.4.19", "Changes in release 3.21.16"}, + {"19.4.20", "Changes in release 3.21.15"}, + {"19.4.21", "Changes in release 3.21.14b"}, + {"19.4.22", "Changes in release 3.21.14a"}, + {"19.4.23", "Changes in release 3.21.13"}, + {"19.4.24", "Changes in release 3.21.12"}, + {"19.4.25", "Changes in release 3.21.11"}, + {"19.4.26", "Changes in release 3.21.10"}, + {"19.4.27", "Changes in release 3.21.9"}, + {"19.4.28", "Changes in release 3.21.8"}, + {"19.4.29", "Changes in release 3.21.7"}, + {"19.4.30", "Changes in release 3.21.6"}, + {"19.4.31", "Changes in release 3.21.5"}, + {"19.4.32", "Changes in release 3.21.4"}, + {"19.4.33", "Changes in release 3.21.3"}, + {"19.4.34", "Changes in release 3.21.2"}, + {"19.4.35", "Changes in release 3.21.0"}, + {"19.5", "Changes in release 3.20.x"}, + {"19.5.1", "Changes in release 3.20.18"}, + {"19.5.2", "Changes in release 3.20.17"}, + {"19.5.3", "Changes in release 3.20.16"}, + {"19.5.4", "Changes in release 3.20.15"}, + {"19.5.5", "Changes in release 3.20.14"}, + {"19.5.6", "Changes in release 3.20.13"}, + {"19.5.7", "Changes in release 3.20.11"}, + {"19.5.8", "Changes in release 
3.20.10"}, + {"19.5.9", "Changes in release 3.20.9"}, + {"19.5.10", "Changes in release 3.20.8"}, + {"19.5.11", "Changes in release 3.20.7"}, + {"19.5.12", "Changes in release 3.20.6"}, + {"19.5.13", "Changes in release 3.20.3"}, + {"19.5.14", "Changes in release 3.20.0"}, + {"19.6", "Changes in release 3.19.x"}, + {"19.6.1", "Changes in release 3.19.5"}, + {"19.6.2", "Changes in release 3.19.4"}, + {"19.6.3", "Changes in release 3.19.3"}, + {"E", "Known errors and design deficiencies in MySQL"}, + {"F", "List of things we want to add to MySQL in the future (The TODO)"}, + {"19.7", "Things that must done in the real near future"}, + {"19.8", "Things that have to be done sometime"}, + {"19.9", "Some things we don't have any plans to do"}, + {"G", "Comments on porting to other systems"}, + {"19.10", "Debugging MySQL"}, + {"19.11", "Comments about RTS threads"}, + {"19.12", "What is the difference between different thread packages?"}, + {"H", "Description of MySQL regular expression syntax"}, + {"I", "What is Unireg?"}, + {"J", "The MySQL server license"}, + {"K", "The MySQL license for Microsoft operating systems"}, + {"*", "SQL command, type and function index"}, + {"*", "Concept Index"} +}; + +#define NQUERIES 5 +const char *query[NQUERIES]={ + "mysql information and manual", + "upgrading from previous version", + "column indexes", + "against about after more right the with/without", /* stopwords test */ + "mysql license and copyright" +}; diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c new file mode 100644 index 00000000000..25f4d5a67a0 --- /dev/null +++ b/storage/maria/ma_ft_update.c @@ -0,0 +1,353 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. 
Golubchik, who has a shared copyright to this code */ + +/* functions to work with full-text indices */ + +#include "ma_ftdefs.h" +#include <math.h> + +void _ma_ft_segiterator_init(MARIA_HA *info, uint keynr, const byte *record, + FT_SEG_ITERATOR *ftsi) +{ + DBUG_ENTER("_ma_ft_segiterator_init"); + + ftsi->num=info->s->keyinfo[keynr].keysegs; + ftsi->seg=info->s->keyinfo[keynr].seg; + ftsi->rec=record; + DBUG_VOID_RETURN; +} + +void _ma_ft_segiterator_dummy_init(const byte *record, uint len, + FT_SEG_ITERATOR *ftsi) +{ + DBUG_ENTER("_ma_ft_segiterator_dummy_init"); + + ftsi->num=1; + ftsi->seg=0; + ftsi->pos=record; + ftsi->len=len; + DBUG_VOID_RETURN; +} + +/* + This function breaks convention "return 0 in success" + but it's easier to use like this + + while(_ma_ft_segiterator()) + + so "1" means "OK", "0" means "EOF" +*/ + +uint _ma_ft_segiterator(register FT_SEG_ITERATOR *ftsi) +{ + DBUG_ENTER("_ma_ft_segiterator"); + + if (!ftsi->num) + DBUG_RETURN(0); + + ftsi->num--; + if (!ftsi->seg) + DBUG_RETURN(1); + + ftsi->seg--; + + if (ftsi->seg->null_bit && + (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit)) + { + ftsi->pos=0; + DBUG_RETURN(1); + } + ftsi->pos= ftsi->rec+ftsi->seg->start; + if (ftsi->seg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= (ftsi->seg->bit_start); + ftsi->len= (pack_length == 1 ? (uint) *(uchar*) ftsi->pos : + uint2korr(ftsi->pos)); + ftsi->pos+= pack_length; /* Skip VARCHAR length */ + DBUG_RETURN(1); + } + if (ftsi->seg->flag & HA_BLOB_PART) + { + ftsi->len= _ma_calc_blob_length(ftsi->seg->bit_start,ftsi->pos); + memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start, + sizeof(char*)); + DBUG_RETURN(1); + } + ftsi->len=ftsi->seg->length; + DBUG_RETURN(1); +} + + +/* parses a document i.e. calls maria_ft_parse for every keyseg */ + +uint _ma_ft_parse(TREE *parsed, MARIA_HA *info, uint keynr, const byte *record, + MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root) +{ + FT_SEG_ITERATOR ftsi; + struct st_mysql_ftparser *parser; + DBUG_ENTER("_ma_ft_parse"); + + _ma_ft_segiterator_init(info, keynr, record, &ftsi); + + maria_ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset); + parser= info->s->keyinfo[keynr].parser; + while (_ma_ft_segiterator(&ftsi)) + { + if (ftsi.pos) + if (maria_ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, parser, param, + mem_root)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +FT_WORD * _ma_ft_parserecord(MARIA_HA *info, uint keynr, const byte *record, + MEM_ROOT *mem_root) +{ + TREE ptree; + MYSQL_FTPARSER_PARAM *param; + DBUG_ENTER("_ma_ft_parserecord"); + if (! 
(param= maria_ftparser_call_initializer(info, keynr, 0))) + DBUG_RETURN(NULL); + bzero((char*) &ptree, sizeof(ptree)); + param->flags= 0; + if (_ma_ft_parse(&ptree, info, keynr, record, param, mem_root)) + DBUG_RETURN(NULL); + + DBUG_RETURN(maria_ft_linearize(&ptree, mem_root)); +} + +static int _ma_ft_store(MARIA_HA *info, uint keynr, byte *keybuf, + FT_WORD *wlist, my_off_t filepos) +{ + uint key_length; + DBUG_ENTER("_ma_ft_store"); + + for (; wlist->pos; wlist++) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos); + if (_ma_ck_write(info, keynr, keybuf, key_length)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +static int _ma_ft_erase(MARIA_HA *info, uint keynr, byte *keybuf, + FT_WORD *wlist, my_off_t filepos) +{ + uint key_length, err=0; + DBUG_ENTER("_ma_ft_erase"); + + for (; wlist->pos; wlist++) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos); + if (_ma_ck_delete(info, keynr, keybuf, key_length)) + err=1; + } + DBUG_RETURN(err); +} + +/* + Compares an appropriate parts of two WORD_KEY keys directly out of records + returns 1 if they are different +*/ + +#define THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT 1 +#define GEE_THEY_ARE_ABSOLUTELY_IDENTICAL 0 + +int _ma_ft_cmp(MARIA_HA *info, uint keynr, const byte *rec1, const byte *rec2) +{ + FT_SEG_ITERATOR ftsi1, ftsi2; + CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; + DBUG_ENTER("_ma_ft_cmp"); + + _ma_ft_segiterator_init(info, keynr, rec1, &ftsi1); + _ma_ft_segiterator_init(info, keynr, rec2, &ftsi2); + + while (_ma_ft_segiterator(&ftsi1) && _ma_ft_segiterator(&ftsi2)) + { + if ((ftsi1.pos != ftsi2.pos) && + (!ftsi1.pos || !ftsi2.pos || + ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, + (uchar*) ftsi2.pos,ftsi2.len,0,0))) + DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); + } + DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL); +} + + +/* update a document entry */ + +int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, + const byte *oldrec, const byte *newrec, my_off_t pos) +{ + int error= -1; + FT_WORD *oldlist,*newlist, *old_word, *new_word; + CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; + uint key_length; + int cmp, cmp2; + DBUG_ENTER("_ma_ft_update"); + + if (!(old_word=oldlist=_ma_ft_parserecord(info, keynr, oldrec, + &info->ft_memroot)) || + !(new_word=newlist=_ma_ft_parserecord(info, keynr, newrec, + &info->ft_memroot))) + goto err; + + error=0; + while(old_word->pos && new_word->pos) + { + cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len, + (uchar*) new_word->pos,new_word->len,0,0); + cmp2= cmp ? 
0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); + + if (cmp < 0 || cmp2) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,old_word,pos); + if ((error= _ma_ck_delete(info,keynr, keybuf,key_length))) + goto err; + } + if (cmp > 0 || cmp2) + { + key_length= _ma_ft_make_key(info, keynr, keybuf, new_word,pos); + if ((error= _ma_ck_write(info, keynr, keybuf,key_length))) + goto err; + } + if (cmp<=0) old_word++; + if (cmp>=0) new_word++; + } + if (old_word->pos) + error= _ma_ft_erase(info,keynr,keybuf,old_word,pos); + else if (new_word->pos) + error= _ma_ft_store(info,keynr,keybuf,new_word,pos); + +err: + free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE)); + DBUG_RETURN(error); +} + + +/* adds a document to the collection */ + +int _ma_ft_add(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record, + my_off_t pos) +{ + int error= -1; + FT_WORD *wlist; + DBUG_ENTER("_ma_ft_add"); + DBUG_PRINT("enter",("keynr: %d",keynr)); + + if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot))) + error= _ma_ft_store(info,keynr,keybuf,wlist,pos); + free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE)); + DBUG_PRINT("exit",("Return: %d",error)); + DBUG_RETURN(error); +} + + +/* removes a document from the collection */ + +int _ma_ft_del(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record, + my_off_t pos) +{ + int error= -1; + FT_WORD *wlist; + DBUG_ENTER("_ma_ft_del"); + DBUG_PRINT("enter",("keynr: %d",keynr)); + + if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot))) + error= _ma_ft_erase(info,keynr,keybuf,wlist,pos); + free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE)); + DBUG_PRINT("exit",("Return: %d",error)); + DBUG_RETURN(error); +} + + +uint _ma_ft_make_key(MARIA_HA *info, uint keynr, byte *keybuf, FT_WORD *wptr, + my_off_t filepos) +{ + byte buf[HA_FT_MAXBYTELEN+16]; + DBUG_ENTER("_ma_ft_make_key"); + +#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT + { + float weight=(float) ((filepos==HA_OFFSET_ERROR) ? 0 : wptr->weight); + mi_float4store(buf,weight); + } +#else +#error +#endif + + int2store(buf+HA_FT_WLEN,wptr->len); + memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len); + DBUG_RETURN(_ma_make_key(info, keynr, keybuf, buf, filepos)); +} + + +/* + convert key value to ft2 +*/ + +uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, byte *key) +{ + my_off_t root; + DYNAMIC_ARRAY *da=info->ft1_to_ft2; + MARIA_KEYDEF *keyinfo=&info->s->ft2_keyinfo; + byte *key_ptr= (byte*) dynamic_array_ptr(da, 0), *end; + uint length, key_length; + DBUG_ENTER("_ma_ft_convert_to_ft2"); + + /* we'll generate one pageful at once, and insert the rest one-by-one */ + /* calculating the length of this page ...*/ + length=(keyinfo->block_length-2) / keyinfo->keylength; + set_if_smaller(length, da->elements); + length=length * keyinfo->keylength; + + get_key_full_length_rdonly(key_length, key); + while (_ma_ck_delete(info, keynr, key, key_length) == 0) + { + /* + nothing to do here. 
+ _ma_ck_delete() will populate info->ft1_to_ft2 with deleted keys + */ + } + + /* creating pageful of keys */ + maria_putint(info->buff,length+2,0); + memcpy(info->buff+2, key_ptr, length); + info->keybuff_used=info->page_changed=1; /* info->buff is used */ + if ((root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || + _ma_write_keypage(info,keyinfo,root,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + + /* inserting the rest of key values */ + end= (byte*) dynamic_array_ptr(da, da->elements); + for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength) + if(_ma_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME)) + DBUG_RETURN(-1); + + /* now, writing the word key entry */ + ft_intXstore(key+key_length, - (int) da->elements); + _ma_dpointer(info, key+key_length+HA_FT_WLEN, root); + + DBUG_RETURN(_ma_ck_real_write_btree(info, + info->s->keyinfo+keynr, + key, 0, + &info->s->state.key_root[keynr], + SEARCH_SAME)); +} diff --git a/storage/maria/ma_ftdefs.h b/storage/maria/ma_ftdefs.h new file mode 100644 index 00000000000..def7e92e6e0 --- /dev/null +++ b/storage/maria/ma_ftdefs.h @@ -0,0 +1,153 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* some definitions for full-text indices */ + +#include "ma_fulltext.h" +#include <m_ctype.h> +#include <my_tree.h> +#include <queues.h> +#include <mysql/plugin.h> + +#define true_word_char(ctype, character) \ + ((ctype) & (_MY_U | _MY_L | _MY_NMR) || \ + (character) == '_') +#define misc_word_char(X) 0 + +#define FT_MAX_WORD_LEN_FOR_SORT 31 + +#define FTPARSER_MEMROOT_ALLOC_SIZE 65536 + +#define COMPILE_STOPWORDS_IN + +/* Interested readers may consult SMART + (ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z) + for an excellent implementation of vector space model we use. + It also demonstrate the usage of different weghting techniques. + This code, though, is completely original and is not based on the + SMART code but was in some cases inspired by it. 
+ + NORM_PIVOT was taken from the article + A.Singhal, C.Buckley, M.Mitra, "Pivoted Document Length Normalization", + ACM SIGIR'96, 21-29, 1996 + */ + +#define LWS_FOR_QUERY LWS_TF +#define LWS_IN_USE LWS_LOG +#define PRENORM_IN_USE PRENORM_AVG +#define NORM_IN_USE NORM_PIVOT +#define GWS_IN_USE GWS_PROB +/*==============================================================*/ +#define LWS_TF (count) +#define LWS_BINARY (count>0) +#define LWS_SQUARE (count*count) +#define LWS_LOG (count?(log( (double) count)+1):0) +/*--------------------------------------------------------------*/ +#define PRENORM_NONE (p->weight) +#define PRENORM_MAX (p->weight/docstat.max) +#define PRENORM_AUG (0.4+0.6*p->weight/docstat.max) +#define PRENORM_AVG (p->weight/docstat.sum*docstat.uniq) +#define PRENORM_AVGLOG ((1+log(p->weight))/(1+log(docstat.sum/docstat.uniq))) +/*--------------------------------------------------------------*/ +#define NORM_NONE (1) +#define NORM_SUM (docstat.nsum) +#define NORM_COS (sqrt(docstat.nsum2)) + +#define PIVOT_VAL (0.0115) +#define NORM_PIVOT (1+PIVOT_VAL*docstat.uniq) +/*---------------------------------------------------------------*/ +#define GWS_NORM (1/sqrt(sum2)) +#define GWS_GFIDF (sum/doc_cnt) +/* Mysterious, but w/o (double) GWS_IDF performs better :-o */ +#define GWS_IDF log(aio->info->state->records/doc_cnt) +#define GWS_IDF1 log((double)aio->info->state->records/doc_cnt) +#define GWS_PROB ((aio->info->state->records > doc_cnt) ? log(((double)(aio->info->state->records-doc_cnt))/doc_cnt) : 0 ) +#define GWS_FREQ (1.0/doc_cnt) +#define GWS_SQUARED pow(log((double)aio->info->state->records/doc_cnt),2) +#define GWS_CUBIC pow(log((double)aio->info->state->records/doc_cnt),3) +#define GWS_ENTROPY (1-(suml/sum-log(sum))/log(aio->info->state->records)) +/*=================================================================*/ + +/* Boolean search operators */ +#define FTB_YES (ft_boolean_syntax[0]) +#define FTB_EGAL (ft_boolean_syntax[1]) +#define FTB_NO (ft_boolean_syntax[2]) +#define FTB_INC (ft_boolean_syntax[3]) +#define FTB_DEC (ft_boolean_syntax[4]) +#define FTB_LBR (ft_boolean_syntax[5]) +#define FTB_RBR (ft_boolean_syntax[6]) +#define FTB_NEG (ft_boolean_syntax[7]) +#define FTB_TRUNC (ft_boolean_syntax[8]) +#define FTB_LQUOT (ft_boolean_syntax[10]) +#define FTB_RQUOT (ft_boolean_syntax[11]) + +typedef struct st_maria_ft_word { + byte * pos; + uint len; + double weight; +} FT_WORD; + +int is_stopword(char *word, uint len); + +uint _ma_ft_make_key(MARIA_HA *, uint , byte *, FT_WORD *, my_off_t); + +byte maria_ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *, + MYSQL_FTPARSER_BOOLEAN_INFO *); +byte maria_ft_simple_get_word(CHARSET_INFO *, byte **, const byte *, + FT_WORD *, my_bool); + +typedef struct _st_maria_ft_seg_iterator { + uint num, len; + HA_KEYSEG *seg; + const byte *rec, *pos; +} FT_SEG_ITERATOR; + +void _ma_ft_segiterator_init(MARIA_HA *, uint, const byte *, FT_SEG_ITERATOR *); +void _ma_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *); +uint _ma_ft_segiterator(FT_SEG_ITERATOR *); + +void maria_ft_parse_init(TREE *, CHARSET_INFO *); +int maria_ft_parse(TREE *, byte *, int, struct st_mysql_ftparser *parser, + MYSQL_FTPARSER_PARAM *, MEM_ROOT *); +FT_WORD * maria_ft_linearize(TREE *, MEM_ROOT *); +FT_WORD * _ma_ft_parserecord(MARIA_HA *, uint, const byte *, MEM_ROOT *); +uint _ma_ft_parse(TREE *, MARIA_HA *, uint, const byte *, + MYSQL_FTPARSER_PARAM *, MEM_ROOT *); + +FT_INFO *maria_ft_init_nlq_search(MARIA_HA *, uint, byte *, uint, uint, byte *); 
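The LWS_*, PRENORM_*, NORM_* and GWS_* families above each list several alternative formulas, and the *_IN_USE selectors pick one from each family: LWS_LOG for the local (per-document) term weight, PRENORM_AVG for pre-normalization, NORM_PIVOT for pivoted length normalization and GWS_PROB for the global (collection-wide) weight. The standalone C sketch below shows, under simplified assumptions, how those four selected formulas can combine into a single term weight; the scalar inputs (count, sum, uniq, records, doc_cnt) are hypothetical stand-ins for the docstat/aio statistics the engine actually keeps, and the composition is illustrative rather than the engine's exact evaluation order.

/* Illustration only: mirrors LWS_LOG, PRENORM_AVG, NORM_PIVOT (PIVOT_VAL=0.0115)
   and GWS_PROB from the macros above, composed in a simplified way. */
#include <math.h>
#include <stdio.h>

static double local_weight(double count)                     /* LWS_LOG     */
{ return count ? log(count) + 1.0 : 0.0; }

static double prenorm_avg(double w, double sum, double uniq) /* PRENORM_AVG */
{ return w / sum * uniq; }

static double norm_pivot(double uniq)                        /* NORM_PIVOT  */
{ return 1.0 + 0.0115 * uniq; }

static double global_weight(double records, double doc_cnt)  /* GWS_PROB    */
{ return records > doc_cnt ? log((records - doc_cnt) / doc_cnt) : 0.0; }

int main(void)
{
  double count= 3;                    /* occurrences of the word in one row  */
  double sum= 40, uniq= 25;           /* total / distinct words in that row  */
  double records= 1000, doc_cnt= 10;  /* rows in table / rows with the word  */
  double w= prenorm_avg(local_weight(count), sum, uniq) / norm_pivot(uniq) *
            global_weight(records, doc_cnt);
  printf("combined term weight: %g\n", w);
  return 0;
}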
+FT_INFO *maria_ft_init_boolean_search(MARIA_HA *, uint, byte *, uint, CHARSET_INFO *); + +extern const struct _ft_vft _ma_ft_vft_nlq; +int maria_ft_nlq_read_next(FT_INFO *, char *); +float maria_ft_nlq_find_relevance(FT_INFO *, byte *, uint); +void maria_ft_nlq_close_search(FT_INFO *); +float maria_ft_nlq_get_relevance(FT_INFO *); +my_off_t maria_ft_nlq_get_docid(FT_INFO *); +void maria_ft_nlq_reinit_search(FT_INFO *); + +extern const struct _ft_vft _ma_ft_vft_boolean; +int maria_ft_boolean_read_next(FT_INFO *, char *); +float maria_ft_boolean_find_relevance(FT_INFO *, byte *, uint); +void maria_ft_boolean_close_search(FT_INFO *); +float maria_ft_boolean_get_relevance(FT_INFO *); +my_off_t maria_ft_boolean_get_docid(FT_INFO *); +void maria_ft_boolean_reinit_search(FT_INFO *); +extern MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, + uint keynr, + uint paramnr); +extern void maria_ftparser_call_deinitializer(MARIA_HA *info); diff --git a/storage/maria/ma_fulltext.h b/storage/maria/ma_fulltext.h new file mode 100644 index 00000000000..cf21471b316 --- /dev/null +++ b/storage/maria/ma_fulltext.h @@ -0,0 +1,28 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* some definitions for full-text indices */ + +#include "maria_def.h" +#include "ft_global.h" + +int _ma_ft_cmp(MARIA_HA *, uint, const byte *, const byte *); +int _ma_ft_add(MARIA_HA *, uint, byte *, const byte *, my_off_t); +int _ma_ft_del(MARIA_HA *, uint, byte *, const byte *, my_off_t); + +uint _ma_ft_convert_to_ft2(MARIA_HA *, uint, byte *); diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c new file mode 100644 index 00000000000..397cd2465d4 --- /dev/null +++ b/storage/maria/ma_info.c @@ -0,0 +1,137 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Return useful base information for an open table */ + +#include "maria_def.h" +#ifdef __WIN__ +#include <sys/stat.h> +#endif + + /* Get position to last record */ + +MARIA_RECORD_POS maria_position(MARIA_HA *info) +{ + return info->cur_row.lastpos; +} + + +/* Get information about the table */ +/* If flag == 2, return the current in-memory info (no sync from the database) */ + +int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag) +{ + MY_STAT state; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_status"); + + x->recpos= info->cur_row.lastpos; + if (flag == HA_STATUS_POS) + DBUG_RETURN(0); /* Compatible with ISAM */ + if (!(flag & HA_STATUS_NO_LOCK)) + { + pthread_mutex_lock(&share->intern_lock); + VOID(_ma_readinfo(info,F_RDLCK,0)); + fast_ma_writeinfo(info); + pthread_mutex_unlock(&share->intern_lock); + } + if (flag & HA_STATUS_VARIABLE) + { + x->records = info->state->records; + x->deleted = info->state->del; + x->delete_length = info->state->empty; + x->data_file_length =info->state->data_file_length; + x->index_file_length=info->state->key_file_length; + + x->keys = share->state.header.keys; + x->check_time = share->state.check_time; + x->mean_reclength = info->state->records ? + (ulong) ((info->state->data_file_length-info->state->empty)/ + info->state->records) : (ulong) share->min_pack_length; + } + if (flag & HA_STATUS_ERRKEY) + { + x->errkey= info->errkey; + x->dup_key_pos= info->dup_key_pos; + } + if (flag & HA_STATUS_CONST) + { + x->reclength = share->base.reclength; + x->max_data_file_length=share->base.max_data_file_length; + x->max_index_file_length=info->s->base.max_key_file_length; + x->filenr = info->dfile; + x->options = share->options; + x->create_time=share->state.create_time; + x->reflength= maria_get_pointer_length(share->base.max_data_file_length, + maria_data_pointer_size); + x->record_offset= ((share->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? + 0L : share->base.pack_reclength); + x->sortkey= -1; /* No clustering */ + x->rec_per_key = share->state.rec_per_key_part; + x->key_map = share->state.key_map; + x->data_file_name = share->data_file_name; + x->index_file_name = share->index_file_name; + } + if ((flag & HA_STATUS_TIME) && !my_fstat(info->dfile,&state,MYF(0))) + x->update_time=state.st_mtime; + else + x->update_time=0; + if (flag & HA_STATUS_AUTO) + { + x->auto_increment= share->state.auto_increment+1; + if (!x->auto_increment) /* This shouldn't happen */ + x->auto_increment= ~(ulonglong) 0; + } + DBUG_RETURN(0); +} + + +/* + Write a message to the error log. + + SYNOPSIS + _ma_report_error() + file_name Name of table file (e.g. index_file_name). + errcode Error number. + + DESCRIPTION + This function supplies my_error() with a table name. Most error + messages need one. Since string arguments in error messages are limited + to 64 characters by convention, we ensure that, in case of truncation, + the end of the index file path is in the message. This contains + the most valuable information (the table name and the database name).
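The truncation rule in the DESCRIPTION above (prefer dropping the directory part, then keep only the last 64 characters so the database and table names survive) can be sketched in a few lines of standalone C; strrchr() is used here as a rough stand-in for the tree's dirname_length() helper, and the result is what would be handed on to my_error().

/* Illustration of the truncation described above, not the engine function
   itself. */
#include <stdio.h>
#include <string.h>

static const char *tail_for_message(const char *file_name)
{
  size_t length= strlen(file_name);
  if (length > 64)
  {
    const char *base= strrchr(file_name, '/');  /* stand-in for dirname_length() */
    if (base)
    {
      file_name= base + 1;                      /* drop the directory part first */
      length= strlen(file_name);
    }
    if (length > 64)
      file_name+= length - 64;                  /* keep only the last 64 chars   */
  }
  return file_name;
}

int main(void)
{
  printf("%s\n",
         tail_for_message("/very/long/datadir/path/testdb/a_table_with_quite_a_long_name.MAI"));
  return 0;
}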
+ + RETURN + void +*/ + +void _ma_report_error(int errcode, const char *file_name) +{ + uint length; + DBUG_ENTER("_ma_report_error"); + DBUG_PRINT("enter",("errcode %d, table '%s'", errcode, file_name)); + + if ((length= strlen(file_name)) > 64) + { + uint dir_length= dirname_length(file_name); + file_name+= dir_length; + if ((length-= dir_length) > 64) + file_name+= length - 64; + } + my_error(errcode, MYF(ME_NOREFRESH), file_name); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c new file mode 100644 index 00000000000..679aa8c6f8f --- /dev/null +++ b/storage/maria/ma_init.c @@ -0,0 +1,60 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Initialize an maria-database */ + +#include "maria_def.h" +#include <ft_global.h> +#include "ma_blockrec.h" + +my_bool maria_inited= FALSE; +pthread_mutex_t THR_LOCK_maria; + +/* + Initialize maria + + SYNOPSIS + maria_init() + + TODO + Open log files and do recovery if need + + RETURN + 0 ok + # error number +*/ + +int maria_init(void) +{ + if (!maria_inited) + { + maria_inited= TRUE; + pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW); + _ma_init_block_record_data(); + } + return 0; +} + + +void maria_end(void) +{ + if (maria_inited) + { + maria_inited= FALSE; + ft_free_stopwords(); + pthread_mutex_destroy(&THR_LOCK_maria); + } +} diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c new file mode 100644 index 00000000000..036fd305c4d --- /dev/null +++ b/storage/maria/ma_key.c @@ -0,0 +1,591 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Functions to handle keys */ + +#include "maria_def.h" +#include "m_ctype.h" +#include "ma_sp_defs.h" +#ifdef HAVE_IEEEFP_H +#include <ieeefp.h> +#endif + +#define CHECK_KEYS /* Enable safety checks */ + +#define FIX_LENGTH(cs, pos, length, char_length) \ + do { \ + if (length > char_length) \ + char_length= my_charpos(cs, pos, pos+length, char_length); \ + set_if_smaller(char_length,length); \ + } while(0) + +static int _ma_put_key_in_record(MARIA_HA *info,uint keynr,byte *record); + +/* + Make a intern key from a record + + SYNOPSIS + _ma_make_key() + info MyiSAM handler + keynr key number + key Store created key here + record Record + filepos Position to record in the data file + + RETURN + Length of key +*/ + +uint _ma_make_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *record, MARIA_RECORD_POS filepos) +{ + const byte *pos; + byte *start; + reg1 HA_KEYSEG *keyseg; + my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT; + DBUG_ENTER("_ma_make_key"); + + if (info->s->keyinfo[keynr].flag & HA_SPATIAL) + { + /* + TODO: nulls processing + */ +#ifdef HAVE_SPATIAL + DBUG_RETURN(_ma_sp_make_key(info,keynr, key,record,filepos)); +#else + DBUG_ASSERT(0); /* maria_open should check that this never happens*/ +#endif + } + + start=key; + for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint length=keyseg->length; + uint char_length; + CHARSET_INFO *cs=keyseg->charset; + + if (keyseg->null_bit) + { + if (record[keyseg->null_pos] & keyseg->null_bit) + { + *key++= 0; /* NULL in key */ + continue; + } + *key++=1; /* Not NULL */ + } + + char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : + length); + + pos= record+keyseg->start; + if (type == HA_KEYTYPE_BIT) + { + if (keyseg->bit_length) + { + uchar bits= get_rec_bits((uchar*) record + keyseg->bit_pos, + keyseg->bit_start, keyseg->bit_length); + *key++= (char) bits; + length--; + } + memcpy(key, pos, length); + key+= length; + continue; + } + if (keyseg->flag & HA_SPACE_PACK) + { + if (type != HA_KEYTYPE_NUM) + { + length= cs->cset->lengthsp(cs, pos, length); + } + else + { + const byte *end= pos + length; + while (pos < end && pos[0] == ' ') + pos++; + length= (uint) (end-pos); + } + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy(key, pos, (size_t) char_length); + key+=char_length; + continue; + } + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= (keyseg->bit_start == 1 ? 1 : 2); + uint tmp_length= (pack_length == 1 ? 
(uint) *(uchar*) pos : + uint2korr(pos)); + pos+= pack_length; /* Skip VARCHAR length */ + set_if_smaller(length,tmp_length); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy(key,pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & HA_BLOB_PART) + { + uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos); + memcpy_fixed(&pos,pos+keyseg->bit_start,sizeof(char*)); + set_if_smaller(length,tmp_length); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy(key,pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & HA_SWAP_KEY) + { /* Numerical column */ +#ifdef HAVE_ISNAN + if (type == HA_KEYTYPE_FLOAT) + { + float nr; + float4get(nr,pos); + if (isnan(nr)) + { + /* Replace NAN with zero */ + bzero(key,length); + key+=length; + continue; + } + } + else if (type == HA_KEYTYPE_DOUBLE) + { + double nr; + float8get(nr,pos); + if (isnan(nr)) + { + bzero(key,length); + key+=length; + continue; + } + } +#endif + pos+=length; + while (length--) + { + *key++ = *--pos; + } + continue; + } + FIX_LENGTH(cs, pos, length, char_length); + memcpy(key, pos, char_length); + if (length > char_length) + cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); + key+= length; + } + _ma_dpointer(info,key,filepos); + DBUG_PRINT("exit",("keynr: %d",keynr)); + DBUG_DUMP("key",start,(uint) (key-start)+keyseg->length); + DBUG_EXECUTE("key", + _ma_print_key(DBUG_FILE,info->s->keyinfo[keynr].seg,start, + (uint) (key-start));); + DBUG_RETURN((uint) (key-start)); /* Return keylength */ +} /* _ma_make_key */ + + +/* + Pack a key to intern format from given format (c_rkey) + + SYNOPSIS + _ma_pack_key() + info MARIA handler + uint keynr key number + key Store packed key here + old Not packed key + k_length Length of 'old' to use + last_used_keyseg out parameter. May be NULL + + RETURN + length of packed key + + last_use_keyseg Store pointer to the keyseg after the last used one +*/ + +uint _ma_pack_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *old, uint k_length, HA_KEYSEG **last_used_keyseg) +{ + byte *start_key=key; + HA_KEYSEG *keyseg; + my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT; + DBUG_ENTER("_ma_pack_key"); + + for (keyseg=info->s->keyinfo[keynr].seg ; + keyseg->type && (int) k_length > 0; + old+=keyseg->length, keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint length=min((uint) keyseg->length,(uint) k_length); + uint char_length; + const byte *pos; + CHARSET_INFO *cs=keyseg->charset; + + if (keyseg->null_bit) + { + k_length--; + if (!(*key++= (char) 1-*old++)) /* Copy null marker */ + { + k_length-=length; + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { + k_length-=2; /* Skip length */ + old+= 2; + } + continue; /* Found NULL */ + } + } + char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? 
length/cs->mbmaxlen : + length); + pos= old; + if (keyseg->flag & HA_SPACE_PACK) + { + const byte *end= pos + length; + if (type != HA_KEYTYPE_NUM) + { + while (end > pos && end[-1] == ' ') + end--; + } + else + { + while (pos < end && pos[0] == ' ') + pos++; + } + k_length-=length; + length=(uint) (end-pos); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy(key,pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { + /* Length of key-part used with maria_rkey() always 2 */ + uint tmp_length=uint2korr(pos); + k_length-= 2+length; + pos+=2; + set_if_smaller(length,tmp_length); /* Safety */ + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + old+=2; /* Skip length */ + memcpy(key, pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & HA_SWAP_KEY) + { /* Numerical column */ + pos+=length; + k_length-=length; + while (length--) + { + *key++ = *--pos; + } + continue; + } + FIX_LENGTH(cs, pos, length, char_length); + memcpy(key, pos, char_length); + if (length > char_length) + cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); + key+= length; + k_length-=length; + } + if (last_used_keyseg) + *last_used_keyseg= keyseg; + +#ifdef NOT_USED + if (keyseg->type) + { + /* Part-key ; fill with ASCII 0 for easier searching */ + length= (uint) -k_length; /* unused part of last key */ + do + { + if (keyseg->flag & HA_NULL_PART) + length++; + if (keyseg->flag & HA_SPACE_PACK) + length+=2; + else + length+= keyseg->length; + keyseg++; + } while (keyseg->type); + bzero(key,length); + key+=length; + } +#endif + DBUG_RETURN((uint) (key-start_key)); +} /* _ma_pack_key */ + + + +/* + Store found key in record + + SYNOPSIS + _ma_put_key_in_record() + info MARIA handler + keynr Key number that was used + record Store key here + + Last read key is in info->lastkey + + NOTES + Used when only-keyread is wanted + + RETURN + 0 ok + 1 error +*/ + +static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, + byte *record) +{ + reg2 byte *key; + byte *pos,*key_end; + reg1 HA_KEYSEG *keyseg; + byte *blob_ptr; + DBUG_ENTER("_ma_put_key_in_record"); + + blob_ptr= info->lastkey2; /* Place to put blob parts */ + key=info->lastkey; /* KEy that was read */ + key_end=key+info->lastkey_length; + for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++) + { + if (keyseg->null_bit) + { + if (!*key++) + { + record[keyseg->null_pos]|= keyseg->null_bit; + continue; + } + record[keyseg->null_pos]&= ~keyseg->null_bit; + } + if (keyseg->type == HA_KEYTYPE_BIT) + { + uint length= keyseg->length; + + if (keyseg->bit_length) + { + byte bits= *key++; + set_rec_bits(bits, record + keyseg->bit_pos, keyseg->bit_start, + keyseg->bit_length); + length--; + } + else + { + clr_rec_bits(record + keyseg->bit_pos, keyseg->bit_start, + keyseg->bit_length); + } + memcpy(record + keyseg->start, key, length); + key+= length; + continue; + } + if (keyseg->flag & HA_SPACE_PACK) + { + uint length; + get_key_length(length,key); +#ifdef CHECK_KEYS + if (length > keyseg->length || key+length > key_end) + goto err; +#endif + pos= record+keyseg->start; + if (keyseg->type != (int) HA_KEYTYPE_NUM) + { + memcpy(pos,key,(size_t) length); + keyseg->charset->cset->fill(keyseg->charset, + pos + length, keyseg->length - length, + ' '); + } + else + { + bfill(pos,keyseg->length-length,' '); + memcpy(pos+keyseg->length-length,key,(size_t) length); + 
} + key+=length; + continue; + } + + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint length; + get_key_length(length,key); +#ifdef CHECK_KEYS + if (length > keyseg->length || key+length > key_end) + goto err; +#endif + /* Store key length */ + if (keyseg->bit_start == 1) + *(uchar*) (record+keyseg->start)= (uchar) length; + else + int2store(record+keyseg->start, length); + /* And key data */ + memcpy(record+keyseg->start + keyseg->bit_start, key, length); + key+= length; + } + else if (keyseg->flag & HA_BLOB_PART) + { + uint length; + get_key_length(length,key); +#ifdef CHECK_KEYS + if (length > keyseg->length || key+length > key_end) + goto err; +#endif + memcpy(record+keyseg->start+keyseg->bit_start, + (char*) &blob_ptr,sizeof(char*)); + memcpy(blob_ptr,key,length); + blob_ptr+=length; + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + _ma_store_blob_length(record+keyseg->start, + (uint) keyseg->bit_start,length); + key+=length; + } + else if (keyseg->flag & HA_SWAP_KEY) + { + byte *to= record+keyseg->start+keyseg->length; + byte *end= key+keyseg->length; +#ifdef CHECK_KEYS + if (end > key_end) + goto err; +#endif + do + { + *--to= *key++; + } while (key != end); + continue; + } + else + { +#ifdef CHECK_KEYS + if (key+keyseg->length > key_end) + goto err; +#endif + memcpy(record+keyseg->start, key, (size_t) keyseg->length); + key+= keyseg->length; + } + } + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); /* Crashed row */ +} /* _ma_put_key_in_record */ + + + /* Here when key reads are used */ + +int _ma_read_key_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos) +{ + fast_ma_writeinfo(info); + if (filepos != HA_OFFSET_ERROR) + { + if (info->lastinx >= 0) + { /* Read only key */ + if (_ma_put_key_in_record(info,(uint) info->lastinx,buf)) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return -1; + } + info->update|= HA_STATE_AKTIV; /* We should find a record */ + return 0; + } + my_errno=HA_ERR_WRONG_INDEX; + } + return(-1); /* Wrong data to read */ +} + + +/* + Retrieve auto_increment info + + SYNOPSIS + retrieve_auto_increment() + info Maria handler + record Row to update + + IMPLEMENTATION + For signed columns we don't retrieve the auto increment value if it's + less than zero. +*/ + +ulonglong ma_retrieve_auto_increment(MARIA_HA *info,const byte *record) +{ + ulonglong value= 0; /* Store unsigned values here */ + longlong s_value= 0; /* Store signed values here */ + HA_KEYSEG *keyseg= info->s->keyinfo[info->s->base.auto_key-1].seg; + const byte *key= record + keyseg->start; + + switch (keyseg->type) { + case HA_KEYTYPE_INT8: + s_value= (longlong) *(char*)key; + break; + case HA_KEYTYPE_BINARY: + value=(ulonglong) *(uchar*) key; + break; + case HA_KEYTYPE_SHORT_INT: + s_value= (longlong) sint2korr(key); + break; + case HA_KEYTYPE_USHORT_INT: + value=(ulonglong) uint2korr(key); + break; + case HA_KEYTYPE_LONG_INT: + s_value= (longlong) sint4korr(key); + break; + case HA_KEYTYPE_ULONG_INT: + value=(ulonglong) uint4korr(key); + break; + case HA_KEYTYPE_INT24: + s_value= (longlong) sint3korr(key); + break; + case HA_KEYTYPE_UINT24: + value=(ulonglong) uint3korr(key); + break; + case HA_KEYTYPE_FLOAT: /* This shouldn't be used */ + { + float f_1; + float4get(f_1,key); + /* Ignore negative values */ + value = (f_1 < (float) 0.0) ? 
0 : (ulonglong) f_1; + break; + } + case HA_KEYTYPE_DOUBLE: /* This shouldn't be used */ + { + double f_1; + float8get(f_1,key); + /* Ignore negative values */ + value = (f_1 < 0.0) ? 0 : (ulonglong) f_1; + break; + } + case HA_KEYTYPE_LONGLONG: + s_value= sint8korr(key); + break; + case HA_KEYTYPE_ULONGLONG: + value= uint8korr(key); + break; + default: + DBUG_ASSERT(0); + value=0; /* Error */ + break; + } + + /* + The following code works becasue if s_value < 0 then value is 0 + and if s_value == 0 then value will contain either s_value or the + correct value. + */ + return (s_value > 0) ? (ulonglong) s_value : value; +} diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c new file mode 100644 index 00000000000..64725baae86 --- /dev/null +++ b/storage/maria/ma_keycache.c @@ -0,0 +1,164 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Key cache assignments +*/ + +#include "maria_def.h" + +/* + Assign pages of the index file for a table to a key cache + + SYNOPSIS + maria_assign_to_key_cache() + info open table + key_map map of indexes to assign to the key cache + key_cache_ptr pointer to the key cache handle + assign_lock Mutex to lock during assignment + + PREREQUESTS + One must have a READ lock or a WRITE lock on the table when calling + the function to ensure that there is no other writers to it. + + The caller must also ensure that one doesn't call this function from + two different threads with the same table. + + NOTES + At present pages for all indexes must be assigned to the same key cache. + In future only pages for indexes specified in the key_map parameter + of the table will be assigned to the specified key cache. + + RETURN VALUE + 0 If a success + # Error code +*/ + +int maria_assign_to_key_cache(MARIA_HA *info, + ulonglong key_map __attribute__((unused)), + KEY_CACHE *key_cache) +{ + int error= 0; + MARIA_SHARE* share= info->s; + DBUG_ENTER("maria_assign_to_key_cache"); + DBUG_PRINT("enter", + ("old_key_cache_handle: 0x%lx new_key_cache_handle: 0x%lx", + (long) share->key_cache, (long) key_cache)); + + /* + Skip operation if we didn't change key cache. This can happen if we + call this for all open instances of the same table + */ + if (share->key_cache == key_cache) + DBUG_RETURN(0); + + /* + First flush all blocks for the table in the old key cache. + This is to ensure that the disk is consistent with the data pages + in memory (which may not be the case if the table uses delayed_key_write) + + Note that some other read thread may still fill in the key cache with + new blocks during this call and after, but this doesn't matter as + all threads will start using the new key cache for their next call to + maria library and we know that there will not be any changed blocks + in the old key cache. 
+ */ + + if (flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE)) + { + error= my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Mark that table must be checked */ + } + + /* + Flush the new key cache for this file. This is needed to ensure + that there is no old blocks (with outdated data) left in the new key + cache from an earlier assign_to_keycache operation + + (This can never fail as there is never any not written data in the + new key cache) + */ + (void) flush_key_blocks(key_cache, share->kfile, FLUSH_RELEASE); + + /* + ensure that setting the key cache and changing the multi_key_cache + is done atomicly + */ + pthread_mutex_lock(&share->intern_lock); + /* + Tell all threads to use the new key cache + This should be seen at the lastes for the next call to an maria function. + */ + share->key_cache= key_cache; + + /* store the key cache in the global hash structure for future opens */ + if (multi_key_cache_set(share->unique_file_name, share->unique_name_length, + share->key_cache)) + error= my_errno; + pthread_mutex_unlock(&share->intern_lock); + DBUG_RETURN(error); +} + + +/* + Change all MARIA entries that uses one key cache to another key cache + + SYNOPSIS + maria_change_key_cache() + old_key_cache Old key cache + new_key_cache New key cache + + NOTES + This is used when we delete one key cache. + + To handle the case where some other threads tries to open an MARIA + table associated with the to-be-deleted key cache while this operation + is running, we have to call 'multi_key_cache_change()' from this + function while we have a lock on the MARIA table list structure. + + This is safe as long as it's only MARIA that is using this specific + key cache. +*/ + + +void maria_change_key_cache(KEY_CACHE *old_key_cache, + KEY_CACHE *new_key_cache) +{ + LIST *pos; + DBUG_ENTER("maria_change_key_cache"); + + /* + Lock list to ensure that no one can close the table while we manipulate it + */ + pthread_mutex_lock(&THR_LOCK_maria); + for (pos=maria_open_list ; pos ; pos=pos->next) + { + MARIA_HA *info= (MARIA_HA*) pos->data; + MARIA_SHARE *share= info->s; + if (share->key_cache == old_key_cache) + maria_assign_to_key_cache(info, (ulonglong) ~0, new_key_cache); + } + + /* + We have to do the following call while we have the lock on the + MARIA list structure to ensure that another thread is not trying to + open a new table that will be associted with the old key cache + */ + multi_key_cache_change(old_key_cache, new_key_cache); + pthread_mutex_unlock(&THR_LOCK_maria); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/ma_least_recently_dirtied.c b/storage/maria/ma_least_recently_dirtied.c new file mode 100644 index 00000000000..170e59a601a --- /dev/null +++ b/storage/maria/ma_least_recently_dirtied.c @@ -0,0 +1,106 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
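Editor's note on maria_assign_to_key_cache() above: the comments prescribe a strict order, namely flush the table's dirty blocks out of the old cache, purge any stale (but clean) copies from the new cache, then swap the cache pointer and the global registration under the share's mutex. A condensed sketch of that ordering follows; the types and helper functions are stand-ins written for this note, not Maria's real key cache API.

#include <pthread.h>
#include <stdio.h>

typedef struct cache { const char *name; } CACHE;
typedef struct share {
    CACHE          *cache;        /* cache currently used by this table  */
    int             file;         /* index file descriptor               */
    pthread_mutex_t intern_lock;  /* protects 'cache' and the global map */
} SHARE;

/* Stubs so the sketch links; the real calls flush/purge page buffers. */
static int  flush_table_blocks(CACHE *c, int f) { (void)c; (void)f; return 0; }
static void purge_table_blocks(CACHE *c, int f) { (void)c; (void)f; }
static int  register_cache_for_file(int f, CACHE *c) { (void)f; (void)c; return 0; }

static int assign_to_cache(SHARE *share, CACHE *new_cache)
{
    int error = 0;
    if (share->cache == new_cache)            /* already assigned          */
        return 0;
    /* 1. old cache: dirty pages must reach disk before the switch        */
    if (flush_table_blocks(share->cache, share->file))
        error = 1;
    /* 2. new cache: drop possibly outdated (but clean) copies left over
          from an earlier assignment                                       */
    purge_table_blocks(new_cache, share->file);
    /* 3. swap the pointer and the global registration under one mutex    */
    pthread_mutex_lock(&share->intern_lock);
    share->cache = new_cache;
    if (register_cache_for_file(share->file, new_cache))
        error = 1;
    pthread_mutex_unlock(&share->intern_lock);
    return error;
}

int main(void)
{
    CACHE old_c = { "old" }, new_c = { "new" };
    SHARE s;
    s.cache = &old_c;
    s.file  = 3;
    pthread_mutex_init(&s.intern_lock, NULL);
    printf("assign: %d, now using %s cache\n",
           assign_to_cache(&s, &new_c), s.cache->name);
    pthread_mutex_destroy(&s.intern_lock);
    return 0;
}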
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3261 Maria - background flushing of the least-recently-dirtied pages + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* + To be part of the page cache. + The pseudocode below is dependent on the page cache + which is being designed WL#3134. It is not clear if I need to do page + copies, as the page cache already keeps page copies. + So, this code will move to the page cache and take inspiration from its + methods. Below is just to give the idea of what could be done. + And I should compare my imaginations to WL#3134. +*/ + +/* Here is the implementation of this module */ + +#include "page_cache.h" +#include "least_recently_dirtied.h" + +/* + This thread does background flush of pieces of the LRD, and serves + requests for asynchronous checkpoints. + Just launch it when engine starts. + MikaelR questioned why the same thread does two different jobs, the risk + could be that while a checkpoint happens no LRD flushing happens. + For now, we only do checkpoints - no LRD flushing (to be done when the + second version of the page cache is ready WL#3077). + Reasons to delay: + - Recovery will work (just slower) + - new page cache may be different, why do then re-do + - current pagecache probably has issues with flushing when somebody is + writing to the table being flushed - better avoid that. +*/ +pthread_handler_decl background_flush_and_checkpoint_thread() +{ + while (this_thread_not_killed) + { + /* note that we don't care of the checkpoint's success */ + (void)execute_asynchronous_checkpoint_if_any(); + sleep(5); + /* + in the final version, we will not sleep but call flush_pages_from_LRD() + repeatedly. If there are no dirty pages, we'll make sure to not have a + tight loop probing for checkpoint requests. + */ + } +} + +/* The rest of this file will not serve in first version */ + +/* + flushes only the first pages of the LRD. + max_this_number could be FLUSH_CACHE (of mf_pagecache.c) for example. +*/ +flush_pages_from_LRD(uint max_this_number, LSN max_this_lsn) +{ + /* + One rule to better observe is "page must be flushed to disk before it is + removed from LRD" (otherwise checkpoint is incomplete info, corruption). + */ + + /* + Build a list of pages to flush: + changed_blocks[i] is roughly sorted by descending rec_lsn, + so we could do a merge sort of changed_blocks[] lists, stopping after we + have the max_this_number first elements or after we have found a page with + rec_lsn > max_this_lsn. + Then do like pagecache_flush_blocks_int() does (beware! this time we are + not alone on the file! there may be dangers! TODO: sort this out). + */ + + /* + MikaelR noted that he observed that Linux's file cache may never fsync to + disk until this cache is full, at which point it decides to empty the + cache, making the machine very slow. A solution was to fsync after writing + 2 MB. + */ +} + +/* + Note that when we flush all page from LRD up to rec_lsn>=max_lsn, + this is approximate because the LRD list may + not be exactly sorted by rec_lsn (because for a big row, all pages of the + row are inserted into the LRD with rec_lsn being the LSN of the REDO for the + first page, so if there are concurrent insertions, the last page of the big + row may have a smaller rec_lsn than the previous pages inserted by + concurrent inserters). 
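Editor's note on the pseudocode above: it describes a single service thread that answers asynchronous checkpoint requests and, in a later version, trickles dirty pages from the LRD instead of sleeping. Below is a generic pthread sketch of that loop shape, purely illustrative; none of these flag or function names exist in Maria, and the real code would use the handler's own request queue rather than two plain flags.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t svc_lock = PTHREAD_MUTEX_INITIALIZER;
static int checkpoint_requested = 0;   /* set by any thread wanting a checkpoint */
static int thread_killed        = 0;   /* set at shutdown                        */

static void take_checkpoint(void) { printf("checkpoint taken\n"); }

static void *background_service(void *arg)
{
    (void) arg;
    for (;;)
    {
        int do_checkpoint, killed;
        pthread_mutex_lock(&svc_lock);
        do_checkpoint = checkpoint_requested;
        checkpoint_requested = 0;
        killed = thread_killed;
        pthread_mutex_unlock(&svc_lock);

        if (do_checkpoint)
            take_checkpoint();     /* a failure would simply be retried later */
        if (killed)
            break;
        /* final version: flush a slice of the LRD here instead of sleeping   */
        sleep(1);
    }
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, background_service, NULL);

    pthread_mutex_lock(&svc_lock);
    checkpoint_requested = 1;          /* ask for an asynchronous checkpoint */
    pthread_mutex_unlock(&svc_lock);

    sleep(2);                          /* let the service thread run once    */
    pthread_mutex_lock(&svc_lock);
    thread_killed = 1;
    pthread_mutex_unlock(&svc_lock);
    pthread_join(t, NULL);
    return 0;
}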
+*/ diff --git a/storage/maria/ma_least_recently_dirtied.h b/storage/maria/ma_least_recently_dirtied.h new file mode 100644 index 00000000000..f6d7420febc --- /dev/null +++ b/storage/maria/ma_least_recently_dirtied.h @@ -0,0 +1,26 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3261 Maria - background flushing of the least-recently-dirtied pages + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* This is the interface of this module. */ + +/* flushes all pages from the LRD up to approximately rec_lsn >= max_lsn */ +int flush_all_LRD_to_lsn(LSN max_lsn); diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c new file mode 100644 index 00000000000..6a0bbe82dcb --- /dev/null +++ b/storage/maria/ma_locking.c @@ -0,0 +1,512 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + locking of isam-tables. + Reads info from an isam-table. Must be the first request before doing any + further calls to any isam function. Is used to allow many processes to use + the same isam database.
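Editor's note before maria_lock_database() below: the function keeps per-share counters of read and write locks, and the expensive work (flushing key blocks, writing the state header) happens only when the last write lock is released. A deliberately simplified model of that bookkeeping, using invented names and ignoring all the cache flushing:

#include <stdio.h>

/* Hypothetical, simplified share: only the lock counters. */
struct lock_counts { unsigned r_locks, w_locks, tot_locks; };

enum lock_op { LOCK_READ, LOCK_WRITE, LOCK_UNLOCK_READ, LOCK_UNLOCK_WRITE };

/* Returns 1 when the caller just released the last write lock, i.e. the
   point where the real code flushes key blocks and writes the state. */
static int apply_lock(struct lock_counts *c, enum lock_op op)
{
    switch (op) {
    case LOCK_READ:         c->r_locks++; c->tot_locks++; return 0;
    case LOCK_WRITE:        c->w_locks++; c->tot_locks++; return 0;
    case LOCK_UNLOCK_READ:  c->r_locks--; c->tot_locks--; return 0;
    case LOCK_UNLOCK_WRITE: c->w_locks--; c->tot_locks--;
                            return c->w_locks == 0;
    }
    return 0;
}

int main(void)
{
    struct lock_counts c = {0, 0, 0};
    apply_lock(&c, LOCK_WRITE);
    apply_lock(&c, LOCK_READ);
    printf("flush now? %d\n", apply_lock(&c, LOCK_UNLOCK_WRITE)); /* 1 */
    apply_lock(&c, LOCK_UNLOCK_READ);
    printf("remaining locks: %u\n", c.tot_locks);                 /* 0 */
    return 0;
}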
+*/ + +#include "ma_ftdefs.h" + + /* lock table by F_UNLCK, F_RDLCK or F_WRLCK */ + +int maria_lock_database(MARIA_HA *info, int lock_type) +{ + int error; + uint count; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_lock_database"); + DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u " + "global_changed: %d open_count: %u name: '%s'", + lock_type, info->lock_type, share->r_locks, + share->w_locks, + share->global_changed, share->state.open_count, + share->index_file_name)); + if (share->options & HA_OPTION_READ_ONLY_DATA || + info->lock_type == lock_type) + DBUG_RETURN(0); + if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */ + { + ++share->w_locks; + ++share->tot_locks; + info->lock_type= lock_type; + DBUG_RETURN(0); + } + + error=0; + pthread_mutex_lock(&share->intern_lock); + if (share->kfile >= 0) /* May only be false on windows */ + { + switch (lock_type) { + case F_UNLCK: + maria_ftparser_call_deinitializer(info); + if (info->lock_type == F_RDLCK) + count= --share->r_locks; + else + count= --share->w_locks; + --share->tot_locks; + if (info->lock_type == F_WRLCK && !share->w_locks) + { + if (!share->delay_key_write && flush_key_blocks(share->key_cache, + share->kfile, + FLUSH_KEEP)) + { + error= my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + /* Mark that table must be checked */ + maria_mark_crashed(info); + } + if (share->data_file_type == BLOCK_RECORD && + flush_key_blocks(share->key_cache, info->dfile, FLUSH_KEEP)) + { + error= my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + /* Mark that table must be checked */ + maria_mark_crashed(info); + } + } + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + if (end_io_cache(&info->rec_cache)) + { + error=my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + } + if (!count) + { + DBUG_PRINT("info",("changed: %u w_locks: %u", + (uint) share->changed, share->w_locks)); + if (share->changed && !share->w_locks) + { +#ifdef HAVE_MMAP + if ((info->s->mmaped_length != + info->s->state.state.data_file_length) && + (info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS)) + { + if (info->s->concurrent_insert) + rw_wrlock(&info->s->mmap_lock); + _ma_remap_file(info, info->s->state.state.data_file_length); + info->s->nonmmaped_inserts= 0; + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + } +#endif + share->state.process= share->last_process=share->this_process; + share->state.unique= info->last_unique= info->this_unique; + share->state.update_count= info->last_loop= ++info->this_loop; + if (_ma_state_info_write(share->kfile, &share->state, 1)) + error=my_errno; + share->changed=0; + if (maria_flush) + { + if (_ma_sync_table_files(info)) + error= my_errno; + } + else + share->not_flushed=1; + if (error) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + } + } + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + info->lock_type= F_UNLCK; + break; + case F_RDLCK: + if (info->lock_type == F_WRLCK) + { + /* + Change RW to READONLY + + mysqld does not turn write locks to read locks, + so we're never here in mysqld. 
+ */ + share->w_locks--; + share->r_locks++; + info->lock_type=lock_type; + break; + } + if (!share->r_locks && !share->w_locks) + { + if (_ma_state_info_read_dsk(share->kfile, &share->state, 1)) + { + error=my_errno; + break; + } + } + VOID(_ma_test_if_changed(info)); + share->r_locks++; + share->tot_locks++; + info->lock_type=lock_type; + break; + case F_WRLCK: + if (info->lock_type == F_RDLCK) + { /* Change READONLY to RW */ + if (share->r_locks == 1) + { + share->r_locks--; + share->w_locks++; + info->lock_type=lock_type; + break; + } + } + if (!(share->options & HA_OPTION_READ_ONLY_DATA)) + { + if (!share->w_locks) + { + if (!share->r_locks) + { + if (_ma_state_info_read_dsk(share->kfile, &share->state, 1)) + { + error=my_errno; + break; + } + } + } + } + VOID(_ma_test_if_changed(info)); + + info->lock_type=lock_type; + info->invalidator=info->s->invalidator; + share->w_locks++; + share->tot_locks++; + break; + default: + break; /* Impossible */ + } + } +#ifdef __WIN__ + else + { + /* + Check for bad file descriptors if this table is part + of a merge union. Failing to capture this may cause + a crash on windows if the table is renamed and + later on referenced by the merge table. + */ + if( info->owned_by_merge && (info->s)->kfile < 0 ) + { + error = HA_ERR_NO_SUCH_TABLE; + } + } +#endif + pthread_mutex_unlock(&share->intern_lock); + DBUG_RETURN(error); +} /* maria_lock_database */ + + +/**************************************************************************** + The following functions are called by thr_lock() in threaded applications +****************************************************************************/ + +/* + Create a copy of the current status for the table + + SYNOPSIS + _ma_get_status() + param Pointer to Myisam handler + concurrent_insert Set to 1 if we are going to do concurrent inserts + (THR_WRITE_CONCURRENT_INSERT was used) +*/ + +void _ma_get_status(void* param, int concurrent_insert) +{ + MARIA_HA *info=(MARIA_HA*) param; + DBUG_ENTER("_ma_get_status"); + DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d", + (long) info->s->state.state.key_file_length, + (long) info->s->state.state.data_file_length, + concurrent_insert)); +#ifndef DBUG_OFF + if (info->state->key_file_length > info->s->state.state.key_file_length || + info->state->data_file_length > info->s->state.state.data_file_length) + DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld", + (long) info->state->key_file_length, + (long) info->state->data_file_length)); +#endif + info->save_state=info->s->state.state; + info->state= &info->save_state; + info->append_insert_at_end= concurrent_insert; + DBUG_VOID_RETURN; +} + + +void _ma_update_status(void* param) +{ + MARIA_HA *info=(MARIA_HA*) param; + /* + Because someone may have closed the table we point at, we only + update the state if its our own state. This isn't a problem as + we are always pointing at our own lock or at a read lock. 
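Editor's note on the _ma_get_status() / _ma_update_status() pair: this is the usual thr_lock "status" protocol; on lock each handler snapshots the shared state and works on its private copy, and on unlock the copy is published back only if the handler still points at its own snapshot. A compact sketch of that snapshot-and-publish idea with generic structures (not Maria's MARIA_HA/MARIA_SHARE):

#include <stdio.h>

/* Hypothetical shared/per-handler state: just two file lengths. */
struct state   { unsigned long key_len, data_len; };
struct share   { struct state state; };
struct handler {
    struct share *s;
    struct state  save_state;   /* private snapshot while locked     */
    struct state *state;        /* points at snapshot or shared copy */
};

static void get_status(struct handler *h)          /* called on lock   */
{
    h->save_state = h->s->state;                   /* take a snapshot   */
    h->state      = &h->save_state;                /* work on the copy  */
}

static void update_status(struct handler *h)       /* called on unlock */
{
    if (h->state == &h->save_state)                /* still our copy?   */
    {
        h->s->state = *h->state;                   /* publish changes   */
        h->state    = &h->s->state;                /* back to shared    */
    }
}

int main(void)
{
    struct share   sh = { { 100, 1000 } };
    struct handler h  = { &sh, { 0, 0 }, NULL };
    get_status(&h);
    h.state->data_len += 512;          /* an "insert" grows the private view */
    printf("shared before publish: %lu\n", sh.state.data_len);  /* 1000 */
    update_status(&h);
    printf("shared after publish:  %lu\n", sh.state.data_len);  /* 1512 */
    return 0;
}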
+ (This is enforced by thr_multi_lock.c) + */ + if (info->state == &info->save_state) + { +#ifndef DBUG_OFF + DBUG_PRINT("info",("updating status: key_file: %ld data_file: %ld", + (long) info->state->key_file_length, + (long) info->state->data_file_length)); + if (info->state->key_file_length < info->s->state.state.key_file_length || + info->state->data_file_length < info->s->state.state.data_file_length) + DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld", + (long) info->s->state.state.key_file_length, + (long) info->s->state.state.data_file_length)); +#endif + info->s->state.state= *info->state; + info->state= &info->s->state.state; + } + info->append_insert_at_end= 0; + + /* + We have to flush the write cache here as other threads may start + reading the table before maria_lock_database() is called + */ + if (info->opt_flag & WRITE_CACHE_USED) + { + if (end_io_cache(&info->rec_cache)) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + info->opt_flag&= ~WRITE_CACHE_USED; + } +} + +void _ma_copy_status(void* to,void *from) +{ + ((MARIA_HA*) to)->state= &((MARIA_HA*) from)->save_state; +} + + +/* + Check if should allow concurrent inserts + + IMPLEMENTATION + Allow concurrent inserts if we don't have a hole in the table or + if there is no active write lock and there is active read locks and + maria_concurrent_insert == 2. In this last case the new + row('s) are inserted at end of file instead of filling up the hole. + + The last case is to allow one to inserts into a heavily read-used table + even if there is holes. + + NOTES + If there is a an rtree indexes in the table, concurrent inserts are + disabled in maria_open() + + RETURN + 0 ok to use concurrent inserts + 1 not ok +*/ + +my_bool _ma_check_status(void *param) +{ + MARIA_HA *info=(MARIA_HA*) param; + /* + The test for w_locks == 1 is here because this thread has already done an + external lock (in other words: w_locks == 1 means no other threads has + a write lock) + */ + DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u", + (long) info->s->state.dellink, (uint) info->s->r_locks, + (uint) info->s->w_locks)); + return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR || + (maria_concurrent_insert == 2 && info->s->r_locks && + info->s->w_locks == 1)); +} + + +/**************************************************************************** + ** functions to read / write the state +****************************************************************************/ + +int _ma_readinfo(register MARIA_HA *info, int lock_type, int check_keybuffer) +{ + DBUG_ENTER("_ma_readinfo"); + + if (info->lock_type == F_UNLCK) + { + MARIA_SHARE *share=info->s; + if (!share->tot_locks) + { + if (_ma_state_info_read_dsk(share->kfile, &share->state, 1)) + { + int error=my_errno ? 
my_errno : -1; + my_errno=error; + DBUG_RETURN(1); + } + } + if (check_keybuffer) + VOID(_ma_test_if_changed(info)); + info->invalidator=info->s->invalidator; + } + else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK) + { + my_errno=EACCES; /* Not allowed to change */ + DBUG_RETURN(-1); /* when have read_lock() */ + } + DBUG_RETURN(0); +} /* _ma_readinfo */ + + +/* + Every isam-function that uppdates the isam-database MUST end with this + request +*/ + +int _ma_writeinfo(register MARIA_HA *info, uint operation) +{ + int error,olderror; + MARIA_SHARE *share= info->s; + DBUG_ENTER("_ma_writeinfo"); + DBUG_PRINT("info",("operation: %u tot_locks: %u", operation, + share->tot_locks)); + + error=0; + if (share->tot_locks == 0) + { + if (operation) + { /* Two threads can't be here */ + olderror= my_errno; /* Remember last error */ + share->state.process= share->last_process= share->this_process; + share->state.unique= info->last_unique= info->this_unique; + share->state.update_count= info->last_loop= ++info->this_loop; + if ((error= _ma_state_info_write(share->kfile, &share->state, 1))) + olderror=my_errno; +#ifdef __WIN__ + if (maria_flush) + { + _commit(share->kfile); + _commit(info->dfile); + } +#endif + my_errno=olderror; + } + } + else if (operation) + share->changed= 1; /* Mark keyfile changed */ + DBUG_RETURN(error); +} /* _ma_writeinfo */ + + + /* Test if someone has changed the database */ + /* (Should be called after readinfo) */ + +int _ma_test_if_changed(register MARIA_HA *info) +{ + MARIA_SHARE *share=info->s; + if (share->state.process != share->last_process || + share->state.unique != info->last_unique || + share->state.update_count != info->last_loop) + { /* Keyfile has changed */ + DBUG_PRINT("info",("index file changed")); + if (share->state.process != share->this_process) + VOID(flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE)); + share->last_process=share->state.process; + info->last_unique= share->state.unique; + info->last_loop= share->state.update_count; + info->update|= HA_STATE_WRITTEN; /* Must use file on next */ + info->data_changed= 1; /* For maria_is_changed */ + return 1; + } + return (!(info->update & HA_STATE_AKTIV) || + (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED | + HA_STATE_KEY_CHANGED))); +} /* _ma_test_if_changed */ + + +/* + Put a mark in the .MYI file that someone is updating the table + + + DOCUMENTATION + + state.open_count in the .MYI file is used the following way: + - For the first change of the .MYI file in this process open_count is + incremented by maria_mark_file_change(). (We have a write lock on the file + when this happens) + - In maria_close() it's decremented by _ma_decrement_open_count() if it + was incremented in the same process. + + This mean that if we are the only process using the file, the open_count + tells us if the MARIA file wasn't properly closed. (This is true if + my_disable_locking is set). +*/ + + +int _ma_mark_file_changed(MARIA_HA *info) +{ + char buff[3]; + register MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_mark_file_changed"); + + if (!(share->state.changed & STATE_CHANGED) || ! 
share->global_changed) + { + share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED | + STATE_NOT_OPTIMIZED_KEYS); + if (!share->global_changed) + { + share->global_changed=1; + share->state.open_count++; + } + if (!share->temporary) + { + mi_int2store(buff,share->state.open_count); + buff[2]=1; /* Mark that it's changed */ + DBUG_RETURN(my_pwrite(share->kfile,buff,sizeof(buff), + sizeof(share->state.header), + MYF(MY_NABP))); + } + } + DBUG_RETURN(0); +} + + +/* + This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite() + call. In these context the following code should be safe! + */ + +int _ma_decrement_open_count(MARIA_HA *info) +{ + char buff[2]; + register MARIA_SHARE *share=info->s; + int lock_error=0,write_error=0; + if (share->global_changed) + { + uint old_lock=info->lock_type; + share->global_changed=0; + lock_error=maria_lock_database(info,F_WRLCK); + /* Its not fatal even if we couldn't get the lock ! */ + if (share->state.open_count > 0) + { + share->state.open_count--; + mi_int2store(buff,share->state.open_count); + write_error=my_pwrite(share->kfile,buff,sizeof(buff), + sizeof(share->state.header), + MYF(MY_NABP)); + } + if (!lock_error) + lock_error=maria_lock_database(info,old_lock); + } + return test(lock_error || write_error); +} diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c new file mode 100644 index 00000000000..1e9afc7571d --- /dev/null +++ b/storage/maria/ma_loghandler.c @@ -0,0 +1,5296 @@ +#include "maria_def.h" + +/* number of opened log files in the pagecache (should be at least 2) */ +#define OPENED_FILES_NUM 3 + +/* records buffer size (should be LOG_PAGE_SIZE * n) */ +#define TRANSLOG_WRITE_BUFFER (1024*1024) +/* min chunk length */ +#define TRANSLOG_MIN_CHUNK 3 +/* + Number of buffers used by loghandler + + Should be at least 4, because one thread can block up to 2 buffers in + normal circumstances (less then half of one and full other, or just + switched one and other), But if we met end of the file in the middle and + have to switch buffer it will be 3. + 1 buffer for flushing/writing. + We have a bigger number here for higher concurrency. 
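Editor's note on the open_count protocol documented for _ma_mark_file_changed() above: the first change in a process increments a counter persisted in the index header, a clean close decrements it, so a non-zero counter at the next open means the table was not properly closed. A toy model of that protocol with plain integers standing in for the on-disk header field:

#include <stdio.h>

/* Toy model: 'disk_open_count' stands for the counter in the index header. */
struct table { unsigned disk_open_count; int global_changed; };

static void mark_changed(struct table *t)   /* first write in this process */
{
    if (!t->global_changed)
    {
        t->global_changed = 1;
        t->disk_open_count++;               /* persisted before the change */
    }
}

static void clean_close(struct table *t)    /* normal shutdown path        */
{
    if (t->global_changed && t->disk_open_count > 0)
    {
        t->disk_open_count--;               /* persisted on close          */
        t->global_changed = 0;
    }
}

static int was_crashed(const struct table *t)   /* checked at next open    */
{
    return t->disk_open_count != 0;
}

int main(void)
{
    struct table t = { 0, 0 };
    mark_changed(&t);
    clean_close(&t);
    printf("after clean close, crashed? %d\n", was_crashed(&t));   /* 0 */
    mark_changed(&t);           /* ... process dies before clean_close()   */
    printf("after lost close, crashed? %d\n", was_crashed(&t));    /* 1 */
    return 0;
}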
+*/ +#define TRANSLOG_BUFFERS_NO 5 +/* number of bytes (+ header) which can be unused on first page in sequence */ +#define TRANSLOG_MINCHUNK_CONTENT 1 +/* version of log file */ +#define TRANSLOG_VERSION_ID 10000 /* 1.00.00 */ + +#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */ + +/* QQ: For temporary debugging */ +#define UNRECOVERABLE_ERROR(E) \ + do { \ + DBUG_PRINT("error", E); \ + printf E; \ + putchar('\n'); \ + } while(0); + + + +/* record part descriptor */ +struct st_translog_part +{ + translog_size_t len; + byte *buff; +}; + + +/* record parts descriptor */ +struct st_translog_parts +{ + /* full record length */ + translog_size_t record_length; + /* full record length with chunk headers */ + translog_size_t total_record_length; + /* array of parts (st_translog_part) */ + DYNAMIC_ARRAY parts; + /* current part index */ + uint current; +}; + +/* log write buffer descriptor */ +struct st_translog_buffer +{ + LSN last_lsn; + /* This buffer offset in the file */ + TRANSLOG_ADDRESS offset; + /* + How much written (or will be written when copy_to_buffer_in_progress + become 0) to this buffer + */ + translog_size_t size; + /* File handler for this buffer */ + File file; + /* Threads which are waiting for buffer filling/freeing */ + WQUEUE waiting_filling_buffer; + /* Number of record which are in copy progress */ + uint copy_to_buffer_in_progress; + /* list of waiting buffer ready threads */ + struct st_my_thread_var *waiting_flush; + struct st_translog_buffer *overlay; +#ifndef DBUG_OFF + uint buffer_no; +#endif + /* lock for the buffer. Current buffer also lock the handler */ + pthread_mutex_t mutex; + /* IO cache for current log */ + byte buffer[TRANSLOG_WRITE_BUFFER]; +}; + + +struct st_buffer_cursor +{ + /* pointer on the buffer */ + byte *ptr; + /* current buffer */ + struct st_translog_buffer *buffer; + /* current page fill */ + uint16 current_page_fill; + /* how many times we finish this page to write it */ + uint16 write_counter; + /* previous write offset */ + uint16 previous_offset; + /* Number of current buffer */ + uint8 buffer_no; + my_bool chaser, protected; +}; + + +struct st_translog_descriptor +{ + /* *** Parameters of the log handler *** */ + + /* Page cache for the log reads */ + PAGECACHE *pagecache; + /* Flags */ + uint flags; + /* max size of one log size (for new logs creation) */ + uint32 log_file_max_size; + /* server version */ + uint32 server_version; + /* server ID */ + uint32 server_id; + /* Loghandler's buffer capacity in case of chunk 2 filling */ + uint32 buffer_capacity_chunk_2; + /* Half of the buffer capacity in case of chunk 2 filling */ + uint32 half_buffer_capacity_chunk_2; + /* Page overhead calculated by flags */ + uint16 page_overhead; + /* Page capacity calculated by flags (TRANSLOG_PAGE_SIZE-page_overhead-1) */ + uint16 page_capacity_chunk_2; + /* Directory to store files */ + char directory[FN_REFLEN]; + + /* *** Current state of the log handler *** */ + /* Current and (OPENED_FILES_NUM-1) last logs number in page cache */ + File log_file_num[OPENED_FILES_NUM]; + File directory_fd; + /* buffers for log writing */ + struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO]; + /* + horizon - visible end of the log (here is absolute end of the log: + position where next chunk can start + */ + TRANSLOG_ADDRESS horizon; + /* horizon buffer cursor */ + struct st_buffer_cursor bc; + + /* Last flushed LSN */ + LSN flushed; + LSN sent_to_file; + pthread_mutex_t sent_to_file_lock; +}; + +static struct st_translog_descriptor 
log_descriptor; + +/* Marker for end of log */ +static byte end_of_log= 0; + +/* record classes */ +enum record_class +{ + LOGRECTYPE_NOT_ALLOWED, + LOGRECTYPE_VARIABLE_LENGTH, + LOGRECTYPE_PSEUDOFIXEDLENGTH, + LOGRECTYPE_FIXEDLENGTH +}; + +/* chunk types */ +#define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head or tail */ +#define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */ +#define TRANSLOG_CHUNK_NOHDR (2 << 6) /* 2 no head chunk (till page end) */ +#define TRANSLOG_CHUNK_LNGTH (3 << 6) /* 3 chunk with chunk length */ +#define TRANSLOG_CHUNK_TYPE (3 << 6) /* Mask to get chunk type */ +#define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */ + +/* compressed (relative) LSN constants */ +#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */ +#define TRANSLOG_CLSN_MAX_LEN 5 /* Maximum length of compressed LSN */ + +typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, + void *tcb, + struct st_translog_parts *parts); + +typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, + void *tcb, + LSN *lsn, + struct st_translog_parts *parts); + +typedef uint16(*read_rec_hook) (enum translog_record_type type, + uint16 read_length, uchar *read_buff, + byte *decoded_buff); + +/* + Descriptor of log record type + Note: Don't reorder because of constructs later... +*/ +struct st_log_record_type_descriptor +{ + /* internal class of the record */ + enum record_class class; + /* length for fixed-size record, or maximum length of pseudo-fixed */ + uint16 fixed_length; + /* how much record body (belonged to headers too) read with headers */ + uint16 read_header_len; + /* HOOK for writing the record called before lock */ + prewrite_rec_hook prewrite_hook; + /* HOOK for writing the record called when LSN is known */ + inwrite_rec_hook inwrite_hook; + /* HOOK for reading headers */ + read_rec_hook read_hook; + /* + For pseudo fixed records number of compressed LSNs followed by + system header + */ + int16 compressed_LSN; +}; + + +static struct st_log_record_type_descriptor + log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]= +{ + /*LOGREC_RESERVED_FOR_CHUNKS23= 0 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INSERT_ROW_HEAD= 1 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INSERT_ROW_TAIL= 2 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INSERT_ROW_BLOB= 3 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INSERT_ROW_BLOBS= 4 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_PURGE_ROW= 5 */ + {LOGRECTYPE_FIXEDLENGTH, 9, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_PURGE_BLOCKS= 6 */ + {LOGRECTYPE_FIXEDLENGTH, 10, 10, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_DELETE_ROW= 7 */ + {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_UPDATE_ROW_HEAD= 8 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_INDEX= 9 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_UNDELETE_ROW= 10 */ + {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, NULL, NULL, 0}, + /*LOGREC_CLR_END= 11 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}, + /*LOGREC_PURGE_END= 12 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}, + /*LOGREC_UNDO_ROW_INSERT= 13 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 14, 14, NULL, NULL, NULL, 1}, + /*LOGREC_UNDO_ROW_DELETE= 14 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 19, 19, NULL, NULL, NULL, 2}, + 
/*LOGREC_UNDO_ROW_UPDATE= 15 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 14, NULL, NULL, NULL, 2}, + /*LOGREC_UNDO_KEY_INSERT= 16 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, NULL, NULL, 1}, + /*LOGREC_UNDO_KEY_DELETE= 17 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, NULL, NULL, 2}, + /*LOGREC_PREPARE= 18 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_PREPARE_WITH_UNDO_PURGE= 19 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 5, NULL, NULL, NULL, 1}, + /*LOGREC_COMMIT= 20 */ + {LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_COMMIT_WITH_UNDO_PURGE= 21 */ + {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}, + /*LOGREC_CHECKPOINT_PAGE= 22 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 6, NULL, NULL, NULL, 0}, + /*LOGREC_CHECKPOINT_TRAN= 23 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_CHECKPOINT_TABL= 24 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_CREATE_TABLE= 25 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_RENAME_TABLE= 26 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_DROP_TABLE= 27 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_REDO_TRUNCATE_TABLE= 28 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}, + /*LOGREC_FILE_ID= 29 */ + {LOGRECTYPE_VARIABLE_LENGTH, 0, 4, NULL, NULL, NULL, 0}, + /*LOGREC_LONG_TRANSACTION_ID= 30 */ + {LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0}, + /*31 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*32 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*33 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*34 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*35 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*36 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*37 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*38 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*39 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*40 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*41 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*42 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*43 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*44 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*45 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*46 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*47 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*48 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*49 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*50 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*51 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*52 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*53 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*54 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*55 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*56 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*57 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*58 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*59 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*60 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*61 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}, + /*62 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, 
NULL, 0}, + /*LOGREC_RESERVED_FUTURE_EXTENSION= 63 */ + {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0} +}; + +/* all possible flags page overheads */ +static uint page_overhead[TRANSLOG_FLAGS_NUM]; + +typedef struct st_translog_validator_data +{ + TRANSLOG_ADDRESS *addr; + my_bool was_recovered; +} TRANSLOG_VALIDATOR_DATA; + + +const char *maria_data_root; + + +/* + Check cursor/buffer consistence + + SYNOPSIS + translog_check_cursor + cursor cursor which will be checked +*/ + +#ifndef DBUG_OFF +static void translog_check_cursor(struct st_buffer_cursor *cursor) +{ + DBUG_ASSERT(cursor->chaser || + ((ulong) (cursor->ptr - cursor->buffer->buffer) == + cursor->buffer->size)); + DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == + cursor->current_page_fill % TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); +} +#endif + +/* + Get file name of the log by log number + + SYNOPSIS + translog_filename_by_fileno() + file_no Number of the log we want to open + path Pointer to buffer where file name will be + stored (must be FN_REFLEN bytes at least + RETURN + pointer to path +*/ + +static char *translog_filename_by_fileno(uint32 file_no, char *path) +{ + char file_name[10 + 8 + 1]; /* See my_sprintf */ + char *res; + DBUG_ENTER("translog_filename_by_fileno"); + DBUG_ASSERT(file_no <= 0xfffffff); + my_sprintf(file_name, (file_name, "maria_log.%08u", file_no)); + res= fn_format(path, file_name, log_descriptor.directory, "", MYF(MY_WME)); + DBUG_PRINT("info", ("Path: '%s' path: 0x%lx res: 0x%lx", + res, (ulong) path, (ulong) res)); + DBUG_RETURN(res); +} + + +/* + Open log file with given number without cache + + SYNOPSIS + open_logfile_by_number_no_cache() + file_no Number of the log we want to open + + RETURN + -1 error + # file descriptor number +*/ + +static File open_logfile_by_number_no_cache(uint32 file_no) +{ + File file; + char path[FN_REFLEN]; + DBUG_ENTER("open_logfile_by_number_no_cache"); + + /* Todo: add O_DIRECT to open flags (when buffer is aligned) */ + if ((file= my_open(translog_filename_by_fileno(file_no, path), + O_CREAT | O_BINARY | O_RDWR, + MYF(MY_WME))) < 0) + { + UNRECOVERABLE_ERROR(("Error %d during opening file '%s'", errno, path)); + DBUG_RETURN(-1); + } + DBUG_PRINT("info", ("File: '%s' handler: %d", path, file)); + DBUG_RETURN(file); +} + + +/* + Write log file page header in the just opened new log file + + SYNOPSIS + translog_write_file_header(); + + NOTES + First page is just a marker page; We don't store any real log data in it. 
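Editor's note on the log file header written by translog_write_file_header() just below: reconstructed from that code, the marker page holds a 12-byte magic, an 8-byte timestamp, the 4-byte log format version, the 4-byte server version, the 4-byte server id, a 2-byte page-size/sector-size ratio and a 3-byte file number, with the rest of the page zero-filled. The sketch below builds the same shape of header into a plain buffer, assuming (as elsewhere in this code base) that the intNstore() macros store the least significant byte first; the constants passed in main() are placeholders.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Store the n low-order bytes of v at p, least significant byte first. */
static void store_le(unsigned char *p, uint64_t v, int n)
{
    for (int i = 0; i < n; i++)
        p[i] = (unsigned char) (v >> (8 * i));
}

static size_t build_log_header(unsigned char *page, size_t page_size,
                               uint64_t timestamp, uint32_t log_version,
                               uint32_t server_version, uint32_t server_id,
                               uint16_t pagesize_in_sectors, uint32_t file_no)
{
    static const unsigned char magic[12] =
    { 254, 254, 11, 1, 'M', 'A', 'R', 'I', 'A', 'L', 'O', 'G' };
    unsigned char *p = page;

    memcpy(p, magic, sizeof(magic));        p += sizeof(magic);
    store_le(p, timestamp, 8);              p += 8;
    store_le(p, log_version, 4);            p += 4;
    store_le(p, server_version, 4);         p += 4;
    store_le(p, server_id, 4);              p += 4;
    store_le(p, pagesize_in_sectors, 2);    p += 2;
    store_le(p, file_no, 3);                p += 3;
    memset(p, 0, page_size - (size_t) (p - page));  /* pad to a full page */
    return (size_t) (p - page);             /* bytes of real header data  */
}

int main(void)
{
    unsigned char page[8192];
    size_t used = build_log_header(page, sizeof(page),
                                   123456789ULL, 10000, 50100, 1, 16, 1);
    printf("header uses %zu of %zu bytes\n", used, sizeof(page));   /* 37 */
    return 0;
}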
+ + RETURN + 0 OK + 1 ERROR +*/ + +uchar NEAR maria_trans_file_magic[]= +{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A', + 'L', 'O', 'G' }; + +static my_bool translog_write_file_header() +{ + ulonglong timestamp; + byte page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff; + DBUG_ENTER("translog_write_file_header"); + + /* file tag */ + memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic)); + page+= sizeof(maria_trans_file_magic); + /* timestamp */ + timestamp= my_getsystime(); + int8store(page, timestamp); + page+= 8; + /* maria version */ + int4store(page, TRANSLOG_VERSION_ID); + page+= 4; + /* mysql version (MYSQL_VERSION_ID) */ + int4store(page, log_descriptor.server_version); + page+= 4; + /* server ID */ + int4store(page, log_descriptor.server_id); + page+= 4; + /* loghandler page_size/DISK_DRIVE_SECTOR_SIZE */ + int2store(page, TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE); + page+= 2; + /* file number */ + int3store(page, LSN_FILE_NO(log_descriptor.horizon)); + page+= 3; + bzero(page, sizeof(page_buff) - (page- page_buff)); + + DBUG_RETURN(my_pwrite(log_descriptor.log_file_num[0], page_buff, + sizeof(page_buff), 0, MYF(MY_WME | MY_NABP)) != 0); +} + + +/* + Initialize transaction log file buffer + + SYNOPSIS + translog_buffer_init() + buffer The buffer to initialize + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_buffer_init(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_init"); + /* This buffer offset */ + buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + /* This Buffer File */ + buffer->file= -1; + buffer->overlay= 0; + /* IO cache for current log */ + bzero(buffer->buffer, TRANSLOG_WRITE_BUFFER); + /* Buffer size */ + buffer->size= 0; + /* cond of thread which is waiting for buffer filling */ + buffer->waiting_filling_buffer.last_thread= 0; + /* Number of record which are in copy progress */ + buffer->copy_to_buffer_in_progress= 0; + /* list of waiting buffer ready threads */ + buffer->waiting_flush= 0; + /* lock for the buffer. 
Current buffer also lock the handler */ + if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +/* + Close transaction log file by descriptor + + SYNOPSIS + translog_close_log_file() + file file descriptor + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_close_log_file(File file) +{ + int rc; + PAGECACHE_FILE fl; + fl.file= file; + flush_pagecache_blocks(log_descriptor.pagecache, &fl, FLUSH_RELEASE); + /* + Sync file when we close it + TODO: sync only we have changed the log + */ + rc= my_sync(file, MYF(MY_WME)); + rc|= my_close(file, MYF(MY_WME)); + return test(rc); +} + + +/* + Create and fill header of new file + + SYNOPSIS + translog_create_new_file() + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_create_new_file() +{ + int i; + uint32 file_no= LSN_FILE_NO(log_descriptor.horizon); + DBUG_ENTER("translog_create_new_file"); + + if (log_descriptor.log_file_num[OPENED_FILES_NUM - 1] != -1 && + translog_close_log_file(log_descriptor.log_file_num[OPENED_FILES_NUM - + 1])) + DBUG_RETURN(1); + for (i= OPENED_FILES_NUM - 1; i > 0; i--) + log_descriptor.log_file_num[i]= log_descriptor.log_file_num[i - 1]; + + if ((log_descriptor.log_file_num[0]= + open_logfile_by_number_no_cache(file_no)) == -1 || + translog_write_file_header()) + DBUG_RETURN(1); + + if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, file_no, + CONTROL_FILE_UPDATE_ONLY_LOGNO)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/* + Lock the loghandler buffer + + SYNOPSIS + translog_buffer_lock() + buffer This buffer which should be locked + + RETURN + 0 OK + 1 Error +*/ + +#ifndef DBUG_OFF +static my_bool translog_buffer_lock(struct st_translog_buffer *buffer) +{ + int res; + DBUG_ENTER("translog_buffer_lock"); + DBUG_PRINT("enter", + ("Lock buffer #%u: (0x%lx) mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + res= (pthread_mutex_lock(&buffer->mutex) != 0); + DBUG_RETURN(res); +} +#else +#define translog_buffer_lock(B) \ + pthread_mutex_lock(&B->mutex); +#endif + + +/* + Unlock the loghandler buffer + + SYNOPSIS + translog_buffer_unlock() + buffer This buffer which should be unlocked + + RETURN + 0 OK + 1 Error +*/ + +#ifndef DBUG_OFF +static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer) +{ + int res; + DBUG_ENTER("translog_buffer_unlock"); + DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx) " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + + res= (pthread_mutex_unlock(&buffer->mutex) != 0); + DBUG_PRINT("enter", ("Unlocked buffer... 
#%u: 0x%lx mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + DBUG_RETURN(res); +} +#else +#define translog_buffer_unlock(B) \ + pthread_mutex_unlock(&B->mutex); +#endif + + +/* + Write a header on the page + + SYNOPSIS + translog_new_page_header() + horizon Where to write the page + cursor Where to write the page + + NOTE + - space for page header should be checked before +*/ + +static void translog_new_page_header(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + byte *ptr; + + DBUG_ENTER("translog_new_page_header"); + DBUG_ASSERT(cursor->ptr); + + cursor->protected= 0; + + ptr= cursor->ptr; + /* Page number */ + int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE); + ptr+= 3; + /* File number */ + int3store(ptr, LSN_FILE_NO(*horizon)); + ptr+= 3; + *(ptr++)= (byte) log_descriptor.flags; + if (log_descriptor.flags & TRANSLOG_PAGE_CRC) + { +#ifndef DBUG_OFF + DBUG_PRINT("info", ("write 0x11223344 CRC to (%lu,0x%lx)", + (ulong) LSN_FILE_NO(*horizon), + (ulong) LSN_OFFSET(*horizon))); + /* This will be overwritten by real CRC; This is just for debugging */ + int4store(ptr, 0x11223344); +#endif + /* CRC will be put when page is finished */ + ptr+= CRC_LENGTH; + } + if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION) + { + time_t tm; + uint16 tmp_time= time(&tm); + int2store(ptr, tmp_time); + ptr+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + } + { + uint len= (ptr - cursor->ptr); + (*horizon)+= len; /* it is increasing of offset part of the address */ + cursor->current_page_fill= len; + if (!cursor->chaser) + cursor->buffer->size+= len; + } + cursor->ptr= ptr; + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + DBUG_VOID_RETURN; +} + + +/* + Put sector protection on the page image + + SYNOPSIS + translog_put_sector_protection() + page reference on the page content + cursor cursor of the buffer + + NOTES + We put a sector protection on all following sectors on the page, + except the first sector that is protected by page header. 
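Editor's note on sector protection: as the comment above says, every disk sector of a log page except the first (covered by the page header) gets its first two bytes replaced by a per-page write-counter value, and the displaced bytes are saved in a table in the page header, so a torn write can be detected later because not all sectors carry the same stamp. A toy stamp-and-verify version over a plain buffer follows; layout and sizes are simplified and the saved-bytes table is kept outside the page for clarity, unlike the real on-disk format.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define PAGE_SIZE    8192
#define SECTOR_SIZE  512
#define N_SECTORS    (PAGE_SIZE / SECTOR_SIZE)

struct protection {
    uint16_t      value;                /* stamp written into each sector */
    unsigned char saved[N_SECTORS][2];  /* bytes displaced by the stamp   */
};

static void protect_page(unsigned char *page, struct protection *p,
                         uint16_t value)
{
    p->value = value;
    for (int s = 1; s < N_SECTORS; s++)      /* sector 0 is the page header */
    {
        unsigned char *sec = page + s * SECTOR_SIZE;
        p->saved[s][0] = sec[0];
        p->saved[s][1] = sec[1];
        sec[0] = (unsigned char) (value & 0xff);
        sec[1] = (unsigned char) (value >> 8);
    }
}

/* Returns 1 if every protected sector carries the expected stamp,
   i.e. the page reached disk completely. */
static int page_is_complete(const unsigned char *page,
                            const struct protection *p)
{
    for (int s = 1; s < N_SECTORS; s++)
    {
        const unsigned char *sec = page + s * SECTOR_SIZE;
        uint16_t got = (uint16_t) (sec[0] | (sec[1] << 8));
        if (got != p->value)
            return 0;
    }
    return 1;
}

int main(void)
{
    static unsigned char page[PAGE_SIZE];
    struct protection p;
    protect_page(page, &p, 42);
    printf("complete: %d\n", page_is_complete(page, &p));   /* 1 */
    page[3 * SECTOR_SIZE] ^= 0xff;          /* simulate a torn sector */
    printf("complete: %d\n", page_is_complete(page, &p));   /* 0 */
    return 0;
}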
+*/ + +static void translog_put_sector_protection(byte *page, + struct st_buffer_cursor *cursor) +{ + byte *table= page + log_descriptor.page_overhead - + (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + uint16 value= uint2korr(table) + cursor->write_counter; + uint16 last_protected_sector= ((cursor->previous_offset - 1) / + DISK_DRIVE_SECTOR_SIZE); + uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE; + uint i, offset; + DBUG_ENTER("translog_put_sector_protection"); + + if (start_sector == 0) + start_sector= 1; /* First sector is protected */ + + DBUG_PRINT("enter", ("Write counter:%u value:%u offset:%u, " + "last protected:%u start sector:%u", + (uint) cursor->write_counter, + (uint) value, + (uint) cursor->previous_offset, + (uint) last_protected_sector, (uint) start_sector)); + if (last_protected_sector == start_sector) + { + i= last_protected_sector * 2; + offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE; + /* restore data, because we modified sector which was protected */ + if (offset < cursor->previous_offset) + page[offset]= table[i]; + offset++; + if (offset < cursor->previous_offset) + page[offset]= table[i + 1]; + } + for (i= start_sector * 2, offset= start_sector * DISK_DRIVE_SECTOR_SIZE; + i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + (i+= 2), (offset+= DISK_DRIVE_SECTOR_SIZE)) + { + DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x%x", + i / 2, offset, (uint) page[offset], + (uint) page[offset + 1])); + table[i]= page[offset]; + table[i + 1]= page[offset + 1]; + int2store(page + offset, value); + DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x%x", + i / 2, offset, (uint) page[offset], + (uint) page[offset + 1])); + } + DBUG_VOID_RETURN; +} + + +/* + Calculate CRC32 of given area + + SYNOPSIS + translog_crc() + area Pointer of the area beginning + length The Area length + + RETURN + CRC32 +*/ + +static uint32 translog_crc(byte *area, uint length) +{ + return crc32(0L, area, length); +} + + +/* + Finish current page with zeros + + SYNOPSIS + translog_finish_page() + horizon \ horizon & buffer pointers + cursor / +*/ + +static void translog_finish_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill; + byte *page= cursor->ptr -cursor->current_page_fill; + DBUG_ENTER("translog_finish_page"); + DBUG_PRINT("enter", ("Buffer: #%u 0x%lx " + "Buffer addr: (%lu,0x%lx) " + "Page addr: (%lu,0x%lx) " + "size:%lu (%lu) Pg:%u left:%u", + (uint) cursor->buffer_no, (ulong) cursor->buffer, + (ulong) LSN_FILE_NO(cursor->buffer->offset), + (ulong) LSN_OFFSET(cursor->buffer->offset), + (ulong) LSN_FILE_NO(*horizon), + (ulong) (LSN_OFFSET(*horizon) - + cursor->current_page_fill), + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer), + (uint) cursor->current_page_fill, (uint) left)); + DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + if (cursor->protected) + { + DBUG_PRINT("info", ("Already protected and finished")); + DBUG_VOID_RETURN; + } + cursor->protected= 1; + + DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE); + if (left != 0) + { + DBUG_PRINT("info", ("left: %u", (uint) left)); + bzero(cursor->ptr, left); + cursor->ptr +=left; + (*horizon)+= left; /* offset increasing */ + if (!cursor->chaser) + cursor->buffer->size+= left; + cursor->current_page_fill= 0; + DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx " + "chaser: %d Size: %lu (%lu)", + (uint) 
cursor->buffer->buffer_no, + (ulong) cursor->buffer, cursor->chaser, + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + } + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) + { + translog_put_sector_protection(page, cursor); + DBUG_PRINT("info", ("drop write_counter")); + cursor->write_counter= 0; + cursor->previous_offset= 0; + } + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC) + { + uint32 crc= translog_crc(page + log_descriptor.page_overhead, + TRANSLOG_PAGE_SIZE - + log_descriptor.page_overhead); + DBUG_PRINT("info", ("CRC: %lx", (ulong) crc)); + /* We have page number, file number and flag before crc */ + int4store(page + 3 + 3 + 1, crc); + } + DBUG_VOID_RETURN; +} + + +/* + Wait until all thread finish filling this buffer + + SYNOPSIS + translog_wait_for_writers() + buffer This buffer should be check + + NOTE + This buffer should be locked +*/ + +static void translog_wait_for_writers(struct st_translog_buffer *buffer) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ENTER("translog_wait_for_writers"); + DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u", + (uint) buffer->buffer_no, (ulong) buffer, + (int) buffer->copy_to_buffer_in_progress)); + + while (buffer->copy_to_buffer_in_progress) + { + DBUG_PRINT("info", ("wait for writers... " + "buffer: #%u 0x%lx " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + DBUG_ASSERT(buffer->file != -1); + wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread, + &buffer->mutex); + DBUG_PRINT("info", ("wait for writers done " + "buffer: #%u 0x%lx " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + } + + DBUG_VOID_RETURN; +} + + +/* + + Wait for buffer to become free + + SYNOPSIS + translog_wait_for_buffer_free() + buffer The buffer we are waiting for + + NOTE + - this buffer should be locked +*/ + +static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ENTER("translog_wait_for_buffer_free"); + DBUG_PRINT("enter", ("Buffer: #%u 0x%lx copies in progress: %u " + "File: %d size: 0x%lu", + (uint) buffer->buffer_no, (ulong) buffer, + (int) buffer->copy_to_buffer_in_progress, + buffer->file, (ulong) buffer->size)); + + translog_wait_for_writers(buffer); + + while (buffer->file != -1) + { + DBUG_PRINT("info", ("wait for writers... " + "buffer: #%u 0x%lx " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread, + &buffer->mutex); + DBUG_PRINT("info", ("wait for writers done. 
" + "buffer: #%u 0x%lx " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + } + DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0); + DBUG_VOID_RETURN; +} + + +/* + Initialize the cursor for a buffer + + SYNOPSIS + translog_cursor_init() + buffer The buffer + cursor It's cursor + buffer_no Number of buffer +*/ + +static void translog_cursor_init(struct st_buffer_cursor *cursor, + struct st_translog_buffer *buffer, + uint8 buffer_no) +{ + DBUG_ENTER("translog_cursor_init"); + cursor->ptr= buffer->buffer; + cursor->buffer= buffer; + cursor->buffer_no= buffer_no; + cursor->current_page_fill= 0; + cursor->chaser= (cursor != &log_descriptor.bc); + cursor->write_counter= 0; + cursor->previous_offset= 0; + cursor->protected= 0; + DBUG_VOID_RETURN; +} + + +/* + Initialize buffer for current file + + SYNOPSIS + translog_start_buffer() + buffer The buffer + cursor It's cursor + buffer_no Number of buffer +*/ + +static void translog_start_buffer(struct st_translog_buffer *buffer, + struct st_buffer_cursor *cursor, + uint buffer_no) +{ + DBUG_ENTER("translog_start_buffer"); + DBUG_PRINT("enter", + ("Assign buffer: #%u (0x%lx) to file: %d offset: 0x%lx(%lu)", + (uint) buffer->buffer_no, (ulong) buffer, + log_descriptor.log_file_num[0], + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + DBUG_ASSERT(buffer_no == buffer->buffer_no); + buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + buffer->offset= log_descriptor.horizon; + buffer->file= log_descriptor.log_file_num[0]; + buffer->overlay= 0; + buffer->size= 0; + translog_cursor_init(cursor, buffer, buffer_no); + DBUG_PRINT("info", ("init cursor #%u: 0x%lx chaser: %d Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + DBUG_VOID_RETURN; +} + + +/* + Switch to the next buffer in a chain + + SYNOPSIS + translog_buffer_next() + horizon \ Pointers on current position in file and buffer + cursor / + next_file Also start new file + + NOTE: + - loghandler should be locked + - after return new and old buffer still are locked + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + my_bool new_file) +{ + uint old_buffer_no= cursor->buffer_no; + uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; + struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no; + my_bool chasing= cursor->chaser; + DBUG_ENTER("translog_buffer_next"); + + DBUG_PRINT("info", ("horizon: (%lu,0x%lx) chasing: %d", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), chasing)); + + DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0); + + translog_finish_page(horizon, cursor); + + if (!chasing) + { + translog_buffer_lock(new_buffer); + translog_wait_for_buffer_free(new_buffer); + } +#ifndef DBUG_OFF + else + DBUG_ASSERT(new_buffer->file != 0); +#endif + if (new_file) + { + /* move the horizon to the next file and its header page */ + (*horizon)+= LSN_ONE_FILE; + (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE); + if (!chasing && translog_create_new_file()) + { + DBUG_RETURN(1); + } + } + + /* prepare next page */ + if (chasing) + translog_cursor_init(cursor, new_buffer, new_buffer_no); + else + translog_start_buffer(new_buffer, cursor, new_buffer_no); + 
translog_new_page_header(horizon, cursor); + DBUG_RETURN(0); +} + + +/* + Set max LSN send to file + + SYNOPSIS + translog_set_sent_to_file() + lsn LSN to assign +*/ + +static void translog_set_sent_to_file(LSN *lsn) +{ + DBUG_ENTER("translog_set_sent_to_file"); + pthread_mutex_lock(&log_descriptor.sent_to_file_lock); + DBUG_ASSERT(cmp_translog_addr(*lsn, log_descriptor.sent_to_file) >= 0); + log_descriptor.sent_to_file= *lsn; + pthread_mutex_unlock(&log_descriptor.sent_to_file_lock); + DBUG_VOID_RETURN; +} + + +/* + Get max LSN send to file + + SYNOPSIS + translog_get_sent_to_file() + lsn LSN to value +*/ + +static void translog_get_sent_to_file(LSN *lsn) +{ + DBUG_ENTER("translog_get_sent_to_file"); + pthread_mutex_lock(&log_descriptor.sent_to_file_lock); + *lsn= log_descriptor.sent_to_file; + pthread_mutex_unlock(&log_descriptor.sent_to_file_lock); + DBUG_VOID_RETURN; +} + + +/* + Get first chunk address on the given page + + SYNOPSIS + translog_get_first_chunk_offset() + page The page where to find first chunk + + RETURN + first chunk offset +*/ + +static my_bool translog_get_first_chunk_offset(byte *page) +{ + uint16 page_header= 7; + DBUG_ENTER("translog_get_first_chunk_offset"); + + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC) + page_header+= 4; + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) + page_header+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + DBUG_RETURN(page_header); +} + + +/* + Write coded length of record + + SYNOPSIS + translog_write_variable_record_1group_code_len + dst Destination buffer pointer + length Length which should be coded + header_len Calculated total header length +*/ + +static void +translog_write_variable_record_1group_code_len(byte *dst, + translog_size_t length, + uint16 header_len) +{ + switch (header_len) { + case 6: /* (5 + 1) */ + DBUG_ASSERT(length <= 250); + *dst= (uint8) length; + return; + case 8: /* (5 + 3) */ + DBUG_ASSERT(length <= 0xFFFF); + *dst= 251; + int2store(dst + 1, length); + return; + case 9: /* (5 + 4) */ + DBUG_ASSERT(length <= (ulong) 0xFFFFFF); + *dst= 252; + int3store(dst + 1, length); + return; + case 10: /* (5 + 5) */ + *dst= 253; + int4store(dst + 1, length); + return; + default: + DBUG_ASSERT(0); + } + return; +} + + +/* + Decode record data length and advance given pointer to the next field + + SYNOPSIS + translog_variable_record_1group_decode_len() + src The pointer to the pointer to the length beginning + + RETURN + decoded length +*/ + +static translog_size_t translog_variable_record_1group_decode_len(byte **src) +{ + uint8 first= (uint8) (**src); + switch (first) { + case 251: + (*src)+= 3; + return (uint2korr((*src) - 2)); + case 252: + (*src)+= 4; + return (uint3korr((*src) - 3)); + case 253: + (*src)+= 5; + return (uint4korr((*src) - 4)); + case 254: + case 255: + DBUG_ASSERT(0); /* reserved for future use */ + return (0); + default: + (*src)++; + return (first); + } +} + + +/* + Get total length of this chunk (not only body) + + SYNOPSIS + translog_get_total_chunk_length() + page The page where chunk placed + offset Offset of the chunk on this place + + RETURN + total length of the chunk +*/ + +static uint16 translog_get_total_chunk_length(byte *page, uint16 offset) +{ + DBUG_ENTER("translog_get_total_chunk_length"); + switch (page[offset] & TRANSLOG_CHUNK_TYPE) { + case TRANSLOG_CHUNK_LSN: + { + /* 0 chunk referred as LSN (head or tail) */ + translog_size_t rec_len; + byte *start= page + offset; + byte *ptr= start + 1 + 2; + uint16 chunk_len, header_len, page_rest; + 
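  /*
    The chunk type 0 header decoded here mirrors what
    translog_write_variable_record_1group_header() writes:
      1 byte           chunk type | record type
      2 bytes          short transaction id
      1,3,4 or 5 bytes record length, coded as in
                       translog_write_variable_record_1group_code_len():
                       a first byte <= 250 is the length itself, and
                       251/252/253 announce a 2/3/4 byte length
      2 bytes          chunk length (0 means a one-group record)
    For example (illustrative values), a 300 byte record is coded as 251
    followed by int2store(300), so header_len below becomes
    1 + 2 + 3 + 2 = 8.
  */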
DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN")); + rec_len= translog_variable_record_1group_decode_len(&ptr); + chunk_len= uint2korr(ptr); + header_len= (ptr -start) + 2; + DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u", + (ulong) rec_len, (uint) chunk_len, (uint) header_len)); + if (chunk_len) + { + DBUG_PRINT("info", ("chunk len: %u + %u = %u", + (uint) header_len, (uint) chunk_len, + (uint) (chunk_len + header_len))); + DBUG_RETURN(chunk_len + header_len); + } + page_rest= TRANSLOG_PAGE_SIZE - offset; + DBUG_PRINT("info", ("page_rest %u", (uint) page_rest)); + if (rec_len + header_len < page_rest) + DBUG_RETURN(rec_len + header_len); + DBUG_RETURN(page_rest); + break; + } + case TRANSLOG_CHUNK_FIXED: + { + byte *ptr; + uint type= page[offset] & TRANSLOG_REC_TYPE; + uint length; + int i; + /* 1 (pseudo)fixed record (also LSN) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED")); + DBUG_ASSERT(log_record_type_descriptor[type].class == + LOGRECTYPE_FIXEDLENGTH || + log_record_type_descriptor[type].class == + LOGRECTYPE_PSEUDOFIXEDLENGTH); + if (log_record_type_descriptor[type].class == LOGRECTYPE_FIXEDLENGTH) + { + DBUG_PRINT("info", + ("Fixed length: %u", + (uint) (log_record_type_descriptor[type].fixed_length + 3))); + DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3); + } + { + ptr= page + offset + 3; /* first compressed LSN */ + length= log_record_type_descriptor[type].fixed_length + 3; + for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++) + { + /* first 2 bits is length - 2 */ + uint len= ((((uint8) (*ptr)) & TRANSLOG_CLSN_LEN_BITS) >> 6) + 2; + ptr+= len; + /* subtract economized bytes */ + length-= (TRANSLOG_CLSN_MAX_LEN - len); + } + DBUG_PRINT("info", ("Pseudo-fixed length: %u", length)); + DBUG_RETURN(length); + } + break; + } + case TRANSLOG_CHUNK_NOHDR: + /* 2 no header chunk (till page end) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR length: %u", + (uint) (TRANSLOG_PAGE_SIZE - offset))); + DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset); + break; + case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH")); + DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3); + DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3)); + DBUG_RETURN(uint2korr(page + offset + 1) + 3); + break; + default: + DBUG_ASSERT(0); + } +} + + +/* + Flush given buffer + + SYNOPSIS + translog_buffer_flush() + buffer This buffer should be flushed + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) +{ + uint32 i; + DBUG_ENTER("translog_buffer_flush"); + DBUG_PRINT("enter", + ("Buffer: #%u 0x%lx: " + "file: %d offset: (%lu,0x%lx) size: %lu", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->file, + (ulong) LSN_FILE_NO(buffer->offset), + (ulong) LSN_OFFSET(buffer->offset), + (ulong) buffer->size)); + + DBUG_ASSERT(buffer->file != -1); + + translog_wait_for_writers(buffer); + if (buffer->overlay && buffer->overlay->file != -1) + { + struct st_translog_buffer *overlay= buffer->overlay; + translog_buffer_unlock(buffer); + translog_buffer_lock(overlay); + translog_wait_for_buffer_free(overlay); + translog_buffer_unlock(overlay); + translog_buffer_lock(buffer); + } + + for (i= 0; i < buffer->size; i+= TRANSLOG_PAGE_SIZE) + { + PAGECACHE_FILE file; + file.file= buffer->file; + if (pagecache_write(log_descriptor.pagecache, + &file, + (LSN_OFFSET(buffer->offset) + i) / TRANSLOG_PAGE_SIZE, + 3, + buffer->buffer + i, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 
PAGECACHE_PIN_LEFT_UNPINNED, PAGECACHE_WRITE_DONE, 0)) + { + UNRECOVERABLE_ERROR(("Can't write page (%lu,0x%lx) to pagecache", + (ulong) buffer->file, + (ulong) (LSN_OFFSET(buffer->offset)+ i))); + } + } + if (my_pwrite(buffer->file, (char*) buffer->buffer, + buffer->size, LSN_OFFSET(buffer->offset), + MYF(MY_WME | MY_NABP))) + { + UNRECOVERABLE_ERROR(("Can't write buffer (%lu,0x%lx) size %lu " + "to the disk (%d)", + (ulong) buffer->file, + (ulong) LSN_OFFSET(buffer->offset), + (ulong) buffer->size, errno)); + DBUG_RETURN(1); + } + if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */ + translog_set_sent_to_file(&buffer->last_lsn); + + /* Free buffer */ + buffer->file= -1; + buffer->overlay= 0; + if (buffer->waiting_filling_buffer.last_thread) + { + wqueue_release_queue(&buffer->waiting_filling_buffer); + } + DBUG_RETURN(0); +} + + +/* + Recover page with sector protection (wipe out failed chunks) + + SYNOPSYS + translog_recover_page_up_to_sector() + page reference on the page + offset offset of failed sector + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_recover_page_up_to_sector(byte *page, uint16 offset) +{ + uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end; + DBUG_ENTER("translog_recover_page_up_to_sector"); + DBUG_PRINT("enter", ("offset: %u first chunk: %u", + (uint) offset, (uint) chunk_offset)); + + while (page[chunk_offset] != '\0' && chunk_offset < offset) + { + uint16 chunk_length; + if ((chunk_length= + translog_get_total_chunk_length(page, chunk_offset)) == 0) + { + UNRECOVERABLE_ERROR(("cant get chunk length (offset %u)", + (uint) chunk_offset)); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("chunk: offset: %u length %u", + (uint) chunk_offset, (uint) chunk_length)); + if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE) + { + UNRECOVERABLE_ERROR(("damaged chunk (offset %u) in trusted area", + (uint) chunk_offset)); + DBUG_RETURN(1); + } + chunk_offset+= chunk_length; + } + + valid_chunk_end= chunk_offset; + /* end of trusted area - sector parsing */ + while (page[chunk_offset] != '\0') + { + uint16 chunk_length; + if ((chunk_length= + translog_get_total_chunk_length(page, chunk_offset)) == 0) + break; + + DBUG_PRINT("info", ("chunk: offset: %u length %u", + (uint) chunk_offset, (uint) chunk_length)); + if (((ulong) chunk_offset) + ((ulong) chunk_length) > + (uint) (offset + DISK_DRIVE_SECTOR_SIZE)) + break; + + chunk_offset+= chunk_length; + valid_chunk_end= chunk_offset; + } + DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end)); + + bzero(page + valid_chunk_end, TRANSLOG_PAGE_SIZE - valid_chunk_end); + + DBUG_RETURN(0); +} + + +/* + Log page validator + + SYNOPSIS + translog_page_validator() + page_addr The page to check + data data, need for validation (address in this case) + + RETURN + 0 OK + 1 Error +*/ +static my_bool translog_page_validator(byte *page_addr, gptr data_ptr) +{ + uint this_page_page_overhead; + uint flags; + byte *page= (byte*) page_addr, *page_pos; + TRANSLOG_VALIDATOR_DATA *data= (TRANSLOG_VALIDATOR_DATA *) data_ptr; + TRANSLOG_ADDRESS addr= *(data->addr); + DBUG_ENTER("translog_page_validator"); + + data->was_recovered= 0; + + if (uint3korr(page) != LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE || + uint3korr(page + 3) != LSN_FILE_NO(addr)) + { + UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " + "page address written in the page is incorrect: " + "File %lu instead of %lu or page %lu instead of %lu", + (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), + (ulong) 
uint3korr(page + 3), (ulong) LSN_FILE_NO(addr), + (ulong) uint3korr(page), + (ulong) LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE)); + DBUG_RETURN(1); + } + flags= (uint)(page[TRANSLOG_PAGE_FLAGS]); + this_page_page_overhead= page_overhead[flags]; + if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | + TRANSLOG_RECORD_CRC)) + { + UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " + "Garbage in the page flags field detected : %x", + (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), + (uint) flags)); + DBUG_RETURN(1); + } + page_pos= page + (3 + 3 + 1); + if (flags & TRANSLOG_PAGE_CRC) + { + uint32 crc= translog_crc(page + this_page_page_overhead, + TRANSLOG_PAGE_SIZE - + this_page_page_overhead); + if (crc != uint4korr(page_pos)) + { + UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " + "CRC mismatch: calculated: %lx on the page %lx", + (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), + (ulong) crc, (ulong) uint4korr(page_pos))); + DBUG_RETURN(1); + } + page_pos+= CRC_LENGTH; /* Skip crc */ + } + if (flags & TRANSLOG_SECTOR_PROTECTION) + { + uint i, offset; + byte *table= page_pos; + uint16 current= uint2korr(table); + for (i= 2, offset= DISK_DRIVE_SECTOR_SIZE; + i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + i+= 2, offset+= DISK_DRIVE_SECTOR_SIZE) + { + /* + TODO: add chunk counting for "suspecting" sectors (difference is + more than 1-2) + */ + uint16 test= uint2korr(page + offset); + DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx " + "read: 0x%x stored: 0x%x%x", + i / 2, offset, (ulong) current, + (uint) uint2korr(page + offset), (uint) table[i], + (uint) table[i + 1])); + if (((test < current) && + (LL(0xFFFF) - current + test > DISK_DRIVE_SECTOR_SIZE / 3)) || + ((test >= current) && + (test - current > DISK_DRIVE_SECTOR_SIZE / 3))) + { + if (translog_recover_page_up_to_sector(page, offset)) + DBUG_RETURN(1); + data->was_recovered= 1; + DBUG_RETURN(0); + } + + /* Return value on the page */ + page[offset]= table[i]; + page[offset + 1]= table[i + 1]; + current= test; + DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx " + "read: 0x%x stored: 0x%x%x", + i / 2, offset, (ulong) current, + (uint) uint2korr(page + offset), (uint) table[i], + (uint) table[i + 1])); + } + } + DBUG_RETURN(0); +} + + +/* + Get log page by file number and offset of the beginning of the page + + SYNOPSIS + translog_get_page() + data validator data, which contains the page address + buffer buffer for page placing + (might not be used in some cache implementations) + + RETURN + NULL - Error + # pointer to the page cache which should be used to read this page +*/ + +static byte *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, byte *buffer) +{ + TRANSLOG_ADDRESS addr= *(data->addr); + uint cache_index; + uint32 file_no= LSN_FILE_NO(addr); + DBUG_ENTER("translog_get_page"); + DBUG_PRINT("enter", ("File: %lu Offset: %lu(0x%lx)", + (ulong) file_no, + (ulong) LSN_OFFSET(addr), + (ulong) LSN_OFFSET(addr))); + + /* it is really page address */ + DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0); + + if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - file_no) < + OPENED_FILES_NUM) + { + PAGECACHE_FILE file; + /* file in the cache */ + if (log_descriptor.log_file_num[cache_index] == -1) + { + if ((log_descriptor.log_file_num[cache_index]= + open_logfile_by_number_no_cache(file_no)) == -1) + DBUG_RETURN(NULL); + } + file.file= log_descriptor.log_file_num[cache_index]; + + buffer= (byte*) + pagecache_valid_read(log_descriptor.pagecache, &file, + LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE, + 3, (char*) 
buffer, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0, + &translog_page_validator, (gptr) data); + } + else + { + /* + TODO: WE KEEP THE LAST OPENED_FILES_NUM FILES IN THE LOG CACHE, NOT + THE LAST USED FILES. THIS WILL BE A NOTABLE PROBLEM IF WE ARE + FOLLOWING AN UNDO CHAIN THAT GOES OVER MANY OLD LOG FILES. WE WILL + PROBABLY NEED SPECIAL HANDLING OF THIS OR HAVE A FILO FOR THE LOG + FILES. + */ + + File file= open_logfile_by_number_no_cache(file_no); + if (file == -1) + DBUG_RETURN(NULL); + if (my_pread(file, (char*) buffer, TRANSLOG_PAGE_SIZE, + LSN_OFFSET(addr), MYF(MY_FNABP | MY_WME))) + buffer= NULL; + else if (translog_page_validator((byte*) buffer, (gptr) data)) + buffer= NULL; + my_close(file, MYF(MY_WME)); + } + DBUG_RETURN(buffer); +} + + +/* + Finds last page of the given log file + + SYNOPSIS + translog_get_last_page_addr() + addr address structure to fill with data, which contain + file number of the log file + last_page_ok assigned 1 if last page was OK + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr, + my_bool *last_page_ok) +{ + MY_STAT stat_buff, *stat; + char path[FN_REFLEN]; + uint32 rec_offset; + uint32 file_no= LSN_FILE_NO(*addr); + DBUG_ENTER("translog_get_last_page_addr"); + + if (!(stat= my_stat(translog_filename_by_fileno(file_no, path), + &stat_buff, MYF(MY_WME)))) + DBUG_RETURN(1); + DBUG_PRINT("info", ("File size: %lu", (ulong) stat->st_size)); + if (stat->st_size > TRANSLOG_PAGE_SIZE) + { + rec_offset= (((stat->st_size / TRANSLOG_PAGE_SIZE) - 1) * + TRANSLOG_PAGE_SIZE); + *last_page_ok= (stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE); + } + else + { + *last_page_ok= 0; + rec_offset= 0; + } + *addr= MAKE_LSN(file_no, rec_offset); + DBUG_PRINT("info", ("Last page: 0x%lx ok: %d", (ulong) rec_offset, + *last_page_ok)); + DBUG_RETURN(0); +} + + +/* + Get number bytes for record length storing + + SYNOPSIS + translog_variable_record_length_bytes() + length Record length wich will be codded + + RETURN + 1,3,4,5 - number of bytes to store given length +*/ + +static uint translog_variable_record_length_bytes(translog_size_t length) +{ + if (length < 250) + return 1; + if (length < 0xFFFF) + return 3; + if (length < (ulong) 0xFFFFFF) + return 4; + return 5; +} + + +/* + Get header of this chunk + + SYNOPSIS + translog_get_chunk_header_length() + page The page where chunk placed + offset Offset of the chunk on this place + + RETURN + # total length of the chunk + 0 Error +*/ + +static uint16 translog_get_chunk_header_length(byte *page, uint16 offset) +{ + DBUG_ENTER("translog_get_chunk_header_length"); + page+= offset; + switch (*page & TRANSLOG_CHUNK_TYPE) { + case TRANSLOG_CHUNK_LSN: + { + /* 0 chunk referred as LSN (head or tail) */ + translog_size_t rec_len; + byte *start= page; + byte *ptr= start + 1 + 2; + uint16 chunk_len, header_len; + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN")); + rec_len= translog_variable_record_1group_decode_len(&ptr); + chunk_len= uint2korr(ptr); + header_len= (ptr - start) +2; + DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u", + (ulong) rec_len, (uint) chunk_len, (uint) header_len)); + if (chunk_len) + { + /* TODO: fine header end */ + DBUG_ASSERT(0); + } + DBUG_RETURN(header_len); + break; + } + case TRANSLOG_CHUNK_FIXED: + { + /* 1 (pseudo)fixed record (also LSN) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3")); + DBUG_RETURN(3); + } + case TRANSLOG_CHUNK_NOHDR: + /* 2 no header chunk (till page end) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 
1")); + DBUG_RETURN(1); + break; + case TRANSLOG_CHUNK_LNGTH: + /* 3 chunk with chunk length */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3")); + DBUG_RETURN(3); + break; + default: + DBUG_ASSERT(0); + } +} + + +/* + Initialize transaction log + + SYNOPSIS + translog_init() + directory Directory where log files are put + log_file_max_size max size of one log size (for new logs creation) + server_version version of MySQL server (MYSQL_VERSION_ID) + server_id server ID (replication & Co) + pagecache Page cache for the log reads + flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION + TRANSLOG_RECORD_CRC) + + RETURN + 0 OK + 1 Error +*/ + +my_bool translog_init(const char *directory, + uint32 log_file_max_size, + uint32 server_version, + uint32 server_id, PAGECACHE *pagecache, uint flags) +{ + int i; + int old_log_was_recovered= 0, logs_found= 0; + uint old_flags= flags; + TRANSLOG_ADDRESS sure_page, last_page, last_valid_page; + DBUG_ENTER("translog_init"); + + + if (pthread_mutex_init(&log_descriptor.sent_to_file_lock, + MY_MUTEX_INIT_FAST)) + DBUG_RETURN(1); + + /* Directory to store files */ + unpack_dirname(log_descriptor.directory, directory); + + if ((log_descriptor.directory_fd= my_open(log_descriptor.directory, + O_RDONLY, MYF(MY_WME))) < 0) + { + UNRECOVERABLE_ERROR(("Error %d during opening directory '%s'", + errno, log_descriptor.directory)); + DBUG_RETURN(1); + } + + /* max size of one log size (for new logs creation) */ + log_descriptor.log_file_max_size= + log_file_max_size - (log_file_max_size % TRANSLOG_PAGE_SIZE); + /* server version */ + log_descriptor.server_version= server_version; + /* server ID */ + log_descriptor.server_id= server_id; + /* Page cache for the log reads */ + log_descriptor.pagecache= pagecache; + /* Flags */ + DBUG_ASSERT((flags & + ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | + TRANSLOG_RECORD_CRC)) == 0); + log_descriptor.flags= flags; + for (i= 0; i < TRANSLOG_FLAGS_NUM; i++) + { + page_overhead[i]= 7; + if (i & TRANSLOG_PAGE_CRC) + page_overhead[i]+= CRC_LENGTH; + if (i & TRANSLOG_SECTOR_PROTECTION) + page_overhead[i]+= (TRANSLOG_PAGE_SIZE / + DISK_DRIVE_SECTOR_SIZE) * 2; + } + log_descriptor.page_overhead= page_overhead[flags]; + log_descriptor.page_capacity_chunk_2= + TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1; + DBUG_ASSERT(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0); + log_descriptor.buffer_capacity_chunk_2= + (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) * + log_descriptor.page_capacity_chunk_2; + log_descriptor.half_buffer_capacity_chunk_2= + log_descriptor.buffer_capacity_chunk_2 / 2; + DBUG_PRINT("info", + ("Overhead: %u pc2: %u bc2: %u, bc2/2: %u", + log_descriptor.page_overhead, + log_descriptor.page_capacity_chunk_2, + log_descriptor.buffer_capacity_chunk_2, + log_descriptor.half_buffer_capacity_chunk_2)); + + /* *** Current state of the log handler *** */ + + /* Init log handler file handlers cache */ + for (i= 0; i < OPENED_FILES_NUM; i++) + log_descriptor.log_file_num[i]= -1; + + /* just to init it somehow */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + + /* Buffers for log writing */ + for (i= 0; i < TRANSLOG_BUFFERS_NO; i++) + { + if (translog_buffer_init(log_descriptor.buffers + i)) + DBUG_RETURN(1); +#ifndef DBUG_OFF + log_descriptor.buffers[i].buffer_no= (uint8) i; +#endif + DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx", + i, (ulong) log_descriptor.buffers + i)); + } + + logs_found= (last_logno != CONTROL_FILE_IMPOSSIBLE_FILENO); + + if (logs_found) + { + my_bool 
pageok; + /* + TODO: scan directory for maria_log.XXXXXXXX files and find + highest XXXXXXXX & set logs_found + TODO: check that last checkpoint within present log addresses space + + find the log end + */ + if (LSN_FILE_NO(last_checkpoint_lsn) == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0); + /* there was no checkpoints we will read from the beginning */ + sure_page= (LSN_ONE_FILE | TRANSLOG_PAGE_SIZE); + } + else + { + sure_page= last_checkpoint_lsn; + DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0); + sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE; + } + log_descriptor.horizon= last_page= MAKE_LSN(last_logno,0); + if (translog_get_last_page_addr(&last_page, &pageok)) + DBUG_RETURN(1); + if (LSN_OFFSET(last_page) == 0) + { + if (LSN_FILE_NO(last_page) == 1) + { + logs_found= 0; /* file #1 has no pages */ + } + else + { + last_page-= LSN_ONE_FILE; + if (translog_get_last_page_addr(&last_page, &pageok)) + DBUG_RETURN(1); + } + } + } + if (logs_found) + { + TRANSLOG_ADDRESS current_page= sure_page; + my_bool pageok; + + DBUG_ASSERT(sure_page <= last_page); + + /* TODO: check page size */ + + last_valid_page= CONTROL_FILE_IMPOSSIBLE_LSN; + /* scan and validate pages */ + do + { + TRANSLOG_ADDRESS current_file_last_page; + current_file_last_page= current_page; + if (translog_get_last_page_addr(¤t_file_last_page, &pageok)) + DBUG_RETURN(1); + if (!pageok) + { + DBUG_PRINT("error", ("File %lu have no complete last page", + (ulong) LSN_FILE_NO(current_file_last_page))); + old_log_was_recovered= 1; + /* This file is not written till the end so it should be last */ + last_page= current_file_last_page; + /* TODO: issue warning */ + } + do + { + TRANSLOG_VALIDATOR_DATA data; + byte buffer[TRANSLOG_PAGE_SIZE], *page; + data.addr= ¤t_page; + if ((page= translog_get_page(&data, buffer)) == NULL) + DBUG_RETURN(1); + if (data.was_recovered) + { + DBUG_PRINT("error", ("file no: %lu (%d) " + "rec_offset: 0x%lx (%lu) (%d)", + (ulong) LSN_FILE_NO(current_page), + (uint3korr(page + 3) != + LSN_FILE_NO(current_page)), + (ulong) LSN_OFFSET(current_page), + (ulong) (LSN_OFFSET(current_page) / + TRANSLOG_PAGE_SIZE), + (uint3korr(page) != + LSN_OFFSET(current_page) / + TRANSLOG_PAGE_SIZE))); + old_log_was_recovered= 1; + break; + } + old_flags= page[TRANSLOG_PAGE_FLAGS]; + last_valid_page= current_page; + current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */ + } while (current_page <= current_file_last_page); + current_page+= LSN_ONE_FILE; + current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE); + } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) && + !old_log_was_recovered); + if (last_valid_page == CONTROL_FILE_IMPOSSIBLE_LSN) + { + /* Panic!!! 
Even page which should be valid is invalid */ + /* TODO: issue error */ + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("Last valid page is in file: %lu " + "offset: %lu (0x%lx) " + "Logs found: %d was recovered: %d " + "flags match: %d", + (ulong) LSN_FILE_NO(last_valid_page), + (ulong) LSN_OFFSET(last_valid_page), + (ulong) LSN_OFFSET(last_valid_page), + logs_found, old_log_was_recovered, + (old_flags == flags))); + + /* TODO: check server ID */ + if (logs_found && !old_log_was_recovered && old_flags == flags) + { + TRANSLOG_VALIDATOR_DATA data; + byte buffer[TRANSLOG_PAGE_SIZE], *page; + uint16 chunk_offset; + data.addr= &last_valid_page; + /* continue old log */ + DBUG_ASSERT(LSN_FILE_NO(last_valid_page)== + LSN_FILE_NO(log_descriptor.horizon)); + if ((page= translog_get_page(&data, buffer)) == NULL || + (chunk_offset= translog_get_first_chunk_offset(page)) == 0) + DBUG_RETURN(1); + + /* Puts filled part of old page in the buffer */ + log_descriptor.horizon= last_valid_page; + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + /* + Free space if filled with 0 and first byte of + real chunk can't be 0 + */ + while (chunk_offset < TRANSLOG_PAGE_SIZE && page[chunk_offset] != '\0') + { + uint16 chunk_length; + if ((chunk_length= + translog_get_total_chunk_length(page, chunk_offset)) == 0) + DBUG_RETURN(1); + DBUG_PRINT("info", ("chunk: offset: %u length: %u", + (uint) chunk_offset, (uint) chunk_length)); + chunk_offset+= chunk_length; + + /* chunk can't cross the page border */ + DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE); + } + memcpy(log_descriptor.buffers->buffer, page, chunk_offset); + log_descriptor.bc.buffer->size+= chunk_offset; + log_descriptor.bc.ptr+= chunk_offset; + log_descriptor.bc.current_page_fill= chunk_offset; + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + (chunk_offset + + LSN_OFFSET(last_valid_page))); + DBUG_PRINT("info", ("Move Page #%u: 0x%lx chaser: %d Size: %lu (%lu)", + (uint) log_descriptor.bc.buffer_no, + (ulong) log_descriptor.bc.buffer, + log_descriptor.bc.chaser, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr - log_descriptor.bc. 
+ buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); + } + } + DBUG_PRINT("info", ("Logs found: %d was recovered: %d", + logs_found, old_log_was_recovered)); + if (!logs_found) + { + /* Start new log system from scratch */ + /* Used space */ + log_descriptor.horizon= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* header page */ + /* Current logs file number in page cache */ + if ((log_descriptor.log_file_num[0]= + open_logfile_by_number_no_cache(1)) == -1 || + translog_write_file_header()) + DBUG_RETURN(1); + if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, 1, + CONTROL_FILE_UPDATE_ONLY_LOGNO)) + DBUG_RETURN(1); + /* assign buffer 0 */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); + } + else if (old_log_was_recovered || old_flags != flags) + { + /* leave the damaged file untouched */ + log_descriptor.horizon+= LSN_ONE_FILE; + /* header page */ + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + TRANSLOG_PAGE_SIZE); + if (translog_create_new_file()) + DBUG_RETURN(1); + /* + Buffer system left untouched after recovery => we should init it + (starting from buffer 0) + */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); + } + + /* all LSNs that are on disk are flushed */ + log_descriptor.sent_to_file= log_descriptor.flushed= log_descriptor.horizon; + /* + horizon is (potentially) address of the next LSN we need decrease + it to signal that all LSNs before it are flushed + */ + log_descriptor.flushed--; /* offset decreased */ + log_descriptor.sent_to_file--; /* offset decreased */ + + DBUG_RETURN(0); +} + + +/* + Free transaction log file buffer + + SYNOPSIS + translog_buffer_destroy() + buffer_no The buffer to free + + NOTE + This buffer should be locked +*/ + +static void translog_buffer_destroy(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_destroy"); + DBUG_PRINT("enter", + ("Buffer #%u: 0x%lx file: %d offset: (%lu,0x%lx) size: %lu", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->file, + (ulong) LSN_FILE_NO(buffer->offset), + (ulong) LSN_OFFSET(buffer->offset), + (ulong) buffer->size)); + DBUG_ASSERT(buffer->waiting_filling_buffer.last_thread == 0); + if (buffer->file != -1) + { + /* + We ignore errors here, because we can't do something about it + (it is shutting down) + */ + translog_buffer_flush(buffer); + } + DBUG_PRINT("info", ("Unlock mutex: 0x%lx", (ulong) &buffer->mutex)); + pthread_mutex_unlock(&buffer->mutex); + DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex)); + pthread_mutex_destroy(&buffer->mutex); + DBUG_VOID_RETURN; +} + + +/* + Free log handler resources + + SYNOPSIS + translog_destroy() +*/ + +void translog_destroy() +{ + uint i; + DBUG_ENTER("translog_destroy"); + if (log_descriptor.bc.buffer->file != -1) + translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc); + + for (i= 0; i < TRANSLOG_BUFFERS_NO; i++) + { + struct st_translog_buffer *buffer= log_descriptor.buffers + i; + /* + Lock the buffer just for safety, there should not be other + threads running. 
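      translog_buffer_destroy() below flushes the buffer if it is still
      bound to a file (ignoring flush errors, as we are shutting down) and
      destroys its mutex; after the loop the cached log file descriptors
      and the directory descriptor are closed as well.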
+ */ + translog_buffer_lock(buffer); + translog_buffer_destroy(buffer); + } + /* close files */ + for (i= 0; i < OPENED_FILES_NUM; i++) + { + if (log_descriptor.log_file_num[i] != -1) + translog_close_log_file(log_descriptor.log_file_num[i]); + } + pthread_mutex_destroy(&log_descriptor.sent_to_file_lock); + my_close(log_descriptor.directory_fd, MYF(MY_WME)); + DBUG_VOID_RETURN; +} + + +/* + Lock the loghandler + + SYNOPSIS + translog_lock() + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_lock() +{ + struct st_translog_buffer *current_buffer; + DBUG_ENTER("translog_lock"); + + /* + Locking the loghandler mean locking current buffer, but it can change + during locking, so we should check it + */ + for (;;) + { + current_buffer= log_descriptor.bc.buffer; + if (translog_buffer_lock(current_buffer)) + DBUG_RETURN(1); + if (log_descriptor.bc.buffer == current_buffer) + break; + translog_buffer_unlock(current_buffer); + } + DBUG_RETURN(0); +} + + +/* + Unlock the loghandler + + SYNOPSIS + translog_unlock() + + RETURN + 0 OK + 1 Error +*/ + +static inline my_bool translog_unlock() +{ + DBUG_ENTER("translog_unlock"); + translog_buffer_unlock(log_descriptor.bc.buffer); + + DBUG_RETURN(0); +} + + +/* + Start new page + + SYNOPSIS + translog_page_next() + horizon \ Position in file and buffer where we are + cursor / + prev_buffer Buffer which should be flushed will be assigned + here if it is need. This is always set. + + NOTE + handler should be locked + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + struct st_translog_buffer **prev_buffer) +{ + struct st_translog_buffer *buffer= cursor->buffer; + DBUG_ENTER("translog_page_next"); + + if ((cursor->ptr +TRANSLOG_PAGE_SIZE > + cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) || + (LSN_OFFSET(*horizon) > + log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE)) + { + DBUG_PRINT("info", ("Switch to next buffer Buffer Size: %lu (%lu) => %d " + "File size: %lu max: %lu => %d", + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer), + (cursor->ptr + TRANSLOG_PAGE_SIZE > + cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER), + (ulong) LSN_OFFSET(*horizon), + (ulong) log_descriptor.log_file_max_size, + (LSN_OFFSET(*horizon) > + (log_descriptor.log_file_max_size - + TRANSLOG_PAGE_SIZE)))); + if (translog_buffer_next(horizon, cursor, + LSN_OFFSET(*horizon) > + (log_descriptor.log_file_max_size - + TRANSLOG_PAGE_SIZE))) + DBUG_RETURN(1); + *prev_buffer= buffer; + DBUG_PRINT("info", ("Buffer #%u (0x%lu): have to be flushed", + (uint) buffer->buffer_no, (ulong) buffer)); + } + else + { + DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu): " + "Buffer Size: %lu (%lu)", + (uint) buffer->buffer_no, + (ulong) buffer, + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + translog_finish_page(horizon, cursor); + translog_new_page_header(horizon, cursor); + *prev_buffer= NULL; + } + DBUG_RETURN(0); +} + + +/* + Write data of given length to the current page + + SYNOPSIS + translog_write_data_on_page() + horizon \ Pointers on file and buffer + cursor / + length IN length of the chunk + buffer buffer with data + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + translog_size_t length, + byte *buffer) +{ + DBUG_ENTER("translog_write_data_on_page"); + DBUG_PRINT("enter", ("Chunk length: %lu Page size %u", + (ulong) length, (uint) 
cursor->current_page_fill)); + DBUG_ASSERT(length > 0); + DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer + + TRANSLOG_WRITE_BUFFER); + + memcpy(cursor->ptr, buffer, length); + cursor->ptr+= length; + (*horizon)+= length; /* adds offset */ + cursor->current_page_fill+= length; + if (!cursor->chaser) + cursor->buffer->size+= length; + DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx " + "chaser: %d Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + + DBUG_RETURN(0); +} + + +/* + Write data from parts of given length to the current page + + SYNOPSIS + translog_write_parts_on_page() + horizon \ Pointers on file and buffer + cursor / + length IN length of the chunk + parts IN/OUT chunk source + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + translog_size_t length, + struct st_translog_parts *parts) +{ + translog_size_t left= length; + uint cur= (uint) parts->current; + DBUG_ENTER("translog_write_parts_on_page"); + DBUG_PRINT("enter", ("Chunk length: %lu parts: %u of %u. Page size: %u " + "Buffer size: %lu (%lu)", + (ulong) length, + (uint) (cur + 1), (uint) parts->parts.elements, + (uint) cursor->current_page_fill, + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_ASSERT(length > 0); + DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer + + TRANSLOG_WRITE_BUFFER); + + do + { + translog_size_t len; + struct st_translog_part *part; + byte *buff; + + DBUG_ASSERT(cur < parts->parts.elements); + part= dynamic_element(&parts->parts, cur, struct st_translog_part *); + buff= part->buff; + DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu", + (uint) (cur + 1), (ulong) part->len, (ulong) left)); + + if (part->len > left) + { + /* we should write less then the current part */ + len= left; + part->len-= len; + part->buff+= len; + DBUG_PRINT("info", ("Set new part: %u Length: %lu", + (uint) (cur + 1), (ulong) part->len)); + } + else + { + len= part->len; + cur++; + DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len)); + } + DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx %u", + (ulong) cursor->ptr, (ulong)buff, (uint)len)); + memcpy(cursor->ptr, buff, len); + left-= len; + cursor->ptr+= len; + } while (left); + + DBUG_PRINT("info", ("Horizon: (%lu,0x%lx) Length %lu(0x%lx)", + (ulong) LSN_FILE_NO(*horizon), + (ulong) LSN_OFFSET(*horizon), + (ulong) length, (ulong) length)); + parts->current= cur; + (*horizon)+= length; /* offset increasing */ + cursor->current_page_fill+= length; + if (!cursor->chaser) + cursor->buffer->size+= length; + DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx " + "chaser: %d Size: %lu (%lu) " + "Horizon: (%lu,0x%lx) buff offset: 0x%lx", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer), + (ulong) LSN_FILE_NO(*horizon), + (ulong) LSN_OFFSET(*horizon), + (ulong) (LSN_OFFSET(cursor->buffer->offset) + + cursor->buffer->size))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + + DBUG_RETURN(0); +} + + +/* + Put 1 group chunk type 0 header into parts array + + SYNOPSIS + 
translog_write_variable_record_1group_header() + parts Descriptor of record source parts + type The log record type + short_trid Sort transaction ID or 0 if it has no sense + header_length Calculated header length of chunk type 0 + chunk0_header Buffer for the chunk header writing +*/ + +static void +translog_write_variable_record_1group_header(struct st_translog_parts *parts, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + uint16 header_length, + byte *chunk0_header) +{ + struct st_translog_part part; + DBUG_ASSERT(parts->current != 0); /* first part is left for header */ + parts->total_record_length+= (part.len= header_length); + part.buff= chunk0_header; + /* puts chunk type */ + *chunk0_header= (byte) (type | TRANSLOG_CHUNK_LSN); + int2store(chunk0_header + 1, short_trid); + /* puts record length */ + translog_write_variable_record_1group_code_len(chunk0_header + 3, + parts->record_length, + header_length); + /* puts 0 as chunk length which indicate 1 group record */ + int2store(chunk0_header + header_length - 2, 0); + parts->current--; + set_dynamic(&parts->parts, (gptr) &part, parts->current); +} + + +/* + Increase number of writers for this buffer + + SYNOPSIS + translog_buffer_increase_writers() + buffer target buffer +*/ + +static inline void +translog_buffer_increase_writers(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_increase_writers"); + buffer->copy_to_buffer_in_progress++; + DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx: %d", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->copy_to_buffer_in_progress)); + DBUG_VOID_RETURN; +} + + +/* + Decrease number of writers for this buffer + + SYNOPSIS + translog_buffer_decrease_writers() + buffer target buffer +*/ + + +static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_decrease_writers"); + buffer->copy_to_buffer_in_progress--; + DBUG_PRINT("info", ("copy_to_buffer_in_progress. 
Buffer #%u 0x%lx: %d", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->copy_to_buffer_in_progress)); + if (buffer->copy_to_buffer_in_progress == 0 && + buffer->waiting_filling_buffer.last_thread != NULL) + wqueue_release_queue(&buffer->waiting_filling_buffer); + DBUG_VOID_RETURN; +} + + +/* + Put chunk 2 from new page beginning + + SYNOPSIS + translog_write_variable_record_chunk2_page() + parts Descriptor of record source parts + horizon \ Pointers on file position and buffer + cursor / + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_chunk2_page(struct st_translog_parts *parts, + TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + struct st_translog_buffer *buffer_to_flush; + int rc; + byte chunk2_header[1]; + DBUG_ENTER("translog_write_variable_record_chunk2_page"); + chunk2_header[0]= TRANSLOG_CHUNK_NOHDR; + + rc= translog_page_next(horizon, cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + if (rc) + DBUG_RETURN(1); + + /* Puts chunk type */ + translog_write_data_on_page(horizon, cursor, 1, chunk2_header); + /* Puts chunk body */ + translog_write_parts_on_page(horizon, cursor, + log_descriptor.page_capacity_chunk_2, parts); + DBUG_RETURN(0); +} + + +/* + Put chunk 3 of requested length in the buffer from new page beginning + + SYNOPSIS + translog_write_variable_record_chunk3_page() + parts Descriptor of record source parts + length Length of this chunk + horizon \ Pointers on file position and buffer + cursor / + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_chunk3_page(struct st_translog_parts *parts, + uint16 length, + TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + struct st_translog_buffer *buffer_to_flush; + struct st_translog_part part; + int rc; + byte chunk3_header[1 + 2]; + DBUG_ENTER("translog_write_variable_record_chunk3_page"); + + rc= translog_page_next(horizon, cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + if (rc) + DBUG_RETURN(1); + if (length == 0) + { + /* It was call to write page header only (no data for chunk 3) */ + DBUG_PRINT("info", ("It is a call to make page header only")); + DBUG_RETURN(0); + } + + DBUG_ASSERT(parts->current != 0); /* first part is left for header */ + parts->total_record_length+= (part.len= 1 + 2); + part.buff= chunk3_header; + /* Puts chunk type */ + *chunk3_header= (byte) (TRANSLOG_CHUNK_LNGTH); + /* Puts chunk length */ + int2store(chunk3_header + 1, length); + parts->current--; + set_dynamic(&parts->parts, (gptr) &part, parts->current); + + translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts); + DBUG_RETURN(0); +} + +/* + Move log pointer (horizon) on given number pages starting from next page, + and given offset on the last page + + SYNOPSIS + translog_advance_pointer() + pages Number of full pages starting from the next one + last_page_data Plus this data on the last page + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) +{ + translog_size_t last_page_offset= (log_descriptor.page_overhead + + last_page_data); + 
translog_size_t offset= (TRANSLOG_PAGE_SIZE - + log_descriptor.bc.current_page_fill + + pages * TRANSLOG_PAGE_SIZE + last_page_offset); + translog_size_t buffer_end_offset, file_end_offset, min_offset; + DBUG_ENTER("translog_advance_pointer"); + DBUG_PRINT("enter", ("Pointer: (%lu, 0x%lx) + %u + %u pages + %u + %u", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (uint) (TRANSLOG_PAGE_SIZE - + log_descriptor.bc.current_page_fill), + pages, (uint) log_descriptor.page_overhead, + (uint) last_page_data)); + + for (;;) + { + uint8 new_buffer_no; + struct st_translog_buffer *new_buffer; + struct st_translog_buffer *old_buffer; + buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size; + file_end_offset= (log_descriptor.log_file_max_size - + LSN_OFFSET(log_descriptor.horizon)); + DBUG_PRINT("info", ("offset: %lu buffer_end_offs: %lu, " + "file_end_offs: %lu", + (ulong) offset, (ulong) buffer_end_offset, + (ulong) file_end_offset)); + DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = " + "0x%lx (0x%lx)", + (uint) log_descriptor.bc.buffer->buffer_no, + (uint) log_descriptor.bc.buffer_no, + (ulong) log_descriptor.bc.buffer, + (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset), + (ulong) log_descriptor.bc.buffer->size, + (ulong) (LSN_OFFSET(log_descriptor.bc.buffer->offset) + + log_descriptor.bc.buffer->size), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) + + log_descriptor.bc.buffer->size == + LSN_OFFSET(log_descriptor.horizon)); + + if (offset <= buffer_end_offset && offset <= file_end_offset) + break; + old_buffer= log_descriptor.bc.buffer; + new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO; + new_buffer= log_descriptor.buffers + new_buffer_no; + + translog_buffer_lock(new_buffer); + translog_wait_for_buffer_free(new_buffer); + + min_offset= min(buffer_end_offset, file_end_offset); + /* TODO: check is it ptr or size enough */ + log_descriptor.bc.buffer->size+= min_offset; + log_descriptor.bc.ptr+= min_offset; + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)", + (uint) log_descriptor.bc.buffer->buffer_no, + (ulong) log_descriptor.bc.buffer, + log_descriptor.bc.chaser, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. 
+ buffer->buffer))); + DBUG_ASSERT((ulong) (log_descriptor.bc.ptr - + log_descriptor.bc.buffer->buffer) == + log_descriptor.bc.buffer->size); + DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == + log_descriptor.bc.buffer_no); + translog_buffer_increase_writers(log_descriptor.bc.buffer); + + if (file_end_offset <= buffer_end_offset) + { + log_descriptor.horizon+= LSN_ONE_FILE; + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + TRANSLOG_PAGE_SIZE); + DBUG_PRINT("info", ("New file: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon))); + if (translog_create_new_file()) + { + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("info", ("The same file")); + log_descriptor.horizon+= min_offset; /* offset increasing */ + } + translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); + if (translog_buffer_unlock(old_buffer)) + DBUG_RETURN(1); + offset-= min_offset; + } + log_descriptor.bc.ptr+= offset; + log_descriptor.bc.buffer->size+= offset; + translog_buffer_increase_writers(log_descriptor.bc.buffer); + log_descriptor.horizon+= offset; /* offset increasing */ + log_descriptor.bc.current_page_fill= last_page_offset; + DBUG_PRINT("info", ("drop write_counter")); + log_descriptor.bc.write_counter= 0; + log_descriptor.bc.previous_offset= 0; + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu) " + "offset: %u last page: %u", + (uint) log_descriptor.bc.buffer->buffer_no, + (ulong) log_descriptor.bc.buffer, + log_descriptor.bc.chaser, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr - + log_descriptor.bc.buffer-> + buffer), (uint) offset, + (uint) last_page_offset)); + DBUG_PRINT("info", + ("pointer moved to: (%lu, 0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); + log_descriptor.bc.protected= 0; + DBUG_RETURN(0); +} + + + +/* + Get page rest + + SYNOPSIS + translog_get_current_page_rest() + + NOTE loghandler should be locked + + RETURN + number of bytes left on the current page +*/ + +#define translog_get_current_page_rest() \ + (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill) + +/* + Get buffer rest in full pages + + SYNOPSIS + translog_get_current_buffer_rest() + + NOTE loghandler should be locked + + RETURN + number of full pages left on the current buffer +*/ + +#define translog_get_current_buffer_rest() \ + ((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER - \ + log_descriptor.bc.ptr) / \ + TRANSLOG_PAGE_SIZE) + +/* + Calculate possible group size without first (current) page + + SYNOPSIS + translog_get_current_group_size() + + NOTE loghandler should be locked + + RETURN + group size without first (current) page +*/ + +static translog_size_t translog_get_current_group_size() +{ + /* buffer rest in full pages */ + translog_size_t buffer_rest= translog_get_current_buffer_rest(); + DBUG_ENTER("translog_get_current_group_size"); + DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest)); + + buffer_rest*= log_descriptor.page_capacity_chunk_2; + /* in case of only half of buffer free we can write this and next buffer */ + if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2) + { + DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu", + (ulong) buffer_rest, + (ulong) log_descriptor.buffer_capacity_chunk_2)); + buffer_rest+= log_descriptor.buffer_capacity_chunk_2; + } + + DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest)); + + DBUG_RETURN(buffer_rest); +} + + +/* + 
Write variable record in 1 group + + SYNOPSIS + translog_write_variable_record_1group() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + buffer_to_flush Buffer which have to be flushed if it is not 0 + header_length Calculated header length of chunk type 0 + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_1group(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + struct st_translog_buffer + *buffer_to_flush, uint16 header_length, + void *tcb) +{ + TRANSLOG_ADDRESS horizon; + struct st_buffer_cursor cursor; + int rc= 0; + uint i; + translog_size_t record_rest, full_pages, first_page; + uint additional_chunk3_page= 0; + byte chunk0_header[1 + 2 + 5 + 2]; + DBUG_ENTER("translog_write_variable_record_1group"); + + *lsn= horizon= log_descriptor.horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook)(type, tcb, lsn, parts)) + { + translog_unlock(); + DBUG_RETURN(1); + } + cursor= log_descriptor.bc; + cursor.chaser= 1; + + /* Advance pointer To be able unlock the loghandler */ + first_page= translog_get_current_page_rest(); + record_rest= parts->record_length - (first_page - header_length); + full_pages= record_rest / log_descriptor.page_capacity_chunk_2; + record_rest= (record_rest % log_descriptor.page_capacity_chunk_2); + + if (record_rest + 1 == log_descriptor.page_capacity_chunk_2) + { + DBUG_PRINT("info", ("2 chunks type 3 is needed")); + /* We will write 2 chunks type 3 at the end of this group */ + additional_chunk3_page= 1; + record_rest= 1; + } + + DBUG_PRINT("info", ("first_page: %u (%u) full_pages: %u (%lu) " + "additional: %u (%u) rest %u = %u", + first_page, first_page - header_length, + full_pages, + (ulong) full_pages * + log_descriptor.page_capacity_chunk_2, + additional_chunk3_page, + additional_chunk3_page * + (log_descriptor.page_capacity_chunk_2 - 1), + record_rest, parts->record_length)); + /* record_rest + 3 is chunk type 3 overhead + record_rest */ + rc|= translog_advance_pointer(full_pages + additional_chunk3_page, + (record_rest ? 
record_rest + 3 : 0)); + log_descriptor.bc.buffer->last_lsn= *lsn; + + rc|= translog_unlock(); + + /* + Check if we switched buffer and need process it (current buffer is + unlocked already => we will not delay other threads + */ + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + if (rc) + DBUG_RETURN(1); + + translog_write_variable_record_1group_header(parts, type, short_trid, + header_length, chunk0_header); + + /* fill the pages */ + translog_write_parts_on_page(&horizon, &cursor, first_page, parts); + + + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + + for (i= 0; i < full_pages; i++) + { + if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) + DBUG_RETURN(1); + + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + } + + if (additional_chunk3_page) + { + if (translog_write_variable_record_chunk3_page(parts, + log_descriptor. + page_capacity_chunk_2 - 2, + &horizon, &cursor)) + DBUG_RETURN(1); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE); + } + + if (translog_write_variable_record_chunk3_page(parts, + record_rest, + &horizon, &cursor)) + DBUG_RETURN(1); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + + if (!(rc= translog_buffer_lock(cursor.buffer))) + { + /* + Check if we wrote something on 1:st not full page and need to reconstruct + CRC and sector protection + */ + translog_buffer_decrease_writers(cursor.buffer); + } + rc|= translog_buffer_unlock(cursor.buffer); + DBUG_RETURN(rc); +} + + +/* + Write variable record in 1 chunk + + SYNOPSIS + translog_write_variable_record_1chunk() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + buffer_to_flush Buffer which have to be flushed if it is not 0 + header_length Calculated header length of chunk type 0 + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_1chunk(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + struct st_translog_buffer + *buffer_to_flush, uint16 header_length, + void *tcb) +{ + int rc; + byte chunk0_header[1 + 2 + 5 + 2]; + DBUG_ENTER("translog_write_variable_record_1chunk"); + + translog_write_variable_record_1group_header(parts, type, short_trid, + header_length, chunk0_header); + + *lsn= log_descriptor.horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook)(type, tcb, + lsn, parts)) + { + translog_unlock(); + DBUG_RETURN(1); + } + + rc= 
translog_write_parts_on_page(&log_descriptor.horizon, + &log_descriptor.bc, + parts->total_record_length, parts); + log_descriptor.bc.buffer->last_lsn= *lsn; + rc|= translog_unlock(); + + /* + check if we switched buffer and need process it (current buffer is + unlocked already => we will not delay other threads + */ + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + + DBUG_RETURN(rc); +} + + +/* + Calculate and write LSN difference (compressed LSN) + + SYNOPSIS + translog_put_LSN_diff() + base_lsn LSN from which we calculate difference + lsn LSN for codding + dst Result will be written to dst[-pack_length] .. dst[-1] + + NOTE: + To store an LSN in a compact way we will use the following compression: + + If a log record has LSN1, and it contains the lSN2 as a back reference, + Instead of LSN2 we write LSN1-LSN2, encoded as: + + two bits the number N (see below) + 14 bits + N bytes + + That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 + is stored in the first two bits. + + RETURN + # pointer on coded LSN + NULL Error +*/ + +static byte *translog_put_LSN_diff(LSN base_lsn, LSN lsn, byte *dst) +{ + DBUG_ENTER("translog_put_LSN_diff"); + DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx) val: (0x%lu,0x%lx) dst: 0x%lx", + (ulong) LSN_FILE_NO(base_lsn), + (ulong) LSN_OFFSET(base_lsn), + (ulong) LSN_FILE_NO(lsn), + (ulong) LSN_OFFSET(lsn), (ulong) dst)); + if (LSN_FILE_NO(base_lsn) == LSN_FILE_NO(lsn)) + { + uint32 diff; + DBUG_ASSERT(base_lsn > lsn); + diff= base_lsn - lsn; + DBUG_PRINT("info", ("File is the same. Diff: 0x%lx", (ulong) diff)); + if (diff <= 0x3FFF) + { + dst-= 2; + /* + Note we store this high byte first to ensure that first byte has + 0 in the 3 upper bits. + */ + dst[0]= diff >> 8; + dst[1]= (diff & 0xFF); + } + else if (diff <= 0x3FFFFF) + { + dst-= 3; + dst[0]= 0x40 | (diff >> 16); + int2store(dst + 1, diff & 0xFFFF); + } + else if (diff <= 0x3FFFFFFF) + { + dst-= 4; + dst[0]= 0x80 | (diff >> 24); + int3store(dst + 1, diff & 0xFFFFFF); + } + else + { + dst-= 5; + dst[0]= 0xC0; + int4store(dst + 1, diff); + } + } + else + { + uint32 diff; + uint32 offset_diff; + ulonglong base_offset= LSN_OFFSET(base_lsn); + DBUG_ASSERT(base_lsn > lsn); + diff= LSN_FILE_NO(base_lsn) - LSN_FILE_NO(lsn); + DBUG_PRINT("info", ("File is different. Diff: 0x%lx", (ulong) diff)); + + if (base_offset < LSN_OFFSET(lsn)) + { + /* take 1 from file offset */ + diff--; + base_offset+= LL(0x100000000); + } + offset_diff= base_offset - LSN_OFFSET(lsn); + if (diff > 0x3f) + { + /*TODO: error - too long transaction - panic!!! */ + UNRECOVERABLE_ERROR(("Too big file diff: %lu", (ulong) diff)); + DBUG_RETURN(NULL); + } + dst-= 5; + *dst= (0xC0 | diff); + int4store(dst + 1, offset_diff); + } + DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst)); + DBUG_RETURN(dst); +} + + +/* + Get LSN from LSN-difference (compressed LSN) + + SYNOPSIS + translog_get_LSN_from_diff() + base_lsn LSN from which we calculate difference + src pointer to coded lsn + dst pointer to buffer where to write 7byte LSN + + NOTE: + To store an LSN in a compact way we will use the following compression: + + If a log record has LSN1, and it contains the lSN2 as a back reference, + Instead of LSN2 we write LSN1-LSN2, encoded as: + + two bits the number N (see below) + 14 bits + N bytes + + That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 + is stored in the first two bits. 
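  EXAMPLE
    With illustrative values only: if base_lsn is (5,0x5400) and lsn is
    (5,0x5300), both LSNs are in file 5 and the difference 0x100 fits in
    14 bits, so it is stored as the two bytes 0x01 0x00 (the upper two
    bits of the first byte are the code 00).  The decoder below reads the
    code from those two bits, subtracts 0x100 from the base offset and
    gets (5,0x5300) back.  When the file numbers differ, the 5-byte form
    (code 11) is used: the low 6 bits of the first byte hold the file
    number difference and the following 4 bytes hold the offset
    difference.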
+ + RETURN + pointer to buffer after decoded LSN +*/ + +static byte *translog_get_LSN_from_diff(LSN base_lsn, byte *src, byte *dst) +{ + LSN lsn; + uint32 diff; + uint32 first_byte; + uint32 file_no, rec_offset; + uint8 code; + DBUG_ENTER("translog_get_LSN_from_diff"); + DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx) src: 0x%lx dst 0x%lx", + (ulong) LSN_FILE_NO(base_lsn), + (ulong) LSN_OFFSET(base_lsn), + (ulong) src, (ulong) dst)); + first_byte= *((uint8*) src); + code= first_byte >> 6; /* Length in 2 upmost bits */ + first_byte&= 0x3F; + src++; /* Skip length + encode */ + file_no= LSN_FILE_NO(base_lsn); /* Assume relative */ + DBUG_PRINT("info", ("code: %u first byte: %lu", + (uint) code, (ulong) first_byte)); + switch (code) { + case 0: + rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src)); + break; + case 1: + diff= uint2korr(src); + rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff); + break; + case 2: + diff= uint3korr(src); + rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff); + break; + case 3: + { + ulonglong base_offset= LSN_OFFSET(base_lsn); + diff= uint4korr(src); + if (diff > LSN_OFFSET(base_lsn)) + { + /* take 1 from file offset */ + first_byte++; + base_offset+= LL(0x100000000); + } + file_no= LSN_FILE_NO(base_lsn) - first_byte; + rec_offset= base_offset - diff; + break; + } + default: + DBUG_ASSERT(0); + DBUG_RETURN(NULL); + } + lsn= MAKE_LSN(file_no, rec_offset); + src+= code + 1; + lsn_store(dst, lsn); + DBUG_PRINT("info", ("new src: 0x%lx", (ulong) dst)); + DBUG_RETURN(src); +} + + +/* + Encode relative LSNs listed in the parameters + + SYNOPSIS + translog_relative_LSN_encode() + parts Parts list with encoded LSN(s) + base_lsn LSN which is base for encoding + lsns number of LSN(s) to encode + compressed_LSNs buffer which can be used for storing compressed LSN(s) + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts, + LSN base_lsn, + uint lsns, byte *compressed_LSNs) +{ + struct st_translog_part *part; + uint lsns_len= lsns * LSN_STORE_SIZE; + + DBUG_ENTER("translog_relative_LSN_encode"); + + part= dynamic_element(&parts->parts, parts->current, + struct st_translog_part *); + /* collect all LSN(s) in one chunk if it (they) is (are) divided */ + if (part->len < lsns_len) + { + uint copied= part->len; + DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs)); + memcpy(compressed_LSNs, part->buff, part->len); + do + { + struct st_translog_part *next_part; + next_part= dynamic_element(&parts->parts, parts->current + 1, + struct st_translog_part *); + if ((next_part->len + copied) < lsns_len) + { + memcpy(compressed_LSNs + copied, next_part->buff, next_part->len); + copied+= next_part->len; + delete_dynamic_element(&parts->parts, parts->current + 1); + } + else + { + uint len= lsns_len - copied; + memcpy(compressed_LSNs + copied, next_part->buff, len); + copied= lsns_len; + next_part->buff+= len; + next_part->len-= len; + } + } while (copied < lsns_len); + part->len= lsns_len; + part->buff= compressed_LSNs; + } + { + /* Compress */ + LSN ref; + uint economy; + byte *ref_ptr= part->buff + lsns_len - LSN_STORE_SIZE; + byte *dst_ptr= part->buff + lsns_len; + for (; ref_ptr >= part->buff ; ref_ptr-= LSN_STORE_SIZE) + { + ref= lsn_korr(ref_ptr); + if ((dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr)) == NULL) + DBUG_RETURN(1); + } + /* Note that dst_ptr did grow downward ! 
*/ + economy= (uint) (dst_ptr - part->buff); + DBUG_PRINT("info", ("Economy: %u", economy)); + part->len-= economy; + parts->record_length-= economy; + parts->total_record_length-= economy; + part->buff= dst_ptr; + } + DBUG_RETURN(0); +} + + +/* + Write multi-group variable-size record + + SYNOPSIS + translog_write_variable_record_mgroup() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + buffer_to_flush Buffer which have to be flushed if it is not 0 + header_length Header length calculated for 1 group + buffer_rest Beginning from which we plan to write in full pages + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_mgroup(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + struct st_translog_buffer + *buffer_to_flush, + uint16 header_length, + translog_size_t buffer_rest, + void *tcb) +{ + TRANSLOG_ADDRESS horizon; + struct st_buffer_cursor cursor; + int rc= 0; + uint i, chunk2_page, full_pages; + uint curr_group= 0; + translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1; + translog_size_t done= 0; + struct st_translog_group_descriptor group; + DYNAMIC_ARRAY groups; + uint16 chunk3_size; + uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1; + uint16 last_page_capacity; + my_bool new_page_before_chunk0= 1, first_chunk0= 1; + byte chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1]; + byte chunk2_header[1]; + uint header_fixed_part= header_length + 2; + uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1); + DBUG_ENTER("translog_write_variable_record_mgroup"); + + chunk2_header[0]= TRANSLOG_CHUNK_NOHDR; + + if (init_dynamic_array(&groups, sizeof(struct st_translog_group_descriptor), + 10, 10 CALLER_INFO)) + { + translog_unlock(); + UNRECOVERABLE_ERROR(("init array failed")); + DBUG_RETURN(1); + } + + first_page= translog_get_current_page_rest(); + record_rest= parts->record_length - (first_page - 1); + DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest)); + + if (record_rest < buffer_rest) + { + DBUG_PRINT("info", ("too many free space because changing header")); + buffer_rest-= log_descriptor.page_capacity_chunk_2; + DBUG_ASSERT(record_rest >= buffer_rest); + } + + do + { + group.addr= horizon= log_descriptor.horizon; + cursor= log_descriptor.bc; + cursor.chaser= 1; + if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255) + { + /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */ + full_pages= 255; + buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2; + } + /* + group chunks = + full pages + first page (which actually can be full, too). 
+ But here we assign number of chunks - 1 + */ + group.num= full_pages; + if (insert_dynamic(&groups, (gptr) &group)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + goto err_unlock; + } + + DBUG_PRINT("info", ("chunk: #%u first_page: %u (%u) " + "full_pages: %lu (%lu) " + "Left %lu", + groups.elements, + first_page, first_page - 1, + (ulong) full_pages, + (ulong) (full_pages * + log_descriptor.page_capacity_chunk_2), + (ulong)(parts->record_length - (first_page - 1 + + buffer_rest) - + done))); + rc|= translog_advance_pointer(full_pages, 0); + + rc|= translog_unlock(); + + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + buffer_to_flush= NULL; + } + if (rc) + { + UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); + goto err; + } + + translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); + translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " + "Left %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - (first_page - 1) - + done))); + + for (i= 0; i < full_pages; i++) + { + if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) + goto err; + + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) " + "local: (%lu,0x%lx) " + "Left: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - (first_page - 1) - + i * log_descriptor.page_capacity_chunk_2 - + done))); + } + + done+= (first_page - 1 + buffer_rest); + + /* TODO: make separate function for following */ + rc= translog_page_next(&horizon, &cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + buffer_to_flush= NULL; + } + if (rc) + { + UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); + goto err; + } + rc= translog_buffer_lock(cursor.buffer); + if (!rc) + translog_buffer_decrease_writers(cursor.buffer); + rc|= translog_buffer_unlock(cursor.buffer); + if (rc) + goto err; + + translog_lock(); + + first_page= translog_get_current_page_rest(); + buffer_rest= translog_get_current_group_size(); + } while (first_page + buffer_rest < (uint) (parts->record_length - done)); + + group.addr= horizon= log_descriptor.horizon; + cursor= log_descriptor.bc; + cursor.chaser= 1; + group.num= 0; /* 0 because it does not matter */ + if (insert_dynamic(&groups, (gptr) &group)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + goto err_unlock; + } + record_rest= parts->record_length - done; + DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest)); + if (first_page <= record_rest + 1) + { + chunk2_page= 1; + record_rest-= (first_page - 1); + full_pages= record_rest / log_descriptor.page_capacity_chunk_2; + record_rest= (record_rest % log_descriptor.page_capacity_chunk_2); + last_page_capacity= page_capacity; + } + else + { + chunk2_page= full_pages= 0; + last_page_capacity= first_page; + } + chunk3_size= 0; + chunk3_pages= 0; + if 
(last_page_capacity > record_rest + 1 && record_rest != 0) + { + if (last_page_capacity > + record_rest + header_fixed_part + groups.elements * (7 + 1)) + { + /* 1 record of type 0 */ + chunk3_pages= 0; + } + else + { + chunk3_pages= 1; + if (record_rest + 2 == last_page_capacity) + { + chunk3_size= record_rest - 1; + record_rest= 1; + } + else + { + chunk3_size= record_rest; + record_rest= 0; + } + } + } + /* + A first non-full page will hold type 0 chunk only if it fit in it with + all its headers + */ + while (page_capacity < + record_rest + header_fixed_part + + (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1)) + chunk0_pages++; + DBUG_PRINT("info", ("chunk0_pages: %u groups %u groups per full page: %u " + "Group on last page: %u", + chunk0_pages, groups.elements, + groups_per_page, + (groups.elements - + ((page_capacity - header_fixed_part) / (7 + 1)) * + (chunk0_pages - 1)))); + DBUG_PRINT("info", ("first_page: %u chunk2: %u full_pages: %u (%lu) " + "chunk3: %u (%u) rest: %u", + first_page, + chunk2_page, full_pages, + (ulong) full_pages * + log_descriptor.page_capacity_chunk_2, + chunk3_pages, (uint) chunk3_size, (uint) record_rest)); + rc= translog_advance_pointer(full_pages + chunk3_pages + + (chunk0_pages - 1), + record_rest + header_fixed_part + + (groups.elements - + ((page_capacity - + header_fixed_part) / (7 + 1)) * + (chunk0_pages - 1)) * (7 + 1)); + rc|= translog_unlock(); + if (rc) + goto err; + + if (chunk2_page) + { + DBUG_PRINT("info", ("chunk 2 to finish first page")); + translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); + translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " + "Left: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - (first_page - 1) - + done))); + } + else if (chunk3_pages) + { + DBUG_PRINT("info", ("chunk 3")); + DBUG_ASSERT(full_pages == 0); + byte chunk3_header[3]; + chunk3_pages= 0; + chunk3_header[0]= TRANSLOG_CHUNK_LNGTH; + int2store(chunk3_header + 1, chunk3_size); + translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header); + translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " + "Left: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - chunk3_size - done))); + } + else + { + DBUG_PRINT("info", ("no new_page_before_chunk0")); + new_page_before_chunk0= 0; + } + + for (i= 0; i < full_pages; i++) + { + DBUG_ASSERT(chunk2_page != 0); + if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) + goto err; + + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " + "Left: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - (first_page - 1) - + i * log_descriptor.page_capacity_chunk_2 - + done))); + } + + if (chunk3_pages && + translog_write_variable_record_chunk3_page(parts, + chunk3_size, + &horizon, &cursor)) + goto err; + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) 
LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + + + *chunk0_header= (byte) (type |TRANSLOG_CHUNK_LSN); + int2store(chunk0_header + 1, short_trid); + translog_write_variable_record_1group_code_len(chunk0_header + 3, + parts->record_length, + header_length); + do + { + int limit; + if (new_page_before_chunk0) + { + rc= translog_page_next(&horizon, &cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + buffer_to_flush= NULL; + } + if (rc) + { + UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); + goto err; + } + } + new_page_before_chunk0= 1; + + if (first_chunk0) + { + first_chunk0= 0; + *lsn= horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook) (type, tcb, + lsn, parts)) + goto err; + } + + /* + A first non-full page will hold type 0 chunk only if it fit in it with + all its headers => the fist page is full or number of groups less then + possible number of full page. + */ + limit= (groups_per_page < groups.elements - curr_group ? + groups_per_page : groups.elements - curr_group); + DBUG_PRINT("info", ("Groups: %u curr: %u limit: %u", + (uint) groups.elements, (uint) curr_group, + (uint) limit)); + + if (chunk0_pages == 1) + { + DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u", + (uint) limit, (uint) record_rest, + (uint) (2 + limit * (7 + 1) + record_rest))); + int2store(chunk0_header + header_length - 2, + 2 + limit * (7 + 1) + record_rest); + } + else + { + DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u", + (uint) limit, (uint) (2 + limit * (7 + 1)))); + int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1)); + } + int2store(chunk0_header + header_length, groups.elements - curr_group); + translog_write_data_on_page(&horizon, &cursor, header_fixed_part, + chunk0_header); + for (i= curr_group; i < limit + curr_group; i++) + { + struct st_translog_group_descriptor *grp_ptr; + grp_ptr= dynamic_element(&groups, i, + struct st_translog_group_descriptor *); + lsn_store(group_desc, grp_ptr->addr); + group_desc[7]= grp_ptr->num; + translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc); + } + + if (chunk0_pages == 1 && record_rest != 0) + translog_write_parts_on_page(&horizon, &cursor, record_rest, parts); + + chunk0_pages--; + curr_group+= limit; + + } while (chunk0_pages != 0); + rc= translog_buffer_lock(cursor.buffer); + if (cmp_translog_addr(cursor.buffer->last_lsn, *lsn) < 0) + cursor.buffer->last_lsn= *lsn; + translog_buffer_decrease_writers(cursor.buffer); + rc|= translog_buffer_unlock(cursor.buffer); + + delete_dynamic(&groups); + DBUG_RETURN(rc); + +err_unlock: + translog_unlock(); +err: + delete_dynamic(&groups); + DBUG_RETURN(1); +} + + +/* + Write the variable length log record + + SYNOPSIS + translog_write_variable_record() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_write_variable_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + void *tcb) +{ + struct st_translog_buffer *buffer_to_flush= 
NULL; + uint header_length1= 1 + 2 + 2 + + translog_variable_record_length_bytes(parts->record_length); + ulong buffer_rest; + uint page_rest; + /* Max number of such LSNs per record is 2 */ + byte compressed_LSNs[2 * LSN_STORE_SIZE]; + + DBUG_ENTER("translog_write_variable_record"); + + translog_lock(); + DBUG_PRINT("info", ("horizon: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill; + DBUG_PRINT("info", ("header length: %u page_rest: %u", + header_length1, page_rest)); + + /* + header and part which we should read have to fit in one chunk + TODO: allow to divide readable header + */ + if (page_rest < + (header_length1 + log_record_type_descriptor[type].read_header_len)) + { + DBUG_PRINT("info", + ("Next page, size: %u header: %u + %u", + log_descriptor.bc.current_page_fill, + header_length1, + log_record_type_descriptor[type].read_header_len)); + translog_page_next(&log_descriptor.horizon, &log_descriptor.bc, + &buffer_to_flush); + /* Chunk 2 header is 1 byte, so full page capacity will be one byte more */ + page_rest= log_descriptor.page_capacity_chunk_2 + 1; + DBUG_PRINT("info", ("page_rest: %u", page_rest)); + } + + /* + To minimize compressed size we will compress always relative to + very first chunk address (log_descriptor.horizon for now) + */ + if (log_record_type_descriptor[type].compressed_LSN > 0) + { + if (translog_relative_LSN_encode(parts, log_descriptor.horizon, + log_record_type_descriptor[type]. + compressed_LSN, compressed_LSNs)) + { + translog_unlock(); + if (buffer_to_flush != NULL) + { + /* + It is just try to finish log in nice way in case of error, so we + do not check result of the following functions, because we are + going return error state in any case + */ + translog_buffer_flush(buffer_to_flush); + translog_buffer_unlock(buffer_to_flush); + } + DBUG_RETURN(1); + } + /* recalculate header length after compression */ + header_length1= 1 + 2 + 2 + + translog_variable_record_length_bytes(parts->record_length); + DBUG_PRINT("info", ("after compressing LSN(s) header length: %u " + "record length: %lu", + header_length1, (ulong)parts->record_length)); + } + + /* TODO: check space on current page for header + few bytes */ + if (page_rest >= parts->record_length + header_length1) + { + /* following function makes translog_unlock(); */ + DBUG_RETURN(translog_write_variable_record_1chunk(lsn, type, short_trid, + parts, buffer_to_flush, + header_length1, tcb)); + } + + buffer_rest= translog_get_current_group_size(); + + if (buffer_rest >= parts->record_length + header_length1 - page_rest) + { + /* following function makes translog_unlock(); */ + DBUG_RETURN(translog_write_variable_record_1group(lsn, type, short_trid, + parts, buffer_to_flush, + header_length1, tcb)); + } + /* following function makes translog_unlock(); */ + DBUG_RETURN(translog_write_variable_record_mgroup(lsn, type, short_trid, + parts, buffer_to_flush, + header_length1, + buffer_rest, tcb)); +} + + +/* + Write the fixed and pseudo-fixed log record + + SYNOPSIS + translog_write_fixed_record() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + parts Descriptor of record source parts + tcb Transaction control block pointer for hooks by + record log type + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_write_fixed_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID 
short_trid, + struct st_translog_parts *parts, + void *tcb) +{ + struct st_translog_buffer *buffer_to_flush= NULL; + byte chunk1_header[1 + 2]; + /* Max number of such LSNs per record is 2 */ + byte compressed_LSNs[2 * LSN_STORE_SIZE]; + struct st_translog_part part; + int rc; + DBUG_ENTER("translog_write_fixed_record"); + DBUG_ASSERT((log_record_type_descriptor[type].class == + LOGRECTYPE_FIXEDLENGTH && + parts->record_length == + log_record_type_descriptor[type].fixed_length) || + (log_record_type_descriptor[type].class == + LOGRECTYPE_PSEUDOFIXEDLENGTH && + (parts->record_length - + log_record_type_descriptor[type].compressed_LSN * 2) <= + log_record_type_descriptor[type].fixed_length)); + + translog_lock(); + DBUG_PRINT("info", ("horizon: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + + DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_PRINT("info", + ("Page size: %u record: %u next cond: %d", + log_descriptor.bc.current_page_fill, + (parts->record_length - + log_record_type_descriptor[type].compressed_LSN * 2 + 3), + ((((uint) log_descriptor.bc.current_page_fill) + + (parts->record_length - + log_record_type_descriptor[type].compressed_LSN * 2 + 3)) > + TRANSLOG_PAGE_SIZE))); + /* + check that there is enough place on current page: + (log_record_type_descriptor[type].fixed_length - economized on compressed + LSNs) bytes + */ + if ((((uint) log_descriptor.bc.current_page_fill) + + (parts->record_length - + log_record_type_descriptor[type].compressed_LSN * 2 + 3)) > + TRANSLOG_PAGE_SIZE) + { + DBUG_PRINT("info", ("Next page")); + translog_page_next(&log_descriptor.horizon, &log_descriptor.bc, + &buffer_to_flush); + } + + *lsn= log_descriptor.horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook) (type, tcb, + lsn, parts)) + { + rc= 1; + goto err; + } + + /* compress LSNs */ + if (log_record_type_descriptor[type].class == LOGRECTYPE_PSEUDOFIXEDLENGTH) + { + DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0); + if (translog_relative_LSN_encode(parts, *lsn, + log_record_type_descriptor[type]. 
+ compressed_LSN, compressed_LSNs)) + { + rc= 1; + goto err; + } + } + + /* + Write the whole record at once (we know that there is enough place on + the destination page) + */ + DBUG_ASSERT(parts->current != 0); /* first part is left for header */ + parts->total_record_length+= (part.len= 1 + 2); + part.buff= chunk1_header; + *chunk1_header= (byte) (type | TRANSLOG_CHUNK_FIXED); + int2store(chunk1_header + 1, short_trid); + parts->current--; + set_dynamic(&parts->parts, (gptr) &part, parts->current); + + rc= translog_write_parts_on_page(&log_descriptor.horizon, + &log_descriptor.bc, + parts->total_record_length, parts); + + log_descriptor.bc.buffer->last_lsn= *lsn; + +err: + rc|= translog_unlock(); + + /* + check if we switched buffer and need process it (current buffer is + unlocked already => we will not delay other threads + */ + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + + DBUG_RETURN(rc); +} + + +/* + Write the log record + + SYNOPSIS + translog_write_record() + lsn LSN of the record will be written here + type the log record type + short_trid Sort transaction ID or 0 if it has no sense + tcb Transaction control block pointer for hooks by + record log type + partN_length length of Ns part of the log + partN_buffer pointer on Ns part buffer + 0 sign of the end of parts + + RETURN + 0 OK + 1 Error +*/ + +my_bool translog_write_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + void *tcb, + translog_size_t part1_length, + byte *part1_buff, ...) +{ + struct st_translog_parts parts; + struct st_translog_part part; + va_list pvar; + int rc; + DBUG_ENTER("translog_write_record"); + DBUG_PRINT("enter", ("type: %u ShortTrID: %u", + (uint) type, (uint)short_trid)); + + /* move information about parts into dynamic array */ + if (init_dynamic_array(&parts.parts, sizeof(struct st_translog_part), + 10, 10 CALLER_INFO)) + { + UNRECOVERABLE_ERROR(("init array failed")); + DBUG_RETURN(1); + } + + /* reserve place for header */ + parts.current= 1; + part.len= 0; + part.buff= 0; + if (insert_dynamic(&parts.parts, (gptr) &part)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } + + parts.record_length= part.len= part1_length; + part.buff= part1_buff; + if (insert_dynamic(&parts.parts, (gptr) &part)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("record length: %lu %lu ...", + (ulong) parts.record_length, + (ulong) parts.total_record_length)); + + /* count record length */ + va_start(pvar, part1_buff); + for (;;) + { + part.len= va_arg(pvar, translog_size_t); + if (part.len == 0) + break; + parts.record_length+= part.len; + part.buff= va_arg(pvar, byte*); + if (insert_dynamic(&parts.parts, (gptr) &part)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("record length: %lu %lu ...", + (ulong) parts.record_length, + (ulong) parts.total_record_length)); + } + va_end(pvar); + + /* + Start total_record_length from record_length then overhead will + be add + */ + parts.total_record_length= parts.record_length; + va_end(pvar); + DBUG_PRINT("info", ("record length: %lu %lu", + (ulong) parts.record_length, + (ulong) parts.total_record_length)); + + /* process this parts */ + if (!(rc= (log_record_type_descriptor[type].prewrite_hook && + (*log_record_type_descriptor[type].prewrite_hook) (type, tcb, + &parts)))) + { + switch 
(log_record_type_descriptor[type].class) { + case LOGRECTYPE_VARIABLE_LENGTH: + rc= translog_write_variable_record(lsn, type, short_trid, &parts, tcb); + break; + case LOGRECTYPE_PSEUDOFIXEDLENGTH: + case LOGRECTYPE_FIXEDLENGTH: + rc= translog_write_fixed_record(lsn, type, short_trid, &parts, tcb); + break; + case LOGRECTYPE_NOT_ALLOWED: + default: + DBUG_ASSERT(0); + rc= 1; + } + } + + delete_dynamic(&parts.parts); + DBUG_RETURN(rc); +} + + +/* + Decode compressed (relative) LSN(s) + + SYNOPSIS + translog_relative_lsn_decode() + base_lsn LSN for encoding + src Decode LSN(s) from here + dst Put decoded LSNs here + lsns number of LSN(s) + + RETURN + position in sources after decoded LSN(s) +*/ + +static byte *translog_relative_LSN_decode(LSN base_lsn, + byte *src, byte *dst, uint lsns) +{ + uint i; + for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE) + { + src= translog_get_LSN_from_diff(base_lsn, src, dst); + } + return src; +} + +/* + Get header of fixed/pseudo length record and call hook for it processing + + SYNOPSIS + translog_fixed_length_header() + page Pointer to the buffer with page where LSN chunk is + placed + page_offset Offset of the first chunk in the page + buff Buffer to be filled with header data + + RETURN + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_fixed_length_header(byte *page, + translog_size_t page_offset, + TRANSLOG_HEADER_BUFFER *buff) +{ + struct st_log_record_type_descriptor *desc= + log_record_type_descriptor + buff->type; + byte *src= page + page_offset + 3; + byte *dst= buff->header; + byte *start= src; + uint lsns= desc->compressed_LSN; + uint length= desc->fixed_length + (lsns * 2); + + DBUG_ENTER("translog_fixed_length_header"); + + buff->record_length= length; + + if (desc->class == LOGRECTYPE_PSEUDOFIXEDLENGTH) + { + DBUG_ASSERT(lsns > 0); + src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns); + lsns*= LSN_STORE_SIZE; + dst+= lsns; + length-= lsns; + buff->compressed_LSN_economy= (uint16) (lsns - (src - start)); + } + else + buff->compressed_LSN_economy= 0; + + memcpy(dst, src, length); + buff->non_header_data_start_offset= page_offset + + ((src + length) - (page + page_offset)); + buff->non_header_data_len= 0; + DBUG_RETURN(buff->record_length); +} + + +/* + Free resources used by TRANSLOG_HEADER_BUFFER + + SYNOPSIS + translog_free_record_header(); +*/ + +void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff) +{ + DBUG_ENTER("translog_free_record_header"); + if (buff->groups_no != 0) + { + my_free((gptr) buff->groups, MYF(0)); + buff->groups_no= 0; + } + DBUG_VOID_RETURN; +} + + +/* + Set current horizon in the scanner data structure + + SYNOPSIS + translog_scanner_set_horizon() + scanner Information about current chunk during scanning +*/ + +static void translog_scanner_set_horizon(struct st_translog_scanner_data + *scanner) +{ + translog_lock(); + scanner->horizon= log_descriptor.horizon; + translog_unlock(); +} + + +/* + Set last page in the scanner data structure + + SYNOPSIS + translog_scanner_set_last_page() + scanner Information about current chunk during scanning + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA + *scanner) +{ + my_bool page_ok; + scanner->last_file_page= scanner->page_addr; + return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok)); +} + + +/* + Initialize reader scanner + + SYNOPSIS + translog_init_scanner() + lsn LSN with which it have to be inited + 
fixed_horizon true if it is OK do not read records which was written + after scanning beginning + scanner scanner which have to be inited + + RETURN + 0 OK + 1 Error +*/ + +my_bool translog_init_scanner(LSN lsn, + my_bool fixed_horizon, + struct st_translog_scanner_data *scanner) +{ + TRANSLOG_VALIDATOR_DATA data; + DBUG_ENTER("translog_init_scanner"); + DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", + (ulong) LSN_FILE_NO(lsn), + (ulong) LSN_OFFSET(lsn)); + DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); + + data.addr= &scanner->page_addr; + data.was_recovered= 0; + + scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; + + scanner->fixed_horizon= fixed_horizon; + + translog_scanner_set_horizon(scanner); + DBUG_PRINT("info", ("horizon: (0x%lu,0x%lx)", + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon))); + + /* lsn < horizon */ + DBUG_ASSERT(lsn < scanner->horizon)); + + scanner->page_addr= lsn; + scanner->page_addr-= scanner->page_offset; /*decrease offset */ + + if (translog_scanner_set_last_page(scanner)) + DBUG_RETURN(1); + + if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +/* + Checks End of the Log + + SYNOPSIS + translog_scanner_eol() + scanner Information about current chunk during scanning + + RETURN + 1 End of the Log + 0 OK +*/ +static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner) +{ + DBUG_ENTER("translog_scanner_eol"); + DBUG_PRINT("enter", + ("Horizon: (%lu, 0x%lx) Current: (%lu, 0x%lx+0x%x=0x%lx)", + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr), + (uint) scanner->page_offset, + (ulong) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset))); + if (scanner->horizon > (scanner->page_addr + + scanner->page_offset)) + { + DBUG_PRINT("info", ("Horizon is not reached")); + DBUG_RETURN(0); + } + if (scanner->fixed_horizon) + { + DBUG_PRINT("info", ("Horizon is fixed and reached")); + DBUG_RETURN(1); + } + translog_scanner_set_horizon(scanner); + DBUG_PRINT("info", + ("Horizon is re-read, EOL: %d", + scanner->horizon <= (scanner->page_addr + + scanner->page_offset))); + DBUG_RETURN(scanner->horizon <= (scanner->page_addr + + scanner->page_offset)); +} + + +/* + Cheks End of the Page + + SYNOPSIS + translog_scanner_eop() + scanner Information about current chunk during scanning + + RETURN + 1 End of the Page + 0 OK +*/ +static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner) +{ + DBUG_ENTER("translog_scanner_eop"); + DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE || + scanner->page[scanner->page_offset] == 0); +} + + +/* + Checks End of the File (I.e. 
we are scanning last page, which do not + mean end of this page) + + SYNOPSIS + translog_scanner_eof() + scanner Information about current chunk during scanning + + RETURN + 1 End of the File + 0 OK +*/ +static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner) +{ + DBUG_ENTER("translog_scanner_eof"); + DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) == + LSN_FILE_NO(scanner->last_file_page)); + DBUG_PRINT("enter", ("curr Page: 0x%lx last page: 0x%lx " + "normal EOF: %d", + (ulong) LSN_OFFSET(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->last_file_page), + LSN_OFFSET(scanner->page_addr) == + LSN_OFFSET(scanner->last_file_page))); + /* + TODO: detect damaged file EOF, + TODO: issue warning if damaged file EOF detected + */ + DBUG_RETURN(scanner->page_addr == + scanner->last_file_page); +} + + +/* + Move scanner to the next chunk + + SYNOPSIS + translog_get_next_chunk() + scanner Information about current chunk during scanning + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner) +{ + uint16 len; + TRANSLOG_VALIDATOR_DATA data; + DBUG_ENTER("translog_get_next_chunk"); + + if ((len= translog_get_total_chunk_length(scanner->page, + scanner->page_offset)) == 0) + DBUG_RETURN(1); + scanner->page_offset+= len; + + if (translog_scanner_eol(scanner)) + { + scanner->page= &end_of_log; + scanner->page_offset= 0; + DBUG_RETURN(0); + } + if (translog_scanner_eop(scanner)) + { + if (translog_scanner_eof(scanner)) + { + DBUG_PRINT("info", ("horizon: (%lu,0x%lx) pageaddr: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr))); + /* if it is log end it have to be caught before */ + DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) > + LSN_FILE_NO(scanner->page_addr)); + scanner->page_addr+= LSN_ONE_FILE; + scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr, + TRANSLOG_PAGE_SIZE); + if (translog_scanner_set_last_page(scanner)) + DBUG_RETURN(1); + } + else + { + scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */ + } + + data.addr= &scanner->page_addr; + data.was_recovered= 0; + if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) + DBUG_RETURN(1); + + scanner->page_offset= translog_get_first_chunk_offset(scanner->page); + if (translog_scanner_eol(scanner)) + { + scanner->page= &end_of_log; + scanner->page_offset= 0; + DBUG_RETURN(0); + } + DBUG_ASSERT(scanner->page[scanner->page_offset]); + } + DBUG_RETURN(0); +} + + +/* + Get header of variable length record and call hook for it processing + + SYNOPSIS + translog_variable_length_header() + page Pointer to the buffer with page where LSN chunk is + placed + page_offset Offset of the first chunk in the page + buff Buffer to be filled with header data + scanner If present should be moved to the header page if + it differ from LSN page + + RETURN + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_variable_length_header(byte *page, + translog_size_t page_offset, + TRANSLOG_HEADER_BUFFER *buff, + TRANSLOG_SCANNER_DATA + *scanner) +{ + struct st_log_record_type_descriptor *desc= (log_record_type_descriptor + + buff->type); + byte *src= page + page_offset + 1 + 2; + byte *dst= buff->header; + LSN base_lsn; + uint lsns= desc->compressed_LSN; + uint16 chunk_len; + uint16 length= desc->read_header_len + (lsns * 2); + uint16 buffer_length= length; + uint16 body_len; + 
TRANSLOG_SCANNER_DATA internal_scanner; + + DBUG_ENTER("translog_variable_length_header"); + + buff->record_length= translog_variable_record_1group_decode_len(&src); + chunk_len= uint2korr(src); + DBUG_PRINT("info", ("rec len: %lu chunk len: %u length: %u bufflen: %u", + (ulong) buff->record_length, (uint) chunk_len, + (uint) length, (uint) buffer_length)); + if (chunk_len == 0) + { + uint16 page_rest; + DBUG_PRINT("info", ("1 group")); + src+= 2; + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + + base_lsn= buff->lsn; + body_len= min(page_rest, buff->record_length); + } + else + { + uint grp_no, curr; + uint header_to_skip; + uint16 page_rest; + + DBUG_PRINT("info", ("multi-group")); + grp_no= buff->groups_no= uint2korr(src + 2); + if (!(buff->groups= + (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no, + MYF(0)))) + DBUG_RETURN(0); + DBUG_PRINT("info", ("Groups: %u", (uint) grp_no)); + src+= (2 + 2); + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + curr= 0; + header_to_skip= src - (page + page_offset); + buff->chunk0_pages= 0; + + for (;;) + { + uint i, read= grp_no; + + buff->chunk0_pages++; + if (page_rest < grp_no * (7 + 1)) + read= page_rest / (7 + 1); + DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u " + "start from: %u", + buff->chunk0_pages, read, grp_no, curr)); + for (i= 0; i < read; i++, curr++) + { + DBUG_ASSERT(curr < buff->groups_no); + buff->groups[curr].addr= lsn_korr(src + i * (7 + 1)); + buff->groups[curr].num= src[i * (7 + 1) + 7]; + DBUG_PRINT("info", ("group #%u (%lu,0x%lx) chunks: %u", + curr, + (ulong) LSN_FILE_NO(buff->groups[curr].addr), + (ulong) LSN_OFFSET(buff->groups[curr].addr), + (uint) buff->groups[curr].num)); + } + grp_no-= read; + if (grp_no == 0) + { + if (scanner) + { + buff->chunk0_data_addr= scanner->page_addr; + buff->chunk0_data_addr+= (page_offset + header_to_skip + + read * (7 + 1)); /* offset increased */ + } + else + { + buff->chunk0_data_addr= buff->lsn; + /* offset increased */ + buff->chunk0_data_addr+= (header_to_skip + read * (7 + 1)); + } + buff->chunk0_data_len= chunk_len - 2 - read * (7 + 1); + DBUG_PRINT("info", ("Data address: (%lu,0x%lx) len: %u", + (ulong) LSN_FILE_NO(buff->chunk0_data_addr), + (ulong) LSN_OFFSET(buff->chunk0_data_addr), + buff->chunk0_data_len)); + break; + } + if (scanner == NULL) + { + DBUG_PRINT("info", ("use internal scanner for header reading")); + scanner= &internal_scanner; + translog_init_scanner(buff->lsn, 1, scanner); + } + translog_get_next_chunk(scanner); + page= scanner->page; + page_offset= scanner->page_offset; + src= page + page_offset + header_to_skip; + chunk_len= uint2korr(src - 2 - 2); + DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len)); + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + } + + if (scanner == NULL) + { + DBUG_PRINT("info", ("use internal scanner")); + scanner= &internal_scanner; + } + + base_lsn= buff->groups[0].addr; + translog_init_scanner(base_lsn, 1, scanner); + /* first group chunk is always chunk type 2 */ + page= scanner->page; + page_offset= scanner->page_offset; + src= page + page_offset + 1; + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + body_len= page_rest; + } + if (lsns) + { + byte *start= src; + src= translog_relative_LSN_decode(base_lsn, src, dst, lsns); + lsns*= LSN_STORE_SIZE; + dst+= lsns; + length-= lsns; + buff->record_length+= (buff->compressed_LSN_economy= + (uint16) (lsns - (src - start))); + DBUG_PRINT("info", ("lsns: %u length: %u economy: %u new length: %lu", + lsns / LSN_STORE_SIZE, (uint) length, + (uint) 
buff->compressed_LSN_economy, + (ulong) buff->record_length)); + body_len-= (src - start); + } + else + buff->compressed_LSN_economy= 0; + + DBUG_ASSERT(body_len >= length); + body_len-= length; + memcpy(dst, src, length); + buff->non_header_data_start_offset= src + length - page; + buff->non_header_data_len= body_len; + DBUG_PRINT("info", ("non_header_data_start_offset: %u len: %u buffer: %u", + buff->non_header_data_start_offset, + buff->non_header_data_len, buffer_length)); + DBUG_RETURN(buffer_length); +} + + +/* + Read record header from the given buffer + + SYNOPSIS + translog_read_record_header_from_buffer() + page page content buffer + page_offset offset of the chunk in the page + buff destination buffer + scanner If this is set the scanner will be moved to the + record header page (differ from LSN page in case of + multi-group records) +*/ + +translog_size_t +translog_read_record_header_from_buffer(byte *page, + uint16 page_offset, + TRANSLOG_HEADER_BUFFER *buff, + TRANSLOG_SCANNER_DATA *scanner) +{ + DBUG_ENTER("translog_read_record_header_from_buffer"); + DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) == + TRANSLOG_CHUNK_LSN || + (page[page_offset] & TRANSLOG_CHUNK_TYPE) == + TRANSLOG_CHUNK_FIXED); + buff->type= (page[page_offset] & TRANSLOG_REC_TYPE); + buff->short_trid= uint2korr(page + page_offset + 1); + DBUG_PRINT("info", ("Type %u, Sort TrID %u, LSN (%lu,0x%lx)", + (uint) buff->type, (uint)buff->short_trid, + (ulong) LSN_FILE_NO(buff->lsn), + (ulong) LSN_OFFSET(buff->lsn))); + /* Read required bytes from the header and call hook */ + switch (log_record_type_descriptor[buff->type].class) { + case LOGRECTYPE_VARIABLE_LENGTH: + DBUG_RETURN(translog_variable_length_header(page, page_offset, buff, + scanner)); + case LOGRECTYPE_PSEUDOFIXEDLENGTH: + case LOGRECTYPE_FIXEDLENGTH: + DBUG_RETURN(translog_fixed_length_header(page, page_offset, buff)); + default: + DBUG_ASSERT(0); + } + DBUG_RETURN(0); /* purecov: deadcode */ +} + + +/* + Read record header and some fixed part of a record (the part depend on + record type). + + SYNOPSIS + translog_read_record_header() + lsn log record serial number (address of the record) + buff log record header buffer + + NOTE + - Some type of record can be read completely by this call + - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative + LSN can be translated to absolute one), some fields can be added + (like actual header length in the record if the header has variable + length) + + RETURN + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_read_record_header(LSN lsn, + TRANSLOG_HEADER_BUFFER *buff) +{ + byte buffer[TRANSLOG_PAGE_SIZE], *page; + translog_size_t page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; + TRANSLOG_ADDRESS addr; + TRANSLOG_VALIDATOR_DATA data; + DBUG_ENTER("translog_read_record_header"); + DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", + (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn))); + DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); + + buff->lsn= lsn; + buff->groups_no= 0; + data.addr= &addr; + data.was_recovered= 0; + addr= lsn; + addr-= page_offset; /* offset decreasing */ + if (!(page= translog_get_page(&data, buffer))) + DBUG_RETURN(0); + + DBUG_RETURN(translog_read_record_header_from_buffer(page, page_offset, + buff, 0)); +} + + +/* + Read record header and some fixed part of a record (the part depend on + record type). 
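  For comparison, the LSN-based variant above is typically used like this
  (minimal sketch; the lsn value and the process() handler are
  placeholders, error handling shortened):

    TRANSLOG_HEADER_BUFFER buff;
    translog_size_t header_len= translog_read_record_header(lsn, &buff);
    if (header_len == 0)
      return 1;
    process(buff.type, buff.short_trid, buff.record_length, buff.header);
    translog_free_record_header(&buff);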
+ + SYNOPSIS + translog_read_record_header_scan() + scan scanner position to read + buff log record header buffer + move_scanner request to move scanner to the header position + + NOTE + - Some type of record can be read completely by this call + - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative + LSN can be translated to absolute one), some fields can be added + (like actual header length in the record if the header has variable + length) + + RETURN + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t +translog_read_record_header_scan(TRANSLOG_SCANNER_DATA + *scanner, + TRANSLOG_HEADER_BUFFER *buff, + my_bool move_scanner) +{ + DBUG_ENTER("translog_read_record_header_scan"); + DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) " + "Lst: (%lu,0x%lx) Offset: %u(%x) fixed %d", + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr), + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->last_file_page), + (ulong) LSN_OFFSET(scanner->last_file_page), + (uint) scanner->page_offset, + (uint) scanner->page_offset, scanner->fixed_horizon)); + buff->groups_no= 0; + buff->lsn= scanner->page_addr; + buff->lsn+= scanner->page_offset; /* offset increasing */ + DBUG_RETURN(translog_read_record_header_from_buffer(scanner->page, + scanner->page_offset, + buff, + (move_scanner ? + scanner : 0))); +} + + +/* + Read record header and some fixed part of the next record (the part + depend on record type). + + SYNOPSIS + translog_read_next_record_header() + scanner data for scanning if lsn is NULL scanner data + will be used for continue scanning. + The scanner can be NULL. + buff log record header buffer + + NOTE + - it is like translog_read_record_header, but read next record, so see + its NOTES. 
+ - in case of end of the log buff->lsn will be set to + (CONTROL_FILE_IMPOSSIBLE_LSN) + + RETURN + 0 error + TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 End of the log + # number of bytes in + TRANSLOG_HEADER_BUFFER::header + where stored decoded + part of the header +*/ + +translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA + *scanner, + TRANSLOG_HEADER_BUFFER *buff) +{ + uint8 chunk_type; + + buff->groups_no= 0; /* to be sure that we will free it right */ + + DBUG_ENTER("translog_read_next_record_header"); + DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner)); + DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) " + "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d", + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr), + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->last_file_page), + (ulong) LSN_OFFSET(scanner->last_file_page), + (uint) scanner->page_offset, + (uint) scanner->page_offset, scanner->fixed_horizon)); + + do + { + if (translog_get_next_chunk(scanner)) + DBUG_RETURN(0); + chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE; + DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type, + (uint) scanner->page[scanner->page_offset])); + } while (chunk_type != TRANSLOG_CHUNK_LSN && chunk_type != + TRANSLOG_CHUNK_FIXED && scanner->page[scanner->page_offset] != 0); + + if (scanner->page[scanner->page_offset] == 0) + { + /* Last record was read */ + buff->lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + /* Return 'end of log' marker */ + DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); + } + DBUG_RETURN(translog_read_record_header_scan(scanner, buff, 0)); +} + + +/* + Moves record data reader to the next chunk and fill the data reader + information about that chunk. 
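  A typical consumer of the scanner interface above walks the log like
  this (illustrative sketch; start_lsn and handle_record() are
  placeholders, error handling shortened):

    TRANSLOG_SCANNER_DATA scanner;
    TRANSLOG_HEADER_BUFFER buff;
    translog_size_t len;
    if (translog_init_scanner(start_lsn, 1, &scanner))
      return 1;
    while ((len= translog_read_next_record_header(&scanner, &buff)) != 0 &&
           len != TRANSLOG_RECORD_HEADER_MAX_SIZE + 1)
    {
      handle_record(&buff);
      translog_free_record_header(&buff);
    }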
+ + SYNOPSIS + translog_record_read_next_chunk() + data data cursor + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_record_read_next_chunk(struct st_translog_reader_data + *data) +{ + translog_size_t new_current_offset= data->current_offset + data->chunk_size; + uint16 chunk_header_len, chunk_len; + uint8 type; + DBUG_ENTER("translog_record_read_next_chunk"); + + if (data->eor) + { + DBUG_PRINT("info", ("end of the record flag set")); + DBUG_RETURN(1); + } + + if (data->header.groups_no && + data->header.groups_no - 1 != data->current_group && + data->header.groups[data->current_group].num == data->current_chunk) + { + /* Goto next group */ + data->current_group++; + data->current_chunk= 0; + DBUG_PRINT("info", ("skip to group: #%u", data->current_group)); + translog_init_scanner(data->header.groups[data->current_group].addr, + 1, &data->scanner); + } + else + { + data->current_chunk++; + if (translog_get_next_chunk(&data->scanner)) + DBUG_RETURN(1); + } + type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE; + + if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no) + { + DBUG_PRINT("info", + ("Last chunk: data len: %u offset: %u group: %u of %u", + data->header.chunk0_data_len, data->scanner.page_offset, + data->current_group, data->header.groups_no - 1)); + DBUG_ASSERT(data->header.groups_no - 1 == data->current_group); + DBUG_ASSERT(data->header.lsn == + data->scanner.page_addr + data->scanner.page_offset); + translog_init_scanner(data->header.chunk0_data_addr, 1, &data->scanner); + data->chunk_size= data->header.chunk0_data_len; + data->body_offset= data->scanner.page_offset; + data->current_offset= new_current_offset; + data->eor= 1; + DBUG_RETURN(0); + } + + if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED) + { + data->eor= 1; + DBUG_RETURN(1); /* End of record */ + } + + chunk_header_len= + translog_get_chunk_header_length(data->scanner.page, + data->scanner.page_offset); + chunk_len= translog_get_total_chunk_length(data->scanner.page, + data->scanner.page_offset); + data->chunk_size= chunk_len - chunk_header_len; + data->body_offset= data->scanner.page_offset + chunk_header_len; + data->current_offset= new_current_offset; + DBUG_PRINT("info", ("grp: %u chunk: %u body_offset: %u chunk_size: %u " + "current_offset: %lu", + (uint) data->current_group, + (uint) data->current_chunk, + (uint) data->body_offset, + (uint) data->chunk_size, (ulong) data->current_offset)); + DBUG_RETURN(0); +} + + +/* + Initialize record reader data from LSN + + SYNOPSIS + translog_init_reader_data() + lsn reference to LSN we should start from + data reader data to initialize + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_init_reader_data(LSN lsn, + struct st_translog_reader_data *data) +{ + DBUG_ENTER("translog_init_reader_data"); + if (translog_init_scanner(lsn, 1, &data->scanner) || + !(data->read_header= + translog_read_record_header_scan(&data->scanner, &data->header, 1))) + { + DBUG_RETURN(1); + } + data->body_offset= data->header.non_header_data_start_offset; + data->chunk_size= data->header.non_header_data_len; + data->current_offset= data->read_header; + data->current_group= 0; + data->current_chunk= 0; + data->eor= 0; + DBUG_PRINT("info", ("read_header: %u " + "body_offset: %u chunk_size: %u current_offset: %lu", + (uint) data->read_header, + (uint) data->body_offset, + (uint) data->chunk_size, (ulong) data->current_offset)); + DBUG_RETURN(0); +} + + +/* + Read a part of the record. 
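  For example, a caller that has already obtained the header of a record
  with translog_read_record_header() can fetch the complete record into a
  buffer like this (sketch; MAX_RECORD_SIZE is a hypothetical bound, not
  defined in this file):

    byte record_data[MAX_RECORD_SIZE];
    translog_size_t got;
    got= translog_read_record(buff.lsn, 0, buff.record_length,
                              record_data, NULL);

  got holds the number of bytes actually read; passing NULL as the reader
  data makes the function use its own internal reader state.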
+ + SYNOPSIS + translog_read_record_header() + lsn log record serial number (address of the record) + offset From the beginning of the record beginning (read§ + by translog_read_record_header). + length Length of record part which have to be read. + buffer Buffer where to read the record part (have to be at + least 'length' bytes length) + + RETURN + length of data actually read +*/ + +translog_size_t translog_read_record(LSN lsn, + translog_size_t offset, + translog_size_t length, + byte *buffer, + struct st_translog_reader_data *data) +{ + translog_size_t requested_length= length; + translog_size_t end= offset + length; + struct st_translog_reader_data internal_data; + DBUG_ENTER("translog_read_record"); + + if (data == NULL) + { + DBUG_ASSERT(lsn != CONTROL_FILE_IMPOSSIBLE_LSN); + data= &internal_data; + } + if (lsn || + (offset < data->current_offset && + !(offset < data->read_header && offset + length < data->read_header))) + { + if (translog_init_reader_data(lsn, data)) + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("Offset: %lu length: %lu " + "Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) " + "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d", + (ulong) offset, (ulong) length, + (ulong) LSN_FILE_NO(data->scanner.page_addr), + (ulong) LSN_OFFSET(data->scanner.page_addr), + (ulong) LSN_FILE_NO(data->scanner.horizon), + (ulong) LSN_OFFSET(data->scanner.horizon), + (ulong) LSN_FILE_NO(data->scanner.last_file_page), + (ulong) LSN_OFFSET(data->scanner.last_file_page), + (uint) data->scanner.page_offset, + (uint) data->scanner.page_offset, + data->scanner.fixed_horizon)); + if (offset < data->read_header) + { + uint16 len= min(data->read_header, end) - offset; + DBUG_PRINT("info", + ("enter header offset: %lu length: %lu", + (ulong) offset, (ulong) length)); + memcpy(buffer, data->header.header + offset, len); + length-= len; + if (length == 0) + DBUG_RETURN(requested_length); + offset+= len; + buffer+= len; + DBUG_PRINT("info", + ("len: %u offset: %lu curr: %lu length: %lu", + len, (ulong) offset, (ulong) data->current_offset, + (ulong) length)); + } + /* TODO: find first page which we should read by offset */ + + /* read the record chunk by chunk */ + for(;;) + { + uint page_end= data->current_offset + data->chunk_size; + DBUG_PRINT("info", + ("enter body offset: %lu curr: %lu " + "length: %lu page_end: %lu", + (ulong) offset, (ulong) data->current_offset, (ulong) length, + (ulong) page_end)); + if (offset < page_end) + { + uint len= page_end - offset; + DBUG_ASSERT(offset >= data->current_offset); + memcpy(buffer, + data->scanner.page + data->body_offset + + (offset - data->current_offset), len); + length-= len; + if (length == 0) + DBUG_RETURN(requested_length); + offset+= len; + buffer+= len; + DBUG_PRINT("info", + ("len: %u offset: %lu curr: %lu length: %lu", + len, (ulong) offset, (ulong) data->current_offset, + (ulong) length)); + } + if (translog_record_read_next_chunk(data)) + DBUG_RETURN(requested_length - length); + } +} + + +/* + Force skipping to the next buffer + + SYNOPSIS + translog_force_current_buffer_to_finish() +*/ + +static void translog_force_current_buffer_to_finish() +{ + TRANSLOG_ADDRESS new_buff_begunning; + uint16 old_buffer_no= log_descriptor.bc.buffer_no; + uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; + struct st_translog_buffer *new_buffer= (log_descriptor.buffers + + new_buffer_no); + struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer; + byte *data= log_descriptor.bc.ptr -log_descriptor.bc.current_page_fill; + uint16 left= 
TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill; + uint16 current_page_fill, write_counter, previous_offset; + DBUG_ENTER("translog_force_current_buffer_to_finish"); + DBUG_PRINT("enter", ("Buffer #%u 0x%lx " + "Buffer addr: (%lu,0x%lx) " + "Page addr: (%lu,0x%lx) " + "New Buff: (%lu,0x%lx) " + "size: %lu (%lu) Pg: %u left: %u", + (uint) log_descriptor.bc.buffer_no, + (ulong) log_descriptor.bc.buffer, + (ulong) LSN_FILE_NO(log_descriptor.bc.buffer->offset), + (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset), + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) (LSN_OFFSET(log_descriptor.horizon) - + log_descriptor.bc.current_page_fill), + (ulong) LSN_FILE_NO(new_buff_begunning), + (ulong) LSN_OFFSET(new_buff_begunning), + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. + buffer->buffer), + (uint) log_descriptor.bc.current_page_fill, + (uint) left)); + + new_buff_begunning= log_descriptor.bc.buffer->offset; + new_buff_begunning+= log_descriptor.bc.buffer->size; /* increase offset */ + + DBUG_ASSERT(log_descriptor.bc.ptr !=NULL); + DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) == + LSN_FILE_NO(log_descriptor.bc.buffer->offset)); + DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); + DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE); + if (left != 0) + { + /* + TODO: if 'left' is so small that can't hold any other record + then do not move the page + */ + DBUG_PRINT("info", ("left: %u", (uint) left)); + + /* decrease offset */ + new_buff_begunning-= log_descriptor.bc.current_page_fill; + current_page_fill= log_descriptor.bc.current_page_fill; + + bzero(log_descriptor.bc.ptr, left); + log_descriptor.bc.buffer->size+= left; + DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx " + "Size: %lu", + (uint) log_descriptor.bc.buffer->buffer_no, + (ulong) log_descriptor.bc.buffer, + (ulong) log_descriptor.bc.buffer->size)); + DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == + log_descriptor.bc.buffer_no); + } + else + { + left= 0; + log_descriptor.bc.current_page_fill= 0; + } + + translog_buffer_lock(new_buffer); + translog_wait_for_buffer_free(new_buffer); + + write_counter= log_descriptor.bc.write_counter; + previous_offset= log_descriptor.bc.previous_offset; + translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); + log_descriptor.bc.buffer->offset= new_buff_begunning; + log_descriptor.bc.write_counter= write_counter; + log_descriptor.bc.previous_offset= previous_offset; + + if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) + { + translog_put_sector_protection(data, &log_descriptor.bc); + if (left) + { + log_descriptor.bc.write_counter++; + log_descriptor.bc.previous_offset= current_page_fill; + } + else + { + DBUG_PRINT("info", ("drop write_counter")); + log_descriptor.bc.write_counter= 0; + log_descriptor.bc.previous_offset= 0; + } + } + + if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC) + { + uint32 crc= translog_crc(data + log_descriptor.page_overhead, + TRANSLOG_PAGE_SIZE - + log_descriptor.page_overhead); + DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc)); + int4store(data + 3 + 3 + 1, crc); + } + + if (left) + { + memcpy(new_buffer->buffer, data, current_page_fill); + log_descriptor.bc.ptr +=current_page_fill; + log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill= + current_page_fill; + new_buffer->overlay= old_buffer; + } + else + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); + + DBUG_VOID_RETURN; +} + + +/* + Flush the log up to given LSN (included) + + 
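+  Writes out all log buffers up to the one containing 'lsn' and syncs the
+  corresponding log files (and the log directory) to disk.
+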
SYNOPSIS + translog_flush() + lsn log record serial number up to which (inclusive) + the log have to be flushed + + RETURN + 0 OK + 1 Error +*/ + +my_bool translog_flush(LSN lsn) +{ + LSN old_flushed, sent_to_file; + int rc= 0; + uint i; + my_bool full_circle= 0; + DBUG_ENTER("translog_flush"); + DBUG_PRINT("enter", ("Flush up to LSN: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(lsn), + (ulong) LSN_OFFSET(lsn))); + + translog_lock(); + old_flushed= log_descriptor.flushed; + for (;;) + { + uint16 buffer_no= log_descriptor.bc.buffer_no; + uint16 buffer_start= buffer_no; + struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer; + struct st_translog_buffer *buffer= log_descriptor.bc.buffer; + /* we can't flush in future */ + DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, lsn) >= 0); + if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0) + { + DBUG_PRINT("info", ("already flushed: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.flushed), + (ulong) LSN_OFFSET(log_descriptor.flushed))); + translog_unlock(); + DBUG_RETURN(0); + } + /* send to the file if it is not sent */ + translog_get_sent_to_file(&sent_to_file); + if (cmp_translog_addr(sent_to_file, lsn) >= 0) + break; + + do + { + buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO; + buffer= log_descriptor.buffers + buffer_no; + translog_buffer_lock(buffer); + translog_buffer_unlock(buffer_unlock); + buffer_unlock= buffer; + if (buffer->file != -1) + { + buffer_unlock= NULL; + if (buffer_start == buffer_no) + { + /* we made a circle */ + full_circle= 1; + translog_force_current_buffer_to_finish(); + } + break; + } + } while ((buffer_start != buffer_no) && + cmp_translog_addr(log_descriptor.flushed, lsn) < 0); + if (buffer_unlock != NULL) + translog_buffer_unlock(buffer_unlock); + rc= translog_buffer_flush(buffer); + translog_buffer_unlock(buffer); + if (rc) + DBUG_RETURN(1); + if (!full_circle) + translog_lock(); + } + + for (i= LSN_FILE_NO(old_flushed); i <= LSN_FILE_NO(lsn); i++) + { + uint cache_index; + File file; + + if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - i) < + OPENED_FILES_NUM) + { + /* file in the cache */ + if (log_descriptor.log_file_num[cache_index] == -1) + { + if ((log_descriptor.log_file_num[cache_index]= + open_logfile_by_number_no_cache(i)) == -1) + { + translog_unlock(); + DBUG_RETURN(1); + } + } + file= log_descriptor.log_file_num[cache_index]; + rc|= my_sync(file, MYF(MY_WME)); + } + /* We sync file when we are closing it => do nothing if file closed */ + } + log_descriptor.flushed= sent_to_file; + rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME)); + translog_unlock(); + DBUG_RETURN(rc); +} diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h new file mode 100644 index 00000000000..3646afcf52a --- /dev/null +++ b/storage/maria/ma_loghandler.h @@ -0,0 +1,182 @@ + +/* Transaction log flags */ +#define TRANSLOG_PAGE_CRC 1 +#define TRANSLOG_SECTOR_PROTECTION (1<<1) +#define TRANSLOG_RECORD_CRC (1<<2) +#define TRANSLOG_FLAGS_NUM ((TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | \ + TRANSLOG_RECORD_CRC) + 1) + +/* + Page size in transaction log + It should be Power of 2 and multiple of DISK_DRIVE_SECTOR_SIZE + (DISK_DRIVE_SECTOR_SIZE * 2^N) +*/ +#define TRANSLOG_PAGE_SIZE (8*1024) + +#include "ma_loghandler_lsn.h" + +/* short transaction ID type */ +typedef uint16 SHORT_TRANSACTION_ID; + +/* Length of CRC at end of pages */ +#define CRC_LENGTH 4 +/* + Length of disk drive sector size (we assume that writing it + to disk is atomic operation) +*/ +#define 
DISK_DRIVE_SECTOR_SIZE 512
+
+/* types of records in the transaction log */
+enum translog_record_type
+{
+  LOGREC_RESERVED_FOR_CHUNKS23= 0,
+  LOGREC_REDO_INSERT_ROW_HEAD= 1,
+  LOGREC_REDO_INSERT_ROW_TAIL= 2,
+  LOGREC_REDO_INSERT_ROW_BLOB= 3,
+  LOGREC_REDO_INSERT_ROW_BLOBS= 4,
+  LOGREC_REDO_PURGE_ROW= 5,
+  LOGREC_REDO_PURGE_BLOCKS= 6,
+  LOGREC_REDO_DELETE_ROW= 7,
+  LOGREC_REDO_UPDATE_ROW_HEAD= 8,
+  LOGREC_REDO_INDEX= 9,
+  LOGREC_REDO_UNDELETE_ROW= 10,
+  LOGREC_CLR_END= 11,
+  LOGREC_PURGE_END= 12,
+  LOGREC_UNDO_ROW_INSERT= 13,
+  LOGREC_UNDO_ROW_DELETE= 14,
+  LOGREC_UNDO_ROW_UPDATE= 15,
+  LOGREC_UNDO_KEY_INSERT= 16,
+  LOGREC_UNDO_KEY_DELETE= 17,
+  LOGREC_PREPARE= 18,
+  LOGREC_PREPARE_WITH_UNDO_PURGE= 19,
+  LOGREC_COMMIT= 20,
+  LOGREC_COMMIT_WITH_UNDO_PURGE= 21,
+  LOGREC_CHECKPOINT_PAGE= 22,
+  LOGREC_CHECKPOINT_TRAN= 23,
+  LOGREC_CHECKPOINT_TABL= 24,
+  LOGREC_REDO_CREATE_TABLE= 25,
+  LOGREC_REDO_RENAME_TABLE= 26,
+  LOGREC_REDO_DROP_TABLE= 27,
+  LOGREC_REDO_TRUNCATE_TABLE= 28,
+  LOGREC_FILE_ID= 29,
+  LOGREC_LONG_TRANSACTION_ID= 30,
+  LOGREC_RESERVED_FUTURE_EXTENSION= 63
+};
+#define LOGREC_NUMBER_OF_TYPES 64 /* Maximum, can't be extended */
+
+/* Size of a log file; one log file is restricted to 4G */
+typedef uint32 translog_size_t;
+
+#define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024
+
+typedef struct st_translog_group_descriptor
+{
+  TRANSLOG_ADDRESS addr;
+  uint8 num;
+} TRANSLOG_GROUP;
+
+
+typedef struct st_translog_header_buffer
+{
+  /* LSN of the read record */
+  LSN lsn;
+  /* array of group descriptors, can be used only if groups_no > 0 */
+  TRANSLOG_GROUP *groups;
+  /* short transaction ID or 0 if it has no sense for the record */
+  SHORT_TRANSACTION_ID short_trid;
+  /*
+    The record length in the buffer (including the read header, but
+    excluding the hidden part of the record (type, short TrID, length))
+  */
+  translog_size_t record_length;
+  /*
+    Buffer for the decoded header of the record (contents depend on the
+    record type)
+  */
+  byte header[TRANSLOG_RECORD_HEADER_MAX_SIZE];
+  /* number of groups listed in the groups array */
+  uint groups_no;
+  /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */
+  uint chunk0_pages;
+  /* type of the read record */
+  enum translog_record_type type;
+  /* chunk 0 data address (valid only if groups_no > 0) */
+  TRANSLOG_ADDRESS chunk0_data_addr;
+  /*
+    Real compressed LSN(s) size economy (<number of LSN(s)>*7 - <real_size>)
+  */
+  uint16 compressed_LSN_economy;
+  /* offset in the first chunk of body data not read into the header */
+  uint16 non_header_data_start_offset;
+  /* length of body data not read into the header, in this first chunk */
+  uint16 non_header_data_len;
+  /* chunk 0 data size (valid only if groups_no > 0) */
+  uint16 chunk0_data_len;
+} TRANSLOG_HEADER_BUFFER;
+
+
+typedef struct st_translog_scanner_data
+{
+  byte buffer[TRANSLOG_PAGE_SIZE];      /* buffer for page content */
+  TRANSLOG_ADDRESS page_addr;           /* current page address */
+  /* end of the log which we saw last time */
+  TRANSLOG_ADDRESS horizon;
+  TRANSLOG_ADDRESS last_file_page;      /* Last page in this file */
+  byte *page;                           /* page content pointer */
+  /* offset of the chunk in the page */
+  translog_size_t page_offset;
+  /* set horizon only once at init */
+  my_bool fixed_horizon;
+} TRANSLOG_SCANNER_DATA;
+
+
+struct st_translog_reader_data
+{
+  TRANSLOG_HEADER_BUFFER header;        /* Header */
+  TRANSLOG_SCANNER_DATA scanner;        /* chunks scanner */
+  translog_size_t body_offset;          /* current chunk body offset */
+  /* data offset from the record beginning */
+  translog_size_t current_offset;
+  /* number of bytes read in header
*/ + uint16 read_header; + uint16 chunk_size; /* current chunk size */ + uint current_group; /* current group */ + uint current_chunk; /* current chunk in the group */ + my_bool eor; /* end of the record */ +}; + + +my_bool translog_init(const char *directory, uint32 log_file_max_size, + uint32 server_version, uint32 server_id, + PAGECACHE *pagecache, uint flags); + +my_bool translog_write_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + void *tcb, + translog_size_t part1_length, + byte *part1_buff, ...); + +void translog_destroy(); + +translog_size_t translog_read_record_header(LSN lsn, + TRANSLOG_HEADER_BUFFER *buff); + +void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff); + +translog_size_t translog_read_record(LSN lsn, + translog_size_t offset, + translog_size_t length, + byte *buffer, + struct st_translog_reader_data *data); + +my_bool translog_flush(LSN lsn); + +my_bool translog_init_scanner(LSN lsn, + my_bool fixed_horizon, + struct st_translog_scanner_data *scanner); + +translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA + *scanner, + TRANSLOG_HEADER_BUFFER *buff); + diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h new file mode 100644 index 00000000000..1789d3ce61b --- /dev/null +++ b/storage/maria/ma_loghandler_lsn.h @@ -0,0 +1,56 @@ +#ifndef _ma_loghandler_lsn_h +#define _ma_loghandler_lsn_h + +/* + Transaction log record address: + file_no << 32 | offset + file_no is only 3 bytes so we can use signed integer to make + comparison more simple. +*/ +typedef int64 TRANSLOG_ADDRESS; + +/* + Compare addresses + A1 > A2 -> result > 0 + A1 == A2 -> 0 + A1 < A2 -> result < 0 +*/ +#define cmp_translog_addr(A1,A2) ((A1) - (A2)) + +/* LSN type (address of certain log record chank */ +typedef TRANSLOG_ADDRESS LSN; + +/* Gets file number part of a LSN/log address */ +#define LSN_FILE_NO(L) ((L) >> 32) + +/* Gets raw file number part of a LSN/log address */ +#define LSN_FINE_NO_PART(L) ((L) & ((int64)0xFFFFFF00000000LL)) + +/* Gets record offset of a LSN/log address */ +#define LSN_OFFSET(L) ((L) & 0xFFFFFFFFL) + +/* Makes lsn/log address from file number and record offset */ +#define MAKE_LSN(F,S) ((((uint64)(F)) << 32) | (S)) + +/* checks LSN */ +#define LSN_VALID(L) DBUG_ASSERT((L) >= 0 && (L) < (uint64)0xFFFFFFFFFFFFFFLL) + +/* size of stored LSN on a disk */ +#define LSN_STORE_SIZE 7 + +/* Puts LSN into buffer (dst) */ +#define lsn_store(dst, lsn) \ + do { \ + int3store((dst), LSN_FILE_NO(lsn)); \ + int4store((dst) + 3, LSN_OFFSET(lsn)); \ + } while (0) + +/* Unpacks LSN from the buffer (P) */ +#define lsn_korr(P) MAKE_LSN(uint3korr(P), uint4korr((P) + 3)) + +/* what we need to add to LSN to increase it on one file */ +#define LSN_ONE_FILE ((int64)0x100000000LL) + +#define LSN_REPLACE_OFFSET(L, S) (LSN_FINE_NO_PART(L) | (S)) + +#endif diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c new file mode 100644 index 00000000000..19113196f15 --- /dev/null +++ b/storage/maria/ma_open.c @@ -0,0 +1,1324 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* open a isam-database */ + +#include "ma_fulltext.h" +#include "ma_sp_defs.h" +#include "ma_rt_index.h" +#include "ma_blockrec.h" +#include <m_ctype.h> + +#if defined(MSDOS) || defined(__WIN__) +#ifdef __WIN__ +#include <fcntl.h> +#else +#include <process.h> /* Prototype for getpid */ +#endif +#endif +#ifdef VMS +#include "static.c" +#endif + +static void setup_key_functions(MARIA_KEYDEF *keyinfo); +static my_bool maria_scan_init_dummy(MARIA_HA *info); +static void maria_scan_end_dummy(MARIA_HA *info); +static my_bool maria_once_init_dummy(MARIA_SHARE *, File); +static my_bool maria_once_end_dummy(MARIA_SHARE *); +static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base); + +#define get_next_element(to,pos,size) { memcpy((char*) to,pos,(size_t) size); \ + pos+=size;} + + +#define disk_pos_assert(pos, end_pos) \ +if (pos > end_pos) \ +{ \ + my_errno=HA_ERR_CRASHED; \ + goto err; \ +} + + +/****************************************************************************** +** Return the shared struct if the table is already open. +** In MySQL the server will handle version issues. +******************************************************************************/ + +MARIA_HA *_ma_test_if_reopen(char *filename) +{ + LIST *pos; + + for (pos=maria_open_list ; pos ; pos=pos->next) + { + MARIA_HA *info=(MARIA_HA*) pos->data; + MARIA_SHARE *share=info->s; + if (!strcmp(share->unique_file_name,filename) && share->last_version) + return info; + } + return 0; +} + + +/****************************************************************************** + open a MARIA database. + See my_base.h for the handle_locking argument + if handle_locking and HA_OPEN_ABORT_IF_CRASHED then abort if the table + is marked crashed or if we are not using locking and the table doesn't + have an open count of 0. 
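+
+  A minimal usage sketch (illustrative only: the table path is an example
+  and maria_close() is assumed to be the matching close call):
+
+    MARIA_HA *file= maria_open("./test/t1", O_RDWR,
+                               HA_OPEN_ABORT_IF_CRASHED);
+    if (!file)
+      return my_errno;                  /* open failed */
+    ...
+    maria_close(file);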
+******************************************************************************/ + +MARIA_HA *maria_open(const char *name, int mode, uint open_flags) +{ + int kfile,open_mode,save_errno,have_rtree=0; + uint i,j,len,errpos,head_length,base_pos,info_length,keys, + key_parts,unique_key_parts,fulltext_keys,uniques; + char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN], + data_name[FN_REFLEN]; + char *disk_cache, *disk_pos, *end_pos; + MARIA_HA info,*m_info,*old_info; + MARIA_SHARE share_buff,*share; + ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY]; + ulonglong max_key_file_length, max_data_file_length; + DBUG_ENTER("maria_open"); + + LINT_INIT(m_info); + kfile= -1; + errpos= 0; + head_length=sizeof(share_buff.state.header); + bzero((byte*) &info,sizeof(info)); + + my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT, + MY_UNPACK_FILENAME),MYF(0)); + pthread_mutex_lock(&THR_LOCK_maria); + if (!(old_info=_ma_test_if_reopen(name_buff))) + { + share= &share_buff; + bzero((gptr) &share_buff,sizeof(share_buff)); + share_buff.state.rec_per_key_part=rec_per_key_part; + share_buff.state.key_root=key_root; + share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff), + maria_key_cache); + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_open", + if (strstr(name, "/t1")) + { + my_errno= HA_ERR_CRASHED; + goto err; + }); + if ((kfile=my_open(name_buff,(open_mode=O_RDWR) | O_SHARE,MYF(0))) < 0) + { + if ((errno != EROFS && errno != EACCES) || + mode != O_RDONLY || + (kfile=my_open(name_buff,(open_mode=O_RDONLY) | O_SHARE,MYF(0))) < 0) + goto err; + } + share->mode=open_mode; + errpos= 1; + if (my_read(kfile,(char*) share->state.header.file_version,head_length, + MYF(MY_NABP))) + { + my_errno= HA_ERR_NOT_A_TABLE; + goto err; + } + if (memcmp((byte*) share->state.header.file_version, + (byte*) maria_file_magic, 4)) + { + DBUG_PRINT("error",("Wrong header in %s",name_buff)); + DBUG_DUMP("error_dump",(char*) share->state.header.file_version, + head_length); + my_errno=HA_ERR_NOT_A_TABLE; + goto err; + } + share->options= mi_uint2korr(share->state.header.options); + if (share->options & + ~(HA_OPTION_PACK_RECORD | HA_OPTION_PACK_KEYS | + HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA | + HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE | + HA_OPTION_RELIES_ON_SQL_LAYER)) + { + DBUG_PRINT("error",("wrong options: 0x%lx", share->options)); + my_errno=HA_ERR_OLD_FILE; + goto err; + } + if ((share->options & HA_OPTION_RELIES_ON_SQL_LAYER) && + ! 
(open_flags & HA_OPEN_FROM_SQL_LAYER)) + { + DBUG_PRINT("error", ("table cannot be openned from non-sql layer")); + my_errno= HA_ERR_UNSUPPORTED; + goto err; + } + /* Don't call realpath() if the name can't be a link */ + if (!strcmp(name_buff, org_name) || + my_readlink(index_name, org_name, MYF(0)) == -1) + (void) strmov(index_name, org_name); + *strrchr(org_name, '.')= '\0'; + (void) fn_format(data_name,org_name,"",MARIA_NAME_DEXT, + MY_APPEND_EXT|MY_UNPACK_FILENAME|MY_RESOLVE_SYMLINKS); + + info_length=mi_uint2korr(share->state.header.header_length); + base_pos= mi_uint2korr(share->state.header.base_pos); + if (!(disk_cache=(char*) my_alloca(info_length+128))) + { + my_errno=ENOMEM; + goto err; + } + end_pos=disk_cache+info_length; + errpos= 2; + + VOID(my_seek(kfile,0L,MY_SEEK_SET,MYF(0))); + errpos= 3; + if (my_read(kfile,disk_cache,info_length,MYF(MY_NABP))) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + len=mi_uint2korr(share->state.header.state_info_length); + keys= (uint) share->state.header.keys; + uniques= (uint) share->state.header.uniques; + fulltext_keys= (uint) share->state.header.fulltext_keys; + key_parts= mi_uint2korr(share->state.header.key_parts); + unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts); + if (len != MARIA_STATE_INFO_SIZE) + { + DBUG_PRINT("warning", + ("saved_state_info_length: %d state_info_length: %d", + len,MARIA_STATE_INFO_SIZE)); + } + share->state_diff_length=len-MARIA_STATE_INFO_SIZE; + + _ma_state_info_read(disk_cache, &share->state); + len= mi_uint2korr(share->state.header.base_info_length); + if (len != MARIA_BASE_INFO_SIZE) + { + DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d", + len,MARIA_BASE_INFO_SIZE)); + } + disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base); + share->state.state_length=base_pos; + + if (!(open_flags & HA_OPEN_FOR_REPAIR) && + ((share->state.changed & STATE_CRASHED) || + ((open_flags & HA_OPEN_ABORT_IF_CRASHED) && + (my_disable_locking && share->state.open_count)))) + { + DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u " + "changed: %u open_count: %u !locking: %d", + open_flags, share->state.changed, + share->state.open_count, my_disable_locking)); + my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ? + HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE); + goto err; + } + + /* sanity check */ + if (share->base.keystart > 65535 || share->base.rec_reflength > 8) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + + key_parts+=fulltext_keys*FT_SEGS; + if (share->base.max_key_length > HA_MAX_KEY_BUFF || keys > MARIA_MAX_KEY || + key_parts >= MARIA_MAX_KEY * HA_MAX_KEY_SEG) + { + DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts)); + my_errno=HA_ERR_UNSUPPORTED; + goto err; + } + + /* Correct max_file_length based on length of sizeof(off_t) */ + max_data_file_length= + (share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? 
+ (((ulonglong) 1 << (share->base.rec_reflength*8))-1) : + (_ma_safe_mul(share->base.pack_reclength, + (ulonglong) 1 << (share->base.rec_reflength*8))-1); + + max_key_file_length= + _ma_safe_mul(MARIA_MIN_KEY_BLOCK_LENGTH, + ((ulonglong) 1 << (share->base.key_reflength*8))-1); +#if SIZEOF_OFF_T == 4 + set_if_smaller(max_data_file_length, INT_MAX32); + set_if_smaller(max_key_file_length, INT_MAX32); +#endif + share->base.max_data_file_length=(my_off_t) max_data_file_length; + share->base.max_key_file_length=(my_off_t) max_key_file_length; + + if (share->options & HA_OPTION_COMPRESS_RECORD) + share->base.max_key_length+=2; /* For safety */ + + if (!my_multi_malloc(MY_WME, + &share,sizeof(*share), + &share->state.rec_per_key_part,sizeof(long)*key_parts, + &share->keyinfo,keys*sizeof(MARIA_KEYDEF), + &share->uniqueinfo,uniques*sizeof(MARIA_UNIQUEDEF), + &share->keyparts, + (key_parts+unique_key_parts+keys+uniques) * + sizeof(HA_KEYSEG), + &share->rec, + (share->base.fields+1)*sizeof(MARIA_COLUMNDEF), + &share->blobs,sizeof(MARIA_BLOB)*share->base.blobs, + &share->unique_file_name,strlen(name_buff)+1, + &share->index_file_name,strlen(index_name)+1, + &share->data_file_name,strlen(data_name)+1, + &share->state.key_root,keys*sizeof(my_off_t), +#ifdef THREAD + &share->key_root_lock,sizeof(rw_lock_t)*keys, +#endif + &share->mmap_lock,sizeof(rw_lock_t), + NullS)) + goto err; + errpos= 4; + + *share=share_buff; + memcpy((char*) share->state.rec_per_key_part, + (char*) rec_per_key_part, sizeof(long)*key_parts); + memcpy((char*) share->state.key_root, + (char*) key_root, sizeof(my_off_t)*keys); + strmov(share->unique_file_name, name_buff); + share->unique_name_length= strlen(name_buff); + strmov(share->index_file_name, index_name); + strmov(share->data_file_name, data_name); + + share->block_size= maria_block_size; + { + HA_KEYSEG *pos=share->keyparts; + for (i=0 ; i < keys ; i++) + { + share->keyinfo[i].share= share; + disk_pos=_ma_keydef_read(disk_pos, &share->keyinfo[i]); + disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE, + end_pos); + if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE) + have_rtree=1; + set_if_smaller(share->block_size,share->keyinfo[i].block_length); + share->keyinfo[i].seg=pos; + for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++) + { + disk_pos=_ma_keyseg_read(disk_pos, pos); + + if (pos->type == HA_KEYTYPE_TEXT || + pos->type == HA_KEYTYPE_VARTEXT1 || + pos->type == HA_KEYTYPE_VARTEXT2) + { + if (!pos->language) + pos->charset=default_charset_info; + else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME)))) + { + my_errno=HA_ERR_UNKNOWN_CHARSET; + goto err; + } + } + else if (pos->type == HA_KEYTYPE_BINARY) + pos->charset= &my_charset_bin; + } + if (share->keyinfo[i].flag & HA_SPATIAL) + { +#ifdef HAVE_SPATIAL + uint sp_segs=SPDIMS*2; + share->keyinfo[i].seg=pos-sp_segs; + share->keyinfo[i].keysegs--; +#else + my_errno=HA_ERR_UNSUPPORTED; + goto err; +#endif + } + else if (share->keyinfo[i].flag & HA_FULLTEXT) + { + if (!fulltext_keys) + { /* 4.0 compatibility code, to be removed in 5.0 */ + share->keyinfo[i].seg=pos-FT_SEGS; + share->keyinfo[i].keysegs-=FT_SEGS; + } + else + { + uint j; + share->keyinfo[i].seg=pos; + for (j=0; j < FT_SEGS; j++) + { + *pos= ft_keysegs[j]; + pos[0].language= pos[-1].language; + if (!(pos[0].charset= pos[-1].charset)) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + pos++; + } + } + if (!share->ft2_keyinfo.seg) + { + memcpy(& share->ft2_keyinfo, & share->keyinfo[i], sizeof(MARIA_KEYDEF)); + 
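+          /*
+            ft2_keyinfo describes the second level of a two-level fulltext
+            index: one key segment holding the word weight, with the row
+            reference appended (HA_FT_WLEN + rec_reflength bytes in total).
+          */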
share->ft2_keyinfo.keysegs=1; + share->ft2_keyinfo.flag=0; + share->ft2_keyinfo.keylength= + share->ft2_keyinfo.minlength= + share->ft2_keyinfo.maxlength=HA_FT_WLEN+share->base.rec_reflength; + share->ft2_keyinfo.seg=pos-1; + share->ft2_keyinfo.end=pos; + setup_key_functions(& share->ft2_keyinfo); + } + } + setup_key_functions(share->keyinfo+i); + share->keyinfo[i].end=pos; + pos->type=HA_KEYTYPE_END; /* End */ + pos->length=share->base.rec_reflength; + pos->null_bit=0; + pos->flag=0; /* For purify */ + pos++; + } + for (i=0 ; i < uniques ; i++) + { + disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]); + disk_pos_assert(disk_pos + share->uniqueinfo[i].keysegs * + HA_KEYSEG_SIZE, end_pos); + share->uniqueinfo[i].seg=pos; + for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++) + { + disk_pos=_ma_keyseg_read(disk_pos, pos); + if (pos->type == HA_KEYTYPE_TEXT || + pos->type == HA_KEYTYPE_VARTEXT1 || + pos->type == HA_KEYTYPE_VARTEXT2) + { + if (!pos->language) + pos->charset=default_charset_info; + else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME)))) + { + my_errno=HA_ERR_UNKNOWN_CHARSET; + goto err; + } + } + } + share->uniqueinfo[i].end=pos; + pos->type=HA_KEYTYPE_END; /* End */ + pos->null_bit=0; + pos->flag=0; + pos++; + } + share->ftparsers= 0; + } + share->data_file_type= share->state.header.data_file_type; + share->base_length= (BASE_ROW_HEADER_SIZE + + share->base.is_nulls_extended + + share->base.null_bytes + + share->base.pack_bytes + + test(share->options & HA_OPTION_CHECKSUM)); + if (share->base.transactional) + share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE; + share->base.default_rec_buff_size= max(share->base.pack_reclength, + share->base.max_key_length); + if (share->data_file_type == DYNAMIC_RECORD) + { + share->base.extra_rec_buff_size= + (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER) + MARIA_SPLIT_LENGTH + + MARIA_REC_BUFF_OFFSET); + share->base.default_rec_buff_size+= share->base.extra_rec_buff_size; + } + disk_pos_assert(disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE, + end_pos); + for (i= j= 0 ; i < share->base.fields ; i++) + { + disk_pos=_ma_recinfo_read(disk_pos,&share->rec[i]); + share->rec[i].pack_type=0; + share->rec[i].huff_tree=0; + if (share->rec[i].type == (int) FIELD_BLOB) + { + share->blobs[j].pack_length= + share->rec[i].length-maria_portable_sizeof_char_ptr;; + share->blobs[j].offset= share->rec[i].offset; + j++; + } + } + share->rec[i].type=(int) FIELD_LAST; /* End marker */ + + if (_ma_open_datafile(&info, share, -1)) + goto err; + errpos= 5; + + share->kfile=kfile; + share->this_process=(ulong) getpid(); + share->last_process= share->state.process; + share->base.key_parts=key_parts; + share->base.all_key_parts=key_parts+unique_key_parts; + if (!(share->last_version=share->state.version)) + share->last_version=1; /* Safety */ + share->rec_reflength=share->base.rec_reflength; /* May be changed */ + share->base.margin_key_file_length=(share->base.max_key_file_length - + (keys ? 
MARIA_INDEX_BLOCK_MARGIN * + share->block_size * keys : 0)); + share->block_size= share->base.block_size; + my_afree((gptr) disk_cache); + _ma_setup_functions(share); + if ((*share->once_init)(share, info.dfile)) + goto err; + share->is_log_table= FALSE; +#ifdef THREAD + thr_lock_init(&share->lock); + VOID(pthread_mutex_init(&share->intern_lock,MY_MUTEX_INIT_FAST)); + for (i=0; i<keys; i++) + VOID(my_rwlock_init(&share->key_root_lock[i], NULL)); + VOID(my_rwlock_init(&share->mmap_lock, NULL)); + if (!thr_lock_inited) + { + /* Probably a single threaded program; Don't use concurrent inserts */ + maria_concurrent_insert=0; + } + else if (maria_concurrent_insert) + { + share->concurrent_insert= + ((share->options & (HA_OPTION_READ_ONLY_DATA | HA_OPTION_TMP_TABLE | + HA_OPTION_COMPRESS_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD)) || + (open_flags & HA_OPEN_TMP_TABLE) || + share->data_file_type == BLOCK_RECORD || + have_rtree) ? 0 : 1; + if (share->concurrent_insert) + { + share->lock.get_status=_ma_get_status; + share->lock.copy_status=_ma_copy_status; + share->lock.update_status=_ma_update_status; + share->lock.check_status=_ma_check_status; + } + } +#endif + } + else + { + share= old_info->s; + if (mode == O_RDWR && share->mode == O_RDONLY) + { + my_errno=EACCES; /* Can't open in write mode */ + goto err; + } + if (share->data_file_type == BLOCK_RECORD) + info.dfile= share->bitmap.file; + else if (_ma_open_datafile(&info, share, old_info->dfile)) + goto err; + errpos= 5; + have_rtree= old_info->maria_rtree_recursion_state != NULL; + } + + /* alloc and set up private structure parts */ + if (!my_multi_malloc(MY_WME, + &m_info,sizeof(MARIA_HA), + &info.blobs,sizeof(MARIA_BLOB)*share->base.blobs, + &info.buff,(share->base.max_key_block_length*2+ + share->base.max_key_length), + &info.lastkey,share->base.max_key_length*3+1, + &info.first_mbr_key, share->base.max_key_length, + &info.filename,strlen(name)+1, + &info.maria_rtree_recursion_state,have_rtree ? 
1024 : 0, + NullS)) + goto err; + errpos= 6; + + if (!have_rtree) + info.maria_rtree_recursion_state= NULL; + + strmov(info.filename,name); + memcpy(info.blobs,share->blobs,sizeof(MARIA_BLOB)*share->base.blobs); + info.lastkey2=info.lastkey+share->base.max_key_length; + + info.s=share; + info.cur_row.lastpos= HA_OFFSET_ERROR; + info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND); + info.opt_flag=READ_CHECK_USED; + info.this_unique= (ulong) info.dfile; /* Uniq number in process */ + if (share->data_file_type == COMPRESSED_RECORD) + info.this_unique= share->state.unique; + info.this_loop=0; /* Update counter */ + info.last_unique= share->state.unique; + info.last_loop= share->state.update_count; + if (mode == O_RDONLY) + share->options|=HA_OPTION_READ_ONLY_DATA; + info.lock_type=F_UNLCK; + info.quick_mode=0; + info.bulk_insert=0; + info.ft1_to_ft2=0; + info.errkey= -1; + info.page_changed=1; + info.keyread_buff= info.buff + share->base.max_key_block_length; + if ((*share->init)(&info)) + goto err; + + pthread_mutex_lock(&share->intern_lock); + info.read_record= share->read_record; + share->reopen++; + share->write_flag=MYF(MY_NABP | MY_WAIT_IF_FULL); + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + info.lock_type=F_RDLCK; + share->r_locks++; + share->tot_locks++; + } + if ((open_flags & HA_OPEN_TMP_TABLE) || + (share->options & HA_OPTION_TMP_TABLE)) + { + share->temporary= share->delay_key_write= 1; + + share->write_flag=MYF(MY_NABP); + share->w_locks++; /* We don't have to update status */ + share->tot_locks++; + info.lock_type=F_WRLCK; + } + if (((open_flags & HA_OPEN_DELAY_KEY_WRITE) || + (share->options & HA_OPTION_DELAY_KEY_WRITE)) && + maria_delay_key_write) + share->delay_key_write=1; + + info.state= &share->state.state; /* Change global values by default */ + pthread_mutex_unlock(&share->intern_lock); + + /* Allocate buffer for one record */ + /* prerequisites: info->rec_buffer == 0 && info->rec_buff_size == 0 */ + if (_ma_alloc_buffer(&info.rec_buff, &info.rec_buff_size, + share->base.default_rec_buff_size)) + goto err; + + bzero(info.rec_buff, share->base.default_rec_buff_size); + + *m_info=info; +#ifdef THREAD + thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info); +#endif + m_info->open_list.data=(void*) m_info; + maria_open_list=list_add(maria_open_list,&m_info->open_list); + + pthread_mutex_unlock(&THR_LOCK_maria); + DBUG_RETURN(m_info); + +err: + save_errno=my_errno ? 
my_errno : HA_ERR_END_OF_FILE; + if ((save_errno == HA_ERR_CRASHED) || + (save_errno == HA_ERR_CRASHED_ON_USAGE) || + (save_errno == HA_ERR_CRASHED_ON_REPAIR)) + _ma_report_error(save_errno, name); + switch (errpos) { + case 6: + (*share->end)(&info); + my_free((gptr) m_info,MYF(0)); + /* fall through */ + case 5: + if (share->data_file_type != BLOCK_RECORD) + VOID(my_close(info.dfile,MYF(0))); + if (old_info) + break; /* Don't remove open table */ + (*share->once_end)(share); + /* fall through */ + case 4: + my_free((gptr) share,MYF(0)); + /* fall through */ + case 3: + /* fall through */ + case 2: + my_afree((gptr) disk_cache); + /* fall through */ + case 1: + VOID(my_close(kfile,MYF(0))); + /* fall through */ + case 0: + default: + break; + } + pthread_mutex_unlock(&THR_LOCK_maria); + my_errno=save_errno; + DBUG_RETURN (NULL); +} /* maria_open */ + + +/* + Reallocate a buffer, if the current buffer is not large enough +*/ + +my_bool _ma_alloc_buffer(byte **old_addr, my_size_t *old_size, + my_size_t new_size) +{ + if (*old_size < new_size) + { + byte *addr; + if (!(addr= (byte*) my_realloc((gptr) *old_addr, new_size, + MYF(MY_ALLOW_ZERO_PTR)))) + return 1; + *old_addr= addr; + *old_size= new_size; + } + return 0; +} + + +ulonglong _ma_safe_mul(ulonglong a, ulonglong b) +{ + ulonglong max_val= ~ (ulonglong) 0; /* my_off_t is unsigned */ + + if (!a || max_val / a < b) + return max_val; + return a*b; +} + + /* Set up functions in structs */ + +void _ma_setup_functions(register MARIA_SHARE *share) +{ + share->once_init= maria_once_init_dummy; + share->once_end= maria_once_end_dummy; + share->init= maria_scan_init_dummy; + share->end= maria_scan_end_dummy; + share->scan_init= maria_scan_init_dummy; + share->scan_end= maria_scan_end_dummy; + share->write_record_init= _ma_write_init_default; + share->write_record_abort= _ma_write_abort_default; + + switch (share->data_file_type) { + case COMPRESSED_RECORD: + share->read_record= _ma_read_pack_record; + share->scan= _ma_read_rnd_pack_record; + share->once_init= _ma_once_init_pack_row; + share->once_end= _ma_once_end_pack_row; + /* Calculate checksum according how the original row was stored */ + if (share->state.header.org_data_file_type == STATIC_RECORD) + share->calc_checksum= _ma_static_checksum; + else + share->calc_checksum= _ma_checksum; + share->calc_write_checksum= share->calc_checksum; + break; + case DYNAMIC_RECORD: + share->read_record= _ma_read_dynamic_record; + share->scan= _ma_read_rnd_dynamic_record; + share->delete_record= _ma_delete_dynamic_record; + share->compare_record= _ma_cmp_dynamic_record; + share->compare_unique= _ma_cmp_dynamic_unique; + share->calc_checksum= share->calc_write_checksum= _ma_checksum; + /* add bits used to pack data to pack_reclength for faster allocation */ + share->base.pack_reclength+= share->base.pack_bytes; + if (share->base.blobs) + { + share->update_record= _ma_update_blob_record; + share->write_record= _ma_write_blob_record; + } + else + { + share->write_record= _ma_write_dynamic_record; + share->update_record= _ma_update_dynamic_record; + } + break; + case STATIC_RECORD: + share->read_record= _ma_read_static_record; + share->scan= _ma_read_rnd_static_record; + share->delete_record= _ma_delete_static_record; + share->compare_record= _ma_cmp_static_record; + share->update_record= _ma_update_static_record; + share->write_record= _ma_write_static_record; + share->compare_unique= _ma_cmp_static_unique; + share->calc_checksum= share->calc_write_checksum= _ma_static_checksum; + break; + case 
BLOCK_RECORD: + share->once_init= _ma_once_init_block_row; + share->once_end= _ma_once_end_block_row; + share->init= _ma_init_block_row; + share->end= _ma_end_block_row; + share->write_record_init= _ma_write_init_block_record; + share->write_record_abort= _ma_write_abort_block_record; + share->scan_init= _ma_scan_init_block_record; + share->scan_end= _ma_scan_end_block_record; + share->read_record= _ma_read_block_record; + share->scan= _ma_scan_block_record; + share->delete_record= _ma_delete_block_record; + share->compare_record= _ma_compare_block_record; + share->update_record= _ma_update_block_record; + share->write_record= _ma_write_block_record; + share->compare_unique= _ma_cmp_block_unique; + share->calc_checksum= _ma_checksum; + share->calc_write_checksum= 0; + break; + } + share->file_read= _ma_nommap_pread; + share->file_write= _ma_nommap_pwrite; + if (!(share->options & HA_OPTION_CHECKSUM) && + share->data_file_type != COMPRESSED_RECORD) + share->calc_checksum= share->calc_write_checksum= 0; + return; +} + + +static void setup_key_functions(register MARIA_KEYDEF *keyinfo) +{ + if (keyinfo->key_alg == HA_KEY_ALG_RTREE) + { +#ifdef HAVE_RTREE_KEYS + keyinfo->ck_insert = maria_rtree_insert; + keyinfo->ck_delete = maria_rtree_delete; +#else + DBUG_ASSERT(0); /* maria_open should check it never happens */ +#endif + } + else + { + keyinfo->ck_insert = _ma_ck_write; + keyinfo->ck_delete = _ma_ck_delete; + } + if (keyinfo->flag & HA_BINARY_PACK_KEY) + { /* Simple prefix compression */ + keyinfo->bin_search= _ma_seq_search; + keyinfo->get_key= _ma_get_binary_pack_key; + keyinfo->pack_key= _ma_calc_bin_pack_key_length; + keyinfo->store_key= _ma_store_bin_pack_key; + } + else if (keyinfo->flag & HA_VAR_LENGTH_KEY) + { + keyinfo->get_key= _ma_get_pack_key; + if (keyinfo->seg[0].flag & HA_PACK_KEY) + { /* Prefix compression */ + if (!keyinfo->seg->charset || use_strnxfrm(keyinfo->seg->charset) || + (keyinfo->seg->flag & HA_NULL_PART)) + keyinfo->bin_search= _ma_seq_search; + else + keyinfo->bin_search= _ma_prefix_search; + keyinfo->pack_key= _ma_calc_var_pack_key_length; + keyinfo->store_key= _ma_store_var_pack_key; + } + else + { + keyinfo->bin_search= _ma_seq_search; + keyinfo->pack_key= _ma_calc_var_key_length; /* Variable length key */ + keyinfo->store_key= _ma_store_static_key; + } + } + else + { + keyinfo->bin_search= _ma_bin_search; + keyinfo->get_key= _ma_get_static_key; + keyinfo->pack_key= _ma_calc_static_key_length; + keyinfo->store_key= _ma_store_static_key; + } + return; +} + + +/* + Function to save and store the header in the index file (.MYI) +*/ + +uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite) +{ + uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE]; + uchar *ptr=buff; + uint i, keys= (uint) state->header.keys; + DBUG_ENTER("_ma_state_info_write"); + + memcpy_fixed(ptr,&state->header,sizeof(state->header)); + ptr+=sizeof(state->header); + + /* open_count must be first because of _ma_mark_file_changed ! 
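+     (it updates open_count and the changed flag in place on disk without
+     rewriting the rest of the state)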
*/ + mi_int2store(ptr,state->open_count); ptr+= 2; + *ptr++= (uchar)state->changed; + *ptr++= state->sortkey; + mi_rowstore(ptr,state->state.records); ptr+= 8; + mi_rowstore(ptr,state->state.del); ptr+= 8; + mi_rowstore(ptr,state->split); ptr+= 8; + mi_sizestore(ptr,state->dellink); ptr+= 8; + mi_sizestore(ptr,state->first_bitmap_with_space); ptr+= 8; + mi_sizestore(ptr,state->state.key_file_length); ptr+= 8; + mi_sizestore(ptr,state->state.data_file_length); ptr+= 8; + mi_sizestore(ptr,state->state.empty); ptr+= 8; + mi_sizestore(ptr,state->state.key_empty); ptr+= 8; + mi_int8store(ptr,state->auto_increment); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->state.checksum); ptr+= 8; + mi_int4store(ptr,state->process); ptr+= 4; + mi_int4store(ptr,state->unique); ptr+= 4; + mi_int4store(ptr,state->status); ptr+= 4; + mi_int4store(ptr,state->update_count); ptr+= 4; + + ptr+= state->state_diff_length; + + for (i=0; i < keys; i++) + { + mi_sizestore(ptr,state->key_root[i]); ptr+= 8; + } + mi_sizestore(ptr,state->key_del); ptr+= 8; + if (pWrite & 2) /* From maria_chk */ + { + uint key_parts= mi_uint2korr(state->header.key_parts); + mi_int4store(ptr,state->sec_index_changed); ptr+= 4; + mi_int4store(ptr,state->sec_index_used); ptr+= 4; + mi_int4store(ptr,state->version); ptr+= 4; + mi_int8store(ptr,state->key_map); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->create_time); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->recover_time); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->check_time); ptr+= 8; + mi_sizestore(ptr,state->rec_per_key_rows); ptr+= 8; + for (i=0 ; i < key_parts ; i++) + { + mi_int4store(ptr,state->rec_per_key_part[i]); ptr+=4; + } + } + + if (pWrite & 1) + DBUG_RETURN(my_pwrite(file,(char*) buff, (uint) (ptr-buff), 0L, + MYF(MY_NABP | MY_THREADSAFE))); + DBUG_RETURN(my_write(file, (char*) buff, (uint) (ptr-buff), + MYF(MY_NABP))); +} + + +byte *_ma_state_info_read(byte *ptr, MARIA_STATE_INFO *state) +{ + uint i,keys,key_parts; + memcpy_fixed(&state->header,ptr, sizeof(state->header)); + ptr+= sizeof(state->header); + keys= (uint) state->header.keys; + key_parts= mi_uint2korr(state->header.key_parts); + + state->open_count = mi_uint2korr(ptr); ptr+= 2; + state->changed= (my_bool) *ptr++; + state->sortkey= (uint) *ptr++; + state->state.records= mi_rowkorr(ptr); ptr+= 8; + state->state.del = mi_rowkorr(ptr); ptr+= 8; + state->split = mi_rowkorr(ptr); ptr+= 8; + state->dellink= mi_sizekorr(ptr); ptr+= 8; + state->first_bitmap_with_space= mi_sizekorr(ptr); ptr+= 8; + state->state.key_file_length = mi_sizekorr(ptr); ptr+= 8; + state->state.data_file_length= mi_sizekorr(ptr); ptr+= 8; + state->state.empty = mi_sizekorr(ptr); ptr+= 8; + state->state.key_empty= mi_sizekorr(ptr); ptr+= 8; + state->auto_increment=mi_uint8korr(ptr); ptr+= 8; + state->state.checksum=(ha_checksum) mi_uint8korr(ptr);ptr+= 8; + state->process= mi_uint4korr(ptr); ptr+= 4; + state->unique = mi_uint4korr(ptr); ptr+= 4; + state->status = mi_uint4korr(ptr); ptr+= 4; + state->update_count=mi_uint4korr(ptr); ptr+= 4; + + ptr+= state->state_diff_length; + + for (i=0; i < keys; i++) + { + state->key_root[i]= mi_sizekorr(ptr); ptr+= 8; + } + state->key_del= mi_sizekorr(ptr); ptr+= 8; + state->sec_index_changed = mi_uint4korr(ptr); ptr+= 4; + state->sec_index_used = mi_uint4korr(ptr); ptr+= 4; + state->version = mi_uint4korr(ptr); ptr+= 4; + state->key_map = mi_uint8korr(ptr); ptr+= 8; + state->create_time = (time_t) mi_sizekorr(ptr); ptr+= 8; + state->recover_time =(time_t) mi_sizekorr(ptr); ptr+= 8; + 
state->check_time = (time_t) mi_sizekorr(ptr); ptr+= 8; + state->rec_per_key_rows=mi_sizekorr(ptr); ptr+= 8; + for (i=0 ; i < key_parts ; i++) + { + state->rec_per_key_part[i]= mi_uint4korr(ptr); ptr+=4; + } + return ptr; +} + + +uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state, my_bool pRead) +{ + char buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE]; + + if (!maria_single_user) + { + if (pRead) + { + if (my_pread(file, buff, state->state_length,0L, MYF(MY_NABP))) + return (MY_FILE_ERROR); + } + else if (my_read(file, buff, state->state_length,MYF(MY_NABP))) + return (MY_FILE_ERROR); + _ma_state_info_read(buff, state); + } + return 0; +} + + +/**************************************************************************** +** store and read of MARIA_BASE_INFO +****************************************************************************/ + +uint _ma_base_info_write(File file, MARIA_BASE_INFO *base) +{ + uchar buff[MARIA_BASE_INFO_SIZE], *ptr=buff; + + mi_sizestore(ptr,base->keystart); ptr+= 8; + mi_sizestore(ptr,base->max_data_file_length); ptr+= 8; + mi_sizestore(ptr,base->max_key_file_length); ptr+= 8; + mi_rowstore(ptr,base->records); ptr+= 8; + mi_rowstore(ptr,base->reloc); ptr+= 8; + mi_int4store(ptr,base->mean_row_length); ptr+= 4; + mi_int4store(ptr,base->reclength); ptr+= 4; + mi_int4store(ptr,base->pack_reclength); ptr+= 4; + mi_int4store(ptr,base->min_pack_length); ptr+= 4; + mi_int4store(ptr,base->max_pack_length); ptr+= 4; + mi_int4store(ptr,base->min_block_length); ptr+= 4; + mi_int2store(ptr,base->fields); ptr+= 2; + mi_int2store(ptr,base->fixed_not_null_fields); ptr+= 2; + mi_int2store(ptr,base->fixed_not_null_fields_length); ptr+= 2; + mi_int2store(ptr,base->max_field_lengths); ptr+= 2; + mi_int2store(ptr,base->pack_fields); ptr+= 2; + mi_int2store(ptr,0); ptr+= 2; + mi_int2store(ptr,base->null_bytes); ptr+= 2; + mi_int2store(ptr,base->original_null_bytes); ptr+= 2; + mi_int2store(ptr,base->field_offsets); ptr+= 2; + mi_int2store(ptr,base->min_row_length); ptr+= 2; + mi_int2store(ptr,base->block_size); ptr+= 2; + *ptr++= base->rec_reflength; + *ptr++= base->key_reflength; + *ptr++= base->keys; + *ptr++= base->auto_key; + *ptr++= base->transactional; + *ptr++= 0; /* Reserved */ + mi_int2store(ptr,base->pack_bytes); ptr+= 2; + mi_int2store(ptr,base->blobs); ptr+= 2; + mi_int2store(ptr,base->max_key_block_length); ptr+= 2; + mi_int2store(ptr,base->max_key_length); ptr+= 2; + mi_int2store(ptr,base->extra_alloc_bytes); ptr+= 2; + *ptr++= base->extra_alloc_procent; + bzero(ptr,16); ptr+= 16; /* extra */ + DBUG_ASSERT((ptr - buff) == MARIA_BASE_INFO_SIZE); + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + + +static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base) +{ + base->keystart= mi_sizekorr(ptr); ptr+= 8; + base->max_data_file_length= mi_sizekorr(ptr); ptr+= 8; + base->max_key_file_length= mi_sizekorr(ptr); ptr+= 8; + base->records= (ha_rows) mi_sizekorr(ptr); ptr+= 8; + base->reloc= (ha_rows) mi_sizekorr(ptr); ptr+= 8; + base->mean_row_length= mi_uint4korr(ptr); ptr+= 4; + base->reclength= mi_uint4korr(ptr); ptr+= 4; + base->pack_reclength= mi_uint4korr(ptr); ptr+= 4; + base->min_pack_length= mi_uint4korr(ptr); ptr+= 4; + base->max_pack_length= mi_uint4korr(ptr); ptr+= 4; + base->min_block_length= mi_uint4korr(ptr); ptr+= 4; + base->fields= mi_uint2korr(ptr); ptr+= 2; + base->fixed_not_null_fields= mi_uint2korr(ptr); ptr+= 2; + base->fixed_not_null_fields_length= mi_uint2korr(ptr);ptr+= 2; + base->max_field_lengths= 
mi_uint2korr(ptr); ptr+= 2; + base->pack_fields= mi_uint2korr(ptr); ptr+= 2; + ptr+= 2; + base->null_bytes= mi_uint2korr(ptr); ptr+= 2; + base->original_null_bytes= mi_uint2korr(ptr); ptr+= 2; + base->field_offsets= mi_uint2korr(ptr); ptr+= 2; + base->min_row_length= mi_uint2korr(ptr); ptr+= 2; + base->block_size= mi_uint2korr(ptr); ptr+= 2; + + base->rec_reflength= *ptr++; + base->key_reflength= *ptr++; + base->keys= *ptr++; + base->auto_key= *ptr++; + base->transactional= *ptr++; + ptr++; + base->pack_bytes= mi_uint2korr(ptr); ptr+= 2; + base->blobs= mi_uint2korr(ptr); ptr+= 2; + base->max_key_block_length= mi_uint2korr(ptr); ptr+= 2; + base->max_key_length= mi_uint2korr(ptr); ptr+= 2; + base->extra_alloc_bytes= mi_uint2korr(ptr); ptr+= 2; + base->extra_alloc_procent= *ptr++; + ptr+= 16; + return ptr; +} + +/*-------------------------------------------------------------------------- + maria_keydef +---------------------------------------------------------------------------*/ + +uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef) +{ + uchar buff[MARIA_KEYDEF_SIZE]; + uchar *ptr=buff; + + *ptr++= (uchar) keydef->keysegs; + *ptr++= keydef->key_alg; /* Rtree or Btree */ + mi_int2store(ptr,keydef->flag); ptr+= 2; + mi_int2store(ptr,keydef->block_length); ptr+= 2; + mi_int2store(ptr,keydef->keylength); ptr+= 2; + mi_int2store(ptr,keydef->minlength); ptr+= 2; + mi_int2store(ptr,keydef->maxlength); ptr+= 2; + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + +char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef) +{ + keydef->keysegs = (uint) *ptr++; + keydef->key_alg = *ptr++; /* Rtree or Btree */ + + keydef->flag = mi_uint2korr(ptr); ptr+= 2; + keydef->block_length = mi_uint2korr(ptr); ptr+= 2; + keydef->keylength = mi_uint2korr(ptr); ptr+= 2; + keydef->minlength = mi_uint2korr(ptr); ptr+= 2; + keydef->maxlength = mi_uint2korr(ptr); ptr+= 2; + keydef->underflow_block_length=keydef->block_length/3; + keydef->version = 0; /* Not saved */ + keydef->parser = &ft_default_parser; + keydef->ftparser_nr = 0; + return ptr; +} + +/*************************************************************************** +** maria_keyseg +***************************************************************************/ + +int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg) +{ + uchar buff[HA_KEYSEG_SIZE]; + uchar *ptr=buff; + ulong pos; + + *ptr++= keyseg->type; + *ptr++= keyseg->language; + *ptr++= keyseg->null_bit; + *ptr++= keyseg->bit_start; + *ptr++= keyseg->bit_end; + *ptr++= keyseg->bit_length; + mi_int2store(ptr,keyseg->flag); ptr+= 2; + mi_int2store(ptr,keyseg->length); ptr+= 2; + mi_int4store(ptr,keyseg->start); ptr+= 4; + pos= keyseg->null_bit ? 
keyseg->null_pos : keyseg->bit_pos; + mi_int4store(ptr, pos); + ptr+=4; + + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + + +char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg) +{ + keyseg->type = *ptr++; + keyseg->language = *ptr++; + keyseg->null_bit = *ptr++; + keyseg->bit_start = *ptr++; + keyseg->bit_end = *ptr++; + keyseg->bit_length = *ptr++; + keyseg->flag = mi_uint2korr(ptr); ptr+= 2; + keyseg->length = mi_uint2korr(ptr); ptr+= 2; + keyseg->start = mi_uint4korr(ptr); ptr+= 4; + keyseg->null_pos = mi_uint4korr(ptr); ptr+= 4; + keyseg->charset=0; /* Will be filled in later */ + if (keyseg->null_bit) + keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7)); + else + { + keyseg->bit_pos= (uint16)keyseg->null_pos; + keyseg->null_pos= 0; + } + return ptr; +} + +/*-------------------------------------------------------------------------- + maria_uniquedef +---------------------------------------------------------------------------*/ + +uint _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *def) +{ + uchar buff[MARIA_UNIQUEDEF_SIZE]; + uchar *ptr=buff; + + mi_int2store(ptr,def->keysegs); ptr+=2; + *ptr++= (uchar) def->key; + *ptr++ = (uchar) def->null_are_equal; + + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + +char *_ma_uniquedef_read(char *ptr, MARIA_UNIQUEDEF *def) +{ + def->keysegs = mi_uint2korr(ptr); + def->key = ptr[2]; + def->null_are_equal=ptr[3]; + return ptr+4; /* 1 extra byte */ +} + +/*************************************************************************** +** MARIA_COLUMNDEF +***************************************************************************/ + +uint _ma_recinfo_write(File file, MARIA_COLUMNDEF *recinfo) +{ + uchar buff[MARIA_COLUMNDEF_SIZE]; + uchar *ptr=buff; + + mi_int6store(ptr,recinfo->offset); ptr+= 6; + mi_int2store(ptr,recinfo->type); ptr+= 2; + mi_int2store(ptr,recinfo->length); ptr+= 2; + mi_int2store(ptr,recinfo->fill_length); ptr+= 2; + mi_int2store(ptr,recinfo->null_pos); ptr+= 2; + mi_int2store(ptr,recinfo->empty_pos); ptr+= 2; + (*ptr++)= recinfo->null_bit; + (*ptr++)= recinfo->empty_bit; + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + +char *_ma_recinfo_read(char *ptr, MARIA_COLUMNDEF *recinfo) +{ + recinfo->offset= mi_uint6korr(ptr); ptr+= 6; + recinfo->type= mi_sint2korr(ptr); ptr+= 2; + recinfo->length= mi_uint2korr(ptr); ptr+= 2; + recinfo->fill_length= mi_uint2korr(ptr); ptr+= 2; + recinfo->null_pos= mi_uint2korr(ptr); ptr+= 2; + recinfo->empty_pos= mi_uint2korr(ptr); ptr+= 2; + recinfo->null_bit= (uint8) *ptr++; + recinfo->empty_bit= (uint8) *ptr++; + return ptr; +} + +/************************************************************************** + Open data file + We can't use dup() here as the data file descriptors need to have different + active seek-positions. + + The argument file_to_dup is here for the future if there would on some OS + exist a dup()-like call that would give us two different file descriptors. +*************************************************************************/ + +int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, + File file_to_dup __attribute__((unused))) +{ + info->dfile= my_open(share->data_file_name, share->mode | O_SHARE, + MYF(MY_WME)); + return info->dfile >= 0 ? 0 : 1; +} + + +int _ma_open_keyfile(MARIA_SHARE *share) +{ + if ((share->kfile=my_open(share->unique_file_name, share->mode | O_SHARE, + MYF(MY_WME))) < 0) + return 1; + return 0; +} + + +/* + Disable all indexes. 
+ + SYNOPSIS + maria_disable_indexes() + info A pointer to the MARIA storage engine MARIA_HA struct. + + DESCRIPTION + Disable all indexes. + + RETURN + 0 ok +*/ + +int maria_disable_indexes(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + + maria_clear_all_keys_active(share->state.key_map); + return 0; +} + + +/* + Enable all indexes + + SYNOPSIS + maria_enable_indexes() + info A pointer to the MARIA storage engine MARIA_HA struct. + + DESCRIPTION + Enable all indexes. The indexes might have been disabled + by maria_disable_index() before. + The function works only if both data and indexes are empty, + otherwise a repair is required. + To be sure, call handler::delete_all_rows() before. + + RETURN + 0 ok + HA_ERR_CRASHED data or index is non-empty. +*/ + +int maria_enable_indexes(MARIA_HA *info) +{ + int error= 0; + MARIA_SHARE *share= info->s; + + if (share->state.state.data_file_length || + (share->state.state.key_file_length != share->base.keystart)) + { + maria_print_error(info->s, HA_ERR_CRASHED); + error= HA_ERR_CRASHED; + } + else + maria_set_all_keys_active(share->state.key_map, share->base.keys); + return error; +} + + +/* + Test if indexes are disabled. + + SYNOPSIS + maria_indexes_are_disabled() + info A pointer to the MARIA storage engine MARIA_HA struct. + + DESCRIPTION + Test if indexes are disabled. + + RETURN + 0 indexes are not disabled + 1 all indexes are disabled + 2 non-unique indexes are disabled +*/ + +int maria_indexes_are_disabled(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + + /* + No keys or all are enabled. keys is the number of keys. Left shifted + gives us only one bit set. When decreased by one, gives us all all bits + up to this one set and it gets unset. + */ + if (!share->base.keys || + (maria_is_all_keys_active(share->state.key_map, share->base.keys))) + return 0; + + /* All are disabled */ + if (maria_is_any_key_active(share->state.key_map)) + return 1; + + /* + We have keys. Some enabled, some disabled. + Don't check for any non-unique disabled but return directly 2 + */ + return 2; +} + + +static my_bool maria_scan_init_dummy(MARIA_HA *info __attribute__((unused))) +{ + return 0; +} + +static void maria_scan_end_dummy(MARIA_HA *info __attribute__((unused))) +{ +} + +static my_bool maria_once_init_dummy(MARIA_SHARE *share + __attribute__((unused)), + File dfile __attribute__((unused))) +{ + return 0; +} + +static my_bool maria_once_end_dummy(MARIA_SHARE *share __attribute__((unused))) +{ + return 0; +} diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c new file mode 100644 index 00000000000..7c875e7b91d --- /dev/null +++ b/storage/maria/ma_packrec.c @@ -0,0 +1,1426 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + /* Functions to compressed records */ + +#include "maria_def.h" + +#define IS_CHAR ((uint) 32768) /* Bit if char (not offset) in tree */ + +#if INT_MAX > 65536L +#define BITS_SAVED 32 +#define MAX_QUICK_TABLE_BITS 9 /* Because we may shift in 24 bits */ +#else +#define BITS_SAVED 16 +#define MAX_QUICK_TABLE_BITS 6 +#endif + +#define get_bit(BU) ((BU)->bits ? \ + (BU)->current_byte & ((maria_bit_type) 1 << --(BU)->bits) :\ + (fill_buffer(BU), (BU)->bits= BITS_SAVED-1,\ + (BU)->current_byte & ((maria_bit_type) 1 << (BITS_SAVED-1)))) +#define skip_to_next_byte(BU) ((BU)->bits&=~7) +#define get_bits(BU,count) (((BU)->bits >= count) ? (((BU)->current_byte >> ((BU)->bits-=count)) & mask[count]) : fill_and_get_bits(BU,count)) + +#define decode_bytes_test_bit(bit) \ + if (low_byte & (1 << (7-bit))) \ + pos++; \ + if (*pos & IS_CHAR) \ + { bits-=(bit+1); break; } \ + pos+= *pos + +#define OFFSET_TABLE_SIZE 512 + +static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, + pbool fix_keys); +static uint read_huff_table(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree, + uint16 **decode_table,byte **intervall_buff, + uint16 *tmp_buff); +static void make_quick_table(uint16 *to_table,uint16 *decode_table, + uint *next_free,uint value,uint bits, + uint max_bits); +static void fill_quick_table(uint16 *table,uint bits, uint max_bits, + uint value); +static uint copy_decode_table(uint16 *to_pos,uint offset, + uint16 *decode_table); +static uint find_longest_bitstream(uint16 *table, uint16 *end); +static void (*get_unpack_function(MARIA_COLUMNDEF *rec))(MARIA_COLUMNDEF *field, + MARIA_BIT_BUFF *buff, + byte *to, + byte *end); +static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_skip_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); +static void uf_endspace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); +static void uf_prespace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_zerofill_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_constant(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_intervall(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); +static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte 
*to, byte *end); +static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); +static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static uint decode_pos(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree); +static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff,uchar *buffer, + uint length); +static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff,uint count); +static void fill_buffer(MARIA_BIT_BUFF *bit_buff); +static uint max_bit(uint value); +static uint read_pack_length(uint version, const uchar *buf, ulong *length); +#ifdef HAVE_MMAP +static uchar *_ma_mempack_get_block_info(MARIA_HA *maria, + MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, + byte **rec_buff_p, + my_size_t *rec_buff_size_p, + uchar *header); +#endif + +static maria_bit_type mask[]= +{ + 0x00000000, + 0x00000001, 0x00000003, 0x00000007, 0x0000000f, + 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, + 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, + 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, +#if BITS_SAVED > 16 + 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, + 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, + 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, + 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff, +#endif +}; + + +my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile) +{ + share->options|= HA_OPTION_READ_ONLY_DATA; + if (_ma_read_pack_info(share, dfile, + (pbool) + test(!(share->options & + (HA_OPTION_PACK_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD))))) + return 1; + return 0; +} + +my_bool _ma_once_end_pack_row(MARIA_SHARE *share) +{ + if (share->decode_trees) + { + my_free((gptr) share->decode_trees,MYF(0)); + my_free((gptr) share->decode_tables,MYF(0)); + } + return 0; +} + + +/* Read all packed info, allocate memory and fix field structs */ + +static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, + pbool fix_keys) +{ + int diff_length; + uint i,trees,huff_tree_bits,rec_reflength,length; + uint16 *decode_table,*tmp_buff; + ulong elements,intervall_length; + char *disk_cache,*intervall_buff; + uchar header[32]; + MARIA_BIT_BUFF bit_buff; + DBUG_ENTER("_ma_read_pack_info"); + + if (maria_quick_table_bits < 4) + maria_quick_table_bits=4; + else if (maria_quick_table_bits > MAX_QUICK_TABLE_BITS) + maria_quick_table_bits=MAX_QUICK_TABLE_BITS; + + my_errno=0; + if (my_read(file,(byte*) header,sizeof(header),MYF(MY_NABP))) + { + if (!my_errno) + my_errno=HA_ERR_END_OF_FILE; + goto err0; + } + if (memcmp((byte*) header, (byte*) maria_pack_file_magic, 3)) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err0; + } + share->pack.version= header[3]; + share->pack.header_length= uint4korr(header+4); + share->min_pack_length=(uint) uint4korr(header+8); + share->max_pack_length=(uint) uint4korr(header+12); + set_if_bigger(share->base.pack_reclength,share->max_pack_length); + elements=uint4korr(header+16); + intervall_length=uint4korr(header+20); + trees=uint2korr(header+24); + share->pack.ref_length=header[26]; + rec_reflength=header[27]; + diff_length=(int) rec_reflength - (int) share->base.rec_reflength; + if (fix_keys) + share->rec_reflength=rec_reflength; + share->base.min_block_length=share->min_pack_length+1; + if (share->min_pack_length > 254) + share->base.min_block_length+=2; + + if (!(share->decode_trees=(MARIA_DECODE_TREE*) + my_malloc((uint) (trees*sizeof(MARIA_DECODE_TREE)+ + intervall_length*sizeof(byte)), + MYF(MY_WME)))) + goto err0; + intervall_buff=(byte*) (share->decode_trees+trees); + + 
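For reference, the fixed 32-byte header decoded at the top of _ma_read_pack_info() can be restated as a plain struct plus an explicit byte reader. This is a sketch only: the struct, the le32_sketch() helper and the field offsets are inferred from the uintNkorr() calls above, on the assumption that those macros read integers stored low byte first; bytes 28..31 are not read by this function.

#include <stdint.h>

/* Sketch of the header layout read by _ma_read_pack_info() above. */
struct pack_header_sketch
{
  uint8_t  version;           /* header[3]      -> share->pack.version        */
  uint32_t header_length;     /* header[4..7]   -> share->pack.header_length  */
  uint32_t min_pack_length;   /* header[8..11]                                */
  uint32_t max_pack_length;   /* header[12..15]                               */
  uint32_t elements;          /* header[16..19]                               */
  uint32_t intervall_length;  /* header[20..23]                               */
  uint16_t trees;             /* header[24..25]                               */
  uint8_t  ref_length;        /* header[26]     -> share->pack.ref_length     */
  uint8_t  rec_reflength;     /* header[27]                                   */
};

static uint32_t le32_sketch(const uint8_t *p)    /* assumed uint4korr order */
{
  return (uint32_t) p[0] | ((uint32_t) p[1] << 8) |
         ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24);
}

static void decode_pack_header_sketch(const uint8_t header[32],
                                      struct pack_header_sketch *h)
{
  /* header[0..2] hold the maria_pack_file_magic bytes, checked above;
     header[28..31] are not read by this function. */
  h->version=          header[3];
  h->header_length=    le32_sketch(header + 4);
  h->min_pack_length=  le32_sketch(header + 8);
  h->max_pack_length=  le32_sketch(header + 12);
  h->elements=         le32_sketch(header + 16);
  h->intervall_length= le32_sketch(header + 20);
  h->trees=            (uint16_t) (header[24] | (header[25] << 8));
  h->ref_length=       header[26];
  h->rec_reflength=    header[27];
}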
length=(uint) (elements*2+trees*(1 << maria_quick_table_bits)); + if (!(share->decode_tables=(uint16*) + my_malloc((length+OFFSET_TABLE_SIZE)*sizeof(uint16)+ + (uint) (share->pack.header_length+7), + MYF(MY_WME | MY_ZEROFILL)))) + goto err1; + tmp_buff=share->decode_tables+length; + disk_cache=(byte*) (tmp_buff+OFFSET_TABLE_SIZE); + + if (my_read(file,disk_cache, + (uint) (share->pack.header_length-sizeof(header)), + MYF(MY_NABP))) + goto err2; + + huff_tree_bits=max_bit(trees ? trees-1 : 0); + init_bit_buffer(&bit_buff, (uchar*) disk_cache, + (uint) (share->pack.header_length-sizeof(header))); + /* Read new info for each field */ + for (i=0 ; i < share->base.fields ; i++) + { + share->rec[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,5); + share->rec[i].pack_type=(uint) get_bits(&bit_buff,6); + share->rec[i].space_length_bits=get_bits(&bit_buff,5); + share->rec[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff, + huff_tree_bits); + share->rec[i].unpack= get_unpack_function(share->rec+i); + } + skip_to_next_byte(&bit_buff); + decode_table=share->decode_tables; + for (i=0 ; i < trees ; i++) + if (read_huff_table(&bit_buff,share->decode_trees+i,&decode_table, + &intervall_buff,tmp_buff)) + goto err3; + decode_table=(uint16*) + my_realloc((gptr) share->decode_tables, + (uint) ((byte*) decode_table - (byte*) share->decode_tables), + MYF(MY_HOLD_ON_ERROR)); + { + long diff=PTR_BYTE_DIFF(decode_table,share->decode_tables); + share->decode_tables=decode_table; + for (i=0 ; i < trees ; i++) + share->decode_trees[i].table=ADD_TO_PTR(share->decode_trees[i].table, + diff, uint16*); + } + + /* Fix record-ref-length for keys */ + if (fix_keys) + { + for (i=0 ; i < share->base.keys ; i++) + { + MARIA_KEYDEF *keyinfo= &share->keyinfo[i]; + keyinfo->keylength+= (uint16) diff_length; + keyinfo->minlength+= (uint16) diff_length; + keyinfo->maxlength+= (uint16) diff_length; + keyinfo->seg[keyinfo->flag & HA_FULLTEXT ? 
+ FT_SEGS : keyinfo->keysegs].length= (uint16) rec_reflength; + } + if (share->ft2_keyinfo.seg) + { + MARIA_KEYDEF *ft2_keyinfo= &share->ft2_keyinfo; + ft2_keyinfo->keylength+= (uint16) diff_length; + ft2_keyinfo->minlength+= (uint16) diff_length; + ft2_keyinfo->maxlength+= (uint16) diff_length; + } + } + + if (bit_buff.error || bit_buff.pos < bit_buff.end) + goto err3; + + DBUG_RETURN(0); + +err3: + my_errno=HA_ERR_WRONG_IN_RECORD; +err2: + my_free((gptr) share->decode_tables,MYF(0)); +err1: + my_free((gptr) share->decode_trees,MYF(0)); +err0: + DBUG_RETURN(1); +} + + + /* Read on huff-code-table from datafile */ + +static uint read_huff_table(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree, + uint16 **decode_table, byte **intervall_buff, + uint16 *tmp_buff) +{ + uint min_chr,elements,char_bits,offset_bits,size,intervall_length,table_bits, + next_free_offset; + uint16 *ptr,*end; + + LINT_INIT(ptr); + if (!get_bits(bit_buff,1)) + { + min_chr=get_bits(bit_buff,8); + elements=get_bits(bit_buff,9); + char_bits=get_bits(bit_buff,5); + offset_bits=get_bits(bit_buff,5); + intervall_length=0; + ptr=tmp_buff; + } + else + { + min_chr=0; + elements=get_bits(bit_buff,15); + intervall_length=get_bits(bit_buff,16); + char_bits=get_bits(bit_buff,5); + offset_bits=get_bits(bit_buff,5); + decode_tree->quick_table_bits=0; + ptr= *decode_table; + } + size=elements*2-2; + + for (end=ptr+size ; ptr < end ; ptr++) + { + if (get_bit(bit_buff)) + *ptr= (uint16) get_bits(bit_buff,offset_bits); + else + *ptr= (uint16) (IS_CHAR + (get_bits(bit_buff,char_bits) + min_chr)); + } + skip_to_next_byte(bit_buff); + + decode_tree->table= *decode_table; + decode_tree->intervalls= *intervall_buff; + if (! intervall_length) + { + table_bits=find_longest_bitstream(tmp_buff, tmp_buff+OFFSET_TABLE_SIZE); + if (table_bits == (uint) ~0) + return 1; + if (table_bits > maria_quick_table_bits) + table_bits=maria_quick_table_bits; + next_free_offset= (1 << table_bits); + make_quick_table(*decode_table,tmp_buff,&next_free_offset,0,table_bits, + table_bits); + (*decode_table)+= next_free_offset; + decode_tree->quick_table_bits=table_bits; + } + else + { + (*decode_table)=end; + bit_buff->pos-= bit_buff->bits/8; + memcpy(*intervall_buff,bit_buff->pos,(size_t) intervall_length); + (*intervall_buff)+=intervall_length; + bit_buff->pos+=intervall_length; + bit_buff->bits=0; + } + return 0; +} + + +static void make_quick_table(uint16 *to_table, uint16 *decode_table, + uint *next_free_offset, uint value, uint bits, + uint max_bits) +{ + if (!bits--) + { + to_table[value]= (uint16) *next_free_offset; + *next_free_offset=copy_decode_table(to_table, *next_free_offset, + decode_table); + return; + } + if (!(*decode_table & IS_CHAR)) + { + make_quick_table(to_table,decode_table+ *decode_table, + next_free_offset,value,bits,max_bits); + } + else + fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); + decode_table++; + value|= (1 << bits); + if (!(*decode_table & IS_CHAR)) + { + make_quick_table(to_table,decode_table+ *decode_table, + next_free_offset,value,bits,max_bits); + } + else + fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); + return; +} + + +static void fill_quick_table(uint16 *table, uint bits, uint max_bits, + uint value) +{ + uint16 *end; + value|=(max_bits-bits) << 8; + for (end=table+ (1 << bits) ; + table < end ; + *table++ = (uint16) value | IS_CHAR) ; +} + + +static uint copy_decode_table(uint16 *to_pos, uint offset, + uint16 *decode_table) +{ + uint prev_offset; + prev_offset= 
offset; + + if (!(*decode_table & IS_CHAR)) + { + to_pos[offset]=2; + offset=copy_decode_table(to_pos,offset+2,decode_table+ *decode_table); + } + else + { + to_pos[offset]= *decode_table; + offset+=2; + } + decode_table++; + + if (!(*decode_table & IS_CHAR)) + { + to_pos[prev_offset+1]=(uint16) (offset-prev_offset-1); + offset=copy_decode_table(to_pos,offset,decode_table+ *decode_table); + } + else + to_pos[prev_offset+1]= *decode_table; + return offset; +} + + +static uint find_longest_bitstream(uint16 *table, uint16 *end) +{ + uint length=1,length2; + if (!(*table & IS_CHAR)) + { + uint16 *next= table + *table; + if (next > end || next == table) + return ~0; + length=find_longest_bitstream(next, end)+1; + } + table++; + if (!(*table & IS_CHAR)) + { + uint16 *next= table + *table; + if (next > end || next == table) + return ~0; + length2=find_longest_bitstream(table+ *table, end)+1; + length=max(length,length2); + } + return length; +} + + +/* + Read record from datafile. + + SYNOPSIS + _ma_read_pack_record() + info A pointer to MARIA_HA. + filepos File offset of the record. + buf RETURN The buffer to receive the record. + + RETURN + 0 on success + HA_ERR_WRONG_IN_RECORD or -1 on error +*/ + +int _ma_read_pack_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos) +{ + MARIA_BLOCK_INFO block_info; + File file; + DBUG_ENTER("maria_read_pack_record"); + + if (filepos == HA_OFFSET_ERROR) + DBUG_RETURN(-1); /* _search() didn't find record */ + + file=info->dfile; + if (_ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, &info->rec_buff_size, file, + filepos)) + goto err; + if (my_read(file,(byte*) info->rec_buff + block_info.offset , + block_info.rec_len - block_info.offset, MYF(MY_NABP))) + goto panic; + info->update|= HA_STATE_AKTIV; + DBUG_RETURN(_ma_pack_rec_unpack(info,&info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); +panic: + my_errno=HA_ERR_WRONG_IN_RECORD; +err: + DBUG_RETURN(-1); +} + + + +int _ma_pack_rec_unpack(register MARIA_HA *info, MARIA_BIT_BUFF *bit_buff, + register byte *to, byte *from, ulong reclength) +{ + byte *end_field; + reg3 MARIA_COLUMNDEF *end; + MARIA_COLUMNDEF *current_field; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_pack_rec_unpack"); + + if (info->s->base.null_bytes) + { + memcpy(to, from, info->s->base.null_bytes); + to+= info->s->base.null_bytes; + from+= info->s->base.null_bytes; + reclength-= info->s->base.null_bytes; + } + init_bit_buffer(bit_buff, (uchar*) from, reclength); + for (current_field=share->rec, end=current_field+share->base.fields ; + current_field < end ; + current_field++,to=end_field) + { + end_field=to+current_field->length; + (*current_field->unpack)(current_field, bit_buff, to, end_field); + } + if (!bit_buff->error && + bit_buff->pos - bit_buff->bits / 8 == bit_buff->end) + DBUG_RETURN(0); + info->update&= ~HA_STATE_AKTIV; + DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD); +} /* _ma_pack_rec_unpack */ + + + /* Return function to unpack field */ + +static void (*get_unpack_function(MARIA_COLUMNDEF *rec)) + (MARIA_COLUMNDEF *, MARIA_BIT_BUFF *, byte *, byte *) +{ + switch (rec->base_type) { + case FIELD_SKIP_ZERO: + if (rec->pack_type & PACK_TYPE_ZERO_FILL) + return &uf_zerofill_skip_zero; + return &uf_skip_zero; + case FIELD_NORMAL: + if (rec->pack_type & PACK_TYPE_SPACE_FIELDS) + return &uf_space_normal; + if (rec->pack_type & PACK_TYPE_ZERO_FILL) + return &uf_zerofill_normal; + return &decode_bytes; + case FIELD_SKIP_ENDSPACE: + if (rec->pack_type & PACK_TYPE_SPACE_FIELDS) + { + if 
(rec->pack_type & PACK_TYPE_SELECTED) + return &uf_space_endspace_selected; + return &uf_space_endspace; + } + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_endspace_selected; + return &uf_endspace; + case FIELD_SKIP_PRESPACE: + if (rec->pack_type & PACK_TYPE_SPACE_FIELDS) + { + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_space_prespace_selected; + return &uf_space_prespace; + } + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_prespace_selected; + return &uf_prespace; + case FIELD_CONSTANT: + return &uf_constant; + case FIELD_INTERVALL: + return &uf_intervall; + case FIELD_ZERO: + case FIELD_CHECK: + return &uf_zero; + case FIELD_BLOB: + return &uf_blob; + case FIELD_VARCHAR: + if (rec->length <= 256) /* 255 + 1 byte length */ + return &uf_varchar1; + return &uf_varchar2; + case FIELD_LAST: + default: + return 0; /* This should never happend */ + } +} + + /* The different functions to unpack a field */ + +static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + if (get_bit(bit_buff)) + bzero((char*) to,(uint) (end-to)); + else + { + end-=rec->space_length_bits; + decode_bytes(rec,bit_buff,to,end); + bzero((char*) end,rec->space_length_bits); + } +} + +static void uf_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + if (get_bit(bit_buff)) + bzero((char*) to,(uint) (end-to)); + else + decode_bytes(rec,bit_buff,to,end); +} + +static void uf_space_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + decode_bytes(rec,bit_buff,to,end); +} + +static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); + } + else + decode_bytes(rec,bit_buff,to,end); + } +} + +static void uf_endspace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); + } + else + decode_bytes(rec,bit_buff,to,end); +} + +static void uf_space_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); + } +} + +static void uf_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); +} + +static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if 
(get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); + } + else + decode_bytes(rec,bit_buff,to,end); + } +} + + +static void uf_prespace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); + } + else + decode_bytes(rec,bit_buff,to,end); +} + + +static void uf_space_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); + } +} + +static void uf_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); +} + +static void uf_zerofill_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + end-=rec->space_length_bits; + decode_bytes(rec,bit_buff, to, end); + bzero((char*) end,rec->space_length_bits); +} + +static void uf_constant(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff __attribute__((unused)), + byte *to, byte *end) +{ + memcpy(to,rec->huff_tree->intervalls,(size_t) (end-to)); +} + +static void uf_intervall(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, + byte *end) +{ + reg1 uint field_length=(uint) (end-to); + memcpy(to,rec->huff_tree->intervalls+field_length*decode_pos(bit_buff, + rec->huff_tree), + (size_t) field_length); +} + + +/*ARGSUSED*/ +static void uf_zero(MARIA_COLUMNDEF *rec __attribute__((unused)), + MARIA_BIT_BUFF *bit_buff __attribute__((unused)), + byte *to, byte *end) +{ + bzero(to, (uint) (end-to)); +} + +static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + if (get_bit(bit_buff)) + bzero(to, (uint) (end-to)); + else + { + ulong length=get_bits(bit_buff,rec->space_length_bits); + uint pack_length=(uint) (end-to)-maria_portable_sizeof_char_ptr; + if (bit_buff->blob_pos+length > bit_buff->blob_end) + { + bit_buff->error=1; + bzero((byte*) to,(end-to)); + return; + } + decode_bytes(rec,bit_buff,(byte*) bit_buff->blob_pos, + (byte*) bit_buff->blob_pos+length); + _ma_store_blob_length((byte*) to,pack_length,length); + memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos, + sizeof(char*)); + bit_buff->blob_pos+=length; + } +} + + +static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end __attribute__((unused))) +{ + if (get_bit(bit_buff)) + to[0]= 0; /* Zero lengths */ + else + { + ulong length=get_bits(bit_buff,rec->space_length_bits); + *to= (char) length; + decode_bytes(rec,bit_buff,to+1,to+1+length); + } +} + + +static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end __attribute__((unused))) +{ + if (get_bit(bit_buff)) + to[0]=to[1]=0; /* Zero lengths */ + else + { + ulong 
length=get_bits(bit_buff,rec->space_length_bits); + int2store(to,length); + decode_bytes(rec,bit_buff,to+2,to+2+length); + } +} + + /* Functions to decode of buffer of bits */ + +#if BITS_SAVED == 64 + +static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + reg1 uint bits,low_byte; + reg3 uint16 *pos; + reg4 uint table_bits,table_and; + MARIA_DECODE_TREE *decode_tree; + + decode_tree=rec->decode_tree; + bits=bit_buff->bits; /* Save in reg for quicker access */ + table_bits=decode_tree->quick_table_bits; + table_and= (1 << table_bits)-1; + + do + { + if (bits <= 32) + { + if (bit_buff->pos > bit_buff->end+4) + { + bit_buff->error=1; + return; /* Can't be right */ + } + bit_buff->current_byte= (bit_buff->current_byte << 32) + + ((((uint) bit_buff->pos[3])) + + (((uint) bit_buff->pos[2]) << 8) + + (((uint) bit_buff->pos[1]) << 16) + + (((uint) bit_buff->pos[0]) << 24)); + bit_buff->pos+=4; + bits+=32; + } + /* First use info in quick_table */ + low_byte=(uint) (bit_buff->current_byte >> (bits - table_bits)) & table_and; + low_byte=decode_tree->table[low_byte]; + if (low_byte & IS_CHAR) + { + *to++ = (char) (low_byte & 255); /* Found char in quick table */ + bits-= ((low_byte >> 8) & 31); /* Remove bits used */ + } + else + { /* Map through rest of decode-table */ + pos=decode_tree->table+low_byte; + bits-=table_bits; + for (;;) + { + low_byte=(uint) (bit_buff->current_byte >> (bits-8)); + decode_bytes_test_bit(0); + decode_bytes_test_bit(1); + decode_bytes_test_bit(2); + decode_bytes_test_bit(3); + decode_bytes_test_bit(4); + decode_bytes_test_bit(5); + decode_bytes_test_bit(6); + decode_bytes_test_bit(7); + bits-=8; + } + *to++ = (char) *pos; + } + } while (to != end); + + bit_buff->bits=bits; + return; +} + +#else + +static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + reg1 uint bits,low_byte; + reg3 uint16 *pos; + reg4 uint table_bits,table_and; + MARIA_DECODE_TREE *decode_tree; + + decode_tree=rec->huff_tree; + bits=bit_buff->bits; /* Save in reg for quicker access */ + table_bits=decode_tree->quick_table_bits; + table_and= (1 << table_bits)-1; + + do + { + if (bits < table_bits) + { + if (bit_buff->pos > bit_buff->end+1) + { + bit_buff->error=1; + return; /* Can't be right */ + } +#if BITS_SAVED == 32 + bit_buff->current_byte= (bit_buff->current_byte << 24) + + (((uint) ((uchar) bit_buff->pos[2]))) + + (((uint) ((uchar) bit_buff->pos[1])) << 8) + + (((uint) ((uchar) bit_buff->pos[0])) << 16); + bit_buff->pos+=3; + bits+=24; +#else + if (bits) /* We must have at leasts 9 bits */ + { + bit_buff->current_byte= (bit_buff->current_byte << 8) + + (uint) ((uchar) bit_buff->pos[0]); + bit_buff->pos++; + bits+=8; + } + else + { + bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) + + ((uint) ((uchar) bit_buff->pos[1])); + bit_buff->pos+=2; + bits+=16; + } +#endif + } + /* First use info in quick_table */ + low_byte=(bit_buff->current_byte >> (bits - table_bits)) & table_and; + low_byte=decode_tree->table[low_byte]; + if (low_byte & IS_CHAR) + { + *to++ = (low_byte & 255); /* Found char in quick table */ + bits-= ((low_byte >> 8) & 31); /* Remove bits used */ + } + else + { /* Map through rest of decode-table */ + pos=decode_tree->table+low_byte; + bits-=table_bits; + for (;;) + { + if (bits < 8) + { /* We don't need to check end */ +#if BITS_SAVED == 32 + bit_buff->current_byte= (bit_buff->current_byte << 24) + + (((uint) ((uchar) bit_buff->pos[2]))) + + (((uint) ((uchar) bit_buff->pos[1])) 
<< 8) + + (((uint) ((uchar) bit_buff->pos[0])) << 16); + bit_buff->pos+=3; + bits+=24; +#else + bit_buff->current_byte= (bit_buff->current_byte << 8) + + (uint) ((uchar) bit_buff->pos[0]); + bit_buff->pos+=1; + bits+=8; +#endif + } + low_byte=(uint) (bit_buff->current_byte >> (bits-8)); + decode_bytes_test_bit(0); + decode_bytes_test_bit(1); + decode_bytes_test_bit(2); + decode_bytes_test_bit(3); + decode_bytes_test_bit(4); + decode_bytes_test_bit(5); + decode_bytes_test_bit(6); + decode_bytes_test_bit(7); + bits-=8; + } + *to++ = (char) *pos; + } + } while (to != end); + + bit_buff->bits=bits; + return; +} +#endif /* BIT_SAVED == 64 */ + + +static uint decode_pos(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree) +{ + uint16 *pos=decode_tree->table; + for (;;) + { + if (get_bit(bit_buff)) + pos++; + if (*pos & IS_CHAR) + return (uint) (*pos & ~IS_CHAR); + pos+= *pos; + } +} + + +int _ma_read_rnd_pack_record(MARIA_HA *info, + byte *buf, + register MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks) +{ + File file; + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_read_rnd_pack_record"); + + if (filepos >= info->state->data_file_length) + { + my_errno= HA_ERR_END_OF_FILE; + goto err; + } + + file= info->dfile; + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache, (byte*) block_info.header, + filepos, share->pack.ref_length, + skip_deleted_blocks ? READING_NEXT : 0)) + goto err; + file= -1; + } + if (_ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, &info->rec_buff_size, + file, filepos)) + goto err; /* Error code is already set */ +#ifndef DBUG_OFF + if (block_info.rec_len > share->max_pack_length) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } +#endif + + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache, (byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, + skip_deleted_blocks ? 
READING_NEXT : 0)) + goto err; + } + else + { + if (my_read(info->dfile,(byte*) info->rec_buff + block_info.offset, + block_info.rec_len-block_info.offset, + MYF(MY_NABP))) + goto err; + } + info->packed_length= block_info.rec_len; + info->cur_row.lastpos= filepos; + info->cur_row.nextpos= block_info.filepos+block_info.rec_len; + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + + DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); + err: + DBUG_RETURN(my_errno); +} + + + /* Read and process header from a huff-record-file */ + +uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, + byte **rec_buff_p, my_size_t *rec_buff_size_p, + File file, my_off_t filepos) +{ + uchar *header= info->header; + uint head_length,ref_length; + LINT_INIT(ref_length); + + if (file >= 0) + { + ref_length=maria->s->pack.ref_length; + /* + We can't use my_pread() here because maria_read_rnd_pack_record assumes + position is ok + */ + VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0))); + if (my_read(file,(char*) header,ref_length,MYF(MY_NABP))) + return BLOCK_FATAL_ERROR; + DBUG_DUMP("header",(byte*) header,ref_length); + } + head_length= read_pack_length((uint) maria->s->pack.version, header, + &info->rec_len); + if (maria->s->base.blobs) + { + head_length+= read_pack_length((uint) maria->s->pack.version, + header + head_length, &info->blob_len); + if (_ma_alloc_buffer(rec_buff_p, rec_buff_size_p, + info->rec_len + info->blob_len + + maria->s->base.extra_rec_buff_size)) + return BLOCK_FATAL_ERROR; /* not enough memory */ + bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len; + bit_buff->blob_end= bit_buff->blob_pos + info->blob_len; + maria->blob_length=info->blob_len; + } + info->filepos=filepos+head_length; + if (file > 0) + { + info->offset=min(info->rec_len, ref_length - head_length); + memcpy(*rec_buff_p, header + head_length, info->offset); + } + return 0; +} + + + /* rutines for bit buffer */ + /* Note buffer must be 6 byte bigger than longest row */ + +static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff, uchar *buffer, + uint length) +{ + bit_buff->pos=buffer; + bit_buff->end=buffer+length; + bit_buff->bits=bit_buff->error=0; + bit_buff->current_byte=0; /* Avoid purify errors */ +} + +static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff, uint count) +{ + uint tmp; + count-=bit_buff->bits; + tmp=(bit_buff->current_byte & mask[bit_buff->bits]) << count; + fill_buffer(bit_buff); + bit_buff->bits=BITS_SAVED - count; + return tmp+(bit_buff->current_byte >> (BITS_SAVED - count)); +} + + /* Fill in empty bit_buff->current_byte from buffer */ + /* Sets bit_buff->error if buffer is exhausted */ + +static void fill_buffer(MARIA_BIT_BUFF *bit_buff) +{ + if (bit_buff->pos >= bit_buff->end) + { + bit_buff->error= 1; + bit_buff->current_byte=0; + return; + } +#if BITS_SAVED == 64 + bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) + + (((uint) ((uchar) bit_buff->pos[6])) << 8) + + (((uint) ((uchar) bit_buff->pos[5])) << 16) + + (((uint) ((uchar) bit_buff->pos[4])) << 24) + + ((ulonglong) + ((((uint) ((uchar) bit_buff->pos[3]))) + + (((uint) ((uchar) bit_buff->pos[2])) << 8) + + (((uint) ((uchar) bit_buff->pos[1])) << 16) + + (((uint) ((uchar) bit_buff->pos[0])) << 24)) << 32)); + bit_buff->pos+=8; +#else +#if BITS_SAVED == 32 + bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) + + (((uint) ((uchar) bit_buff->pos[2])) << 8) + + (((uint) ((uchar) bit_buff->pos[1])) << 16) + + (((uint) ((uchar) 
bit_buff->pos[0])) << 24)); + bit_buff->pos+=4; +#else + bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1]))+ + (((uint) ((uchar) bit_buff->pos[0])) << 8)); + bit_buff->pos+=2; +#endif +#endif +} + + /* Get number of bits neaded to represent value */ + +static uint max_bit(register uint value) +{ + reg2 uint power=1; + + while ((value>>=1)) + power++; + return (power); +} + + +/***************************************************************************** + Some redefined functions to handle files when we are using memmap +*****************************************************************************/ +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif + +#ifdef HAVE_MMAP + +static int _ma_read_mempack_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos); +static int _ma_read_rnd_mempack_record(MARIA_HA*, byte *, MARIA_RECORD_POS, + my_bool); + +my_bool _ma_memmap_file(MARIA_HA *info) +{ + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_memmap_file"); + + if (!info->s->file_map) + { + if (my_seek(info->dfile,0L,MY_SEEK_END,MYF(0)) < + share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN) + { + DBUG_PRINT("warning",("File isn't extended for memmap")); + DBUG_RETURN(0); + } + if (_ma_dynmap_file(info, share->state.state.data_file_length)) + DBUG_RETURN(0); + } + info->opt_flag|= MEMMAP_USED; + info->read_record= share->read_record= _ma_read_mempack_record; + share->scan= _ma_read_rnd_mempack_record; + DBUG_RETURN(1); +} + + +void _ma_unmap_file(MARIA_HA *info) +{ + VOID(my_munmap(info->s->file_map, + (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN)); +} + + +static uchar * +_ma_mempack_get_block_info(MARIA_HA *maria, + MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, + byte **rec_buff_p, + my_size_t *rec_buff_size_p, + uchar *header) +{ + header+= read_pack_length((uint) maria->s->pack.version, header, + &info->rec_len); + if (maria->s->base.blobs) + { + header+= read_pack_length((uint) maria->s->pack.version, header, + &info->blob_len); + /* _ma_alloc_rec_buff sets my_errno on error */ + if (_ma_alloc_buffer(rec_buff_p, rec_buff_size_p, + info->blob_len + maria->s->base.extra_rec_buff_size)) + return 0; /* not enough memory */ + bit_buff->blob_pos= (uchar*) *rec_buff_p; + bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len; + } + return header; +} + + +static int _ma_read_mempack_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos) +{ + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + byte *pos; + DBUG_ENTER("maria_read_mempack_record"); + + if (filepos == HA_OFFSET_ERROR) + DBUG_RETURN(-1); /* _search() didn't find record */ + + if (!(pos= (byte*) _ma_mempack_get_block_info(info, &info->bit_buff, + &block_info, &info->rec_buff, + &info->rec_buff_size, + (uchar*) share->file_map+ + filepos))) + DBUG_RETURN(-1); + DBUG_RETURN(_ma_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); +} + + +/*ARGSUSED*/ +static int _ma_read_rnd_mempack_record(MARIA_HA *info, + byte *buf, + register MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks + __attribute__((unused))) +{ + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + byte *pos,*start; + DBUG_ENTER("_ma_read_rnd_mempack_record"); + + if (filepos >= share->state.state.data_file_length) + { + my_errno=HA_ERR_END_OF_FILE; + goto err; + } + if (!(pos= (byte*) _ma_mempack_get_block_info(info, &info->bit_buff, + &block_info, + &info->rec_buff, + &info->rec_buff_size, + (uchar*) + (start= share->file_map + + filepos)))) + goto err; +#ifndef DBUG_OFF + if 
(block_info.rec_len > info->s->max_pack_length) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } +#endif + info->packed_length=block_info.rec_len; + info->cur_row.lastpos= filepos; + info->cur_row.nextpos= filepos+(uint) (pos-start)+block_info.rec_len; + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + + DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); + err: + DBUG_RETURN(my_errno); +} + +#endif /* HAVE_MMAP */ + + /* Save length of row */ + +uint _ma_save_pack_length(uint version, byte *block_buff, ulong length) +{ + if (length < 254) + { + *(uchar*) block_buff= (uchar) length; + return 1; + } + if (length <= 65535) + { + *(uchar*) block_buff=254; + int2store(block_buff+1,(uint) length); + return 3; + } + *(uchar*) block_buff=255; + if (version == 1) /* old format */ + { + DBUG_ASSERT(length <= 0xFFFFFF); + int3store(block_buff + 1, (ulong) length); + return 4; + } + else + { + int4store(block_buff + 1, (ulong) length); + return 5; + } +} + + +static uint read_pack_length(uint version, const uchar *buf, ulong *length) +{ + if (buf[0] < 254) + { + *length= buf[0]; + return 1; + } + else if (buf[0] == 254) + { + *length= uint2korr(buf + 1); + return 3; + } + if (version == 1) /* old format */ + { + *length= uint3korr(buf + 1); + return 4; + } + else + { + *length= uint4korr(buf + 1); + return 5; + } +} + + +uint _ma_calc_pack_length(uint version, ulong length) +{ + return (length < 254) ? 1 : (length < 65536) ? 3 : (version == 1) ? 4 : 5; +} diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c new file mode 100644 index 00000000000..d0a11ad08ab --- /dev/null +++ b/storage/maria/ma_page.c @@ -0,0 +1,158 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
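The length prefix written by _ma_save_pack_length() and read back by read_pack_length() above is a small escape-byte scheme: lengths below 254 fit in the first byte itself, an initial 254 announces a 2-byte length, and 255 announces a 3-byte (version 1, old format) or 4-byte length. The self-contained sketch below walks the threshold cases; save_len_sketch() is a hypothetical restatement using explicit low-byte-first arithmetic rather than the intNstore() macros, so treat its exact byte output as an assumption.

#include <assert.h>
#include <stdint.h>

/* Hypothetical restatement of the encoding in _ma_save_pack_length(). */
static unsigned save_len_sketch(unsigned version, uint8_t *to, uint32_t len)
{
  if (len < 254)
  {
    to[0]= (uint8_t) len;                    /* length fits in the byte itself */
    return 1;
  }
  if (len <= 65535)
  {
    to[0]= 254;                              /* escape byte + 2-byte length */
    to[1]= (uint8_t) (len & 0xff);
    to[2]= (uint8_t) ((len >> 8) & 0xff);
    return 3;
  }
  to[0]= 255;                                /* escape byte for large lengths */
  to[1]= (uint8_t) (len & 0xff);
  to[2]= (uint8_t) ((len >> 8) & 0xff);
  to[3]= (uint8_t) ((len >> 16) & 0xff);
  if (version == 1)                          /* old format: 3-byte length */
    return 4;
  to[4]= (uint8_t) ((len >> 24) & 0xff);     /* new format: 4-byte length */
  return 5;
}

int main(void)
{
  uint8_t buff[5];
  assert(save_len_sketch(2, buff, 253)   == 1);
  assert(save_len_sketch(2, buff, 254)   == 3);
  assert(save_len_sketch(2, buff, 65535) == 3);
  assert(save_len_sketch(1, buff, 65536) == 4);  /* version 1: 255 + 3 bytes */
  assert(save_len_sketch(2, buff, 65536) == 5);  /* newer:     255 + 4 bytes */
  return 0;
}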
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read and write key blocks */ + +#include "maria_def.h" + + /* Fetch a key-page in memory */ + +byte *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, + byte *buff, int return_buffer) +{ + byte *tmp; + uint page_size; + DBUG_ENTER("_ma_fetch_keypage"); + DBUG_PRINT("enter",("page: %ld", (long) page)); + + tmp= key_cache_read(info->s->key_cache, info->s->kfile, page, level, buff, + info->s->block_size, info->s->block_size, + return_buffer); + if (tmp == info->buff) + info->keybuff_used=1; + else if (!tmp) + { + DBUG_PRINT("error",("Got errno: %d from key_cache_read",my_errno)); + info->last_keypage=HA_OFFSET_ERROR; + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + info->last_keypage=page; + page_size=maria_getint(tmp); + if (page_size < 4 || page_size > keyinfo->block_length) + { + DBUG_PRINT("error",("page %lu had wrong page length: %u", + (ulong) page, page_size)); + DBUG_DUMP("page", (char*) tmp, keyinfo->block_length); + info->last_keypage = HA_OFFSET_ERROR; + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno= HA_ERR_CRASHED; + tmp= 0; + } + DBUG_RETURN(tmp); +} /* _ma_fetch_keypage */ + + + /* Write a key-page on disk */ + +int _ma_write_keypage(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + my_off_t page, int level, byte *buff) +{ + reg3 uint length; + DBUG_ENTER("_ma_write_keypage"); + +#ifndef FAST /* Safety check */ + if (page < info->s->base.keystart || + page+keyinfo->block_length > info->state->key_file_length || + (page & (MARIA_MIN_KEY_BLOCK_LENGTH-1))) + { + DBUG_PRINT("error",("Trying to write inside key status region: key_start: %lu length: %lu page: %lu", + (long) info->s->base.keystart, + (long) info->state->key_file_length, + (long) page)); + my_errno=EINVAL; + DBUG_RETURN((-1)); + } + DBUG_PRINT("page",("write page at: %lu",(long) page)); + DBUG_DUMP("buff",(byte*) buff,maria_getint(buff)); +#endif + + if ((length=keyinfo->block_length) > IO_SIZE*2 && + info->state->key_file_length != page+length) + length= ((maria_getint(buff)+IO_SIZE-1) & (uint) ~(IO_SIZE-1)); +#ifdef HAVE_purify + { + length=maria_getint(buff); + bzero((byte*) buff+length,keyinfo->block_length-length); + length=keyinfo->block_length; + } +#endif + DBUG_RETURN((key_cache_write(info->s->key_cache, + info->s->kfile,page, level, (byte*) buff,length, + (uint) keyinfo->block_length, + (int) ((info->lock_type != F_UNLCK) || + info->s->delay_key_write)))); +} /* maria_write_keypage */ + + + /* Remove page from disk */ + +int _ma_dispose(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, + int level) +{ + my_off_t old_link; + char buff[8]; + DBUG_ENTER("_ma_dispose"); + DBUG_PRINT("enter",("pos: %ld", (long) pos)); + + old_link= info->s->state.key_del; + info->s->state.key_del= pos; + mi_sizestore(buff,old_link); + info->s->state.changed|= STATE_NOT_SORTED_PAGES; + DBUG_RETURN(key_cache_write(info->s->key_cache, + info->s->kfile, pos , level, buff, + sizeof(buff), + (uint) keyinfo->block_length, + (int) (info->lock_type != F_UNLCK))); +} /* _ma_dispose */ + + + /* Make new page on disk */ + +my_off_t _ma_new(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level) +{ + my_off_t pos; + byte buff[8]; + DBUG_ENTER("_ma_new"); + + if ((pos= info->s->state.key_del) == HA_OFFSET_ERROR) 
+ { + if (info->state->key_file_length >= + info->s->base.max_key_file_length - keyinfo->block_length) + { + my_errno=HA_ERR_INDEX_FILE_FULL; + DBUG_RETURN(HA_OFFSET_ERROR); + } + pos=info->state->key_file_length; + info->state->key_file_length+= keyinfo->block_length; + } + else + { + if (!key_cache_read(info->s->key_cache, + info->s->kfile, pos, level, + buff, + (uint) sizeof(buff), + (uint) keyinfo->block_length,0)) + pos= HA_OFFSET_ERROR; + else + info->s->state.key_del= mi_sizekorr(buff); + } + info->s->state.changed|= STATE_NOT_SORTED_PAGES; + DBUG_PRINT("exit",("Pos: %ld",(long) pos)); + DBUG_RETURN(pos); +} /* _ma_new */ diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c new file mode 100644 index 00000000000..c1312cb1e77 --- /dev/null +++ b/storage/maria/ma_panic.c @@ -0,0 +1,126 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "ma_fulltext.h" + +/* + Stop usage of Maria + + SYNOPSIS + maria_panic() + flag HA_PANIC_CLOSE: All maria files (tables and log) are closed. + maria_end() is called. + HA_PANIC_WRITE: All misam files are unlocked and + all changed data in single user maria is + written to file + HA_PANIC_READ All maria files that was locked when + maria_panic(HA_PANIC_WRITE) was done is + locked. A maria_readinfo() is done for + all single user files to get changes + in database + + RETURN + 0 ok + # error number in case of error +*/ + +int maria_panic(enum ha_panic_function flag) +{ + int error=0; + LIST *list_element,*next_open; + MARIA_HA *info; + DBUG_ENTER("maria_panic"); + + if (!maria_inited) + DBUG_RETURN(0); + pthread_mutex_lock(&THR_LOCK_maria); + for (list_element=maria_open_list ; list_element ; list_element=next_open) + { + next_open=list_element->next; /* Save if close */ + info=(MARIA_HA*) list_element->data; + switch (flag) { + case HA_PANIC_CLOSE: + pthread_mutex_unlock(&THR_LOCK_maria); /* Not exactly right... */ + if (maria_close(info)) + error=my_errno; + pthread_mutex_lock(&THR_LOCK_maria); + break; + case HA_PANIC_WRITE: /* Do this to free databases */ +#ifdef CANT_OPEN_FILES_TWICE + if (info->s->options & HA_OPTION_READ_ONLY_DATA) + break; +#endif + if (flush_key_blocks(info->s->key_cache, info->s->kfile, FLUSH_RELEASE)) + error=my_errno; + if (info->opt_flag & WRITE_CACHE_USED) + if (flush_io_cache(&info->rec_cache)) + error=my_errno; + if (info->opt_flag & READ_CACHE_USED) + { + if (flush_io_cache(&info->rec_cache)) + error=my_errno; + reinit_io_cache(&info->rec_cache,READ_CACHE,0, + (pbool) (info->lock_type != F_UNLCK),1); + } + if (info->lock_type != F_UNLCK && ! 
info->was_locked) + { + info->was_locked=info->lock_type; + if (maria_lock_database(info,F_UNLCK)) + error=my_errno; + } +#ifdef CANT_OPEN_FILES_TWICE + if (info->s->kfile >= 0 && my_close(info->s->kfile,MYF(0))) + error = my_errno; + if (info->dfile >= 0 && my_close(info->dfile,MYF(0))) + error = my_errno; + info->s->kfile=info->dfile= -1; /* Files aren't open anymore */ + break; +#endif + case HA_PANIC_READ: /* Restore to before WRITE */ +#ifdef CANT_OPEN_FILES_TWICE + { /* Open closed files */ + char name_buff[FN_REFLEN]; + if (info->s->kfile < 0) + if ((info->s->kfile= my_open(fn_format(name_buff,info->filename,"", + N_NAME_IEXT,4),info->mode, + MYF(MY_WME))) < 0) + error = my_errno; + if (info->dfile < 0) + { + if ((info->dfile= my_open(fn_format(name_buff,info->filename,"", + N_NAME_DEXT,4),info->mode, + MYF(MY_WME))) < 0) + error = my_errno; + info->rec_cache.file=info->dfile; + } + } +#endif + if (info->was_locked) + { + if (maria_lock_database(info, info->was_locked)) + error=my_errno; + info->was_locked=0; + } + break; + } + } + pthread_mutex_unlock(&THR_LOCK_maria); + if (flag == HA_PANIC_CLOSE) + maria_end(); + if (!error) + DBUG_RETURN(0); + DBUG_RETURN(my_errno=error); +} /* maria_panic */ diff --git a/storage/maria/ma_preload.c b/storage/maria/ma_preload.c new file mode 100644 index 00000000000..f387f2b7de3 --- /dev/null +++ b/storage/maria/ma_preload.c @@ -0,0 +1,117 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Preload indexes into key cache +*/ + +#include "maria_def.h" + + +/* + Preload pages of the index file for a table into the key cache + + SYNOPSIS + maria_preload() + info open table + map map of indexes to preload into key cache + ignore_leaves only non-leaves pages are to be preloaded + + RETURN VALUE + 0 if a success. error code - otherwise. + + NOTES. + At present pages for all indexes are preloaded. + In future only pages for indexes specified in the key_map parameter + of the table will be preloaded. 
+*/ + +int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves) +{ + uint i; + ulong length, block_length= 0; + uchar *buff= NULL; + MARIA_SHARE* share= info->s; + uint keys= share->state.header.keys; + MARIA_KEYDEF *keyinfo= share->keyinfo; + my_off_t key_file_length= share->state.state.key_file_length; + my_off_t pos= share->base.keystart; + DBUG_ENTER("maria_preload"); + + if (!keys || !maria_is_any_key_active(key_map) || key_file_length == pos) + DBUG_RETURN(0); + + block_length= keyinfo[0].block_length; + + /* Check whether all indexes use the same block size */ + for (i= 1 ; i < keys ; i++) + { + if (keyinfo[i].block_length != block_length) + DBUG_RETURN(my_errno= HA_ERR_NON_UNIQUE_BLOCK_SIZE); + } + + length= info->preload_buff_size/block_length * block_length; + set_if_bigger(length, block_length); + + if (!(buff= (uchar *) my_malloc(length, MYF(MY_WME)))) + DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM); + + if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_RELEASE)) + goto err; + + do + { + /* Read the next block of index file into the preload buffer */ + if ((my_off_t) length > (key_file_length-pos)) + length= (ulong) (key_file_length-pos); + if (my_pread(share->kfile, (byte*) buff, length, pos, MYF(MY_FAE|MY_FNABP))) + goto err; + + if (ignore_leaves) + { + uchar *end= buff+length; + do + { + if (_ma_test_if_nod(buff)) + { + if (key_cache_insert(share->key_cache, + share->kfile, pos, DFLT_INIT_HITS, + (byte*) buff, block_length)) + goto err; + } + pos+= block_length; + } + while ((buff+= block_length) != end); + buff= end-length; + } + else + { + if (key_cache_insert(share->key_cache, + share->kfile, pos, DFLT_INIT_HITS, + (byte*) buff, length)) + goto err; + pos+= length; + } + } + while (pos != key_file_length); + + my_free((char*) buff, MYF(0)); + DBUG_RETURN(0); + +err: + my_free((char*) buff, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(my_errno= errno); +} diff --git a/storage/maria/ma_range.c b/storage/maria/ma_range.c new file mode 100644 index 00000000000..d3806aa79bc --- /dev/null +++ b/storage/maria/ma_range.c @@ -0,0 +1,258 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Gives a approximated number of how many records there is between two keys. + Used when optimizing querries. 
+ */ + +#include "maria_def.h" +#include "ma_rt_index.h" + +static ha_rows _ma_record_pos(MARIA_HA *info,const byte *key,uint key_len, + enum ha_rkey_function search_flag); +static double _ma_search_pos(MARIA_HA *info,MARIA_KEYDEF *keyinfo, byte *key, + uint key_len,uint nextflag, my_off_t pos); +static uint _ma_keynr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *keypos, uint *ret_max_key); + + +/* + Estimate how many records there is in a given range + + SYNOPSIS + maria_records_in_range() + info MARIA handler + inx Index to use + min_key Min key. Is = 0 if no min range + max_key Max key. Is = 0 if no max range + + NOTES + We should ONLY return 0 if there is no rows in range + + RETURN + HA_POS_ERROR error (or we can't estimate number of rows) + number Estimated number of rows +*/ + + +ha_rows maria_records_in_range(MARIA_HA *info, int inx, key_range *min_key, + key_range *max_key) +{ + ha_rows start_pos,end_pos,res; + DBUG_ENTER("maria_records_in_range"); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(HA_POS_ERROR); + + if (fast_ma_readinfo(info)) + DBUG_RETURN(HA_POS_ERROR); + info->update&= (HA_STATE_CHANGED+HA_STATE_ROW_CHANGED); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + + switch(info->s->keyinfo[inx].key_alg){ +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + { + byte *key_buff; + uint start_key_len; + + /* + The problem is that the optimizer doesn't support + RTree keys properly at the moment. + Hope this will be fixed some day. + But now NULL in the min_key means that we + didn't make the task for the RTree key + and expect BTree functionality from it. + As it's not able to handle such request + we return the error. + */ + if (!min_key) + { + res= HA_POS_ERROR; + break; + } + key_buff= info->lastkey+info->s->base.max_key_length; + start_key_len= _ma_pack_key(info,inx, key_buff, + min_key->key, min_key->length, + (HA_KEYSEG**) 0); + res= maria_rtree_estimate(info, inx, key_buff, start_key_len, + maria_read_vec[min_key->flag]); + res= res ? res : 1; /* Don't return 0 */ + break; + } +#endif + case HA_KEY_ALG_BTREE: + default: + start_pos= (min_key ? + _ma_record_pos(info, min_key->key, min_key->length, + min_key->flag) : + (ha_rows) 0); + end_pos= (max_key ? + _ma_record_pos(info, max_key->key, max_key->length, + max_key->flag) : + info->state->records+ (ha_rows) 1); + res= (end_pos < start_pos ? (ha_rows) 0 : + (end_pos == start_pos ? 
(ha_rows) 1 : end_pos-start_pos)); + if (start_pos == HA_POS_ERROR || end_pos == HA_POS_ERROR) + res=HA_POS_ERROR; + } + + if (info->s->concurrent_insert) + rw_unlock(&info->s->key_root_lock[inx]); + fast_ma_writeinfo(info); + + DBUG_PRINT("info",("records: %ld",(ulong) (res))); + DBUG_RETURN(res); +} + + + /* Find relative position (in records) for key in index-tree */ + +static ha_rows _ma_record_pos(MARIA_HA *info, const byte *key, uint key_len, + enum ha_rkey_function search_flag) +{ + uint inx=(uint) info->lastinx, nextflag; + MARIA_KEYDEF *keyinfo=info->s->keyinfo+inx; + byte *key_buff; + double pos; + DBUG_ENTER("_ma_record_pos"); + DBUG_PRINT("enter",("search_flag: %d",search_flag)); + + if (key_len == 0) + key_len= USE_WHOLE_KEY; + key_buff=info->lastkey+info->s->base.max_key_length; + key_len= _ma_pack_key(info, inx, key_buff, key, key_len, + (HA_KEYSEG**) 0); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg, + key_buff, key_len);); + nextflag=maria_read_vec[search_flag]; + if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST))) + key_len=USE_WHOLE_KEY; + + pos= _ma_search_pos(info,keyinfo, key_buff, key_len, + nextflag | SEARCH_SAVE_BUFF, + info->s->state.key_root[inx]); + if (pos >= 0.0) + { + DBUG_PRINT("exit",("pos: %ld",(ulong) (pos*info->state->records))); + DBUG_RETURN((ulong) (pos*info->state->records+0.5)); + } + DBUG_RETURN(HA_POS_ERROR); +} + + + /* This is a modified version of _ma_search */ + /* Returns offset for key in indextable (decimal 0.0 <= x <= 1.0) */ + +static double _ma_search_pos(register MARIA_HA *info, + register MARIA_KEYDEF *keyinfo, + byte *key, uint key_len, uint nextflag, + register my_off_t pos) +{ + int flag; + uint nod_flag,keynr,max_keynr; + my_bool after_key; + byte *keypos, *buff; + double offset; + DBUG_ENTER("_ma_search_pos"); + + if (pos == HA_OFFSET_ERROR) + DBUG_RETURN(0.5); + + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,1))) + goto err; + flag=(*keyinfo->bin_search)(info, keyinfo, buff, key, key_len, nextflag, + &keypos,info->lastkey, &after_key); + nod_flag=_ma_test_if_nod(buff); + keynr= _ma_keynr(info,keyinfo,buff,keypos,&max_keynr); + + if (flag) + { + if (flag == MARIA_FOUND_WRONG_KEY) + DBUG_RETURN(-1); /* error */ + /* + Didn't found match. keypos points at next (bigger) key + Try to find a smaller, better matching key. + Matches keynr + [0-1] + */ + if (flag > 0 && ! nod_flag) + offset= 1.0; + else if ((offset= _ma_search_pos(info,keyinfo,key,key_len,nextflag, + _ma_kpos(nod_flag,keypos))) < 0) + DBUG_RETURN(offset); + } + else + { + /* + Found match. Keypos points at the start of the found key + Matches keynr+1 + */ + offset=1.0; /* Matches keynr+1 */ + if ((nextflag & SEARCH_FIND) && nod_flag && + ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME || + key_len != USE_WHOLE_KEY)) + { + /* + There may be identical keys in the tree. Try to match on of those. 
+ Matches keynr + [0-1] + */ + if ((offset= _ma_search_pos(info,keyinfo,key,key_len,SEARCH_FIND, + _ma_kpos(nod_flag,keypos))) < 0) + DBUG_RETURN(offset); /* Read error */ + } + } + DBUG_PRINT("info",("keynr: %d offset: %g max_keynr: %d nod: %d flag: %d", + keynr,offset,max_keynr,nod_flag,flag)); + DBUG_RETURN((keynr+offset)/(max_keynr+1)); +err: + DBUG_PRINT("exit",("Error: %d",my_errno)); + DBUG_RETURN (-1.0); +} + + + /* Get keynummer of current key and max number of keys in nod */ + +static uint _ma_keynr(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *page, byte *keypos, uint *ret_max_key) +{ + uint nod_flag,keynr,max_key; + byte t_buff[HA_MAX_KEY_BUFF],*end; + + end= page+maria_getint(page); + nod_flag=_ma_test_if_nod(page); + page+=2+nod_flag; + + if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + *ret_max_key= (uint) (end-page)/(keyinfo->keylength+nod_flag); + return (uint) (keypos-page)/(keyinfo->keylength+nod_flag); + } + + max_key=keynr=0; + t_buff[0]=0; /* Safety */ + while (page < end) + { + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff)) + return 0; /* Error */ + max_key++; + if (page == keypos) + keynr=max_key; + } + *ret_max_key=max_key; + return(keynr); +} diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c new file mode 100644 index 00000000000..b6739b86874 --- /dev/null +++ b/storage/maria/ma_recovery.c @@ -0,0 +1,268 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3072 Maria recovery + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* Here is the implementation of this module */ + +#include "page_cache.h" +#include "least_recently_dirtied.h" +#include "transaction.h" +#include "share.h" +#include "log.h" + +typedef struct st_record_type_properties { + /* used for debug error messages or "maria_read_log" command-line tool: */ + char *name, + my_bool record_ends_group; + /* a function to execute when we see the record during the REDO phase */ + int (*record_execute_in_redo_phase)(RECORD *); /* param will be record header instead later */ + /* a function to execute when we see the record during the UNDO phase */ + int (*record_execute_in_undo_phase)(RECORD *); /* param will be record header instead later */ +} RECORD_TYPE_PROPERTIES; + +int no_op(RECORD *) {return 0}; + +RECORD_TYPE_PROPERTIES all_record_type_properties[]= +{ + /* listed here in the order of the "log records type" enumeration */ + {"REDO_INSERT_HEAD", FALSE, redo_insert_head_execute_in_redo_phase, no_op}, + ..., + {"UNDO_INSERT" , TRUE , undo_insert_execute_in_redo_phase, undo_insert_execute_in_undo_phase}, + {"COMMIT", , TRUE , commit_execute_in_redo_phase, no_op}, + ... 
+}; + +int redo_insert_head_execute_in_redo_phase(RECORD *record) +{ + /* write the data to the proper page */ +} + +int undo_insert_execute_in_redo_phase(RECORD *record) +{ + trans_table[short_trans_id].undo_lsn= record.lsn; + /* don't restore the old version of the row */ +} + +int undo_insert_execute_in_undo_phase(RECORD *record) +{ + /* restore the old version of the row */ + trans_table[short_trans_id].undo_lsn= record.prev_undo_lsn; +} + +int commit_execute_in_redo_phase(RECORD *record) +{ + trans_table[short_trans_id].state= COMMITTED; + /* + and that's all: the delete/update handler should not be woken up! as there + may be REDO for purge further in the log. + */ +} + +#define record_ends_group(R) \ + all_record_type_properties[(R)->type].record_ends_group) + +#define execute_log_record_in_redo_phase(R) \ + all_record_type_properties[(R).type].record_execute_in_redo_phase(R) + + +int recovery() +{ + control_file_create_or_open(); + /* + init log handler: tell it that we are going to do large reads of the + log, sequential and backward. Log handler could decide to alloc a big + read-only IO_CACHE for this, or use its usual page cache. + */ + + /* read checkpoint log record from log handler */ + RECORD *checkpoint_record= log_read_record(last_checkpoint_lsn_at_start); + + /* parse this record, build structs (dirty_pages, transactions table, file_map) */ + /* + read log records (note: sometimes only the header is needed, for ex during + REDO phase only the header of UNDO is needed, not the 4G blob in the + variable-length part, so I could use that; however for PREPARE (which is a + variable-length record) I'll need to read the full record in the REDO + phase): + */ + + /**** REDO PHASE *****/ + + record= log_read_record(min(rec_lsn, ...)); /* later, read only header */ + + /* + if log handler knows the end LSN of the log, we could print here how many + MB of log we have to read (to give an idea of the time), and print + progress notes. + */ + + while (record != NULL) + { + /* + A complete group is a set of log records with an "end mark" record + (e.g. a set of REDOs for an operation, terminated by an UNDO for this + operation); if there is no "end mark" record the group is incomplete + and won't be executed. + */ + if (record_ends_group(record) + { + if (trans_table[record.short_trans_id].group_start_lsn != 0) + { + /* + There is a complete group for this transaction, containing more than + this event. + We're going to read recently read log records: + for this log_read_record() to be efficient (not touch the disk), + log handler could cache recently read pages + (can just use an IO_CACHE of 10 MB to read the log, or the normal + log handler page cache). + Without it only OS file cache will help. 
+ */ + record2= + log_read_record(trans_table[record.short_trans_id].group_start_lsn); + + do + { + if (record2.short_trans_id == record.short_trans_id) + execute_log_record_in_redo_phase(record2); /* it's in our group */ + record2= log_read_next_record(); + } + while (record2.lsn < record.lsn); + trans_table[record.short_trans_id].group_start_lsn= 0; /* group finished */ + } + execute_log_record_in_redo_phase(record); + } + else /* record does not end group */ + { + /* just record the fact, can't know if can execute yet */ + if (trans_table[short_trans_id].group_start_lsn == 0) /* group not yet started */ + trans_table[short_trans_id].group_start_lsn= record.lsn; + } + + /* + Later we can optimize: instead of "execute_log_record(record2)", do + copy_record_into_exec_buffer(record2): + this will just copy record into a multi-record (10 MB?) memory buffer, + and when buffer is full, will do sorting of REDOs per + page id and execute them. + This sorting will enable us to do more sequential reads of the + data/index pages. + Note that updating bitmap pages (when we have executed a REDO for a page + we update its bitmap page) may break the sequential read of pages, + so maybe we should read and cache bitmap pages in the beginning. + Or ok the sequence will be broken, but quickly all bitmap pages will be + in memory and so the sequence will not be broken anymore. + Sorting could even determine, based on physical device of files + ("st_dev" in stat()), that some files should be should be taken by + different threads, if we want to do parallism. + */ + /* + Here's how to read a complete variable-length record if needed: + <sanja> read the header, allocate buffer of record length, read whole + record. + */ + record= log_read_next_record(); + } + + /* + Earlier or here, create true transactions in TM. + If done earlier, note that TM should not wake up the delete/update handler + when it receives a commit info, as existing REDO for purge may exist in + the log, and so the delete/update handler may do changes which conflict + with these REDOs. + Even if done here, better to not wake it up now as we're going to free the + page cache. + + MikaelR suggests: support checkpoints during REDO phase too: do checkpoint + after a certain amount of log records have been executed. This helps + against repeated crashes. Those checkpoints could not be user-requested + (as engine is not communicating during the REDO phase), so they would be + automatic: this changes the original assumption that we don't write to the + log while in the REDO phase, but why not. How often should we checkpoint? + */ + + /* + We want to have two steps: + engine->recover_with_max_memory(); + next_engine->recover_with_max_memory(); + engine->init_with_normal_memory(); + next_engine->init_with_normal_memory(); + So: in recover_with_max_memory() allocate a giant page cache, do REDO + phase, then all page cache is flushed and emptied and freed (only retain + small structures like TM): take full checkpoint, which is useful if + next engine crashes in its recovery the next second. + Destroy all shares (maria_close()), then at init_with_normal_memory() we + do this: + */ + + /**** UNDO PHASE *****/ + + print_information_to_error_log(nb of trans to roll back, nb of prepared trans); + + /* + Launch one or more threads to do the background rollback. Don't wait for + them to complete their rollback (background rollback; for debugging, we + can have an option which waits). 
Set a counter (total_of_rollback_threads) + to the number of threads to lauch. + + Note that InnoDB's rollback-in-background works as long as InnoDB is the + last engine to recover, otherwise MySQL will refuse new connections until + the last engine has recovered so it's not "background" from the user's + point of view. InnoDB is near top of sys_table_types so all others + (e.g. BDB) recover after it... So it's really "online rollback" only if + InnoDB is the only engine. + */ + + /* wake up delete/update handler */ + /* tell the TM that it can now accept new transactions */ + + /* + mark that checkpoint requests are now allowed. + */ +} + +pthread_handler_decl rollback_background_thread() +{ + /* + execute the normal runtime-rollback code for a bunch of transactions. + */ + while (trans in list_of_trans_to_rollback_by_this_thread) + { + while (trans->undo_lsn != 0) + { + /* this is the normal runtime-rollback code: */ + record= log_read_record(trans->undo_lsn); + execute_log_record_in_undo_phase(record); + trans->undo_lsn= record.prev_undo_lsn; + } + /* remove trans from list */ + } + lock_mutex(rollback_threads); /* or atomic counter */ + if (--total_of_rollback_threads == 0) + { + /* + All rollback threads are done. Print "rollback finished" to the error + log and take a full checkpoint. + */ + } + unlock_mutex(rollback_threads); + pthread_exit(); +} diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h new file mode 100644 index 00000000000..05026f4b52a --- /dev/null +++ b/storage/maria/ma_recovery.h @@ -0,0 +1,26 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3072 Maria recovery + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* This is the interface of this module. */ + +/* Performs recovery of the engine at start */ +int recovery(); diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c new file mode 100644 index 00000000000..5d89cc063d7 --- /dev/null +++ b/storage/maria/ma_rename.c @@ -0,0 +1,89 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Rename a table +*/ + +#include "ma_fulltext.h" + +int maria_rename(const char *old_name, const char *new_name) +{ + char from[FN_REFLEN],to[FN_REFLEN]; + int data_file_rename_error; +#ifdef USE_RAID + uint raid_type=0,raid_chunks=0; +#endif + DBUG_ENTER("maria_rename"); + +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(old_name,"rename old_table"); + _ma_check_table_is_closed(new_name,"rename new table2"); +#endif + /* LOCKTODO take X-lock on table here */ +#ifdef USE_RAID + { + MARIA_HA *info; + if (!(info=maria_open(old_name, O_RDONLY, 0))) + DBUG_RETURN(my_errno); + raid_type = info->s->base.raid_type; + raid_chunks = info->s->base.raid_chunks; + maria_close(info); + } +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(old_name,"rename raidcheck"); +#endif +#endif /* USE_RAID */ + + fn_format(from,old_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + fn_format(to,new_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + /* + RECOVERYTODO log the two renames below. Update + ZeroDirtyPagesLSN of the table on disk (=> sync the files), this is + needed so that Recovery does not pick a wrong table. + Then do the file renames. + For this log record to be of any use for Recovery, we need the upper MySQL + layer to be crash-safe in DDLs; when it is we should reconsider the moment + of writing this log record, how to use it in Recovery, and force the log. + For now this record is only informative. But ZeroDirtyPagesLSN is + critically needed! + */ + if (my_rename_with_symlink(from, to, MYF(MY_WME | MY_SYNC_DIR))) + DBUG_RETURN(my_errno); + fn_format(from,old_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + fn_format(to,new_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); +#ifdef USE_RAID + if (raid_type) + data_file_rename_error= my_raid_rename(from, to, raid_chunks, + MYF(MY_WME | MY_SYNC_DIR)); + else +#endif + data_file_rename_error= + my_rename_with_symlink(from, to, MYF(MY_WME | MY_SYNC_DIR)); + if (data_file_rename_error) + { + /* + now we have a renamed index file and a non-renamed data file, try to + undo the rename of the index file. + */ + data_file_rename_error= my_errno; + fn_format(from, old_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT)); + fn_format(to, new_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT)); + my_rename_with_symlink(to, from, MYF(MY_WME | MY_SYNC_DIR)); + } + DBUG_RETURN(data_file_rename_error); + +} diff --git a/storage/maria/ma_rfirst.c b/storage/maria/ma_rfirst.c new file mode 100644 index 00000000000..6fa8af75c40 --- /dev/null +++ b/storage/maria/ma_rfirst.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* Read first row through a specfic key */ + +int maria_rfirst(MARIA_HA *info, byte *buf, int inx) +{ + DBUG_ENTER("maria_rfirst"); + info->cur_row.lastpos= HA_OFFSET_ERROR; + info->update|= HA_STATE_PREV_FOUND; + DBUG_RETURN(maria_rnext(info,buf,inx)); +} /* maria_rfirst */ diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c new file mode 100644 index 00000000000..ad27d3c286c --- /dev/null +++ b/storage/maria/ma_rkey.c @@ -0,0 +1,179 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read record based on a key */ + +#include "maria_def.h" +#include "ma_rt_index.h" + + /* Read a record using key */ + /* Ordinary search_flag is 0 ; Give error if no record with key */ + +int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, + uint key_len, enum ha_rkey_function search_flag) +{ + byte *key_buff; + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *keyinfo; + HA_KEYSEG *last_used_keyseg; + uint pack_key_length, use_key_length, nextflag; + DBUG_ENTER("maria_rkey"); + DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d", + (long) info, (long) buf, inx, search_flag)); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(my_errno); + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->last_key_func= search_flag; + keyinfo= share->keyinfo + inx; + + if (info->once_flags & USE_PACKED_KEYS) + { + info->once_flags&= ~USE_PACKED_KEYS; /* Reset flag */ + /* + key is already packed!; This happens when we are using a MERGE TABLE + */ + key_buff= info->lastkey+info->s->base.max_key_length; + pack_key_length= key_len; + bmove(key_buff,key,key_len); + last_used_keyseg= 0; + } + else + { + if (key_len == 0) + key_len=USE_WHOLE_KEY; + /* Save the packed key for later use in the second buffer of lastkey. */ + key_buff=info->lastkey+info->s->base.max_key_length; + pack_key_length= _ma_pack_key(info,(uint) inx, key_buff, key, + key_len, &last_used_keyseg); + /* Save packed_key_length for use by the MERGE engine. 
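+       The MERGE engine re-uses this length to pass the already-packed key on
+       to its other underlying tables (the USE_PACKED_KEYS branch above).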
*/ + info->pack_key_length= pack_key_length; + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg, + key_buff, pack_key_length);); + } + + if (fast_ma_readinfo(info)) + goto err; + if (share->concurrent_insert) + rw_rdlock(&share->key_root_lock[inx]); + + nextflag=maria_read_vec[search_flag]; + use_key_length=pack_key_length; + if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST))) + use_key_length=USE_WHOLE_KEY; + + switch (info->s->keyinfo[inx].key_alg) { +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + if (maria_rtree_find_first(info,inx,key_buff,use_key_length,nextflag) < 0) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno= HA_ERR_CRASHED; + info->cur_row.lastpos= HA_OFFSET_ERROR; + } + break; +#endif + case HA_KEY_ALG_BTREE: + default: + if (!_ma_search(info, keyinfo, key_buff, use_key_length, + maria_read_vec[search_flag], + info->s->state.key_root[inx]) && + share->concurrent_insert) + { + /* + If we searching for a partial key (or using >, >=, < or <=) and + the data is outside of the data file, we need to continue searching + for the first key inside the data file + */ + if (info->cur_row.lastpos >= info->state->data_file_length && + (search_flag != HA_READ_KEY_EXACT || + last_used_keyseg != keyinfo->seg + keyinfo->keysegs)) + { + do + { + uint not_used[2]; + /* + Skip rows that are inserted by other threads since we got a lock + Note that this can only happen if we are not searching after an + full length exact key, because the keys are sorted + according to position + */ + if (_ma_search_next(info, keyinfo, info->lastkey, + info->lastkey_length, + maria_readnext_vec[search_flag], + info->s->state.key_root[inx])) + break; + /* + Check that the found key does still match the search. + _ma_search_next() delivers the next key regardless of its + value. 
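+            Without this check an HA_READ_KEY_EXACT search could return a row
+            whose key merely sorts after the one that was asked for.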
+ */ + if (search_flag == HA_READ_KEY_EXACT && + ha_key_cmp(keyinfo->seg, (uchar*) key_buff, + (uchar*) info->lastkey, use_key_length, + SEARCH_FIND, not_used)) + { + my_errno= HA_ERR_KEY_NOT_FOUND; + info->cur_row.lastpos= HA_OFFSET_ERROR; + break; + } + } while (info->cur_row.lastpos >= info->state->data_file_length); + } + } + } + if (share->concurrent_insert) + rw_unlock(&share->key_root_lock[inx]); + + if (info->cur_row.lastpos == HA_OFFSET_ERROR) + { + fast_ma_writeinfo(info); + goto err; + } + + /* Calculate length of the found key; Used by maria_rnext_same */ + if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg) + info->last_rkey_length= _ma_keylength_part(keyinfo, info->lastkey, + last_used_keyseg); + else + info->last_rkey_length= pack_key_length; + + + /* Check if we don't want to have record back, only error message */ + if (!buf) + { + fast_ma_writeinfo(info); + DBUG_RETURN(0); + } + if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + + info->cur_row.lastpos= HA_OFFSET_ERROR; /* Didn't find row */ + +err: + /* Store last used key as a base for read next */ + memcpy(info->lastkey,key_buff,pack_key_length); + info->last_rkey_length= pack_key_length; + bzero((char*) info->lastkey+pack_key_length,info->s->base.rec_reflength); + info->lastkey_length=pack_key_length+info->s->base.rec_reflength; + + if (search_flag == HA_READ_AFTER_KEY) + info->update|=HA_STATE_NEXT_FOUND; /* Previous gives last row */ + DBUG_RETURN(my_errno); +} /* _ma_rkey */ diff --git a/storage/maria/ma_rlast.c b/storage/maria/ma_rlast.c new file mode 100644 index 00000000000..504cc89aed3 --- /dev/null +++ b/storage/maria/ma_rlast.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* Read last row with the same key as the previous read. */ + +int maria_rlast(MARIA_HA *info, byte *buf, int inx) +{ + DBUG_ENTER("maria_rlast"); + info->cur_row.lastpos= HA_OFFSET_ERROR; + info->update|= HA_STATE_NEXT_FOUND; + DBUG_RETURN(maria_rprev(info,buf,inx)); +} /* maria_rlast */ diff --git a/storage/maria/ma_rnext.c b/storage/maria/ma_rnext.c new file mode 100644 index 00000000000..c7feded933e --- /dev/null +++ b/storage/maria/ma_rnext.c @@ -0,0 +1,123 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#include "ma_rt_index.h" + + /* + Read next row with the same key as previous read + One may have done a write, update or delete of the previous row. + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! + */ + +int maria_rnext(MARIA_HA *info, byte *buf, int inx) +{ + int error,changed; + uint flag; + DBUG_ENTER("maria_rnext"); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(my_errno); + flag=SEARCH_BIGGER; /* Read next */ + if (info->cur_row.lastpos == HA_OFFSET_ERROR && + info->update & HA_STATE_PREV_FOUND) + flag=0; /* Read first */ + + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + changed= _ma_test_if_changed(info); + if (!flag) + { + switch(info->s->keyinfo[inx].key_alg){ +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + error=maria_rtree_get_first(info,inx,info->lastkey_length); + break; +#endif + case HA_KEY_ALG_BTREE: + default: + error= _ma_search_first(info,info->s->keyinfo+inx, + info->s->state.key_root[inx]); + break; + } + } + else + { + switch (info->s->keyinfo[inx].key_alg) { +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + /* + Note that rtree doesn't support that the table + may be changed since last call, so we do need + to skip rows inserted by other threads like in btree + */ + error= maria_rtree_get_next(info,inx,info->lastkey_length); + break; +#endif + case HA_KEY_ALG_BTREE: + default: + if (!changed) + error= _ma_search_next(info,info->s->keyinfo+inx,info->lastkey, + info->lastkey_length,flag, + info->s->state.key_root[inx]); + else + error= _ma_search(info,info->s->keyinfo+inx,info->lastkey, + USE_WHOLE_KEY,flag, info->s->state.key_root[inx]); + } + } + + if (info->s->concurrent_insert) + { + if (!error) + { + while (info->cur_row.lastpos >= info->state->data_file_length) + { + /* Skip rows inserted by other threads since we got a lock */ + if ((error= _ma_search_next(info,info->s->keyinfo+inx, + info->lastkey, + info->lastkey_length, + SEARCH_BIGGER, + info->s->state.key_root[inx]))) + break; + } + } + rw_unlock(&info->s->key_root_lock[inx]); + } + /* Don't clear if database-changed */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->update|= HA_STATE_NEXT_FOUND; + + if (error) + { + if (my_errno == HA_ERR_KEY_NOT_FOUND) + my_errno=HA_ERR_END_OF_FILE; + } + else if (!buf) + { + DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? 
my_errno : 0); + } + else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + DBUG_PRINT("error",("Got error: %d, errno: %d",error, my_errno)); + DBUG_RETURN(my_errno); +} /* maria_rnext */ diff --git a/storage/maria/ma_rnext_same.c b/storage/maria/ma_rnext_same.c new file mode 100644 index 00000000000..a5ce0cfe15c --- /dev/null +++ b/storage/maria/ma_rnext_same.c @@ -0,0 +1,108 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#include "ma_rt_index.h" + +/* + Read next row with the same key as previous read, but abort if + the key changes. + One may have done a write, update or delete of the previous row. + + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! +*/ + +int maria_rnext_same(MARIA_HA *info, byte *buf) +{ + int error; + uint inx,not_used[2]; + MARIA_KEYDEF *keyinfo; + DBUG_ENTER("maria_rnext_same"); + + if ((int) (inx= info->lastinx) < 0 || + info->cur_row.lastpos == HA_OFFSET_ERROR) + DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + keyinfo= info->s->keyinfo+inx; + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + + switch (keyinfo->key_alg) + { +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + if ((error=maria_rtree_find_next(info,inx, + maria_read_vec[info->last_key_func]))) + { + error=1; + my_errno=HA_ERR_END_OF_FILE; + info->cur_row.lastpos= HA_OFFSET_ERROR; + break; + } + break; +#endif + case HA_KEY_ALG_BTREE: + default: + if (!(info->update & HA_STATE_RNEXT_SAME)) + { + /* First rnext_same; Store old key */ + memcpy(info->lastkey2,info->lastkey,info->last_rkey_length); + } + for (;;) + { + if ((error= _ma_search_next(info,keyinfo,info->lastkey, + info->lastkey_length,SEARCH_BIGGER, + info->s->state.key_root[inx]))) + break; + if (ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, + (uchar*) info->lastkey2, + info->last_rkey_length, SEARCH_FIND, not_used)) + { + error=1; + my_errno=HA_ERR_END_OF_FILE; + info->cur_row.lastpos= HA_OFFSET_ERROR; + break; + } + /* Skip rows that are inserted by other threads since we got a lock */ + if (info->cur_row.lastpos < info->state->data_file_length) + break; + } + } + if (info->s->concurrent_insert) + rw_unlock(&info->s->key_root_lock[inx]); + /* Don't clear if database-changed */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME; + + if (error) + { + if (my_errno == HA_ERR_KEY_NOT_FOUND) + my_errno=HA_ERR_END_OF_FILE; + } + else if (!buf) + { + DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? 
my_errno : 0); + } + else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + DBUG_RETURN(my_errno); +} /* maria_rnext_same */ diff --git a/storage/maria/ma_rprev.c b/storage/maria/ma_rprev.c new file mode 100644 index 00000000000..ea562359ded --- /dev/null +++ b/storage/maria/ma_rprev.c @@ -0,0 +1,89 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* + Read previous row with the same key as previous read + One may have done a write, update or delete of the previous row. + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! + */ + +int maria_rprev(MARIA_HA *info, byte *buf, int inx) +{ + int error,changed; + register uint flag; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_rprev"); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(my_errno); + flag=SEARCH_SMALLER; /* Read previous */ + if (info->cur_row.lastpos == HA_OFFSET_ERROR && + info->update & HA_STATE_NEXT_FOUND) + flag=0; /* Read last */ + + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + changed= _ma_test_if_changed(info); + if (share->concurrent_insert) + rw_rdlock(&share->key_root_lock[inx]); + if (!flag) + error= _ma_search_last(info, share->keyinfo+inx, + share->state.key_root[inx]); + else if (!changed) + error= _ma_search_next(info,share->keyinfo+inx,info->lastkey, + info->lastkey_length,flag, + share->state.key_root[inx]); + else + error= _ma_search(info,share->keyinfo+inx,info->lastkey, + USE_WHOLE_KEY, flag, share->state.key_root[inx]); + + if (share->concurrent_insert) + { + if (!error) + { + while (info->cur_row.lastpos >= info->state->data_file_length) + { + /* Skip rows that are inserted by other threads since we got a lock */ + if ((error= _ma_search_next(info,share->keyinfo+inx,info->lastkey, + info->lastkey_length, + SEARCH_SMALLER, + share->state.key_root[inx]))) + break; + } + } + rw_unlock(&share->key_root_lock[inx]); + } + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->update|= HA_STATE_PREV_FOUND; + if (error) + { + if (my_errno == HA_ERR_KEY_NOT_FOUND) + my_errno=HA_ERR_END_OF_FILE; + } + else if (!buf) + { + DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? 
my_errno : 0); + } + else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + DBUG_RETURN(my_errno); +} /* maria_rprev */ diff --git a/storage/maria/ma_rrnd.c b/storage/maria/ma_rrnd.c new file mode 100644 index 00000000000..33940d5f23f --- /dev/null +++ b/storage/maria/ma_rrnd.c @@ -0,0 +1,54 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read a record with random-access. The position to the record must + get by MARIA_HA. The next record can be read with pos= MARIA_POS_ERROR */ + + +#include "maria_def.h" + +/* + Read a row based on position. + + RETURN + 0 Ok. + HA_ERR_RECORD_DELETED Record is deleted. + HA_ERR_END_OF_FILE EOF. +*/ + +int maria_rrnd(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos) +{ + DBUG_ENTER("maria_rrnd"); + + DBUG_ASSERT(filepos != HA_OFFSET_ERROR); +#ifdef NOT_USED + if (filepos == HA_OFFSET_ERROR) + { + skip_deleted_blocks=1; + if (info->cur_row.lastpos == HA_OFFSET_ERROR) /* First read ? */ + filepos= info->s->pack.header_length; /* Read first record */ + else + filepos= info->cur_row.nextpos; + } +#endif + + /* Init all but update-flag */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + + DBUG_RETURN((*info->s->read_record)(info, buf, filepos)); +} diff --git a/storage/maria/ma_rsame.c b/storage/maria/ma_rsame.c new file mode 100644 index 00000000000..7556c1e7332 --- /dev/null +++ b/storage/maria/ma_rsame.c @@ -0,0 +1,70 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +/* + Find current row with read on position or read on key + + NOTES + If inx >= 0 find record using key + + RETURN + 0 Ok + HA_ERR_KEY_NOT_FOUND Row is deleted + HA_ERR_END_OF_FILE End of file +*/ + + +int maria_rsame(MARIA_HA *info, byte *record, int inx) +{ + DBUG_ENTER("maria_rsame"); + + if (inx != -1 && ! 
maria_is_key_active(info->s->state.key_map, inx)) + { + DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + } + if (info->cur_row.lastpos == HA_OFFSET_ERROR || + info->update & HA_STATE_DELETED) + { + DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No current record */ + } + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + /* Read row from data file */ + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + + if (inx >= 0) + { + info->lastinx=inx; + info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record, + info->cur_row.lastpos); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + VOID(_ma_search(info,info->s->keyinfo+inx,info->lastkey, USE_WHOLE_KEY, + SEARCH_SAME, + info->s->state.key_root[inx])); + if (info->s->concurrent_insert) + rw_unlock(&info->s->key_root_lock[inx]); + } + + if (!(*info->read_record)(info, record, info->cur_row.lastpos)) + DBUG_RETURN(0); + if (my_errno == HA_ERR_RECORD_DELETED) + my_errno=HA_ERR_KEY_NOT_FOUND; + DBUG_RETURN(my_errno); +} /* maria_rsame */ diff --git a/storage/maria/ma_rsamepos.c b/storage/maria/ma_rsamepos.c new file mode 100644 index 00000000000..92138693c62 --- /dev/null +++ b/storage/maria/ma_rsamepos.c @@ -0,0 +1,59 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* read record through position and fix key-position */ +/* As maria_rsame but supply a position */ + +#include "maria_def.h" + + + /* + ** If inx >= 0 update index pointer + ** Returns one of the following values: + ** 0 = Ok. + ** HA_ERR_KEY_NOT_FOUND = Row is deleted + ** HA_ERR_END_OF_FILE = End of file + */ + +int maria_rsame_with_pos(MARIA_HA *info, byte *record, int inx, + MARIA_RECORD_POS filepos) +{ + DBUG_ENTER("maria_rsame_with_pos"); + DBUG_PRINT("enter",("index: %d filepos: %ld", inx, (long) filepos)); + + if (inx < -1 || + (inx >= 0 && ! 
maria_is_key_active(info->s->state.key_map, inx))) + { + DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + } + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if ((*info->s->read_record)(info, record, filepos)) + { + if (my_errno == HA_ERR_RECORD_DELETED) + my_errno=HA_ERR_KEY_NOT_FOUND; + DBUG_RETURN(my_errno); + } + info->cur_row.lastpos= filepos; + info->lastinx= inx; + if (inx >= 0) + { + info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record, + info->cur_row.lastpos); + info->update|=HA_STATE_KEY_CHANGED; /* Don't use indexposition */ + } + DBUG_RETURN(0); +} /* maria_rsame_pos */ diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c new file mode 100644 index 00000000000..8e8ec6c991b --- /dev/null +++ b/storage/maria/ma_rt_index.c @@ -0,0 +1,1093 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" +#include "ma_rt_key.h" +#include "ma_rt_mbr.h" + +#define REINSERT_BUFFER_INC 10 +#define PICK_BY_AREA +/*#define PICK_BY_PERIMETER*/ + +typedef struct st_page_level +{ + uint level; + my_off_t offs; +} stPageLevel; + +typedef struct st_page_list +{ + ulong n_pages; + ulong m_pages; + stPageLevel *pages; +} stPageList; + + +/* + Find next key in r-tree according to search_flag recursively + + NOTES + Used in maria_rtree_find_first() and maria_rtree_find_next() + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uint search_flag, + uint nod_cmp_flag, my_off_t page, int level) +{ + uint nod_flag; + int res; + byte *page_buf, *k, *last; + int k_len; + uint *saved_key = (uint*) (info->maria_rtree_recursion_state) + level; + + if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + if(info->maria_rtree_recursion_depth >= level) + { + k= page_buf + *saved_key; + } + else + { + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + } + last= rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) + { + if (nod_flag) + { + /* this is an internal node in the tree */ + if (!(res = maria_rtree_key_cmp(keyinfo->seg, + info->first_mbr_key, k, + info->last_rkey_length, nod_cmp_flag))) + { + switch ((res = maria_rtree_find_req(info, keyinfo, search_flag, + nod_cmp_flag, + _ma_kpos(nod_flag, k), + level + 1))) + { + case 0: /* found - exit from recursion */ + *saved_key = k - page_buf; + goto ok; + case 1: /* not found - continue searching */ + info->maria_rtree_recursion_depth = level; + break; + default: /* 
error */ + case -1: + goto err1; + } + } + } + else + { + /* this is a leaf */ + if (!maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key, + k, info->last_rkey_length, search_flag)) + { + byte *after_key = (byte*) rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->cur_row.lastpos = _ma_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, k, info->lastkey_length); + info->maria_rtree_recursion_depth = level; + *saved_key = last - page_buf; + + if (after_key < last) + { + info->int_keypos = info->buff; + info->int_maxpos = info->buff + (last - after_key); + memcpy(info->buff, after_key, last - after_key); + info->keybuff_used = 0; + } + else + { + info->keybuff_used = 1; + } + + res = 0; + goto ok; + } + } + } + info->cur_row.lastpos = HA_OFFSET_ERROR; + my_errno = HA_ERR_KEY_NOT_FOUND; + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + info->cur_row.lastpos = HA_OFFSET_ERROR; + return -1; +} + + +/* + Find first key in r-tree according to search_flag condition + + SYNOPSIS + maria_rtree_find_first() + info Handler to MARIA file + uint keynr Key number to use + key Key to search for + key_length Length of 'key' + search_flag Bitmap of flags how to do the search + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_find_first(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint search_flag) +{ + my_off_t root; + uint nod_cmp_flag; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + /* + Save searched key, include data pointer. + The data pointer is required if the search_flag contains MBR_DATA. + */ + memcpy(info->first_mbr_key, key, keyinfo->keylength); + info->last_rkey_length = key_length; + + info->maria_rtree_recursion_depth = -1; + info->keybuff_used = 1; + + nod_cmp_flag= ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? + MBR_WITHIN : MBR_INTERSECT); + return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, + 0); +} + + +/* + Find next key in r-tree according to search_flag condition + + SYNOPSIS + maria_rtree_find_next() + info Handler to MARIA file + uint keynr Key number to use + search_flag Bitmap of flags how to do the search + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag) +{ + my_off_t root; + uint nod_cmp_flag; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if (info->update & HA_STATE_DELETED) + return maria_rtree_find_first(info, keynr, info->lastkey, + info->lastkey_length, + search_flag); + + if (!info->keybuff_used) + { + byte *key= info->int_keypos; + + while (key < info->int_maxpos) + { + if (!maria_rtree_key_cmp(keyinfo->seg, + info->first_mbr_key, key, + info->last_rkey_length, search_flag)) + { + byte *after_key= key + keyinfo->keylength; + + info->cur_row.lastpos= _ma_dpos(info, 0, after_key); + memcpy(info->lastkey, key, info->lastkey_length); + + if (after_key < info->int_maxpos) + info->int_keypos= after_key; + else + info->keybuff_used= 1; + return 0; + } + key+= keyinfo->keylength; + } + } + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + nod_cmp_flag = ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? 
+ MBR_WITHIN : MBR_INTERSECT); + return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, 0); +} + + +/* + Get next key in r-tree recursively + + NOTES + Used in maria_rtree_get_first() and maria_rtree_get_next() + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint key_length, + my_off_t page, int level) +{ + byte *page_buf, *last, *k; + uint nod_flag, k_len; + int res; + uint *saved_key= (uint*) (info->maria_rtree_recursion_state) + level; + + if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length))) + return -1; + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + if(info->maria_rtree_recursion_depth >= level) + { + k = page_buf + *saved_key; + if (!nod_flag) + { + /* Only leaf pages contain data references. */ + /* Need to check next key with data reference. */ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + } + } + else + { + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + } + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) + { + if (nod_flag) + { + /* this is an internal node in the tree */ + switch ((res = maria_rtree_get_req(info, keyinfo, key_length, + _ma_kpos(nod_flag, k), level + 1))) + { + case 0: /* found - exit from recursion */ + *saved_key = k - page_buf; + goto ok; + case 1: /* not found - continue searching */ + info->maria_rtree_recursion_depth = level; + break; + default: + case -1: /* error */ + goto err1; + } + } + else + { + /* this is a leaf */ + byte *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->cur_row.lastpos = _ma_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, k, info->lastkey_length); + + info->maria_rtree_recursion_depth = level; + *saved_key = k - page_buf; + + if (after_key < last) + { + info->int_keypos = (byte*) saved_key; + memcpy(info->buff, page_buf, keyinfo->block_length); + info->int_maxpos = rt_PAGE_END(info->buff); + info->keybuff_used = 0; + } + else + { + info->keybuff_used = 1; + } + + res = 0; + goto ok; + } + } + info->cur_row.lastpos = HA_OFFSET_ERROR; + my_errno = HA_ERR_KEY_NOT_FOUND; + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + info->cur_row.lastpos = HA_OFFSET_ERROR; + return -1; +} + + +/* + Get first key in r-tree + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length) +{ + my_off_t root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + info->maria_rtree_recursion_depth = -1; + info->keybuff_used = 1; + + return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0); +} + + +/* + Get next key in r-tree + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length) +{ + my_off_t root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if (!info->keybuff_used) + { + uint k_len = keyinfo->keylength - info->s->base.rec_reflength; + /* rt_PAGE_NEXT_KEY(info->int_keypos) */ + byte *key = info->buff + *(int*)info->int_keypos + k_len + + info->s->base.rec_reflength; + /* rt_PAGE_NEXT_KEY(key) */ + byte *after_key = key + k_len + info->s->base.rec_reflength; + + 
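+    /*
+      key and after_key now point at the next key and the one after it inside
+      the page image that maria_rtree_get_req() saved in info->buff; below we
+      return this key and remember its offset for the following call.
+    */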
info->cur_row.lastpos = _ma_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, key, k_len + info->s->base.rec_reflength); + + *(int*)info->int_keypos = key - info->buff; + if (after_key >= info->int_maxpos) + { + info->keybuff_used = 1; + } + + return 0; + } + else + { + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0); + } +} + + +/* + Choose non-leaf better key for insertion +*/ + +#ifdef PICK_BY_PERIMETER +static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, + uint key_length, byte *page_buf, + uint nod_flag) +{ + double increase; + double best_incr = DBL_MAX; + double perimeter; + double best_perimeter; + uchar *best_key; + uchar *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + uchar *last = rt_PAGE_END(page_buf); + + LINT_INIT(best_perimeter); + LINT_INIT(best_key); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + if ((increase = maria_rtree_perimeter_increase(keyinfo->seg, k, key, key_length, + &perimeter)) == -1) + return NULL; + if ((increase < best_incr)|| + (increase == best_incr && perimeter < best_perimeter)) + { + best_key = k; + best_perimeter= perimeter; + best_incr = increase; + } + } + return best_key; +} + +#endif /*PICK_BY_PERIMETER*/ + +#ifdef PICK_BY_AREA +static byte *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, + uint key_length, byte *page_buf, + uint nod_flag) +{ + double increase; + double best_incr = DBL_MAX; + double area; + double best_area; + byte *best_key; + byte *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + byte *last = rt_PAGE_END(page_buf); + + LINT_INIT(best_area); + LINT_INIT(best_key); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + /* The following is safe as -1.0 is an exact number */ + if ((increase = maria_rtree_area_increase(keyinfo->seg, k, key, key_length, + &area)) == -1.0) + return NULL; + /* The following should be safe, even if we compare doubles */ + if (increase < best_incr) + { + best_key = k; + best_area = area; + best_incr = increase; + } + else + { + /* The following should be safe, even if we compare doubles */ + if ((increase == best_incr) && (area < best_area)) + { + best_key = k; + best_area = area; + best_incr = increase; + } + } + } + return best_key; +} + +#endif /*PICK_BY_AREA*/ + +/* + Go down and insert key into tree + + RETURN + -1 Error + 0 Child was not split + 1 Child was split +*/ + +static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, + uint key_length, my_off_t page, + my_off_t *new_page, + int ins_level, int level) +{ + uint nod_flag; + int res; + byte *page_buf, *k; + + if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length + + HA_MAX_KEY_BUFF))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + if ((ins_level == -1 && nod_flag) || /* key: go down to leaf */ + (ins_level > -1 && ins_level > level)) /* branch: go down to ins_level */ + { + if ((k = maria_rtree_pick_key(info, keyinfo, key, key_length, page_buf, + nod_flag)) == NULL) + goto err1; + switch ((res = maria_rtree_insert_req(info, keyinfo, key, key_length, + _ma_kpos(nod_flag, k), new_page, + ins_level, level + 1))) + { + case 0: /* child was not split */ + { + 
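+        /*
+          The child absorbed the key without splitting: widen this node's MBR
+          for that child so it still encloses the inserted key, then write the
+          modified page back.
+        */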
maria_rtree_combine_rect(keyinfo->seg, k, key, k, key_length); + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + case 1: /* child was split */ + { + byte *new_key = page_buf + keyinfo->block_length + nod_flag; + /* set proper MBR for key */ + if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length, + _ma_kpos(nod_flag, k))) + goto err1; + /* add new key for new page */ + _ma_kpointer(info, new_key - nod_flag, *new_page); + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, + *new_page)) + goto err1; + res = maria_rtree_add_key(info, keyinfo, new_key, key_length, + page_buf, new_page); + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + default: + case -1: /* error */ + { + goto err1; + } + } + } + else + { + res = maria_rtree_add_key(info, keyinfo, key, key_length, page_buf, + new_page); + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + } + +ok: + my_afree(page_buf); + return res; + +err1: + my_afree(page_buf); + return -1; +} + + +/* + Insert key into the tree + + RETURN + -1 Error + 0 Root was not split + 1 Root was split +*/ + +static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, byte *key, + uint key_length, int ins_level) +{ + my_off_t old_root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + int res; + my_off_t new_page; + + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + int res; + + if ((old_root = _ma_new(info, keyinfo, DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + return -1; + info->keybuff_used = 1; + maria_putint(info->buff, 2, 0); + res = maria_rtree_add_key(info, keyinfo, key, key_length, info->buff, NULL); + if (_ma_write_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, info->buff)) + return 1; + info->s->state.key_root[keynr] = old_root; + return res; + } + + switch ((res = maria_rtree_insert_req(info, keyinfo, key, key_length, + old_root, &new_page, ins_level, 0))) + { + case 0: /* root was not split */ + { + break; + } + case 1: /* root was split, grow a new root */ + { + byte *new_root_buf, *new_key; + my_off_t new_root; + uint nod_flag = info->s->base.key_reflength; + + if (!(new_root_buf= (byte*) my_alloca((uint)keyinfo->block_length + + HA_MAX_KEY_BUFF))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + + maria_putint(new_root_buf, 2, nod_flag); + if ((new_root = _ma_new(info, keyinfo, DFLT_INIT_HITS)) == + HA_OFFSET_ERROR) + goto err1; + + new_key = new_root_buf + keyinfo->block_length + nod_flag; + + _ma_kpointer(info, new_key - nod_flag, old_root); + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, + old_root)) + goto err1; + if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, + NULL) + == -1) + goto err1; + _ma_kpointer(info, new_key - nod_flag, new_page); + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, + new_page)) + goto err1; + if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, + NULL) + == -1) + goto err1; + if (_ma_write_keypage(info, keyinfo, new_root, + DFLT_INIT_HITS, new_root_buf)) + goto err1; + info->s->state.key_root[keynr] = new_root; + + my_afree((byte*)new_root_buf); + break; +err1: + my_afree((byte*)new_root_buf); + return -1; + } + default: + case -1: /* error */ + { + break; + } + } + return res; +} + + +/* + Insert key into the tree - interface function + + RETURN + -1 Error + 0 OK +*/ + +int maria_rtree_insert(MARIA_HA *info, uint keynr, byte *key, uint key_length) +{ + return 
(!key_length || + (maria_rtree_insert_level(info, keynr, key, key_length, -1) == -1)) ? + -1 : 0; +} + + +/* + Fill reinsert page buffer + + RETURN + -1 Error + 0 OK +*/ + +static int maria_rtree_fill_reinsert_list(stPageList *ReinsertList, my_off_t page, + int level) +{ + if (ReinsertList->n_pages == ReinsertList->m_pages) + { + ReinsertList->m_pages += REINSERT_BUFFER_INC; + if (!(ReinsertList->pages = (stPageLevel*)my_realloc((gptr)ReinsertList->pages, + ReinsertList->m_pages * sizeof(stPageLevel), MYF(MY_ALLOW_ZERO_PTR)))) + goto err1; + } + /* save page to ReinsertList */ + ReinsertList->pages[ReinsertList->n_pages].offs = page; + ReinsertList->pages[ReinsertList->n_pages].level = level; + ReinsertList->n_pages++; + return 0; + +err1: + return -1; +} + + +/* + Go down and delete key from the tree + + RETURN + -1 Error + 0 Deleted + 1 Not found + 2 Empty leaf +*/ + +static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, + uint key_length, my_off_t page, + uint *page_size, + stPageList *ReinsertList, int level) +{ + ulong i; + uint nod_flag; + int res; + byte *page_buf, *last, *k; + + if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + + for (i = 0; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag), i++) + { + if (nod_flag) + { + /* not leaf */ + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) + { + switch ((res = maria_rtree_delete_req(info, keyinfo, key, key_length, + _ma_kpos(nod_flag, k), page_size, ReinsertList, level + 1))) + { + case 0: /* deleted */ + { + /* test page filling */ + if (*page_size + key_length >= + rt_PAGE_MIN_SIZE(keyinfo->block_length)) + { + /* OK */ + if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length, + _ma_kpos(nod_flag, k))) + goto err1; + if (_ma_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + } + else + { + /* too small: delete key & add it descendant to reinsert list */ + if (maria_rtree_fill_reinsert_list(ReinsertList, + _ma_kpos(nod_flag, k), + level + 1)) + goto err1; + maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); + if (_ma_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + *page_size = maria_getint(page_buf); + } + + goto ok; + } + case 1: /* not found - continue searching */ + { + break; + } + case 2: /* vacuous case: last key in the leaf */ + { + maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); + if (_ma_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + *page_size = maria_getint(page_buf); + res = 0; + goto ok; + } + default: /* error */ + case -1: + { + goto err1; + } + } + } + } + else + { + /* leaf */ + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_EQUAL | MBR_DATA)) + { + maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); + *page_size = maria_getint(page_buf); + if (*page_size == 2) + { + /* last key in the leaf */ + res = 2; + if (_ma_dispose(info, keyinfo, page, DFLT_INIT_HITS)) + goto err1; + } + else + { + res = 0; + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + } + goto ok; + } + } + } + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + return -1; +} + + +/* + 
Delete key - interface function + + RETURN + -1 Error + 0 Deleted +*/ + +int maria_rtree_delete(MARIA_HA *info, uint keynr, byte *key, uint key_length) +{ + uint page_size; + stPageList ReinsertList; + my_off_t old_root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + ReinsertList.pages = NULL; + ReinsertList.n_pages = 0; + ReinsertList.m_pages = 0; + + switch (maria_rtree_delete_req(info, keyinfo, key, key_length, old_root, + &page_size, &ReinsertList, 0)) + { + case 2: + { + info->s->state.key_root[keynr] = HA_OFFSET_ERROR; + return 0; + } + case 0: + { + uint nod_flag; + ulong i; + for (i = 0; i < ReinsertList.n_pages; ++i) + { + uint nod_flag; + byte *page_buf, *k, *last; + + if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + goto err1; + } + if (!_ma_fetch_keypage(info, keyinfo, ReinsertList.pages[i].offs, + DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + if (maria_rtree_insert_level(info, keynr, k, key_length, + ReinsertList.pages[i].level) == -1) + { + my_afree(page_buf); + goto err1; + } + } + my_afree(page_buf); + if (_ma_dispose(info, keyinfo, ReinsertList.pages[i].offs, + DFLT_INIT_HITS)) + goto err1; + } + if (ReinsertList.pages) + my_free((byte*) ReinsertList.pages, MYF(0)); + + /* check for redundant root (not leaf, 1 child) and eliminate */ + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + goto err1; + if (!_ma_fetch_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, + info->buff, 0)) + goto err1; + nod_flag = _ma_test_if_nod(info->buff); + page_size = maria_getint(info->buff); + if (nod_flag && (page_size == 2 + key_length + nod_flag)) + { + my_off_t new_root = _ma_kpos(nod_flag, + rt_PAGE_FIRST_KEY(info->buff, nod_flag)); + if (_ma_dispose(info, keyinfo, old_root, DFLT_INIT_HITS)) + goto err1; + info->s->state.key_root[keynr] = new_root; + } + info->update= HA_STATE_DELETED; + return 0; + +err1: + return -1; + } + case 1: /* not found */ + { + my_errno = HA_ERR_KEY_NOT_FOUND; + return -1; + } + default: + case -1: /* error */ + return -1; + } +} + + +/* + Estimate number of suitable keys in the tree + + RETURN + estimated value +*/ + +ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint flag) +{ + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + my_off_t root; + uint i = 0; + uint nod_flag, k_len; + byte *page_buf, *k, *last; + double area = 0; + ha_rows res = 0; + + if (flag & MBR_DISJOINT) + return info->state->records; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + return HA_POS_ERROR; + if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length))) + return HA_POS_ERROR; + if (!_ma_fetch_keypage(info, keyinfo, root, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag), i++) + { + if (nod_flag) + { + double k_area = maria_rtree_rect_volume(keyinfo->seg, k, key_length); + + /* The following should be safe, even if we compare doubles */ + if (k_area == 0) + { + if (flag & (MBR_CONTAIN | 
MBR_INTERSECT)) + { + area += 1; + } + else if (flag & (MBR_WITHIN | MBR_EQUAL)) + { + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, + MBR_WITHIN)) + area += 1; + } + else + goto err1; + } + else + { + if (flag & (MBR_CONTAIN | MBR_INTERSECT)) + { + area+= maria_rtree_overlapping_area(keyinfo->seg, key, k, + key_length) / k_area; + } + else if (flag & (MBR_WITHIN | MBR_EQUAL)) + { + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, + MBR_WITHIN)) + area+= (maria_rtree_rect_volume(keyinfo->seg, key, key_length) / + k_area); + } + else + goto err1; + } + } + else + { + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, flag)) + ++res; + } + } + if (nod_flag) + { + if (i) + res = (ha_rows) (area / i * info->state->records); + else + res = HA_POS_ERROR; + } + + my_afree((byte*)page_buf); + return res; + +err1: + my_afree(page_buf); + return HA_POS_ERROR; +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_index.h b/storage/maria/ma_rt_index.h new file mode 100644 index 00000000000..76b6c6e230c --- /dev/null +++ b/storage/maria/ma_rt_index.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _rt_index_h +#define _rt_index_h + +#ifdef HAVE_RTREE_KEYS + +#define rt_PAGE_FIRST_KEY(page, nod_flag) (page + 2 + nod_flag) +#define rt_PAGE_NEXT_KEY(key, key_length, nod_flag) (key + key_length + \ + (nod_flag ? 
nod_flag : info->s->base.rec_reflength)) +#define rt_PAGE_END(page) (page + maria_getint(page)) + +#define rt_PAGE_MIN_SIZE(block_length) ((uint)(block_length) / 3) + +int maria_rtree_insert(MARIA_HA *info, uint keynr, byte *key, uint key_length); +int maria_rtree_delete(MARIA_HA *info, uint keynr, byte *key, uint key_length); + +int maria_rtree_find_first(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint search_flag); +int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag); + +int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length); +int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length); + +ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint flag); + +int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint key_length, + my_off_t *new_page_offs); + +#endif /*HAVE_RTREE_KEYS*/ +#endif /* _rt_index_h */ diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c new file mode 100644 index 00000000000..1453195d263 --- /dev/null +++ b/storage/maria/ma_rt_key.c @@ -0,0 +1,102 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS +#include "ma_rt_index.h" +#include "ma_rt_key.h" +#include "ma_rt_mbr.h" + +/* + Add key to the page + + RESULT VALUES + -1 Error + 0 Not split + 1 Split +*/ + +int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, byte *page_buf, my_off_t *new_page) +{ + uint page_size = maria_getint(page_buf); + uint nod_flag = _ma_test_if_nod(page_buf); + + if (page_size + key_length + info->s->base.rec_reflength <= + keyinfo->block_length) + { + /* split won't be necessary */ + if (nod_flag) + { + /* save key */ + memcpy(rt_PAGE_END(page_buf), key - nod_flag, key_length + nod_flag); + page_size += key_length + nod_flag; + } + else + { + /* save key */ + memcpy(rt_PAGE_END(page_buf), key, key_length + + info->s->base.rec_reflength); + page_size += key_length + info->s->base.rec_reflength; + } + maria_putint(page_buf, page_size, nod_flag); + return 0; + } + + return (maria_rtree_split_page(info, keyinfo, page_buf, key, key_length, + new_page) ? 
-1 : 1); +} + + +/* + Delete key from the page +*/ + +int maria_rtree_delete_key(MARIA_HA *info, byte *page_buf, byte *key, + uint key_length, uint nod_flag) +{ + uint16 page_size = maria_getint(page_buf); + byte *key_start; + + key_start= key - nod_flag; + if (!nod_flag) + key_length += info->s->base.rec_reflength; + + memmove(key_start, key + key_length, page_size - key_length - + (key - page_buf)); + page_size-= key_length + nod_flag; + + maria_putint(page_buf, page_size, nod_flag); + return 0; +} + + +/* + Calculate and store key MBR +*/ + +int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, my_off_t child_page) +{ + if (!_ma_fetch_keypage(info, keyinfo, child_page, + DFLT_INIT_HITS, info->buff, 0)) + return -1; + + return maria_rtree_page_mbr(info, keyinfo->seg, info->buff, key, key_length); +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_key.h b/storage/maria/ma_rt_key.h new file mode 100644 index 00000000000..f44251782c1 --- /dev/null +++ b/storage/maria/ma_rt_key.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Ramil Kalimullin, who has a shared copyright to this code */ + +#ifndef _rt_key_h +#define _rt_key_h + +#ifdef HAVE_RTREE_KEYS + +int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, byte *page_buf, my_off_t *new_page); +int maria_rtree_delete_key(MARIA_HA *info, byte *page, byte *key, + uint key_length, uint nod_flag); +int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, my_off_t child_page); + +#endif /*HAVE_RTREE_KEYS*/ +#endif /* _rt_key_h */ diff --git a/storage/maria/ma_rt_mbr.c b/storage/maria/ma_rt_mbr.c new file mode 100644 index 00000000000..851618a4300 --- /dev/null +++ b/storage/maria/ma_rt_mbr.c @@ -0,0 +1,807 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" +#include "ma_rt_mbr.h" + +#define INTERSECT_CMP(amin, amax, bmin, bmax) ((amin > bmax) || (bmin > amax)) +#define CONTAIN_CMP(amin, amax, bmin, bmax) ((bmin > amin) || (bmax < amax)) +#define WITHIN_CMP(amin, amax, bmin, bmax) ((amin > bmin) || (amax < bmax)) +#define DISJOINT_CMP(amin, amax, bmin, bmax) ((amin <= bmax) && (bmin <= amax)) +#define EQUAL_CMP(amin, amax, bmin, bmax) ((amin != bmin) || (amax != bmax)) + +#define FCMP(A, B) ((int)(A) - (int)(B)) +#define p_inc(A, B, X) {A += X; B += X;} + +#define RT_CMP(nextflag) \ + if (nextflag & MBR_INTERSECT) \ + { \ + if (INTERSECT_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_CONTAIN) \ + { \ + if (CONTAIN_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_WITHIN) \ + { \ + if (WITHIN_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_EQUAL) \ + { \ + if (EQUAL_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_DISJOINT) \ + { \ + if (DISJOINT_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + }\ + else /* if unknown comparison operator */ \ + { \ + DBUG_ASSERT(0); \ + } + +#define RT_CMP_KORR(type, korr_func, len, nextflag) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + RT_CMP(nextflag); \ +} + +#define RT_CMP_GET(type, get_func, len, nextflag) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + RT_CMP(nextflag); \ +} + +/* + Compares two keys a and b depending on nextflag + nextflag can contain these flags: + MBR_INTERSECT(a,b) a overlaps b + MBR_CONTAIN(a,b) a contains b + MBR_DISJOINT(a,b) a disjoint b + MBR_WITHIN(a,b) a within b + MBR_EQUAL(a,b) All coordinates of MBRs are equal + MBR_DATA(a,b) Data reference is the same + Returns 0 on success. 
+*/ + +int maria_rtree_key_cmp(HA_KEYSEG *keyseg, byte *b, byte *a, uint key_length, + uint nextflag) +{ + for (; (int) key_length > 0; keyseg += 2 ) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_CMP_KORR(int8, mi_sint1korr, 1, nextflag); + break; + case HA_KEYTYPE_BINARY: + RT_CMP_KORR(uint8, mi_uint1korr, 1, nextflag); + break; + case HA_KEYTYPE_SHORT_INT: + RT_CMP_KORR(int16, mi_sint2korr, 2, nextflag); + break; + case HA_KEYTYPE_USHORT_INT: + RT_CMP_KORR(uint16, mi_uint2korr, 2, nextflag); + break; + case HA_KEYTYPE_INT24: + RT_CMP_KORR(int32, mi_sint3korr, 3, nextflag); + break; + case HA_KEYTYPE_UINT24: + RT_CMP_KORR(uint32, mi_uint3korr, 3, nextflag); + break; + case HA_KEYTYPE_LONG_INT: + RT_CMP_KORR(int32, mi_sint4korr, 4, nextflag); + break; + case HA_KEYTYPE_ULONG_INT: + RT_CMP_KORR(uint32, mi_uint4korr, 4, nextflag); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_CMP_KORR(longlong, mi_sint8korr, 8, nextflag) + break; + case HA_KEYTYPE_ULONGLONG: + RT_CMP_KORR(ulonglong, mi_uint8korr, 8, nextflag) + break; +#endif + case HA_KEYTYPE_FLOAT: + /* The following should be safe, even if we compare doubles */ + RT_CMP_GET(float, mi_float4get, 4, nextflag); + break; + case HA_KEYTYPE_DOUBLE: + RT_CMP_GET(double, mi_float8get, 8, nextflag); + break; + case HA_KEYTYPE_END: + goto end; + default: + return 1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } + +end: + if (nextflag & MBR_DATA) + { + byte *end = a + keyseg->length; + do + { + if (*a++ != *b++) + return FCMP(a[-1], b[-1]); + } while (a != end); + } + return 0; +} + +#define RT_VOL_KORR(type, korr_func, len, cast) \ +{ \ + type amin, amax; \ + amin = korr_func(a); \ + amax = korr_func(a+len); \ + res *= (cast(amax) - cast(amin)); \ +} + +#define RT_VOL_GET(type, get_func, len, cast) \ +{ \ + type amin, amax; \ + get_func(amin, a); \ + get_func(amax, a+len); \ + res *= (cast(amax) - cast(amin)); \ +} + +/* + Calculates rectangle volume +*/ +double maria_rtree_rect_volume(HA_KEYSEG *keyseg, byte *a, uint key_length) +{ + double res = 1; + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_VOL_KORR(int8, mi_sint1korr, 1, (double)); + break; + case HA_KEYTYPE_BINARY: + RT_VOL_KORR(uint8, mi_uint1korr, 1, (double)); + break; + case HA_KEYTYPE_SHORT_INT: + RT_VOL_KORR(int16, mi_sint2korr, 2, (double)); + break; + case HA_KEYTYPE_USHORT_INT: + RT_VOL_KORR(uint16, mi_uint2korr, 2, (double)); + break; + case HA_KEYTYPE_INT24: + RT_VOL_KORR(int32, mi_sint3korr, 3, (double)); + break; + case HA_KEYTYPE_UINT24: + RT_VOL_KORR(uint32, mi_uint3korr, 3, (double)); + break; + case HA_KEYTYPE_LONG_INT: + RT_VOL_KORR(int32, mi_sint4korr, 4, (double)); + break; + case HA_KEYTYPE_ULONG_INT: + RT_VOL_KORR(uint32, mi_uint4korr, 4, (double)); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_VOL_KORR(longlong, mi_sint8korr, 8, (double)); + break; + case HA_KEYTYPE_ULONGLONG: + RT_VOL_KORR(longlong, mi_sint8korr, 8, ulonglong2double); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_VOL_GET(float, mi_float4get, 4, (double)); + break; + case HA_KEYTYPE_DOUBLE: + RT_VOL_GET(double, mi_float8get, 8, (double)); + break; + case HA_KEYTYPE_END: + key_length = 0; + break; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + } + return res; +} + 
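
The per-axis logic of the RT_CMP and RT_VOL macros above is easier to follow on plain doubles. The sketch below is an editor's illustration, not part of the patch: it drops the packed HA_KEYSEG decoding (mi_*korr / mi_float8get) and defines its own relation names and orientation, but applies the same interval tests that maria_rtree_key_cmp() and maria_rtree_rect_volume() apply per key segment, with a return value of 0 meaning "the relation holds on every dimension".

    #include <stdio.h>

    /* One MBR is stored as min0,max0,min1,max1,... - the same layout that
       maria_rtree_d_mbr() produces as an array of doubles. */
    enum mbr_rel { REL_INTERSECT, REL_CONTAIN, REL_WITHIN, REL_EQUAL, REL_DISJOINT };

    /* Return 0 if the relation holds between a and b on every axis,
       1 as soon as one axis fails, mirroring the RT_CMP macro. */
    static int mbr_cmp(const double *a, const double *b, int n_dim, enum mbr_rel rel)
    {
      int d;
      for (d= 0; d < n_dim; d++, a+= 2, b+= 2)
      {
        double amin= a[0], amax= a[1], bmin= b[0], bmax= b[1];
        switch (rel) {
        case REL_INTERSECT:                  /* intervals overlap            */
          if (amin > bmax || bmin > amax) return 1; break;
        case REL_CONTAIN:                    /* b lies inside a              */
          if (bmin < amin || bmax > amax) return 1; break;
        case REL_WITHIN:                     /* a lies inside b              */
          if (amin < bmin || amax > bmax) return 1; break;
        case REL_EQUAL:
          if (amin != bmin || amax != bmax) return 1; break;
        case REL_DISJOINT:                   /* no overlap on any axis       */
          if (amin <= bmax && bmin <= amax) return 1; break;
        }
      }
      return 0;
    }

    /* Product of the side lengths, as in maria_rtree_rect_volume(). */
    static double mbr_volume(const double *a, int n_dim)
    {
      double res= 1.0;
      int d;
      for (d= 0; d < n_dim; d++, a+= 2)
        res*= a[1] - a[0];
      return res;
    }

    int main(void)
    {
      double a[]= { 0, 10, 0, 10 };          /* 10 x 10 box              */
      double b[]= { 5, 15, 0, 10 };          /* shifted, overlapping box */
      printf("a intersects b: %s\n", mbr_cmp(a, b, 2, REL_INTERSECT) ? "no" : "yes");
      printf("a within b:     %s\n", mbr_cmp(a, b, 2, REL_WITHIN)    ? "no" : "yes");
      printf("volume(a):      %g\n", mbr_volume(a, 2));
      return 0;
    }

Running it prints "yes", "no" and 100: the boxes overlap, a is not contained in b, and the volume of a is the product of its 10-unit sides. This is also how maria_rtree_estimate() above weights node MBRs when guessing how many rows a search rectangle will match.
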
+#define RT_D_MBR_KORR(type, korr_func, len, cast) \ +{ \ + type amin, amax; \ + amin = korr_func(a); \ + amax = korr_func(a+len); \ + *res++ = cast(amin); \ + *res++ = cast(amax); \ +} + +#define RT_D_MBR_GET(type, get_func, len, cast) \ +{ \ + type amin, amax; \ + get_func(amin, a); \ + get_func(amax, a+len); \ + *res++ = cast(amin); \ + *res++ = cast(amax); \ +} + + +/* + Creates an MBR as an array of doubles. +*/ + +int maria_rtree_d_mbr(HA_KEYSEG *keyseg, byte *a, uint key_length, double *res) +{ + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_D_MBR_KORR(int8, mi_sint1korr, 1, (double)); + break; + case HA_KEYTYPE_BINARY: + RT_D_MBR_KORR(uint8, mi_uint1korr, 1, (double)); + break; + case HA_KEYTYPE_SHORT_INT: + RT_D_MBR_KORR(int16, mi_sint2korr, 2, (double)); + break; + case HA_KEYTYPE_USHORT_INT: + RT_D_MBR_KORR(uint16, mi_uint2korr, 2, (double)); + break; + case HA_KEYTYPE_INT24: + RT_D_MBR_KORR(int32, mi_sint3korr, 3, (double)); + break; + case HA_KEYTYPE_UINT24: + RT_D_MBR_KORR(uint32, mi_uint3korr, 3, (double)); + break; + case HA_KEYTYPE_LONG_INT: + RT_D_MBR_KORR(int32, mi_sint4korr, 4, (double)); + break; + case HA_KEYTYPE_ULONG_INT: + RT_D_MBR_KORR(uint32, mi_uint4korr, 4, (double)); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_D_MBR_KORR(longlong, mi_sint8korr, 8, (double)); + break; + case HA_KEYTYPE_ULONGLONG: + RT_D_MBR_KORR(longlong, mi_sint8korr, 8, ulonglong2double); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_D_MBR_GET(float, mi_float4get, 4, (double)); + break; + case HA_KEYTYPE_DOUBLE: + RT_D_MBR_GET(double, mi_float8get, 8, (double)); + break; + case HA_KEYTYPE_END: + key_length = 0; + break; + default: + return 1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + } + return 0; +} + +#define RT_COMB_KORR(type, korr_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + amin = min(amin, bmin); \ + amax = max(amax, bmax); \ + store_func(c, amin); \ + store_func(c+len, amax); \ +} + +#define RT_COMB_GET(type, get_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + amin = min(amin, bmin); \ + amax = max(amax, bmax); \ + store_func(c, amin); \ + store_func(c+len, amax); \ +} + +/* + Creates common minimal bounding rectungle + for two input rectagnles a and b + Result is written to c +*/ + +int maria_rtree_combine_rect(HA_KEYSEG *keyseg, byte* a, byte* b, byte* c, + uint key_length) +{ + for ( ; (int) key_length > 0 ; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_COMB_KORR(int8, mi_sint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_BINARY: + RT_COMB_KORR(uint8, mi_uint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_COMB_KORR(int16, mi_sint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_COMB_KORR(uint16, mi_uint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_INT24: + RT_COMB_KORR(int32, mi_sint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_UINT24: + RT_COMB_KORR(uint32, mi_uint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_COMB_KORR(int32, mi_sint4korr, mi_int4store, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_COMB_KORR(uint32, 
mi_uint4korr, mi_int4store, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_COMB_KORR(longlong, mi_sint8korr, mi_int8store, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_COMB_KORR(ulonglong, mi_uint8korr, mi_int8store, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_COMB_GET(float, mi_float4get, mi_float4store, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_COMB_GET(double, mi_float8get, mi_float8store, 8); + break; + case HA_KEYTYPE_END: + return 0; + default: + return 1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + c+= keyseg_length; + } + return 0; +} + + +#define RT_OVL_AREA_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + amin = max(amin, bmin); \ + amax = min(amax, bmax); \ + if (amin >= amax) \ + return 0; \ + res *= amax - amin; \ +} + +#define RT_OVL_AREA_GET(type, get_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + amin = max(amin, bmin); \ + amax = min(amax, bmax); \ + if (amin >= amax) \ + return 0; \ + res *= amax - amin; \ +} + +/* +Calculates overlapping area of two MBRs a & b +*/ +double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, byte* a, byte* b, + uint key_length) +{ + double res = 1; + for (; (int) key_length > 0 ; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_OVL_AREA_KORR(int8, mi_sint1korr, 1); + break; + case HA_KEYTYPE_BINARY: + RT_OVL_AREA_KORR(uint8, mi_uint1korr, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_OVL_AREA_KORR(int16, mi_sint2korr, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_OVL_AREA_KORR(uint16, mi_uint2korr, 2); + break; + case HA_KEYTYPE_INT24: + RT_OVL_AREA_KORR(int32, mi_sint3korr, 3); + break; + case HA_KEYTYPE_UINT24: + RT_OVL_AREA_KORR(uint32, mi_uint3korr, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_OVL_AREA_KORR(int32, mi_sint4korr, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_OVL_AREA_KORR(uint32, mi_uint4korr, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_OVL_AREA_GET(float, mi_float4get, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_OVL_AREA_GET(double, mi_float8get, 8); + break; + case HA_KEYTYPE_END: + return res; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } + return res; +} + +#define RT_AREA_INC_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + a_area *= (((double)amax) - ((double)amin)); \ + loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +#define RT_AREA_INC_GET(type, get_func, len)\ +{\ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + a_area *= (((double)amax) - ((double)amin)); \ + loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +/* + Calculates MBR_AREA(a+b) - MBR_AREA(a) +*/ + +double maria_rtree_area_increase(HA_KEYSEG *keyseg, byte *a, byte *b, + uint key_length, double 
*ab_area) +{ + double a_area= 1.0; + double loc_ab_area= 1.0; + + *ab_area= 1.0; + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + + if (keyseg->null_bit) /* Handle NULL part */ + return -1; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_AREA_INC_KORR(int8, mi_sint1korr, 1); + break; + case HA_KEYTYPE_BINARY: + RT_AREA_INC_KORR(uint8, mi_uint1korr, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_AREA_INC_KORR(int16, mi_sint2korr, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_AREA_INC_KORR(uint16, mi_uint2korr, 2); + break; + case HA_KEYTYPE_INT24: + RT_AREA_INC_KORR(int32, mi_sint3korr, 3); + break; + case HA_KEYTYPE_UINT24: + RT_AREA_INC_KORR(int32, mi_uint3korr, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_AREA_INC_KORR(int32, mi_sint4korr, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_AREA_INC_KORR(uint32, mi_uint4korr, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_AREA_INC_KORR(longlong, mi_sint8korr, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_AREA_INC_KORR(longlong, mi_sint8korr, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_AREA_INC_GET(float, mi_float4get, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_AREA_INC_GET(double, mi_float8get, 8); + break; + case HA_KEYTYPE_END: + goto safe_end; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } +safe_end: + *ab_area= loc_ab_area; + return loc_ab_area - a_area; +} + +#define RT_PERIM_INC_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + a_perim+= (((double)amax) - ((double)amin)); \ + *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +#define RT_PERIM_INC_GET(type, get_func, len)\ +{\ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + a_perim+= (((double)amax) - ((double)amin)); \ + *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +/* +Calculates MBR_PERIMETER(a+b) - MBR_PERIMETER(a) +*/ +double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, byte* a, byte* b, + uint key_length, double *ab_perim) +{ + double a_perim = 0.0; + + *ab_perim= 0.0; + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + + if (keyseg->null_bit) /* Handle NULL part */ + return -1; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_PERIM_INC_KORR(int8, mi_sint1korr, 1); + break; + case HA_KEYTYPE_BINARY: + RT_PERIM_INC_KORR(uint8, mi_uint1korr, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_PERIM_INC_KORR(int16, mi_sint2korr, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_PERIM_INC_KORR(uint16, mi_uint2korr, 2); + break; + case HA_KEYTYPE_INT24: + RT_PERIM_INC_KORR(int32, mi_sint3korr, 3); + break; + case HA_KEYTYPE_UINT24: + RT_PERIM_INC_KORR(int32, mi_uint3korr, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_PERIM_INC_KORR(int32, mi_sint4korr, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_PERIM_INC_KORR(uint32, mi_uint4korr, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_PERIM_INC_GET(float, mi_float4get, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_PERIM_INC_GET(double, mi_float8get, 8); + 
break; + case HA_KEYTYPE_END: + return *ab_perim - a_perim; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } + return *ab_perim - a_perim; +} + + +#define RT_PAGE_MBR_KORR(type, korr_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(k + inc); \ + amax = korr_func(k + inc + len); \ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \ + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \ +{ \ + bmin = korr_func(k + inc); \ + bmax = korr_func(k + inc + len); \ + if (amin > bmin) \ + amin = bmin; \ + if (amax < bmax) \ + amax = bmax; \ +} \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + inc += 2 * len; \ +} + +#define RT_PAGE_MBR_GET(type, get_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, k + inc); \ + get_func(amax, k + inc + len); \ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \ + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \ +{ \ + get_func(bmin, k + inc); \ + get_func(bmax, k + inc + len); \ + if (amin > bmin) \ + amin = bmin; \ + if (amax < bmax) \ + amax = bmax; \ +} \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + inc += 2 * len; \ +} + +/* + Calculates key page total MBR = MBR(key1) + MBR(key2) + ... +*/ +int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, byte *page_buf, + byte *c, uint key_length) +{ + uint inc = 0; + uint k_len = key_length; + uint nod_flag = _ma_test_if_nod(page_buf); + byte *k; + byte *last = rt_PAGE_END(page_buf); + + for (; (int)key_length > 0; keyseg += 2) + { + key_length -= keyseg->length * 2; + + /* Handle NULL part */ + if (keyseg->null_bit) + { + return 1; + } + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_PAGE_MBR_KORR(int8, mi_sint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_BINARY: + RT_PAGE_MBR_KORR(uint8, mi_uint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_PAGE_MBR_KORR(int16, mi_sint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_PAGE_MBR_KORR(uint16, mi_uint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_INT24: + RT_PAGE_MBR_KORR(int32, mi_sint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_UINT24: + RT_PAGE_MBR_KORR(uint32, mi_uint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_PAGE_MBR_KORR(int32, mi_sint4korr, mi_int4store, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_PAGE_MBR_KORR(uint32, mi_uint4korr, mi_int4store, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_PAGE_MBR_KORR(longlong, mi_sint8korr, mi_int8store, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_PAGE_MBR_KORR(ulonglong, mi_uint8korr, mi_int8store, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_PAGE_MBR_GET(float, mi_float4get, mi_float4store, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_PAGE_MBR_GET(double, mi_float8get, mi_float8store, 8); + break; + case HA_KEYTYPE_END: + return 0; + default: + return 1; + } + } + return 0; +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_mbr.h b/storage/maria/ma_rt_mbr.h new file mode 100644 index 00000000000..3282ee0d7a3 --- /dev/null +++ b/storage/maria/ma_rt_mbr.h @@ -0,0 +1,39 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as 
published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _rt_mbr_h +#define _rt_mbr_h + +#ifdef HAVE_RTREE_KEYS + +int maria_rtree_key_cmp(HA_KEYSEG *keyseg, byte *a, byte *b, uint key_length, + uint nextflag); +int maria_rtree_combine_rect(HA_KEYSEG *keyseg,byte *, byte *, byte*, + uint key_length); +double maria_rtree_rect_volume(HA_KEYSEG *keyseg, byte*, uint key_length); +int maria_rtree_d_mbr(HA_KEYSEG *keyseg, byte *a, uint key_length, + double *res); +double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, byte *a, byte *b, + uint key_length); +double maria_rtree_area_increase(HA_KEYSEG *keyseg, byte *a, byte *b, + uint key_length, double *ab_area); +double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, byte* a, byte* b, + uint key_length, double *ab_perim); +int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, byte *page_buf, + byte* c, uint key_length); +#endif /*HAVE_RTREE_KEYS*/ +#endif /* _rt_mbr_h */ diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c new file mode 100644 index 00000000000..00c8d18f5e5 --- /dev/null +++ b/storage/maria/ma_rt_split.c @@ -0,0 +1,351 @@ +/* Copyright (C) 2006 MySQL AB & Alexey Botchkov & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" +#include "ma_rt_key.h" +#include "ma_rt_mbr.h" + +typedef struct +{ + double square; + int n_node; + byte *key; + double *coords; +} SplitStruct; + +inline static double *reserve_coords(double **d_buffer, int n_dim) +{ + double *coords = *d_buffer; + (*d_buffer) += n_dim * 2; + return coords; +} + +static void mbr_join(double *a, const double *b, int n_dim) +{ + double *end = a + n_dim * 2; + do + { + if (a[0] > b[0]) + a[0] = b[0]; + + if (a[1] < b[1]) + a[1] = b[1]; + + a += 2; + b += 2; + }while (a != end); +} + +/* +Counts the square of mbr which is a join of a and b +*/ +static double mbr_join_square(const double *a, const double *b, int n_dim) +{ + const double *end = a + n_dim * 2; + double square = 1.0; + do + { + square *= + ((a[1] < b[1]) ? b[1] : a[1]) - ((a[0] > b[0]) ? 
b[0] : a[0]); + + a += 2; + b += 2; + }while (a != end); + + return square; +} + +static double count_square(const double *a, int n_dim) +{ + const double *end = a + n_dim * 2; + double square = 1.0; + do + { + square *= a[1] - a[0]; + a += 2; + }while (a != end); + return square; +} + +inline static void copy_coords(double *dst, const double *src, int n_dim) +{ + memcpy(dst, src, sizeof(double) * (n_dim * 2)); +} + +/* +Select two nodes to collect group upon +*/ +static void pick_seeds(SplitStruct *node, int n_entries, + SplitStruct **seed_a, SplitStruct **seed_b, int n_dim) +{ + SplitStruct *cur1; + SplitStruct *lim1 = node + (n_entries - 1); + SplitStruct *cur2; + SplitStruct *lim2 = node + n_entries; + + double max_d = -DBL_MAX; + double d; + + for (cur1 = node; cur1 < lim1; ++cur1) + { + for (cur2=cur1 + 1; cur2 < lim2; ++cur2) + { + + d = mbr_join_square(cur1->coords, cur2->coords, n_dim) - cur1->square - + cur2->square; + if (d > max_d) + { + max_d = d; + *seed_a = cur1; + *seed_b = cur2; + } + } + } +} + +/* +Select next node and group where to add +*/ +static void pick_next(SplitStruct *node, int n_entries, double *g1, double *g2, + SplitStruct **choice, int *n_group, int n_dim) +{ + SplitStruct *cur = node; + SplitStruct *end = node + n_entries; + + double max_diff = -DBL_MAX; + + for (; cur<end; ++cur) + { + double diff; + double abs_diff; + + if (cur->n_node) + { + continue; + } + + diff = mbr_join_square(g1, cur->coords, n_dim) - + mbr_join_square(g2, cur->coords, n_dim); + + abs_diff = fabs(diff); + if (abs_diff > max_diff) + { + max_diff = abs_diff; + *n_group = 1 + (diff > 0); + *choice = cur; + } + } +} + +/* +Mark not-in-group entries as n_group +*/ +static void mark_all_entries(SplitStruct *node, int n_entries, int n_group) +{ + SplitStruct *cur = node; + SplitStruct *end = node + n_entries; + for (; cur<end; ++cur) + { + if (cur->n_node) + { + continue; + } + cur->n_node = n_group; + } +} + +static int split_maria_rtree_node(SplitStruct *node, int n_entries, + int all_size, /* Total key's size */ + int key_size, + int min_size, /* Minimal group size */ + int size1, int size2 /* initial group sizes */, + double **d_buffer, int n_dim) +{ + SplitStruct *cur; + SplitStruct *a; + SplitStruct *b; + double *g1 = reserve_coords(d_buffer, n_dim); + double *g2 = reserve_coords(d_buffer, n_dim); + SplitStruct *next; + int next_node; + int i; + SplitStruct *end = node + n_entries; + + if (all_size < min_size * 2) + { + return 1; + } + + cur = node; + for (; cur<end; ++cur) + { + cur->square = count_square(cur->coords, n_dim); + cur->n_node = 0; + } + + pick_seeds(node, n_entries, &a, &b, n_dim); + a->n_node = 1; + b->n_node = 2; + + + copy_coords(g1, a->coords, n_dim); + size1 += key_size; + copy_coords(g2, b->coords, n_dim); + size2 += key_size; + + + for (i=n_entries - 2; i>0; --i) + { + if (all_size - (size2 + key_size) < min_size) /* Can't write into group 2 */ + { + mark_all_entries(node, n_entries, 1); + break; + } + + if (all_size - (size1 + key_size) < min_size) /* Can't write into group 1 */ + { + mark_all_entries(node, n_entries, 2); + break; + } + + pick_next(node, n_entries, g1, g2, &next, &next_node, n_dim); + if (next_node == 1) + { + size1 += key_size; + mbr_join(g1, next->coords, n_dim); + } + else + { + size2 += key_size; + mbr_join(g2, next->coords, n_dim); + } + next->n_node = next_node; + } + + return 0; +} + +int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, + uint key_length, my_off_t *new_page_offs) +{ + int n1, n2; /* 
Number of items in groups */ + + SplitStruct *task; + SplitStruct *cur; + SplitStruct *stop; + double *coord_buf; + double *next_coord; + double *old_coord; + int n_dim; + byte *source_cur, *cur1, *cur2; + byte *new_page; + int err_code= 0; + uint nod_flag= _ma_test_if_nod(page); + uint full_length= key_length + (nod_flag ? nod_flag : + info->s->base.rec_reflength); + int max_keys= (maria_getint(page)-2) / (full_length); + + n_dim = keyinfo->keysegs / 2; + + if (!(coord_buf= (double*) my_alloca(n_dim * 2 * sizeof(double) * + (max_keys + 1 + 4) + + sizeof(SplitStruct) * (max_keys + 1)))) + return -1; + + task= (SplitStruct *)(coord_buf + n_dim * 2 * (max_keys + 1 + 4)); + + next_coord = coord_buf; + + stop = task + max_keys; + source_cur = rt_PAGE_FIRST_KEY(page, nod_flag); + + for (cur = task; cur < stop; ++cur, source_cur = rt_PAGE_NEXT_KEY(source_cur, + key_length, nod_flag)) + { + cur->coords = reserve_coords(&next_coord, n_dim); + cur->key = source_cur; + maria_rtree_d_mbr(keyinfo->seg, source_cur, key_length, cur->coords); + } + + cur->coords = reserve_coords(&next_coord, n_dim); + maria_rtree_d_mbr(keyinfo->seg, key, key_length, cur->coords); + cur->key = key; + + old_coord = next_coord; + + if (split_maria_rtree_node(task, max_keys + 1, + maria_getint(page) + full_length + 2, full_length, + rt_PAGE_MIN_SIZE(keyinfo->block_length), + 2, 2, &next_coord, n_dim)) + { + err_code = 1; + goto split_err; + } + + if (!(new_page = (byte*) my_alloca((uint)keyinfo->block_length))) + { + err_code= -1; + goto split_err; + } + + stop = task + (max_keys + 1); + cur1 = rt_PAGE_FIRST_KEY(page, nod_flag); + cur2 = rt_PAGE_FIRST_KEY(new_page, nod_flag); + + n1= n2 = 0; + for (cur = task; cur < stop; ++cur) + { + byte *to; + if (cur->n_node == 1) + { + to = cur1; + cur1 = rt_PAGE_NEXT_KEY(cur1, key_length, nod_flag); + ++n1; + } + else + { + to = cur2; + cur2 = rt_PAGE_NEXT_KEY(cur2, key_length, nod_flag); + ++n2; + } + if (to != cur->key) + memcpy(to - nod_flag, cur->key - nod_flag, full_length); + } + + maria_putint(page, 2 + n1 * full_length, nod_flag); + maria_putint(new_page, 2 + n2 * full_length, nod_flag); + + if ((*new_page_offs= _ma_new(info, keyinfo, DFLT_INIT_HITS)) == + HA_OFFSET_ERROR) + err_code= -1; + else + err_code= _ma_write_keypage(info, keyinfo, *new_page_offs, + DFLT_INIT_HITS, new_page); + + my_afree((byte*)new_page); + +split_err: + my_afree((byte*) coord_buf); + return err_code; +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_test.c b/storage/maria/ma_rt_test.c new file mode 100644 index 00000000000..04b0c88c222 --- /dev/null +++ b/storage/maria/ma_rt_test.c @@ -0,0 +1,474 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MARIA rtree table */ +/* Written by Alex Barkov who has a shared copyright to this code */ + + +#include "maria.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" + +#define MAX_REC_LENGTH 1024 +#define ndims 2 +#define KEYALG HA_KEY_ALG_RTREE + +static int read_with_pos(MARIA_HA * file, int silent); +static void create_record(char *record,uint rownr); +static void create_record1(char *record,uint rownr); +static void print_record(char * record,my_off_t offs,const char * tail); +static int run_test(const char *filename); + +static double rt_data[]= +{ + /*1*/ 0,10,0,10, + /*2*/ 5,15,0,10, + /*3*/ 0,10,5,15, + /*4*/ 10,20,10,20, + /*5*/ 0,10,0,10, + /*6*/ 5,15,0,10, + /*7*/ 0,10,5,15, + /*8*/ 10,20,10,20, + /*9*/ 0,10,0,10, + /*10*/ 5,15,0,10, + /*11*/ 0,10,5,15, + /*12*/ 10,20,10,20, + /*13*/ 0,10,0,10, + /*14*/ 5,15,0,10, + /*15*/ 0,10,5,15, + /*16*/ 10,20,10,20, + /*17*/ 5,15,0,10, + /*18*/ 0,10,5,15, + /*19*/ 10,20,10,20, + /*20*/ 0,10,0,10, + + /*1*/ 100,110,0,10, + /*2*/ 105,115,0,10, + /*3*/ 100,110,5,15, + /*4*/ 110,120,10,20, + /*5*/ 100,110,0,10, + /*6*/ 105,115,0,10, + /*7*/ 100,110,5,15, + /*8*/ 110,120,10,20, + /*9*/ 100,110,0,10, + /*10*/ 105,115,0,10, + /*11*/ 100,110,5,15, + /*12*/ 110,120,10,20, + /*13*/ 100,110,0,10, + /*14*/ 105,115,0,10, + /*15*/ 100,110,5,15, + /*16*/ 110,120,10,20, + /*17*/ 105,115,0,10, + /*18*/ 100,110,5,15, + /*19*/ 110,120,10,20, + /*20*/ 100,110,0,10, + -1 +}; + +int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused))) +{ + MY_INIT(argv[0]); + maria_init(); + exit(run_test("rt_test")); +} + + +static int run_test(const char *filename) +{ + MARIA_HA *file; + MARIA_UNIQUEDEF uniquedef; + MARIA_CREATE_INFO create_info; + MARIA_COLUMNDEF recinfo[20]; + MARIA_KEYDEF keyinfo[20]; + HA_KEYSEG keyseg[20]; + key_range range; + + int silent=0; + int opt_unique=0; + int create_flag=0; + int key_type=HA_KEYTYPE_DOUBLE; + int key_length=8; + int null_fields=0; + int nrecords=sizeof(rt_data)/(sizeof(double)*4);/* 3000;*/ + int rec_length=0; + int uniques=0; + int i; + int error; + int row_count=0; + char record[MAX_REC_LENGTH]; + char read_record[MAX_REC_LENGTH]; + int upd= 10; + ha_rows hrows; + + /* Define a column for NULLs and DEL markers*/ + + recinfo[0].type=FIELD_NORMAL; + recinfo[0].length=1; /* For NULL bits */ + rec_length=1; + + /* Define 2*ndims columns for coordinates*/ + + for (i=1; i<=2*ndims ;i++){ + recinfo[i].type=FIELD_NORMAL; + recinfo[i].length=key_length; + rec_length+=key_length; + } + + /* Define a key with 2*ndims segments */ + + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=2*ndims; + keyinfo[0].flag=0; + keyinfo[0].key_alg=KEYALG; + + for (i=0; i<2*ndims; i++){ + keyinfo[0].seg[i].type= key_type; + keyinfo[0].seg[i].flag=0; /* Things like HA_REVERSE_SORT */ + keyinfo[0].seg[i].start= (key_length*i)+1; + keyinfo[0].seg[i].length=key_length; + keyinfo[0].seg[i].null_bit= null_fields ? 
2 : 0; + keyinfo[0].seg[i].null_pos=0; + keyinfo[0].seg[i].language=default_charset_info->number; + } + + if (!silent) + printf("- Creating isam-file\n"); + + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=10000000; + + if (maria_create(filename, + DYNAMIC_RECORD, + 1, /* keys */ + keyinfo, + 1+2*ndims+opt_unique, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) + goto err; + + if (!silent) + printf("- Open isam-file\n"); + + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + + if (!silent) + printf("- Writing key:s\n"); + + for (i=0; i<nrecords; i++ ) + { + create_record(record,i); + error=maria_write(file,record); + print_record(record,maria_position(file),"\n"); + if (!error) + { + row_count++; + } + else + { + printf("maria_write: %d\n", error); + goto err; + } + } + + if ((error=read_with_pos(file,silent))) + goto err; + + if (!silent) + printf("- Reading rows with key\n"); + + for (i=0 ; i < nrecords ; i++) + { + my_errno=0; + create_record(record,i); + + bzero((char*) read_record,MAX_REC_LENGTH); + error=maria_rkey(file,read_record,0,record+1,0,HA_READ_MBR_EQUAL); + + if (error && error!=HA_ERR_KEY_NOT_FOUND) + { + printf(" maria_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + if (error == HA_ERR_KEY_NOT_FOUND) + { + print_record(record,maria_position(file)," NOT FOUND\n"); + continue; + } + print_record(read_record,maria_position(file),"\n"); + } + + if (!silent) + printf("- Deleting rows\n"); + for (i=0; i < nrecords/4; i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if (error) + { + printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + print_record(read_record,maria_position(file),"\n"); + + error=maria_delete(file,read_record); + if (error) + { + printf("pos: %2d maria_delete: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + if (!silent) + printf("- Updating rows with position\n"); + for (i=0; i < (nrecords - nrecords/4) ; i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=maria_rrnd(file,read_record,i == 0 ? 
0L : HA_OFFSET_ERROR); + if (error) + { + if (error==HA_ERR_RECORD_DELETED) + continue; + printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + print_record(read_record,maria_position(file),""); + create_record(record,i+nrecords*upd); + printf("\t-> "); + print_record(record,maria_position(file),"\n"); + error=maria_update(file,read_record,record); + if (error) + { + printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + if ((error=read_with_pos(file,silent))) + goto err; + + if (!silent) + printf("- Test maria_rkey then a sequence of maria_rnext_same\n"); + + create_record(record, nrecords*4/5); + print_record(record,0," search for\n"); + + if ((error=maria_rkey(file,read_record,0,record+1,0,HA_READ_MBR_INTERSECT))) + { + printf("maria_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rkey\n"); + row_count=1; + + for (;;) + { + if ((error=maria_rnext_same(file,read_record))) + { + if (error==HA_ERR_END_OF_FILE) + break; + printf("maria_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rnext_same\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + if (!silent) + printf("- Test maria_rfirst then a sequence of maria_rnext\n"); + + error=maria_rfirst(file,read_record,0); + if (error) + { + printf("maria_rfirst: %3d errno: %3d\n",error,my_errno); + goto err; + } + row_count=1; + print_record(read_record,maria_position(file)," maria_frirst\n"); + + for (i=0;i<nrecords;i++) + { + if ((error=maria_rnext(file,read_record,0))) + { + if (error==HA_ERR_END_OF_FILE) + break; + printf("maria_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rnext\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + if (!silent) + printf("- Test maria_records_in_range()\n"); + + create_record1(record, nrecords*4/5); + print_record(record,0,"\n"); + + range.key= record+1; + range.length= 1000; /* Big enough */ + range.flag= HA_READ_MBR_INTERSECT; + hrows= maria_records_in_range(file,0, &range, (key_range*) 0); + printf(" %ld rows\n", (long) hrows); + + if (maria_close(file)) goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return 0; + +err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ +} + + + +static int read_with_pos (MARIA_HA * file,int silent) +{ + int error; + int i; + char read_record[MAX_REC_LENGTH]; + + if (!silent) + printf("- Reading rows with position\n"); + for (i=0;;i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=maria_rrnd(file,read_record,i == 0 ? 
0L : HA_OFFSET_ERROR); + if (error) + { + if (error==HA_ERR_END_OF_FILE) + break; + if (error==HA_ERR_RECORD_DELETED) + continue; + printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno); + return error; + } + print_record(read_record,maria_position(file),"\n"); + } + return 0; +} + + +#ifdef NOT_USED +static void bprint_record(char * record, + my_off_t offs __attribute__((unused)), + const char * tail) +{ + int i; + char * pos; + i=(unsigned char)record[0]; + printf("%02X ",i); + + for( pos=record+1, i=0; i<32; i++,pos++){ + int b=(unsigned char)*pos; + printf("%02X",b); + } + printf("%s",tail); +} +#endif + + +static void print_record(char * record, + my_off_t offs __attribute__((unused)), + const char * tail) +{ + int i; + char * pos; + double c; + + printf(" rec=(%d)",(unsigned char)record[0]); + for ( pos=record+1, i=0; i<2*ndims; i++) + { + memcpy(&c,pos,sizeof(c)); + float8get(c,pos); + printf(" %.14g ",c); + pos+=sizeof(c); + } + printf("pos=%ld",(long int)offs); + printf("%s",tail); +} + + + +static void create_record1(char *record,uint rownr) +{ + int i; + char * pos; + double c=rownr+10; + + bzero((char*) record,MAX_REC_LENGTH); + record[0]=0x01; /* DEL marker */ + + for ( pos=record+1, i=0; i<2*ndims; i++) + { + memcpy(pos,&c,sizeof(c)); + float8store(pos,c); + pos+=sizeof(c); + } +} + +#ifdef NOT_USED + +static void create_record0(char *record,uint rownr) +{ + int i; + char * pos; + double c=rownr+10; + double c0=0; + + bzero((char*) record,MAX_REC_LENGTH); + record[0]=0x01; /* DEL marker */ + + for ( pos=record+1, i=0; i<ndims; i++) + { + memcpy(pos,&c0,sizeof(c0)); + float8store(pos,c0); + pos+=sizeof(c0); + memcpy(pos,&c,sizeof(c)); + float8store(pos,c); + pos+=sizeof(c); + } +} + +#endif + +static void create_record(char *record,uint rownr) +{ + int i; + char *pos; + double *data= rt_data+rownr*4; + record[0]=0x01; /* DEL marker */ + for ( pos=record+1, i=0; i<ndims*2; i++) + { + float8store(pos,data[i]); + pos+=8; + } +} + +#else +int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused))) +{ + exit(0); +} +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_scan.c b/storage/maria/ma_scan.c new file mode 100644 index 00000000000..4538c87e2be --- /dev/null +++ b/storage/maria/ma_scan.c @@ -0,0 +1,61 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read through all rows sequntially */ + +#include "maria_def.h" + +int maria_scan_init(register MARIA_HA *info) +{ + DBUG_ENTER("maria_scan_init"); + + info->cur_row.nextpos= info->s->pack.header_length; /* Read first record */ + info->lastinx= -1; /* Can't forward or backward */ + if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + + if ((*info->s->scan_init)(info)) + DBUG_RETURN(my_errno); + DBUG_RETURN(0); +} + +/* + Read a row based on position. + + SYNOPSIS + maria_scan() + info Maria handler + record Read data here + + RETURN + 0 ok + HA_ERR_END_OF_FILE End of file + # Error code +*/ + +int maria_scan(MARIA_HA *info, byte *record) +{ + DBUG_ENTER("maria_scan"); + /* Init all but update-flag */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + DBUG_RETURN((*info->s->scan)(info, record, info->cur_row.nextpos, 1)); +} + + +void maria_scan_end(MARIA_HA *info) +{ + (*info->s->scan_end)(info); +} diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c new file mode 100644 index 00000000000..d8738ae4639 --- /dev/null +++ b/storage/maria/ma_search.c @@ -0,0 +1,1906 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* key handling functions */ + +#include "ma_fulltext.h" +#include "m_ctype.h" + +static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, + byte *key, byte *keypos, + uint *return_key_length); + + /* Check index */ + +int _ma_check_index(MARIA_HA *info, int inx) +{ + if (inx == -1) /* Use last index */ + inx=info->lastinx; + if (inx < 0 || ! 
maria_is_key_active(info->s->state.key_map, inx)) + { + my_errno=HA_ERR_WRONG_INDEX; + return -1; + } + if (info->lastinx != inx) /* Index changed */ + { + info->lastinx = inx; + info->page_changed=1; + info->update= ((info->update & (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED)) | + HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND); + } + if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + return(-1); + return(inx); +} /* _ma_check_index */ + + + /* + ** Search after row by a key + ** Position to row is stored in info->lastpos + ** Return: -1 if not found + ** 1 if one should continue search on higher level + */ + +int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, uint key_len, uint nextflag, register my_off_t pos) +{ + my_bool last_key; + int error,flag; + uint nod_flag; + byte *keypos,*maxpos; + byte lastkey[HA_MAX_KEY_BUFF],*buff; + DBUG_ENTER("_ma_search"); + DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu", + (ulong) pos, nextflag, (ulong) info->cur_row.lastpos)); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_len);); + + if (pos == HA_OFFSET_ERROR) + { + my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ + info->cur_row.lastpos= HA_OFFSET_ERROR; + if (!(nextflag & (SEARCH_SMALLER | SEARCH_BIGGER | SEARCH_LAST))) + DBUG_RETURN(-1); /* Not found ; return error */ + DBUG_RETURN(1); /* Search at upper levels */ + } + + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS, + info->keyread_buff, + test(!(nextflag & SEARCH_SAVE_BUFF))))) + goto err; + DBUG_DUMP("page", buff, maria_getint(buff)); + + flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag, + &keypos,lastkey, &last_key); + if (flag == MARIA_FOUND_WRONG_KEY) + DBUG_RETURN(-1); + nod_flag=_ma_test_if_nod(buff); + maxpos=buff+maria_getint(buff)-1; + + if (flag) + { + if ((error= _ma_search(info,keyinfo,key,key_len,nextflag, + _ma_kpos(nod_flag,keypos))) <= 0) + DBUG_RETURN(error); + + if (flag >0) + { + if (nextflag & (SEARCH_SMALLER | SEARCH_LAST) && + keypos == buff+2+nod_flag) + DBUG_RETURN(1); /* Bigger than key */ + } + else if (nextflag & SEARCH_BIGGER && keypos >= maxpos) + DBUG_RETURN(1); /* Smaller than key */ + } + else + { + if ((nextflag & SEARCH_FIND) && nod_flag && + ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME || + key_len != USE_WHOLE_KEY)) + { + if ((error= _ma_search(info,keyinfo,key,key_len,SEARCH_FIND, + _ma_kpos(nod_flag,keypos))) >= 0 || + my_errno != HA_ERR_KEY_NOT_FOUND) + DBUG_RETURN(error); + info->last_keypage= HA_OFFSET_ERROR; /* Buffer not in mem */ + } + } + if (pos != info->last_keypage) + { + byte *old_buff=buff; + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS, + info->keyread_buff, + test(!(nextflag & SEARCH_SAVE_BUFF))))) + goto err; + keypos=buff+(keypos-old_buff); + maxpos=buff+(maxpos-old_buff); + } + + if ((nextflag & (SEARCH_SMALLER | SEARCH_LAST)) && flag != 0) + { + uint not_used[2]; + if (_ma_get_prev_key(info,keyinfo, buff, info->lastkey, keypos, + &info->lastkey_length)) + goto err; + if (!(nextflag & SEARCH_SMALLER) && + ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, key_len, + SEARCH_FIND, not_used)) + { + my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ + goto err; + } + } + else + { + info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey); + if (!info->lastkey_length) + goto err; + memcpy(info->lastkey,lastkey,info->lastkey_length); + } + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + 
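+  /*
+    The fields saved below are what a later "read next" / "read previous"
+    call uses to continue from this key page without a new search from the
+    root; keybuff_used remembers whether keyread_buff really holds this
+    page or has to be re-read first.
+  */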
/* Save position for a possible read next / previous */ + info->int_keypos= info->keyread_buff+ (keypos-buff); + info->int_maxpos= info->keyread_buff+ (maxpos-buff); + info->int_nod_flag=nod_flag; + info->int_keytree_version=keyinfo->version; + info->last_search_keypage=info->last_keypage; + info->page_changed=0; + info->keybuff_used= (info->keyread_buff != buff); /* If we have to reread */ + + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); + DBUG_RETURN(0); + +err: + DBUG_PRINT("exit",("Error: %d",my_errno)); + info->cur_row.lastpos= HA_OFFSET_ERROR; + info->page_changed=1; + DBUG_RETURN (-1); +} /* _ma_search */ + + + /* Search after key in page-block */ + /* If packed key puts smaller or identical key in buff */ + /* ret_pos point to where find or bigger key starts */ + /* ARGSUSED */ + +int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint key_len, uint comp_flag, byte **ret_pos, + byte *buff __attribute__((unused)), my_bool *last_key) +{ + reg4 int start,mid,end,save_end; + int flag; + uint totlength,nod_flag,not_used[2]; + DBUG_ENTER("_ma_bin_search"); + + LINT_INIT(flag); + totlength=keyinfo->keylength+(nod_flag=_ma_test_if_nod(page)); + start=0; mid=1; + save_end=end=(int) ((maria_getint(page)-2-nod_flag)/totlength-1); + DBUG_PRINT("test",("maria_getint: %d end: %d",maria_getint(page),end)); + page+=2+nod_flag; + + while (start != end) + { + mid= (start+end)/2; + if ((flag=ha_key_cmp(keyinfo->seg,(uchar*) page+(uint) mid*totlength, + (uchar*) key, key_len, comp_flag, not_used)) + >= 0) + end=mid; + else + start=mid+1; + } + if (mid != start) + flag=ha_key_cmp(keyinfo->seg, (uchar*) page+(uint) start*totlength, + (uchar*) key, key_len, comp_flag, not_used); + if (flag < 0) + start++; /* point at next, bigger key */ + *ret_pos=page+(uint) start*totlength; + *last_key= end == save_end; + DBUG_PRINT("exit",("flag: %d keypos: %d",flag,start)); + DBUG_RETURN(flag); +} /* _ma_bin_search */ + + +/* + Locate a packed key in a key page. + + SYNOPSIS + _ma_seq_search() + info Open table information. + keyinfo Key definition information. + page Key page (beginning). + key Search key. + key_len Length to use from search key or USE_WHOLE_KEY + comp_flag Search flags like SEARCH_SAME etc. + ret_pos RETURN Position in key page behind this key. + buff RETURN Copy of previous or identical unpacked key. + last_key RETURN If key is last in page. + + DESCRIPTION + Used instead of _ma_bin_search() when key is packed. + Puts smaller or identical key in buff. + Key is searched sequentially. + + RETURN + > 0 Key in 'buff' is smaller than search key. + 0 Key in 'buff' is identical to search key. + < 0 Not found. 
+*/ + +int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint key_len, uint comp_flag, byte **ret_pos, + byte *buff, my_bool *last_key) +{ + int flag; + uint nod_flag,length,not_used[2]; + byte t_buff[HA_MAX_KEY_BUFF],*end; + DBUG_ENTER("_ma_seq_search"); + + LINT_INIT(flag); LINT_INIT(length); + end= page+maria_getint(page); + nod_flag=_ma_test_if_nod(page); + page+=2+nod_flag; + *ret_pos=page; + t_buff[0]=0; /* Avoid bugs */ + while (page < end) + { + length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff); + if (length == 0 || page > end) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_PRINT("error", + ("Found wrong key: length: %u page: 0x%lx end: 0x%lx", + length, (long) page, (long) end)); + DBUG_RETURN(MARIA_FOUND_WRONG_KEY); + } + if ((flag= ha_key_cmp(keyinfo->seg, (uchar*) t_buff,(uchar*) key, + key_len,comp_flag, not_used)) >= 0) + break; +#ifdef EXTRA_DEBUG + DBUG_PRINT("loop",("page: 0x%lx key: '%s' flag: %d", (long) page, t_buff, + flag)); +#endif + memcpy(buff,t_buff,length); + *ret_pos=page; + } + if (flag == 0) + memcpy(buff,t_buff,length); /* Result is first key */ + *last_key= page == end; + DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos)); + DBUG_RETURN(flag); +} /* _ma_seq_search */ + + +int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint key_len, uint nextflag, + byte **ret_pos, byte *buff, my_bool *last_key) +{ + /* + my_flag is raw comparison result to be changed according to + SEARCH_NO_FIND,SEARCH_LAST and HA_REVERSE_SORT flags. + flag is the value returned by ha_key_cmp and as treated as final + */ + int flag=0, my_flag=-1; + uint nod_flag, length, len, matched, cmplen, kseg_len; + uint prefix_len,suffix_len; + int key_len_skip, seg_len_pack, key_len_left; + byte *end; + uchar *kseg, *vseg, *saved_vseg, *saved_from; + uchar *sort_order= keyinfo->seg->charset->sort_order; + byte tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; + byte *saved_to; + uint saved_length=0, saved_prefix_len=0; + uint length_pack; + DBUG_ENTER("_ma_prefix_search"); + + LINT_INIT(length); + LINT_INIT(prefix_len); + LINT_INIT(seg_len_pack); + LINT_INIT(saved_from); + LINT_INIT(saved_to); + LINT_INIT(saved_vseg); + + t_buff[0]=0; /* Avoid bugs */ + end= page+maria_getint(page); + nod_flag=_ma_test_if_nod(page); + page+=2+nod_flag; + *ret_pos=page; + kseg= (uchar*) key; + + get_key_pack_length(kseg_len, length_pack, kseg); + key_len_skip=length_pack+kseg_len; + key_len_left=(int) key_len- (int) key_len_skip; + /* If key_len is 0, then lenght_pack is 1, then key_len_left is -1. */ + cmplen=(key_len_left>=0) ? kseg_len : key_len-length_pack; + DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg)); + + /* + Keys are compressed the following way: + + If the max length of first key segment <= 127 bytes the prefix is + 1 byte else it's 2 byte + + (prefix) length The high bit is set if this is a prefix for the prev key. + [suffix length] Packed length of suffix if the previous was a prefix. + (suffix) data Key data bytes (past the common prefix or whole segment). + [next-key-seg] Next key segments (([packed length], data), ...) + pointer Reference to the data file (last_keyseg->length). 
+ */ + + matched=0; /* how many char's from prefix were alredy matched */ + len=0; /* length of previous key unpacked */ + + while (page < end) + { + uint packed= *page & 128; + + vseg= (uchar*) page; + if (keyinfo->seg->length >= 127) + { + suffix_len=mi_uint2korr(vseg) & 32767; + vseg+=2; + } + else + suffix_len= *vseg++ & 127; + + if (packed) + { + if (suffix_len == 0) + { + /* == 0x80 or 0x8000, same key, prefix length == old key length. */ + prefix_len=len; + } + else + { + /* > 0x80 or 0x8000, this is prefix lgt, packed suffix lgt follows. */ + prefix_len=suffix_len; + get_key_length(suffix_len,vseg); + } + } + else + { + /* Not packed. No prefix used from last key. */ + prefix_len=0; + } + + len=prefix_len+suffix_len; + seg_len_pack=get_pack_length(len); + t_buff=tt_buff+3-seg_len_pack; + store_key_length(t_buff,len); + + if (prefix_len > saved_prefix_len) + memcpy(t_buff+seg_len_pack+saved_prefix_len,saved_vseg, + prefix_len-saved_prefix_len); + saved_vseg=vseg; + saved_prefix_len=prefix_len; + + DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack, + suffix_len,vseg)); + { + uchar *from= vseg+suffix_len; + HA_KEYSEG *keyseg; + uint l; + + for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ ) + { + + if (keyseg->flag & HA_NULL_PART) + { + if (!(*from++)) + continue; + } + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) + { + get_key_length(l,from); + } + else + l=keyseg->length; + + from+=l; + } + from+= keyseg->length; + page= (byte*) from+nod_flag; + length= (uint) (from-vseg); + } + + if (page > end) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_PRINT("error", + ("Found wrong key: length: %u page: 0x%lx end: %lx", + length, (long) page, (long) end)); + DBUG_RETURN(MARIA_FOUND_WRONG_KEY); + } + + if (matched >= prefix_len) + { + /* We have to compare. But we can still skip part of the key */ + uint left; + uchar *k= kseg+prefix_len; + + /* + If prefix_len > cmplen then we are in the end-space comparison + phase. Do not try to acces the key any more ==> left= 0. + */ + left= ((len <= cmplen) ? suffix_len : + ((prefix_len < cmplen) ? cmplen - prefix_len : 0)); + + matched=prefix_len+left; + + if (sort_order) + { + for (my_flag=0;left;left--) + if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++])) + break; + } + else + { + for (my_flag=0;left;left--) + if ((my_flag= (int) *vseg++ - (int) *k++)) + break; + } + + if (my_flag>0) /* mismatch */ + break; + if (my_flag==0) /* match */ + { + /* + ** len cmplen seg_left_len more_segs + ** < matched=len; continue search + ** > = prefix ? 
found : (matched=len; continue search) + ** > < - ok, found + ** = < - ok, found + ** = = - ok, found + ** = = + next seg + */ + if (len < cmplen) + { + if ((keyinfo->seg->type != HA_KEYTYPE_TEXT && + keyinfo->seg->type != HA_KEYTYPE_VARTEXT1 && + keyinfo->seg->type != HA_KEYTYPE_VARTEXT2)) + my_flag= -1; + else + { + /* We have to compare k and vseg as if they were space extended */ + uchar *end= k+ (cmplen - len); + for ( ; k < end && *k == ' '; k++) ; + if (k == end) + goto cmp_rest; /* should never happen */ + if ((uchar) *k < (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ + } + } + else if (len > cmplen) + { + uchar *end; + if ((nextflag & SEARCH_PREFIX) && key_len_left == 0) + goto fix_flag; + + /* We have to compare k and vseg as if they were space extended */ + for (end=vseg + (len-cmplen) ; + vseg < end && *vseg == (byte) ' '; + vseg++, matched++) ; + DBUG_ASSERT(vseg < end); + + if ((uchar) *vseg > (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ + } + else + { + cmp_rest: + if (key_len_left>0) + { + uint not_used[2]; + if ((flag = ha_key_cmp(keyinfo->seg+1,vseg, + k, key_len_left, nextflag, not_used)) >= 0) + break; + } + else + { + /* + at this line flag==-1 if the following lines were already + visited and 0 otherwise, i.e. flag <=0 here always !!! + */ + fix_flag: + DBUG_ASSERT(flag <= 0); + if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) + flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1; + if (flag>=0) + break; + } + } + } + matched-=left; + } + /* else (matched < prefix_len) ---> do nothing. */ + + memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len); + saved_to= buff+saved_length; + saved_from= saved_vseg; + saved_length=length; + *ret_pos=page; + } + if (my_flag) + flag=(keyinfo->seg->flag & HA_REVERSE_SORT) ? 
-my_flag : my_flag; + if (flag == 0) + { + memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len); + saved_to= buff+saved_length; + saved_from= saved_vseg; + saved_length=length; + } + if (saved_length) + memcpy(saved_to, (byte*) saved_from, saved_length); + + *last_key= page == end; + + DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos)); + DBUG_RETURN(flag); +} /* _ma_prefix_search */ + + + /* Get pos to a key_block */ + +my_off_t _ma_kpos(uint nod_flag, byte *after_key) +{ + after_key-=nod_flag; + switch (nod_flag) { +#if SIZEOF_OFF_T > 4 + case 7: + return mi_uint7korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; + case 6: + return mi_uint6korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; + case 5: + return mi_uint5korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; +#else + case 7: + after_key++; + case 6: + after_key++; + case 5: + after_key++; +#endif + case 4: + return ((my_off_t) mi_uint4korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH; + case 3: + return ((my_off_t) mi_uint3korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH; + case 2: + return (my_off_t) (mi_uint2korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH); + case 1: + return (uint) (*after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; + case 0: /* At leaf page */ + default: /* Impossible */ + return(HA_OFFSET_ERROR); + } +} /* _kpos */ + + + /* Save pos to a key_block */ + +void _ma_kpointer(register MARIA_HA *info, register byte *buff, my_off_t pos) +{ + pos/=MARIA_MIN_KEY_BLOCK_LENGTH; + switch (info->s->base.key_reflength) { +#if SIZEOF_OFF_T > 4 + case 7: mi_int7store(buff,pos); break; + case 6: mi_int6store(buff,pos); break; + case 5: mi_int5store(buff,pos); break; +#else + case 7: *buff++=0; + /* fall trough */ + case 6: *buff++=0; + /* fall trough */ + case 5: *buff++=0; + /* fall trough */ +#endif + case 4: mi_int4store(buff,pos); break; + case 3: mi_int3store(buff,pos); break; + case 2: mi_int2store(buff,(uint) pos); break; + case 1: buff[0]= (char) (uchar) pos; break; + default: abort(); /* impossible */ + } +} /* _ma_kpointer */ + + + /* Calc pos to a data-record from a key */ + + +my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, const byte *after_key) +{ + my_off_t pos; + after_key-=(nod_flag + info->s->rec_reflength); + switch (info->s->rec_reflength) { +#if SIZEOF_OFF_T > 4 + case 8: pos= (my_off_t) mi_uint8korr(after_key); break; + case 7: pos= (my_off_t) mi_uint7korr(after_key); break; + case 6: pos= (my_off_t) mi_uint6korr(after_key); break; + case 5: pos= (my_off_t) mi_uint5korr(after_key); break; +#else + case 8: pos= (my_off_t) mi_uint4korr(after_key+4); break; + case 7: pos= (my_off_t) mi_uint4korr(after_key+3); break; + case 6: pos= (my_off_t) mi_uint4korr(after_key+2); break; + case 5: pos= (my_off_t) mi_uint4korr(after_key+1); break; +#endif + case 4: pos= (my_off_t) mi_uint4korr(after_key); break; + case 3: pos= (my_off_t) mi_uint3korr(after_key); break; + case 2: pos= (my_off_t) mi_uint2korr(after_key); break; + default: + pos=0L; /* Shut compiler up */ + } + return ((info->s->data_file_type == STATIC_RECORD) ? 
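/*
  For STATIC_RECORD (fixed-length row) files the value stored behind the key
  is a row number rather than a byte offset; it is scaled back into an offset
  here, and _ma_dpointer() below performs the inverse division before storing.
  _ma_kpos() and _ma_kpointer() above use the same trick with
  MARIA_MIN_KEY_BLOCK_LENGTH, so a short pointer can address a larger file.
  A minimal sketch with a hypothetical 100-byte row length:

    enum { TOY_RECLEN= 100 };
    static unsigned long toy_row_to_offset(unsigned long row)
    { return row * TOY_RECLEN; }
    static unsigned long toy_offset_to_row(unsigned long offset)
    { return offset / TOY_RECLEN; }
*/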
+ pos * info->s->base.pack_reclength : pos); +} + + +/* Calc position from a record pointer ( in delete link chain ) */ + +my_off_t _ma_rec_pos(MARIA_SHARE *s, byte *ptr) +{ + my_off_t pos; + switch (s->rec_reflength) { +#if SIZEOF_OFF_T > 4 + case 8: + pos= (my_off_t) mi_uint8korr(ptr); + if (pos == HA_OFFSET_ERROR) + return HA_OFFSET_ERROR; /* end of list */ + break; + case 7: + pos= (my_off_t) mi_uint7korr(ptr); + if (pos == (((my_off_t) 1) << 56) -1) + return HA_OFFSET_ERROR; /* end of list */ + break; + case 6: + pos= (my_off_t) mi_uint6korr(ptr); + if (pos == (((my_off_t) 1) << 48) -1) + return HA_OFFSET_ERROR; /* end of list */ + break; + case 5: + pos= (my_off_t) mi_uint5korr(ptr); + if (pos == (((my_off_t) 1) << 40) -1) + return HA_OFFSET_ERROR; /* end of list */ + break; +#else + case 8: + case 7: + case 6: + case 5: + ptr+= (s->rec_reflength-4); + /* fall through */ +#endif + case 4: + pos= (my_off_t) mi_uint4korr(ptr); + if (pos == (my_off_t) (uint32) ~0L) + return HA_OFFSET_ERROR; + break; + case 3: + pos= (my_off_t) mi_uint3korr(ptr); + if (pos == (my_off_t) (1 << 24) -1) + return HA_OFFSET_ERROR; + break; + case 2: + pos= (my_off_t) mi_uint2korr(ptr); + if (pos == (my_off_t) (1 << 16) -1) + return HA_OFFSET_ERROR; + break; + default: abort(); /* Impossible */ + } + return ((s->data_file_type == STATIC_RECORD) ? + pos * s->base.pack_reclength : pos); +} + + + /* save position to record */ + +void _ma_dpointer(MARIA_HA *info, byte *buff, my_off_t pos) +{ + if (info->s->data_file_type == STATIC_RECORD && + pos != HA_OFFSET_ERROR) + pos/= info->s->base.pack_reclength; + + switch (info->s->rec_reflength) { +#if SIZEOF_OFF_T > 4 + case 8: mi_int8store(buff,pos); break; + case 7: mi_int7store(buff,pos); break; + case 6: mi_int6store(buff,pos); break; + case 5: mi_int5store(buff,pos); break; +#else + case 8: *buff++=0; + /* fall trough */ + case 7: *buff++=0; + /* fall trough */ + case 6: *buff++=0; + /* fall trough */ + case 5: *buff++=0; + /* fall trough */ +#endif + case 4: mi_int4store(buff,pos); break; + case 3: mi_int3store(buff,pos); break; + case 2: mi_int2store(buff,(uint) pos); break; + default: abort(); /* Impossible */ + } +} /* _ma_dpointer */ + + + /* Get key from key-block */ + /* page points at previous key; its advanced to point at next key */ + /* key should contain previous key */ + /* Returns length of found key + pointers */ + /* nod_flag is a flag if we are on nod */ + + /* same as _ma_get_key but used with fixed length keys */ + +uint _ma_get_static_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, + register byte **page, register byte *key) +{ + memcpy((byte*) key,(byte*) *page, + (size_t) (keyinfo->keylength+nod_flag)); + *page+=keyinfo->keylength+nod_flag; + return(keyinfo->keylength); +} /* _ma_get_static_key */ + + +/* + get key witch is packed against previous key or key with a NULL column. + + SYNOPSIS + _ma_get_pack_key() + keyinfo key definition information. + nod_flag If nod: Length of node pointer, else zero. + page_pos RETURN position in key page behind this key. + key IN/OUT in: prev key, out: unpacked key. 
+ + RETURN + key_length + length of data pointer +*/ + +uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, + register byte **page_pos, register byte *key) +{ + reg1 HA_KEYSEG *keyseg; + byte *start_key,*page=*page_pos; + uint length; + + start_key=key; + for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++) + { + if (keyseg->flag & HA_PACK_KEY) + { + /* key with length, packed to previous key */ + byte *start= key; + uint packed= *page & 128,tot_length,rest_length; + if (keyseg->length >= 127) + { + length=mi_uint2korr(page) & 32767; + page+=2; + } + else + length= *page++ & 127; + + if (packed) + { + if (length > (uint) keyseg->length) + { + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return 0; /* Error */ + } + if (length == 0) /* Same key */ + { + if (keyseg->flag & HA_NULL_PART) + *key++=1; /* Can't be NULL */ + get_key_length(length,key); + key+= length; /* Same diff_key as prev */ + if (length > keyseg->length) + { + DBUG_PRINT("error", + ("Found too long null packed key: %u of %u at 0x%lx", + length, keyseg->length, (long) *page_pos)); + DBUG_DUMP("key",(char*) *page_pos,16); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return 0; + } + continue; + } + if (keyseg->flag & HA_NULL_PART) + { + key++; /* Skip null marker*/ + start++; + } + + get_key_length(rest_length,page); + tot_length=rest_length+length; + + /* If the stored length has changed, we must move the key */ + if (tot_length >= 255 && *start != (char) 255) + { + /* length prefix changed from a length of one to a length of 3 */ + bmove_upp((char*) key+length+3,(char*) key+length+1,length); + *key=255; + mi_int2store(key+1,tot_length); + key+=3+length; + } + else if (tot_length < 255 && *start == (char) 255) + { + bmove(key+1,key+3,length); + *key=tot_length; + key+=1+length; + } + else + { + store_key_length_inc(key,tot_length); + key+=length; + } + memcpy(key,page,rest_length); + page+=rest_length; + key+=rest_length; + continue; + } + else + { + if (keyseg->flag & HA_NULL_PART) + { + if (!length--) /* Null part */ + { + *key++=0; + continue; + } + *key++=1; /* Not null */ + } + } + if (length > (uint) keyseg->length) + { + DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx", + length, keyseg->length, (long) *page_pos)); + DBUG_DUMP("key",(char*) *page_pos,16); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return 0; /* Error */ + } + store_key_length_inc(key,length); + } + else + { + if (keyseg->flag & HA_NULL_PART) + { + if (!(*key++ = *page++)) + continue; + } + if (keyseg->flag & + (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) + { + byte *tmp=page; + get_key_length(length,tmp); + length+=(uint) (tmp-page); + } + else + length=keyseg->length; + } + memcpy((byte*) key,(byte*) page,(size_t) length); + key+=length; + page+=length; + } + length=keyseg->length+nod_flag; + bmove((byte*) key,(byte*) page,length); + *page_pos= page+length; + return ((uint) (key-start_key)+keyseg->length); +} /* _ma_get_pack_key */ + + + +/* key that is packed relatively to previous */ + +uint _ma_get_binary_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, + register byte **page_pos, register byte *key) +{ + reg1 HA_KEYSEG *keyseg; + byte *start_key,*page,*page_end,*from,*from_end; + uint length,tmp; + DBUG_ENTER("_ma_get_binary_pack_key"); + + page= *page_pos; + page_end=page+HA_MAX_KEY_BUFF+1; + start_key=key; + + /* + Keys are compressed the following way: + + prefix length Packed length of 
prefix for the prev key. (1 or 3 bytes) + for each key segment: + [is null] Null indicator if can be null (1 byte, zero means null) + [length] Packed length if varlength (1 or 3 bytes) + pointer Reference to the data file (last_keyseg->length). + */ + get_key_length(length,page); + if (length) + { + if (length > keyinfo->maxlength) + { + DBUG_PRINT("error", + ("Found too long binary packed key: %u of %u at 0x%lx", + length, keyinfo->maxlength, (long) *page_pos)); + DBUG_DUMP("key",(char*) *page_pos,16); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); /* Wrong key */ + } + from=key; from_end=key+length; + } + else + { + from=page; from_end=page_end; /* Not packed key */ + } + + /* + The trouble is that key is split in two parts: + The first part is in from ...from_end-1. + The second part starts at page + */ + for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++) + { + if (keyseg->flag & HA_NULL_PART) + { + if (from == from_end) { from=page; from_end=page_end; } + if (!(*key++ = *from++)) + continue; /* Null part */ + } + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) + { + /* Get length of dynamic length key part */ + if (from == from_end) { from=page; from_end=page_end; } + if ((length= (*key++ = *from++)) == 255) + { + if (from == from_end) { from=page; from_end=page_end; } + length= (uint) ((*key++ = *from++)) << 8; + if (from == from_end) { from=page; from_end=page_end; } + length+= (uint) ((*key++ = *from++)); + } + } + else + length=keyseg->length; + + if ((tmp=(uint) (from_end-from)) <= length) + { + key+=tmp; /* Use old key */ + length-=tmp; + from=page; from_end=page_end; + } + DBUG_PRINT("info",("key: 0x%lx from: 0x%lx length: %u", + (long) key, (long) from, length)); + memmove((byte*) key, (byte*) from, (size_t) length); + key+=length; + from+=length; + } + length=keyseg->length+nod_flag; + if ((tmp=(uint) (from_end-from)) <= length) + { + memcpy(key+tmp,page,length-tmp); /* Get last part of key */ + *page_pos= page+length-tmp; + } + else + { + if (from_end != page_end) + { + DBUG_PRINT("error",("Error when unpacking key")); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); /* Error */ + } + memcpy((byte*) key,(byte*) from,(size_t) length); + *page_pos= from+length; + } + DBUG_RETURN((uint) (key-start_key)+keyseg->length); +} + + + /* Get key at position without knowledge of previous key */ + /* Returns pointer to next key */ + +byte *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *key, byte *keypos, uint *return_key_length) +{ + uint nod_flag; + DBUG_ENTER("_ma_get_key"); + + nod_flag=_ma_test_if_nod(page); + if (! 
(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + bmove((byte*) key,(byte*) keypos,keyinfo->keylength+nod_flag); + DBUG_RETURN(keypos+keyinfo->keylength+nod_flag); + } + else + { + page+=2+nod_flag; + key[0]=0; /* safety */ + while (page <= keypos) + { + *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key); + if (*return_key_length == 0) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + } + } + DBUG_PRINT("exit",("page: 0x%lx length: %u", (long) page, + *return_key_length)); + DBUG_RETURN(page); +} /* _ma_get_key */ + + + /* Get key at position without knowledge of previous key */ + /* Returns 0 if ok */ + +static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, byte *keypos, + uint *return_key_length) +{ + uint nod_flag; + DBUG_ENTER("_ma_get_prev_key"); + + nod_flag=_ma_test_if_nod(page); + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + *return_key_length=keyinfo->keylength; + bmove((byte*) key,(byte*) keypos- *return_key_length-nod_flag, + *return_key_length); + DBUG_RETURN(0); + } + else + { + page+=2+nod_flag; + key[0]=0; /* safety */ + while (page < keypos) + { + *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key); + if (*return_key_length == 0) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(0); +} /* _ma_get_key */ + + + + /* Get last key from key-page */ + /* Return pointer to where key starts */ + +byte *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *lastkey, byte *endpos, uint *return_key_length) +{ + uint nod_flag; + byte *lastpos; + DBUG_ENTER("_ma_get_last_key"); + DBUG_PRINT("enter",("page: 0x%lx endpos: 0x%lx", (long) page, + (long) endpos)); + + nod_flag=_ma_test_if_nod(page); + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + lastpos=endpos-keyinfo->keylength-nod_flag; + *return_key_length=keyinfo->keylength; + if (lastpos > page) + bmove((byte*) lastkey,(byte*) lastpos,keyinfo->keylength+nod_flag); + } + else + { + lastpos=(page+=2+nod_flag); + lastkey[0]=0; + while (page < endpos) + { + lastpos=page; + *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,lastkey); + if (*return_key_length == 0) + { + DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx", + (long) page)); + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + } + } + DBUG_PRINT("exit",("lastpos: 0x%lx length: %u", (long) lastpos, + *return_key_length)); + DBUG_RETURN(lastpos); +} /* _ma_get_last_key */ + + + /* Calculate length of key */ + +uint _ma_keylength(MARIA_KEYDEF *keyinfo, register const byte *key) +{ + reg1 HA_KEYSEG *keyseg; + const byte *start; + + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + return (keyinfo->keylength); + + start= key; + for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++) + { + if (keyseg->flag & HA_NULL_PART) + if (!*key++) + continue; + if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART)) + { + uint length; + get_key_length(length,key); + key+=length; + } + else + key+= keyseg->length; + } + return((uint) (key-start)+keyseg->length); +} /* _ma_keylength */ + + +/* + Calculate length of part key. + + Used in maria_rkey() to find the key found for the key-part that was used. 
+ This is needed in case of multi-byte character sets where we may search + after '0xDF' but find 'ss' +*/ + +uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const byte *key, + HA_KEYSEG *end) +{ + reg1 HA_KEYSEG *keyseg; + const byte *start= key; + + for (keyseg=keyinfo->seg ; keyseg != end ; keyseg++) + { + if (keyseg->flag & HA_NULL_PART) + if (!*key++) + continue; + if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART)) + { + uint length; + get_key_length(length,key); + key+=length; + } + else + key+= keyseg->length; + } + return (uint) (key-start); +} + + +/* Move a key */ + +byte *_ma_move_key(MARIA_KEYDEF *keyinfo, byte *to, const byte *from) +{ + reg1 uint length; + memcpy(to, from, (size_t) (length= _ma_keylength(keyinfo, from))); + return to+length; +} + + +/* + Find next/previous record with same key + + WARNING + This can't be used when database is touched after last read +*/ + +int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, uint nextflag, my_off_t pos) +{ + int error; + uint nod_flag; + byte lastkey[HA_MAX_KEY_BUFF]; + DBUG_ENTER("_ma_search_next"); + DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu page_changed %d keybuff_used: %d", + nextflag, (ulong) info->cur_row.lastpos, + (ulong) info->int_keypos, + info->page_changed, info->keybuff_used)); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_length);); + + /* Force full read if we are at last key or if we are not on a leaf + and the key tree has changed since we used it last time + Note that even if the key tree has changed since last read, we can use + the last read data from the leaf if we haven't used the buffer for + something else. + */ + + if (((nextflag & SEARCH_BIGGER) && info->int_keypos >= info->int_maxpos) || + info->page_changed || + (info->int_keytree_version != keyinfo->version && + (info->int_nod_flag || info->keybuff_used))) + DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, pos)); + + if (info->keybuff_used) + { + if (!_ma_fetch_keypage(info,keyinfo,info->last_search_keypage, + DFLT_INIT_HITS,info->keyread_buff,0)) + DBUG_RETURN(-1); + info->keybuff_used=0; + } + + /* Last used buffer is in info->keyread_buff */ + nod_flag=_ma_test_if_nod(info->keyread_buff); + + if (nextflag & SEARCH_BIGGER) /* Next key */ + { + my_off_t tmp_pos= _ma_kpos(nod_flag,info->int_keypos); + if (tmp_pos != HA_OFFSET_ERROR) + { + if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, tmp_pos)) <=0) + DBUG_RETURN(error); + } + memcpy(lastkey,key,key_length); + if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag, + &info->int_keypos,lastkey))) + DBUG_RETURN(-1); + } + else /* Previous key */ + { + uint length; + /* Find start of previous key */ + info->int_keypos= _ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey, + info->int_keypos, &length); + if (!info->int_keypos) + DBUG_RETURN(-1); + if (info->int_keypos == info->keyread_buff+2) + DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, pos)); + if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, + _ma_kpos(nod_flag,info->int_keypos))) <= 0) + DBUG_RETURN(error); + + /* QQ: We should be able to optimize away the following call */ + if (! 
_ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey, + info->int_keypos,&info->lastkey_length)) + DBUG_RETURN(-1); + } + memcpy(info->lastkey,lastkey,info->lastkey_length); + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); + DBUG_RETURN(0); +} /* _ma_search_next */ + + + /* Search after position for the first row in an index */ + /* This is stored in info->cur_row.lastpos */ + +int _ma_search_first(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + register my_off_t pos) +{ + uint nod_flag; + byte *page; + DBUG_ENTER("_ma_search_first"); + + if (pos == HA_OFFSET_ERROR) + { + my_errno=HA_ERR_KEY_NOT_FOUND; + info->cur_row.lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + + do + { + if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->keyread_buff,0)) + { + info->cur_row.lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + nod_flag=_ma_test_if_nod(info->keyread_buff); + page=info->keyread_buff+2+nod_flag; + } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR); + + if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page, + info->lastkey))) + DBUG_RETURN(-1); /* Crashed */ + + info->int_keypos=page; info->int_maxpos=info->keyread_buff+maria_getint(info->keyread_buff)-1; + info->int_nod_flag=nod_flag; + info->int_keytree_version=keyinfo->version; + info->last_search_keypage=info->last_keypage; + info->page_changed=info->keybuff_used=0; + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + + DBUG_PRINT("exit",("found key at %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(0); +} /* _ma_search_first */ + + + /* Search after position for the last row in an index */ + /* This is stored in info->cur_row.lastpos */ + +int _ma_search_last(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + register my_off_t pos) +{ + uint nod_flag; + byte *buff,*page; + DBUG_ENTER("_ma_search_last"); + + if (pos == HA_OFFSET_ERROR) + { + my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ + info->cur_row.lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + + buff=info->keyread_buff; + do + { + if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,buff,0)) + { + info->cur_row.lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + page= buff+maria_getint(buff); + nod_flag=_ma_test_if_nod(buff); + } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR); + + if (!_ma_get_last_key(info,keyinfo,buff,info->lastkey,page, + &info->lastkey_length)) + DBUG_RETURN(-1); + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + info->int_keypos=info->int_maxpos=page; + info->int_nod_flag=nod_flag; + info->int_keytree_version=keyinfo->version; + info->last_search_keypage=info->last_keypage; + info->page_changed=info->keybuff_used=0; + + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); + DBUG_RETURN(0); +} /* _ma_search_last */ + + + +/**************************************************************************** +** +** Functions to store and pack a key in a page +** +** maria_calc_xx_key_length takes the following arguments: +** nod_flag If nod: Length of nod-pointer +** next_key Position to pos after the new key in buffer +** org_key Key that was before the next key in buffer +** prev_key Last key before current key +** key Key that will be stored +** s_temp Information how next key will be packed +****************************************************************************/ + +/* Static length key */ + +int 
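/*
  The functions below come in calc/store pairs: _ma_calc_*_key_length() only
  measures how the new key will be packed and records its decisions in
  MARIA_KEY_PARAM (s_temp), while the matching _ma_store_*_key() further down
  writes exactly those bytes.  The split lets the caller check whether the key
  fits in the page before anything is written.  A minimal sketch of the same
  two-phase idea, using a simplified hypothetical parameter block instead of
  MARIA_KEY_PARAM:

    #include <string.h>

    struct toy_key_param { const unsigned char *key; unsigned int totlength; };

    static unsigned int toy_calc_static(const unsigned char *key,
                                        unsigned int keylength,
                                        struct toy_key_param *p)
    {
      p->key= key;                        // remember what to copy later
      return p->totlength= keylength;     // fixed-size key: length is known
    }

    static void toy_store_static(unsigned char *pos,
                                 const struct toy_key_param *p)
    {
      memcpy(pos, p->key, p->totlength);  // write what calc measured
    }
*/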
+_ma_calc_static_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, + byte *next_pos __attribute__((unused)), + byte *org_key __attribute__((unused)), + byte *prev_key __attribute__((unused)), + const byte *key, MARIA_KEY_PARAM *s_temp) +{ + s_temp->key= key; + return (int) (s_temp->totlength=keyinfo->keylength+nod_flag); +} + +/* Variable length key */ + +int +_ma_calc_var_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, + byte *next_pos __attribute__((unused)), + byte *org_key __attribute__((unused)), + byte *prev_key __attribute__((unused)), + const byte *key, MARIA_KEY_PARAM *s_temp) +{ + s_temp->key= key; + return (int) (s_temp->totlength= _ma_keylength(keyinfo,key)+nod_flag); +} + +/* + length of key with a variable length first segment which is prefix + compressed (maria_chk reports 'packed + stripped') + + Keys are compressed the following way: + + If the max length of first key segment <= 127 bytes the prefix is + 1 byte else it's 2 byte + + prefix byte(s) The high bit is set if this is a prefix for the prev key + length Packed length if the previous was a prefix byte + [length] data bytes ('length' bytes) + next-key-seg Next key segments + + If the first segment can have NULL: + The length is 0 for NULLS and 1+length for not null columns. + +*/ + +int +_ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *next_key, + byte *org_key, byte *prev_key, const byte *key, + MARIA_KEY_PARAM *s_temp) +{ + reg1 HA_KEYSEG *keyseg; + int length; + uint key_length,ref_length,org_key_length=0, + length_pack,new_key_length,diff_flag,pack_marker; + const byte *start,*end,*key_end; + uchar *sort_order; + bool same_length; + + length_pack=s_temp->ref_length=s_temp->n_ref_length=s_temp->n_length=0; + same_length=0; keyseg=keyinfo->seg; + key_length= _ma_keylength(keyinfo,key)+nod_flag; + + sort_order=0; + if ((keyinfo->flag & HA_FULLTEXT) && + ((keyseg->type == HA_KEYTYPE_TEXT) || + (keyseg->type == HA_KEYTYPE_VARTEXT1) || + (keyseg->type == HA_KEYTYPE_VARTEXT2)) && + !use_strnxfrm(keyseg->charset)) + sort_order= keyseg->charset->sort_order; + + /* diff flag contains how many bytes is needed to pack key */ + if (keyseg->length >= 127) + { + diff_flag=2; + pack_marker=32768; + } + else + { + diff_flag= 1; + pack_marker=128; + } + s_temp->pack_marker=pack_marker; + + /* Handle the case that the first part have NULL values */ + if (keyseg->flag & HA_NULL_PART) + { + if (!*key++) + { + s_temp->key= key; + s_temp->key_length= 0; + s_temp->totlength= key_length-1+diff_flag; + s_temp->next_key_pos= 0; /* No next key */ + return (s_temp->totlength); + } + s_temp->store_not_null=1; + key_length--; /* We don't store NULL */ + if (prev_key && !*prev_key++) + org_key=prev_key=0; /* Can't pack against prev */ + else if (org_key) + org_key++; /* Skip NULL */ + } + else + s_temp->store_not_null=0; + s_temp->prev_key= org_key; + + /* The key part will start with a packed length */ + + get_key_pack_length(new_key_length,length_pack,key); + end= key_end= key+ new_key_length; + start= key; + + /* Calc how many characters are identical between this and the prev. 
key */ + if (prev_key) + { + get_key_length(org_key_length,prev_key); + s_temp->prev_key=prev_key; /* Pointer at data */ + /* Don't use key-pack if length == 0 */ + if (new_key_length && new_key_length == org_key_length) + same_length=1; + else if (new_key_length > org_key_length) + end= key + org_key_length; + + if (sort_order) /* SerG */ + { + while (key < end && + sort_order[* (uchar*) key] == sort_order[* (uchar*) prev_key]) + { + key++; prev_key++; + } + } + else + { + while (key < end && *key == *prev_key) + { + key++; prev_key++; + } + } + } + + s_temp->key=key; + s_temp->key_length= (uint) (key_end-key); + + if (same_length && key == key_end) + { + /* identical variable length key */ + s_temp->ref_length= pack_marker; + length=(int) key_length-(int) (key_end-start)-length_pack; + length+= diff_flag; + if (next_key) + { /* Can't combine with next */ + s_temp->n_length= *next_key; /* Needed by _ma_store_key */ + next_key=0; + } + } + else + { + if (start != key) + { /* Starts as prev key */ + ref_length= (uint) (key-start); + s_temp->ref_length= ref_length + pack_marker; + length= (int) (key_length - ref_length); + + length-= length_pack; + length+= diff_flag; + length+= ((new_key_length-ref_length) >= 255) ? 3 : 1;/* Rest_of_key */ + } + else + { + s_temp->key_length+=s_temp->store_not_null; /* If null */ + length= key_length - length_pack+ diff_flag; + } + } + s_temp->totlength=(uint) length; + s_temp->prev_length=0; + DBUG_PRINT("test",("tot_length: %u length: %d uniq_key_length: %u", + key_length, length, s_temp->key_length)); + + /* If something after that hasn't length=0, test if we can combine */ + if ((s_temp->next_key_pos=next_key)) + { + uint packed,n_length; + + packed = *next_key & 128; + if (diff_flag == 2) + { + n_length= mi_uint2korr(next_key) & 32767; /* Length of next key */ + next_key+=2; + } + else + n_length= *next_key++ & 127; + if (!packed) + n_length-= s_temp->store_not_null; + + if (n_length || packed) /* Don't pack 0 length keys */ + { + uint next_length_pack, new_ref_length=s_temp->ref_length; + + if (packed) + { + /* If first key and next key is packed (only on delete) */ + if (!prev_key && org_key) + { + get_key_length(org_key_length,org_key); + key=start; + if (sort_order) /* SerG */ + { + while (key < end && + sort_order[*(uchar*) key] == sort_order[*(uchar*) org_key]) + { + key++; org_key++; + } + } + else + { + while (key < end && *key == *org_key) + { + key++; org_key++; + } + } + if ((new_ref_length= (uint) (key - start))) + new_ref_length+=pack_marker; + } + + if (!n_length) + { + /* + We put a different key between two identical variable length keys + Extend next key to have same prefix as this key + */ + if (new_ref_length) /* prefix of previus key */ + { /* make next key longer */ + s_temp->part_of_prev_key= new_ref_length; + s_temp->prev_length= org_key_length - + (new_ref_length-pack_marker); + s_temp->n_ref_length= s_temp->part_of_prev_key; + s_temp->n_length= s_temp->prev_length; + n_length= get_pack_length(s_temp->prev_length); + s_temp->prev_key+= (new_ref_length - pack_marker); + length+= s_temp->prev_length + n_length; + } + else + { /* Can't use prev key */ + s_temp->part_of_prev_key=0; + s_temp->prev_length= org_key_length; + s_temp->n_ref_length=s_temp->n_length= org_key_length; + length+= org_key_length; + } + return (int) length; + } + + ref_length=n_length; + /* Get information about not packed key suffix */ + get_key_pack_length(n_length,next_length_pack,next_key); + + /* Test if new keys has fewer characters that match the 
previous key */ + if (!new_ref_length) + { /* Can't use prev key */ + s_temp->part_of_prev_key= 0; + s_temp->prev_length= ref_length; + s_temp->n_ref_length= s_temp->n_length= n_length+ref_length; + return (int) length+ref_length-next_length_pack; + } + if (ref_length+pack_marker > new_ref_length) + { + uint new_pack_length=new_ref_length-pack_marker; + /* We must copy characters from the original key to the next key */ + s_temp->part_of_prev_key= new_ref_length; + s_temp->prev_length= ref_length - new_pack_length; + s_temp->n_ref_length=s_temp->n_length=n_length + s_temp->prev_length; + s_temp->prev_key+= new_pack_length; + length-= (next_length_pack - get_pack_length(s_temp->n_length)); + return (int) length + s_temp->prev_length; + } + } + else + { + /* Next key wasn't a prefix of previous key */ + ref_length=0; + next_length_pack=0; + } + DBUG_PRINT("test",("length: %d next_key: 0x%lx", length, + (long) next_key)); + + { + uint tmp_length; + key=(start+=ref_length); + if (key+n_length < key_end) /* Normalize length based */ + key_end= key+n_length; + if (sort_order) /* SerG */ + { + while (key < key_end && + sort_order[*(uchar*) key] == sort_order[*(uchar*) next_key]) + { + key++; next_key++; + } + } + else + { + while (key < key_end && *key == *next_key) + { + key++; next_key++; + } + } + if (!(tmp_length=(uint) (key-start))) + { /* Key can't be re-packed */ + s_temp->next_key_pos=0; + return length; + } + ref_length+=tmp_length; + n_length-=tmp_length; + length-=tmp_length+next_length_pack; /* We gained these chars */ + } + if (n_length == 0 && ref_length == new_key_length) + { + s_temp->n_ref_length=pack_marker; /* Same as prev key */ + } + else + { + s_temp->n_ref_length=ref_length | pack_marker; + length+= get_pack_length(n_length); + s_temp->n_length=n_length; + } + } + } + return length; +} + + +/* Length of key which is prefix compressed */ + +int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *next_key, + byte *org_key, byte *prev_key, + const byte *key, + MARIA_KEY_PARAM *s_temp) +{ + uint length,key_length,ref_length; + + s_temp->totlength=key_length= _ma_keylength(keyinfo,key)+nod_flag; +#ifdef HAVE_purify + s_temp->n_length= s_temp->n_ref_length=0; /* For valgrind */ +#endif + s_temp->key=key; + s_temp->prev_key=org_key; + if (prev_key) /* If not first key in block */ + { + /* pack key against previous key */ + /* + As keys may be identical when running a sort in maria_chk, we + have to guard against the case where keys may be identical + */ + const byte *end; + end=key+key_length; + for ( ; *key == *prev_key && key < end; key++,prev_key++) ; + s_temp->ref_length= ref_length=(uint) (key-s_temp->key); + length=key_length - ref_length + get_pack_length(ref_length); + } + else + { + /* No previous key */ + s_temp->ref_length=ref_length=0; + length=key_length+1; + } + if ((s_temp->next_key_pos=next_key)) /* If another key after */ + { + /* pack key against next key */ + uint next_length,next_length_pack; + get_key_pack_length(next_length,next_length_pack,next_key); + + /* If first key and next key is packed (only on delete) */ + if (!prev_key && org_key && next_length) + { + const byte *end; + for (key= s_temp->key, end=key+next_length ; + *key == *org_key && key < end; + key++,org_key++) ; + ref_length= (uint) (key - s_temp->key); + } + + if (next_length > ref_length) + { + /* We put a key with different case between two keys with the same prefix + Extend next key to have same prefix as + this key */ + s_temp->n_ref_length= ref_length; + 
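/*
  Binary packing stores a key as the length of the prefix it shares with the
  previous key in the page, followed by the remaining suffix bytes, plus 1 or
  3 bytes for that length (as in get_pack_length()).  A minimal standalone
  sketch of the size calculation done by this function (the hypothetical
  toy_* helpers ignore the re-packing of the following key, which the
  surrounding code handles):

    static unsigned int toy_common_prefix(const unsigned char *a,
                                          const unsigned char *b,
                                          unsigned int len)
    {
      unsigned int i;
      for (i= 0; i < len && a[i] == b[i]; i++) ;
      return i;
    }

    static unsigned int toy_bin_packed_length(const unsigned char *key,
                                              const unsigned char *prev,
                                              unsigned int key_length)
    {
      unsigned int ref= prev ? toy_common_prefix(key, prev, key_length) : 0;
      unsigned int len_bytes= (ref < 255) ? 1 : 3;   // cf. get_pack_length()
      return len_bytes + (key_length - ref);         // length + suffix bytes
    }
*/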
s_temp->prev_length= next_length-ref_length; + s_temp->prev_key+= ref_length; + return (int) (length+ s_temp->prev_length - next_length_pack + + get_pack_length(ref_length)); + } + /* Check how many characters are identical to next key */ + key= s_temp->key+next_length; + while (*key++ == *next_key++) ; + if ((ref_length= (uint) (key - s_temp->key)-1) == next_length) + { + s_temp->next_key_pos=0; + return length; /* can't pack next key */ + } + s_temp->prev_length=0; + s_temp->n_ref_length=ref_length; + return (int) (length-(ref_length - next_length) - next_length_pack + + get_pack_length(ref_length)); + } + return (int) length; +} + + +/* +** store a key packed with _ma_calc_xxx_key_length in page-buffert +*/ + +/* store key without compression */ + +void _ma_store_static_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), + register byte *key_pos, + register MARIA_KEY_PARAM *s_temp) +{ + memcpy((byte*) key_pos,(byte*) s_temp->key,(size_t) s_temp->totlength); +} + + +/* store variable length key with prefix compression */ + +#define store_pack_length(test,pos,length) { \ + if (test) { *((pos)++) = (uchar) (length); } else \ + { *((pos)++) = (uchar) ((length) >> 8); *((pos)++) = (uchar) (length); } } + + +void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), + register byte *key_pos, + register MARIA_KEY_PARAM *s_temp) +{ + uint length; + byte *start; + + start=key_pos; + + if (s_temp->ref_length) + { + /* Packed against previous key */ + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->ref_length); + /* If not same key after */ + if (s_temp->ref_length != s_temp->pack_marker) + store_key_length_inc(key_pos,s_temp->key_length); + } + else + { + /* Not packed against previous key */ + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->key_length); + } + bmove((byte*) key_pos,(byte*) s_temp->key, + (length=s_temp->totlength-(uint) (key_pos-start))); + + if (!s_temp->next_key_pos) /* No following key */ + return; + key_pos+=length; + + if (s_temp->prev_length) + { + /* Extend next key because new key didn't have same prefix as prev key */ + if (s_temp->part_of_prev_key) + { + store_pack_length(s_temp->pack_marker == 128,key_pos, + s_temp->part_of_prev_key); + store_key_length_inc(key_pos,s_temp->n_length); + } + else + { + s_temp->n_length+= s_temp->store_not_null; + store_pack_length(s_temp->pack_marker == 128,key_pos, + s_temp->n_length); + } + memcpy(key_pos, s_temp->prev_key, s_temp->prev_length); + } + else if (s_temp->n_ref_length) + { + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_ref_length); + if (s_temp->n_ref_length == s_temp->pack_marker) + return; /* Identical key */ + store_key_length(key_pos,s_temp->n_length); + } + else + { + s_temp->n_length+= s_temp->store_not_null; + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_length); + } +} + + +/* variable length key with prefix compression */ + +void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), + register byte *key_pos, + register MARIA_KEY_PARAM *s_temp) +{ + store_key_length_inc(key_pos,s_temp->ref_length); + memcpy((char*) key_pos,(char*) s_temp->key+s_temp->ref_length, + (size_t) s_temp->totlength-s_temp->ref_length); + + if (s_temp->next_key_pos) + { + key_pos+=(uint) (s_temp->totlength-s_temp->ref_length); + store_key_length_inc(key_pos,s_temp->n_ref_length); + if (s_temp->prev_length) /* If we must extend key */ + { + memcpy(key_pos,s_temp->prev_key,s_temp->prev_length); + } + } +} diff --git a/storage/maria/ma_sort.c 
b/storage/maria/ma_sort.c new file mode 100644 index 00000000000..9134645f847 --- /dev/null +++ b/storage/maria/ma_sort.c @@ -0,0 +1,1056 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Creates a index for a database by reading keys, sorting them and outputing + them in sorted order through MARIA_SORT_INFO functions. +*/ + +#include "ma_fulltext.h" +#if defined(MSDOS) || defined(__WIN__) +#include <fcntl.h> +#else +#include <stddef.h> +#endif +#include <queues.h> + +/* static variables */ + +#undef MIN_SORT_MEMORY +#undef MYF_RW +#undef DISK_BUFFER_SIZE + +#define MERGEBUFF 15 +#define MERGEBUFF2 31 +#define MIN_SORT_MEMORY (4096-MALLOC_OVERHEAD) +#define MYF_RW MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL) +#define DISK_BUFFER_SIZE (IO_SIZE*16) + + +/* + Pointers of functions for store and read keys from temp file +*/ + +extern void print_error _VARARGS((const char *fmt,...)); + +/* Functions defined in this file */ + +static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info,uint keys, + byte **sort_keys, + DYNAMIC_ARRAY *buffpek,int *maxbuffer, + IO_CACHE *tempfile, + IO_CACHE *tempfile_for_exceptions); +static int NEAR_F write_keys(MARIA_SORT_PARAM *info, byte **sort_keys, + uint count, BUFFPEK *buffpek,IO_CACHE *tempfile); +static int NEAR_F write_key(MARIA_SORT_PARAM *info, byte *key, + IO_CACHE *tempfile); +static int NEAR_F write_index(MARIA_SORT_PARAM *info, byte **sort_keys, + uint count); +static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info,uint keys, + byte **sort_keys, + BUFFPEK *buffpek,int *maxbuffer, + IO_CACHE *t_file); +static uint NEAR_F read_to_buffer(IO_CACHE *fromfile,BUFFPEK *buffpek, + uint sort_length); +static int NEAR_F merge_buffers(MARIA_SORT_PARAM *info,uint keys, + IO_CACHE *from_file, IO_CACHE *to_file, + byte **sort_keys, BUFFPEK *lastbuff, + BUFFPEK *Fb, BUFFPEK *Tb); +static int NEAR_F merge_index(MARIA_SORT_PARAM *,uint, byte **,BUFFPEK *, int, + IO_CACHE *); +static int flush_maria_ft_buf(MARIA_SORT_PARAM *info); + +static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info, byte **sort_keys, + uint count, BUFFPEK *buffpek, + IO_CACHE *tempfile); +static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile,BUFFPEK *buffpek, + uint sort_length); +static int NEAR_F write_merge_key(MARIA_SORT_PARAM *info, IO_CACHE *to_file, + char *key, uint sort_length, uint count); +static int NEAR_F write_merge_key_varlen(MARIA_SORT_PARAM *info, + IO_CACHE *to_file, + char* key, uint sort_length, + uint count); +static inline int +my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs); + +/* + Creates a index of sorted keys + + SYNOPSIS + _ma_create_index_by_sort() + info Sort parameters + no_messages Set to 1 if no output + sortbuff_size Size if sortbuffer to allocate + + RESULT + 0 ok + <> 0 Error +*/ + +int 
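/*
  The sort buffer is budgeted as follows: every key held in memory costs
  sort_length bytes plus one char* slot in the pointer array, and every run
  written to the temp file costs one BUFFPEK descriptor.  The loop below keeps
  recomputing the number of runs until it agrees with how many keys fit per
  run, and on failure retries with 3/4 of the memory, down to MIN_SORT_MEMORY.
  A simplified standalone sketch of that fixed-point iteration (the
  hypothetical toy_keys_per_run() ignores the recount of runs done later in
  find_all_keys()):

    static long toy_keys_per_run(unsigned long records, unsigned long memavl,
                                 unsigned long sort_length,
                                 unsigned long buffpek_size)
    {
      unsigned long keys= 0, runs= 1, prev;
      if ((records + 1) * (sort_length + sizeof(char*)) <= memavl)
        return (long) (records + 1);            // everything fits in memory
      do
      {
        prev= runs;
        if (memavl < buffpek_size * runs)
          return -1;                            // sort buffer too small
        keys= (memavl - buffpek_size * runs) / (sort_length + sizeof(char*));
        if (keys <= 1 || keys < runs)
          return -1;                            // sort buffer too small
        runs= records / (keys - 1) + 1;         // runs needed at this size
      } while (runs != prev);
      return (long) keys;
    }
*/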
_ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, + ulong sortbuff_size) +{ + int error,maxbuffer,skr; + uint memavl,old_memavl,keys,sort_length; + DYNAMIC_ARRAY buffpek; + ha_rows records; + byte **sort_keys; + IO_CACHE tempfile, tempfile_for_exceptions; + DBUG_ENTER("_ma_create_index_by_sort"); + DBUG_PRINT("enter",("sort_length: %d", info->key_length)); + + if (info->keyinfo->flag & HA_VAR_LENGTH_KEY) + { + info->write_keys= write_keys_varlen; + info->read_to_buffer=read_to_buffer_varlen; + info->write_key=write_merge_key_varlen; + } + else + { + info->write_keys= write_keys; + info->read_to_buffer=read_to_buffer; + info->write_key=write_merge_key; + } + + my_b_clear(&tempfile); + my_b_clear(&tempfile_for_exceptions); + bzero((char*) &buffpek,sizeof(buffpek)); + sort_keys= (byte **) NULL; error= 1; + maxbuffer=1; + + memavl=max(sortbuff_size,MIN_SORT_MEMORY); + records= info->sort_info->max_records; + sort_length= info->key_length; + LINT_INIT(keys); + + while (memavl >= MIN_SORT_MEMORY) + { + if ((my_off_t) (records+1)*(sort_length+sizeof(char*)) <= + (my_off_t) memavl) + keys= records+1; + else + do + { + skr=maxbuffer; + if (memavl < sizeof(BUFFPEK)*(uint) maxbuffer || + (keys=(memavl-sizeof(BUFFPEK)*(uint) maxbuffer)/ + (sort_length+sizeof(char*))) <= 1 || + keys < (uint) maxbuffer) + { + _ma_check_print_error(info->sort_info->param, + "sort_buffer_size is to small"); + goto err; + } + } + while ((maxbuffer= (int) (records/(keys-1)+1)) != skr); + + if ((sort_keys=(byte**) my_malloc(keys*(sort_length+sizeof(char*))+ + HA_FT_MAXBYTELEN, MYF(0)))) + { + if (my_init_dynamic_array(&buffpek, sizeof(BUFFPEK), maxbuffer, + maxbuffer/2)) + { + my_free((gptr) sort_keys,MYF(0)); + sort_keys= 0; + } + else + break; + } + old_memavl=memavl; + if ((memavl=memavl/4*3) < MIN_SORT_MEMORY && old_memavl > MIN_SORT_MEMORY) + memavl=MIN_SORT_MEMORY; + } + if (memavl < MIN_SORT_MEMORY) + { + _ma_check_print_error(info->sort_info->param,"Sort buffer to small"); /* purecov: tested */ + goto err; /* purecov: tested */ + } + (*info->lock_in_memory)(info->sort_info->param);/* Everything is allocated */ + + if (!no_messages) + printf(" - Searching for keys, allocating buffer for %d keys\n",keys); + + if ((records=find_all_keys(info,keys,sort_keys,&buffpek,&maxbuffer, + &tempfile,&tempfile_for_exceptions)) + == HA_POS_ERROR) + goto err; /* purecov: tested */ + if (maxbuffer == 0) + { + if (!no_messages) + printf(" - Dumping %lu keys\n", (ulong) records); + if (write_index(info,sort_keys, (uint) records)) + goto err; /* purecov: inspected */ + } + else + { + keys=(keys*(sort_length+sizeof(char*)))/sort_length; + if (maxbuffer >= MERGEBUFF2) + { + if (!no_messages) + printf(" - Merging %lu keys\n", (ulong) records); /* purecov: tested */ + if (merge_many_buff(info,keys,sort_keys, + dynamic_element(&buffpek,0,BUFFPEK *),&maxbuffer,&tempfile)) + goto err; /* purecov: inspected */ + } + if (flush_io_cache(&tempfile) || + reinit_io_cache(&tempfile,READ_CACHE,0L,0,0)) + goto err; /* purecov: inspected */ + if (!no_messages) + printf(" - Last merge and dumping keys\n"); /* purecov: tested */ + if (merge_index(info,keys,sort_keys,dynamic_element(&buffpek,0,BUFFPEK *), + maxbuffer,&tempfile)) + goto err; /* purecov: inspected */ + } + + if (flush_maria_ft_buf(info) || _ma_flush_pending_blocks(info)) + goto err; + + if (my_b_inited(&tempfile_for_exceptions)) + { + MARIA_HA *index=info->sort_info->info; + uint keyno=info->key; + uint key_length, ref_length=index->s->rec_reflength; + + if (!no_messages) 
+ printf(" - Adding exceptions\n"); /* purecov: tested */ + if (flush_io_cache(&tempfile_for_exceptions) || + reinit_io_cache(&tempfile_for_exceptions,READ_CACHE,0L,0,0)) + goto err; + + while (!my_b_read(&tempfile_for_exceptions,(byte*)&key_length, + sizeof(key_length)) + && !my_b_read(&tempfile_for_exceptions,(byte*)sort_keys, + (uint) key_length)) + { + if (_ma_ck_write(index,keyno,(byte*) sort_keys,key_length-ref_length)) + goto err; + } + } + + error =0; + +err: + if (sort_keys) + my_free((gptr) sort_keys,MYF(0)); + delete_dynamic(&buffpek); + close_cached_file(&tempfile); + close_cached_file(&tempfile_for_exceptions); + + DBUG_RETURN(error ? -1 : 0); +} /* _ma_create_index_by_sort */ + + +/* Search after all keys and place them in a temp. file */ + +static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info, uint keys, + byte **sort_keys, DYNAMIC_ARRAY *buffpek, + int *maxbuffer, IO_CACHE *tempfile, + IO_CACHE *tempfile_for_exceptions) +{ + int error; + uint idx; + DBUG_ENTER("find_all_keys"); + + idx=error=0; + sort_keys[0]= (byte*) (sort_keys+keys); + + while (!(error=(*info->key_read)(info,sort_keys[idx]))) + { + if (info->real_key_length > info->key_length) + { + if (write_key(info,sort_keys[idx],tempfile_for_exceptions)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + continue; + } + + if (++idx == keys) + { + if (info->write_keys(info,sort_keys,idx-1, + (BUFFPEK *)alloc_dynamic(buffpek), + tempfile)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + + sort_keys[0]=(byte*) (sort_keys+keys); + memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); + idx=1; + } + sort_keys[idx]=sort_keys[idx-1]+info->key_length; + } + if (error > 0) + DBUG_RETURN(HA_POS_ERROR); /* Aborted by get_key */ /* purecov: inspected */ + if (buffpek->elements) + { + if (info->write_keys(info,sort_keys,idx,(BUFFPEK *)alloc_dynamic(buffpek), + tempfile)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + *maxbuffer=buffpek->elements-1; + } + else + *maxbuffer=0; + + DBUG_RETURN((*maxbuffer)*(keys-1)+idx); +} /* find_all_keys */ + + +#ifdef THREAD +/* Search after all keys and place them in a temp. 
file */ + +pthread_handler_t _ma_thr_find_all_keys(void *arg) +{ + MARIA_SORT_PARAM *sort_param= (MARIA_SORT_PARAM*) arg; + int error; + uint memavl,old_memavl,keys,sort_length; + uint idx, maxbuffer; + byte **sort_keys=0; + + LINT_INIT(keys); + + error=1; + + if (my_thread_init()) + goto err; + + { /* Add extra block since DBUG_ENTER declare variables */ + DBUG_ENTER("_ma_thr_find_all_keys"); + DBUG_PRINT("enter", ("master: %d", sort_param->master)); + if (sort_param->sort_info->got_error) + goto err; + + if (sort_param->keyinfo->flag & HA_VAR_LENGTH_KEY) + { + sort_param->write_keys= write_keys_varlen; + sort_param->read_to_buffer= read_to_buffer_varlen; + sort_param->write_key= write_merge_key_varlen; + } + else + { + sort_param->write_keys= write_keys; + sort_param->read_to_buffer= read_to_buffer; + sort_param->write_key= write_merge_key; + } + + my_b_clear(&sort_param->tempfile); + my_b_clear(&sort_param->tempfile_for_exceptions); + bzero((char*) &sort_param->buffpek,sizeof(sort_param->buffpek)); + bzero((char*) &sort_param->unique, sizeof(sort_param->unique)); + + memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY); + idx= sort_param->sort_info->max_records; + sort_length= sort_param->key_length; + maxbuffer= 1; + + while (memavl >= MIN_SORT_MEMORY) + { + if ((my_off_t) (idx+1)*(sort_length+sizeof(char*)) <= + (my_off_t) memavl) + keys= idx+1; + else + { + uint skr; + do + { + skr= maxbuffer; + if (memavl < sizeof(BUFFPEK)*maxbuffer || + (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/ + (sort_length+sizeof(char*))) <= 1 || + keys < (uint) maxbuffer) + { + _ma_check_print_error(sort_param->sort_info->param, + "sort_buffer_size is to small"); + goto err; + } + } + while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr); + } + if ((sort_keys= (byte **) + my_malloc(keys*(sort_length+sizeof(char*))+ + ((sort_param->keyinfo->flag & HA_FULLTEXT) ? 
+ HA_FT_MAXBYTELEN : 0), MYF(0)))) + { + if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK), + maxbuffer, maxbuffer/2)) + { + my_free((gptr) sort_keys,MYF(0)); + sort_keys= (byte **) NULL; /* for err: label */ + } + else + break; + } + old_memavl= memavl; + if ((memavl= memavl/4*3) < MIN_SORT_MEMORY && + old_memavl > MIN_SORT_MEMORY) + memavl= MIN_SORT_MEMORY; + } + if (memavl < MIN_SORT_MEMORY) + { + _ma_check_print_error(sort_param->sort_info->param, + "Sort buffer too small"); + goto err; /* purecov: tested */ + } + + if (sort_param->sort_info->param->testflag & T_VERBOSE) + printf("Key %d - Allocating buffer for %d keys\n", + sort_param->key+1, keys); + sort_param->sort_keys= sort_keys; + + idx= error= 0; + sort_keys[0]= (byte*) (sort_keys+keys); + + DBUG_PRINT("info", ("reading keys")); + while (!(error= sort_param->sort_info->got_error) && + !(error= (*sort_param->key_read)(sort_param, sort_keys[idx]))) + { + if (sort_param->real_key_length > sort_param->key_length) + { + if (write_key(sort_param,sort_keys[idx], + &sort_param->tempfile_for_exceptions)) + goto err; + continue; + } + + if (++idx == keys) + { + if (sort_param->write_keys(sort_param, sort_keys, idx - 1, + (BUFFPEK *)alloc_dynamic(&sort_param-> + buffpek), + &sort_param->tempfile)) + goto err; + sort_keys[0]= (byte*) (sort_keys+keys); + memcpy(sort_keys[0], sort_keys[idx - 1], + (size_t) sort_param->key_length); + idx= 1; + } + sort_keys[idx]=sort_keys[idx - 1] + sort_param->key_length; + } + if (error > 0) + goto err; + if (sort_param->buffpek.elements) + { + if (sort_param->write_keys(sort_param,sort_keys, idx, + (BUFFPEK *) alloc_dynamic(&sort_param-> + buffpek), + &sort_param->tempfile)) + goto err; + sort_param->keys= (sort_param->buffpek.elements - 1) * (keys - 1) + idx; + } + else + sort_param->keys= idx; + + sort_param->sort_keys_length= keys; + goto ok; + +err: + DBUG_PRINT("error", ("got some error")); + sort_param->sort_info->got_error= 1; /* no need to protect with a mutex */ + my_free((gptr) sort_keys,MYF(MY_ALLOW_ZERO_PTR)); + sort_param->sort_keys=0; + delete_dynamic(& sort_param->buffpek); + close_cached_file(&sort_param->tempfile); + close_cached_file(&sort_param->tempfile_for_exceptions); + +ok: + free_root(&sort_param->wordroot, MYF(0)); + /* + Detach from the share if the writer is involved. Avoid others to + be blocked. This includes a flush of the write buffer. This will + also indicate EOF to the readers. + */ + if (sort_param->sort_info->info->rec_cache.share) + remove_io_thread(&sort_param->sort_info->info->rec_cache); + + /* Readers detach from the share if any. Avoid others to be blocked. 
*/ + if (sort_param->read_cache.share) + remove_io_thread(&sort_param->read_cache); + + pthread_mutex_lock(&sort_param->sort_info->mutex); + if (!--sort_param->sort_info->threads_running) + pthread_cond_signal(&sort_param->sort_info->cond); + pthread_mutex_unlock(&sort_param->sort_info->mutex); + DBUG_PRINT("exit", ("======== ending thread ========")); + } + my_thread_end(); + return NULL; +} + + +int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) +{ + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + ulong length, keys; + ulong *rec_per_key_part=param->rec_per_key_part; + int got_error=sort_info->got_error; + uint i; + MARIA_HA *info=sort_info->info; + MARIA_SHARE *share=info->s; + MARIA_SORT_PARAM *sinfo; + byte *mergebuf=0; + DBUG_ENTER("_ma_thr_write_keys"); + LINT_INIT(length); + + for (i= 0, sinfo= sort_param ; + i < sort_info->total_keys ; + i++, rec_per_key_part+=sinfo->keyinfo->keysegs, sinfo++) + { + if (!sinfo->sort_keys) + { + got_error=1; + my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + continue; + } + if (!got_error) + { + maria_set_key_active(share->state.key_map, sinfo->key); + + if (!sinfo->buffpek.elements) + { + if (param->testflag & T_VERBOSE) + { + printf("Key %d - Dumping %u keys\n",sinfo->key+1, sinfo->keys); + fflush(stdout); + } + if (write_index(sinfo, sinfo->sort_keys, sinfo->keys) || + flush_maria_ft_buf(sinfo) || _ma_flush_pending_blocks(sinfo)) + got_error=1; + } + if (!got_error && param->testflag & T_STATISTICS) + maria_update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + sinfo->notnull: NULL, + (ulonglong) info->state->records); + } + my_free((gptr) sinfo->sort_keys,MYF(0)); + my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + sinfo->sort_keys=0; + } + + for (i= 0, sinfo= sort_param ; + i < sort_info->total_keys ; + i++, + delete_dynamic(&sinfo->buffpek), + close_cached_file(&sinfo->tempfile), + close_cached_file(&sinfo->tempfile_for_exceptions), + sinfo++) + { + if (got_error) + continue; + if (sinfo->keyinfo->flag & HA_VAR_LENGTH_KEY) + { + sinfo->write_keys=write_keys_varlen; + sinfo->read_to_buffer=read_to_buffer_varlen; + sinfo->write_key=write_merge_key_varlen; + } + else + { + sinfo->write_keys=write_keys; + sinfo->read_to_buffer=read_to_buffer; + sinfo->write_key=write_merge_key; + } + if (sinfo->buffpek.elements) + { + uint maxbuffer=sinfo->buffpek.elements-1; + if (!mergebuf) + { + length=param->sort_buffer_length; + while (length >= MIN_SORT_MEMORY && !mergebuf) + { + mergebuf=my_malloc(length, MYF(0)); + length=length*3/4; + } + if (!mergebuf) + { + got_error=1; + continue; + } + } + keys=length/sinfo->key_length; + if (maxbuffer >= MERGEBUFF2) + { + if (param->testflag & T_VERBOSE) + printf("Key %d - Merging %u keys\n",sinfo->key+1, sinfo->keys); + if (merge_many_buff(sinfo, keys, (byte **) mergebuf, + dynamic_element(&sinfo->buffpek, 0, BUFFPEK *), + (int*) &maxbuffer, &sinfo->tempfile)) + { + got_error=1; + continue; + } + } + if (flush_io_cache(&sinfo->tempfile) || + reinit_io_cache(&sinfo->tempfile,READ_CACHE,0L,0,0)) + { + got_error=1; + continue; + } + if (param->testflag & T_VERBOSE) + printf("Key %d - Last merge and dumping keys\n", sinfo->key+1); + if (merge_index(sinfo, keys, (byte**) mergebuf, + dynamic_element(&sinfo->buffpek,0,BUFFPEK *), + maxbuffer,&sinfo->tempfile) || + flush_maria_ft_buf(sinfo) || + _ma_flush_pending_blocks(sinfo)) + { + got_error=1; + continue; + } + } + if 
(my_b_inited(&sinfo->tempfile_for_exceptions)) + { + uint key_length; + + if (param->testflag & T_VERBOSE) + printf("Key %d - Dumping 'long' keys\n", sinfo->key+1); + + if (flush_io_cache(&sinfo->tempfile_for_exceptions) || + reinit_io_cache(&sinfo->tempfile_for_exceptions,READ_CACHE,0L,0,0)) + { + got_error=1; + continue; + } + + while (!got_error && + !my_b_read(&sinfo->tempfile_for_exceptions,(byte*)&key_length, + sizeof(key_length))) + { + byte maria_ft_buf[HA_FT_MAXBYTELEN + HA_FT_WLEN + 10]; + if (key_length > sizeof(maria_ft_buf) || + my_b_read(&sinfo->tempfile_for_exceptions, (byte*)maria_ft_buf, + (uint)key_length) || + _ma_ck_write(info, sinfo->key, maria_ft_buf, + key_length - info->s->rec_reflength)) + got_error=1; + } + } + } + my_free((gptr) mergebuf,MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(got_error); +} +#endif /* THREAD */ + +/* Write all keys in memory to file for later merge */ + +static int write_keys(MARIA_SORT_PARAM *info, register byte **sort_keys, + uint count, BUFFPEK *buffpek, IO_CACHE *tempfile) +{ + byte **end; + uint sort_length=info->key_length; + DBUG_ENTER("write_keys"); + + qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp, + info); + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + buffpek->file_pos=my_b_tell(tempfile); + buffpek->count=count; + + for (end=sort_keys+count ; sort_keys != end ; sort_keys++) + { + if (my_b_write(tempfile, *sort_keys, (uint) sort_length)) + DBUG_RETURN(1); /* purecov: inspected */ + } + DBUG_RETURN(0); +} /* write_keys */ + + +static inline int +my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs) +{ + int err; + uint16 len= _ma_keylength(info->keyinfo, bufs); + + /* The following is safe as this is a local file */ + if ((err= my_b_write(to_file, (byte*)&len, sizeof(len)))) + return (err); + if ((err= my_b_write(to_file,bufs, (uint) len))) + return (err); + return (0); +} + + +static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info, + register byte **sort_keys, + uint count, BUFFPEK *buffpek, + IO_CACHE *tempfile) +{ + byte **end; + int err; + DBUG_ENTER("write_keys_varlen"); + + qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp, + info); + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + buffpek->file_pos=my_b_tell(tempfile); + buffpek->count=count; + for (end=sort_keys+count ; sort_keys != end ; sort_keys++) + { + if ((err= my_var_write(info,tempfile, *sort_keys))) + DBUG_RETURN(err); + } + DBUG_RETURN(0); +} /* write_keys_varlen */ + + +static int NEAR_F write_key(MARIA_SORT_PARAM *info, byte *key, + IO_CACHE *tempfile) +{ + uint key_length=info->real_key_length; + DBUG_ENTER("write_key"); + + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); + + if (my_b_write(tempfile, (byte*)&key_length,sizeof(key_length)) || + my_b_write(tempfile, key, (uint) key_length)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} /* write_key */ + + +/* Write index */ + +static int NEAR_F write_index(MARIA_SORT_PARAM *info, + register byte **sort_keys, + register uint count) +{ + DBUG_ENTER("write_index"); + + qsort2((gptr) sort_keys,(size_t) count,sizeof(byte*), + (qsort2_cmp) info->key_cmp,info); + while 
(count--) + { + if ((*info->key_write)(info, *sort_keys++)) + DBUG_RETURN(-1); /* purecov: inspected */ + } + DBUG_RETURN(0); +} /* write_index */ + + + /* Merge buffers to make < MERGEBUFF2 buffers */ + +static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info, uint keys, + byte **sort_keys, BUFFPEK *buffpek, + int *maxbuffer, IO_CACHE *t_file) +{ + register int i; + IO_CACHE t_file2, *from_file, *to_file, *temp; + BUFFPEK *lastbuff; + DBUG_ENTER("merge_many_buff"); + + if (*maxbuffer < MERGEBUFF2) + DBUG_RETURN(0); /* purecov: inspected */ + if (flush_io_cache(t_file) || + open_cached_file(&t_file2,my_tmpdir(info->tmpdir),"ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + from_file= t_file ; to_file= &t_file2; + while (*maxbuffer >= MERGEBUFF2) + { + reinit_io_cache(from_file,READ_CACHE,0L,0,0); + reinit_io_cache(to_file,WRITE_CACHE,0L,0,0); + lastbuff=buffpek; + for (i=0 ; i <= *maxbuffer-MERGEBUFF*3/2 ; i+=MERGEBUFF) + { + if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++, + buffpek+i,buffpek+i+MERGEBUFF-1)) + break; /* purecov: inspected */ + } + if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++, + buffpek+i,buffpek+ *maxbuffer)) + break; /* purecov: inspected */ + if (flush_io_cache(to_file)) + break; /* purecov: inspected */ + temp=from_file; from_file=to_file; to_file=temp; + *maxbuffer= (int) (lastbuff-buffpek)-1; + } + close_cached_file(to_file); /* This holds old result */ + if (to_file == t_file) + *t_file=t_file2; /* Copy result file */ + + DBUG_RETURN(*maxbuffer >= MERGEBUFF2); /* Return 1 if interrupted */ +} /* merge_many_buff */ + + +/* + Read data to buffer + + SYNOPSIS + read_to_buffer() + fromfile File to read from + buffpek Where to read from + sort_length max length to read + RESULT + > 0 Ammount of bytes read + -1 Error +*/ + +static uint NEAR_F read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek, + uint sort_length) +{ + register uint count; + uint length; + + if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) + { + if (my_pread(fromfile->file,(byte*) buffpek->base, + (length= sort_length*count),buffpek->file_pos,MYF_RW)) + return((uint) -1); /* purecov: inspected */ + buffpek->key=buffpek->base; + buffpek->file_pos+= length; /* New filepos */ + buffpek->count-= count; + buffpek->mem_count= count; + } + return (count*sort_length); +} /* read_to_buffer */ + +static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek, + uint sort_length) +{ + register uint count; + uint16 length_of_key = 0; + uint idx; + byte *buffp; + + if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) + { + buffp= buffpek->base; + + for (idx=1;idx<=count;idx++) + { + if (my_pread(fromfile->file,(byte*)&length_of_key,sizeof(length_of_key), + buffpek->file_pos,MYF_RW)) + return((uint) -1); + buffpek->file_pos+=sizeof(length_of_key); + if (my_pread(fromfile->file,(byte*) buffp,length_of_key, + buffpek->file_pos,MYF_RW)) + return((uint) -1); + buffpek->file_pos+=length_of_key; + buffp = buffp + sort_length; + } + buffpek->key=buffpek->base; + buffpek->count-= count; + buffpek->mem_count= count; + } + return (count*sort_length); +} /* read_to_buffer_varlen */ + + +static int NEAR_F write_merge_key_varlen(MARIA_SORT_PARAM *info, + IO_CACHE *to_file,char* key, + uint sort_length, uint count) +{ + uint idx; + + char *bufs = key; + for (idx=1;idx<=count;idx++) + { + int err; + if ((err= my_var_write(info,to_file, (byte*) bufs))) + return (err); + 
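+    /* keys sit at fixed sort_length strides in the buffer, even though the number of bytes written above varies per key */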
bufs=bufs+sort_length; + } + return(0); +} + + +static int NEAR_F write_merge_key(MARIA_SORT_PARAM *info __attribute__((unused)), + IO_CACHE *to_file, char* key, + uint sort_length, uint count) +{ + return my_b_write(to_file,(byte*) key,(uint) sort_length*count); +} + +/* + Merge buffers to one buffer + If to_file == 0 then use info->key_write +*/ + +static int NEAR_F +merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file, + IO_CACHE *to_file, byte **sort_keys, BUFFPEK *lastbuff, + BUFFPEK *Fb, BUFFPEK *Tb) +{ + int error; + uint sort_length,maxcount; + ha_rows count; + my_off_t to_start_filepos; + byte *strpos; + BUFFPEK *buffpek,**refpek; + QUEUE queue; + volatile int *killed= _ma_killed_ptr(info->sort_info->param); + DBUG_ENTER("merge_buffers"); + + count=error=0; + maxcount=keys/((uint) (Tb-Fb) +1); + LINT_INIT(to_start_filepos); + if (to_file) + to_start_filepos=my_b_tell(to_file); + strpos= (byte*) sort_keys; + sort_length=info->key_length; + + if (init_queue(&queue,(uint) (Tb-Fb)+1,offsetof(BUFFPEK,key),0, + (int (*)(void*, byte *,byte*)) info->key_cmp, + (void*) info)) + DBUG_RETURN(1); /* purecov: inspected */ + + for (buffpek= Fb ; buffpek <= Tb ; buffpek++) + { + count+= buffpek->count; + buffpek->base= strpos; + buffpek->max_keys=maxcount; + strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek, + sort_length)); + if (error == -1) + goto err; /* purecov: inspected */ + queue_insert(&queue,(char*) buffpek); + } + + while (queue.elements > 1) + { + for (;;) + { + if (*killed) + { + error=1; goto err; + } + buffpek=(BUFFPEK*) queue_top(&queue); + if (to_file) + { + if (info->write_key(info,to_file,(byte*) buffpek->key, + (uint) sort_length,1)) + { + error=1; goto err; /* purecov: inspected */ + } + } + else + { + if ((*info->key_write)(info,(void*) buffpek->key)) + { + error=1; goto err; /* purecov: inspected */ + } + } + buffpek->key+=sort_length; + if (! 
--buffpek->mem_count) + { + if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length))) + { + byte *base= buffpek->base; + uint max_keys=buffpek->max_keys; + + VOID(queue_remove(&queue,0)); + + /* Put room used by buffer to use in other buffer */ + for (refpek= (BUFFPEK**) &queue_top(&queue); + refpek <= (BUFFPEK**) &queue_end(&queue); + refpek++) + { + buffpek= *refpek; + if (buffpek->base+buffpek->max_keys*sort_length == base) + { + buffpek->max_keys+=max_keys; + break; + } + else if (base+max_keys*sort_length == buffpek->base) + { + buffpek->base=base; + buffpek->max_keys+=max_keys; + break; + } + } + break; /* One buffer have been removed */ + } + } + else if (error == -1) + goto err; /* purecov: inspected */ + queue_replaced(&queue); /* Top element has been replaced */ + } + } + buffpek=(BUFFPEK*) queue_top(&queue); + buffpek->base= (byte*) sort_keys; + buffpek->max_keys=keys; + do + { + if (to_file) + { + if (info->write_key(info,to_file,(byte*) buffpek->key, + sort_length,buffpek->mem_count)) + { + error=1; goto err; /* purecov: inspected */ + } + } + else + { + register byte *end; + strpos= buffpek->key; + for (end= strpos+buffpek->mem_count*sort_length; + strpos != end ; + strpos+=sort_length) + { + if ((*info->key_write)(info, (byte*) strpos)) + { + error=1; goto err; /* purecov: inspected */ + } + } + } + } + while ((error=(int) info->read_to_buffer(from_file,buffpek,sort_length)) != + -1 && error != 0); + + lastbuff->count=count; + if (to_file) + lastbuff->file_pos=to_start_filepos; +err: + delete_queue(&queue); + DBUG_RETURN(error); +} /* merge_buffers */ + + + /* Do a merge to output-file (save only positions) */ + +static int NEAR_F +merge_index(MARIA_SORT_PARAM *info, uint keys, byte **sort_keys, + BUFFPEK *buffpek, int maxbuffer, IO_CACHE *tempfile) +{ + DBUG_ENTER("merge_index"); + if (merge_buffers(info,keys,tempfile,(IO_CACHE*) 0,sort_keys,buffpek,buffpek, + buffpek+maxbuffer)) + DBUG_RETURN(1); /* purecov: inspected */ + DBUG_RETURN(0); +} /* merge_index */ + + +static int flush_maria_ft_buf(MARIA_SORT_PARAM *info) +{ + int err=0; + if (info->sort_info->ft_buf) + { + err=_ma_sort_ft_buf_flush(info); + my_free((gptr)info->sort_info->ft_buf, MYF(0)); + info->sort_info->ft_buf=0; + } + return err; +} + diff --git a/storage/maria/ma_sp_defs.h b/storage/maria/ma_sp_defs.h new file mode 100644 index 00000000000..6aac741bb2c --- /dev/null +++ b/storage/maria/ma_sp_defs.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _SP_DEFS_H +#define _SP_DEFS_H + +#define SPDIMS 2 +#define SPTYPE HA_KEYTYPE_DOUBLE +#define SPLEN 8 + +#ifdef HAVE_SPATIAL + +enum wkbType +{ + wkbPoint = 1, + wkbLineString = 2, + wkbPolygon = 3, + wkbMultiPoint = 4, + wkbMultiLineString = 5, + wkbMultiPolygon = 6, + wkbGeometryCollection = 7 +}; + +enum wkbByteOrder +{ + wkbXDR = 0, /* Big Endian */ + wkbNDR = 1 /* Little Endian */ +}; + +uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *record, my_off_t filepos); + +#endif /*HAVE_SPATIAL*/ +#endif /* _SP_DEFS_H */ diff --git a/storage/maria/ma_sp_key.c b/storage/maria/ma_sp_key.c new file mode 100644 index 00000000000..b365f8deb0f --- /dev/null +++ b/storage/maria/ma_sp_key.c @@ -0,0 +1,300 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_SPATIAL + +#include "ma_sp_defs.h" + +static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims, + double *mbr, int top); +static int sp_mbr_from_wkb(uchar (*wkb), uint size, uint n_dims, double *mbr); + +static void get_double(double *d, const byte *pos) +{ + float8get(*d, pos); +} + +uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *record, my_off_t filepos) +{ + HA_KEYSEG *keyseg; + MARIA_KEYDEF *keyinfo = &info->s->keyinfo[keynr]; + uint len = 0; + byte *pos; + uint dlen; + uchar *dptr; + double mbr[SPDIMS * 2]; + uint i; + + keyseg = &keyinfo->seg[-1]; + pos = (byte*)record + keyseg->start; + + dlen = _ma_calc_blob_length(keyseg->bit_start, pos); + memcpy_fixed(&dptr, pos + keyseg->bit_start, sizeof(char*)); + if (!dptr) + { + my_errno= HA_ERR_NULL_IN_SPATIAL; + return 0; + } + sp_mbr_from_wkb(dptr + 4, dlen - 4, SPDIMS, mbr); /* SRID */ + + for (i = 0, keyseg = keyinfo->seg; keyseg->type; keyseg++, i++) + { + uint length = keyseg->length; + + pos = ((byte*)mbr) + keyseg->start; + if (keyseg->flag & HA_SWAP_KEY) + { +#ifdef HAVE_ISNAN + if (keyseg->type == HA_KEYTYPE_FLOAT) + { + float nr; + float4get(nr, pos); + if (isnan(nr)) + { + /* Replace NAN with zero */ + bzero(key, length); + key+= length; + continue; + } + } + else if (keyseg->type == HA_KEYTYPE_DOUBLE) + { + double nr; + get_double(&nr, pos); + 
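+        /* as in the float case above: a NaN value is replaced with zeroes */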
if (isnan(nr)) + { + bzero(key, length); + key+= length; + continue; + } + } +#endif + pos += length; + while (length--) + { + *key++ = *--pos; + } + } + else + { + memcpy((byte*)key, pos, length); + key += keyseg->length; + } + len += keyseg->length; + } + _ma_dpointer(info, key, filepos); + return len; +} + +/* +Calculate minimal bounding rectangle (mbr) of the spatial object +stored in "well-known binary representation" (wkb) format. +*/ +static int sp_mbr_from_wkb(uchar *wkb, uint size, uint n_dims, double *mbr) +{ + uint i; + + for (i=0; i < n_dims; ++i) + { + mbr[i * 2] = DBL_MAX; + mbr[i * 2 + 1] = -DBL_MAX; + } + + return sp_get_geometry_mbr(&wkb, wkb + size, n_dims, mbr, 1); +} + +/* + Add one point stored in wkb to mbr +*/ + +static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order __attribute__((unused)), + double *mbr) +{ + double ord; + double *mbr_end= mbr + n_dims * 2; + + while (mbr < mbr_end) + { + if ((*wkb) > end - 8) + return -1; + get_double(&ord, (const byte*) *wkb); + (*wkb)+= 8; + if (ord < *mbr) + float8store((char*) mbr, ord); + mbr++; + if (ord > *mbr) + float8store((char*) mbr, ord); + mbr++; + } + return 0; +} + + +static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + return sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr); +} + + +static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + uint n_points; + + n_points = uint4korr(*wkb); + (*wkb) += 4; + for (; n_points > 0; --n_points) + { + /* Add next point to mbr */ + if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + return 0; +} + + +static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + uint n_linear_rings; + uint n_points; + + n_linear_rings = uint4korr((*wkb)); + (*wkb) += 4; + + for (; n_linear_rings > 0; --n_linear_rings) + { + n_points = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_points > 0; --n_points) + { + /* Add next point to mbr */ + if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + } + return 0; +} + +static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims, + double *mbr, int top) +{ + int res; + uchar byte_order; + uint wkb_type; + + byte_order = *(*wkb); + ++(*wkb); + + wkb_type = uint4korr((*wkb)); + (*wkb) += 4; + + switch ((enum wkbType) wkb_type) + { + case wkbPoint: + res = sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbLineString: + res = sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbPolygon: + res = sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbMultiPoint: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbMultiLineString: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbMultiPolygon: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + 
res = 0; + break; + } + case wkbGeometryCollection: + { + uint n_items; + + if (!top) + return -1; + + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + if (sp_get_geometry_mbr(wkb, end, n_dims, mbr, 0)) + return -1; + } + res = 0; + break; + } + default: + res = -1; + } + return res; +} + +#endif /*HAVE_SPATIAL*/ diff --git a/storage/maria/ma_sp_test.c b/storage/maria/ma_sp_test.c new file mode 100644 index 00000000000..1ac1a74d7d7 --- /dev/null +++ b/storage/maria/ma_sp_test.c @@ -0,0 +1,569 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MARIA spatial table */ +/* Written by Alex Barkov, who has a shared copyright to this code */ + +#include "maria.h" + +#ifdef HAVE_SPATIAL +#include "ma_sp_defs.h" + +#define MAX_REC_LENGTH 1024 +#define KEYALG HA_KEY_ALG_RTREE + +static void create_linestring(char *record,uint rownr); +static void print_record(char * record,my_off_t offs,const char * tail); + +static void create_key(char *key,uint rownr); +static void print_key(const char *key,const char * tail); + +static int run_test(const char *filename); +static int read_with_pos(MARIA_HA * file, int silent); + +static int maria_rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points, + uchar *wkb); +static void maria_rtree_PrintWKB(uchar *wkb, uint n_dims); + +static char blob_key[MAX_REC_LENGTH]; + + +int main(int argc __attribute__((unused)),char *argv[]) +{ + MY_INIT(argv[0]); + maria_init(); + exit(run_test("sp_test")); +} + + +int run_test(const char *filename) +{ + MARIA_HA *file; + MARIA_UNIQUEDEF uniquedef; + MARIA_CREATE_INFO create_info; + MARIA_COLUMNDEF recinfo[20]; + MARIA_KEYDEF keyinfo[20]; + HA_KEYSEG keyseg[20]; + key_range min_range, max_range; + int silent=0; + int create_flag=0; + int null_fields=0; + int nrecords=30; + int uniques=0; + int i; + int error; + int row_count=0; + char record[MAX_REC_LENGTH]; + char key[MAX_REC_LENGTH]; + char read_record[MAX_REC_LENGTH]; + int upd=10; + ha_rows hrows; + + /* Define a column for NULLs and DEL markers*/ + + recinfo[0].type=FIELD_NORMAL; + recinfo[0].length=1; /* For NULL bits */ + + + /* Define spatial column */ + + recinfo[1].type=FIELD_BLOB; + recinfo[1].length=4 + maria_portable_sizeof_char_ptr; + + + + /* Define a key with 1 spatial segment */ + + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].flag=HA_SPATIAL; + keyinfo[0].key_alg=KEYALG; + + keyinfo[0].seg[0].type= HA_KEYTYPE_BINARY; + keyinfo[0].seg[0].flag=0; + keyinfo[0].seg[0].start= 1; + keyinfo[0].seg[0].length=1; /* Spatial ignores it anyway */ + keyinfo[0].seg[0].null_bit= null_fields ? 
2 : 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language=default_charset_info->number; + keyinfo[0].seg[0].bit_start=4; /* Long BLOB */ + + + if (!silent) + printf("- Creating isam-file\n"); + + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=10000000; + + if (maria_create(filename, + DYNAMIC_RECORD, + 1, /* keys */ + keyinfo, + 2, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) + goto err; + + if (!silent) + printf("- Open isam-file\n"); + + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + + if (!silent) + printf("- Writing key:s\n"); + + for (i=0; i<nrecords; i++ ) + { + create_linestring(record,i); + error=maria_write(file,record); + print_record(record,maria_position(file),"\n"); + if (!error) + { + row_count++; + } + else + { + printf("maria_write: %d\n", error); + goto err; + } + } + + if ((error=read_with_pos(file,silent))) + goto err; + + if (!silent) + printf("- Deleting rows with position\n"); + for (i=0; i < nrecords/4; i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if (error) + { + printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + print_record(read_record,maria_position(file),"\n"); + error=maria_delete(file,read_record); + if (error) + { + printf("pos: %2d maria_delete: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + if (!silent) + printf("- Updating rows with position\n"); + for (i=0; i < nrecords/2 ; i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if (error) + { + if (error==HA_ERR_RECORD_DELETED) + continue; + printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + print_record(read_record,maria_position(file),""); + create_linestring(record,i+nrecords*upd); + printf("\t-> "); + print_record(record,maria_position(file),"\n"); + error=maria_update(file,read_record,record); + if (error) + { + printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + if ((error=read_with_pos(file,silent))) + goto err; + + if (!silent) + printf("- Test maria_rkey then a sequence of maria_rnext_same\n"); + + create_key(key, nrecords*4/5); + print_key(key," search for INTERSECT\n"); + + if ((error=maria_rkey(file,read_record,0,key,0,HA_READ_MBR_INTERSECT))) + { + printf("maria_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rkey\n"); + row_count=1; + + for (;;) + { + if ((error=maria_rnext_same(file,read_record))) + { + if (error==HA_ERR_END_OF_FILE) + break; + printf("maria_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rnext_same\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + if (!silent) + printf("- Test maria_rfirst then a sequence of maria_rnext\n"); + + error=maria_rfirst(file,read_record,0); + if (error) + { + printf("maria_rfirst: %3d errno: %3d\n",error,my_errno); + goto err; + } + row_count=1; + print_record(read_record,maria_position(file)," maria_frirst\n"); + + for(i=0;i<nrecords;i++) { + if ((error=maria_rnext(file,read_record,0))) + { + if (error==HA_ERR_END_OF_FILE) + break; + printf("maria_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rnext\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + if 
(!silent) + printf("- Test maria_records_in_range()\n"); + + create_key(key, nrecords*upd); + print_key(key," INTERSECT\n"); + min_range.key= key; + min_range.length= 1000; /* Big enough */ + min_range.flag= HA_READ_MBR_INTERSECT; + max_range.key= record+1; + max_range.length= 1000; /* Big enough */ + max_range.flag= HA_READ_KEY_EXACT; + hrows= maria_records_in_range(file,0, &min_range, &max_range); + printf(" %ld rows\n", (long) hrows); + + if (maria_close(file)) goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return 0; + +err: + printf("got error: %3d when using maria-database\n",my_errno); + maria_end(); + return 1; /* skip warning */ +} + + +static int read_with_pos (MARIA_HA * file,int silent) +{ + int error; + int i; + char read_record[MAX_REC_LENGTH]; + int rows=0; + + if (!silent) + printf("- Reading rows with position\n"); + for (i=0;;i++) + { + my_errno=0; + bzero((char*) read_record,MAX_REC_LENGTH); + error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR); + if (error) + { + if (error==HA_ERR_END_OF_FILE) + break; + if (error==HA_ERR_RECORD_DELETED) + continue; + printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno); + return error; + } + rows++; + print_record(read_record,maria_position(file),"\n"); + } + printf(" %d rows\n",rows); + return 0; +} + + +#ifdef NOT_USED +static void bprint_record(char * record, + my_off_t offs __attribute__((unused)), + const char * tail) +{ + int i; + char * pos; + i=(unsigned char)record[0]; + printf("%02X ",i); + + for( pos=record+1, i=0; i<32; i++,pos++) + { + int b=(unsigned char)*pos; + printf("%02X",b); + } + printf("%s",tail); +} +#endif + + +static void print_record(char * record, my_off_t offs,const char * tail) +{ + char *pos; + char *ptr; + uint len; + + printf(" rec=(%d)",(unsigned char)record[0]); + pos=record+1; + len=sint4korr(pos); + pos+=4; + printf(" len=%d ",len); + memcpy_fixed(&ptr,pos,sizeof(char*)); + if (ptr) + maria_rtree_PrintWKB((uchar*) ptr,SPDIMS); + else + printf("<NULL> "); + printf(" offs=%ld ",(long int)offs); + printf("%s",tail); +} + + +#ifdef NOT_USED +static void create_point(char *record,uint rownr) +{ + uint tmp; + char *ptr; + char *pos=record; + double x[200]; + int i; + + for(i=0;i<SPDIMS;i++) + x[i]=rownr; + + bzero((char*) record,MAX_REC_LENGTH); + *pos=0x01; /* DEL marker */ + pos++; + + memset(blob_key,0,sizeof(blob_key)); + tmp=maria_rtree_CreatePointWKB(x,SPDIMS,blob_key); + + int4store(pos,tmp); + pos+=4; + + ptr=blob_key; + memcpy_fixed(pos,&ptr,sizeof(char*)); +} +#endif + + +static void create_linestring(char *record,uint rownr) +{ + uint tmp; + char *ptr; + char *pos=record; + double x[200]; + int i,j; + int npoints=2; + + for(j=0;j<npoints;j++) + for(i=0;i<SPDIMS;i++) + x[i+j*SPDIMS]=rownr*j; + + bzero((char*) record,MAX_REC_LENGTH); + *pos=0x01; /* DEL marker */ + pos++; + + memset(blob_key,0,sizeof(blob_key)); + tmp=maria_rtree_CreateLineStringWKB(x,SPDIMS,npoints, (uchar*) blob_key); + + int4store(pos,tmp); + pos+=4; + + ptr=blob_key; + memcpy_fixed(pos,&ptr,sizeof(char*)); +} + + +static void create_key(char *key,uint rownr) +{ + double c=rownr; + char *pos; + uint i; + + bzero(key,MAX_REC_LENGTH); + for ( pos=key, i=0; i<2*SPDIMS; i++) + { + float8store(pos,c); + pos+=sizeof(c); + } +} + +static void print_key(const char *key,const char * tail) +{ + double c; + uint i; + + printf(" key="); + for (i=0; i<2*SPDIMS; i++) + { + float8get(c,key); + key+=sizeof(c); + printf("%.14g ",c); + } + printf("%s",tail); +} + + +#ifdef NOT_USED + +static int 
maria_rtree_CreatePointWKB(double *ords, uint n_dims, uchar *wkb) +{ + uint i; + + *wkb = wkbXDR; + ++wkb; + int4store(wkb, wkbPoint); + wkb += 4; + + for (i=0; i < n_dims; ++i) + { + float8store(wkb, ords[i]); + wkb += 8; + } + return 5 + n_dims * 8; +} +#endif + + +static int maria_rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points, + uchar *wkb) +{ + uint i; + uint n_ords = n_dims * n_points; + + *wkb = wkbXDR; + ++wkb; + int4store(wkb, wkbLineString); + wkb += 4; + int4store(wkb, n_points); + wkb += 4; + for (i=0; i < n_ords; ++i) + { + float8store(wkb, ords[i]); + wkb += 8; + } + return 9 + n_points * n_dims * 8; +} + + +static void maria_rtree_PrintWKB(uchar *wkb, uint n_dims) +{ + uint wkb_type; + + ++wkb; + wkb_type = uint4korr(wkb); + wkb += 4; + + switch ((enum wkbType)wkb_type) + { + case wkbPoint: + { + uint i; + double ord; + + printf("POINT("); + for (i=0; i < n_dims; ++i) + { + float8get(ord, wkb); + wkb += 8; + printf("%.14g", ord); + if (i < n_dims - 1) + printf(" "); + else + printf(")"); + } + break; + } + case wkbLineString: + { + uint p, i; + uint n_points; + double ord; + + printf("LineString("); + n_points = uint4korr(wkb); + wkb += 4; + for (p=0; p < n_points; ++p) + { + for (i=0; i < n_dims; ++i) + { + float8get(ord, wkb); + wkb += 8; + printf("%.14g", ord); + if (i < n_dims - 1) + printf(" "); + } + if (p < n_points - 1) + printf(", "); + else + printf(")"); + } + break; + } + case wkbPolygon: + { + printf("POLYGON(...)"); + break; + } + case wkbMultiPoint: + { + printf("MULTIPOINT(...)"); + break; + } + case wkbMultiLineString: + { + printf("MULTILINESTRING(...)"); + break; + } + case wkbMultiPolygon: + { + printf("MULTIPOLYGON(...)"); + break; + } + case wkbGeometryCollection: + { + printf("GEOMETRYCOLLECTION(...)"); + break; + } + default: + { + printf("UNKNOWN GEOMETRY TYPE"); + break; + } + } +} + +#else +int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused))) +{ + exit(0); +} +#endif /*HAVE_SPATIAL*/ diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c new file mode 100644 index 00000000000..c5580e1e981 --- /dev/null +++ b/storage/maria/ma_static.c @@ -0,0 +1,67 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Static variables for MARIA library. 
All definied here for easy making of + a shared library +*/ + +#ifndef _global_h +#include "maria_def.h" +#endif + +LIST *maria_open_list=0; +uchar NEAR maria_file_magic[]= +{ (uchar) 254, (uchar) 254, (uchar) 9, '\001', }; +uchar NEAR maria_pack_file_magic[]= +{ (uchar) 254, (uchar) 254, (uchar) 10, '\001', }; +uint maria_quick_table_bits=9; +ulong maria_block_size= MARIA_KEY_BLOCK_LENGTH; +my_bool maria_flush= 0, maria_single_user= 0; +my_bool maria_delay_key_write= 0; +#if defined(THREAD) && !defined(DONT_USE_RW_LOCKS) +ulong maria_concurrent_insert= 2; +#else +ulong maria_concurrent_insert= 0; +#endif +my_off_t maria_max_temp_length= MAX_FILE_SIZE; +ulong maria_bulk_insert_tree_size=8192*1024; +ulong maria_data_pointer_size= 4; + +KEY_CACHE maria_key_cache_var; +KEY_CACHE *maria_key_cache= &maria_key_cache_var; + +/* Enough for comparing if number is zero */ +byte maria_zero_string[]= {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + +/* + read_vec[] is used for converting between P_READ_KEY.. and SEARCH_ + Position is , == , >= , <= , > , < +*/ + +uint NEAR maria_read_vec[]= +{ + SEARCH_FIND, SEARCH_FIND | SEARCH_BIGGER, SEARCH_FIND | SEARCH_SMALLER, + SEARCH_NO_FIND | SEARCH_BIGGER, SEARCH_NO_FIND | SEARCH_SMALLER, + SEARCH_FIND | SEARCH_PREFIX, SEARCH_LAST, SEARCH_LAST | SEARCH_SMALLER, + MBR_CONTAIN, MBR_INTERSECT, MBR_WITHIN, MBR_DISJOINT, MBR_EQUAL +}; + +uint NEAR maria_readnext_vec[]= +{ + SEARCH_BIGGER, SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_BIGGER, SEARCH_SMALLER, + SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_SMALLER +}; diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c new file mode 100644 index 00000000000..72c5e10d9ab --- /dev/null +++ b/storage/maria/ma_statrec.c @@ -0,0 +1,300 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + /* Functions to handle fixed-length-records */ + +#include "maria_def.h" + + +my_bool _ma_write_static_record(MARIA_HA *info, const byte *record) +{ + byte temp[8]; /* max pointer length */ + if (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) + { + my_off_t filepos=info->s->state.dellink; + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_read(info,(char*) &temp[0],info->s->base.rec_reflength, + info->s->state.dellink+1, + MYF(MY_NABP))) + goto err; + info->s->state.dellink= _ma_rec_pos(info->s,temp); + info->state->del--; + info->state->empty-=info->s->base.pack_reclength; + if (info->s->file_write(info, (char*) record, info->s->base.reclength, + filepos, + MYF(MY_NABP))) + goto err; + } + else + { + if (info->state->data_file_length > info->s->base.max_data_file_length- + info->s->base.pack_reclength) + { + my_errno=HA_ERR_RECORD_FILE_FULL; + return(2); + } + if (info->opt_flag & WRITE_CACHE_USED) + { /* Cash in use */ + if (my_b_write(&info->rec_cache, (byte*) record, + info->s->base.reclength)) + goto err; + if (info->s->base.pack_reclength != info->s->base.reclength) + { + uint length=info->s->base.pack_reclength - info->s->base.reclength; + bzero((char*) temp,length); + if (my_b_write(&info->rec_cache, (byte*) temp,length)) + goto err; + } + } + else + { + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_write(info,(char*) record,info->s->base.reclength, + info->state->data_file_length, + info->s->write_flag)) + goto err; + if (info->s->base.pack_reclength != info->s->base.reclength) + { + uint length=info->s->base.pack_reclength - info->s->base.reclength; + bzero((char*) temp,length); + if (info->s->file_write(info, (byte*) temp,length, + info->state->data_file_length+ + info->s->base.reclength, + info->s->write_flag)) + goto err; + } + } + info->state->data_file_length+=info->s->base.pack_reclength; + info->s->state.split++; + } + return 0; + err: + return 1; +} + +my_bool _ma_update_static_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *record) +{ + info->rec_cache.seek_not_done=1; /* We have done a seek */ + return (info->s->file_write(info, + (char*) record,info->s->base.reclength, + pos, + MYF(MY_NABP)) != 0); +} + + +my_bool _ma_delete_static_record(MARIA_HA *info) +{ + byte temp[9]; /* 1+sizeof(uint32) */ + info->state->del++; + info->state->empty+=info->s->base.pack_reclength; + temp[0]= '\0'; /* Mark that record is deleted */ + _ma_dpointer(info,temp+1,info->s->state.dellink); + info->s->state.dellink= info->cur_row.lastpos; + info->rec_cache.seek_not_done=1; + return (info->s->file_write(info, temp, 1+info->s->rec_reflength, + info->cur_row.lastpos, MYF(MY_NABP)) != 0); +} + + +my_bool _ma_cmp_static_record(register MARIA_HA *info, + register const byte *old) +{ + DBUG_ENTER("_ma_cmp_static_record"); + + /* We are going to do changes; dont let anybody disturb */ + dont_break(); /* Dont allow SIGHUP or SIGINT */ + + if (info->opt_flag & WRITE_CACHE_USED) + { + if (flush_io_cache(&info->rec_cache)) + { + DBUG_RETURN(1); + } + info->rec_cache.seek_not_done=1; /* We have done a seek */ + } + + if ((info->opt_flag & READ_CHECK_USED)) + { /* If check isn't disabled */ + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_read(info, (char*) 
info->rec_buff, + info->s->base.reclength, + info->cur_row.lastpos, + MYF(MY_NABP))) + DBUG_RETURN(1); + if (memcmp((byte*) info->rec_buff, (byte*) old, + (uint) info->s->base.reclength)) + { + DBUG_DUMP("read",old,info->s->base.reclength); + DBUG_DUMP("disk",info->rec_buff,info->s->base.reclength); + my_errno=HA_ERR_RECORD_CHANGED; /* Record have changed */ + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos) +{ + DBUG_ENTER("_ma_cmp_static_unique"); + + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_read(info, (char*) info->rec_buff, info->s->base.reclength, + pos, MYF(MY_NABP))) + DBUG_RETURN(1); + DBUG_RETURN(_ma_unique_comp(def, record, (byte*) info->rec_buff, + def->null_are_equal)); +} + + +/* + Read a fixed-length-record + + RETURN + 0 Ok + 1 record delete + -1 on read-error or locking-error +*/ + +int _ma_read_static_record(register MARIA_HA *info, register byte *record, + MARIA_RECORD_POS pos) +{ + int error; + + if (pos != HA_OFFSET_ERROR) + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file <= pos && + flush_io_cache(&info->rec_cache)) + return(-1); + info->rec_cache.seek_not_done=1; /* We have done a seek */ + + error=info->s->file_read(info,(char*) record,info->s->base.reclength, + pos, MYF(MY_NABP)) != 0; + fast_ma_writeinfo(info); + if (! error) + { + if (!*record) + { + my_errno=HA_ERR_RECORD_DELETED; + return(1); /* Record is deleted */ + } + info->update|= HA_STATE_AKTIV; /* Record is read */ + return(0); + } + return(-1); /* Error on read */ + } + fast_ma_writeinfo(info); /* No such record */ + return(-1); +} + + + +int _ma_read_rnd_static_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks) +{ + int locked,error,cache_read; + uint cache_length; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_read_rnd_static_record"); + + cache_read=0; + cache_length=0; + if (info->opt_flag & READ_CACHE_USED) + { /* Cache in use */ + if (filepos == my_b_tell(&info->rec_cache) && + (skip_deleted_blocks || !filepos)) + { + cache_read=1; /* Read record using cache */ + cache_length=(uint) (info->rec_cache.read_end - info->rec_cache.read_pos); + } + else + info->rec_cache.seek_not_done=1; /* Filepos is changed */ + } + locked=0; + if (info->lock_type == F_UNLCK) + { + if (filepos >= info->state->data_file_length) + { /* Test if new records */ + if (_ma_readinfo(info,F_RDLCK,0)) + DBUG_RETURN(my_errno); + locked=1; + } + else + { /* We don't nead new info */ +#ifndef UNSAFE_LOCKING + if ((! cache_read || share->base.reclength > cache_length) && + share->tot_locks == 0) + { /* record not in cache */ + locked=1; + } +#else + info->tmp_lock_type=F_RDLCK; +#endif + } + } + if (filepos >= info->state->data_file_length) + { + DBUG_PRINT("test",("filepos: %ld (%ld) records: %ld del: %ld", + (long) filepos/share->base.reclength, (long) filepos, + (long) info->state->records, (long) info->state->del)); + fast_ma_writeinfo(info); + DBUG_RETURN(my_errno=HA_ERR_END_OF_FILE); + } + info->cur_row.lastpos= filepos; + info->cur_row.nextpos= filepos+share->base.pack_reclength; + + if (! 
cache_read) /* No cacheing */ + { + if ((error= _ma_read_static_record(info, buf, filepos))) + { + if (error > 0) + error=my_errno=HA_ERR_RECORD_DELETED; + else + error=my_errno; + } + DBUG_RETURN(error); + } + + /* Read record with cacheing */ + error=my_b_read(&info->rec_cache,(byte*) buf,share->base.reclength); + if (info->s->base.pack_reclength != info->s->base.reclength && !error) + { + char tmp[8]; /* Skill fill bytes */ + error=my_b_read(&info->rec_cache,(byte*) tmp, + info->s->base.pack_reclength - info->s->base.reclength); + } + if (locked) + VOID(_ma_writeinfo(info,0)); /* Unlock keyfile */ + if (!error) + { + if (!buf[0]) + { /* Record is removed */ + DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED); + } + /* Found and may be updated */ + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + DBUG_RETURN(0); + } + /* my_errno should be set if rec_cache.error == -1 */ + if (info->rec_cache.error != -1 || my_errno == 0) + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(my_errno); /* Something wrong (EOF?) */ +} diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c new file mode 100644 index 00000000000..0f37391c1d4 --- /dev/null +++ b/storage/maria/ma_test1.c @@ -0,0 +1,714 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MARIA table */ + +#include "maria.h" +#include <my_getopt.h> +#include <m_string.h> + +#define MAX_REC_LENGTH 1024 + +static void usage(); + +static int rec_pointer_size=0, flags[50]; +static int key_field=FIELD_SKIP_PRESPACE,extra_field=FIELD_SKIP_ENDSPACE; +static int key_type=HA_KEYTYPE_NUM; +static int create_flag=0; +static enum data_file_type record_type= DYNAMIC_RECORD; + +static uint insert_count, update_count, remove_count; +static uint pack_keys=0, pack_seg=0, key_length; +static uint unique_key=HA_NOSAME; +static my_bool key_cacheing, null_fields, silent, skip_update, opt_unique, + verbose, skip_delete; +static MARIA_COLUMNDEF recinfo[4]; +static MARIA_KEYDEF keyinfo[10]; +static HA_KEYSEG keyseg[10]; +static HA_KEYSEG uniqueseg[10]; + +static int run_test(const char *filename); +static void get_options(int argc, char *argv[]); +static void create_key(char *key,uint rownr); +static void create_record(char *record,uint rownr); +static void update_record(char *record); + +int main(int argc,char *argv[]) +{ + MY_INIT(argv[0]); + my_init(); + maria_init(); + if (key_cacheing) + init_key_cache(maria_key_cache,KEY_CACHE_BLOCK_SIZE,IO_SIZE*16,0,0); + get_options(argc,argv); + + exit(run_test("test1")); +} + + +static int run_test(const char *filename) +{ + MARIA_HA *file; + int i,j,error,deleted,rec_length,uniques=0; + ha_rows found,row_count; + char record[MAX_REC_LENGTH],key[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH]; + MARIA_UNIQUEDEF uniquedef; + MARIA_CREATE_INFO create_info; + + 
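+  /*
+    Table layout set up below: one leading null-bits/delete-marker byte,
+    a key column (recinfo[0]) carrying the single B-tree key, a filler
+    column (recinfo[1]) and, when opt_unique is set, an extra
+    MARIA_UNIQUE_HASH_LENGTH check column backing a unique constraint.
+  */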
bzero((char*) recinfo,sizeof(recinfo)); + bzero((char*) &create_info,sizeof(create_info)); + + /* First define 2 columns */ + create_info.null_bytes= 1; + recinfo[0].type= key_field; + recinfo[0].length= (key_field == FIELD_BLOB ? 4+maria_portable_sizeof_char_ptr : + key_length); + if (key_field == FIELD_VARCHAR) + recinfo[0].length+= HA_VARCHAR_PACKLENGTH(key_length); + recinfo[1].type=extra_field; + recinfo[1].length= (extra_field == FIELD_BLOB ? 4 + maria_portable_sizeof_char_ptr : 24); + if (extra_field == FIELD_VARCHAR) + recinfo[1].length+= HA_VARCHAR_PACKLENGTH(recinfo[1].length); + if (opt_unique) + { + recinfo[2].type=FIELD_CHECK; + recinfo[2].length=MARIA_UNIQUE_HASH_LENGTH; + } + rec_length= recinfo[0].length+recinfo[1].length+recinfo[2].length; + + if (key_type == HA_KEYTYPE_VARTEXT1 && + key_length > 255) + key_type= HA_KEYTYPE_VARTEXT2; + + /* Define a key over the first column */ + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; + keyinfo[0].seg[0].type= key_type; + keyinfo[0].seg[0].flag= pack_seg; + keyinfo[0].seg[0].start=1; + keyinfo[0].seg[0].length=key_length; + keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language= default_charset_info->number; + if (pack_seg & HA_BLOB_PART) + { + keyinfo[0].seg[0].bit_start=4; /* Length of blob length */ + } + keyinfo[0].flag = (uint8) (pack_keys | unique_key); + + bzero((byte*) flags,sizeof(flags)); + if (opt_unique) + { + uint start; + uniques=1; + bzero((char*) &uniquedef,sizeof(uniquedef)); + bzero((char*) uniqueseg,sizeof(uniqueseg)); + uniquedef.seg=uniqueseg; + uniquedef.keysegs=2; + + /* Make a unique over all columns (except first NULL fields) */ + for (i=0, start=1 ; i < 2 ; i++) + { + uniqueseg[i].start=start; + start+=recinfo[i].length; + uniqueseg[i].length=recinfo[i].length; + uniqueseg[i].language= default_charset_info->number; + } + uniqueseg[0].type= key_type; + uniqueseg[0].null_bit= null_fields ? 2 : 0; + uniqueseg[1].type= HA_KEYTYPE_TEXT; + if (extra_field == FIELD_BLOB) + { + uniqueseg[1].length=0; /* The whole blob */ + uniqueseg[1].bit_start=4; /* long blob */ + uniqueseg[1].flag|= HA_BLOB_PART; + } + else if (extra_field == FIELD_VARCHAR) + { + uniqueseg[1].flag|= HA_VAR_LENGTH_PART; + uniqueseg[1].type= (HA_VARCHAR_PACKLENGTH(recinfo[1].length-1) == 1 ? + HA_KEYTYPE_VARTEXT1 : HA_KEYTYPE_VARTEXT2); + } + } + else + uniques=0; + + if (!silent) + printf("- Creating maria file\n"); + create_info.max_rows=(ulong) (rec_pointer_size ? 
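+                               /* rough estimate: bytes addressable by a rec_pointer_size-byte row pointer, divided by an assumed ~40-byte row */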
+ (1L << (rec_pointer_size*8))/40 : + 0); + if (maria_create(filename, record_type, 1, keyinfo,2+opt_unique,recinfo, + uniques, &uniquedef, &create_info, + create_flag)) + goto err; + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + if (!silent) + printf("- Writing key:s\n"); + + my_errno=0; + row_count=deleted=0; + for (i=49 ; i>=1 ; i-=2 ) + { + if (insert_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; } + j=i%25 +1; + create_record(record,j); + error=maria_write(file,record); + if (!error) + row_count++; + flags[j]=1; + if (verbose || error) + printf("J= %2d maria_write: %d errno: %d\n", j,error,my_errno); + } + + /* Insert 2 rows with null values */ + if (null_fields) + { + create_record(record,0); + error=maria_write(file,record); + if (!error) + row_count++; + if (verbose || error) + printf("J= NULL maria_write: %d errno: %d\n", error,my_errno); + error=maria_write(file,record); + if (!error) + row_count++; + if (verbose || error) + printf("J= NULL maria_write: %d errno: %d\n", error,my_errno); + flags[0]=2; + } + + if (!skip_update) + { + if (opt_unique) + { + if (!silent) + printf("- Checking unique constraint\n"); + create_record(record,j); + if (!maria_write(file,record) || my_errno != HA_ERR_FOUND_DUPP_UNIQUE) + { + printf("unique check failed\n"); + } + } + if (!silent) + printf("- Updating rows\n"); + + /* Update first last row to force extend of file */ + if (maria_rsame(file,read_record,-1)) + { + printf("Can't find last row with maria_rsame\n"); + } + else + { + memcpy(record,read_record,rec_length); + update_record(record); + if (maria_update(file,read_record,record)) + { + printf("Can't update last row: %.*s\n", + keyinfo[0].seg[0].length,read_record+1); + } + } + + /* Read through all rows and update them */ + assert(maria_scan_init(file) == 0); + + found=0; + while ((error= maria_scan(file,read_record)) == 0) + { + if (update_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; } + memcpy(record,read_record,rec_length); + update_record(record); + if (maria_update(file,read_record,record)) + { + printf("Can't update row: %.*s, error: %d\n", + keyinfo[0].seg[0].length,record+1,my_errno); + } + found++; + } + if (found != row_count) + printf("Found %ld of %ld rows\n", (ulong) found, (ulong) row_count); + maria_scan_end(file); + } + + if (!silent) + printf("- Reopening file\n"); + if (maria_close(file)) goto err; + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) goto err; + if (!skip_delete) + { + if (!silent) + printf("- Removing keys\n"); + + for (i=0 ; i <= 10 ; i++) + { + /* testing */ + if (remove_count-- == 0) + { + fprintf(stderr, + "delete-rows number of rows deleted; Going down hard!\n"); + VOID(maria_close(file)); + exit(0) ; + } + j=i*2; + if (!flags[j]) + continue; + create_key(key,j); + my_errno=0; + if ((error = maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT))) + { + if (verbose || (flags[j] >= 1 || + (error && my_errno != HA_ERR_KEY_NOT_FOUND))) + printf("key: '%.*s' maria_rkey: %3d errno: %3d\n", + (int) key_length,key+test(null_fields),error,my_errno); + } + else + { + error=maria_delete(file,read_record); + if (verbose || error) + printf("key: '%.*s' maria_delete: %3d errno: %3d\n", + (int) key_length, key+test(null_fields), error, my_errno); + if (! 
error) + { + deleted++; + flags[j]--; + } + } + } + } + if (!silent) + printf("- Reading rows with key\n"); + record[1]= 0; /* For nicer printf */ + for (i=0 ; i <= 25 ; i++) + { + create_key(key,i); + my_errno=0; + error=maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT); + if (verbose || + (error == 0 && flags[i] == 0 && unique_key) || + (error && (flags[i] != 0 || my_errno != HA_ERR_KEY_NOT_FOUND))) + { + printf("key: '%.*s' maria_rkey: %3d errno: %3d record: %s\n", + (int) key_length,key+test(null_fields),error,my_errno,record+1); + } + } + + if (!silent) + printf("- Reading rows with position\n"); + if (maria_scan_init(file)) + { + fprintf(stderr, "maria_scan_init failed\n"); + goto err; + } + + for (i=1,found=0 ; i <= 30 ; i++) + { + my_errno=0; + if ((error= maria_scan(file, read_record)) == HA_ERR_END_OF_FILE) + { + if (found != row_count-deleted) + printf("Found only %ld of %ld rows\n", (ulong) found, + (ulong) (row_count - deleted)); + break; + } + if (!error) + found++; + if (verbose || (error != 0 && error != HA_ERR_RECORD_DELETED && + error != HA_ERR_END_OF_FILE)) + { + printf("pos: %2d maria_rrnd: %3d errno: %3d record: %s\n", + i-1,error,my_errno,read_record+1); + } + } + if (maria_close(file)) + goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return (0); +err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ +} + + +static void create_key_part(char *key,uint rownr) +{ + if (!unique_key) + rownr&=7; /* Some identical keys */ + if (keyinfo[0].seg[0].type == HA_KEYTYPE_NUM) + { + sprintf(key,"%*d",keyinfo[0].seg[0].length,rownr); + } + else if (keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT1 || + keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT2) + { /* Alpha record */ + /* Create a key that may be easily packed */ + bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B'); + sprintf(key+keyinfo[0].seg[0].length-2,"%-2d",rownr); + if ((rownr & 7) == 0) + { + /* Change the key to force a unpack of the next key */ + bfill(key+3,keyinfo[0].seg[0].length-5,rownr < 10 ? 'a' : 'b'); + } + } + else + { /* Alpha record */ + if (keyinfo[0].seg[0].flag & HA_SPACE_PACK) + sprintf(key,"%-*d",keyinfo[0].seg[0].length,rownr); + else + { + /* Create a key that may be easily packed */ + bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B'); + sprintf(key+keyinfo[0].seg[0].length-2,"%-2d",rownr); + if ((rownr & 7) == 0) + { + /* Change the key to force a unpack of the next key */ + key[1]= (rownr < 10 ? 
'a' : 'b'); + } + } + } +} + + +static void create_key(char *key,uint rownr) +{ + if (keyinfo[0].seg[0].null_bit) + { + if (rownr == 0) + { + key[0]=1; /* null key */ + key[1]=0; /* For easy print of key */ + return; + } + *key++=0; + } + if (keyinfo[0].seg[0].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) + { + uint tmp; + create_key_part(key+2,rownr); + tmp=strlen(key+2); + int2store(key,tmp); + } + else + create_key_part(key,rownr); +} + + +static char blob_key[MAX_REC_LENGTH]; +static char blob_record[MAX_REC_LENGTH+20*20]; + + +static void create_record(char *record,uint rownr) +{ + char *pos; + bzero((char*) record,MAX_REC_LENGTH); + record[0]=1; /* delete marker */ + if (rownr == 0 && keyinfo[0].seg[0].null_bit) + record[0]|=keyinfo[0].seg[0].null_bit; /* Null key */ + + pos=record+1; + if (recinfo[0].type == FIELD_BLOB) + { + uint tmp; + char *ptr; + create_key_part(blob_key,rownr); + tmp=strlen(blob_key); + int4store(pos,tmp); + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + pos+=recinfo[0].length; + } + else if (recinfo[0].type == FIELD_VARCHAR) + { + uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1); + create_key_part(pos+pack_length,rownr); + tmp= strlen(pos+pack_length); + if (pack_length == 1) + *(uchar*) pos= (uchar) tmp; + else + int2store(pos,tmp); + pos+= recinfo[0].length; + } + else + { + create_key_part(pos,rownr); + pos+=recinfo[0].length; + } + if (recinfo[1].type == FIELD_BLOB) + { + uint tmp; + char *ptr;; + sprintf(blob_record,"... row: %d", rownr); + strappend(blob_record,max(MAX_REC_LENGTH-rownr,10),' '); + tmp=strlen(blob_record); + int4store(pos,tmp); + ptr=blob_record; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + } + else if (recinfo[1].type == FIELD_VARCHAR) + { + uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + sprintf(pos+pack_length, "... row: %d", rownr); + tmp= strlen(pos+pack_length); + if (pack_length == 1) + *(uchar*) pos= (uchar) tmp; + else + int2store(pos,tmp); + } + else + { + sprintf(pos,"... row: %d", rownr); + strappend(pos,recinfo[1].length,' '); + } +} + +/* change row to test re-packing of rows and reallocation of keys */ + +static void update_record(char *record) +{ + char *pos=record+1; + if (recinfo[0].type == FIELD_BLOB) + { + char *column,*ptr; + int length; + length=uint4korr(pos); /* Long blob */ + memcpy_fixed(&column,pos+4,sizeof(char*)); + memcpy(blob_key,column,length); /* Move old key */ + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); /* Store pointer to new key */ + if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) + default_charset_info->cset->casedn(default_charset_info, + blob_key, length, blob_key, length); + pos+=recinfo[0].length; + } + else if (recinfo[0].type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1); + uint length= pack_length == 1 ? 
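/*
  The VARCHAR columns handled above carry their value length in a 1- or
  2-byte prefix, chosen by HA_VARCHAR_PACKLENGTH() from the declared column
  length.  A condensed sketch of that convention (helper names are
  illustrative, not part of the patch):
*/
static void varchar_store(char *pos, uint pack_length, const char *str)
{
  uint length= (uint) strlen(str);
  if (pack_length == 1)
    *(uchar*) pos= (uchar) length;         /* columns shorter than 256 bytes */
  else
    int2store(pos, length);                /* 2-byte stored length */
  memcpy(pos + pack_length, str, length);
}

static uint varchar_length(char *pos, uint pack_length)
{
  return pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos);
}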
(uint) *(uchar*) pos : uint2korr(pos); + default_charset_info->cset->casedn(default_charset_info, + pos + pack_length, length, + pos + pack_length, length); + pos+=recinfo[0].length; + } + else + { + if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) + default_charset_info->cset->casedn(default_charset_info, + pos, keyinfo[0].seg[0].length, + pos, keyinfo[0].seg[0].length); + pos+=recinfo[0].length; + } + + if (recinfo[1].type == FIELD_BLOB) + { + char *column; + int length; + length=uint4korr(pos); + memcpy_fixed(&column,pos+4,sizeof(char*)); + memcpy(blob_record,column,length); + bfill(blob_record+length,20,'.'); /* Make it larger */ + length+=20; + int4store(pos,length); + column=blob_record; + memcpy_fixed(pos+4,&column,sizeof(char*)); + } + else if (recinfo[1].type == FIELD_VARCHAR) + { + /* Second field is longer than 10 characters */ + uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos); + pos= record+ recinfo[1].offset; + bfill(pos+pack_length+length,recinfo[1].length-length-pack_length,'.'); + length=recinfo[1].length-pack_length; + if (pack_length == 1) + *(uchar*) pos= (uchar) length; + else + int2store(pos,length); + } + else + { + bfill(pos+recinfo[1].length-10,10,'.'); + } +} + + +static struct my_option my_long_options[] = +{ + {"checksum", 'c', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifndef DBUG_OFF + {"debug", '#', "Undocumented", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"delete-rows", 'd', "Abort after this many rows has been deleted", + (gptr*) &remove_count, (gptr*) &remove_count, 0, GET_UINT, REQUIRED_ARG, + 1000, 0, 0, 0, 0, 0}, + {"help", '?', "Display help and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"insert-rows", 'i', "Undocumented", (gptr*) &insert_count, + (gptr*) &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"key-alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-binary-pack", 'B', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-blob", 'b', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-cache", 'K', "Undocumented", (gptr*) &key_cacheing, + (gptr*) &key_cacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-length", 'k', "Undocumented", (gptr*) &key_length, (gptr*) &key_length, + 0, GET_UINT, REQUIRED_ARG, 6, 0, 0, 0, 0, 0}, + {"key-multiple", 'm', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-prefix_pack", 'P', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-space_pack", 'p', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-varchar", 'w', "Test VARCHAR keys", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"null-fields", 'N', "Define fields with NULL", + (gptr*) &null_fields, (gptr*) &null_fields, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, + {"row-fixed-size", 'S', "Fixed size records", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"rows-in-block", 'M', "Store rows in block format", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"row-pointer-size", 'R', "Undocumented", (gptr*) &rec_pointer_size, + (gptr*) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"silent", 's', "Undocumented", + (gptr*) &silent, (gptr*) &silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"skip-delete", 'U', "Don't test deletes", (gptr*) &skip_delete, + (gptr*) &skip_delete, 0, GET_BOOL, NO_ARG, 0, 0, 
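/*
  Each row of my_long_options binds one long/short option to a program
  variable through the GET_* type code and the two value pointers;
  handle_options() fills the variables and passes the rest to
  get_one_option().  A reduced sketch of a single boolean entry plus the
  required terminator (names are illustrative):
*/
static my_bool example_flag;

static struct my_option example_options[]=
{
  {"example-flag", 'x', "Toggle the illustrative example flag",
   (gptr*) &example_flag, (gptr*) &example_flag, 0, GET_BOOL, NO_ARG,
   0, 0, 0, 0, 0, 0},
  { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};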
0, 0, 0, 0}, + {"skip-update", 'D', "Don't test updates", (gptr*) &skip_update, + (gptr*) &skip_update, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"unique", 'C', "Undocumented", (gptr*) &opt_unique, (gptr*) &opt_unique, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"update-rows", 'u', "Undocumented", (gptr*) &update_count, + (gptr*) &update_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Be more verbose", (gptr*) &verbose, (gptr*) &verbose, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Print version number and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch(optid) { + case 'a': + key_type= HA_KEYTYPE_TEXT; + break; + case 'c': + create_flag|= HA_CREATE_CHECKSUM; + break; + case 'R': /* Length of record pointer */ + if (rec_pointer_size > 3) + rec_pointer_size=0; + break; + case 'P': + pack_keys= HA_PACK_KEY; /* Use prefix compression */ + break; + case 'B': + pack_keys= HA_BINARY_PACK_KEY; /* Use binary compression */ + break; + case 'M': + record_type= BLOCK_RECORD; + break; + case 'S': + if (key_field == FIELD_VARCHAR) + { + create_flag=0; /* Static sized varchar */ + record_type= STATIC_RECORD; + } + else if (key_field != FIELD_BLOB) + { + key_field=FIELD_NORMAL; /* static-size record */ + extra_field=FIELD_NORMAL; + record_type= STATIC_RECORD; + } + break; + case 'p': + pack_keys=HA_PACK_KEY; /* Use prefix + space packing */ + pack_seg=HA_SPACE_PACK; + key_type=HA_KEYTYPE_TEXT; + break; + case 'm': + unique_key=0; + break; + case 'b': + key_field=FIELD_BLOB; /* blob key */ + extra_field= FIELD_BLOB; + pack_seg|= HA_BLOB_PART; + key_type= HA_KEYTYPE_VARTEXT1; + if (record_type == STATIC_RECORD) + record_type= DYNAMIC_RECORD; + break; + case 'k': + if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH) + { + fprintf(stderr,"Wrong key length\n"); + exit(1); + } + break; + case 'w': + key_field=FIELD_VARCHAR; /* varchar keys */ + extra_field= FIELD_VARCHAR; + key_type= HA_KEYTYPE_VARTEXT1; + pack_seg|= HA_VAR_LENGTH_PART; + if (record_type == STATIC_RECORD) + record_type= DYNAMIC_RECORD; + break; + case 'K': /* Use key cacheing */ + key_cacheing=1; + break; + case 'V': + printf("test1 Ver 1.2 \n"); + exit(0); + case '#': + DBUG_PUSH (argument); + break; + case '?': + usage(); + exit(1); + } + return 0; +} + + +/* Read options */ + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + + return; +} /* get options */ + + +static void usage() +{ + printf("Usage: %s [options]\n\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c new file mode 100644 index 00000000000..46b8c710d4a --- /dev/null +++ b/storage/maria/ma_test2.c @@ -0,0 +1,1100 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Test av isam-databas: stor test */ + +#ifndef USE_MY_FUNC /* We want to be able to dbug this !! */ +#define USE_MY_FUNC +#endif +#ifdef DBUG_OFF +#undef DBUG_OFF +#endif +#ifndef SAFEMALLOC +#define SAFEMALLOC +#endif +#include "maria_def.h" +#include <m_ctype.h> +#include <my_bit.h> + +#define STANDARD_LENGTH 37 +#define MARIA_KEYS 6 +#define MAX_PARTS 4 +#if !defined(MSDOS) && !defined(labs) +#define labs(a) abs(a) +#endif + +static void get_options(int argc, char *argv[]); +static uint rnd(uint max_value); +static void fix_length(byte *record,uint length); +static void put_blob_in_record(char *blob_pos,char **blob_buffer, + ulong *length); +static void copy_key(struct st_maria_info *info,uint inx, + uchar *record,uchar *key); + +static int verbose=0,testflag=0, + first_key=0,async_io=0,key_cacheing=0,write_cacheing=0,locking=0, + rec_pointer_size=0,pack_fields=1,silent=0, + opt_quick_mode=0; +static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1; +static int create_flag= 0, srand_arg= 0; +static ulong key_cache_size=IO_SIZE*16; +static uint key_cache_block_size= KEY_CACHE_BLOCK_SIZE; +static enum data_file_type record_type= DYNAMIC_RECORD; + +static uint keys=MARIA_KEYS,recant=1000; +static uint use_blob=0; +static uint16 key1[1001],key3[5000]; +static char record[300],record2[300],key[100],key2[100], + read_record[300],read_record2[300],read_record3[300]; +static HA_KEYSEG glob_keyseg[MARIA_KEYS][MAX_PARTS]; + + /* Test program */ + +int main(int argc, char *argv[]) +{ + uint i; + int j,n1,n2,n3,error,k; + uint write_count,update,dupp_keys,opt_delete,start,length,blob_pos, + reclength,ant,found_parts; + my_off_t lastpos; + ha_rows range_records,records; + MARIA_HA *file; + MARIA_KEYDEF keyinfo[10]; + MARIA_COLUMNDEF recinfo[10]; + MARIA_INFO info; + const char *filename; + char *blob_buffer; + MARIA_CREATE_INFO create_info; + MY_INIT(argv[0]); + + filename= "test2"; + get_options(argc,argv); + if (! async_io) + my_disable_async_io=1; + + maria_init(); + reclength=STANDARD_LENGTH+60+(use_blob ? 
8 : 0); + blob_pos=STANDARD_LENGTH+60; + keyinfo[0].seg= &glob_keyseg[0][0]; + keyinfo[0].seg[0].start=0; + keyinfo[0].seg[0].length=6; + keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[0].seg[0].language= default_charset_info->number; + keyinfo[0].seg[0].flag=(uint8) pack_seg; + keyinfo[0].seg[0].null_bit=0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; + keyinfo[0].keysegs=1; + keyinfo[0].flag = pack_type; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[1].seg= &glob_keyseg[1][0]; + keyinfo[1].seg[0].start=7; + keyinfo[1].seg[0].length=6; + keyinfo[1].seg[0].type=HA_KEYTYPE_BINARY; + keyinfo[1].seg[0].flag=0; + keyinfo[1].seg[0].null_bit=0; + keyinfo[1].seg[0].null_pos=0; + keyinfo[1].seg[1].start=0; /* two part key */ + keyinfo[1].seg[1].length=6; + keyinfo[1].seg[1].type=HA_KEYTYPE_NUM; + keyinfo[1].seg[1].flag=HA_REVERSE_SORT; + keyinfo[1].seg[1].null_bit=0; + keyinfo[1].seg[1].null_pos=0; + keyinfo[1].key_alg=HA_KEY_ALG_BTREE; + keyinfo[1].keysegs=2; + keyinfo[1].flag =0; + keyinfo[1].block_length= MARIA_MIN_KEY_BLOCK_LENGTH; /* Diff blocklength */ + keyinfo[2].seg= &glob_keyseg[2][0]; + keyinfo[2].seg[0].start=12; + keyinfo[2].seg[0].length=8; + keyinfo[2].seg[0].type=HA_KEYTYPE_BINARY; + keyinfo[2].seg[0].flag=HA_REVERSE_SORT; + keyinfo[2].seg[0].null_bit=0; + keyinfo[2].seg[0].null_pos=0; + keyinfo[2].key_alg=HA_KEY_ALG_BTREE; + keyinfo[2].keysegs=1; + keyinfo[2].flag =HA_NOSAME; + keyinfo[2].block_length= 0; /* Default block length */ + keyinfo[3].seg= &glob_keyseg[3][0]; + keyinfo[3].seg[0].start=0; + keyinfo[3].seg[0].length=reclength-(use_blob ? 8 : 0); + keyinfo[3].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[3].seg[0].language=default_charset_info->number; + keyinfo[3].seg[0].flag=(uint8) pack_seg; + keyinfo[3].seg[0].null_bit=0; + keyinfo[3].seg[0].null_pos=0; + keyinfo[3].key_alg=HA_KEY_ALG_BTREE; + keyinfo[3].keysegs=1; + keyinfo[3].flag = pack_type; + keyinfo[3].block_length= 0; /* Default block length */ + keyinfo[4].seg= &glob_keyseg[4][0]; + keyinfo[4].seg[0].start=0; + keyinfo[4].seg[0].length=5; + keyinfo[4].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[4].seg[0].language=default_charset_info->number; + keyinfo[4].seg[0].flag=0; + keyinfo[4].seg[0].null_bit=0; + keyinfo[4].seg[0].null_pos=0; + keyinfo[4].key_alg=HA_KEY_ALG_BTREE; + keyinfo[4].keysegs=1; + keyinfo[4].flag = pack_type; + keyinfo[4].block_length= 0; /* Default block length */ + keyinfo[5].seg= &glob_keyseg[5][0]; + keyinfo[5].seg[0].start=0; + keyinfo[5].seg[0].length=4; + keyinfo[5].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[5].seg[0].language=default_charset_info->number; + keyinfo[5].seg[0].flag=pack_seg; + keyinfo[5].seg[0].null_bit=0; + keyinfo[5].seg[0].null_pos=0; + keyinfo[5].key_alg=HA_KEY_ALG_BTREE; + keyinfo[5].keysegs=1; + keyinfo[5].flag = pack_type; + keyinfo[5].block_length= 0; /* Default block length */ + + recinfo[0].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; + recinfo[0].length=7; + recinfo[0].null_bit=0; + recinfo[0].null_pos=0; + recinfo[1].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; + recinfo[1].length=5; + recinfo[1].null_bit=0; + recinfo[1].null_pos=0; + recinfo[2].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; + recinfo[2].length=9; + recinfo[2].null_bit=0; + recinfo[2].null_pos=0; + recinfo[3].type=FIELD_NORMAL; + recinfo[3].length=STANDARD_LENGTH-7-5-9-4; + recinfo[3].null_bit=0; + recinfo[3].null_pos=0; + recinfo[4].type=pack_fields ? 
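/*
  The six key definitions above all follow the same pattern; a reduced
  sketch of one single-segment B-tree key over a text column (offsets and
  lengths are illustrative, not part of the patch):
*/
static void setup_one_key(MARIA_KEYDEF *keyinfo, HA_KEYSEG *seg)
{
  seg[0].start= 0;                         /* byte offset inside the record */
  seg[0].length= 6;
  seg[0].type= HA_KEYTYPE_TEXT;
  seg[0].language= default_charset_info->number;
  seg[0].flag= 0;
  seg[0].null_bit= 0;
  seg[0].null_pos= 0;

  keyinfo->seg= seg;
  keyinfo->keysegs= 1;
  keyinfo->key_alg= HA_KEY_ALG_BTREE;
  keyinfo->flag= HA_PACK_KEY;
  keyinfo->block_length= 0;                /* 0 = default block length */
}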
FIELD_SKIP_ZERO : 0; + recinfo[4].length=4; + recinfo[4].null_bit=0; + recinfo[4].null_pos=0; + recinfo[5].type=pack_fields ? FIELD_SKIP_ENDSPACE : 0; + recinfo[5].length=60; + recinfo[5].null_bit=0; + recinfo[5].null_pos=0; + if (use_blob) + { + recinfo[6].type=FIELD_BLOB; + recinfo[6].length=4+maria_portable_sizeof_char_ptr; + recinfo[6].null_bit=0; + recinfo[6].null_pos=0; + } + + write_count=update=dupp_keys=opt_delete=0; + blob_buffer=0; + + for (i=1000 ; i>0 ; i--) key1[i]=0; + for (i=4999 ; i>0 ; i--) key3[i]=0; + + if (!silent) + printf("- Creating maria-file\n"); + file= 0; + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=(ha_rows) (rec_pointer_size ? + (1L << (rec_pointer_size*8))/ + reclength : 0); + create_info.reloc_rows=(ha_rows) 100; + if (maria_create(filename, record_type, keys,&keyinfo[first_key], + use_blob ? 7 : 6, &recinfo[0], + 0,(MARIA_UNIQUEDEF*) 0, + &create_info,create_flag)) + goto err; + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + if (!silent) + printf("- Writing key:s\n"); + if (key_cacheing) + init_key_cache(maria_key_cache,key_cache_block_size,key_cache_size,0,0); + if (locking) + maria_lock_database(file,F_WRLCK); + if (write_cacheing) + maria_extra(file,HA_EXTRA_WRITE_CACHE,0); + if (opt_quick_mode) + maria_extra(file,HA_EXTRA_QUICK,0); + + for (i=0 ; i < recant ; i++) + { + ulong blob_length; + n1=rnd(1000); n2=rnd(100); n3=rnd(5000); + sprintf(record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count); + int4store(record+STANDARD_LENGTH-4,(long) i); + fix_length(record,(uint) STANDARD_LENGTH+rnd(60)); + put_blob_in_record(record+blob_pos,&blob_buffer, &blob_length); + DBUG_PRINT("test",("record: %d blob_length: %lu", i, blob_length)); + + if (maria_write(file,record)) + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0) + { + printf("Error: %d in write at record: %d\n",my_errno,i); + goto err; + } + if (verbose) printf(" Double key: %d\n",n3); + } + else + { + if (key3[n3] == 1 && first_key <3 && first_key+keys >= 3) + { + printf("Error: Didn't get error when writing second key: '%8d'\n",n3); + goto err; + } + write_count++; key1[n1]++; key3[n3]=1; + } + + /* Check if we can find key without flushing database */ + if (i % 10 == 0) + { + for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ; + if (!j) + for (j=999 ; j>0 && key1[j] == 0 ; j--) ; + sprintf(key,"%6d",j); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + { + printf("Test in loop: Can't find key: \"%s\"\n",key); + goto err; + } + } + } + if (testflag == 1) + goto end; + + if (write_cacheing) + { + if (maria_extra(file,HA_EXTRA_NO_CACHE,0)) + { + puts("got error from maria_extra(HA_EXTRA_NO_CACHE)"); + goto end; + } + } + if (key_cacheing) + resize_key_cache(maria_key_cache,key_cache_block_size,key_cache_size*2,0,0); + + if (!silent) + printf("- Delete\n"); + if (srand_arg) + srand(srand_arg); + for (i=0 ; i<recant/10 ; i++) + { + for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ; + if (j != 0) + { + sprintf(key,"%6d",j); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + { + printf("can't find key1: \"%s\"\n",key); + goto err; + } + if (bcmp(read_record+keyinfo[0].seg[0].start, + key, keyinfo[0].seg[0].length)) + { + printf("Found wrong record when searching for key: \"%s\"\n",key); + goto err; + } + if (opt_delete == (uint) remove_count) /* While testing */ + goto end; + if (maria_delete(file,read_record)) + { + printf("error: %d; can't delete record: \"%s\"\n", my_errno,read_record); + goto err; + } + opt_delete++; 
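/*
  Both optional caches used above are switched on before the write loop
  starts: the index key cache via init_key_cache() on the global
  maria_key_cache, and the record write cache via maria_extra().  Condensed
  sketch using the same default sizes as in this test:
*/
static void enable_test_caches(MARIA_HA *file)
{
  init_key_cache(maria_key_cache, KEY_CACHE_BLOCK_SIZE, IO_SIZE*16, 0, 0);
  maria_extra(file, HA_EXTRA_WRITE_CACHE, 0); /* undone with HA_EXTRA_NO_CACHE */
}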
+ key1[atoi(read_record+keyinfo[0].seg[0].start)]--; + key3[atoi(read_record+keyinfo[2].seg[0].start)]=0; + } + else + puts("Warning: Skipping delete test because no dupplicate keys"); + } + if (testflag == 2) + goto end; + + if (!silent) + printf("- Update\n"); + if (srand_arg) + srand(srand_arg); + for (i=0 ; i<recant/10 ; i++) + { + n1=rnd(1000); n2=rnd(100); n3=rnd(5000); + sprintf(record2,"%6d:%4d:%8d:XXX: %4d ",n1,n2,n3,update); + int4store(record2+STANDARD_LENGTH-4,(long) i); + fix_length(record2,(uint) STANDARD_LENGTH+rnd(60)); + + for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ; + if (j != 0) + { + sprintf(key,"%6d",j); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + { + printf("can't find key1: \"%s\"\n",key); + goto err; + } + if (bcmp(read_record+keyinfo[0].seg[0].start, + key, keyinfo[0].seg[0].length)) + { + printf("Found wrong record when searching for key: \"%s\"; Found \"%.*s\"\n", + key, keyinfo[0].seg[0].length, + read_record+keyinfo[0].seg[0].start); + goto err; + } + if (use_blob) + { + ulong blob_length; + if (i & 1) + put_blob_in_record(record+blob_pos,&blob_buffer, &blob_length); + else + bmove(record+blob_pos,read_record+blob_pos,8); + } + if (maria_update(file,read_record,record2)) + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0) + { + printf("error: %d; can't update:\nFrom: \"%s\"\nTo: \"%s\"\n", + my_errno,read_record,record2); + goto err; + } + if (verbose) + printf("Double key when tried to update:\nFrom: \"%s\"\nTo: \"%s\"\n",record,record2); + } + else + { + key1[atoi(read_record+keyinfo[0].seg[0].start)]--; + key3[atoi(read_record+keyinfo[2].seg[0].start)]=0; + key1[n1]++; key3[n3]=1; + update++; + } + } + } + if (testflag == 3) + goto end; + + for (i=999, dupp_keys=j=0 ; i>0 ; i--) + { + if (key1[i] > dupp_keys) + { + dupp_keys=key1[i]; j=i; + } + } + sprintf(key,"%6d",j); + start=keyinfo[0].seg[0].start; + length=keyinfo[0].seg[0].length; + if (dupp_keys) + { + if (!silent) + printf("- Same key: first - next -> last - prev -> first\n"); + DBUG_PRINT("progpos",("first - next -> last - prev -> first")); + if (verbose) printf(" Using key: \"%s\" Keys: %d\n",key,dupp_keys); + + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + goto err; + if (maria_rsame(file,read_record2,-1)) + goto err; + if (memcmp(read_record,read_record2,reclength) != 0) + { + printf("maria_rsame didn't find same record\n"); + goto end; + } + info.recpos=maria_position(file); + if (maria_rfirst(file,read_record2,0) || + maria_rsame_with_pos(file,read_record2,0,info.recpos) || + memcmp(read_record,read_record2,reclength) != 0) + { + printf("maria_rsame_with_pos didn't find same record\n"); + goto end; + } + { + info.recpos= maria_position(file); + int skr=maria_rnext(file,read_record2,0); + if ((skr && my_errno != HA_ERR_END_OF_FILE) || + maria_rprev(file,read_record2,-1) || + memcmp(read_record,read_record2,reclength) != 0 || + info.recpos != maria_position(file)) + { + printf("maria_rsame_with_pos lost position\n"); + goto end; + } + } + ant=1; + while (maria_rnext(file,read_record2,0) == 0 && + memcmp(read_record2+start,key,length) == 0) ant++; + if (ant != dupp_keys) + { + printf("next: Found: %d keys of %d\n",ant,dupp_keys); + goto end; + } + ant=0; + while (maria_rprev(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys) + { + printf("prev: Found: %d records of %d\n",ant,dupp_keys); + goto end; + } + + /* Check of maria_rnext_same */ + if 
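/*
  The update loop above always follows the same read-modify-write pattern:
  locate the row by key, build the new image, then hand both images to
  maria_update().  A condensed sketch (buffer names are illustrative):
*/
static int update_by_key(MARIA_HA *file, char *key,
                         char *old_image, char *new_image)
{
  if (maria_rkey(file, old_image, 0, key, 0, HA_READ_KEY_EXACT))
    return my_errno;                  /* key not present */
  if (maria_update(file, old_image, new_image))
    return my_errno;                  /* e.g. HA_ERR_FOUND_DUPP_KEY */
  return 0;
}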
(maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + goto err; + ant=1; + while (!maria_rnext_same(file,read_record3) && ant < dupp_keys+10) + ant++; + if (ant != dupp_keys || my_errno != HA_ERR_END_OF_FILE) + { + printf("maria_rnext_same: Found: %d records of %d\n",ant,dupp_keys); + goto end; + } + } + + if (!silent) + printf("- All keys: first - next -> last - prev -> first\n"); + DBUG_PRINT("progpos",("All keys: first - next -> last - prev -> first")); + ant=1; + if (maria_rfirst(file,read_record,0)) + { + printf("Can't find first record\n"); + goto end; + } + while ((error=maria_rnext(file,read_record3,0)) == 0 && ant < write_count+10) + ant++; + if (ant != write_count - opt_delete || error != HA_ERR_END_OF_FILE) + { + printf("next: I found: %d records of %d (error: %d)\n", + ant, write_count - opt_delete, error); + goto end; + } + if (maria_rlast(file,read_record2,0) || + bcmp(read_record2,read_record3,reclength)) + { + printf("Can't find last record\n"); + DBUG_DUMP("record2",(byte*) read_record2,reclength); + DBUG_DUMP("record3",(byte*) read_record3,reclength); + goto end; + } + ant=1; + while (maria_rprev(file,read_record3,0) == 0 && ant < write_count+10) + ant++; + if (ant != write_count - opt_delete) + { + printf("prev: I found: %d records of %d\n",ant,write_count); + goto end; + } + if (bcmp(read_record,read_record3,reclength)) + { + printf("Can't find first record\n"); + goto end; + } + + if (!silent) + printf("- Test if: Read first - next - prev - prev - next == first\n"); + DBUG_PRINT("progpos",("- Read first - next - prev - prev - next == first")); + if (maria_rfirst(file,read_record,0) || + maria_rnext(file,read_record3,0) || + maria_rprev(file,read_record3,0) || + maria_rprev(file,read_record3,0) == 0 || + maria_rnext(file,read_record3,0)) + goto err; + if (bcmp(read_record,read_record3,reclength) != 0) + printf("Can't find first record\n"); + + if (!silent) + printf("- Test if: Read last - prev - next - next - prev == last\n"); + DBUG_PRINT("progpos",("Read last - prev - next - next - prev == last")); + if (maria_rlast(file,read_record2,0) || + maria_rprev(file,read_record3,0) || + maria_rnext(file,read_record3,0) || + maria_rnext(file,read_record3,0) == 0 || + maria_rprev(file,read_record3,0)) + goto err; + if (bcmp(read_record2,read_record3,reclength)) + printf("Can't find last record\n"); + + if (!silent) + puts("- Test read key-part"); + strmov(key2,key); + for(i=strlen(key2) ; i-- > 1 ;) + { + key2[i]=0; + + /* The following row is just to catch some bugs in the key code */ + bzero((char*) file->lastkey,file->s->base.max_key_length*2); + if (maria_rkey(file,read_record,0,key2,(uint) i,HA_READ_PREFIX)) + goto err; + if (bcmp(read_record+start,key,(uint) i)) + { + puts("Didn't find right record"); + goto end; + } + } + if (dupp_keys > 2) + { + if (!silent) + printf("- Read key (first) - next - delete - next -> last\n"); + DBUG_PRINT("progpos",("first - next - delete - next -> last")); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) goto err; + if (maria_rnext(file,read_record3,0)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=1; + while (maria_rnext(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-1) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-1); + goto end; + } + } + if (dupp_keys>4) + { + if (!silent) + printf("- Read last of key - prev - delete - prev -> first\n"); + DBUG_PRINT("progpos",("last - prev - delete - prev -> first")); + if 
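/*
  Counting how many rows share one key value, as done above for dupp_keys,
  reduces to maria_rkey() followed by maria_rnext_same() until it returns
  non-zero with my_errno == HA_ERR_END_OF_FILE.  Standalone sketch (buffer
  name illustrative):
*/
static uint count_same_key(MARIA_HA *file, char *key, char *buff)
{
  uint count;
  if (maria_rkey(file, buff, 0, key, 0, HA_READ_KEY_EXACT))
    return 0;
  for (count= 1; !maria_rnext_same(file, buff); count++)
    ;
  return count;                       /* loop ends at HA_ERR_END_OF_FILE */
}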
(maria_rprev(file,read_record3,0)) goto err; + if (maria_rprev(file,read_record3,0)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=1; + while (maria_rprev(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-2) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-2); + goto end; + } + } + if (dupp_keys > 6) + { + if (!silent) + printf("- Read first - delete - next -> last\n"); + DBUG_PRINT("progpos",("first - delete - next -> last")); + if (maria_rkey(file,read_record3,0,key,0,HA_READ_KEY_EXACT)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=1; + if (maria_rnext(file,read_record,0)) + goto err; /* Skall finnas poster */ + while (maria_rnext(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-3) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-3); + goto end; + } + + if (!silent) + printf("- Read last - delete - prev -> first\n"); + DBUG_PRINT("progpos",("last - delete - prev -> first")); + if (maria_rprev(file,read_record3,0)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=0; + while (maria_rprev(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-4) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-4); + goto end; + } + } + + if (!silent) + puts("- Test if: Read rrnd - same"); + DBUG_PRINT("progpos",("Read rrnd - same")); + assert(maria_scan_init(file) == 0); + for (i=0 ; i < write_count ; i++) + { + int tmp; + if ((tmp= maria_scan(file,read_record)) && + tmp != HA_ERR_END_OF_FILE && + tmp != HA_ERR_RECORD_DELETED) + { + printf("Got error %d when scanning table\n", tmp); + break; + } + } + maria_scan_end(file); + if (i != write_count && i != write_count - opt_delete) + { + printf("Found wrong number of rows while scanning table\n"); + goto err; + } + + bmove(read_record2,read_record,reclength); + for (i=min(2,keys) ; i-- > 0 ;) + { + if (maria_rsame(file,read_record2,(int) i)) goto err; + if (bcmp(read_record,read_record2,reclength) != 0) + { + printf("maria_rsame didn't find same record\n"); + goto end; + } + } + if (!silent) + puts("- Test maria_records_in_range"); + maria_status(file,&info,HA_STATUS_VARIABLE); + for (i=0 ; i < info.keys ; i++) + { + key_range min_key, max_key; + if (maria_rfirst(file,read_record,(int) i) || + maria_rlast(file,read_record2,(int) i)) + goto err; + copy_key(file,(uint) i,(uchar*) read_record,(uchar*) key); + copy_key(file,(uint) i,(uchar*) read_record2,(uchar*) key2); + min_key.key= key; + min_key.length= USE_WHOLE_KEY; + min_key.flag= HA_READ_KEY_EXACT; + max_key.key= key2; + max_key.length= USE_WHOLE_KEY; + max_key.flag= HA_READ_AFTER_KEY; + + range_records= maria_records_in_range(file,(int) i, &min_key, &max_key); + if (range_records < info.records*8/10 || + range_records > info.records*12/10) + { + printf("maria_records_range returned %ld; Should be about %ld\n", + (long) range_records,(long) info.records); + goto end; + } + if (verbose) + { + printf("maria_records_range returned %ld; Exact is %ld (diff: %4.2g %%)\n", + (long) range_records, (long) info.records, + labs((long) range_records - (long) info.records)*100.0/ + info.records); + } + } + for (i=0 ; i < 5 ; i++) + { + for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ; + for (k=rnd(1000)+1 ; k>0 && key1[k] == 0 ; k--) ; + if (j != 0 && k != 0) + { + key_range min_key, max_key; + if (j > k) + 
swap_variables(int, j, k); + sprintf(key,"%6d",j); + sprintf(key2,"%6d",k); + + min_key.key= key; + min_key.length= USE_WHOLE_KEY; + min_key.flag= HA_READ_AFTER_KEY; + max_key.key= key2; + max_key.length= USE_WHOLE_KEY; + max_key.flag= HA_READ_BEFORE_KEY; + range_records= maria_records_in_range(file, 0, &min_key, &max_key); + records=0; + for (j++ ; j < k ; j++) + records+=key1[j]; + if ((long) range_records < (long) records*7/10-2 || + (long) range_records > (long) records*14/10+2) + { + printf("maria_records_range for key: %d returned %lu; Should be about %lu\n", + i, (ulong) range_records, (ulong) records); + goto end; + } + if (verbose && records) + { + printf("maria_records_range returned %lu; Exact is %lu (diff: %4.2g %%)\n", + (ulong) range_records, (ulong) records, + labs((long) range_records-(long) records)*100.0/records); + + } + } + } + + if (!silent) + printf("- maria_info\n"); + maria_status(file,&info,HA_STATUS_VARIABLE | HA_STATUS_CONST); + if (info.records != write_count-opt_delete || info.deleted > opt_delete + update + || info.keys != keys) + { + puts("Wrong info from maria_info"); + printf("Got: records: %lu delete: %lu i_keys: %d\n", + (ulong) info.records, (ulong) info.deleted, info.keys); + } + if (verbose) + { + char buff[80]; + get_date(buff,3,info.create_time); + printf("info: Created %s\n",buff); + get_date(buff,3,info.check_time); + printf("info: checked %s\n",buff); + get_date(buff,3,info.update_time); + printf("info: Modified %s\n",buff); + } + + maria_panic(HA_PANIC_WRITE); + maria_panic(HA_PANIC_READ); + if (maria_is_changed(file)) + puts("Warning: maria_is_changed reported that datafile was changed"); + + if (!silent) + printf("- maria_extra(CACHE) + maria_rrnd.... + maria_extra(NO_CACHE)\n"); + if (maria_reset(file) || maria_extra(file,HA_EXTRA_CACHE,0)) + { + if (locking || (!use_blob && !pack_fields)) + { + puts("got error from maria_extra(HA_EXTRA_CACHE)"); + goto end; + } + } + ant=0; + assert(maria_scan_init(file) == 0); + while ((error= maria_scan(file,record)) != HA_ERR_END_OF_FILE && + ant < write_count + 10) + ant+= error ? 0 : 1; + maria_scan_end(file); + if (ant != write_count-opt_delete) + { + printf("scan with cache: I can only find: %d records of %d\n", + ant,write_count-opt_delete); + goto end; + } + if (maria_extra(file,HA_EXTRA_NO_CACHE,0)) + { + puts("got error from maria_extra(HA_EXTRA_NO_CACHE)"); + goto end; + } + + ant=0; + maria_scan_init(file); + while ((error=maria_scan(file,record)) != HA_ERR_END_OF_FILE && + ant < write_count + 10) + ant+= error ? 
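/*
  The range estimates checked above are obtained by filling two key_range
  structures and asking the handler for an approximation; a minimal sketch
  using the same flags as the whole-index case (key buffers illustrative):
*/
static ha_rows estimate_whole_index(MARIA_HA *file, uint inx,
                                    char *first_key, char *last_key)
{
  key_range min_key, max_key;

  min_key.key= first_key;
  min_key.length= USE_WHOLE_KEY;
  min_key.flag= HA_READ_KEY_EXACT;

  max_key.key= last_key;
  max_key.length= USE_WHOLE_KEY;
  max_key.flag= HA_READ_AFTER_KEY;

  return maria_records_in_range(file, (int) inx, &min_key, &max_key);
}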
0 : 1; + if (ant != write_count-opt_delete) + { + printf("scan with cache: I can only find: %d records of %d\n", + ant,write_count-opt_delete); + goto end; + } + + if (testflag == 4) + goto end; + + if (!silent) + printf("- Removing keys\n"); + DBUG_PRINT("progpos",("Removing keys")); + lastpos = HA_OFFSET_ERROR; + /* DBUG_POP(); */ + maria_reset(file); + found_parts=0; + maria_scan_init(file); + while ((error= maria_scan(file,read_record)) != HA_ERR_END_OF_FILE) + { + info.recpos=maria_position(file); + if (lastpos >= info.recpos && lastpos != HA_OFFSET_ERROR) + { + printf("maria_rrnd didn't advance filepointer; old: %ld, new: %ld\n", + (long) lastpos, (long) info.recpos); + goto err; + } + lastpos=info.recpos; + if (error == 0) + { + if (opt_delete == (uint) remove_count) /* While testing */ + goto end; + if (rnd(2) == 1 && maria_rsame(file,read_record,-1)) + { + printf("can't find record %lx\n",(long) info.recpos); + goto err; + } + if (use_blob) + { + ulong blob_length,pos; + uchar *ptr; + longget(blob_length,read_record+blob_pos+4); + ptr=(uchar*) blob_length; + longget(blob_length,read_record+blob_pos); + for (pos=0 ; pos < blob_length ; pos++) + { + if (ptr[pos] != (uchar) (blob_length+pos)) + { + printf("Found blob with wrong info at %ld\n",(long) lastpos); + maria_scan_end(file); + my_errno= 0; + goto err; + } + } + } + if (maria_delete(file,read_record)) + { + printf("can't delete record: %6.6s, delete_count: %d\n", + read_record, opt_delete); + maria_scan_end(file); + goto err; + } + opt_delete++; + } + else + found_parts++; + } + maria_scan_end(file); + if (my_errno != HA_ERR_END_OF_FILE && my_errno != HA_ERR_RECORD_DELETED) + printf("error: %d from maria_rrnd\n",my_errno); + if (write_count != opt_delete) + { + printf("Deleted only %d of %d records (%d parts)\n",opt_delete,write_count, + found_parts); + goto err; + } +end: + if (maria_close(file)) + goto err; + maria_panic(HA_PANIC_CLOSE); /* Should close log */ + if (!silent) + { + printf("\nFollowing test have been made:\n"); + printf("Write records: %d\nUpdate records: %d\nSame-key-read: %d\nDelete records: %d\n", write_count,update,dupp_keys,opt_delete); + if (rec_pointer_size) + printf("Record pointer size: %d\n",rec_pointer_size); + printf("maria_block_size: %lu\n", maria_block_size); + if (key_cacheing) + { + puts("Key cache used"); + printf("key_cache_block_size: %u\n", key_cache_block_size); + if (write_cacheing) + puts("Key cache resized"); + } + if (write_cacheing) + puts("Write cacheing used"); + if (write_cacheing) + puts("quick mode"); + if (async_io && locking) + puts("Asyncron io with locking used"); + else if (locking) + puts("Locking used"); + if (use_blob) + puts("blobs used"); + printf("key cache status: \n\ +blocks used:%10lu\n\ +not flushed:%10lu\n\ +w_requests: %10lu\n\ +writes: %10lu\n\ +r_requests: %10lu\n\ +reads: %10lu\n", + maria_key_cache->blocks_used, + maria_key_cache->global_blocks_changed, + (ulong) maria_key_cache->global_cache_w_requests, + (ulong) maria_key_cache->global_cache_write, + (ulong) maria_key_cache->global_cache_r_requests, + (ulong) maria_key_cache->global_cache_read); + } + end_key_cache(maria_key_cache,1); + my_free(blob_buffer, MYF(MY_ALLOW_ZERO_PTR)); + my_end(silent ? 
MY_CHECK_ERROR : MY_CHECK_ERROR | MY_GIVE_INFO); + return(0); +err: + printf("got error: %d when using MARIA-database\n",my_errno); + if (file) + VOID(maria_close(file)); + maria_end(); + return(1); +} /* main */ + + +/* Read options */ + +static void get_options(int argc, char **argv) +{ + char *pos,*progname; + + progname= argv[0]; + + while (--argc >0 && *(pos = *(++argv)) == '-' ) { + switch(*++pos) { + case 'B': + pack_type= HA_BINARY_PACK_KEY; + break; + case 'b': + use_blob= 1; + if (*++pos) + use_blob= atol(pos); + break; + case 'K': /* Use key cacheing */ + key_cacheing=1; + if (*++pos) + key_cache_size=atol(pos); + break; + case 'W': /* Use write cacheing */ + write_cacheing=1; + if (*++pos) + my_default_record_cache_size=atoi(pos); + break; + case 'd': + remove_count= atoi(++pos); + break; + case 'i': + if (*++pos) + srand(srand_arg= atoi(pos)); + break; + case 'L': + locking=1; + break; + case 'A': /* use asyncron io */ + async_io=1; + if (*++pos) + my_default_record_cache_size=atoi(pos); + break; + case 'v': /* verbose */ + verbose=1; + break; + case 'm': /* records */ + if ((recant=atoi(++pos)) < 10 && testflag > 1) + { + fprintf(stderr,"record count must be >= 10 (if testflag != 1)\n"); + exit(1); + } + break; + case 'e': /* maria_block_length */ + if ((maria_block_size= atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH || + maria_block_size > MARIA_MAX_KEY_BLOCK_LENGTH) + { + fprintf(stderr,"Wrong maria_block_length\n"); + exit(1); + } + maria_block_size= my_round_up_to_next_power(maria_block_size); + break; + case 'E': /* maria_block_length */ + if ((key_cache_block_size=atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH || + key_cache_block_size > MARIA_MAX_KEY_BLOCK_LENGTH) + { + fprintf(stderr,"Wrong key_cache_block_size\n"); + exit(1); + } + key_cache_block_size= my_round_up_to_next_power(key_cache_block_size); + break; + case 'f': + if ((first_key=atoi(++pos)) < 0 || first_key >= MARIA_KEYS) + first_key=0; + break; + case 'k': + if ((keys=(uint) atoi(++pos)) < 1 || + keys > (uint) (MARIA_KEYS-first_key)) + keys=MARIA_KEYS-first_key; + break; + case 'M': + record_type= BLOCK_RECORD; + break; + case 'P': + pack_type=0; /* Don't use DIFF_LENGTH */ + pack_seg=0; + break; + case 'R': /* Length of record pointer */ + rec_pointer_size=atoi(++pos); + if (rec_pointer_size > 7) + rec_pointer_size=0; + break; + case 'S': + pack_fields=0; /* Static-length-records */ + record_type= STATIC_RECORD; + break; + case 's': + silent=1; + break; + case 't': + testflag=atoi(++pos); /* testmod */ + break; + case 'q': + opt_quick_mode=1; + break; + case 'c': + create_flag|= HA_CREATE_CHECKSUM; + break; + case 'D': + create_flag|=HA_CREATE_DELAY_KEY_WRITE; + break; + case '?': + case 'I': + case 'V': + printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE); + puts("By Monty, for your professional use\n"); + printf("Usage: %s [-?AbBcDIKLPRqSsVWltv] [-k#] [-f#] [-m#] [-e#] [-E#] [-t#]\n", + progname); + exit(0); + case '#': + DBUG_PUSH (++pos); + break; + default: + printf("Illegal option: '%c'\n",*pos); + break; + } + } + return; +} /* get options */ + + /* Get a random value 0 <= x <= n */ + +static uint rnd(uint max_value) +{ + return (uint) ((rand() & 32767)/32767.0*max_value); +} /* rnd */ + + + /* Create a variable length record */ + +static void fix_length(byte *rec, uint length) +{ + bmove(rec+STANDARD_LENGTH, + "0123456789012345678901234567890123456789012345678901234567890", + length-STANDARD_LENGTH); + strfill(rec+length,STANDARD_LENGTH+60-length,' '); +} /* fix_length */ + + + /* Put 
maybe a blob in record */ + +static void put_blob_in_record(char *blob_pos, char **blob_buffer, + ulong *blob_length) +{ + ulong i,length; + if (use_blob) + { + if (rnd(10) == 0) + { + if (! *blob_buffer && + !(*blob_buffer=my_malloc((uint) use_blob,MYF(MY_WME)))) + { + use_blob=0; + return; + } + length=rnd(use_blob); + for (i=0 ; i < length ; i++) + (*blob_buffer)[i]=(char) (length+i); + int4store(blob_pos,length); + memcpy_fixed(blob_pos+4,(char*) blob_buffer,sizeof(char*)); + *blob_length= length; + } + else + { + int4store(blob_pos,0); + *blob_length= 0; + } + } + return; +} + + +static void copy_key(MARIA_HA *info,uint inx,uchar *rec,uchar *key_buff) +{ + HA_KEYSEG *keyseg; + + for (keyseg=info->s->keyinfo[inx].seg ; keyseg->type ; keyseg++) + { + memcpy(key_buff,rec+keyseg->start,(size_t) keyseg->length); + key_buff+=keyseg->length; + } + return; +} diff --git a/storage/maria/ma_test3.c b/storage/maria/ma_test3.c new file mode 100644 index 00000000000..f6ed248ce16 --- /dev/null +++ b/storage/maria/ma_test3.c @@ -0,0 +1,501 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Test av locking */ + +#ifndef __NETWARE__ + +#include "maria.h" +#include <sys/types.h> +#ifdef HAVE_SYS_WAIT_H +# include <sys/wait.h> +#endif +#ifndef WEXITSTATUS +# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8) +#endif +#ifndef WIFEXITED +# define WIFEXITED(stat_val) (((stat_val) & 255) == 0) +#endif + + +#if defined(HAVE_LRAND48) +#define rnd(X) (lrand48() % X) +#define rnd_init(X) srand48(X) +#else +#define rnd(X) (random() % X) +#define rnd_init(X) srandom(X) +#endif + + +const char *filename= "test3"; +uint tests=10,forks=10,key_cacheing=0; + +static void get_options(int argc, char *argv[]); +void start_test(int id); +int test_read(MARIA_HA *,int),test_write(MARIA_HA *,int,int), + test_update(MARIA_HA *,int,int),test_rrnd(MARIA_HA *,int); + +struct record { + char id[8]; + char nr[4]; + char text[10]; +} record; + + +int main(int argc,char **argv) +{ + int status,wait_ret; + uint i=0; + MARIA_KEYDEF keyinfo[10]; + MARIA_COLUMNDEF recinfo[10]; + HA_KEYSEG keyseg[10][2]; + MY_INIT(argv[0]); + get_options(argc,argv); + + fprintf(stderr, "WARNING! this program is to test 'external locking'" + " (when several processes share a table through file locking)" + " which is not supported by Maria at all; expect errors." 
+ " We may soon remove this program.\n"); + maria_init(); + bzero((char*) keyinfo,sizeof(keyinfo)); + bzero((char*) recinfo,sizeof(recinfo)); + bzero((char*) keyseg,sizeof(keyseg)); + keyinfo[0].seg= &keyseg[0][0]; + keyinfo[0].seg[0].start=0; + keyinfo[0].seg[0].length=8; + keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[0].seg[0].flag=HA_SPACE_PACK; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; + keyinfo[0].keysegs=1; + keyinfo[0].flag = (uint8) HA_PACK_KEY; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[1].seg= &keyseg[1][0]; + keyinfo[1].seg[0].start=8; + keyinfo[1].seg[0].length=4; /* Long is always 4 in maria */ + keyinfo[1].seg[0].type=HA_KEYTYPE_LONG_INT; + keyinfo[1].seg[0].flag=0; + keyinfo[1].key_alg=HA_KEY_ALG_BTREE; + keyinfo[1].keysegs=1; + keyinfo[1].flag =HA_NOSAME; + keyinfo[1].block_length= 0; /* Default block length */ + + recinfo[0].type=0; + recinfo[0].length=sizeof(record.id); + recinfo[1].type=0; + recinfo[1].length=sizeof(record.nr); + recinfo[2].type=0; + recinfo[2].length=sizeof(record.text); + + puts("- Creating maria-file"); + my_delete(filename,MYF(0)); /* Remove old locks under gdb */ + if (maria_create(filename,BLOCK_RECORD, 2, &keyinfo[0],2,&recinfo[0],0, + (MARIA_UNIQUEDEF*) 0, (MARIA_CREATE_INFO*) 0,0)) + exit(1); + + rnd_init(0); + printf("- Starting %d processes\n",forks); fflush(stdout); + for (i=0 ; i < forks; i++) + { + if (!fork()) + { + start_test(i+1); + sleep(1); + return 0; + } + VOID(rnd(1)); + } + + for (i=0 ; i < forks ; i++) + while ((wait_ret=wait(&status)) && wait_ret == -1); + maria_end(); + return 0; +} + + +static void get_options(int argc, char **argv) +{ + char *pos,*progname; + + progname= argv[0]; + + while (--argc >0 && *(pos = *(++argv)) == '-' ) { + switch(*++pos) { + case 'f': + forks=atoi(++pos); + break; + case 't': + tests=atoi(++pos); + break; + case 'K': /* Use key cacheing */ + key_cacheing=1; + break; + case 'A': /* All flags */ + key_cacheing=1; + break; + case '?': + case 'I': + case 'V': + printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE); + puts("By Monty, for your professional use\n"); + puts("Test av locking with threads\n"); + printf("Usage: %s [-?lKA] [-f#] [-t#]\n",progname); + exit(0); + case '#': + DBUG_PUSH (++pos); + break; + default: + printf("Illegal option: '%c'\n",*pos); + break; + } + } + return; +} + + +void start_test(int id) +{ + uint i; + int error,lock_type; + MARIA_INFO isam_info; + MARIA_HA *file,*file1,*file2=0,*lock; + + if (!(file1=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED)) || + !(file2=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED))) + { + fprintf(stderr,"Can't open isam-file: %s\n",filename); + exit(1); + } + if (key_cacheing && rnd(2) == 0) + init_key_cache(maria_key_cache, KEY_CACHE_BLOCK_SIZE, 65536L, 0, 0); + printf("Process %d, pid: %d\n",id,getpid()); fflush(stdout); + + for (error=i=0 ; i < tests && !error; i++) + { + file= (rnd(2) == 1) ? file1 : file2; + lock=0 ; lock_type=0; + if (rnd(10) == 0) + { + if (maria_lock_database(lock=(rnd(2) ? file1 : file2), + lock_type=(rnd(2) == 0 ? 
F_RDLCK : F_WRLCK))) + { + fprintf(stderr,"%2d: start: Can't lock table %d\n",id,my_errno); + error=1; + break; + } + } + switch (rnd(4)) { + case 0: error=test_read(file,id); break; + case 1: error=test_rrnd(file,id); break; + case 2: error=test_write(file,id,lock_type); break; + case 3: error=test_update(file,id,lock_type); break; + } + if (lock) + maria_lock_database(lock,F_UNLCK); + } + if (!error) + { + maria_status(file1,&isam_info,HA_STATUS_VARIABLE); + printf("%2d: End of test. Records: %ld Deleted: %ld\n", + id,(long) isam_info.records, (long) isam_info.deleted); + fflush(stdout); + } + + maria_close(file1); + maria_close(file2); + if (error) + { + printf("%2d: Aborted\n",id); fflush(stdout); + exit(1); + } +} + + +int test_read(MARIA_HA *file,int id) +{ + uint i,lock,found,next,prev; + ulong find; + + lock=0; + if (rnd(2) == 0) + { + lock=1; + if (maria_lock_database(file,F_RDLCK)) + { + fprintf(stderr,"%2d: Can't lock table %d\n",id,my_errno); + return 1; + } + } + + found=next=prev=0; + for (i=0 ; i < 100 ; i++) + { + find=rnd(100000); + if (!maria_rkey(file,record.id,1,(byte*) &find, + sizeof(find),HA_READ_KEY_EXACT)) + found++; + else + { + if (my_errno != HA_ERR_KEY_NOT_FOUND) + { + fprintf(stderr,"%2d: Got error %d from read in read\n",id,my_errno); + return 1; + } + else if (!maria_rnext(file,record.id,1)) + next++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in read\n",id,my_errno); + return 1; + } + else if (!maria_rprev(file,record.id,1)) + prev++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in read\n", + id,my_errno); + return 1; + } + } + } + } + } + if (lock) + { + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"%2d: Can't unlock table\n",id); + return 1; + } + } + printf("%2d: read: found: %5d next: %5d prev: %5d\n", + id,found,next,prev); + fflush(stdout); + return 0; +} + + +int test_rrnd(MARIA_HA *file,int id) +{ + uint count,lock; + + lock=0; + if (rnd(2) == 0) + { + lock=1; + if (maria_lock_database(file,F_RDLCK)) + { + fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno); + maria_close(file); + return 1; + } + if (rnd(2) == 0) + maria_extra(file,HA_EXTRA_CACHE,0); + } + + count=0; + if (maria_rrnd(file,record.id,0L)) + { + if (my_errno == HA_ERR_END_OF_FILE) + goto end; + fprintf(stderr,"%2d: Can't read first record (%d)\n",id,my_errno); + return 1; + } + for (count=1 ; !maria_rrnd(file,record.id,HA_OFFSET_ERROR) ;count++) ; + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rrnd\n",id,my_errno); + return 1; + } + +end: + if (lock) + { + maria_extra(file,HA_EXTRA_NO_CACHE,0); + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"%2d: Can't unlock table\n",id); + exit(0); + } + } + printf("%2d: rrnd: %5d\n",id,count); fflush(stdout); + return 0; +} + + +int test_write(MARIA_HA *file,int id,int lock_type) +{ + uint i,tries,count,lock; + + lock=0; + if (rnd(2) == 0 || lock_type == F_RDLCK) + { + lock=1; + if (maria_lock_database(file,F_WRLCK)) + { + if (lock_type == F_RDLCK && my_errno == EDEADLK) + { + printf("%2d: write: deadlock\n",id); fflush(stdout); + return 0; + } + fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno); + maria_close(file); + return 1; + } + if (rnd(2) == 0) + maria_extra(file,HA_EXTRA_WRITE_CACHE,0); + } + + sprintf(record.id,"%7d",getpid()); + strnmov(record.text,"Testing...", sizeof(record.text)); + + tries=(uint) rnd(100)+10; + for (i=count=0 ; i < tries ; i++) 
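/*
  Every worker in this locking test brackets its table access the same way:
  take an explicit lock with maria_lock_database(), do the reads or writes,
  then release it.  A condensed sketch of a locked point lookup on the
  numeric key, mirroring test_read()/test_update() (error handling trimmed,
  helper name illustrative):
*/
static int locked_read(MARIA_HA *file, char *buff, uint32 nr)
{
  int res;
  char key[4];
  if (maria_lock_database(file, F_RDLCK))
    return my_errno;
  int4store(key, nr);
  res= maria_rkey(file, buff, 1, (byte*) key, sizeof(key), HA_READ_KEY_EXACT);
  maria_lock_database(file, F_UNLCK);
  return res ? my_errno : 0;
}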
+ { + uint32 tmp=rnd(80000)+20000; + int4store(record.nr,tmp); + if (!maria_write(file,record.id)) + count++; + else + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY) + { + fprintf(stderr,"%2d: Got error %d (errno %d) from write\n",id,my_errno, + errno); + return 1; + } + } + } + if (lock) + { + maria_extra(file,HA_EXTRA_NO_CACHE,0); + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"%2d: Can't unlock table\n",id); + exit(0); + } + } + printf("%2d: write: %5d\n",id,count); fflush(stdout); + return 0; +} + + +int test_update(MARIA_HA *file,int id,int lock_type) +{ + uint i,lock,found,next,prev,update; + uint32 tmp; + char find[4]; + struct record new_record; + + lock=0; + if (rnd(2) == 0 || lock_type == F_RDLCK) + { + lock=1; + if (maria_lock_database(file,F_WRLCK)) + { + if (lock_type == F_RDLCK && my_errno == EDEADLK) + { + printf("%2d: write: deadlock\n",id); fflush(stdout); + return 0; + } + fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno); + return 1; + } + } + bzero((char*) &new_record,sizeof(new_record)); + strmov(new_record.text,"Updated"); + + found=next=prev=update=0; + for (i=0 ; i < 100 ; i++) + { + tmp=rnd(100000); + int4store(find,tmp); + if (!maria_rkey(file,record.id,1,(byte*) find, + sizeof(find),HA_READ_KEY_EXACT)) + found++; + else + { + if (my_errno != HA_ERR_KEY_NOT_FOUND) + { + fprintf(stderr,"%2d: Got error %d from read in update\n",id,my_errno); + return 1; + } + else if (!maria_rnext(file,record.id,1)) + next++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in update\n", + id,my_errno); + return 1; + } + else if (!maria_rprev(file,record.id,1)) + prev++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in update\n", + id,my_errno); + return 1; + } + continue; + } + } + } + memcpy_fixed(new_record.id,record.id,sizeof(record.id)); + tmp=rnd(20000)+40000; + int4store(new_record.nr,tmp); + if (!maria_update(file,record.id,new_record.id)) + update++; + else + { + if (my_errno != HA_ERR_RECORD_CHANGED && + my_errno != HA_ERR_RECORD_DELETED && + my_errno != HA_ERR_FOUND_DUPP_KEY) + { + fprintf(stderr,"%2d: Got error %d from update\n",id,my_errno); + return 1; + } + } + } + if (lock) + { + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"Can't unlock table,id, error%d\n",my_errno); + return 1; + } + } + printf("%2d: update: %5d\n",id,update); fflush(stdout); + return 0; +} + +#else /* __NETWARE__ */ + +#include <stdio.h> + +main() +{ + fprintf(stderr,"this test has not been ported to NetWare\n"); + return 0; +} + +#endif /* __NETWARE__ */ diff --git a/storage/maria/ma_test_all.res b/storage/maria/ma_test_all.res new file mode 100644 index 00000000000..57b0feeeae8 --- /dev/null +++ b/storage/maria/ma_test_all.res @@ -0,0 +1,62 @@ +Running tests with dynamic row format +Running tests with static row format +Running tests with block row format +ma_test2 -s -L -K -R1 -m2000 ; Should give error 135 +Error: 135 in write at record: 1099 +got error: 135 when using MARIA-database +./maria_chk -sm test2 will warn that 'Datafile is almost full' +maria_chk: MARIA file test2 +maria_chk: warning: Datafile is almost full, 65516 of 65534 used +MARIA-table 'test2' is usable but should be fixed + +real 0m0.808s +user 0m0.584s +sys 0m0.212s + +real 0m0.780s +user 0m0.584s +sys 0m0.176s + +real 0m0.809s +user 0m0.616s +sys 0m0.180s + +real 0m1.356s +user 0m1.140s +sys 0m0.188s + +real 0m0.783s +user 0m0.600s +sys 0m0.176s + +real 0m1.390s +user 0m1.184s +sys 
0m0.152s + +real 0m1.875s +user 0m1.632s +sys 0m0.244s + +real 0m1.313s +user 0m1.148s +sys 0m0.160s + +real 0m1.846s +user 0m1.644s +sys 0m0.188s + +real 0m1.875s +user 0m1.632s +sys 0m0.212s + +real 0m1.819s +user 0m1.672s +sys 0m0.124s + +real 0m2.117s +user 0m1.816s +sys 0m0.292s + +real 0m1.871s +user 0m1.636s +sys 0m0.196s diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh new file mode 100755 index 00000000000..17e654ac51f --- /dev/null +++ b/storage/maria/ma_test_all.sh @@ -0,0 +1,192 @@ +#!/bin/sh +# +# Execute some simple basic test on MyISAM libary to check if things +# works at all. + +valgrind="valgrind --alignment=8 --leak-check=yes" +silent="-s" +suffix="" +#set -x -v -e + +run_tests() +{ + row_type=$1 + # + # First some simple tests + # + ./ma_test1$suffix $silent $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent -N $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent -P --checksum $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent -P -N $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent -B -N -R2 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -k 480 --unique $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -N -R1 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p -N --unique $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p -N --key_length=127 --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p -N --key_length=128 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -B $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -B --key_length=64 --unique $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -B -k 480 --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -B -k 480 -N --unique --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -m $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -m -P --unique --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -m -P --key_length=480 --key_cache $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -m -p $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -w --unique $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -w --key_length=64 --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -w -N --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -w --key_length=480 --checksum $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -b -N $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -a -b --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent -p -B --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent --checksum --unique $row_type + ./maria_chk$suffix -se test1 + ./ma_test1$suffix $silent --unique $row_type + ./maria_chk$suffix -se test1 + + ./ma_test1$suffix $silent --key_multiple -N -S $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent --key_multiple -a -p --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent --key_multiple -a 
-B --key_length=480 $row_type + ./maria_chk$suffix -sm test1 + ./ma_test1$suffix $silent --key_multiple -P -S $row_type + ./maria_chk$suffix -sm test1 + + ./maria_pack$suffix --force -s test1 + ./maria_chk$suffix -ess test1 + + ./ma_test2$suffix $silent -L -K -W -P $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -L -K -W -P -A $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -L -K -P -R3 -m50 -b1000000 $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -L -B $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -D -B -c $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -m10000 -e4096 -K $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -m10000 -e8192 -K $row_type + ./maria_chk$suffix -sm test2 + ./ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L $row_type + ./maria_chk$suffix -sm test2 +} + +run_repair_tests() +{ + row_type=$1 + ./ma_test1$suffix $silent --checksum $row_type + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rs test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rqs test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rs --correct-checksum test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rqs --correct-checksum test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -ros --correct-checksum test1 + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -rqos --correct-checksum test1 + ./maria_chk$suffix -se test1 +} + +run_pack_tests() +{ + row_type=$1 + # check of maria_pack / maria_chk + ./ma_test1$suffix $silent --checksum $row_type + ./maria_pack$suffix --force -s test1 + ./maria_chk$suffix -ess test1 + ./maria_chk$suffix -rqs test1 + ./maria_chk$suffix -es test1 + ./maria_chk$suffix -rs test1 + ./maria_chk$suffix -es test1 + ./maria_chk$suffix -rus test1 + ./maria_chk$suffix -es test1 + + ./ma_test1$suffix $silent --checksum -S $row_type + ./maria_chk$suffix -se test1 + ./maria_chk$suffix -ros test1 + ./maria_chk$suffix -rqs test1 + ./maria_chk$suffix -se test1 + + ./maria_pack$suffix --force -s test1 + ./maria_chk$suffix -rqs test1 + ./maria_chk$suffix -es test1 + ./maria_chk$suffix -rus test1 + ./maria_chk$suffix -es test1 +} + +echo "Running tests with dynamic row format" +run_tests "" +run_repair_tests "" +run_pack_tests "" + +echo "Running tests with static row format" +run_tests -S +run_repair_tests -S +run_pack_tests -S + +echo "Running tests with block row format" +run_tests -M + +# +# Tests that gives warnings +# + +./ma_test2$suffix $silent -L -K -W -P -S -R1 -m500 +./maria_chk$suffix -sm test2 +echo "ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135" +./ma_test2$suffix $silent -L -K -R1 -m2000 +echo "./maria_chk$suffix -sm test2 will warn that 'Datafile is almost full'" +./maria_chk$suffix -sm test2 +./maria_chk$suffix -ssm test2 + +# +# Some timing tests +# +time ./ma_test2$suffix $silent +time ./ma_test2$suffix $silent -S +time ./ma_test2$suffix $silent -M +time ./ma_test2$suffix $silent -B +time ./ma_test2$suffix $silent -L +time ./ma_test2$suffix $silent -K +time ./ma_test2$suffix $silent -K -B +time ./ma_test2$suffix $silent -L -B +time ./ma_test2$suffix $silent -L -K -B +time ./ma_test2$suffix $silent -L -K -W -B +time ./ma_test2$suffix $silent -L -K -W -B -S +time ./ma_test2$suffix $silent -L -K -W -B -M +time ./ma_test2$suffix $silent -D -K -W -B -S diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c new file mode 100644 index 00000000000..5d0133c8ac1 --- /dev/null +++ 
b/storage/maria/ma_unique.c @@ -0,0 +1,236 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Functions to check if a row is unique */ + +#include "maria_def.h" +#include <m_ctype.h> + +my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record, + ha_checksum unique_hash, my_off_t disk_pos) +{ + my_off_t lastpos=info->cur_row.lastpos; + MARIA_KEYDEF *key= &info->s->keyinfo[def->key]; + byte *key_buff= info->lastkey2; + DBUG_ENTER("_ma_check_unique"); + DBUG_PRINT("enter",("unique_hash: %lu", (ulong) unique_hash)); + + maria_unique_store(record+key->seg->start, unique_hash); + _ma_make_key(info,def->key,key_buff,record,0); + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + if (_ma_search(info,info->s->keyinfo+def->key,key_buff, + MARIA_UNIQUE_HASH_LENGTH, + SEARCH_FIND,info->s->state.key_root[def->key])) + { + info->page_changed=1; /* Can't optimize read next */ + info->cur_row.lastpos= lastpos; + DBUG_RETURN(0); /* No matching rows */ + } + + for (;;) + { + if (info->cur_row.lastpos != disk_pos && + !(*info->s->compare_unique)(info,def,record,info->cur_row.lastpos)) + { + my_errno=HA_ERR_FOUND_DUPP_UNIQUE; + info->errkey= (int) def->key; + info->dup_key_pos= info->cur_row.lastpos; + info->page_changed= 1; /* Can't optimize read next */ + info->cur_row.lastpos= lastpos; + DBUG_PRINT("info",("Found duplicate")); + DBUG_RETURN(1); /* Found identical */ + } + if (_ma_search_next(info,info->s->keyinfo+def->key, info->lastkey, + MARIA_UNIQUE_HASH_LENGTH, SEARCH_BIGGER, + info->s->state.key_root[def->key]) || + bcmp((char*) info->lastkey, (char*) key_buff, + MARIA_UNIQUE_HASH_LENGTH)) + { + info->page_changed= 1; /* Can't optimize read next */ + info->cur_row.lastpos= lastpos; + DBUG_RETURN(0); /* end of tree */ + } + } +} + + +/* + Calculate a hash for a row + + TODO + Add support for bit fields +*/ + +ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *record) +{ + const byte *pos, *end; + ha_checksum crc= 0; + ulong seed1=0, seed2= 4; + HA_KEYSEG *keyseg; + + for (keyseg=def->seg ; keyseg < def->end ; keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint length=keyseg->length; + + if (keyseg->null_bit) + { + if (record[keyseg->null_pos] & keyseg->null_bit) + { + /* + Change crc in a way different from an empty string or 0. + (This is an optimisation; The code will work even if this isn't + done) + */ + crc=((crc << 8) + 511+ + (crc >> (8*sizeof(ha_checksum)-8))); + continue; + } + } + pos= record+keyseg->start; + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= keyseg->bit_start; + uint tmp_length= (pack_length == 1 ? 
(uint) *(uchar*) pos : + uint2korr(pos)); + pos+= pack_length; /* Skip VARCHAR length */ + set_if_smaller(length,tmp_length); + } + else if (keyseg->flag & HA_BLOB_PART) + { + uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos); + memcpy_fixed((byte*) &pos,pos+keyseg->bit_start,sizeof(char*)); + if (!length || length > tmp_length) + length=tmp_length; /* The whole blob */ + } + end= pos+length; + if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || + type == HA_KEYTYPE_VARTEXT2) + { + keyseg->charset->coll->hash_sort(keyseg->charset, + (const uchar*) pos, length, &seed1, + &seed2); + crc^= seed1; + } + else + while (pos != end) + crc=((crc << 8) + + (((uchar) *(uchar*) pos++))) + + (crc >> (8*sizeof(ha_checksum)-8)); + } + return crc; +} + + +/* + compare unique key for two rows + + TODO + Add support for bit fields + + RETURN + 0 if both rows have equal unique value + 1 Rows are different +*/ + +my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, + my_bool null_are_equal) +{ + const byte *pos_a, *pos_b, *end; + HA_KEYSEG *keyseg; + + for (keyseg=def->seg ; keyseg < def->end ; keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint a_length, b_length; + a_length= b_length= keyseg->length; + + /* If part is NULL it's regarded as different */ + if (keyseg->null_bit) + { + uint tmp; + if ((tmp=(a[keyseg->null_pos] & keyseg->null_bit)) != + (uint) (b[keyseg->null_pos] & keyseg->null_bit)) + return 1; + if (tmp) + { + if (!null_are_equal) + return 1; + continue; + } + } + pos_a= a+keyseg->start; + pos_b= b+keyseg->start; + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= keyseg->bit_start; + if (pack_length == 1) + { + a_length= (uint) *(uchar*) pos_a++; + b_length= (uint) *(uchar*) pos_b++; + } + else + { + a_length= uint2korr(pos_a); + b_length= uint2korr(pos_b); + pos_a+= 2; /* Skip VARCHAR length */ + pos_b+= 2; + } + set_if_smaller(a_length, keyseg->length); /* Safety */ + set_if_smaller(b_length, keyseg->length); /* safety */ + } + else if (keyseg->flag & HA_BLOB_PART) + { + /* Only compare 'length' characters if length != 0 */ + a_length= _ma_calc_blob_length(keyseg->bit_start,pos_a); + b_length= _ma_calc_blob_length(keyseg->bit_start,pos_b); + /* Check that a and b are of equal length */ + if (keyseg->length) + { + /* + This is used in some cases when we are not interested in comparing + the whole length of the blob. 
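
The rolling checksum in _ma_unique_hash above can be tried in isolation. Below is a minimal standalone sketch of that folding step, assuming an unsigned long accumulator; the demo_* names and the main() driver are invented for illustration and are not part of this patch.

/* Standalone sketch (not part of the patch) of the rolling checksum idea
   used by _ma_unique_hash: each byte is folded into the accumulator and the
   top byte is rotated back in; NULL parts mix in the constant 511 so that a
   NULL hashes differently from an empty string or a binary zero. */
#include <stdio.h>
#include <string.h>

typedef unsigned long demo_checksum;            /* stand-in for ha_checksum */

static demo_checksum demo_hash_part(demo_checksum crc,
                                    const unsigned char *pos, size_t length,
                                    int is_null)
{
  if (is_null)                                  /* mix in 511 for NULL parts */
    return (crc << 8) + 511 + (crc >> (8 * sizeof(demo_checksum) - 8));
  {
    const unsigned char *end= pos + length;
    while (pos != end)
      crc= ((crc << 8) + *pos++) + (crc >> (8 * sizeof(demo_checksum) - 8));
  }
  return crc;
}

int main(void)
{
  const char *a= "maria";
  demo_checksum crc= demo_hash_part(0, (const unsigned char*) a, strlen(a), 0);
  crc= demo_hash_part(crc, NULL, 0, 1);         /* second key part is NULL */
  printf("hash: %lu\n", crc);
  return 0;
}
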
+ */ + set_if_smaller(a_length, keyseg->length); + set_if_smaller(b_length, keyseg->length); + } + memcpy_fixed((byte*) &pos_a,pos_a+keyseg->bit_start,sizeof(char*)); + memcpy_fixed((byte*) &pos_b,pos_b+keyseg->bit_start,sizeof(char*)); + } + if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || + type == HA_KEYTYPE_VARTEXT2) + { + if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length, + (uchar *) pos_b, b_length, 0, 1)) + return 1; + } + else + { + if (a_length != b_length) + return 1; + end= pos_a+a_length; + while (pos_a != end) + { + if (*pos_a++ != *pos_b++) + return 1; + } + } + } + return 0; +} diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c new file mode 100644 index 00000000000..e3e391dcccc --- /dev/null +++ b/storage/maria/ma_update.c @@ -0,0 +1,251 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Update an old row in a MARIA table */ + +#include "ma_fulltext.h" +#include "ma_rt_index.h" + +int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) +{ + int flag,key_changed,save_errno; + reg3 my_off_t pos; + uint i; + byte old_key[HA_MAX_KEY_BUFF],*new_key; + bool auto_key_changed=0; + ulonglong changed; + MARIA_SHARE *share=info->s; + ha_checksum old_checksum; + DBUG_ENTER("maria_update"); + LINT_INIT(new_key); + LINT_INIT(changed); + LINT_INIT(old_checksum); + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + if (!(info->update & HA_STATE_AKTIV)) + { + DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); + } + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (info->state->key_file_length >= share->base.margin_key_file_length) + { + DBUG_RETURN(my_errno=HA_ERR_INDEX_FILE_FULL); + } + pos= info->cur_row.lastpos; + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + + if ((*share->compare_record)(info,oldrec)) + { + save_errno= my_errno; + DBUG_PRINT("warning", ("Got error from compare record")); + goto err_end; /* Record has changed */ + } + + if (share->calc_checksum) + { + /* + We can't use the row based checksum as this doesn't have enough + precision. 
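
maria_update below keeps the table-level checksum as a running sum of per-row checksums, which is why it computes old_checksum right after this comment and later adds (new - old) to info->state->checksum. A toy model of that bookkeeping, with an invented placeholder row hash (not the patch's checksum function):

/* Toy illustration (not from the patch): the table checksum is the sum of
   per-row checksums, so an update only needs the old and new row checksums,
   never a rescan of the whole table. */
#include <stdio.h>

static unsigned long row_checksum(const char *row)
{
  unsigned long crc= 0;
  while (*row)
    crc= crc * 31 + (unsigned char) *row++;     /* placeholder row hash */
  return crc;
}

int main(void)
{
  unsigned long table_sum= row_checksum("old row") + row_checksum("other row");
  /* update "old row" -> "new row": adjust by the difference of the row sums */
  table_sum+= row_checksum("new row") - row_checksum("old row");
  printf("table checksum: %lu\n", table_sum);
  return 0;
}
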
+ */ + if (info->s->calc_checksum) + old_checksum= (*info->s->calc_checksum)(info, oldrec); + } + + /* Calculate and check all unique constraints */ + key_changed=0; + for (i=0 ; i < share->state.header.uniques ; i++) + { + MARIA_UNIQUEDEF *def=share->uniqueinfo+i; + if (_ma_unique_comp(def, newrec, oldrec,1) && + _ma_check_unique(info, def, newrec, _ma_unique_hash(def, newrec), + pos)) + { + save_errno=my_errno; + goto err_end; + } + } + if (_ma_mark_file_changed(info)) + { + save_errno=my_errno; + goto err_end; + } + + /* Check which keys changed from the original row */ + + new_key= info->lastkey2; + changed=0; + for (i=0 ; i < share->base.keys ; i++) + { + if (maria_is_key_active(share->state.key_map, i)) + { + if (share->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_cmp(info,i,oldrec, newrec)) + { + if ((int) i == info->lastinx) + { + /* + We are changeing the index we are reading on. Mark that + the index data has changed and we need to do a full search + when doing read-next + */ + key_changed|=HA_STATE_WRITTEN; + } + changed|=((ulonglong) 1 << i); + if (_ma_ft_update(info,i,(char*) old_key,oldrec,newrec,pos)) + goto err; + } + } + else + { + uint new_length= _ma_make_key(info,i,new_key,newrec,pos); + uint old_length= _ma_make_key(info,i,old_key,oldrec,pos); + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + if (new_length != old_length || + memcmp(old_key, new_key, new_length)) + { + if ((int) i == info->lastinx) + key_changed|=HA_STATE_WRITTEN; /* Mark that keyfile changed */ + changed|=((ulonglong) 1 << i); + share->keyinfo[i].version++; + if (share->keyinfo[i].ck_delete(info,i,old_key,old_length)) goto err; + if (share->keyinfo[i].ck_insert(info,i,new_key,new_length)) goto err; + if (share->base.auto_key == i+1) + auto_key_changed=1; + } + } + } + } + /* + If we are running with external locking, we must update the index file + that something has changed. + */ + if (changed || !my_disable_locking) + key_changed|= HA_STATE_CHANGED; + + if (share->calc_checksum) + { + info->cur_row.checksum= (*share->calc_checksum)(info,newrec); + /* Store new checksum in index file header */ + key_changed|= HA_STATE_CHANGED; + } + { + /* + Don't update index file if data file is not extended and no status + information changed + */ + MARIA_STATUS_INFO state; + ha_rows org_split; + my_off_t org_delete_link; + + memcpy((char*) &state, (char*) info->state, sizeof(state)); + org_split= share->state.split; + org_delete_link= share->state.dellink; + if ((*share->update_record)(info,pos,newrec)) + goto err; + if (!key_changed && + (memcmp((char*) &state, (char*) info->state, sizeof(state)) || + org_split != share->state.split || + org_delete_link != share->state.dellink)) + key_changed|= HA_STATE_CHANGED; /* Must update index file */ + } + if (auto_key_changed) + set_if_bigger(info->s->state.auto_increment, + ma_retrieve_auto_increment(info, newrec)); + if (share->calc_checksum) + info->state->checksum+= (info->cur_row.checksum - old_checksum); + + /* + We can't yet have HA_STATE_ACTIVE here, as block_record dosn't support + it + */ + info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED | key_changed); + + /* + Every Maria function that updates Maria table must end with + call to _ma_writeinfo(). If operation (second param of + _ma_writeinfo()) is not 0 it sets share->changed to 1, that is + flags that data has changed. If operation is 0, this function + equals to no-op in this case. 
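
The key loop above only touches indexes whose key image actually changed: it rebuilds the old and new key for each index, compares them, and records the changed ones in a bitmap before calling ck_delete/ck_insert. A stripped-down sketch of that pattern, with invented demo_* types standing in for the real key packing:

/* Simplified sketch (not part of the patch) of changed-key detection in
   maria_update: build the old and new key image per index and only mark an
   index for delete + re-insert when the images differ. */
#include <stdio.h>
#include <string.h>

#define N_KEYS 2

struct demo_row { char name[16]; int age; };

/* build the key image for index i into buf, return its length */
static size_t demo_make_key(const struct demo_row *row, int i, char *buf)
{
  if (i == 0) { strcpy(buf, row->name); return strlen(buf); }
  return (size_t) sprintf(buf, "%d", row->age);
}

int main(void)
{
  struct demo_row oldrec= { "monty", 40 }, newrec= { "monty", 41 };
  unsigned long long changed= 0;
  int i;
  for (i= 0; i < N_KEYS; i++)
  {
    char old_key[32], new_key[32];
    size_t old_len= demo_make_key(&oldrec, i, old_key);
    size_t new_len= demo_make_key(&newrec, i, new_key);
    if (old_len != new_len || memcmp(old_key, new_key, new_len))
      changed|= 1ULL << i;                      /* index i needs delete+insert */
  }
  printf("changed key bitmap: 0x%llx\n", changed);
  return 0;
}
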
+ + ma_update() must always pass !0 value as operation, since even if + there is no index change there could be data change. + */ + VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + if (info->invalidator != 0) + { + DBUG_PRINT("info", ("invalidator... '%s' (update)", info->filename)); + (*info->invalidator)(info->filename); + info->invalidator=0; + } + DBUG_RETURN(0); + +err: + DBUG_PRINT("error",("key: %d errno: %d",i,my_errno)); + save_errno=my_errno; + if (my_errno == HA_ERR_FOUND_DUPP_KEY || my_errno == HA_ERR_RECORD_FILE_FULL) + { + info->errkey= (int) i; + flag=0; + do + { + if (((ulonglong) 1 << i) & changed) + { + if (share->keyinfo[i].flag & HA_FULLTEXT) + { + if ((flag++ && _ma_ft_del(info,i,(char*) new_key,newrec,pos)) || + _ma_ft_add(info,i,(char*) old_key,oldrec,pos)) + break; + } + else + { + uint new_length= _ma_make_key(info,i,new_key,newrec,pos); + uint old_length= _ma_make_key(info,i,old_key,oldrec,pos); + if ((flag++ && _ma_ck_delete(info,i,new_key,new_length)) || + _ma_ck_write(info,i,old_key,old_length)) + break; + } + } + } while (i-- != 0); + } + else + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED | + key_changed); + + err_end: + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + if (save_errno == HA_ERR_KEY_NOT_FOUND) + { + maria_print_error(info->s, HA_ERR_CRASHED); + save_errno=HA_ERR_CRASHED; + } + DBUG_RETURN(my_errno=save_errno); +} /* maria_update */ diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c new file mode 100644 index 00000000000..3f1ca59a00a --- /dev/null +++ b/storage/maria/ma_write.c @@ -0,0 +1,1081 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Write a row to a MARIA table */ + +#include "ma_fulltext.h" +#include "ma_rt_index.h" + +#define MAX_POINTER_LENGTH 8 + + /* Functions declared in this file */ + +static int w_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo, + uint comp_flag, byte *key, + uint key_length, my_off_t pos, byte *father_buff, + byte *father_keypos, my_off_t father_page, + my_bool insert_last); +static int _ma_balance_page(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *key, + byte *curr_buff,byte *father_buff, + byte *father_keypos,my_off_t father_page); +static byte *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint *return_key_length, + byte **after_key); +int _ma_ck_write_tree(register MARIA_HA *info, uint keynr,byte *key, + uint key_length); +int _ma_ck_write_btree(register MARIA_HA *info, uint keynr,byte *key, + uint key_length); + + +MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, + const byte *record + __attribute__((unused))) +{ + return ((info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) ? + info->s->state.dellink : + info->state->data_file_length); +} + +my_bool _ma_write_abort_default(MARIA_HA *info __attribute__((unused))) +{ + return 0; +} + + +/* Write new record to a table */ + +int maria_write(MARIA_HA *info, byte *record) +{ + MARIA_SHARE *share=info->s; + uint i; + int save_errno; + MARIA_RECORD_POS filepos; + byte *buff; + my_bool lock_tree= share->concurrent_insert; + my_bool fatal_error; + DBUG_ENTER("maria_write"); + DBUG_PRINT("enter",("index_file: %d data_file: %d", + info->s->kfile,info->dfile)); + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + dont_break(); /* Dont allow SIGHUP or SIGINT */ + + if (share->base.reloc == (ha_rows) 1 && + share->base.records == (ha_rows) 1 && + info->state->records == (ha_rows) 1) + { /* System file */ + my_errno=HA_ERR_RECORD_FILE_FULL; + goto err2; + } + if (info->state->key_file_length >= share->base.margin_key_file_length) + { + my_errno=HA_ERR_INDEX_FILE_FULL; + goto err2; + } + if (_ma_mark_file_changed(info)) + goto err2; + + /* Calculate and check all unique constraints */ + for (i=0 ; i < share->state.header.uniques ; i++) + { + if (_ma_check_unique(info,share->uniqueinfo+i,record, + _ma_unique_hash(share->uniqueinfo+i,record), + HA_OFFSET_ERROR)) + goto err2; + } + + if ((info->opt_flag & OPT_NO_ROWS)) + filepos= HA_OFFSET_ERROR; + else + { + /* + This may either calculate a record or, or write the record and return + the record id + */ + if ((filepos= (*share->write_record_init)(info, record)) == + HA_OFFSET_ERROR) + goto err2; + } + + /* Write all keys to indextree */ + buff= info->lastkey2; + for (i=0 ; i < share->base.keys ; i++) + { + if (maria_is_key_active(share->state.key_map, i)) + { + bool local_lock_tree= (lock_tree && + !(info->bulk_insert && + is_tree_inited(&info->bulk_insert[i]))); + if (local_lock_tree) + { + rw_wrlock(&share->key_root_lock[i]); + share->keyinfo[i].version++; + } + if (share->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_add(info,i,(char*) buff,record,filepos)) + { + if (local_lock_tree) + 
rw_unlock(&share->key_root_lock[i]); + DBUG_PRINT("error",("Got error: %d on write",my_errno)); + goto err; + } + } + else + { + if (share->keyinfo[i].ck_insert(info,i,buff, + _ma_make_key(info,i,buff,record, + filepos))) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + DBUG_PRINT("error",("Got error: %d on write",my_errno)); + goto err; + } + } + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + } + } + if (share->calc_write_checksum) + info->cur_row.checksum= (*share->calc_write_checksum)(info,record); + if (filepos != HA_OFFSET_ERROR) + { + if ((*share->write_record)(info,record)) + goto err; + info->state->checksum+= info->cur_row.checksum; + } + if (share->base.auto_key) + set_if_bigger(info->s->state.auto_increment, + ma_retrieve_auto_increment(info, record)); + info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_WRITTEN | + HA_STATE_ROW_CHANGED); + info->state->records++; + info->cur_row.lastpos= filepos; + VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE)); + if (info->invalidator != 0) + { + DBUG_PRINT("info", ("invalidator... '%s' (update)", info->filename)); + (*info->invalidator)(info->filename); + info->invalidator=0; + } + + /* + Update status of the table. We need to do so after each row write + for the log tables, as we want the new row to become visible to + other threads as soon as possible. We don't lock mutex here + (as it is required by pthread memory visibility rules) as (1) it's + not critical to use outdated share->is_log_table value (2) locking + mutex here for every write is too expensive. + */ + if (share->is_log_table) + _ma_update_status((void*) info); + + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(0); + +err: + save_errno= my_errno; + fatal_error= 0; + if (my_errno == HA_ERR_FOUND_DUPP_KEY || + my_errno == HA_ERR_RECORD_FILE_FULL || + my_errno == HA_ERR_NULL_IN_SPATIAL) + { + if (info->bulk_insert) + { + uint j; + for (j=0 ; j < share->base.keys ; j++) + maria_flush_bulk_insert(info, j); + } + info->errkey= (int) i; + while ( i-- > 0) + { + if (maria_is_key_active(share->state.key_map, i)) + { + bool local_lock_tree= (lock_tree && + !(info->bulk_insert && + is_tree_inited(&info->bulk_insert[i]))); + if (local_lock_tree) + rw_wrlock(&share->key_root_lock[i]); + if (share->keyinfo[i].flag & HA_FULLTEXT) + { + if (_ma_ft_del(info,i,(char*) buff,record,filepos)) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + break; + } + } + else + { + uint key_length= _ma_make_key(info,i,buff,record,filepos); + if (_ma_ck_delete(info,i,buff,key_length)) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + break; + } + } + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + } + } + } + else + fatal_error= 1; + + if ((*share->write_record_abort)(info)) + fatal_error= 1; + if (fatal_error) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + + info->update= (HA_STATE_CHANGED | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED); + my_errno=save_errno; +err2: + save_errno=my_errno; + DBUG_PRINT("error", ("got error: %d", save_errno)); + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(my_errno=save_errno); +} /* maria_write */ + + + /* Write one key to btree */ + +int _ma_ck_write(MARIA_HA *info, uint keynr, byte *key, uint key_length) +{ + DBUG_ENTER("_ma_ck_write"); + + if (info->bulk_insert 
&& is_tree_inited(&info->bulk_insert[keynr])) + { + DBUG_RETURN(_ma_ck_write_tree(info, keynr, key, key_length)); + } + else + { + DBUG_RETURN(_ma_ck_write_btree(info, keynr, key, key_length)); + } +} /* _ma_ck_write */ + + +/********************************************************************** + * Normal insert code * + **********************************************************************/ + +int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, byte *key, + uint key_length) +{ + int error; + uint comp_flag; + MARIA_KEYDEF *keyinfo=info->s->keyinfo+keynr; + my_off_t *root=&info->s->state.key_root[keynr]; + DBUG_ENTER("_ma_ck_write_btree"); + + if (keyinfo->flag & HA_SORT_ALLOWS_SAME) + comp_flag=SEARCH_BIGGER; /* Put after same key */ + else if (keyinfo->flag & (HA_NOSAME|HA_FULLTEXT)) + { + comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */ + if (keyinfo->flag & HA_NULL_ARE_EQUAL) + comp_flag|= SEARCH_NULL_ARE_EQUAL; + } + else + comp_flag=SEARCH_SAME; /* Keys in rec-pos order */ + + error= _ma_ck_real_write_btree(info, keyinfo, key, key_length, + root, comp_flag); + if (info->ft1_to_ft2) + { + if (!error) + error= _ma_ft_convert_to_ft2(info, keynr, key); + delete_dynamic(info->ft1_to_ft2); + my_free((gptr)info->ft1_to_ft2, MYF(0)); + info->ft1_to_ft2=0; + } + DBUG_RETURN(error); +} /* _ma_ck_write_btree */ + + +int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, my_off_t *root, + uint comp_flag) +{ + int error; + DBUG_ENTER("_ma_ck_real_write_btree"); + /* key_length parameter is used only if comp_flag is SEARCH_FIND */ + if (*root == HA_OFFSET_ERROR || + (error=w_search(info, keyinfo, comp_flag, key, key_length, + *root, (byte*) 0, (byte*) 0, + (my_off_t) 0, 1)) > 0) + error= _ma_enlarge_root(info,keyinfo,key,root); + DBUG_RETURN(error); +} /* _ma_ck_real_write_btree */ + + + /* Make a new root with key as only pointer */ + +int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + my_off_t *root) +{ + uint t_length,nod_flag; + MARIA_KEY_PARAM s_temp; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_enlarge_root"); + + nod_flag= (*root != HA_OFFSET_ERROR) ? share->base.key_reflength : 0; + _ma_kpointer(info,info->buff+2,*root); /* if nod */ + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(byte*) 0, + (byte*) 0, (byte*) 0, key,&s_temp); + maria_putint(info->buff,t_length+2+nod_flag,nod_flag); + (*keyinfo->store_key)(keyinfo,info->buff+2+nod_flag,&s_temp); + info->keybuff_used=info->page_changed=1; /* info->buff is used */ + if ((*root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || + _ma_write_keypage(info,keyinfo,*root,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + DBUG_RETURN(0); +} /* _ma_enlarge_root */ + + + /* + Search after a position for a key and store it there + Returns -1 = error + 0 = ok + 1 = key should be stored in higher tree + */ + +static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uint comp_flag, byte *key, uint key_length, my_off_t page, + byte *father_buff, byte *father_keypos, + my_off_t father_page, my_bool insert_last) +{ + int error,flag; + uint nod_flag, search_key_length; + byte *temp_buff,*keypos; + byte keybuff[HA_MAX_KEY_BUFF]; + my_bool was_last_key; + my_off_t next_page, dup_key_pos; + DBUG_ENTER("w_search"); + DBUG_PRINT("enter",("page: %ld", (long) page)); + + search_key_length= (comp_flag & SEARCH_FIND) ? 
key_length : USE_WHOLE_KEY; + if (!(temp_buff= (byte*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + DBUG_RETURN(-1); + if (!_ma_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0)) + goto err; + + flag=(*keyinfo->bin_search)(info,keyinfo,temp_buff,key,search_key_length, + comp_flag, &keypos, keybuff, &was_last_key); + nod_flag= _ma_test_if_nod(temp_buff); + if (flag == 0) + { + uint tmp_key_length; + /* get position to record with duplicated key */ + tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,keybuff); + if (tmp_key_length) + dup_key_pos= _ma_dpos(info,0,keybuff+tmp_key_length); + else + dup_key_pos= HA_OFFSET_ERROR; + + if (keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + + get_key_full_length_rdonly(off, keybuff); + subkeys=ft_sintXkorr(keybuff+off); + comp_flag=SEARCH_SAME; + if (subkeys >= 0) + { + /* normal word, one-level tree structure */ + flag=(*keyinfo->bin_search)(info, keyinfo, temp_buff, key, + USE_WHOLE_KEY, comp_flag, + &keypos, keybuff, &was_last_key); + } + else + { + /* popular word. two-level tree. going down */ + my_off_t root=dup_key_pos; + keyinfo=&info->s->ft2_keyinfo; + get_key_full_length_rdonly(off, key); + key+=off; + keypos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */ + error= _ma_ck_real_write_btree(info, keyinfo, key, 0, + &root, comp_flag); + _ma_dpointer(info, keypos+HA_FT_WLEN, root); + subkeys--; /* should there be underflow protection ? */ + DBUG_ASSERT(subkeys < 0); + ft_intXstore(keypos, subkeys); + if (!error) + error= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff); + my_afree((byte*) temp_buff); + DBUG_RETURN(error); + } + } + else /* not HA_FULLTEXT, normal HA_NOSAME key */ + { + info->dup_key_pos= dup_key_pos; + my_afree((byte*) temp_buff); + my_errno=HA_ERR_FOUND_DUPP_KEY; + DBUG_RETURN(-1); + } + } + if (flag == MARIA_FOUND_WRONG_KEY) + DBUG_RETURN(-1); + if (!was_last_key) + insert_last=0; + next_page= _ma_kpos(nod_flag,keypos); + if (next_page == HA_OFFSET_ERROR || + (error=w_search(info, keyinfo, comp_flag, key, key_length, next_page, + temp_buff, keypos, page, insert_last)) >0) + { + error= _ma_insert(info,keyinfo,key,temp_buff,keypos,keybuff,father_buff, + father_keypos,father_page, insert_last); + if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff)) + goto err; + } + my_afree((byte*) temp_buff); + DBUG_RETURN(error); +err: + my_afree((byte*) temp_buff); + DBUG_PRINT("exit",("Error: %d",my_errno)); + DBUG_RETURN (-1); +} /* w_search */ + + +/* + Insert new key. + + SYNOPSIS + _ma_insert() + info Open table information. + keyinfo Key definition information. + key New key. + anc_buff Key page (beginning). + key_pos Position in key page where to insert. + key_buff Copy of previous key. + father_buff parent key page for balancing. + father_key_pos position in parent key page for balancing. + father_page position of parent key page in file. + insert_last If to append at end of page. + + DESCRIPTION + Insert new key at right of key_pos. + + RETURN + 2 if key contains key to upper level. + 0 OK. + < 0 Error. 
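
A minimal sketch of the in-page step that _ma_insert (below) performs, reduced to fixed-size keys: shift the tail of the page to make room, store the key, and signal the caller when the page is full so that a split can push a key one level up. The demo_* names are illustrative only; the real code packs variable-length keys and performs the split itself.

/* Sketch (not from the patch) of shift-and-store insertion into a key page,
   returning 2 when the page has no room, mirroring _ma_insert's "key goes to
   the upper level" convention. */
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 64
#define KEY_LEN   8

struct demo_page { size_t used; char data[PAGE_SIZE]; };

static int demo_page_insert(struct demo_page *page, size_t offset,
                            const char *key)
{
  if (page->used + KEY_LEN > PAGE_SIZE)
    return 2;                                   /* full: caller must split */
  memmove(page->data + offset + KEY_LEN, page->data + offset,
          page->used - offset);                 /* make room at offset */
  memcpy(page->data + offset, key, KEY_LEN);
  page->used+= KEY_LEN;
  return 0;
}

int main(void)
{
  struct demo_page page= { 0, { 0 } };
  int res= demo_page_insert(&page, 0, "key00001");
  printf("insert result: %d, used: %zu\n", res, page.used);
  return 0;
}
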
+*/ + +int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, byte *anc_buff, byte *key_pos, byte *key_buff, + byte *father_buff, byte *father_key_pos, my_off_t father_page, + my_bool insert_last) +{ + uint a_length,nod_flag; + int t_length; + byte *endpos, *prev_key; + MARIA_KEY_PARAM s_temp; + DBUG_ENTER("_ma_insert"); + DBUG_PRINT("enter",("key_pos: 0x%lx", (ulong) key_pos)); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key, + USE_WHOLE_KEY);); + + nod_flag=_ma_test_if_nod(anc_buff); + a_length=maria_getint(anc_buff); + endpos= anc_buff+ a_length; + prev_key=(key_pos == anc_buff+2+nod_flag ? (byte*) 0 : key_buff); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, + (key_pos == endpos ? (byte*) 0 : key_pos), + prev_key, prev_key, + key,&s_temp); +#ifndef DBUG_OFF + if (key_pos != anc_buff+2+nod_flag && (keyinfo->flag & + (HA_BINARY_PACK_KEY | HA_PACK_KEY))) + { + DBUG_DUMP("prev_key",(byte*) key_buff, _ma_keylength(keyinfo,key_buff)); + } + if (keyinfo->flag & HA_PACK_KEY) + { + DBUG_PRINT("test",("t_length: %d ref_len: %d", + t_length,s_temp.ref_length)); + DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: 0x%lx", + s_temp.n_ref_length, s_temp.n_length, (long) s_temp.key)); + } +#endif + if (t_length > 0) + { + if (t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(-1); + } + bmove_upp((byte*) endpos+t_length,(byte*) endpos,(uint) (endpos-key_pos)); + } + else + { + if (-t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(-1); + } + bmove(key_pos,key_pos-t_length,(uint) (endpos-key_pos)+t_length); + } + (*keyinfo->store_key)(keyinfo,key_pos,&s_temp); + a_length+=t_length; + maria_putint(anc_buff,a_length,nod_flag); + if (a_length <= keyinfo->block_length) + { + if (keyinfo->block_length - a_length < 32 && + keyinfo->flag & HA_FULLTEXT && key_pos == endpos && + info->s->base.key_reflength <= info->s->base.rec_reflength && + info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) + { + /* + Normal word. One-level tree. Page is almost full. + Let's consider converting. + We'll compare 'key' and the first key at anc_buff + */ + byte *a=key, *b=anc_buff+2+nod_flag; + uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength; + /* the very first key on the page is always unpacked */ + DBUG_ASSERT((*b & 128) == 0); +#if HA_FT_MAXLEN >= 127 + blen= mi_uint2korr(b); b+=2; +#else + blen= *(uchar*) b++; +#endif + get_key_length(alen,a); + DBUG_ASSERT(info->ft1_to_ft2==0); + if (alen == blen && + ha_compare_text(keyinfo->seg->charset, (uchar*) a, alen, + (uchar*) b, blen, 0, 0) == 0) + { + /* yup. 
converting */ + info->ft1_to_ft2=(DYNAMIC_ARRAY *) + my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME)); + my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50); + + /* + now, adding all keys from the page to dynarray + if the page is a leaf (if not keys will be deleted later) + */ + if (!nod_flag) + { + /* let's leave the first key on the page, though, because + we cannot easily dispatch an empty page here */ + b+=blen+ft2len+2; + for (a=anc_buff+a_length ; b < a ; b+=ft2len+2) + insert_dynamic(info->ft1_to_ft2, (char*) b); + + /* fixing the page's length - it contains only one key now */ + maria_putint(anc_buff,2+blen+ft2len+2,0); + } + /* the rest will be done when we're back from recursion */ + } + } + DBUG_RETURN(0); /* There is room on page */ + } + /* Page is full */ + if (nod_flag) + insert_last=0; + if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)) && + father_buff && !insert_last) + DBUG_RETURN(_ma_balance_page(info,keyinfo,key,anc_buff,father_buff, + father_key_pos,father_page)); + DBUG_RETURN(_ma_split_page(info,keyinfo,key,anc_buff,key_buff, insert_last)); +} /* _ma_insert */ + + + /* split a full page in two and assign emerging item to key */ + +int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, byte *buff, byte *key_buff, + my_bool insert_last_key) +{ + uint length,a_length,key_ref_length,t_length,nod_flag,key_length; + byte *key_pos,*pos, *after_key; + my_off_t new_pos; + MARIA_KEY_PARAM s_temp; + DBUG_ENTER("maria_split_page"); + DBUG_DUMP("buff",(byte*) buff,maria_getint(buff)); + + if (info->s->keyinfo+info->lastinx == keyinfo) + info->page_changed=1; /* Info->buff is used */ + info->keybuff_used=1; + nod_flag=_ma_test_if_nod(buff); + key_ref_length=2+nod_flag; + if (insert_last_key) + key_pos= _ma_find_last_pos(keyinfo,buff,key_buff, &key_length, &after_key); + else + key_pos= _ma_find_half_pos(nod_flag,keyinfo,buff,key_buff, &key_length, + &after_key); + if (!key_pos) + DBUG_RETURN(-1); + + length=(uint) (key_pos-buff); + a_length=maria_getint(buff); + maria_putint(buff,length,nod_flag); + + key_pos=after_key; + if (nod_flag) + { + DBUG_PRINT("test",("Splitting nod")); + pos=key_pos-nod_flag; + memcpy((byte*) info->buff+2,(byte*) pos,(size_t) nod_flag); + } + + /* Move middle item to key and pointer to new page */ + if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + DBUG_RETURN(-1); + _ma_kpointer(info, _ma_move_key(keyinfo,key,key_buff),new_pos); + + /* Store new page */ + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&key_pos,key_buff)) + DBUG_RETURN(-1); + + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(byte *) 0, + (byte*) 0, (byte*) 0, + key_buff, &s_temp); + length=(uint) ((buff+a_length)-key_pos); + memcpy((byte*) info->buff+key_ref_length+t_length,(byte*) key_pos, + (size_t) length); + (*keyinfo->store_key)(keyinfo,info->buff+key_ref_length,&s_temp); + maria_putint(info->buff,length+t_length+key_ref_length,nod_flag); + + if (_ma_write_keypage(info,keyinfo,new_pos,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + DBUG_DUMP("key",(byte*) key, _ma_keylength(keyinfo,key)); + DBUG_RETURN(2); /* Middle key up */ +} /* _ma_split_page */ + + + /* + Calculate how to much to move to split a page in two + Returns pointer to start of key. + key will contain the key. 
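
_ma_split_page above returns 2 ("middle key up") after moving the upper half of a full page to a newly allocated page. Reduced to an array of fixed-size keys, the split looks roughly like this; the demo_* names are invented for illustration and not part of the patch.

/* Rough sketch (not from the patch) of a page split: keep the lower half in
   place, move the upper half to a new page, and hand the middle key back to
   the caller to insert one level up. */
#include <stdio.h>
#include <string.h>

#define KEY_LEN  8
#define MAX_KEYS 16

struct demo_page { unsigned nkeys; char keys[MAX_KEYS][KEY_LEN]; };

static void demo_split(struct demo_page *full, struct demo_page *new_page,
                       char *middle_key)
{
  unsigned half= full->nkeys / 2;
  memcpy(middle_key, full->keys[half], KEY_LEN);          /* key that goes up */
  new_page->nkeys= full->nkeys - half - 1;
  memcpy(new_page->keys, full->keys[half + 1],
         (size_t) new_page->nkeys * KEY_LEN);              /* upper half */
  full->nkeys= half;                                       /* lower half stays */
}

int main(void)
{
  struct demo_page full= { 4, { "aaaaaaa", "bbbbbbb", "ccccccc", "ddddddd" } };
  struct demo_page right= { 0, { "" } };
  char up[KEY_LEN];
  demo_split(&full, &right, up);
  printf("middle key up: %.8s, left=%u right=%u\n", up, full.nkeys, right.nkeys);
  return 0;
}
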
+ return_key_length will contain the length of key + after_key will contain the position to where the next key starts + */ + +byte *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint *return_key_length, + byte **after_key) +{ + uint keys,length,key_ref_length; + byte *end,*lastpos; + DBUG_ENTER("_ma_find_half_pos"); + + key_ref_length=2+nod_flag; + length=maria_getint(page)-key_ref_length; + page+=key_ref_length; + if (!(keyinfo->flag & + (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY | + HA_BINARY_PACK_KEY))) + { + key_ref_length=keyinfo->keylength+nod_flag; + keys=length/(key_ref_length*2); + *return_key_length=keyinfo->keylength; + end=page+keys*key_ref_length; + *after_key=end+key_ref_length; + memcpy(key,end,key_ref_length); + DBUG_RETURN(end); + } + + end=page+length/2-key_ref_length; /* This is aprox. half */ + *key='\0'; + do + { + lastpos=page; + if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key))) + DBUG_RETURN(0); + } while (page < end); + *return_key_length=length; + *after_key=page; + DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx half: 0x%lx", + (long) lastpos, (long) page, (long) end)); + DBUG_RETURN(lastpos); +} /* _ma_find_half_pos */ + + +/* + Split buffer at last key + Returns pointer to the start of the key before the last key + key will contain the last key +*/ + +static byte *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint *return_key_length, + byte **after_key) +{ + uint keys,length,last_length,key_ref_length; + byte *end,*lastpos,*prevpos; + byte key_buff[HA_MAX_KEY_BUFF]; + DBUG_ENTER("_ma_find_last_pos"); + + key_ref_length=2; + length=maria_getint(page)-key_ref_length; + page+=key_ref_length; + if (!(keyinfo->flag & + (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY | + HA_BINARY_PACK_KEY))) + { + keys=length/keyinfo->keylength-2; + *return_key_length=length=keyinfo->keylength; + end=page+keys*length; + *after_key=end+length; + memcpy(key,end,length); + DBUG_RETURN(end); + } + + LINT_INIT(prevpos); + LINT_INIT(last_length); + end=page+length-key_ref_length; + *key='\0'; + length=0; + lastpos=page; + while (page < end) + { + prevpos=lastpos; lastpos=page; + last_length=length; + memcpy(key, key_buff, length); /* previous key */ + if (!(length=(*keyinfo->get_key)(keyinfo,0,&page,key_buff))) + { + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + } + *return_key_length=last_length; + *after_key=lastpos; + DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx end: 0x%lx", + (long) prevpos,(long) page,(long) end)); + DBUG_RETURN(prevpos); +} /* _ma_find_last_pos */ + + + /* Balance page with not packed keys with page on right/left */ + /* returns 0 if balance was done */ + +static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, byte *curr_buff, byte *father_buff, + byte *father_key_pos, my_off_t father_page) +{ + my_bool right; + uint k_length,father_length,father_keylength,nod_flag,curr_keylength, + right_length,left_length,new_right_length,new_left_length,extra_length, + length,keys; + byte *pos,*buff,*extra_buff; + my_off_t next_page,new_pos; + byte tmp_part_key[HA_MAX_KEY_BUFF]; + DBUG_ENTER("_ma_balance_page"); + + k_length=keyinfo->keylength; + father_length=maria_getint(father_buff); + father_keylength=k_length+info->s->base.key_reflength; + nod_flag=_ma_test_if_nod(curr_buff); + curr_keylength=k_length+nod_flag; + info->page_changed=1; + + if ((father_key_pos != father_buff+father_length && + 
(info->state->records & 1)) || + father_key_pos == father_buff+2+info->s->base.key_reflength) + { + right=1; + next_page= _ma_kpos(info->s->base.key_reflength, + father_key_pos+father_keylength); + buff=info->buff; + DBUG_PRINT("test",("use right page: %lu", (ulong) next_page)); + } + else + { + right=0; + father_key_pos-=father_keylength; + next_page= _ma_kpos(info->s->base.key_reflength,father_key_pos); + /* Fix that curr_buff is to left */ + buff=curr_buff; curr_buff=info->buff; + DBUG_PRINT("test",("use left page: %lu", (ulong) next_page)); + } /* father_key_pos ptr to parting key */ + + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff,0)) + goto err; + DBUG_DUMP("next",(byte*) info->buff,maria_getint(info->buff)); + + /* Test if there is room to share keys */ + + left_length=maria_getint(curr_buff); + right_length=maria_getint(buff); + keys=(left_length+right_length-4-nod_flag*2)/curr_keylength; + + if ((right ? right_length : left_length) + curr_keylength <= + keyinfo->block_length) + { /* Merge buffs */ + new_left_length=2+nod_flag+(keys/2)*curr_keylength; + new_right_length=2+nod_flag+((keys+1)/2)*curr_keylength; + maria_putint(curr_buff,new_left_length,nod_flag); + maria_putint(buff,new_right_length,nod_flag); + + if (left_length < new_left_length) + { /* Move keys buff -> leaf */ + pos=curr_buff+left_length; + memcpy((byte*) pos,(byte*) father_key_pos, (size_t) k_length); + memcpy((byte*) pos+k_length, (byte*) buff+2, + (size_t) (length=new_left_length - left_length - k_length)); + pos=buff+2+length; + memcpy((byte*) father_key_pos,(byte*) pos,(size_t) k_length); + bmove((byte*) buff+2,(byte*) pos+k_length,new_right_length); + } + else + { /* Move keys -> buff */ + + bmove_upp((byte*) buff+new_right_length,(byte*) buff+right_length, + right_length-2); + length=new_right_length-right_length-k_length; + memcpy((byte*) buff+2+length,father_key_pos,(size_t) k_length); + pos=curr_buff+new_left_length; + memcpy((byte*) father_key_pos,(byte*) pos,(size_t) k_length); + memcpy((byte*) buff+2,(byte*) pos+k_length,(size_t) length); + } + + if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff) || + _ma_write_keypage(info,keyinfo,father_page,DFLT_INIT_HITS,father_buff)) + goto err; + DBUG_RETURN(0); + } + + /* curr_buff[] and buff[] are full, lets split and make new nod */ + + extra_buff=info->buff+info->s->base.max_key_block_length; + new_left_length=new_right_length=2+nod_flag+(keys+1)/3*curr_keylength; + if (keys == 5) /* Too few keys to balance */ + new_left_length-=curr_keylength; + extra_length=nod_flag+left_length+right_length- + new_left_length-new_right_length-curr_keylength; + DBUG_PRINT("info",("left_length: %d right_length: %d new_left_length: %d new_right_length: %d extra_length: %d", + left_length, right_length, + new_left_length, new_right_length, + extra_length)); + maria_putint(curr_buff,new_left_length,nod_flag); + maria_putint(buff,new_right_length,nod_flag); + maria_putint(extra_buff,extra_length+2,nod_flag); + + /* move first largest keys to new page */ + pos=buff+right_length-extra_length; + memcpy((byte*) extra_buff+2,pos,(size_t) extra_length); + /* Save new parting key */ + memcpy(tmp_part_key, pos-k_length,k_length); + /* Make place for new keys */ + bmove_upp((byte*) buff+new_right_length,(byte*) pos-k_length, + right_length-extra_length-k_length-2); + /* Copy keys from left page */ + pos= curr_buff+new_left_length; + memcpy((byte*) buff+2,(byte*) pos+k_length, + (size_t) (length=left_length-new_left_length-k_length)); + /* 
Copy old parting key */ + memcpy((byte*) buff+2+length,father_key_pos,(size_t) k_length); + + /* Move new parting keys up to caller */ + memcpy((byte*) (right ? key : father_key_pos),pos,(size_t) k_length); + memcpy((byte*) (right ? father_key_pos : key),tmp_part_key, k_length); + + if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + goto err; + _ma_kpointer(info,key+k_length,new_pos); + if (_ma_write_keypage(info,keyinfo,(right ? new_pos : next_page), + DFLT_INIT_HITS,info->buff) || + _ma_write_keypage(info,keyinfo,(right ? next_page : new_pos), + DFLT_INIT_HITS,extra_buff)) + goto err; + + DBUG_RETURN(1); /* Middle key up */ + +err: + DBUG_RETURN(-1); +} /* _ma_balance_page */ + +/********************************************************************** + * Bulk insert code * + **********************************************************************/ + +typedef struct { + MARIA_HA *info; + uint keynr; +} bulk_insert_param; + + +int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, byte *key, + uint key_length) +{ + int error; + DBUG_ENTER("_ma_ck_write_tree"); + + error= tree_insert(&info->bulk_insert[keynr], key, + key_length + info->s->rec_reflength, + info->bulk_insert[keynr].custom_arg) ? 0 : HA_ERR_OUT_OF_MEM ; + + DBUG_RETURN(error); +} /* _ma_ck_write_tree */ + + +/* typeof(_ma_keys_compare)=qsort_cmp2 */ + +static int keys_compare(bulk_insert_param *param, byte *key1, byte *key2) +{ + uint not_used[2]; + return ha_key_cmp(param->info->s->keyinfo[param->keynr].seg, + (uchar*) key1, (uchar*) key2, USE_WHOLE_KEY, SEARCH_SAME, + not_used); +} + + +static int keys_free(byte *key, TREE_FREE mode, bulk_insert_param *param) +{ + /* + Probably I can use info->lastkey here, but I'm not sure, + and to be safe I'd better use local lastkey. + */ + byte lastkey[HA_MAX_KEY_BUFF]; + uint keylen; + MARIA_KEYDEF *keyinfo; + + switch (mode) { + case free_init: + if (param->info->s->concurrent_insert) + { + rw_wrlock(¶m->info->s->key_root_lock[param->keynr]); + param->info->s->keyinfo[param->keynr].version++; + } + return 0; + case free_free: + keyinfo=param->info->s->keyinfo+param->keynr; + keylen= _ma_keylength(keyinfo, key); + memcpy(lastkey, key, keylen); + return _ma_ck_write_btree(param->info,param->keynr,lastkey, + keylen - param->info->s->rec_reflength); + case free_end: + if (param->info->s->concurrent_insert) + rw_unlock(¶m->info->s->key_root_lock[param->keynr]); + return 0; + } + return -1; +} + + +int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows) +{ + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *key=share->keyinfo; + bulk_insert_param *params; + uint i, num_keys, total_keylength; + ulonglong key_map; + DBUG_ENTER("_ma_init_bulk_insert"); + DBUG_PRINT("enter",("cache_size: %lu", cache_size)); + + DBUG_ASSERT(!info->bulk_insert && + (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT)); + + maria_clear_all_keys_active(key_map); + for (i=total_keylength=num_keys=0 ; i < share->base.keys ; i++) + { + if (! 
(key[i].flag & HA_NOSAME) && (share->base.auto_key != i + 1) && + maria_is_key_active(share->state.key_map, i)) + { + num_keys++; + maria_set_key_active(key_map, i); + total_keylength+=key[i].maxlength+TREE_ELEMENT_EXTRA_SIZE; + } + } + + if (num_keys==0 || + num_keys * MARIA_MIN_SIZE_BULK_INSERT_TREE > cache_size) + DBUG_RETURN(0); + + if (rows && rows*total_keylength < cache_size) + cache_size=rows; + else + cache_size/=total_keylength*16; + + info->bulk_insert=(TREE *) + my_malloc((sizeof(TREE)*share->base.keys+ + sizeof(bulk_insert_param)*num_keys),MYF(0)); + + if (!info->bulk_insert) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + params=(bulk_insert_param *)(info->bulk_insert+share->base.keys); + for (i=0 ; i < share->base.keys ; i++) + { + if (maria_is_key_active(key_map, i)) + { + params->info=info; + params->keynr=i; + /* Only allocate a 16'th of the buffer at a time */ + init_tree(&info->bulk_insert[i], + cache_size * key[i].maxlength, + cache_size * key[i].maxlength, 0, + (qsort_cmp2)keys_compare, 0, + (tree_element_free) keys_free, (void *)params++); + } + else + info->bulk_insert[i].root=0; + } + + DBUG_RETURN(0); +} + +void maria_flush_bulk_insert(MARIA_HA *info, uint inx) +{ + if (info->bulk_insert) + { + if (is_tree_inited(&info->bulk_insert[inx])) + reset_tree(&info->bulk_insert[inx]); + } +} + +void maria_end_bulk_insert(MARIA_HA *info) +{ + if (info->bulk_insert) + { + uint i; + for (i=0 ; i < info->s->base.keys ; i++) + { + if (is_tree_inited(& info->bulk_insert[i])) + { + delete_tree(& info->bulk_insert[i]); + } + } + my_free((void *)info->bulk_insert, MYF(0)); + info->bulk_insert=0; + } +} diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c new file mode 100644 index 00000000000..8471d36bec9 --- /dev/null +++ b/storage/maria/maria_chk.c @@ -0,0 +1,1823 @@ +/* Copyright (C) 2006-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Describe, check and repair of MARIA tables */ + +#include "ma_fulltext.h" +#include <myisamchk.h> +#include <my_bit.h> +#include <m_ctype.h> +#include <stdarg.h> +#include <my_getopt.h> +#ifdef HAVE_SYS_VADVICE_H +#include <sys/vadvise.h> +#endif +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif +SET_STACK_SIZE(9000) /* Minimum stack size for program */ + +#ifndef USE_RAID +#define my_raid_create(A,B,C,D,E,F,G) my_create(A,B,C,G) +#define my_raid_delete(A,B,C) my_delete(A,B) +#endif + +static uint decode_bits; +static char **default_argv; +static const char *load_default_groups[]= { "maria_chk", 0 }; +static const char *set_collation_name, *opt_tmpdir; +static CHARSET_INFO *set_collation; +static long opt_maria_block_size; +static long opt_key_cache_block_size; +static const char *my_progname_short; +static int stopwords_inited= 0; +static MY_TMPDIR maria_chk_tmpdir; + +static const char *type_names[]= +{ + "impossible","char","binary", "short", "long", "float", + "double","number","unsigned short", + "unsigned long","longlong","ulonglong","int24", + "uint24","int8","varchar", "varbin","?", + "?" +}; + +static const char *prefix_packed_txt="packed ", + *bin_packed_txt="prefix ", + *diff_txt="stripped ", + *null_txt="NULL", + *blob_txt="BLOB "; + +static const char *field_pack[]= +{ + "","no endspace", "no prespace", + "no zeros", "blob", "constant", "table-lockup", + "always zero","varchar","unique-hash","?","?" +}; + +static const char *record_formats[]= +{ + "Fixed length", "Packed", "Compressed", "Block", "?" +}; + +static const char *maria_stats_method_str="nulls_unequal"; + +static void get_options(int *argc,char * * *argv); +static void print_version(void); +static void usage(void); +static int maria_chk(HA_CHECK *param, char *filename); +static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name); +static int maria_sort_records(HA_CHECK *param, register MARIA_HA *info, + my_string name, uint sort_key, + my_bool write_info, my_bool update_index); +static int sort_record_index(MARIA_SORT_PARAM *sort_param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff,uint sortkey, + File new_file, my_bool update_index); + +HA_CHECK check_param; + + /* Main program */ + +int main(int argc, char **argv) +{ + int error; + MY_INIT(argv[0]); + my_progname_short= my_progname+dirname_length(my_progname); + + maria_chk_init(&check_param); + check_param.opt_lock_memory= 1; /* Lock memory if possible */ + check_param.using_global_keycache = 0; + get_options(&argc,(char***) &argv); + maria_quick_table_bits=decode_bits; + error=0; + maria_init(); + + while (--argc >= 0) + { + int new_error=maria_chk(&check_param, *(argv++)); + if ((check_param.testflag & T_REP_ANY) != T_REP) + check_param.testflag&= ~T_REP; + VOID(fflush(stdout)); + VOID(fflush(stderr)); + if ((check_param.error_printed | check_param.warning_printed) && + (check_param.testflag & T_FORCE_CREATE) && + (!(check_param.testflag & (T_REP | T_REP_BY_SORT | T_SORT_RECORDS | + T_SORT_INDEX)))) + { + uint old_testflag=check_param.testflag; + if (!(check_param.testflag & T_REP)) + check_param.testflag|= T_REP_BY_SORT; + check_param.testflag&= ~T_EXTEND; /* Don't needed */ + error|=maria_chk(&check_param, argv[-1]); + check_param.testflag= old_testflag; + VOID(fflush(stdout)); + VOID(fflush(stderr)); 
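
The loop above re-runs a failed check in repair mode when --force was given and no explicit repair was requested. A hypothetical, much-simplified model of that driver logic, where demo_check_table stands in for the real maria_chk() call:

/* Simplified model (not from the patch) of maria_chk's main() driver: check
   each table argument and, if forced and the check reported problems, rerun
   that table once in repair mode. */
#include <stdio.h>

enum { MODE_CHECK, MODE_REPAIR_BY_SORT };

static int demo_check_table(const char *name, int mode)
{
  printf("%s: %s\n", name, mode == MODE_CHECK ? "checking" : "repairing");
  return mode == MODE_CHECK;                    /* pretend check finds an issue */
}

int main(void)
{
  int force= 1, error= 0, i;
  const char *tables[]= { "test1", "test2" };
  for (i= 0; i < 2; i++)
  {
    int problems= demo_check_table(tables[i], MODE_CHECK);
    if (problems && force)
      error|= demo_check_table(tables[i], MODE_REPAIR_BY_SORT);
    else
      error|= problems;
  }
  return error;
}
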
+ } + else + error|=new_error; + if (argc && (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO)) + { + puts("\n---------\n"); + VOID(fflush(stdout)); + } + } + if (check_param.total_files > 1) + { /* Only if descript */ + char buff[22],buff2[22]; + if (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO) + puts("\n---------\n"); + printf("\nTotal of all %d MARIA-files:\nData records: %9s Deleted blocks: %9s\n",check_param.total_files,llstr(check_param.total_records,buff), + llstr(check_param.total_deleted,buff2)); + } + free_defaults(default_argv); + free_tmpdir(&maria_chk_tmpdir); + maria_end(); + my_end(check_param.testflag & T_INFO ? + MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); + exit(error); +#ifndef _lint + return 0; /* No compiler warning */ +#endif +} /* main */ + +enum options_mc { + OPT_CHARSETS_DIR=256, OPT_SET_COLLATION,OPT_START_CHECK_POS, + OPT_CORRECT_CHECKSUM, OPT_KEY_BUFFER_SIZE, + OPT_KEY_CACHE_BLOCK_SIZE, OPT_MARIA_BLOCK_SIZE, + OPT_READ_BUFFER_SIZE, OPT_WRITE_BUFFER_SIZE, OPT_SORT_BUFFER_SIZE, + OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN, + OPT_FT_MAX_WORD_LEN, OPT_FT_STOPWORD_FILE, + OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE, OPT_STATS_METHOD +}; + +static struct my_option my_long_options[] = +{ + {"analyze", 'a', + "Analyze distribution of keys. Will make some joins in MySQL faster. You can check the calculated distribution.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef __NETWARE__ + {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"block-search", 'b', + "No help available.", + 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"backup", 'B', + "Make a backup of the .MYD file as 'filename-time.BAK'.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"character-sets-dir", OPT_CHARSETS_DIR, + "Directory where character sets are.", + (gptr*) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"check", 'c', + "Check table for errors.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"check-only-changed", 'C', + "Check only tables that have changed since last check. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"correct-checksum", OPT_CORRECT_CHECKSUM, + "Correct checksum information for table.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifndef DBUG_OFF + {"debug", '#', + "Output debug log. Often this is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"description", 'd', + "Prints some information about table.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"data-file-length", 'D', + "Max length of data file (when recreating data-file when it's full).", + (gptr*) &check_param.max_data_file_length, + (gptr*) &check_param.max_data_file_length, + 0, GET_LL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"extend-check", 'e', + "If used when checking a table, ensure that the table is 100 percent consistent, which will take a long time. If used when repairing a table, try to recover every possible row from the data file. Normally this will also find a lot of garbage rows; Don't use this option with repair if you are not totally desperate.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"fast", 'F', + "Check only tables that haven't been closed properly. It also applies to other requested actions (e.g. 
--analyze will be ignored if the table is already analyzed).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"force", 'f', + "Restart with -r if there are any errors in the table. States will be updated as with --update-state.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"HELP", 'H', + "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"help", '?', + "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"information", 'i', + "Print statistics information about table that is checked.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"keys-used", 'k', + "Tell MARIA to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts.", + (gptr*) &check_param.keys_in_use, + (gptr*) &check_param.keys_in_use, + 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0}, + {"max-record-length", OPT_MAX_RECORD_LENGTH, + "Skip rows bigger than this if maria_chk can't allocate memory to hold it", + (gptr*) &check_param.max_record_length, + (gptr*) &check_param.max_record_length, + 0, GET_ULL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0}, + {"medium-check", 'm', + "Faster than extend-check, but only finds 99.99% of all errors. Should be good enough for most cases.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"quick", 'q', "Faster repair by not modifying the data file.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"read-only", 'T', + "Don't mark table as checked.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"recover", 'r', + "Can fix almost anything except unique keys that aren't unique.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"parallel-recover", 'p', + "Same as '-r' but creates all the keys in parallel.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"safe-recover", 'o', + "Uses old recovery method; Slower than '-r' but can handle a couple of cases where '-r' reports that it can't fix the data file.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"sort-recover", 'n', + "Force recovering with sorting even if the temporary file was very big.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef DEBUG + {"start-check-pos", OPT_START_CHECK_POS, + "No help available.", + 0, 0, 0, GET_ULL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"set-auto-increment", 'A', + "Force auto_increment to start at this or higher value. If no value is given, then sets the next auto_increment value to the highest used value for the auto key + 1.", + (gptr*) &check_param.auto_increment_value, + (gptr*) &check_param.auto_increment_value, + 0, GET_ULL, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"set-collation", OPT_SET_COLLATION, + "Change the collation used by the index", + (gptr*) &set_collation_name, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"set-variable", 'O', + "Change the value of a variable. Please note that this option is deprecated; you can set variables directly with --variable-name=value.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"silent", 's', + "Only print errors. One can use two -s to make maria_chk very silent.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"sort-index", 'S', + "Sort index blocks. This speeds up 'read-next' in applications.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"sort-records", 'R', + "Sort records according to an index. This makes your data much more localized and may speed up things. 
(It may be VERY slow to do a sort the first time!)", + (gptr*) &check_param.opt_sort_key, + (gptr*) &check_param.opt_sort_key, + 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"tmpdir", 't', + "Path for temporary files.", + (gptr*) &opt_tmpdir, + 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"update-state", 'U', + "Mark tables as crashed if any errors were found.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"unpack", 'u', + "Unpack file packed with mariapack.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', + "Print more information. This can be used with --description and --check. Use many -v for more verbosity!", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', + "Print version and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"wait", 'w', + "Wait if table is locked.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { "key_buffer_size", OPT_KEY_BUFFER_SIZE, "", + (gptr*) &check_param.use_buffers, (gptr*) &check_param.use_buffers, 0, + GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, (long) MALLOC_OVERHEAD, + (long) ~0L, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0}, + { "key_cache_block_size", OPT_KEY_CACHE_BLOCK_SIZE, "", + (gptr*) &opt_key_cache_block_size, + (gptr*) &opt_key_cache_block_size, 0, + GET_LONG, REQUIRED_ARG, MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, + MARIA_MAX_KEY_BLOCK_LENGTH, 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0}, + { "maria_block_size", OPT_MARIA_BLOCK_SIZE, "", + (gptr*) &opt_maria_block_size, (gptr*) &opt_maria_block_size, 0, + GET_LONG, REQUIRED_ARG, MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, + MARIA_MAX_KEY_BLOCK_LENGTH, 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0}, + { "read_buffer_size", OPT_READ_BUFFER_SIZE, "", + (gptr*) &check_param.read_buffer_length, + (gptr*) &check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, + (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, + { "write_buffer_size", OPT_WRITE_BUFFER_SIZE, "", + (gptr*) &check_param.write_buffer_length, + (gptr*) &check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, + (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, + { "sort_buffer_size", OPT_SORT_BUFFER_SIZE, "", + (gptr*) &check_param.sort_buffer_length, + (gptr*) &check_param.sort_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + (long) SORT_BUFFER_INIT, (long) (MIN_SORT_BUFFER + MALLOC_OVERHEAD), + (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, + { "sort_key_blocks", OPT_SORT_KEY_BLOCKS, "", + (gptr*) &check_param.sort_key_blocks, + (gptr*) &check_param.sort_key_blocks, 0, GET_ULONG, REQUIRED_ARG, + BUFFERS_WHEN_SORTING, 4L, 100L, 0L, 1L, 0}, + { "decode_bits", OPT_DECODE_BITS, "", (gptr*) &decode_bits, + (gptr*) &decode_bits, 0, GET_UINT, REQUIRED_ARG, 9L, 4L, 17L, 0L, 1L, 0}, + { "ft_min_word_len", OPT_FT_MIN_WORD_LEN, "", (gptr*) &ft_min_word_len, + (gptr*) &ft_min_word_len, 0, GET_ULONG, REQUIRED_ARG, 4, 1, HA_FT_MAXCHARLEN, + 0, 1, 0}, + { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (gptr*) &ft_max_word_len, + (gptr*) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXCHARLEN, 10, + HA_FT_MAXCHARLEN, 0, 1, 0}, + { "maria_ft_stopword_file", OPT_FT_STOPWORD_FILE, + "Use stopwords from this file instead of built-in list.", + (gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"stats_method", OPT_STATS_METHOD, + "Specifies how index statistics collection code should threat NULLs. 
" + "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " + "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", + (gptr*) &maria_stats_method_str, (gptr*) &maria_stats_method_str, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +#include <help_start.h> + +static void print_version(void) +{ + printf("%s Ver 1.0 for %s at %s\n", my_progname, SYSTEM_TYPE, + MACHINE_TYPE); + NETWARE_SET_SCREEN_MODE(1); +} + + +static void usage(void) +{ + print_version(); + puts("By Monty, for your professional use"); + puts("This software comes with NO WARRANTY: see the PUBLIC for details.\n"); + puts("Description, check and repair of MARIA tables."); + puts("Used without options all tables on the command will be checked for errors"); + printf("Usage: %s [OPTIONS] tables[.MYI]\n", my_progname_short); + printf("\nGlobal options:\n"); +#ifndef DBUG_OFF + printf("\ + -#, --debug=... Output debug log. Often this is 'd:t:o,filename'.\n"); +#endif + printf("\ + -?, --help Display this help and exit.\n\ + -O, --set-variable var=option.\n\ + Change the value of a variable. Please note that\n\ + this option is deprecated; you can set variables\n\ + directly with '--variable-name=value'.\n\ + -t, --tmpdir=path Path for temporary files. Multiple paths can be\n\ + specified, separated by "); +#if defined( __WIN__) || defined(__NETWARE__) + printf("semicolon (;)"); +#else + printf("colon (:)"); +#endif + printf(", they will be used\n\ + in a round-robin fashion.\n\ + -s, --silent Only print errors. One can use two -s to make\n\ + maria_chk very silent.\n\ + -v, --verbose Print more information. This can be used with\n\ + --description and --check. Use many -v for more verbosity.\n\ + -V, --version Print version and exit.\n\ + -w, --wait Wait if table is locked.\n\n"); +#ifdef DEBUG + puts(" --start-check-pos=# Start reading file at given offset.\n"); +#endif + + puts("Check options (check is the default action for maria_chk):\n\ + -c, --check Check table for errors.\n\ + -e, --extend-check Check the table VERY throughly. Only use this in\n\ + extreme cases as maria_chk should normally be able to\n\ + find out if the table is ok even without this switch.\n\ + -F, --fast Check only tables that haven't been closed properly.\n\ + -C, --check-only-changed\n\ + Check only tables that have changed since last check.\n\ + -f, --force Restart with '-r' if there are any errors in the table.\n\ + States will be updated as with '--update-state'.\n\ + -i, --information Print statistics information about table that is checked.\n\ + -m, --medium-check Faster than extend-check, but only finds 99.99% of\n\ + all errors. Should be good enough for most cases.\n\ + -U --update-state Mark tables as crashed if you find any errors.\n\ + -T, --read-only Don't mark table as checked.\n"); + + puts("Repair options (When using '-r' or '-o'):\n\ + -B, --backup Make a backup of the .MYD file as 'filename-time.BAK'.\n\ + --correct-checksum Correct checksum information for table.\n\ + -D, --data-file-length=# Max length of data file (when recreating data\n\ + file when it's full).\n\ + -e, --extend-check Try to recover every possible row from the data file\n\ + Normally this will also find a lot of garbage rows;\n\ + Don't use this option if you are not totally desperate.\n\ + -f, --force Overwrite old temporary files.\n\ + -k, --keys-used=# Tell MARIA to update only some specific keys. # is a\n\ + bit mask of which keys to use. 
This can be used to\n\ + get faster inserts.\n\ + --max-record-length=#\n\ + Skip rows bigger than this if maria_chk can't allocate\n\ + memory to hold it.\n\ + -r, --recover Can fix almost anything except unique keys that aren't\n\ + unique.\n\ + -n, --sort-recover Forces recovering with sorting even if the temporary\n\ + file would be very big.\n\ + -p, --parallel-recover\n\ + Uses the same technique as '-r' and '-n', but creates\n\ + all the keys in parallel, in different threads.\n\ + -o, --safe-recover Uses old recovery method; Slower than '-r' but can\n\ + handle a couple of cases where '-r' reports that it\n\ + can't fix the data file.\n\ + --character-sets-dir=...\n\ + Directory where character sets are.\n\ + --set-collation=name\n\ + Change the collation used by the index.\n\ + -q, --quick Faster repair by not modifying the data file.\n\ + One can give a second '-q' to force maria_chk to\n\ + modify the original datafile in case of duplicate keys.\n\ + NOTE: Tables where the data file is currupted can't be\n\ + fixed with this option.\n\ + -u, --unpack Unpack file packed with mariapack.\n\ +"); + + puts("Other actions:\n\ + -a, --analyze Analyze distribution of keys. Will make some joins in\n\ + MySQL faster. You can check the calculated distribution\n\ + by using '--description --verbose table_name'.\n\ + --stats_method=name Specifies how index statistics collection code should\n\ + threat NULLs. Possible values of name are \"nulls_unequal\"\n\ + (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\ + \"nulls_ignored\".\n\ + -d, --description Prints some information about table.\n\ + -A, --set-auto-increment[=value]\n\ + Force auto_increment to start at this or higher value\n\ + If no value is given, then sets the next auto_increment\n\ + value to the highest used value for the auto key + 1.\n\ + -S, --sort-index Sort index blocks. This speeds up 'read-next' in\n\ + applications.\n\ + -R, --sort-records=#\n\ + Sort records according to an index. 
This makes your\n\ + data much more localized and may speed up things\n\ + (It may be VERY slow to do a sort the first time!).\n\ + -b, --block-search=#\n\ + Find a record, a block at given offset belongs to."); + + print_defaults("my", load_default_groups); + my_print_variables(my_long_options); +} + +#include <help_end.h> + +const char *maria_stats_method_names[] = {"nulls_unequal", "nulls_equal", + "nulls_ignored", NullS}; +TYPELIB maria_stats_method_typelib= { + array_elements(maria_stats_method_names) - 1, "", + maria_stats_method_names, NULL}; + + /* Read options */ + +static my_bool +get_one_option(int optid, + const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch (optid) { +#ifdef __NETWARE__ + case OPT_AUTO_CLOSE: + setscreenmode(SCR_AUTOCLOSE_ON_EXIT); + break; +#endif + case 'a': + if (argument == disabled_my_option) + check_param.testflag&= ~T_STATISTICS; + else + check_param.testflag|= T_STATISTICS; + break; + case 'A': + if (argument) + check_param.auto_increment_value= strtoull(argument, NULL, 0); + else + check_param.auto_increment_value= 0; /* Set to max used value */ + check_param.testflag|= T_AUTO_INC; + break; + case 'b': + check_param.search_after_block= strtoul(argument, NULL, 10); + break; + case 'B': + if (argument == disabled_my_option) + check_param.testflag&= ~T_BACKUP_DATA; + else + check_param.testflag|= T_BACKUP_DATA; + break; + case 'c': + if (argument == disabled_my_option) + check_param.testflag&= ~T_CHECK; + else + check_param.testflag|= T_CHECK; + break; + case 'C': + if (argument == disabled_my_option) + check_param.testflag&= ~(T_CHECK | T_CHECK_ONLY_CHANGED); + else + check_param.testflag|= T_CHECK | T_CHECK_ONLY_CHANGED; + break; + case 'D': + check_param.max_data_file_length=strtoll(argument, NULL, 10); + break; + case 's': /* silent */ + if (argument == disabled_my_option) + check_param.testflag&= ~(T_SILENT | T_VERY_SILENT); + else + { + if (check_param.testflag & T_SILENT) + check_param.testflag|= T_VERY_SILENT; + check_param.testflag|= T_SILENT; + check_param.testflag&= ~T_WRITE_LOOP; + } + break; + case 'w': + if (argument == disabled_my_option) + check_param.testflag&= ~T_WAIT_FOREVER; + else + check_param.testflag|= T_WAIT_FOREVER; + break; + case 'd': /* description if isam-file */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_DESCRIPT; + else + check_param.testflag|= T_DESCRIPT; + break; + case 'e': /* extend check */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_EXTEND; + else + check_param.testflag|= T_EXTEND; + break; + case 'i': + if (argument == disabled_my_option) + check_param.testflag&= ~T_INFO; + else + check_param.testflag|= T_INFO; + break; + case 'f': + if (argument == disabled_my_option) + { + check_param.tmpfile_createflag= O_RDWR | O_TRUNC | O_EXCL; + check_param.testflag&= ~(T_FORCE_CREATE | T_UPDATE_STATE); + } + else + { + check_param.tmpfile_createflag= O_RDWR | O_TRUNC; + check_param.testflag|= T_FORCE_CREATE | T_UPDATE_STATE; + } + break; + case 'F': + if (argument == disabled_my_option) + check_param.testflag&= ~T_FAST; + else + check_param.testflag|= T_FAST; + break; + case 'k': + check_param.keys_in_use= (ulonglong) strtoll(argument, NULL, 10); + break; + case 'm': + if (argument == disabled_my_option) + check_param.testflag&= ~T_MEDIUM; + else + check_param.testflag|= T_MEDIUM; /* Medium check */ + break; + case 'r': /* Repair table */ + check_param.testflag&= ~T_REP_ANY; + if (argument != disabled_my_option) + check_param.testflag|= T_REP_BY_SORT; 
+ break; + case 'p': + check_param.testflag&= ~T_REP_ANY; + if (argument != disabled_my_option) + check_param.testflag|= T_REP_PARALLEL; + break; + case 'o': + check_param.testflag&= ~T_REP_ANY; + check_param.force_sort= 0; + if (argument != disabled_my_option) + { + check_param.testflag|= T_REP; + my_disable_async_io= 1; /* More safety */ + } + break; + case 'n': + check_param.testflag&= ~T_REP_ANY; + if (argument == disabled_my_option) + check_param.force_sort= 0; + else + { + check_param.testflag|= T_REP_BY_SORT; + check_param.force_sort= 1; + } + break; + case 'q': + if (argument == disabled_my_option) + check_param.testflag&= ~(T_QUICK | T_FORCE_UNIQUENESS); + else + check_param.testflag|= + (check_param.testflag & T_QUICK) ? T_FORCE_UNIQUENESS : T_QUICK; + break; + case 'u': + if (argument == disabled_my_option) + check_param.testflag&= ~(T_UNPACK | T_REP_BY_SORT); + else + check_param.testflag|= T_UNPACK | T_REP_BY_SORT; + break; + case 'v': /* Verbose */ + if (argument == disabled_my_option) + { + check_param.testflag&= ~T_VERBOSE; + check_param.verbose=0; + } + else + { + check_param.testflag|= T_VERBOSE; + check_param.verbose++; + } + break; + case 'R': /* Sort records */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_SORT_RECORDS; + else + { + check_param.testflag|= T_SORT_RECORDS; + check_param.opt_sort_key= (uint) atoi(argument) - 1; + if (check_param.opt_sort_key >= MARIA_MAX_KEY) + { + fprintf(stderr, + "The value of the sort key is bigger than max key: %d.\n", + MARIA_MAX_KEY); + exit(1); + } + } + break; + case 'S': /* Sort index */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_SORT_INDEX; + else + check_param.testflag|= T_SORT_INDEX; + break; + case 'T': + if (argument == disabled_my_option) + check_param.testflag&= ~T_READONLY; + else + check_param.testflag|= T_READONLY; + break; + case 'U': + if (argument == disabled_my_option) + check_param.testflag&= ~T_UPDATE_STATE; + else + check_param.testflag|= T_UPDATE_STATE; + break; + case '#': + if (argument == disabled_my_option) + { + DBUG_POP(); + } + else + { + DBUG_PUSH(argument ? 
argument : "d:t:o,/tmp/maria_chk.trace"); + } + break; + case 'V': + print_version(); + exit(0); + case OPT_CORRECT_CHECKSUM: + if (argument == disabled_my_option) + check_param.testflag&= ~T_CALC_CHECKSUM; + else + check_param.testflag|= T_CALC_CHECKSUM; + break; + case OPT_STATS_METHOD: + { + int method; + enum_handler_stats_method method_conv; + LINT_INIT(method_conv); + maria_stats_method_str= argument; + if ((method=find_type(argument, &maria_stats_method_typelib, 2)) <= 0) + { + fprintf(stderr, "Invalid value of stats_method: %s.\n", argument); + exit(1); + } + switch (method-1) { + case 0: + method_conv= MI_STATS_METHOD_NULLS_EQUAL; + break; + case 1: + method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL; + break; + case 2: + method_conv= MI_STATS_METHOD_IGNORE_NULLS; + break; + } + check_param.stats_method= method_conv; + break; + } +#ifdef DEBUG /* Only useful if debugging */ + case OPT_START_CHECK_POS: + check_param.start_check_pos= strtoull(argument, NULL, 0); + break; +#endif + case 'H': + my_print_help(my_long_options); + exit(0); + case '?': + usage(); + exit(0); + } + return 0; +} + + +static void get_options(register int *argc,register char ***argv) +{ + int ho_error; + + load_defaults("my", load_default_groups, argc, argv); + default_argv= *argv; + if (isatty(fileno(stdout))) + check_param.testflag|=T_WRITE_LOOP; + + if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) + exit(ho_error); + + /* If using repair, then update checksum if one uses --update-state */ + if ((check_param.testflag & T_UPDATE_STATE) && + (check_param.testflag & T_REP_ANY)) + check_param.testflag|= T_CALC_CHECKSUM; + + if (*argc == 0) + { + usage(); + exit(-1); + } + + if ((check_param.testflag & T_UNPACK) && + (check_param.testflag & (T_QUICK | T_SORT_RECORDS))) + { + VOID(fprintf(stderr, + "%s: --unpack can't be used with --quick or --sort-records\n", + my_progname_short)); + exit(1); + } + if ((check_param.testflag & T_READONLY) && + (check_param.testflag & + (T_REP_ANY | T_STATISTICS | T_AUTO_INC | + T_SORT_RECORDS | T_SORT_INDEX | T_FORCE_CREATE))) + { + VOID(fprintf(stderr, + "%s: Can't use --readonly when repairing or sorting\n", + my_progname_short)); + exit(1); + } + + if (init_tmpdir(&maria_chk_tmpdir, opt_tmpdir)) + exit(1); + + check_param.tmpdir=&maria_chk_tmpdir; + check_param.key_cache_block_size= opt_key_cache_block_size; + + if (set_collation_name) + if (!(set_collation= get_charset_by_name(set_collation_name, + MYF(MY_WME)))) + exit(1); + + maria_block_size=(uint) 1 << my_bit_log2(opt_maria_block_size); + return; +} /* get options */ + + + /* Check table */ + +static int maria_chk(HA_CHECK *param, my_string filename) +{ + int error,lock_type,recreate; + int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS); + MARIA_HA *info; + File datafile; + char llbuff[22],llbuff2[22]; + my_bool state_updated=0; + MARIA_SHARE *share; + DBUG_ENTER("maria_chk"); + + param->out_flag=error=param->warning_printed=param->error_printed= + recreate=0; + datafile=0; + param->isam_file_name=filename; /* For error messages */ + if (!(info=maria_open(filename, + (param->testflag & (T_DESCRIPT | T_READONLY)) ? + O_RDONLY : O_RDWR, + HA_OPEN_FOR_REPAIR | + ((param->testflag & T_WAIT_FOREVER) ? + HA_OPEN_WAIT_IF_LOCKED : + (param->testflag & T_DESCRIPT) ? 
+                          HA_OPEN_IGNORE_IF_LOCKED : HA_OPEN_ABORT_IF_LOCKED)))) +  { +    /* Avoid printing the isam file name twice */ +    param->error_printed=1; +    switch (my_errno) { +    case HA_ERR_CRASHED: +      _ma_check_print_error(param,"'%s' doesn't have a correct index definition. You need to recreate it before you can do a repair",filename); +      break; +    case HA_ERR_NOT_A_TABLE: +      _ma_check_print_error(param,"'%s' is not a MARIA-table",filename); +      break; +    case HA_ERR_CRASHED_ON_USAGE: +      _ma_check_print_error(param,"'%s' is marked as crashed",filename); +      break; +    case HA_ERR_CRASHED_ON_REPAIR: +      _ma_check_print_error(param,"'%s' is marked as crashed after last repair",filename); +      break; +    case HA_ERR_OLD_FILE: +      _ma_check_print_error(param,"'%s' is an old type of MARIA-table", filename); +      break; +    case HA_ERR_END_OF_FILE: +      _ma_check_print_error(param,"Couldn't read complete header from '%s'", filename); +      break; +    case EAGAIN: +      _ma_check_print_error(param,"'%s' is locked. Use -w to wait until unlocked",filename); +      break; +    case ENOENT: +      _ma_check_print_error(param,"File '%s' doesn't exist",filename); +      break; +    case EACCES: +      _ma_check_print_error(param,"You don't have permission to use '%s'", +                            filename); +      break; +    default: +      _ma_check_print_error(param,"%d when opening MARIA-table '%s'", +                            my_errno,filename); +      break; +    } +    DBUG_RETURN(1); +  } +  share=info->s; +  share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifying it */ +  share->tot_locks-= share->r_locks; +  share->r_locks=0; + +  if (share->data_file_type == BLOCK_RECORD && +      (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_FAST | T_STATISTICS | +                          T_CHECK | T_CHECK_ONLY_CHANGED))) +  { +    _ma_check_print_error(param, +                          "Record format used by '%s' is not yet supported with repair/check", +                          filename); +    param->error_printed= 0; +    error= 1; +    goto end2; +  } + +  /* +    Skip the checking of the file if: +    We are using --fast and the table is closed properly +    We are using --check-only-changed-tables and the table hasn't changed +  */ +  if (param->testflag & (T_FAST | T_CHECK_ONLY_CHANGED)) +  { +    my_bool need_to_check= (maria_is_crashed(info) || +                            share->state.open_count != 0); + +    if ((param->testflag & (T_REP_ANY | T_SORT_RECORDS)) && +        ((share->state.changed & (STATE_CHANGED | STATE_CRASHED | +                                  STATE_CRASHED_ON_REPAIR) || +          !(param->testflag & T_CHECK_ONLY_CHANGED)))) +      need_to_check=1; + +    if (info->s->base.keys && info->state->records) +    { +      if ((param->testflag & T_STATISTICS) && +          (share->state.changed & STATE_NOT_ANALYZED)) +        need_to_check=1; +      if ((param->testflag & T_SORT_INDEX) && +          (share->state.changed & STATE_NOT_SORTED_PAGES)) +        need_to_check=1; +      if ((param->testflag & T_REP_BY_SORT) && +          (share->state.changed & STATE_NOT_OPTIMIZED_KEYS)) +        need_to_check=1; +    } +    if ((param->testflag & T_CHECK_ONLY_CHANGED) && +        (share->state.changed & (STATE_CHANGED | STATE_CRASHED | +                                 STATE_CRASHED_ON_REPAIR))) +      need_to_check=1; +    if (!need_to_check) +    { +      if (!(param->testflag & T_SILENT) || param->testflag & T_INFO) +        printf("MARIA file: %s is already checked\n",filename); +      if (maria_close(info)) +      { +        _ma_check_print_error(param,"%d when closing MARIA-table '%s'", +                              my_errno,filename); +        DBUG_RETURN(1); +      } +      DBUG_RETURN(0); +    } +  } +  if ((param->testflag & (T_REP_ANY | T_STATISTICS | +                          T_SORT_RECORDS | T_SORT_INDEX)) && +      (((param->testflag & T_UNPACK) && +        share->data_file_type == COMPRESSED_RECORD) || +       mi_uint2korr(share->state.header.state_info_length) != +       MARIA_STATE_INFO_SIZE || +       mi_uint2korr(share->state.header.base_info_length) != + 
MARIA_BASE_INFO_SIZE || + maria_is_any_intersect_keys_active(param->keys_in_use, share->base.keys, + ~share->state.key_map) || + maria_test_if_almost_full(info) || + info->s->state.header.file_version[3] != maria_file_magic[3] || + (set_collation && + set_collation->number != share->state.header.language) || + maria_block_size != MARIA_KEY_BLOCK_LENGTH)) + { + if (set_collation) + param->language= set_collation->number; + if (maria_recreate_table(param, &info,filename)) + { + VOID(fprintf(stderr, + "MARIA-table '%s' is not fixed because of errors\n", + filename)); + return(-1); + } + recreate=1; + if (!(param->testflag & T_REP_ANY)) + { + param->testflag|=T_REP_BY_SORT; /* if only STATISTICS */ + if (!(param->testflag & T_SILENT)) + printf("- '%s' has old table-format. Recreating index\n",filename); + rep_quick|=T_QUICK; + } + share=info->s; + share->tot_locks-= share->r_locks; + share->r_locks=0; + } + + if (param->testflag & T_DESCRIPT) + { + param->total_files++; + param->total_records+=info->state->records; + param->total_deleted+=info->state->del; + descript(param, info, filename); + } + else + { + if (!stopwords_inited++) + ft_init_stopwords(); + + if (!(param->testflag & T_READONLY)) + lock_type = F_WRLCK; /* table is changed */ + else + lock_type= F_RDLCK; + if (info->lock_type == F_RDLCK) + info->lock_type=F_UNLCK; /* Read only table */ + if (_ma_readinfo(info,lock_type,0)) + { + _ma_check_print_error(param,"Can't lock indexfile of '%s', error: %d", + filename,my_errno); + param->error_printed=0; + error= 1; + goto end2; + } + /* + _ma_readinfo() has locked the table. + We mark the table as locked (without doing file locks) to be able to + use functions that only works on locked tables (like row caching). + */ + maria_lock_database(info, F_EXTRA_LCK); + datafile=info->dfile; + + if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX)) + { + if (param->testflag & T_REP_ANY) + { + ulonglong tmp=share->state.key_map; + maria_copy_keys_active(share->state.key_map, share->base.keys, + param->keys_in_use); + if (tmp != share->state.key_map) + info->update|=HA_STATE_CHANGED; + } + if (rep_quick && + maria_chk_del(param, info, param->testflag & ~T_VERBOSE)) + { + if (param->testflag & T_FORCE_CREATE) + { + rep_quick=0; + _ma_check_print_info(param,"Creating new data file\n"); + } + else + { + error=1; + _ma_check_print_error(param, + "Quick-recover aborted; Run recovery without switch 'q'"); + } + } + if (!error) + { + if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) && + (maria_is_any_key_active(share->state.key_map) || + (rep_quick && !param->keys_in_use && !recreate)) && + maria_test_if_sort_rep(info, info->state->records, + info->s->state.key_map, + param->force_sort)) + { + if (param->testflag & T_REP_BY_SORT) + error=maria_repair_by_sort(param,info,filename,rep_quick); + else + error=maria_repair_parallel(param,info,filename,rep_quick); + state_updated=1; + } + else if (param->testflag & T_REP_ANY) + error=maria_repair(param, info,filename,rep_quick); + } + if (!error && param->testflag & T_SORT_RECORDS) + { + /* + The data file is nowadays reopened in the repair code so we should + soon remove the following reopen-code + */ +#ifndef TO_BE_REMOVED + if (param->out_flag & O_NEW_DATA) + { /* Change temp file to org file */ + VOID(my_close(info->dfile,MYF(MY_WME))); /* Close new file */ + error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, + MYF(0)); + if (_ma_open_datafile(info,info->s, -1)) + error=1; + param->out_flag&= ~O_NEW_DATA; /* We are 
using new datafile */ + param->read_cache.file=info->dfile; + } +#endif + if (! error) + { + uint key; + /* + We can't update the index in maria_sort_records if we have a + prefix compressed or fulltext index + */ + my_bool update_index=1; + for (key=0 ; key < share->base.keys; key++) + if (share->keyinfo[key].flag & (HA_BINARY_PACK_KEY|HA_FULLTEXT)) + update_index=0; + + error=maria_sort_records(param,info,filename,param->opt_sort_key, + /* what is the following parameter for ? */ + (my_bool) !(param->testflag & T_REP), + update_index); + datafile=info->dfile; /* This is now locked */ + if (!error && !update_index) + { + if (param->verbose) + puts("Table had a compressed index; We must now recreate the index"); + error=maria_repair_by_sort(param,info,filename,1); + } + } + } + if (!error && param->testflag & T_SORT_INDEX) + error=maria_sort_index(param,info,filename); + if (!error) + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + else + maria_mark_crashed(info); + } + else if ((param->testflag & T_CHECK) || !(param->testflag & T_AUTO_INC)) + { + if (!(param->testflag & T_SILENT) || param->testflag & T_INFO) + printf("Checking MARIA file: %s\n",filename); + if (!(param->testflag & T_SILENT)) + printf("Data records: %7s Deleted blocks: %7s\n", + llstr(info->state->records,llbuff), + llstr(info->state->del,llbuff2)); + error =maria_chk_status(param,info); + maria_intersect_keys_active(share->state.key_map, param->keys_in_use); + error =maria_chk_size(param,info); + if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE))) + error|=maria_chk_del(param, info,param->testflag); + if ((!error || (!(param->testflag & (T_FAST | T_FORCE_CREATE)) && + !param->start_check_pos))) + { + error|=maria_chk_key(param, info); + if (!error && (param->testflag & (T_STATISTICS | T_AUTO_INC))) + error=maria_update_state_info(param, info, + ((param->testflag & T_STATISTICS) ? + UPDATE_STAT : 0) | + ((param->testflag & T_AUTO_INC) ? + UPDATE_AUTO_INC : 0)); + } + if ((!rep_quick && !error) || + !(param->testflag & (T_FAST | T_FORCE_CREATE))) + { + if (param->testflag & (T_EXTEND | T_MEDIUM)) + VOID(init_key_cache(maria_key_cache,opt_key_cache_block_size, + param->use_buffers, 0, 0)); + VOID(init_io_cache(¶m->read_cache,datafile, + (uint) param->read_buffer_length, + READ_CACHE, + (param->start_check_pos ? + param->start_check_pos : + share->pack.header_length), + 1, + MYF(MY_WME))); + maria_lock_memory(param); + if ((info->s->data_file_type != STATIC_RECORD) || + (param->testflag & (T_EXTEND | T_MEDIUM))) + error|=maria_chk_data_link(param, info, param->testflag & T_EXTEND); + error|=_ma_flush_blocks(param, share->key_cache, share->kfile); + VOID(end_io_cache(¶m->read_cache)); + } + if (!error) + { + if ((share->state.changed & STATE_CHANGED) && + (param->testflag & T_UPDATE_STATE)) + info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + } + else if (!maria_is_crashed(info) && + (param->testflag & T_UPDATE_STATE)) + { /* Mark crashed */ + maria_mark_crashed(info); + info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + } + } + } + if ((param->testflag & T_AUTO_INC) || + ((param->testflag & T_REP_ANY) && info->s->base.auto_key)) + _ma_update_auto_increment_key(param, info, + (my_bool) !test(param->testflag & T_AUTO_INC)); + + if (!(param->testflag & T_DESCRIPT)) + { + if (info->update & HA_STATE_CHANGED && ! 
(param->testflag & T_READONLY)) + error|=maria_update_state_info(param, info, + UPDATE_OPEN_COUNT | + (((param->testflag & T_REP_ANY) ? + UPDATE_TIME : 0) | + (state_updated ? UPDATE_STAT : 0) | + ((param->testflag & T_SORT_RECORDS) ? + UPDATE_SORT : 0))); + info->update&= ~HA_STATE_CHANGED; + } + maria_lock_database(info, F_UNLCK); +end2: + if (maria_close(info)) + { + _ma_check_print_error(param,"%d when closing MARIA-table '%s'",my_errno,filename); + DBUG_RETURN(1); + } + if (error == 0) + { + if (param->out_flag & O_NEW_DATA) + error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, + ((param->testflag & T_BACKUP_DATA) ? + MYF(MY_REDEL_MAKE_BACKUP) : MYF(0))); + if (param->out_flag & O_NEW_INDEX) + error|=maria_change_to_newfile(filename,MARIA_NAME_IEXT,INDEX_TMP_EXT, + MYF(0)); + } + VOID(fflush(stdout)); VOID(fflush(stderr)); + if (param->error_printed) + { + if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX)) + { + VOID(fprintf(stderr, + "MARIA-table '%s' is not fixed because of errors\n", + filename)); + if (param->testflag & T_REP_ANY) + VOID(fprintf(stderr, + "Try fixing it by using the --safe-recover (-o), the --force (-f) option or by not using the --quick (-q) flag\n")); + } + else if (!(param->error_printed & 2) && + !(param->testflag & T_FORCE_CREATE)) + VOID(fprintf(stderr, + "MARIA-table '%s' is corrupted\nFix it using switch \"-r\" or \"-o\"\n", + filename)); + } + else if (param->warning_printed && + ! (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX | + T_FORCE_CREATE))) + VOID(fprintf(stderr, "MARIA-table '%s' is usable but should be fixed\n", + filename)); + VOID(fflush(stderr)); + DBUG_RETURN(error); +} /* maria_chk */ + + +/* Write info about table */ + +static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) +{ + uint key,keyseg_nr,field,start; + reg3 MARIA_KEYDEF *keyinfo; + reg2 HA_KEYSEG *keyseg; + reg4 const char *text; + char buff[160],length[10],*pos,*end; + enum en_fieldtype type; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + DBUG_ENTER("describe"); + + printf("\nMARIA file: %s\n",name); + printf("Record format: %s\n", record_formats[share->data_file_type]); + printf("Character set: %s (%d)\n", + get_charset_name(share->state.header.language), + share->state.header.language); + + if (param->testflag & T_VERBOSE) + { + printf("File-version: %d\n", + (int) share->state.header.file_version[3]); + if (share->state.create_time) + { + get_date(buff,1,share->state.create_time); + printf("Creation time: %s\n",buff); + } + if (share->state.check_time) + { + get_date(buff,1,share->state.check_time); + printf("Recover time: %s\n",buff); + } + pos=buff; + if (share->state.changed & STATE_CRASHED) + strmov(buff,"crashed"); + else + { + if (share->state.open_count) + pos=strmov(pos,"open,"); + if (share->state.changed & STATE_CHANGED) + pos=strmov(pos,"changed,"); + else + pos=strmov(pos,"checked,"); + if (!(share->state.changed & STATE_NOT_ANALYZED)) + pos=strmov(pos,"analyzed,"); + if (!(share->state.changed & STATE_NOT_OPTIMIZED_KEYS)) + pos=strmov(pos,"optimized keys,"); + if (!(share->state.changed & STATE_NOT_SORTED_PAGES)) + pos=strmov(pos,"sorted index pages,"); + pos[-1]=0; /* Remove extra ',' */ + } + printf("Status: %s\n",buff); + if (share->base.auto_key) + { + printf("Auto increment key: %16d Last value: %18s\n", + share->base.auto_key, + llstr(share->state.auto_increment,llbuff)); + } + if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + printf("Checksum: 
%26s\n",llstr(info->state->checksum,llbuff)); +; + if (share->options & HA_OPTION_DELAY_KEY_WRITE) + printf("Keys are only flushed at close\n"); + + } + printf("Data records: %16s Deleted blocks: %18s\n", + llstr(info->state->records,llbuff),llstr(info->state->del,llbuff2)); + if (param->testflag & T_SILENT) + DBUG_VOID_RETURN; /* This is enough */ + + if (param->testflag & T_VERBOSE) + { +#ifdef USE_RELOC + printf("Init-relocation: %16s\n",llstr(share->base.reloc,llbuff)); +#endif + printf("Datafile parts: %16s Deleted data: %18s\n", + llstr(share->state.split,llbuff), + llstr(info->state->empty,llbuff2)); + printf("Datafile pointer (bytes): %11d Keyfile pointer (bytes): %13d\n", + share->rec_reflength,share->base.key_reflength); + printf("Datafile length: %16s Keyfile length: %18s\n", + llstr(info->state->data_file_length,llbuff), + llstr(info->state->key_file_length,llbuff2)); + + if (info->s->base.reloc == 1L && info->s->base.records == 1L) + puts("This is a one-record table"); + else + { + if (share->base.max_data_file_length != HA_OFFSET_ERROR || + share->base.max_key_file_length != HA_OFFSET_ERROR) + printf("Max datafile length: %16s Max keyfile length: %18s\n", + llstr(share->base.max_data_file_length-1,llbuff), + llstr(share->base.max_key_file_length-1,llbuff2)); + } + } + printf("Block_size: %16d\n",(int) share->block_size); + printf("Recordlength: %16d\n",(int) share->base.pack_reclength); + if (! maria_is_all_keys_active(share->state.key_map, share->base.keys)) + { + longlong2str(share->state.key_map,buff,2); + printf("Using only keys '%s' of %d possibly keys\n", + buff, share->base.keys); + } + puts("\ntable description:"); + printf("Key Start Len Index Type"); + if (param->testflag & T_VERBOSE) + printf(" Rec/key Root Blocksize"); + VOID(putchar('\n')); + + for (key=keyseg_nr=0, keyinfo= &share->keyinfo[0] ; + key < share->base.keys; + key++,keyinfo++) + { + keyseg=keyinfo->seg; + if (keyinfo->flag & HA_NOSAME) text="unique "; + else if (keyinfo->flag & HA_FULLTEXT) text="fulltext "; + else text="multip."; + + pos=buff; + if (keyseg->flag & HA_REVERSE_SORT) + *pos++ = '-'; + pos=strmov(pos,type_names[keyseg->type]); + *pos++ = ' '; + *pos=0; + if (keyinfo->flag & HA_PACK_KEY) + pos=strmov(pos,prefix_packed_txt); + if (keyinfo->flag & HA_BINARY_PACK_KEY) + pos=strmov(pos,bin_packed_txt); + if (keyseg->flag & HA_SPACE_PACK) + pos=strmov(pos,diff_txt); + if (keyseg->flag & HA_BLOB_PART) + pos=strmov(pos,blob_txt); + if (keyseg->flag & HA_NULL_PART) + pos=strmov(pos,null_txt); + *pos=0; + + printf("%-4d%-6ld%-3d %-8s%-21s", + key+1,(long) keyseg->start+1,keyseg->length,text,buff); + if (share->state.key_root[key] != HA_OFFSET_ERROR) + llstr(share->state.key_root[key],buff); + else + buff[0]=0; + if (param->testflag & T_VERBOSE) + printf("%11lu %12s %10d", + share->state.rec_per_key_part[keyseg_nr++], + buff,keyinfo->block_length); + VOID(putchar('\n')); + while ((++keyseg)->type != HA_KEYTYPE_END) + { + pos=buff; + if (keyseg->flag & HA_REVERSE_SORT) + *pos++ = '-'; + pos=strmov(pos,type_names[keyseg->type]); + *pos++= ' '; + if (keyseg->flag & HA_SPACE_PACK) + pos=strmov(pos,diff_txt); + if (keyseg->flag & HA_BLOB_PART) + pos=strmov(pos,blob_txt); + if (keyseg->flag & HA_NULL_PART) + pos=strmov(pos,null_txt); + *pos=0; + printf(" %-6ld%-3d %-21s", + (long) keyseg->start+1,keyseg->length,buff); + if (param->testflag & T_VERBOSE) + printf("%11lu", share->state.rec_per_key_part[keyseg_nr++]); + VOID(putchar('\n')); + } + keyseg++; + } + if (share->state.header.uniques) + { + 
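/* Print the definition of each UNIQUE constraint in the table */ +    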
MARIA_UNIQUEDEF *uniqueinfo; + puts("\nUnique Key Start Len Nullpos Nullbit Type"); + for (key=0,uniqueinfo= &share->uniqueinfo[0] ; + key < share->state.header.uniques; key++, uniqueinfo++) + { + my_bool new_row=0; + char null_bit[8],null_pos[8]; + printf("%-8d%-5d",key+1,uniqueinfo->key+1); + for (keyseg=uniqueinfo->seg ; keyseg->type != HA_KEYTYPE_END ; keyseg++) + { + if (new_row) + fputs(" ",stdout); + null_bit[0]=null_pos[0]=0; + if (keyseg->null_bit) + { + sprintf(null_bit,"%d",keyseg->null_bit); + sprintf(null_pos,"%ld",(long) keyseg->null_pos+1); + } + printf("%-7ld%-5d%-9s%-10s%-30s\n", + (long) keyseg->start+1,keyseg->length, + null_pos,null_bit, + type_names[keyseg->type]); + new_row=1; + } + } + } + if (param->verbose > 1) + { + char null_bit[8],null_pos[8]; + printf("\nField Start Length Nullpos Nullbit Type"); + if (share->options & HA_OPTION_COMPRESS_RECORD) + printf(" Huff tree Bits"); + VOID(putchar('\n')); + + for (field=0 ; field < share->base.fields ; field++) + { + if (share->options & HA_OPTION_COMPRESS_RECORD) + type=share->rec[field].base_type; + else + type=(enum en_fieldtype) share->rec[field].type; + end=strmov(buff,field_pack[type]); + if (share->options & HA_OPTION_COMPRESS_RECORD) + { + if (share->rec[field].pack_type & PACK_TYPE_SELECTED) + end=strmov(end,", not_always"); + if (share->rec[field].pack_type & PACK_TYPE_SPACE_FIELDS) + end=strmov(end,", no empty"); + if (share->rec[field].pack_type & PACK_TYPE_ZERO_FILL) + { + sprintf(end,", zerofill(%d)",share->rec[field].space_length_bits); + end=strend(end); + } + } + if (buff[0] == ',') + strmov(buff,buff+2); + int10_to_str((long) share->rec[field].length,length,10); + null_bit[0]=null_pos[0]=0; + if (share->rec[field].null_bit) + { + sprintf(null_bit,"%d",share->rec[field].null_bit); + sprintf(null_pos,"%d",share->rec[field].null_pos+1); + } + printf("%-6d%-6u%-7s%-8s%-8s%-35s",field+1, + (uint) share->rec[field].offset+1, + length, null_pos, null_bit, buff); + if (share->options & HA_OPTION_COMPRESS_RECORD) + { + if (share->rec[field].huff_tree) + printf("%3d %2d", + (uint) (share->rec[field].huff_tree-share->decode_trees)+1, + share->rec[field].huff_tree->quick_table_bits); + } + VOID(putchar('\n')); + start+=share->rec[field].length; + } + } + DBUG_VOID_RETURN; +} /* describe */ + + + /* Sort records according to one key */ + +static int maria_sort_records(HA_CHECK *param, + register MARIA_HA *info, my_string name, + uint sort_key, + my_bool write_info, + my_bool update_index) +{ + int got_error; + uint key; + MARIA_KEYDEF *keyinfo; + File new_file; + byte *temp_buff; + ha_rows old_record_count; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + MARIA_SORT_INFO sort_info; + MARIA_SORT_PARAM sort_param; + DBUG_ENTER("sort_records"); + + bzero((char*)&sort_info,sizeof(sort_info)); + bzero((char*)&sort_param,sizeof(sort_param)); + sort_param.sort_info=&sort_info; + sort_info.param=param; + keyinfo= &share->keyinfo[sort_key]; + got_error=1; + temp_buff=0; + new_file= -1; + + if (! 
maria_is_key_active(share->state.key_map, sort_key)) + { + _ma_check_print_warning(param, + "Can't sort table '%s' on key %d; No such key", + name,sort_key+1); + param->error_printed=0; + DBUG_RETURN(0); /* Nothing to do */ + } + if (keyinfo->flag & HA_FULLTEXT) + { + _ma_check_print_warning(param,"Can't sort table '%s' on FULLTEXT key %d", + name,sort_key+1); + param->error_printed=0; + DBUG_RETURN(0); /* Nothing to do */ + } + if (share->data_file_type == COMPRESSED_RECORD) + { + _ma_check_print_warning(param,"Can't sort read-only table '%s'", name); + param->error_printed=0; + DBUG_RETURN(0); /* Nothing to do */ + } + if (!(param->testflag & T_SILENT)) + { + printf("- Sorting records for MARIA-table '%s'\n",name); + if (write_info) + printf("Data records: %9s Deleted: %9s\n", + llstr(info->state->records,llbuff), + llstr(info->state->del,llbuff2)); + } + if (share->state.key_root[sort_key] == HA_OFFSET_ERROR) + DBUG_RETURN(0); /* Nothing to do */ + + init_key_cache(maria_key_cache, opt_key_cache_block_size, param->use_buffers, + 0, 0); + if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, + WRITE_CACHE,share->pack.header_length,1, + MYF(MY_WME | MY_WAIT_IF_FULL))) + goto err; + info->opt_flag|=WRITE_CACHE_USED; + + if (!(temp_buff=(byte*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not enough memory for key block"); + goto err; + } + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for record"); + goto err; + } + fn_format(param->temp_filename,name,"", MARIA_NAME_DEXT,2+4+32); + new_file= my_create(fn_format(param->temp_filename, + param->temp_filename,"", + DATA_TMP_EXT,2+4), + 0,param->tmpfile_createflag, + MYF(0)); + if (new_file < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (share->pack.header_length) + if (maria_filecopy(param,new_file,info->dfile,0L,share->pack.header_length, + "datafile-header")) + goto err; + info->rec_cache.file=new_file; /* Use this file for cacheing*/ + + maria_lock_memory(param); + for (key=0 ; key < share->base.keys ; key++) + share->keyinfo[key].flag|= HA_SORT_ALLOWS_SAME; + + if (my_pread(share->kfile, temp_buff, + (uint) keyinfo->block_length, + share->state.key_root[sort_key], + MYF(MY_NABP+MY_WME))) + { + _ma_check_print_error(param,"Can't read indexpage from filepos: %s", + (ulong) share->state.key_root[sort_key]); + goto err; + } + + /* Setup param for _ma_sort_write_record */ + sort_info.info=info; + sort_info.new_data_file_type=share->data_file_type; + sort_param.fix_datafile=1; + sort_param.master=1; + sort_param.filepos=share->pack.header_length; + old_record_count=info->state->records; + info->state->records=0; + if (sort_info.new_data_file_type != COMPRESSED_RECORD) + info->state->checksum=0; + + if (sort_record_index(&sort_param,info,keyinfo, + share->state.key_root[sort_key], + temp_buff, sort_key,new_file,update_index) || + maria_write_data_suffix(&sort_info,1) || + flush_io_cache(&info->rec_cache)) + goto err; + + if (info->state->records != old_record_count) + { + _ma_check_print_error(param,"found %s of %s records", + llstr(info->state->records,llbuff), + llstr(old_record_count,llbuff2)); + goto err; + } + + VOID(my_close(info->dfile,MYF(MY_WME))); + param->out_flag|=O_NEW_DATA; /* Data in new file */ + info->dfile=new_file; /* Use new datafile */ + info->state->del=0; + info->state->empty=0; + share->state.dellink= 
HA_OFFSET_ERROR; + info->state->data_file_length=sort_param.filepos; + share->state.split=info->state->records; /* Only hole records */ + share->state.version=(ulong) time((time_t*) 0); + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + got_error=0; + +err: + if (got_error && new_file >= 0) + { + VOID(end_io_cache(&info->rec_cache)); + (void) my_close(new_file,MYF(MY_WME)); + (void) my_delete(param->temp_filename, MYF(MY_WME)); + } + if (temp_buff) + { + my_afree((gptr) temp_buff); + } + my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + VOID(end_io_cache(&info->rec_cache)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + sort_info.buff=0; + share->state.sortkey=sort_key; + DBUG_RETURN(_ma_flush_blocks(param, share->key_cache, share->kfile) | + got_error); +} /* sort_records */ + + +/* Sort records recursive using one index */ + +static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff, uint sort_key, + File new_file,my_bool update_index) +{ + uint nod_flag,used_length,key_length; + byte *temp_buff,*keypos,*endpos; + my_off_t next_page,rec_pos; + byte lastkey[HA_MAX_KEY_BUFF]; + char llbuff[22]; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; + DBUG_ENTER("sort_record_index"); + + nod_flag=_ma_test_if_nod(buff); + temp_buff=0; + + if (nod_flag) + { + if (!(temp_buff= (byte*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not Enough memory"); + DBUG_RETURN(-1); + } + } + used_length=maria_getint(buff); + keypos=buff+2+nod_flag; + endpos=buff+used_length; + for ( ;; ) + { + _sanity(__FILE__,__LINE__); + if (nod_flag) + { + next_page= _ma_kpos(nod_flag, keypos); + if (my_pread(info->s->kfile,(byte*) temp_buff, + (uint) keyinfo->block_length, next_page, + MYF(MY_NABP+MY_WME))) + { + _ma_check_print_error(param,"Can't read keys from filepos: %s", + llstr(next_page,llbuff)); + goto err; + } + if (sort_record_index(sort_param, info,keyinfo,next_page,temp_buff, + sort_key, + new_file, update_index)) + goto err; + } + _sanity(__FILE__,__LINE__); + if (keypos >= endpos || + (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey)) + == 0) + break; + rec_pos= _ma_dpos(info,0,lastkey+key_length); + + if ((*info->s->read_record)(info,sort_param->record,rec_pos)) + { + _ma_check_print_error(param,"%d when reading datafile",my_errno); + goto err; + } + if (rec_pos != sort_param->filepos && update_index) + { + _ma_dpointer(info,keypos-nod_flag-info->s->rec_reflength, + sort_param->filepos); + if (maria_movepoint(info,sort_param->record,rec_pos,sort_param->filepos, + sort_key)) + { + _ma_check_print_error(param,"%d when updating key-pointers",my_errno); + goto err; + } + } + if (_ma_sort_write_record(sort_param)) + goto err; + } + /* Clear end of block to get better compression if the table is backuped */ + bzero((byte*) buff+used_length,keyinfo->block_length-used_length); + if (my_pwrite(info->s->kfile,(byte*) buff,(uint) keyinfo->block_length, + page,param->myf_rw)) + { + _ma_check_print_error(param,"%d when updating keyblock",my_errno); + goto err; + } + if (temp_buff) + my_afree((gptr) temp_buff); + DBUG_RETURN(0); +err: + if (temp_buff) + my_afree((gptr) temp_buff); + DBUG_RETURN(1); +} /* sort_record_index */ + + + +/* + Check if maria_chk was killed by a signal + This is 
overloaded by other programs that want to be able to abort + sorting +*/ + +static int not_killed= 0; + +volatile int *_ma_killed_ptr(HA_CHECK *param __attribute__((unused))) +{ + return ¬_killed; /* always NULL */ +} + + /* print warnings and errors */ + /* VARARGS */ + +void _ma_check_print_info(HA_CHECK *param __attribute__((unused)), + const char *fmt,...) +{ + va_list args; + + va_start(args,fmt); + VOID(vfprintf(stdout, fmt, args)); + VOID(fputc('\n',stdout)); + va_end(args); +} + +/* VARARGS */ + +void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...) +{ + va_list args; + DBUG_ENTER("_ma_check_print_warning"); + + fflush(stdout); + if (!param->warning_printed && !param->error_printed) + { + if (param->testflag & T_SILENT) + fprintf(stderr,"%s: MARIA file %s\n",my_progname_short, + param->isam_file_name); + param->out_flag|= O_DATA_LOST; + } + param->warning_printed=1; + va_start(args,fmt); + fprintf(stderr,"%s: warning: ",my_progname_short); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + DBUG_VOID_RETURN; +} + +/* VARARGS */ + +void _ma_check_print_error(HA_CHECK *param, const char *fmt,...) +{ + va_list args; + DBUG_ENTER("_ma_check_print_error"); + DBUG_PRINT("enter",("format: %s",fmt)); + + fflush(stdout); + if (!param->warning_printed && !param->error_printed) + { + if (param->testflag & T_SILENT) + fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,param->isam_file_name); + param->out_flag|= O_DATA_LOST; + } + param->error_printed|=1; + va_start(args,fmt); + fprintf(stderr,"%s: error: ",my_progname_short); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h new file mode 100644 index 00000000000..b174df2fa3e --- /dev/null +++ b/storage/maria/maria_def.h @@ -0,0 +1,853 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* This file is included by all internal maria files */ + +#include "maria.h" /* Structs & some defines */ +#include "myisampack.h" /* packing of keys */ +#include <my_tree.h> +#ifdef THREAD +#include <my_pthread.h> +#include <thr_lock.h> +#else +#include <my_no_pthread.h> +#endif + +#include <pagecache.h> +#include "ma_loghandler.h" +#include "ma_control_file.h" + +/* undef map from my_nosys; We need test-if-disk full */ +#undef my_write + +typedef struct st_maria_status_info +{ + ha_rows records; /* Rows in table */ + ha_rows del; /* Removed rows */ + my_off_t empty; /* lost space in datafile */ + my_off_t key_empty; /* lost space in indexfile */ + my_off_t key_file_length; + my_off_t data_file_length; + ha_checksum checksum; +} MARIA_STATUS_INFO; + +typedef struct st_maria_state_info +{ + struct + { /* Fileheader */ + uchar file_version[4]; + uchar options[2]; + uchar header_length[2]; + uchar state_info_length[2]; + uchar base_info_length[2]; + uchar base_pos[2]; + uchar key_parts[2]; /* Key parts */ + uchar unique_key_parts[2]; /* Key parts + unique parts */ + uchar keys; /* number of keys in file */ + uchar uniques; /* number of UNIQUE definitions */ + uchar language; /* Language for indexes */ + uchar fulltext_keys; + uchar data_file_type; + uchar org_data_file_type; /* Used by mariapack to store dft */ + } header; + + MARIA_STATUS_INFO state; + ha_rows split; /* number of split blocks */ + my_off_t dellink; /* Link to next removed block */ + ulonglong first_bitmap_with_space; + ulonglong auto_increment; + ulong process; /* process that updated table last */ + ulong unique; /* Unique number for this process */ + ulong update_count; /* Updated for each write lock */ + ulong status; + ulong *rec_per_key_part; + ha_checksum checksum; /* Table checksum */ + my_off_t *key_root; /* Start of key trees */ + my_off_t key_del; /* delete links for trees */ + my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ + + ulong sec_index_changed; /* Updated when new sec_index */ + ulong sec_index_used; /* which extra index are in use */ + ulonglong key_map; /* Which keys are in use */ + ulong version; /* timestamp of create */ + time_t create_time; /* Time when created database */ + time_t recover_time; /* Time for last recover */ + time_t check_time; /* Time for last check */ + uint sortkey; /* sorted by this key (not used) */ + uint open_count; + uint8 changed; /* Changed since mariachk */ + + /* the following isn't saved on disk */ + uint state_diff_length; /* Should be 0 */ + uint state_length; /* Length of state header in file */ + ulong *key_info; +} MARIA_STATE_INFO; + + +#define MARIA_STATE_INFO_SIZE (24 + 4 + 11*8 + 4*4 + 8 + 3*4 + 5*8) +#define MARIA_STATE_KEY_SIZE 8 +#define MARIA_STATE_KEYBLOCK_SIZE 8 +#define MARIA_STATE_KEYSEG_SIZE 4 +#define MARIA_STATE_EXTRA_SIZE (MARIA_MAX_KEY*MARIA_STATE_KEY_SIZE + MARIA_MAX_KEY*HA_MAX_KEY_SEG*MARIA_STATE_KEYSEG_SIZE) +#define MARIA_KEYDEF_SIZE (2+ 5*2) +#define MARIA_UNIQUEDEF_SIZE (2+1+1) +#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) +#define MARIA_COLUMNDEF_SIZE (6+2+2+2+2+2+1+1) +#define MARIA_BASE_INFO_SIZE (5*8 + 6*4 + 11*2 + 6 + 5*2 + 1 + 16) +#define MARIA_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ + +typedef struct st_ma_base_info +{ + my_off_t keystart; /* Start of keys */ + my_off_t max_data_file_length; + 
my_off_t max_key_file_length; + my_off_t margin_key_file_length; + ha_rows records, reloc; /* Create information */ + ulong mean_row_length; /* Create information */ + ulong reclength; /* length of unpacked record */ + ulong pack_reclength; /* Length of full packed rec */ + ulong min_pack_length; + ulong max_pack_length; /* Max possibly length of packed rec */ + ulong min_block_length; + uint fields; /* fields in table */ + uint fixed_not_null_fields; + uint fixed_not_null_fields_length; + uint max_field_lengths; + uint pack_fields; /* packed fields in table */ + uint varlength_fields; /* char/varchar/blobs */ + uint rec_reflength; /* = 2-8 */ + uint key_reflength; /* = 2-8 */ + uint keys; /* same as in state.header */ + uint auto_key; /* Which key-1 is a auto key */ + uint blobs; /* Number of blobs */ + /* Length of packed bits (when table was created first time) */ + uint pack_bytes; + /* Length of null bits (when table was created first time) */ + uint original_null_bytes; + uint null_bytes; /* Null bytes in record */ + uint field_offsets; /* Number of field offsets */ + uint max_key_block_length; /* Max block length */ + uint max_key_length; /* Max key length */ + /* Extra allocation when using dynamic record format */ + uint extra_alloc_bytes; + uint extra_alloc_procent; + uint is_nulls_extended; /* 1 if new null bytes */ + uint min_row_length; + uint default_row_flag; /* 0 or ROW_FLAG_NULLS_EXTENDED */ + uint block_size; + uint default_rec_buff_size; + uint extra_rec_buff_size; + + /* The following are from the header */ + uint key_parts, all_key_parts; + my_bool transactional; +} MARIA_BASE_INFO; + + +/* Structs used intern in database */ + +typedef struct st_maria_blob /* Info of record */ +{ + ulong offset; /* Offset to blob in record */ + uint pack_length; /* Type of packed length */ + ulong length; /* Calc:ed for each record */ +} MARIA_BLOB; + + +typedef struct st_maria_pack +{ + ulong header_length; + uint ref_length; + uchar version; +} MARIA_PACK; + +typedef struct st_maria_file_bitmap +{ + uchar *map; + ulonglong page; /* Page number for current bitmap */ + uint used_size; /* Size of bitmap that is not 0 */ + File file; + + my_bool changed; + +#ifdef THREAD + pthread_mutex_t bitmap_lock; +#endif + /* Constants, allocated when initiating bitmaps */ + uint sizes[8]; /* Size per bit combination */ + uint total_size; /* Total usable size of bitmap page */ + uint block_size; /* Block size of file */ + ulong pages_covered; /* Pages covered by bitmap + 1 */ +} MARIA_FILE_BITMAP; + + +#define MAX_NONMAPPED_INSERTS 1000 + +typedef struct st_maria_share +{ /* Shared between opens */ + MARIA_STATE_INFO state; + MARIA_BASE_INFO base; + MARIA_KEYDEF ft2_keyinfo; /* Second-level ft-key + definition */ + MARIA_KEYDEF *keyinfo; /* Key definitions */ + MARIA_UNIQUEDEF *uniqueinfo; /* unique definitions */ + HA_KEYSEG *keyparts; /* key part info */ + MARIA_COLUMNDEF *rec; /* Pointer to field information + */ + MARIA_PACK pack; /* Data about packed records */ + MARIA_BLOB *blobs; /* Pointer to blobs */ + char *unique_file_name; /* realpath() of index file */ + char *data_file_name, /* Resolved path names from + symlinks */ + *index_file_name; + byte *file_map; /* mem-map of file if possible */ + KEY_CACHE *key_cache; /* ref to the current key cache */ + MARIA_DECODE_TREE *decode_trees; + uint16 *decode_tables; + my_bool (*once_init)(struct st_maria_share *, File); + my_bool (*once_end)(struct st_maria_share *); + my_bool (*init)(struct st_maria_info *); + void (*end)(struct st_maria_info *); 
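+  /* Function-pointer members: the per-row-format access methods; presumably +     chosen at open time according to data_file_type */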
+ int (*read_record)(struct st_maria_info *, byte *, MARIA_RECORD_POS); + my_bool (*scan_init)(struct st_maria_info *); + int (*scan)(struct st_maria_info *, byte *, MARIA_RECORD_POS, my_bool); + void (*scan_end)(struct st_maria_info *); + MARIA_RECORD_POS (*write_record_init)(struct st_maria_info *, const byte *); + my_bool (*write_record)(struct st_maria_info *, const byte *); + my_bool (*write_record_abort)(struct st_maria_info *); + my_bool (*update_record)(struct st_maria_info *, MARIA_RECORD_POS, + const byte *); + my_bool (*delete_record)(struct st_maria_info *); + my_bool (*compare_record)(struct st_maria_info *, const byte *); + ha_checksum(*calc_checksum) (struct st_maria_info *, const byte *); + ha_checksum(*calc_write_checksum) (struct st_maria_info *, const byte *); + my_bool (*compare_unique) (struct st_maria_info *, MARIA_UNIQUEDEF *, + const byte *record, MARIA_RECORD_POS pos); + uint (*file_read)(MARIA_HA *, byte *, uint, my_off_t, myf); + uint (*file_write)(MARIA_HA *, byte *, uint, my_off_t, myf); + invalidator_by_filename invalidator; /* query cache invalidator */ + ulong this_process; /* processid */ + ulong last_process; /* For table-change-check */ + ulong last_version; /* Version on start */ + ulong options; /* Options used */ + ulong min_pack_length; /* These are used by packed data */ + ulong max_pack_length; + ulong state_diff_length; + uint rec_reflength; /* rec_reflength in use now */ + uint unique_name_length; + uint32 ftparsers; /* Number of distinct ftparsers + + 1 */ + File kfile; /* Shared keyfile */ + File data_file; /* Shared data file */ + int mode; /* mode of file on open */ + uint reopen; /* How many times reopened */ + uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ + uint block_size; /* block_size of keyfile & data file*/ + uint base_length; + myf write_flag; + enum data_file_type data_file_type; + my_bool temporary; + /* Below flag is needed to make log tables work with concurrent insert */ + my_bool is_log_table; + + my_bool changed, /* If changed since lock */ + global_changed, /* If changed since open */ + not_flushed, concurrent_insert; + my_bool delay_key_write; +#ifdef THREAD + THR_LOCK lock; + pthread_mutex_t intern_lock; /* Locking for use with _locking */ + rw_lock_t *key_root_lock; +#endif + my_off_t mmaped_length; + uint nonmmaped_inserts; /* counter of writing in + non-mmaped area */ + MARIA_FILE_BITMAP bitmap; + rw_lock_t mmap_lock; +} MARIA_SHARE; + + +typedef byte MARIA_BITMAP_BUFFER; + +typedef struct st_maria_bitmap_block +{ + ulonglong page; /* Page number */ + /* Number of continuous pages. 
TAIL_BIT is set if this is a tail page */ + uint page_count; + uint empty_space; /* Set for head and tail pages */ + /* + Number of BLOCKS for block-region (holds all non-blob-fields or one blob) + */ + uint sub_blocks; + /* set to <> 0 in write_record() if this block was actually used */ + uint8 used; + uint8 org_bitmap_value; +} MARIA_BITMAP_BLOCK; + + +typedef struct st_maria_bitmap_blocks +{ + MARIA_BITMAP_BLOCK *block; + uint count; + my_bool tail_page_skipped; /* If some tail pages was not used */ + my_bool page_skipped; /* If some full pages was not used */ +} MARIA_BITMAP_BLOCKS; + + +/* Data about the currently read row */ +typedef struct st_maria_row +{ + MARIA_BITMAP_BLOCKS insert_blocks; + MARIA_BITMAP_BUFFER *extents; + MARIA_RECORD_POS lastpos, nextpos; + MARIA_RECORD_POS *tail_positions; + ha_checksum checksum; + byte *empty_bits, *field_lengths; + byte *empty_bits_buffer; /* For storing cur_row.empty_bits */ + uint *null_field_lengths; /* All null field lengths */ + ulong *blob_lengths; /* Length for each blob */ + ulong base_length, normal_length, char_length, varchar_length, blob_length; + ulong head_length, total_length; + my_size_t extents_buffer_length; /* Size of 'extents' buffer */ + uint field_lengths_length; /* Length of data in field_lengths */ + uint extents_count; /* number of extents in 'extents' */ + uint full_page_count, tail_count; /* For maria_chk */ +} MARIA_ROW; + +/* Data to scan row in blocked format */ +typedef struct st_maria_block_scan +{ + byte *bitmap_buff, *bitmap_pos, *bitmap_end, *page_buff; + byte *dir, *dir_end; + ulong bitmap_page; + ulonglong bits; + uint number_of_rows, bit_pos; + MARIA_RECORD_POS row_base_page; +} MARIA_BLOCK_SCAN; + + +struct st_maria_info +{ + MARIA_SHARE *s; /* Shared between open:s */ + MARIA_STATUS_INFO *state, save_state; + MARIA_ROW cur_row, new_row; + MARIA_BLOCK_SCAN scan; + MARIA_BLOB *blobs; /* Pointer to blobs */ + MARIA_BIT_BUFF bit_buff; + DYNAMIC_ARRAY bitmap_blocks; + /* accumulate indexfile changes between write's */ + TREE *bulk_insert; + DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ + MEM_ROOT ft_memroot; /* used by the parser */ + MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ + char *filename; /* parameter to open filename */ + byte *buff; /* page buffer */ + byte *keyread_buff; /* Buffer for last key read */ + byte *lastkey, *lastkey2; /* Last used search key */ + byte *first_mbr_key; /* Searhed spatial key */ + byte *rec_buff; /* Temp buffer for recordpack */ + byte *int_keypos, /* Save position for next/previous */ + *int_maxpos; /* -""- */ + uint int_nod_flag; /* -""- */ + uint32 int_keytree_version; /* -""- */ + int (*read_record) (struct st_maria_info *, byte*, MARIA_RECORD_POS); + invalidator_by_filename invalidator; /* query cache invalidator */ + ulong this_unique; /* uniq filenumber or thread */ + ulong last_unique; /* last unique number */ + ulong this_loop; /* counter for this open */ + ulong last_loop; /* last used counter */ + MARIA_RECORD_POS save_lastpos; + MARIA_RECORD_POS dup_key_pos; + my_off_t pos; /* Intern variable */ + my_off_t last_keypage; /* Last key page read */ + my_off_t last_search_keypage; /* Last keypage when searching */ + + /* + QQ: the folloing two xxx_length fields should be removed, + as they are not compatible with parallel repair + */ + ulong packed_length, blob_length; /* Length of found, packed record */ + my_size_t rec_buff_size; + int dfile; /* The datafile */ + uint opt_flag; /* Optim. 
for space/speed */ + uint update; /* If file changed since open */ + int lastinx; /* Last used index */ + uint lastkey_length; /* Length of key in lastkey */ + uint last_rkey_length; /* Last length in maria_rkey() */ + enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */ + uint save_lastkey_length; + uint pack_key_length; /* For MARIAMRG */ + int errkey; /* Got last error on this key */ + int lock_type; /* How database was locked */ + int tmp_lock_type; /* When locked by readinfo */ + uint data_changed; /* Somebody has changed data */ + uint save_update; /* When using KEY_READ */ + int save_lastinx; + LIST open_list; + IO_CACHE rec_cache; /* When cacheing records */ + uint preload_buff_size; /* When preloading indexes */ + myf lock_wait; /* is 0 or MY_DONT_WAIT */ + my_bool was_locked; /* Was locked in panic */ + my_bool append_insert_at_end; /* Set if concurrent insert */ + my_bool quick_mode; + /* If info->keyread_buff can't be used for rnext */ + my_bool page_changed; + /* If info->keyread_buff has to be reread for rnext */ + my_bool keybuff_used; + my_bool once_flags; /* For MARIA_MRG */ +#ifdef __WIN__ + my_bool owned_by_merge; /* This Maria table is part of a merge union */ +#endif +#ifdef THREAD + THR_LOCK_DATA lock; +#endif + uchar *maria_rtree_recursion_state; /* For RTREE */ + int maria_rtree_recursion_depth; +}; + +/* Some defines used by maria-functions */ + +#define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _search() */ +#define F_EXTRA_LCK -1 + +/* bits in opt_flag */ +#define MEMMAP_USED 32 +#define REMEMBER_OLD_POS 64 + +#define WRITEINFO_UPDATE_KEYFILE 1 +#define WRITEINFO_NO_UNLOCK 2 + +/* once_flags */ +#define USE_PACKED_KEYS 1 +#define RRND_PRESERVE_LASTINX 2 + +/* bits in state.changed */ + +#define STATE_CHANGED 1 +#define STATE_CRASHED 2 +#define STATE_CRASHED_ON_REPAIR 4 +#define STATE_NOT_ANALYZED 8 +#define STATE_NOT_OPTIMIZED_KEYS 16 +#define STATE_NOT_SORTED_PAGES 32 + +/* options to maria_read_cache */ + +#define READING_NEXT 1 +#define READING_HEADER 2 + +#define maria_getint(x) ((uint) mi_uint2korr(x) & 32767) +#define maria_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\ + mi_int2store(x,boh); } +#define _ma_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0) +#define maria_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \ + DBUG_PRINT("error", ("Marked table crashed")); \ + }while(0) +#define maria_mark_crashed_on_repair(x) do{(x)->s->state.changed|= \ + STATE_CRASHED|STATE_CRASHED_ON_REPAIR; \ + (x)->update|= HA_STATE_CHANGED; \ + DBUG_PRINT("error", \ + ("Marked table crashed")); \ + }while(0) +#define maria_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED) +#define maria_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR) +#define maria_print_error(SHARE, ERRNO) \ + _ma_report_error((ERRNO), (SHARE)->index_file_name) + +/* Functions to store length of space packed keys, VARCHAR or BLOB keys */ + +#define store_key_length(key,length) \ +{ if ((length) < 255) \ + { *(key)=(length); } \ + else \ + { *(key)=255; mi_int2store((key)+1,(length)); } \ +} + +#define get_key_full_length(length,key) \ + { if (*(uchar*) (key) != 255) \ + length= ((uint) *(uchar*) ((key)++))+1; \ + else \ + { length=mi_uint2korr((key)+1)+3; (key)+=3; } \ +} + +#define get_key_full_length_rdonly(length,key) \ +{ if (*(uchar*) (key) != 255) \ + length= ((uint) *(uchar*) ((key)))+1; \ + else \ + { length=mi_uint2korr((key)+1)+3; } \ +} + +#define get_pack_length(length) ((length) >= 255 ? 
3 : 1) + +#define MARIA_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ +/* Don't use to small record-blocks */ +#define MARIA_EXTEND_BLOCK_LENGTH 20 +#define MARIA_SPLIT_LENGTH ((MARIA_EXTEND_BLOCK_LENGTH+4)*2) + /* Max prefix of record-block */ +#define MARIA_MAX_DYN_BLOCK_HEADER 20 +#define MARIA_BLOCK_INFO_HEADER_LENGTH 20 +#define MARIA_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */ +#define MARIA_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L) +#define MARIA_DYN_MAX_ROW_LENGTH (MARIA_DYN_MAX_BLOCK_LENGTH - MARIA_SPLIT_LENGTH) +#define MARIA_DYN_ALIGN_SIZE 4 /* Align blocks on this */ +#define MARIA_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */ +#define MARIA_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1))) +#define MARIA_REC_BUFF_OFFSET ALIGN_SIZE(MARIA_DYN_DELETE_BLOCK_HEADER+sizeof(uint32)) + +#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */ + +#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ +#define PACK_TYPE_SPACE_FIELDS 2 +#define PACK_TYPE_ZERO_FILL 4 +#define MARIA_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ + +#define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size)) +#define MARIA_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ +#define MARIA_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ + +#define MARIA_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */ +#define MARIA_MIN_ROWS_TO_USE_BULK_INSERT 100 +#define MARIA_MIN_ROWS_TO_DISABLE_INDEXES 100 +#define MARIA_MIN_ROWS_TO_USE_WRITE_CACHE 10 + +/* The UNIQUE check is done with a hashed long key */ + +#define MARIA_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT +#define maria_unique_store(A,B) mi_int4store((A),(B)) + +#ifdef THREAD +extern pthread_mutex_t THR_LOCK_maria; +#endif +#if !defined(THREAD) || defined(DONT_USE_RW_LOCKS) +#define rw_wrlock(A) {} +#define rw_rdlock(A) {} +#define rw_unlock(A) {} +#endif + + /* Some extern variables */ + +extern LIST *maria_open_list; +extern uchar NEAR maria_file_magic[], NEAR maria_pack_file_magic[]; +extern uint NEAR maria_read_vec[], NEAR maria_readnext_vec[]; +extern uint maria_quick_table_bits; +extern const char *maria_data_root; +extern byte maria_zero_string[]; +extern my_bool maria_inited; + + /* This is used by _ma_calc_xxx_key_length och _ma_store_key */ + +typedef struct st_maria_s_param +{ + uint ref_length, key_length, n_ref_length; + uint n_length, totlength, part_of_prev_key, prev_length, pack_marker; + const byte *key; + byte *prev_key, *next_key_pos; + bool store_not_null; +} MARIA_KEY_PARAM; + + /* Prototypes for intern functions */ + +extern int _ma_read_dynamic_record(MARIA_HA *, byte *, MARIA_RECORD_POS); +extern int _ma_read_rnd_dynamic_record(MARIA_HA *, byte *, MARIA_RECORD_POS, + my_bool); +extern my_bool _ma_write_dynamic_record(MARIA_HA *, const byte *); +extern my_bool _ma_update_dynamic_record(MARIA_HA *, MARIA_RECORD_POS, + const byte *); +extern my_bool _ma_delete_dynamic_record(MARIA_HA *info); +extern my_bool _ma_cmp_dynamic_record(MARIA_HA *info, const byte *record); +extern my_bool _ma_write_blob_record(MARIA_HA *, const byte *); +extern my_bool _ma_update_blob_record(MARIA_HA *, MARIA_RECORD_POS, + const byte *); +extern int _ma_read_static_record(MARIA_HA *info, byte *, MARIA_RECORD_POS); +extern int _ma_read_rnd_static_record(MARIA_HA *, byte *, MARIA_RECORD_POS, + my_bool); +extern my_bool _ma_write_static_record(MARIA_HA *, 
const byte *); +extern my_bool _ma_update_static_record(MARIA_HA *, MARIA_RECORD_POS, + const byte *); +extern my_bool _ma_delete_static_record(MARIA_HA *info); +extern my_bool _ma_cmp_static_record(MARIA_HA *info, const byte *record); +extern int _ma_ck_write(MARIA_HA *info, uint keynr, byte *key, + uint length); +extern int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, + MARIA_RECORD_POS *root, uint comp_flag); +extern int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, MARIA_RECORD_POS *root); +extern int _ma_insert(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + byte *anc_buff, byte *key_pos, byte *key_buff, + byte *father_buff, byte *father_keypos, + my_off_t father_page, my_bool insert_last); +extern int _ma_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, byte *buff, byte *key_buff, + my_bool insert_last); +extern byte *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, + uint *return_key_length, + byte ** after_key); +extern int _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *key_pos, byte *org_key, + byte *key_buff, const byte *key, + MARIA_KEY_PARAM *s_temp); +extern int _ma_calc_var_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *key_pos, byte *org_key, + byte *key_buff, const byte *key, + MARIA_KEY_PARAM *s_temp); +extern int _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, + uint nod_flag, byte *key_pos, + byte *org_key, byte *prev_key, + const byte *key, + MARIA_KEY_PARAM *s_temp); +extern int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, + uint nod_flag, byte *key_pos, + byte *org_key, byte *prev_key, + const byte *key, + MARIA_KEY_PARAM *s_temp); +void _ma_store_static_key(MARIA_KEYDEF *keyinfo, byte *key_pos, + MARIA_KEY_PARAM *s_temp); +void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos, + MARIA_KEY_PARAM *s_temp); +#ifdef NOT_USED +void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos, + MARIA_KEY_PARAM *s_temp); +#endif +void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos, + MARIA_KEY_PARAM *s_temp); + +extern int _ma_ck_delete(MARIA_HA *info, uint keynr, byte *key, + uint key_length); +extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer); +extern int _ma_writeinfo(MARIA_HA *info, uint options); +extern int _ma_test_if_changed(MARIA_HA *info); +extern int _ma_mark_file_changed(MARIA_HA *info); +extern int _ma_decrement_open_count(MARIA_HA *info); +extern int _ma_check_index(MARIA_HA *info, int inx); +extern int _ma_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_len, uint nextflag, my_off_t pos); +extern int _ma_bin_search(struct st_maria_info *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint key_len, + uint comp_flag, byte **ret_pos, byte *buff, + my_bool *was_last_key); +extern int _ma_seq_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint key_len, + uint comp_flag, byte ** ret_pos, byte *buff, + my_bool *was_last_key); +extern int _ma_prefix_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint key_len, + uint comp_flag, byte ** ret_pos, byte *buff, + my_bool *was_last_key); +extern my_off_t _ma_kpos(uint nod_flag, byte *after_key); +extern void _ma_kpointer(MARIA_HA *info, byte *buff, my_off_t pos); +extern MARIA_RECORD_POS _ma_dpos(MARIA_HA *info, uint nod_flag, + const byte *after_key); +extern MARIA_RECORD_POS _ma_rec_pos(MARIA_SHARE *info, byte *ptr); +extern void 
_ma_dpointer(MARIA_HA *info, byte *buff, MARIA_RECORD_POS pos); +extern uint _ma_get_static_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte **page, byte *key); +extern uint _ma_get_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte **page, byte *key); +extern uint _ma_get_binary_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte ** page_pos, byte *key); +extern byte *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *keypos, byte *lastkey, + byte *endpos, uint *return_key_length); +extern byte *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, byte *keypos, + uint *return_key_length); +extern uint _ma_keylength(MARIA_KEYDEF *keyinfo, const byte *key); +extern uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const byte *key, + HA_KEYSEG *end); +extern byte *_ma_move_key(MARIA_KEYDEF *keyinfo, byte *to, const byte *from); +extern int _ma_search_next(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, uint nextflag, + my_off_t pos); +extern int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t pos); +extern int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t pos); +extern byte *_ma_fetch_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, byte *buff, + int return_buffer); +extern int _ma_write_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, byte *buff); +extern int _ma_dispose(MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, + int level); +extern my_off_t _ma_new(MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level); +extern uint _ma_make_key(MARIA_HA *info, uint keynr, byte *key, + const byte *record, MARIA_RECORD_POS filepos); +extern uint _ma_pack_key(MARIA_HA *info, uint keynr, byte *key, + const byte *old, uint key_length, + HA_KEYSEG ** last_used_keyseg); +extern int _ma_read_key_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS); +extern int _ma_read_cache(IO_CACHE *info, byte *buff, MARIA_RECORD_POS pos, + uint length, int re_read_if_possibly); +extern ulonglong ma_retrieve_auto_increment(MARIA_HA *info, const byte *record); + +extern my_bool _ma_alloc_buffer(byte **old_addr, my_size_t *old_size, + my_size_t new_size); +extern ulong _ma_rec_unpack(MARIA_HA *info, byte *to, byte *from, + ulong reclength); +extern my_bool _ma_rec_check(MARIA_HA *info, const char *record, + byte *packpos, ulong packed_length, + my_bool with_checkum); +extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos, + ulong length, my_off_t next_filepos, + byte ** record, ulong *reclength, + int *flag); +extern void _ma_print_key(FILE *stream, HA_KEYSEG *keyseg, + const byte *key, uint length); +extern my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile); +extern my_bool _ma_once_end_pack_row(MARIA_SHARE *share); +extern int _ma_read_pack_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos); +extern int _ma_read_rnd_pack_record(MARIA_HA *, byte *, MARIA_RECORD_POS, + my_bool); +extern int _ma_pack_rec_unpack(MARIA_HA *info, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *from, ulong reclength); +extern ulonglong _ma_safe_mul(ulonglong a, ulonglong b); +extern int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, + const byte *oldrec, const byte *newrec, + my_off_t pos); + +/* Parameter to _ma_get_block_info */ + +typedef struct st_maria_block_info +{ + uchar header[MARIA_BLOCK_INFO_HEADER_LENGTH]; + ulong rec_len; + ulong data_len; + ulong block_len; + ulong blob_len; + MARIA_RECORD_POS filepos; + MARIA_RECORD_POS next_filepos; + 
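The store_key_length()/get_key_full_length() macros defined earlier in this header encode variable key-segment lengths with a one- or three-byte prefix: values below 255 fit in a single byte, anything larger is written as a 0xFF marker followed by a two-byte integer (the macros use mi_int2store()/mi_uint2korr(), and the get_* variants fold the prefix size into the returned length and advance the key pointer). A standalone sketch of the same encoding, with hypothetical function names and an assumed high-byte-first layout for the two-byte case:

  #include <stdio.h>

  static unsigned int store_length(unsigned char *to, unsigned int length)
  {
    if (length < 255)
    {
      to[0]= (unsigned char) length;
      return 1;                               /* one prefix byte used */
    }
    to[0]= 255;                               /* marker: long form follows */
    to[1]= (unsigned char) (length >> 8);     /* assumed high byte first */
    to[2]= (unsigned char) (length & 0xFF);
    return 3;
  }

  static unsigned int read_length(const unsigned char *from, unsigned int *length)
  {
    if (from[0] != 255)
    {
      *length= from[0];
      return 1;
    }
    *length= ((unsigned int) from[1] << 8) | from[2];
    return 3;
  }

  int main(void)
  {
    unsigned char buf[3];
    unsigned int len;
    unsigned int stored= store_length(buf, 1000);
    unsigned int used= read_length(buf, &len);
    printf("stored 1000 in %u byte(s), read back %u from %u byte(s)\n",
           stored, len, used);
    return 0;
  }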
MARIA_RECORD_POS prev_filepos; + uint second_read; + uint offset; +} MARIA_BLOCK_INFO; + +/* bits in return from _ma_get_block_info */ + +#define BLOCK_FIRST 1 +#define BLOCK_LAST 2 +#define BLOCK_DELETED 4 +#define BLOCK_ERROR 8 /* Wrong data */ +#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */ +#define BLOCK_FATAL_ERROR 32 /* hardware-error */ + +#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */ +#define MAXERR 20 +#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */ +#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE +#define INDEX_TMP_EXT ".TMM" +#define DATA_TMP_EXT ".TMD" + +#define UPDATE_TIME 1 +#define UPDATE_STAT 2 +#define UPDATE_SORT 4 +#define UPDATE_AUTO_INC 8 +#define UPDATE_OPEN_COUNT 16 + +#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) +#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) +#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) +#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) + +#define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0) +#define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1) + +extern uint _ma_get_block_info(MARIA_BLOCK_INFO *, File, my_off_t); +extern uint _ma_rec_pack(MARIA_HA *info, byte *to, const byte *from); +extern uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, byte **rec_buff_p, + my_size_t *rec_buff_size, + File file, my_off_t filepos); +extern void _ma_store_blob_length(byte *pos, uint pack_length, uint length); +extern void _ma_report_error(int errcode, const char *file_name); +extern my_bool _ma_memmap_file(MARIA_HA *info); +extern void _ma_unmap_file(MARIA_HA *info); +extern uint _ma_save_pack_length(uint version, byte * block_buff, + ulong length); +extern uint _ma_calc_pack_length(uint version, ulong length); +extern ulong _ma_calc_blob_length(uint length, const byte *pos); +extern uint _ma_mmap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); +extern uint _ma_mmap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); +extern uint _ma_nommap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); +extern uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); + +uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite); +byte *_ma_state_info_read(byte *ptr, MARIA_STATE_INFO *state); +uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state, + my_bool pRead); +uint _ma_base_info_write(File file, MARIA_BASE_INFO *base); +int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg); +char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg); +uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef); +char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef); +uint _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *keydef); +char *_ma_uniquedef_read(char *ptr, MARIA_UNIQUEDEF *keydef); +uint _ma_recinfo_write(File file, MARIA_COLUMNDEF *recinfo); +char *_ma_recinfo_read(char *ptr, MARIA_COLUMNDEF *recinfo); +ulong _ma_calc_total_blob_length(MARIA_HA *info, const byte *record); +ha_checksum _ma_checksum(MARIA_HA *info, const byte *buf); +ha_checksum _ma_static_checksum(MARIA_HA *info, const byte *buf); +my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + byte *record, ha_checksum unique_hash, + MARIA_RECORD_POS pos); +ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *buf); +my_bool 
_ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos); +my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos); +my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, + my_bool null_are_equal); +void _ma_get_status(void *param, int concurrent_insert); +void _ma_update_status(void *param); +void _ma_copy_status(void *to, void *from); +my_bool _ma_check_status(void *param); + +extern MARIA_HA *_ma_test_if_reopen(char *filename); +my_bool _ma_check_table_is_closed(const char *name, const char *where); +int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, File file_to_dup); +int _ma_open_keyfile(MARIA_SHARE *share); +void _ma_setup_functions(register MARIA_SHARE *share); +my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size); +void _ma_remap_file(MARIA_HA *info, my_off_t size); + +MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const byte *record); +my_bool _ma_write_abort_default(MARIA_HA *info); + +/* Functions needed by _ma_check (are overrided in MySQL) */ +C_MODE_START +volatile int *_ma_killed_ptr(HA_CHECK *param); +void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void _ma_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void _ma_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...)); +C_MODE_END + +int _ma_flush_pending_blocks(MARIA_SORT_PARAM *param); +int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param); +int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param); +#ifdef THREAD +pthread_handler_t _ma_thr_find_all_keys(void *arg); +#endif +int _ma_flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file); + +int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param); +int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, + ulong); +int _ma_sync_table_files(const MARIA_HA *info); diff --git a/storage/maria/maria_ftdump.c b/storage/maria/maria_ftdump.c new file mode 100644 index 00000000000..ef30540bfce --- /dev/null +++ b/storage/maria/maria_ftdump.c @@ -0,0 +1,279 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. 
Golubchik, who has a shared copyright to this code + added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ + +#include "ma_ftdefs.h" +#include <my_getopt.h> + +static void usage(); +static void complain(int val); +static my_bool get_one_option(int, const struct my_option *, char *); + +static int count=0, stats=0, dump=0, lstats=0; +static my_bool verbose; +static char *query=NULL; +static uint lengths[256]; + +#define MAX_LEN (HA_FT_MAXBYTELEN+10) +#define HOW_OFTEN_TO_WRITE 10000 + +static struct my_option my_long_options[] = +{ + {"help", 'h', "Display help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"help", '?', "Synonym for -h.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"count", 'c', "Calculate per-word stats (counts and global weights).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"dump", 'd', "Dump index (incl. data offsets and word weights).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"length", 'l', "Report length distribution.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"stats", 's', "Report global stats.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Be verbose.", + (gptr*) &verbose, (gptr*) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +int main(int argc,char *argv[]) +{ + int error=0, subkeys; + uint keylen, keylen2=0, inx, doc_cnt=0; + float weight= 1.0; + double gws, min_gws=0, avg_gws=0; + MARIA_HA *info; + char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN]; + ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0; + struct { MARIA_HA *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */ + + MY_INIT(argv[0]); + if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(error); + maria_init(); + if (count || dump) + verbose=0; + if (!count && !dump && !lstats && !query) + stats=1; + + if (verbose) + setbuf(stdout,NULL); + + if (argc < 2) + usage(); + + { + char *end; + inx= (uint) strtoll(argv[1], &end, 10); + if (*end) + usage(); + } + + init_key_cache(maria_key_cache,MARIA_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0); + + if (!(info=maria_open(argv[0], O_RDONLY, + HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER))) + { + error=my_errno; + goto err; + } + + *buf2=0; + aio->info=info; + + if ((inx >= info->s->base.keys) || + !(info->s->keyinfo[inx].flag & HA_FULLTEXT)) + { + printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename); + goto err; + } + + maria_lock_database(info, F_EXTRA_LCK); + + info->cur_row.lastpos= HA_OFFSET_ERROR; + info->update|= HA_STATE_PREV_FOUND; + + while (!(error=maria_rnext(info,NULL,inx))) + { + keylen=*(info->lastkey); + + subkeys=ft_sintXkorr(info->lastkey+keylen+1); + if (subkeys >= 0) + weight=*(float*)&subkeys; + +#ifdef HAVE_SNPRINTF + snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1); +#else + sprintf(buf,"%.*s",(int) keylen,info->lastkey+1); +#endif + my_casedn_str(default_charset_info,buf); + total++; + lengths[keylen]++; + + if (count || stats) + { + if (strcmp(buf, buf2)) + { + if (*buf2) + { + uniq++; + avg_gws+=gws=GWS_IN_USE; + if (count) + printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); + if (maxlen<keylen2) + { + maxlen=keylen2; + strmov(buf_maxlen, buf2); + } + if (max_doc_cnt < doc_cnt) + { + max_doc_cnt=doc_cnt; + strmov(buf_min_gws, buf2); + min_gws=gws; + } + } + strmov(buf2, buf); + keylen2=keylen; + doc_cnt=0; + } + doc_cnt+= (subkeys >= 0 ? 
1 : -subkeys); + } + if (dump) + { + if (subkeys>=0) + printf("%9lx %20.7f %s\n", (long) info->cur_row.lastpos,weight,buf); + else + printf("%9lx => %17d %s\n",(long) info->cur_row.lastpos,-subkeys,buf); + } + if (verbose && (total%HOW_OFTEN_TO_WRITE)==0) + printf("%10ld\r",total); + } + maria_lock_database(info, F_UNLCK); + + if (count || stats) + { + if (*buf2) + { + uniq++; + avg_gws+=gws=GWS_IN_USE; + if (count) + printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); + if (maxlen<keylen2) + { + maxlen=keylen2; + strmov(buf_maxlen, buf2); + } + if (max_doc_cnt < doc_cnt) + { + max_doc_cnt=doc_cnt; + strmov(buf_min_gws, buf2); + min_gws=gws; + } + } + } + + if (stats) + { + count=0; + for (inx=0;inx<256;inx++) + { + count+=lengths[inx]; + if ((ulong) count >= total/2) + break; + } + printf("Total rows: %lu\nTotal words: %lu\n" + "Unique words: %lu\nLongest word: %lu chars (%s)\n" + "Median length: %u\n" + "Average global weight: %f\n" + "Most common word: %lu times, weight: %f (%s)\n", + (long) info->state->records, total, uniq, maxlen, buf_maxlen, + inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws); + } + if (lstats) + { + count=0; + for (inx=0; inx<256; inx++) + { + count+=lengths[inx]; + if (count && lengths[inx]) + printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx, + (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count, + 100.0*count/total); + } + } + +err: + if (error && error != HA_ERR_END_OF_FILE) + printf("got error %d\n",my_errno); + if (info) + maria_close(info); + maria_end(); + return 0; +} + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) +{ + switch(optid) { + case 'd': + dump=1; + complain(count || query); + break; + case 's': + stats=1; + complain(query!=0); + break; + case 'c': + count= 1; + complain(dump || query); + break; + case 'l': + lstats=1; + complain(query!=0); + break; + case '?': + case 'h': + usage(); + } + return 0; +} + +#include <help_start.h> + +static void usage() +{ + printf("Use: maria_ft_dump <table_name> <index_num>\n"); + my_print_help(my_long_options); + my_print_variables(my_long_options); + NETWARE_SET_SCREEN_MODE(1); + exit(1); +} + +#include <help_end.h> + +static void complain(int val) /* Kinda assert :-) */ +{ + if (val) + { + printf("You cannot use these options together!\n"); + exit(1); + } +} diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c new file mode 100644 index 00000000000..41c519693d5 --- /dev/null +++ b/storage/maria/maria_pack.c @@ -0,0 +1,3212 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Pack MARIA file */ + +#ifndef USE_MY_FUNC +#define USE_MY_FUNC /* We need at least my_malloc */ +#endif + +#include "maria_def.h" +#include <queues.h> +#include <my_tree.h> +#include "mysys_err.h" +#ifdef MSDOS +#include <io.h> +#endif +#ifndef __GNU_LIBRARY__ +#define __GNU_LIBRARY__ /* Skip warnings in getopt.h */ +#endif +#include <my_getopt.h> +#include <assert.h> + +#if SIZEOF_LONG_LONG > 4 +#define BITS_SAVED 64 +#else +#define BITS_SAVED 32 +#endif + +#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */ +#define HEAD_LENGTH 32 +#define ALLOWED_JOIN_DIFF 256 /* Diff allowed to join trees */ + +#define DATA_TMP_EXT ".TMD" +#define OLD_EXT ".OLD" +#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE + +struct st_file_buffer { + File file; + uchar *buffer,*pos,*end; + my_off_t pos_in_file; + int bits; + ulonglong bitbucket; +}; + +struct st_huff_tree; +struct st_huff_element; + +typedef struct st_huff_counts { + uint field_length,max_zero_fill; + uint pack_type; + uint max_end_space,max_pre_space,length_bits,min_space; + ulong max_length; + enum en_fieldtype field_type; + struct st_huff_tree *tree; /* Tree for field */ + my_off_t counts[256]; + my_off_t end_space[8]; + my_off_t pre_space[8]; + my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed; + TREE int_tree; /* Tree for detecting distinct column values. */ + byte *tree_buff; /* Column values, 'field_length' each. */ + byte *tree_pos; /* Points to end of column values in 'tree_buff'. */ +} HUFF_COUNTS; + +typedef struct st_huff_element HUFF_ELEMENT; + +/* + WARNING: It is crucial for the optimizations in calc_packed_length() + that 'count' is the first element of 'HUFF_ELEMENT'. 
+*/ +struct st_huff_element { + my_off_t count; + union un_element { + struct st_nod { + HUFF_ELEMENT *left,*right; + } nod; + struct st_leaf { + HUFF_ELEMENT *null; + uint element_nr; /* Number of element */ + } leaf; + } a; +}; + + +typedef struct st_huff_tree { + HUFF_ELEMENT *root,*element_buffer; + HUFF_COUNTS *counts; + uint tree_number; + uint elements; + my_off_t bytes_packed; + uint tree_pack_length; + uint min_chr,max_chr,char_bits,offset_bits,max_offset,height; + ulonglong *code; + uchar *code_len; +} HUFF_TREE; + + +typedef struct st_isam_mrg { + MARIA_HA **file,**current,**end; + uint free_file; + uint count; + uint min_pack_length; /* Theese is used by packed data */ + uint max_pack_length; + uint ref_length; + uint max_blob_length; + my_off_t records; + /* true if at least one source file has at least one disabled index */ + my_bool src_file_has_indexes_disabled; +} PACK_MRG_INFO; + + +extern int main(int argc,char * *argv); +static void get_options(int *argc,char ***argv); +static MARIA_HA *open_isam_file(char *name,int mode); +static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count); +static int compress(PACK_MRG_INFO *file,char *join_name); +static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records); +static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, + uint trees, + HUFF_COUNTS *huff_counts, + uint fields); +static int compare_tree(void* cmp_arg __attribute__((unused)), + const uchar *s,const uchar *t); +static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts); +static void check_counts(HUFF_COUNTS *huff_counts,uint trees, + my_off_t records); +static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records, + uint max_space_length,my_off_t *space_counts, + my_off_t tot_space_count, + enum en_fieldtype field_type); +static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees); +static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts); +static int compare_huff_elements(void *not_used, byte *a,byte *b); +static int save_counts_in_queue(byte *key,element_count count, + HUFF_TREE *tree); +static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag); +static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees); +static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees); +static void make_traverse_code_tree(HUFF_TREE *huff_tree, + HUFF_ELEMENT *element,uint size, + ulonglong code); +static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees, + my_off_t tot_elements,my_off_t filelength); +static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees); +static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees); +static uint *make_offset_code_tree(HUFF_TREE *huff_tree, + HUFF_ELEMENT *element, + uint *offset); +static uint max_bit(uint value); +static int compress_isam_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts); +static char *make_new_name(char *new_name,char *old_name); +static char *make_old_name(char *new_name,char *old_name); +static void init_file_buffer(File file,pbool read_buffer); +static int flush_buffer(ulong neaded_length); +static void end_file_buffer(void); +static void write_bits(ulonglong value, uint bits); +static void flush_bits(void); +static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, + ha_checksum crc); +static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length, + ha_checksum crc); +static int mrg_close(PACK_MRG_INFO *mrg); +static int 
mrg_rrnd(PACK_MRG_INFO *info,byte *buf); +static void mrg_reset(PACK_MRG_INFO *mrg); +#if !defined(DBUG_OFF) +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count); +static int fakecmp(my_off_t **count1, my_off_t **count2); +#endif + + +static int error_on_write=0,test_only=0,verbose=0,silent=0, + write_loop=0,force_pack=0, isamchk_neaded=0; +static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; +static my_bool backup, opt_wait; +/* + tree_buff_length is somewhat arbitrary. The bigger it is the better + the chance to win in terms of compression factor. On the other hand, + this table becomes part of the compressed file header. And its length + is coded with 16 bits in the header. Hence the limit is 2**16 - 1. +*/ +static uint tree_buff_length= 65536 - MALLOC_OVERHEAD; +static char tmp_dir[FN_REFLEN]={0},*join_table; +static my_off_t intervall_length; +static ha_checksum glob_crc; +static struct st_file_buffer file_buffer; +static QUEUE queue; +static HUFF_COUNTS *global_count; +static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; +static const char *load_default_groups[]= { "mariapack",0 }; + + /* The main program */ + +int main(int argc, char **argv) +{ + int error,ok; + PACK_MRG_INFO merge; + char **default_argv; + MY_INIT(argv[0]); + + load_defaults("my",load_default_groups,&argc,&argv); + default_argv= argv; + get_options(&argc,&argv); + maria_init(); + + error=ok=isamchk_neaded=0; + if (join_table) + { /* Join files into one */ + if (open_isam_files(&merge,argv,(uint) argc) || + compress(&merge,join_table)) + error=1; + } + else while (argc--) + { + MARIA_HA *isam_file; + if (!(isam_file=open_isam_file(*argv++,O_RDWR))) + error=1; + else + { + merge.file= &isam_file; + merge.current=0; + merge.free_file=0; + merge.count=1; + if (compress(&merge,0)) + error=1; + else + ok=1; + } + } + if (ok && isamchk_neaded && !silent) + puts("Remember to run maria_chk -rq on compressed tables"); + VOID(fflush(stdout)); + VOID(fflush(stderr)); + free_defaults(default_argv); + maria_end(); + my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); + exit(error ? 2 : 0); +#ifndef _lint + return 0; /* No compiler warning */ +#endif +} + +enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE}; + +static struct my_option my_long_options[] = +{ +#ifdef __NETWARE__ + {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"backup", 'b', "Make a backup of the table as table_name.OLD.", + (gptr*) &backup, (gptr*) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"character-sets-dir", OPT_CHARSETS_DIR_MP, + "Directory where character sets are.", (gptr*) &charsets_dir, + (gptr*) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"force", 'f', + "Force packing of table even if it gets bigger or if tempfile exists.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"join", 'j', + "Join all given tables into 'new_table_name'. 
All tables MUST have identical layouts.", + (gptr*) &join_table, (gptr*) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, + 0, 0, 0}, + {"help", '?', "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"silent", 's', "Be more silent.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"tmpdir", 'T', "Use temporary directory to store temporary table.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"test", 't', "Don't pack table, only test packing it.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Output version information and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"wait", 'w', "Wait and retry if table is in use.", (gptr*) &opt_wait, + (gptr*) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +#include <help_start.h> + +static void print_version(void) +{ + VOID(printf("%s Ver 1.0 for %s on %s\n", + my_progname, SYSTEM_TYPE, MACHINE_TYPE)); + NETWARE_SET_SCREEN_MODE(1); +} + + +static void usage(void) +{ + print_version(); + puts("Copyright (C) 2002 MySQL AB"); + puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,"); + puts("and you are welcome to modify and redistribute it under the GPL license\n"); + + puts("Pack a MARIA-table to take much less space."); + puts("Keys are not updated, you must run maria_chk -rq on the datafile"); + puts("afterwards to update the keys."); + puts("You should give the .MYI file as the filename argument."); + + VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname)); + my_print_help(my_long_options); + print_defaults("my", load_default_groups); + my_print_variables(my_long_options); +} + +#include <help_end.h> + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + uint length; + + switch(optid) { +#ifdef __NETWARE__ + case OPT_AUTO_CLOSE: + setscreenmode(SCR_AUTOCLOSE_ON_EXIT); + break; +#endif + case 'f': + force_pack= 1; + tmpfile_createflag= O_RDWR | O_TRUNC; + break; + case 's': + write_loop= verbose= 0; + silent= 1; + break; + case 't': + test_only= 1; + /* Avoid to reset 'verbose' if it was already set > 1. */ + if (! verbose) + verbose= 1; + break; + case 'T': + length= (uint) (strmov(tmp_dir, argument) - tmp_dir); + if (length != dirname_length(tmp_dir)) + { + tmp_dir[length]=FN_LIBCHAR; + tmp_dir[length+1]=0; + } + break; + case 'v': + verbose++; /* Allow for selecting the level of verbosity. */ + silent= 0; + break; + case '#': + DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_pack.trace"); + break; + case 'V': + print_version(); + exit(0); + case 'I': + case '?': + usage(); + exit(0); + } + return 0; +} + + /* reads options */ + /* Initiates DEBUG - but no debugging here ! */ + +static void get_options(int *argc,char ***argv) +{ + int ho_error; + + my_progname= argv[0][0]; + if (isatty(fileno(stdout))) + write_loop=1; + + if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) + exit(ho_error); + + if (!*argc) + { + usage(); + exit(1); + } + if (join_table) + { + backup=0; /* Not needed */ + tmp_dir[0]=0; + } + return; +} + + +static MARIA_HA *open_isam_file(char *name,int mode) +{ + MARIA_HA *isam_file; + MARIA_SHARE *share; + DBUG_ENTER("open_isam_file"); + + if (!(isam_file=maria_open(name,mode, + (opt_wait ? 
HA_OPEN_WAIT_IF_LOCKED : + HA_OPEN_ABORT_IF_LOCKED)))) + { + VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno)); + DBUG_RETURN(0); + } + share=isam_file->s; + if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table) + { + if (!force_pack) + { + VOID(fprintf(stderr, "%s is already compressed\n", name)); + VOID(maria_close(isam_file)); + DBUG_RETURN(0); + } + if (verbose) + puts("Recompressing already compressed table"); + share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */ + } + if (! force_pack && share->state.state.records != 0 && + (share->state.state.records <= 1 || + share->state.state.data_file_length < 1024)) + { + VOID(fprintf(stderr, "%s is too small to compress\n", name)); + VOID(maria_close(isam_file)); + DBUG_RETURN(0); + } + VOID(maria_lock_database(isam_file,F_WRLCK)); + DBUG_RETURN(isam_file); +} + + +static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count) +{ + uint i,j; + mrg->count=0; + mrg->current=0; + mrg->file=(MARIA_HA**) my_malloc(sizeof(MARIA_HA*)*count,MYF(MY_FAE)); + mrg->free_file=1; + mrg->src_file_has_indexes_disabled= 0; + for (i=0; i < count ; i++) + { + if (!(mrg->file[i]=open_isam_file(names[i],O_RDONLY))) + goto error; + + mrg->src_file_has_indexes_disabled|= + ! maria_is_all_keys_active(mrg->file[i]->s->state.key_map, + mrg->file[i]->s->base.keys); + } + /* Check that files are identical */ + for (j=0 ; j < count-1 ; j++) + { + MARIA_COLUMNDEF *m1,*m2,*end; + if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength || + mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields) + goto diff_file; + m1=mrg->file[j]->s->rec; + end=m1+mrg->file[j]->s->base.fields; + m2=mrg->file[j+1]->s->rec; + for ( ; m1 != end ; m1++,m2++) + { + if (m1->type != m2->type || m1->length != m2->length) + goto diff_file; + } + } + mrg->count=count; + return 0; + + diff_file: + VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n", + my_progname, names[j], names[j+1])); + error: + while (i--) + maria_close(mrg->file[i]); + my_free((gptr) mrg->file,MYF(0)); + return 1; +} + + +static int compress(PACK_MRG_INFO *mrg,char *result_table) +{ + int error; + File new_file,join_isam_file; + MARIA_HA *isam_file; + MARIA_SHARE *share; + char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN]; + uint i,header_length,fields,trees,used_trees; + my_off_t old_length,new_length,tot_elements; + HUFF_COUNTS *huff_counts; + HUFF_TREE *huff_trees; + DBUG_ENTER("compress"); + + isam_file=mrg->file[0]; /* Take this as an example */ + share=isam_file->s; + new_file=join_isam_file= -1; + trees=fields=0; + huff_trees=0; + huff_counts=0; + + /* Create temporary or join file */ + + if (backup) + VOID(fn_format(org_name,isam_file->filename,"",MARIA_NAME_DEXT,2)); + else + VOID(fn_format(org_name,isam_file->filename,"",MARIA_NAME_DEXT,2+4+16)); + if (!test_only && result_table) + { + /* Make a new indexfile based on first file in list */ + uint length; + char *buff; + strmov(org_name,result_table); /* Fix error messages */ + VOID(fn_format(new_name,result_table,"",MARIA_NAME_IEXT,2)); + if ((join_isam_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) + < 0) + goto err; + length=(uint) share->base.keystart; + if (!(buff=my_malloc(length,MYF(MY_WME)))) + goto err; + if (my_pread(share->kfile,buff,length,0L,MYF(MY_WME | MY_NABP)) || + my_write(join_isam_file,buff,length, + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL))) + { + my_free(buff,MYF(0)); + goto err; + } + my_free(buff,MYF(0)); + 
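The statistics and tree-building phases that follow (get_statistic(), make_huff_trees(), calc_packed_length()) estimate how many bits a column would occupy under Huffman coding before committing to it. That packed size can be computed from a byte histogram without materializing any codes, because repeatedly merging the two smallest counts and summing the merge totals yields sum(count[i] * code_length[i]). A minimal sketch of that costing idea, greatly simplified (the real code keeps HUFF_ELEMENTs in a QUEUE and additionally handles joined trees, intervals and space compression):

  #include <stdio.h>

  /* Total packed bit count for a byte-frequency histogram: every merge adds
     one bit to each symbol under it, so the running sum of merge totals
     equals sum(count[i] * code_len[i]). */
  static unsigned long long huffman_packed_bits(const unsigned long long count[256])
  {
    unsigned long long nodes[256];
    unsigned long long total= 0;
    unsigned int n= 0, i;

    for (i= 0; i < 256; i++)
      if (count[i])
        nodes[n++]= count[i];

    while (n > 1)
    {
      unsigned int a= 0, b= 1, j;            /* indexes of the two smallest */
      if (nodes[b] < nodes[a]) { a= 1; b= 0; }
      for (j= 2; j < n; j++)
      {
        if (nodes[j] < nodes[a]) { b= a; a= j; }
        else if (nodes[j] < nodes[b]) b= j;
      }
      total+= nodes[a] + nodes[b];
      nodes[a]+= nodes[b];                   /* merged node stays at 'a' */
      nodes[b]= nodes[--n];                  /* drop 'b' from the active set */
    }
    return total;                            /* 0 for 0 or 1 distinct values */
  }

  int main(void)
  {
    unsigned long long count[256]= {0};
    const char *sample= "aaaabbc";
    const char *p;
    for (p= sample; *p; p++)
      count[(unsigned char) *p]++;
    /* a:4 b:2 c:1  ->  4*1 + 2*2 + 1*2 = 10 bits */
    printf("packed bits: %llu\n", huffman_packed_bits(count));
    return 0;
  }

For the sample histogram a:4, b:2, c:1 this prints 10 bits, matching code lengths of 1, 2 and 2.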
VOID(fn_format(new_name,result_table,"",MARIA_NAME_DEXT,2)); + } + else if (!tmp_dir[0]) + VOID(make_new_name(new_name,org_name)); + else + VOID(fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4)); + if (!test_only && + (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0) + goto err; + + /* Start calculating statistics */ + + mrg->records=0; + for (i=0 ; i < mrg->count ; i++) + mrg->records+=mrg->file[i]->s->state.state.records; + + DBUG_PRINT("info", ("Compressing %s: (%lu records)", + result_table ? new_name : org_name, + (ulong) mrg->records)); + if (write_loop || verbose) + { + VOID(printf("Compressing %s: (%lu records)\n", + result_table ? new_name : org_name, (ulong) mrg->records)); + } + trees=fields=share->base.fields; + huff_counts=init_huff_count(isam_file,mrg->records); + QUICK_SAFEMALLOC; + + /* + Read the whole data file(s) for statistics. + */ + DBUG_PRINT("info", ("- Calculating statistics")); + if (write_loop || verbose) + VOID(printf("- Calculating statistics\n")); + if (get_statistic(mrg,huff_counts)) + goto err; + NORMAL_SAFEMALLOC; + old_length=0; + for (i=0; i < mrg->count ; i++) + old_length+= (mrg->file[i]->s->state.state.data_file_length - + mrg->file[i]->s->state.state.empty); + + /* + Create a global priority queue in preparation for making + temporary Huffman trees. + */ + if (init_queue(&queue,256,0,0,compare_huff_elements,0)) + goto err; + + /* + Check each column if we should use pre-space-compress, end-space- + compress, empty-field-compress or zero-field-compress. + */ + check_counts(huff_counts,fields,mrg->records); + + /* + Build a Huffman tree for each column. + */ + huff_trees=make_huff_trees(huff_counts,trees); + + /* + If the packed lengths of combined columns is less then the sum of + the non-combined columns, then create common Huffman trees for them. + We do this only for byte compressed columns, not for distinct values + compressed columns. + */ + if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0) + goto err; + + /* + Assign codes to all byte or column values. + */ + if (make_huff_decode_table(huff_trees,fields)) + goto err; + + /* Prepare a file buffer. */ + init_file_buffer(new_file,0); + + /* + Reserve space in the target file for the fixed compressed file header. + */ + file_buffer.pos_in_file=HEAD_LENGTH; + if (! test_only) + VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0))); + + /* + Write field infos: field type, pack type, length bits, tree number. + */ + write_field_info(huff_counts,fields,used_trees); + + /* + Write decode trees. + */ + if (!(tot_elements=write_huff_tree(huff_trees,trees))) + goto err; + + /* + Calculate the total length of the compression info header. + This includes the fixed compressed file header, the column compression + type descriptions, and the decode trees. + */ + header_length=(uint) file_buffer.pos_in_file+ + (uint) (file_buffer.pos-file_buffer.buffer); + + /* + Compress the source file into the target file. + */ + DBUG_PRINT("info", ("- Compressing file")); + if (write_loop || verbose) + VOID(printf("- Compressing file\n")); + error=compress_isam_file(mrg,huff_counts); + new_length=file_buffer.pos_in_file; + if (!error && !test_only) + { + char buff[MEMMAP_EXTRA_MARGIN]; /* End marginal for memmap */ + bzero(buff,sizeof(buff)); + error=my_write(file_buffer.file,buff,sizeof(buff), + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0; + } + + /* + Write the fixed compressed file header. 
+ */ + if (!error) + error=write_header(mrg,header_length,used_trees,tot_elements, + new_length); + + /* Flush the file buffer. */ + end_file_buffer(); + + /* Display statistics. */ + DBUG_PRINT("info", ("Min record length: %6d Max length: %6d " + "Mean total length: %6ld", + mrg->min_pack_length, mrg->max_pack_length, + (ulong) (mrg->records ? (new_length/mrg->records) : 0))); + if (verbose && mrg->records) + VOID(printf("Min record length: %6d Max length: %6d " + "Mean total length: %6ld\n", mrg->min_pack_length, + mrg->max_pack_length, (ulong) (new_length/mrg->records))); + + /* Close source and target file. */ + if (!test_only) + { + error|=my_close(new_file,MYF(MY_WME)); + if (!result_table) + { + error|=my_close(isam_file->dfile,MYF(MY_WME)); + isam_file->dfile= -1; /* Tell maria_close file is closed */ + isam_file->s->bitmap.file= -1; + } + } + + /* Cleanup. */ + free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields); + if (! test_only && ! error) + { + if (result_table) + { + error=save_state_mrg(join_isam_file,mrg,new_length,glob_crc); + } + else + { + if (backup) + { + if (my_rename(org_name,make_old_name(temp_name,isam_file->filename), + MYF(MY_WME))) + error=1; + else + { + if (tmp_dir[0]) + error=my_copy(new_name,org_name,MYF(MY_WME)); + else + error=my_rename(new_name,org_name,MYF(MY_WME)); + if (!error) + { + VOID(my_copystat(temp_name,org_name,MYF(MY_COPYTIME))); + if (tmp_dir[0]) + VOID(my_delete(new_name,MYF(MY_WME))); + } + } + } + else + { + if (tmp_dir[0]) + { + error=my_copy(new_name,org_name, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME)); + if (!error) + VOID(my_delete(new_name,MYF(MY_WME))); + } + else + error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME)); + } + if (! error) + error=save_state(isam_file,mrg,new_length,glob_crc); + } + } + error|=mrg_close(mrg); + if (join_isam_file >= 0) + error|=my_close(join_isam_file,MYF(MY_WME)); + if (error) + { + VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name)); + VOID(my_delete(new_name,MYF(MY_WME))); + DBUG_RETURN(-1); + } + if (write_loop || verbose) + { + if (old_length) + VOID(printf("%.4g%% \n", + (((longlong) (old_length - new_length)) * 100.0 / + (longlong) old_length))); + else + puts("Empty file saved in compressed format"); + } + DBUG_RETURN(0); + + err: + free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields); + if (new_file >= 0) + VOID(my_close(new_file,MYF(0))); + if (join_isam_file >= 0) + VOID(my_close(join_isam_file,MYF(0))); + mrg_close(mrg); + VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name)); + DBUG_RETURN(-1); +} + + /* Init a huff_count-struct for each field and init it */ + +static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records) +{ + reg2 uint i; + reg1 HUFF_COUNTS *count; + if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields* + sizeof(HUFF_COUNTS), + MYF(MY_ZEROFILL | MY_WME)))) + { + for (i=0 ; i < info->s->base.fields ; i++) + { + enum en_fieldtype type; + count[i].field_length=info->s->rec[i].length; + type= count[i].field_type= (enum en_fieldtype) info->s->rec[i].type; + if (type == FIELD_INTERVALL || + type == FIELD_CONSTANT || + type == FIELD_ZERO) + type = FIELD_NORMAL; + if (count[i].field_length <= 8 && + (type == FIELD_NORMAL || + type == FIELD_SKIP_ZERO)) + count[i].max_zero_fill= count[i].field_length; + /* + For every column initialize a tree, which is used to detect distinct + column values. 'int_tree' works together with 'tree_buff' and + 'tree_pos'. 
It's keys are implemented by pointers into 'tree_buff'. + This is accomplished by '-1' as the element size. + */ + init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL, + NULL); + if (records && type != FIELD_BLOB && type != FIELD_VARCHAR) + count[i].tree_pos=count[i].tree_buff = + my_malloc(count[i].field_length > 1 ? tree_buff_length : 2, + MYF(MY_WME)); + } + } + return count; +} + + + /* Free memory used by counts and trees */ + +static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees, + HUFF_COUNTS *huff_counts, + uint fields) +{ + register uint i; + + if (huff_trees) + { + for (i=0 ; i < trees ; i++) + { + if (huff_trees[i].element_buffer) + my_free((gptr) huff_trees[i].element_buffer,MYF(0)); + if (huff_trees[i].code) + my_free((gptr) huff_trees[i].code,MYF(0)); + } + my_free((gptr) huff_trees,MYF(0)); + } + if (huff_counts) + { + for (i=0 ; i < fields ; i++) + { + if (huff_counts[i].tree_buff) + { + my_free((gptr) huff_counts[i].tree_buff,MYF(0)); + delete_tree(&huff_counts[i].int_tree); + } + } + my_free((gptr) huff_counts,MYF(0)); + } + delete_queue(&queue); /* This is safe to free */ + return; +} + + /* Read through old file and gather some statistics */ + +static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) +{ + int error; + uint length, null_bytes; + ulong reclength,max_blob_length; + byte *record,*pos,*next_pos,*end_pos,*start_pos; + ha_rows record_count; + HUFF_COUNTS *count,*end_count; + TREE_ELEMENT *element; + ha_checksum(*calc_checksum) (struct st_maria_info *, const byte *); + DBUG_ENTER("get_statistic"); + + reclength= mrg->file[0]->s->base.reclength; + null_bytes= mrg->file[0]->s->base.null_bytes; + record=(byte*) my_alloca(reclength); + end_count=huff_counts+mrg->file[0]->s->base.fields; + record_count=0; glob_crc=0; + max_blob_length=0; + + /* Check how to calculate checksum */ + if (mrg->file[0]->s->data_file_type == STATIC_RECORD) + calc_checksum= _ma_static_checksum; + else + calc_checksum= _ma_checksum; + + mrg_reset(mrg); + while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) + { + ulong tot_blob_length=0; + if (! error) + { + /* glob_crc is a checksum over all bytes of all records. */ + glob_crc+= (*calc_checksum)(mrg->file[0],record); + + /* Count the incidence of values separately for every column. */ + for (pos=record + null_bytes, count=huff_counts ; + count < end_count ; + count++, + pos=next_pos) + { + next_pos=end_pos=(start_pos=pos)+count->field_length; + + /* + Put the whole column value in a tree if there is room for it. + 'int_tree' is used to quickly check for duplicate values. + 'tree_buff' collects as many distinct column values as + possible. If the field length is > 1, it is tree_buff_length, + else 2 bytes. Each value is 'field_length' bytes big. If there + are more distinct column values than fit into the buffer, we + give up with this tree. BLOBs and VARCHARs do not have a + tree_buff as it can only be used with fixed length columns. + For the special case of field length == 1, we handle only the + case that there is only one distinct value in the table(s). + Otherwise, we can have a maximum of 256 distinct values. This + is then handled by the normal Huffman tree build. + + Another limit for collecting distinct column values is the + number of values itself. Since we would need to build a + Huffman tree for the values, we are limited by the 'IS_OFFSET' + constant. 
This constant expresses a bit which is used to + determine if a tree element holds a final value or an offset + to a child element. Hence, all values and offsets need to be + smaller than 'IS_OFFSET'. A tree element is implemented with + two integer values, one for the left branch and one for the + right branch. For the extreme case that the first element + points to the last element, the number of integers in the tree + must be less or equal to IS_OFFSET. So the number of elements + must be less or equal to IS_OFFSET / 2. + + WARNING: At first, we insert a pointer into the record buffer + as the key for the tree. If we got a new distinct value, which + is really inserted into the tree, instead of being counted + only, we will copy the column value from the record buffer to + 'tree_buff' and adjust the key pointer of the tree accordingly. + */ + if (count->tree_buff) + { + global_count=count; + if (!(element=tree_insert(&count->int_tree,pos, 0, + count->int_tree.custom_arg)) || + (element->count == 1 && + (count->tree_buff + tree_buff_length < + count->tree_pos + count->field_length)) || + (count->int_tree.elements_in_tree > IS_OFFSET / 2) || + (count->field_length == 1 && + count->int_tree.elements_in_tree > 1)) + { + delete_tree(&count->int_tree); + my_free(count->tree_buff,MYF(0)); + count->tree_buff=0; + } + else + { + /* + If tree_insert() succeeds, it either creates a new element + or increments the counter of an existing element. + */ + if (element->count == 1) + { + /* Copy the new column value into 'tree_buff'. */ + memcpy(count->tree_pos,pos,(size_t) count->field_length); + /* Adjust the key pointer in the tree. */ + tree_set_pointer(element,count->tree_pos); + /* Point behind the last column value so far. */ + count->tree_pos+=count->field_length; + } + } + } + + /* Save character counters and space-counts and zero-field-counts */ + if (count->field_type == FIELD_NORMAL || + count->field_type == FIELD_SKIP_ENDSPACE) + { + /* Ignore trailing space. */ + for ( ; end_pos > pos ; end_pos--) + if (end_pos[-1] != ' ') + break; + /* Empty fields are just counted. Go to the next record. */ + if (end_pos == pos) + { + count->empty_fields++; + count->max_zero_fill=0; + continue; + } + /* + Count the total of all trailing spaces and the number of + short trailing spaces. Remember the longest trailing space. + */ + length= (uint) (next_pos-end_pos); + count->tot_end_space+=length; + if (length < 8) + count->end_space[length]++; + if (count->max_end_space < length) + count->max_end_space = length; + } + + if (count->field_type == FIELD_NORMAL || + count->field_type == FIELD_SKIP_PRESPACE) + { + /* Ignore leading space. */ + for (pos=start_pos; pos < end_pos ; pos++) + if (pos[0] != ' ') + break; + /* Empty fields are just counted. Go to the next record. */ + if (end_pos == pos) + { + count->empty_fields++; + count->max_zero_fill=0; + continue; + } + /* + Count the total of all leading spaces and the number of + short leading spaces. Remember the longest leading space. + */ + length= (uint) (pos-start_pos); + count->tot_pre_space+=length; + if (length < 8) + count->pre_space[length]++; + if (count->max_pre_space < length) + count->max_pre_space = length; + } + + /* Calculate pos, end_pos, and max_length for variable length fields. 
*/ + if (count->field_type == FIELD_BLOB) + { + uint field_length=count->field_length -maria_portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(field_length, start_pos); + memcpy_fixed((char*) &pos, start_pos+field_length,sizeof(char*)); + end_pos=pos+blob_length; + tot_blob_length+=blob_length; + set_if_bigger(count->max_length,blob_length); + } + else if (count->field_type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); + length= (pack_length == 1 ? (uint) *(uchar*) start_pos : + uint2korr(start_pos)); + pos= start_pos+pack_length; + end_pos= pos+length; + set_if_bigger(count->max_length,length); + } + + /* Evaluate 'max_zero_fill' for short fields. */ + if (count->field_length <= 8 && + (count->field_type == FIELD_NORMAL || + count->field_type == FIELD_SKIP_ZERO)) + { + uint i; + /* Zero fields are just counted. Go to the next record. */ + if (!memcmp((byte*) start_pos,zero_string,count->field_length)) + { + count->zero_fields++; + continue; + } + /* + max_zero_fill starts with field_length. It is decreased every + time a shorter "zero trailer" is found. It is set to zero when + an empty field is found (see above). This suggests that the + variable should be called 'min_zero_fill'. + */ + for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ; + i++) ; + if (i < count->max_zero_fill) + count->max_zero_fill=i; + } + + /* Ignore zero fields and check fields. */ + if (count->field_type == FIELD_ZERO || + count->field_type == FIELD_CHECK) + continue; + + /* + Count the incidence of every byte value in the + significant field value. + */ + for ( ; pos < end_pos ; pos++) + count->counts[(uchar) *pos]++; + + /* Step to next field. */ + } + + if (tot_blob_length > max_blob_length) + max_blob_length=tot_blob_length; + record_count++; + if (write_loop && record_count % WRITE_COUNT == 0) + { + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); + } + } + else if (error != HA_ERR_RECORD_DELETED) + { + VOID(fprintf(stderr, "Got error %d while reading rows", error)); + break; + } + + /* Step to next record. */ + } + if (write_loop) + { + VOID(printf(" \r")); + VOID(fflush(stdout)); + } + + /* + If --debug=d,fakebigcodes is set, fake the counts to get big Huffman + codes. 
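fakebigcodes() (see below) skews the counts so that unusually long Huffman + codes are generated, which can be used to exercise the code length checks + in write_huff_tree().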
+ */ + DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count);); + + DBUG_PRINT("info", ("Found the following number of incidents " + "of the byte codes:")); + if (verbose >= 2) + VOID(printf("Found the following number of incidents " + "of the byte codes:\n")); + for (count= huff_counts ; count < end_count; count++) + { + uint idx; + my_off_t total_count; + char llbuf[32]; + + DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1))); + if (verbose >= 2) + VOID(printf("column: %3u\n", (uint) (count - huff_counts + 1))); + if (count->tree_buff) + { + DBUG_PRINT("info", ("number of distinct values: %u", + (uint) ((count->tree_pos - count->tree_buff) / + count->field_length))); + if (verbose >= 2) + VOID(printf("number of distinct values: %u\n", + (uint) ((count->tree_pos - count->tree_buff) / + count->field_length))); + } + total_count= 0; + for (idx= 0; idx < 256; idx++) + { + if (count->counts[idx]) + { + total_count+= count->counts[idx]; + DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx, + llstr((longlong) count->counts[idx], llbuf))); + if (verbose >= 2) + VOID(printf("counts[0x%02x]: %12s\n", idx, + llstr((longlong) count->counts[idx], llbuf))); + } + } + DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count, + llbuf))); + if ((verbose >= 2) && total_count) + { + VOID(printf("total: %12s\n", + llstr((longlong) total_count, llbuf))); + } + } + + mrg->records=record_count; + mrg->max_blob_length=max_blob_length; + my_afree((gptr) record); + DBUG_RETURN(error != HA_ERR_END_OF_FILE); +} + +static int compare_huff_elements(void *not_used __attribute__((unused)), + byte *a, byte *b) +{ + return *((my_off_t*) a) < *((my_off_t*) b) ? -1 : + (*((my_off_t*) a) == *((my_off_t*) b) ? 0 : 1); +} + + /* Check each tree if we should use pre-space-compress, end-space- + compress, empty-field-compress or zero-field-compress */ + +static void check_counts(HUFF_COUNTS *huff_counts, uint trees, + my_off_t records) +{ + uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count]; + my_off_t old_length,new_length,length; + DBUG_ENTER("check_counts"); + + bzero((gptr) field_count,sizeof(field_count)); + space_fields=fill_zero_fields=0; + + for (; trees-- ; huff_counts++) + { + if (huff_counts->field_type == FIELD_BLOB) + { + huff_counts->length_bits=max_bit(huff_counts->max_length); + goto found_pack; + } + else if (huff_counts->field_type == FIELD_VARCHAR) + { + huff_counts->length_bits=max_bit(huff_counts->max_length); + goto found_pack; + } + else if (huff_counts->field_type == FIELD_CHECK) + { + huff_counts->bytes_packed=0; + huff_counts->counts[0]=0; + goto found_pack; + } + + huff_counts->field_type=FIELD_NORMAL; + huff_counts->pack_type=0; + + /* Check for zero-filled records (in this column), or zero records. */ + if (huff_counts->zero_fields || ! records) + { + my_off_t old_space_count; + /* + If there are only zero filled records (in this column), + or no records at all, we are done. + */ + if (huff_counts->zero_fields == records) + { + huff_counts->field_type= FIELD_ZERO; + huff_counts->bytes_packed=0; + huff_counts->counts[0]=0; + goto found_pack; + } + /* Remeber the number of significant spaces. */ + old_space_count=huff_counts->counts[' ']; + /* Add all leading and trailing spaces. */ + huff_counts->counts[' ']+= (huff_counts->tot_end_space + + huff_counts->tot_pre_space + + huff_counts->empty_fields * + huff_counts->field_length); + /* Check, what the compressed length of this would be. 
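'old_length' is the estimated size if all-zero values are skipped and marked + by one flag bit per record (hence the extra records/8 bytes); 'new_length' + below is the size if the zero bytes are Huffman coded like any other byte, + and the cheaper of the two variants wins.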
*/ + old_length=calc_packed_length(huff_counts,0)+records/8; + /* Get the number of zero bytes. */ + length=huff_counts->zero_fields*huff_counts->field_length; + /* Add it to the counts. */ + huff_counts->counts[0]+=length; + /* Check, what the compressed length of this would be. */ + new_length=calc_packed_length(huff_counts,0); + /* If the compression without the zeroes would be shorter, we are done. */ + if (old_length < new_length && huff_counts->field_length > 1) + { + huff_counts->field_type=FIELD_SKIP_ZERO; + huff_counts->counts[0]-=length; + huff_counts->bytes_packed=old_length- records/8; + goto found_pack; + } + /* Remove the insignificant spaces, but keep the zeroes. */ + huff_counts->counts[' ']=old_space_count; + } + /* Check, what the compressed length of this column would be. */ + huff_counts->bytes_packed=calc_packed_length(huff_counts,0); + + /* + If there are enough empty records (in this column), + treating them specially may pay off. + */ + if (huff_counts->empty_fields) + { + if (huff_counts->field_length > 2 && + huff_counts->empty_fields + (records - huff_counts->empty_fields)* + (1+max_bit(max(huff_counts->max_pre_space, + huff_counts->max_end_space))) < + records * max_bit(huff_counts->field_length)) + { + huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS; + } + else + { + length=huff_counts->empty_fields*huff_counts->field_length; + if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space) + { + huff_counts->tot_end_space+=length; + huff_counts->max_end_space=huff_counts->field_length; + if (huff_counts->field_length < 8) + huff_counts->end_space[huff_counts->field_length]+= + huff_counts->empty_fields; + } + if (huff_counts->tot_pre_space) + { + huff_counts->tot_pre_space+=length; + huff_counts->max_pre_space=huff_counts->field_length; + if (huff_counts->field_length < 8) + huff_counts->pre_space[huff_counts->field_length]+= + huff_counts->empty_fields; + } + } + } + + /* + If there are enough trailing spaces (in this column), + treating them specially may pay off. + */ + if (huff_counts->tot_end_space) + { + huff_counts->counts[' ']+=huff_counts->tot_pre_space; + if (test_space_compress(huff_counts,records,huff_counts->max_end_space, + huff_counts->end_space, + huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE)) + goto found_pack; + huff_counts->counts[' ']-=huff_counts->tot_pre_space; + } + + /* + If there are enough leading spaces (in this column), + treating them specially may pay off. + */ + if (huff_counts->tot_pre_space) + { + if (test_space_compress(huff_counts,records,huff_counts->max_pre_space, + huff_counts->pre_space, + huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE)) + goto found_pack; + } + + found_pack: /* Found field-packing */ + + /* Test if we can use zero-fill */ + + if (huff_counts->max_zero_fill && + (huff_counts->field_type == FIELD_NORMAL || + huff_counts->field_type == FIELD_SKIP_ZERO)) + { + huff_counts->counts[0]-=huff_counts->max_zero_fill* + (huff_counts->field_type == FIELD_SKIP_ZERO ? 
+ records - huff_counts->zero_fields : records); + huff_counts->pack_type|=PACK_TYPE_ZERO_FILL; + huff_counts->bytes_packed=calc_packed_length(huff_counts,0); + } + + /* Test if intervall-field is better */ + + if (huff_counts->tree_buff) + { + HUFF_TREE tree; + + DBUG_EXECUTE_IF("forceintervall", + huff_counts->bytes_packed= ~ (my_off_t) 0;); + tree.element_buffer=0; + if (!make_huff_tree(&tree,huff_counts) && + tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed) + { + if (tree.elements == 1) + huff_counts->field_type=FIELD_CONSTANT; + else + huff_counts->field_type=FIELD_INTERVALL; + huff_counts->pack_type=0; + } + else + { + my_free((gptr) huff_counts->tree_buff,MYF(0)); + delete_tree(&huff_counts->int_tree); + huff_counts->tree_buff=0; + } + if (tree.element_buffer) + my_free((gptr) tree.element_buffer,MYF(0)); + } + if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS) + space_fields++; + if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL) + fill_zero_fields++; + field_count[huff_counts->field_type]++; + } + DBUG_PRINT("info", ("normal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields)); + DBUG_PRINT("info", ("pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d", + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); + if (verbose) + VOID(printf("\nnormal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d\n" + "pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d\n", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields, + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); + DBUG_VOID_RETURN; +} + + +/* Test if we can use space-compression and empty-field-compression */ + +static int +test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records, + uint max_space_length, my_off_t *space_counts, + my_off_t tot_space_count, enum en_fieldtype field_type) +{ + int min_pos; + uint length_bits,i; + my_off_t space_count,min_space_count,min_pack,new_length,skip; + + length_bits=max_bit(max_space_length); + + /* Default no end_space-packing */ + space_count=huff_counts->counts[(uint) ' ']; + min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count); + min_pack=calc_packed_length(huff_counts,0); + min_pos= -2; + huff_counts->counts[(uint) ' ']=space_count; + + /* Test with allways space-count */ + new_length=huff_counts->bytes_packed+length_bits*records/8; + if (new_length+1 < min_pack) + { + min_pos= -1; + min_pack=new_length; + min_space_count=space_count; + } + /* Test with length-flag */ + for (skip=0L, i=0 ; i < 8 ; i++) + { + if (space_counts[i]) + { + if (i) + huff_counts->counts[(uint) ' ']+=space_counts[i]; + skip+=huff_counts->pre_space[i]; + new_length=calc_packed_length(huff_counts,0)+ + (records+(records-skip)*(1+length_bits))/8; + if (new_length < min_pack) + { + min_pos=(int) i; + min_pack=new_length; + min_space_count=huff_counts->counts[(uint) ' ']; + } + } + } + + huff_counts->counts[(uint) ' ']=min_space_count; + huff_counts->bytes_packed=min_pack; + switch (min_pos) { + case -2: + return(0); /* No space-compress */ + case -1: /* Always space-count */ + huff_counts->field_type=field_type; + huff_counts->min_space=0; + huff_counts->length_bits=max_bit(max_space_length); + break; + default: + 
huff_counts->field_type=field_type; + huff_counts->min_space=(uint) min_pos; + huff_counts->pack_type|=PACK_TYPE_SELECTED; + huff_counts->length_bits=max_bit(max_space_length); + break; + } + return(1); /* Using space-compress */ +} + + + /* Make a huff_tree of each huff_count */ + +static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees) +{ + uint tree; + HUFF_TREE *huff_tree; + DBUG_ENTER("make_huff_trees"); + + if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE), + MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(0); + + for (tree=0 ; tree < trees ; tree++) + { + if (make_huff_tree(huff_tree+tree,huff_counts+tree)) + { + while (tree--) + my_free((gptr) huff_tree[tree].element_buffer,MYF(0)); + my_free((gptr) huff_tree,MYF(0)); + DBUG_RETURN(0); + } + } + DBUG_RETURN(huff_tree); +} + +/* + Build a Huffman tree. + + SYNOPSIS + make_huff_tree() + huff_tree The Huffman tree. + huff_counts The counts. + + DESCRIPTION + Build a Huffman tree according to huff_counts->counts or + huff_counts->tree_buff. tree_buff, if non-NULL contains up to + tree_buff_length of distinct column values. In that case, whole + values can be Huffman encoded instead of single bytes. + + RETURN + 0 OK + != 0 Error +*/ + +static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) +{ + uint i,found,bits_packed,first,last; + my_off_t bytes_packed; + HUFF_ELEMENT *a,*b,*new_huff_el; + + first=last=0; + if (huff_counts->tree_buff) + { + /* Calculate the number of distinct values in tree_buff. */ + found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) / + huff_counts->field_length; + first=0; last=found-1; + } + else + { + /* Count the number of byte codes found in the column. */ + for (i=found=0 ; i < 256 ; i++) + { + if (huff_counts->counts[i]) + { + if (! found++) + first=i; + last=i; + } + } + if (found < 2) + found=2; + } + + /* When using 'tree_buff' we can have more that 256 values. */ + if (queue.max_elements < found) + { + delete_queue(&queue); + if (init_queue(&queue,found,0,0,compare_huff_elements,0)) + return -1; + } + + /* Allocate or reallocate an element buffer for the Huffman tree. */ + if (!huff_tree->element_buffer) + { + if (!(huff_tree->element_buffer= + (HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME)))) + return 1; + } + else + { + HUFF_ELEMENT *temp; + if (!(temp= + (HUFF_ELEMENT*) my_realloc((gptr) huff_tree->element_buffer, + found*2*sizeof(HUFF_ELEMENT), + MYF(MY_WME)))) + return 1; + huff_tree->element_buffer=temp; + } + + huff_counts->tree=huff_tree; + huff_tree->counts=huff_counts; + huff_tree->min_chr=first; + huff_tree->max_chr=last; + huff_tree->char_bits=max_bit(last-first); + huff_tree->offset_bits=max_bit(found-1)+1; + + if (huff_counts->tree_buff) + { + huff_tree->elements=0; + huff_tree->tree_pack_length=(1+15+16+5+5+ + (huff_tree->char_bits+1)*found+ + (huff_tree->offset_bits+1)* + (found-2)+7)/8 + + (uint) (huff_tree->counts->tree_pos- + huff_tree->counts->tree_buff); + /* + Put a HUFF_ELEMENT into the queue for every distinct column value. + + tree_walk() calls save_counts_in_queue() for every element in + 'int_tree'. This takes elements from the target trees element + buffer and places references to them into the buffer of the + priority queue. We insert in column value order, but the order is + in fact irrelevant here. We will establish the correct order + later. 
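The 'later' step is the _downheap() loop further down, which turns the plain + array of references into a heap ordered by incidence count.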
+ */ + tree_walk(&huff_counts->int_tree, + (int (*)(void*, element_count,void*)) save_counts_in_queue, + (gptr) huff_tree, left_root_right); + } + else + { + huff_tree->elements=found; + huff_tree->tree_pack_length=(9+9+5+5+ + (huff_tree->char_bits+1)*found+ + (huff_tree->offset_bits+1)* + (found-2)+7)/8; + /* + Put a HUFF_ELEMENT into the queue for every byte code found in the column. + + The elements are taken from the target trees element buffer. + Instead of using queue_insert(), we just place references to the + elements into the buffer of the priority queue. We insert in byte + value order, but the order is in fact irrelevant here. We will + establish the correct order later. + */ + for (i=first, found=0 ; i <= last ; i++) + { + if (huff_counts->counts[i]) + { + new_huff_el=huff_tree->element_buffer+(found++); + new_huff_el->count=huff_counts->counts[i]; + new_huff_el->a.leaf.null=0; + new_huff_el->a.leaf.element_nr=i; + queue.root[found]=(byte*) new_huff_el; + } + } + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. This is required to enter + the loop, which builds the Huffman tree. + */ + while (found < 2) + { + new_huff_el=huff_tree->element_buffer+(found++); + new_huff_el->count=0; + new_huff_el->a.leaf.null=0; + if (last) + new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1; + else + new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1; + queue.root[found]=(byte*) new_huff_el; + } + } + + /* Make a queue from the queue buffer. */ + queue.elements=found; + + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ + for (i=found/2 ; i > 0 ; i--) + _downheap(&queue,i); + + /* The Huffman algorithm. */ + bytes_packed=0; bits_packed=0; + for (i=1 ; i < found ; i++) + { + /* + Pop the top element from the queue (the one with the least incidence). + Popping from a priority queue includes a re-ordering of the queue, + to get the next least incidence element to the top. + */ + a=(HUFF_ELEMENT*) queue_remove(&queue,0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ + b=(HUFF_ELEMENT*) queue.root[1]; + /* Get a new element from the element buffer. */ + new_huff_el=huff_tree->element_buffer+found+i; + /* The new element gets the sum of the two least incidence elements. */ + new_huff_el->count=a->count+b->count; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ + bits_packed+=(uint) (new_huff_el->count & 7); + bytes_packed+=new_huff_el->count/8; + /* The new element points to its children, lesser in left. */ + new_huff_el->a.nod.left=a; + new_huff_el->a.nod.right=b; + /* + Replace the copied top element by the new element and re-order the + queue. 
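For example, for counts {a:5, b:2, c:1} the first merge combines c and b into + a node of count 3 (adding 3 to the bit total), the second merges 3 and 5 + into 8 (adding 8 more); the total of 11 bits matches code lengths of 1 bit + for a and 2 bits each for b and c.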
+ */ + queue.root[1]=(byte*) new_huff_el; + queue_replaced(&queue); + } + huff_tree->root=(HUFF_ELEMENT*) queue.root[1]; + huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8; + return 0; +} + +static int compare_tree(void* cmp_arg __attribute__((unused)), + register const uchar *s, register const uchar *t) +{ + uint length; + for (length=global_count->field_length; length-- ;) + if (*s++ != *t++) + return (int) s[-1] - (int) t[-1]; + return 0; +} + +/* + Organize distinct column values and their incidences into a priority queue. + + SYNOPSIS + save_counts_in_queue() + key The column value. + count The incidence of this value. + tree The Huffman tree to be built later. + + DESCRIPTION + We use the element buffer of the targeted tree. The distinct column + values are organized in a priority queue first. The Huffman + algorithm will later organize the elements into a Huffman tree. For + the time being, we just place references to the elements into the + queue buffer. The buffer will later be organized into a priority + queue. + + RETURN + 0 + */ + +static int save_counts_in_queue(byte *key, element_count count, + HUFF_TREE *tree) +{ + HUFF_ELEMENT *new_huff_el; + + new_huff_el=tree->element_buffer+(tree->elements++); + new_huff_el->count=count; + new_huff_el->a.leaf.null=0; + new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) / + tree->counts->field_length; + queue.root[tree->elements]=(byte*) new_huff_el; + return 0; +} + + +/* + Calculate the length of the file if the given counts are used. + + SYNOPSIS + calc_packed_length() + huff_counts The counts for a column of the table(s). + add_tree_lenght If the decode tree length should be added. + + DESCRIPTION + We need to follow the Huffman algorithm until we know how many bits + are required for each byte code. But we do not need the resulting + Huffman tree. Hence, we can leave out some steps which are essential + in make_huff_tree(). + + RETURN + Number of bytes required to compress this table column. +*/ + +static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, + uint add_tree_lenght) +{ + uint i,found,bits_packed,first,last; + my_off_t bytes_packed; + HUFF_ELEMENT element_buffer[256]; + DBUG_ENTER("calc_packed_length"); + + /* + WARNING: We use a small hack for efficiency: Instead of placing + references to HUFF_ELEMENTs into the queue, we just insert + references to the counts of the byte codes which appeared in this + table column. During the Huffman algorithm they are successively + replaced by references to HUFF_ELEMENTs. This works because + HUFF_ELEMENTs have the incidence count at their beginning. + Regardless of whether the queue array contains references to counts of + type my_off_t or references to HUFF_ELEMENTs which have the count of + type my_off_t at their beginning, it always points to a count of the + same type. + + Instead of using queue_insert(), we just copy the references into + the buffer of the priority queue. We insert in byte value order, but + the order is in fact irrelevant here. We will establish the correct + order later. + */ + first=last=0; + for (i=found=0 ; i < 256 ; i++) + { + if (huff_counts->counts[i]) + { + if (! found++) + first=i; + last=i; + /* We start with root[1], which is the queue's top element. */ + queue.root[found]=(byte*) &huff_counts->counts[i]; + } + } + if (!found) + DBUG_RETURN(0); /* Empty tree */ + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. 
This is required to enter + the loop, which follows the Huffman algorithm. + */ + if (found < 2) + queue.root[++found]=(byte*) &huff_counts->counts[last ? 0 : 1]; + + /* Make a queue from the queue buffer. */ + queue.elements=found; + + bytes_packed=0; bits_packed=0; + /* Add the length of the coding table, which would become part of the file. */ + if (add_tree_lenght) + bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+ + (max_bit(found-1)+1+1)*(found-2) +7)/8; + + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ + for (i=(found+1)/2 ; i > 0 ; i--) + _downheap(&queue,i); + + /* The Huffman algorithm. */ + for (i=0 ; i < found-1 ; i++) + { + my_off_t *a; + my_off_t *b; + HUFF_ELEMENT *new_huff_el; + + /* + Pop the top element from the queue (the one with the least + incidence). Popping from a priority queue includes a re-ordering + of the queue, to get the next least incidence element to the top. + */ + a= (my_off_t*) queue_remove(&queue, 0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ + b= (my_off_t*) queue.root[1]; + /* Create a new element in a local (automatic) buffer. */ + new_huff_el= element_buffer + i; + /* The new element gets the sum of the two least incidence elements. */ + new_huff_el->count= *a + *b; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ + bits_packed+=(uint) (new_huff_el->count & 7); + bytes_packed+=new_huff_el->count/8; + /* + Replace the copied top element by the new element and re-order the + queue. This successively replaces the references to counts by + references to HUFF_ELEMENTs. + */ + queue.root[1]=(byte*) new_huff_el; + queue_replaced(&queue); + } + DBUG_RETURN(bytes_packed+(bits_packed+7)/8); +} + + + /* Remove trees that don't give any compression */ + +static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees) +{ + uint k,tree_number; + HUFF_COUNTS count,*i,*j,*last_count; + + last_count=huff_counts+trees; + for (tree_number=0, i=huff_counts ; i < last_count ; i++) + { + if (!i->tree->tree_number) + { + i->tree->tree_number= ++tree_number; + if (i->tree_buff) + continue; /* Don't join intervall */ + for (j=i+1 ; j < last_count ; j++) + { + if (! j->tree->tree_number && ! j->tree_buff) + { + for (k=0 ; k < 256 ; k++) + count.counts[k]=i->counts[k]+j->counts[k]; + if (calc_packed_length(&count,1) <= + i->tree->bytes_packed + j->tree->bytes_packed+ + i->tree->tree_pack_length+j->tree->tree_pack_length+ + ALLOWED_JOIN_DIFF) + { + memcpy_fixed((byte*) i->counts,(byte*) count.counts, + sizeof(count.counts[0])*256); + my_free((gptr) j->tree->element_buffer,MYF(0)); + j->tree->element_buffer=0; + j->tree=i->tree; + bmove((byte*) i->counts,(byte*) count.counts, + sizeof(count.counts[0])*256); + if (make_huff_tree(i->tree,i)) + return (uint) -1; + } + } + } + } + } + DBUG_PRINT("info", ("Original trees: %d After join: %d", + trees, tree_number)); + if (verbose) + VOID(printf("Original trees: %d After join: %d\n", trees, tree_number)); + return tree_number; /* Return trees left */ +} + + +/* + Fill in huff_tree encode tables. 
+ + SYNOPSIS + make_huff_decode_table() + huff_tree An array of HUFF_TREE which are to be encoded. + trees The number of HUFF_TREE in the array. + + RETURN + 0 success + != 0 error +*/ + +static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) +{ + uint elements; + for ( ; trees-- ; huff_tree++) + { + if (huff_tree->tree_number > 0) + { + elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256; + if (!(huff_tree->code = + (ulonglong*) my_malloc(elements* + (sizeof(ulonglong) + sizeof(uchar)), + MYF(MY_WME | MY_ZEROFILL)))) + return 1; + huff_tree->code_len=(uchar*) (huff_tree->code+elements); + make_traverse_code_tree(huff_tree, huff_tree->root, + 8 * sizeof(ulonglong), LL(0)); + } + } + return 0; +} + + +static void make_traverse_code_tree(HUFF_TREE *huff_tree, + HUFF_ELEMENT *element, + uint size, ulonglong code) +{ + uint chr; + if (!element->a.leaf.null) + { + chr=element->a.leaf.element_nr; + huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size); + huff_tree->code[chr]= (code >> size); + if (huff_tree->height < 8 * sizeof(ulonglong) - size) + huff_tree->height= 8 * sizeof(ulonglong) - size; + } + else + { + size--; + make_traverse_code_tree(huff_tree,element->a.nod.left,size,code); + make_traverse_code_tree(huff_tree, element->a.nod.right, size, + code + (((ulonglong) 1) << size)); + } + return; +} + + +/* + Convert a value into binary digits. + + SYNOPSIS + bindigits() + value The value. + length The number of low order bits to convert. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *bindigits(ulonglong value, uint bits) +{ + static char digits[72]; + char *ptr= digits; + uint idx= bits; + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + *(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1); + *ptr= '\0'; + return digits; +} + + +/* + Convert a value into hexadecimal digits. + + SYNOPSIS + hexdigits() + value The value. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *hexdigits(ulonglong value) +{ + static char digits[20]; + char *ptr= digits; + uint idx= 2 * sizeof(value); /* Two hex digits per byte. 
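For the 64-bit ulonglong used here this is 16 digits, which fits into + digits[20] together with the terminating NUL.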
*/ + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + { + if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9') + *(ptr - 1)+= 'a' - '9' - 1; + } + *ptr= '\0'; + return digits; +} + + + /* Write header to new packed data file */ + +static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees, + my_off_t tot_elements,my_off_t filelength) +{ + byte *buff= (byte*) file_buffer.pos; + + bzero(buff,HEAD_LENGTH); + memcpy_fixed(buff,maria_pack_file_magic,4); + int4store(buff+4,head_length); + int4store(buff+8, mrg->min_pack_length); + int4store(buff+12,mrg->max_pack_length); + int4store(buff+16,tot_elements); + int4store(buff+20,intervall_length); + int2store(buff+24,trees); + buff[26]=(char) mrg->ref_length; + /* Save record pointer length */ + buff[27]= (uchar) maria_get_pointer_length((ulonglong) filelength,2); + if (test_only) + return 0; + VOID(my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0))); + return my_write(file_buffer.file,(const byte *) file_buffer.pos,HEAD_LENGTH, + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0; +} + + /* Write fieldinfo to new packed file */ + +static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) +{ + reg1 uint i; + uint huff_tree_bits; + huff_tree_bits=max_bit(trees ? trees-1 : 0); + + DBUG_PRINT("info", (" ")); + DBUG_PRINT("info", ("column types:")); + DBUG_PRINT("info", ("FIELD_NORMAL 0")); + DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1")); + DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2")); + DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3")); + DBUG_PRINT("info", ("FIELD_BLOB 4")); + DBUG_PRINT("info", ("FIELD_CONSTANT 5")); + DBUG_PRINT("info", ("FIELD_INTERVALL 6")); + DBUG_PRINT("info", ("FIELD_ZERO 7")); + DBUG_PRINT("info", ("FIELD_VARCHAR 8")); + DBUG_PRINT("info", ("FIELD_CHECK 9")); + DBUG_PRINT("info", (" ")); + DBUG_PRINT("info", ("pack type as a set of flags:")); + DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1")); + DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2")); + DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4")); + DBUG_PRINT("info", (" ")); + if (verbose >= 2) + { + VOID(printf("\n")); + VOID(printf("column types:\n")); + VOID(printf("FIELD_NORMAL 0\n")); + VOID(printf("FIELD_SKIP_ENDSPACE 1\n")); + VOID(printf("FIELD_SKIP_PRESPACE 2\n")); + VOID(printf("FIELD_SKIP_ZERO 3\n")); + VOID(printf("FIELD_BLOB 4\n")); + VOID(printf("FIELD_CONSTANT 5\n")); + VOID(printf("FIELD_INTERVALL 6\n")); + VOID(printf("FIELD_ZERO 7\n")); + VOID(printf("FIELD_VARCHAR 8\n")); + VOID(printf("FIELD_CHECK 9\n")); + VOID(printf("\n")); + VOID(printf("pack type as a set of flags:\n")); + VOID(printf("PACK_TYPE_SELECTED 1\n")); + VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n")); + VOID(printf("PACK_TYPE_ZERO_FILL 4\n")); + VOID(printf("\n")); + } + for (i=0 ; i++ < fields ; counts++) + { + write_bits((ulonglong) (int) counts->field_type, 5); + write_bits(counts->pack_type,6); + if (counts->pack_type & PACK_TYPE_ZERO_FILL) + write_bits(counts->max_zero_fill,5); + else + write_bits(counts->length_bits,5); + write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits); + DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + i , counts->field_type, counts->pack_type, + counts->max_zero_fill, counts->length_bits, + counts->tree->tree_number, counts->field_length)); + if (verbose >= 2) + VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u " + "tree: %2u length: %4u\n", i , counts->field_type, + counts->pack_type, counts->max_zero_fill, counts->length_bits, + 
counts->tree->tree_number, counts->field_length)); + } + flush_bits(); + return; +} + + /* Write all huff_trees to new datafile. Return tot count of + elements in all trees + Returns 0 on error */ + +static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) +{ + uint i,int_length; + uint tree_no; + uint codes; + uint errors= 0; + uint *packed_tree,*offset,length; + my_off_t elements; + + /* Find the highest number of elements in the trees. */ + for (i=length=0 ; i < trees ; i++) + if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length) + length=huff_tree[i].elements; + /* + Allocate a buffer for packing a decode tree. Two numbers per element + (left child and right child). + */ + if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2))) + { + my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2); + return 0; + } + + DBUG_PRINT("info", (" ")); + if (verbose >= 2) + VOID(printf("\n")); + tree_no= 0; + intervall_length=0; + for (elements=0; trees-- ; huff_tree++) + { + /* Skip columns that have been joined with other columns. */ + if (huff_tree->tree_number == 0) + continue; /* Deleted tree */ + tree_no++; + DBUG_PRINT("info", (" ")); + if (verbose >= 3) + VOID(printf("\n")); + /* Count the total number of elements (byte codes or column values). */ + elements+=huff_tree->elements; + huff_tree->max_offset=2; + /* Build a tree of offsets and codes for decoding in 'packed_tree'. */ + if (huff_tree->elements <= 1) + offset=packed_tree; + else + offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree); + + /* This should be the same as 'length' above. */ + huff_tree->offset_bits=max_bit(huff_tree->max_offset); + + /* + Since we check this during collecting the distinct column values, + this should never happen. + */ + if (huff_tree->max_offset >= IS_OFFSET) + { /* This should be impossible */ + VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n", + huff_tree->max_offset)); + my_afree((gptr) packed_tree); + return 0; + } + + DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu " + "char_bits: %u\n", + (ulong) (file_buffer.pos - file_buffer.buffer), + huff_tree->elements, (ulong) (offset - packed_tree), + huff_tree->char_bits)); + if (!huff_tree->counts->tree_buff) + { + /* We do a byte compression on this column. Mark with bit 0. */ + write_bits(0,1); + write_bits(huff_tree->min_chr,8); + write_bits(huff_tree->elements,9); + write_bits(huff_tree->char_bits,5); + write_bits(huff_tree->offset_bits,5); + int_length=0; + } + else + { + int_length=(uint) (huff_tree->counts->tree_pos - + huff_tree->counts->tree_buff); + /* We have distinct column values for this column. Mark with bit 1. */ + write_bits(1,1); + write_bits(huff_tree->elements,15); + write_bits(int_length,16); + write_bits(huff_tree->char_bits,5); + write_bits(huff_tree->offset_bits,5); + intervall_length+=int_length; + } + DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u " + "offset_bits: %2u %s: %5u codelen: %2u", + tree_no, huff_tree->elements, huff_tree->char_bits, + huff_tree->offset_bits, huff_tree->counts->tree_buff ? + "bufflen" : "min_chr", huff_tree->counts->tree_buff ? + int_length : huff_tree->min_chr, huff_tree->height)); + if (verbose >= 2) + VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u " + "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements, + huff_tree->char_bits, huff_tree->offset_bits, + huff_tree->counts->tree_buff ? "bufflen" : "min_chr", + huff_tree->counts->tree_buff ? 
int_length : + huff_tree->min_chr, huff_tree->height)); + + /* Check that the code tree length matches the element count. */ + length=(uint) (offset-packed_tree); + if (length != huff_tree->elements*2-2) + { + VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n", + length, huff_tree->elements * 2 - 2)); + errors++; + break; + } + + for (i=0 ; i < length ; i++) + { + if (packed_tree[i] & IS_OFFSET) + write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits), + huff_tree->offset_bits+1); + else + write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1); + DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x", + i, (packed_tree[i] & IS_OFFSET) ? + " -> " : "", (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); + if (verbose >= 3) + VOID(printf("tree[0x%04x]: %s0x%04x\n", + i, (packed_tree[i] & IS_OFFSET) ? " -> " : "", + (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); + } + flush_bits(); + + /* + Display coding tables and check their correctness. + */ + codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256; + for (i= 0; i < codes; i++) + { + ulonglong code; + uint bits; + uint len; + uint idx; + + if (! (len= huff_tree->code_len[i])) + continue; + DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], + huff_tree->code_len[i]))); + if (verbose >= 3) + VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], huff_tree->code_len[i]))); + + /* Check that the encode table decodes correctly. */ + code= 0; + bits= 0; + idx= 0; + DBUG_EXECUTE_IF("forcechkerr1", len--;); + DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code);); + DBUG_EXECUTE_IF("forcechkerr3", idx= length;); + for (;;) + { + if (! len) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n", + hexdigits(huff_tree->code[i]), huff_tree->code_len[i])); + errors++; + break; + } + code<<= 1; + code|= (huff_tree->code[i] >> (--len)) & 1; + bits++; + if (bits > 8 * sizeof(code)) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n", + bits, (uint) (8 * sizeof(code)))); + errors++; + break; + } + idx+= (uint) code & 1; + if (idx >= length) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n", + idx, length)); + errors++; + break; + } + if (packed_tree[idx] & IS_OFFSET) + idx+= packed_tree[idx] & ~IS_OFFSET; + else + break; /* Hit a leaf. This contains the result value. */ + } + if (errors) + break; + + DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;); + if (packed_tree[idx] != i) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n", + packed_tree[idx], i)); + errors++; + break; + } + } /*end for (codes)*/ + if (errors) + break; + + /* Write column values in case of distinct column value compression. 
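Each of the 'int_length' bytes collected in tree_buff is emitted as a plain + 8 bit value, so that decompression can map a decoded element number back to + the original column value.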
*/ + if (huff_tree->counts->tree_buff) + { + for (i=0 ; i < int_length ; i++) + { + write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8); + DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x", + i, (uchar) huff_tree->counts->tree_buff[i])); + if (verbose >= 3) + VOID(printf("column_values[0x%04x]: 0x%02x\n", + i, (uchar) huff_tree->counts->tree_buff[i])); + } + } + flush_bits(); + } + DBUG_PRINT("info", (" ")); + if (verbose >= 2) + VOID(printf("\n")); + my_afree((gptr) packed_tree); + if (errors) + { + VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n")); + return 0; + } + return elements; +} + + +static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element, + uint *offset) +{ + uint *prev_offset; + + prev_offset= offset; + /* + 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null, + then there is no left child and, hence no right child either. This + is a property of a binary tree. An element is either a node with two + childs, or a leaf without childs. + + The current element is always a node with two childs. Go left first. + */ + if (!element->a.nod.left->a.leaf.null) + { + /* Store the byte code or the index of the column value. */ + prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr; + offset+=2; + } + else + { + /* + Recursively traverse the tree to the left. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ + prev_offset[0]= IS_OFFSET+2; + offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2); + } + + /* Now, check the right child. */ + if (!element->a.nod.right->a.leaf.null) + { + /* Store the byte code or the index of the column value. */ + prev_offset[1]=element->a.nod.right->a.leaf.element_nr; + return offset; + } + else + { + /* + Recursively traverse the tree to the right. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ + uint temp=(uint) (offset-prev_offset-1); + prev_offset[1]= IS_OFFSET+ temp; + if (huff_tree->max_offset < temp) + huff_tree->max_offset = temp; + return make_offset_code_tree(huff_tree,element->a.nod.right,offset); + } +} + + /* Get number of bits neaded to represent value */ + +static uint max_bit(register uint value) +{ + reg2 uint power=1; + + while ((value>>=1)) + power++; + return (power); +} + + +static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) +{ + int error; + uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length; + uint intervall,field_length,max_pack_length,pack_blob_length, null_bytes; + my_off_t record_count; + char llbuf[32]; + ulong length,pack_length; + byte *record,*pos,*end_pos,*record_pos,*start_pos; + HUFF_COUNTS *count,*end_count; + HUFF_TREE *tree; + MARIA_HA *isam_file=mrg->file[0]; + uint pack_version= (uint) isam_file->s->pack.version; + DBUG_ENTER("compress_isam_file"); + + /* Allocate a buffer for the records (excluding blobs). */ + if (!(record=(byte*) my_alloca(isam_file->s->base.reclength))) + return -1; + + end_count=huff_counts+isam_file->s->base.fields; + min_record_length= (uint) ~0; + max_record_length=0; + null_bytes= isam_file->s->base.null_bytes; + + /* + Calculate the maximum number of bits required to pack the records. + Remember to understand 'max_zero_fill' as 'min_zero_fill'. + The tree height determines the maximum number of bits per value. + Some fields skip leading or trailing spaces or zeroes. The skipped + number of bytes is encoded by 'length_bits' bits. 
+ Empty blobs and varchar are encoded with a single 1 bit. Other blobs + and varchar get a leading 0 bit. + */ + max_calc_length= null_bytes; + for (i= 0 ; i < isam_file->s->base.fields ; i++) + { + if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL)) + huff_counts[i].max_zero_fill=0; + if (huff_counts[i].field_type == FIELD_CONSTANT || + huff_counts[i].field_type == FIELD_ZERO || + huff_counts[i].field_type == FIELD_CHECK) + continue; + if (huff_counts[i].field_type == FIELD_INTERVALL) + max_calc_length+=huff_counts[i].tree->height; + else if (huff_counts[i].field_type == FIELD_BLOB || + huff_counts[i].field_type == FIELD_VARCHAR) + max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1; + else + max_calc_length+= + (huff_counts[i].field_length - huff_counts[i].max_zero_fill)* + huff_counts[i].tree->height+huff_counts[i].length_bits; + } + max_calc_length= (max_calc_length + 7) / 8; + pack_ref_length= _ma_calc_pack_length(pack_version, max_calc_length); + record_count=0; + /* 'max_blob_length' is the max length of all blobs of a record. */ + pack_blob_length= isam_file->s->base.blobs ? + _ma_calc_pack_length(pack_version, mrg->max_blob_length) : 0; + max_pack_length=pack_ref_length+pack_blob_length; + + DBUG_PRINT("fields", ("===")); + mrg_reset(mrg); + while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) + { + ulong tot_blob_length=0; + if (! error) + { + if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length)) + break; + record_pos= (byte*) file_buffer.pos; + file_buffer.pos+= max_pack_length; + if (null_bytes) + { + /* Copy null bits 'as is' */ + memcpy(file_buffer.pos, record, null_bytes); + file_buffer.pos+= null_bytes; + } + for (start_pos=record+null_bytes, count= huff_counts; + count < end_count ; + count++) + { + end_pos=start_pos+(field_length=count->field_length); + tree=count->tree; + + DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + (ulong) (count - huff_counts + 1), + count->field_type, + count->pack_type, count->max_zero_fill, + count->length_bits, count->tree->tree_number, + count->field_length)); + + /* Check if the column contains spaces only. 
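With PACK_TYPE_SPACE_FIELDS an all-space value costs a single 1 bit; + otherwise a 0 bit is written first and the value is encoded as usual below.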
*/ + if (count->pack_type & PACK_TYPE_SPACE_FIELDS) + { + for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ; + if (pos == end_pos) + { + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1")); + DBUG_PRINT("fields", ("---")); + write_bits(1,1); + start_pos=end_pos; + continue; + } + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1")); + write_bits(0,1); + } + end_pos-=count->max_zero_fill; + field_length-=count->max_zero_fill; + + switch (count->field_type) { + case FIELD_SKIP_ZERO: + if (!memcmp((byte*) start_pos,zero_string,field_length)) + { + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1")); + write_bits(1,1); + start_pos=end_pos; + break; + } + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1")); + write_bits(0,1); + /* Fall through */ + case FIELD_NORMAL: + DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes", + (ulong) (end_pos - start_pos))); + for ( ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + break; + case FIELD_SKIP_ENDSPACE: + for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ; + length= (ulong) (end_pos - pos); + if (count->pack_type & PACK_TYPE_SELECTED) + { + if (length > count->min_space) + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(1,1); + write_bits(length,count->length_bits); + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE not more than min_space, " + "bits: 1")); + write_bits(0,1); + pos=end_pos; + } + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(length,count->length_bits); + } + /* Encode all significant bytes. 
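Only the bytes before 'pos' are Huffman coded; any trailing spaces that were + stripped are represented solely by the length written above.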
*/ + DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes", + (ulong) (pos - start_pos))); + for ( ; start_pos < pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + start_pos=end_pos; + break; + case FIELD_SKIP_PRESPACE: + for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ; + length= (ulong) (pos - start_pos); + if (count->pack_type & PACK_TYPE_SELECTED) + { + if (length > count->min_space) + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(1,1); + write_bits(length,count->length_bits); + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE not more than min_space, " + "bits: 1")); + pos=start_pos; + write_bits(0,1); + } + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(length,count->length_bits); + } + /* Encode all significant bytes. */ + DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes", + (ulong) (end_pos - start_pos))); + for (start_pos=pos ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + break; + case FIELD_CONSTANT: + case FIELD_ZERO: + case FIELD_CHECK: + DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK")); + start_pos=end_pos; + break; + case FIELD_INTERVALL: + global_count=count; + pos=(byte*) tree_search(&count->int_tree, start_pos, + count->int_tree.custom_arg); + intervall=(uint) (pos - count->tree_buff)/field_length; + DBUG_PRINT("fields", ("FIELD_INTERVALL")); + DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u", + intervall, hexdigits(tree->code[intervall]), + (uint) tree->code_len[intervall])); + write_bits(tree->code[intervall],(uint) tree->code_len[intervall]); + start_pos=end_pos; + break; + case FIELD_BLOB: + { + ulong blob_length= _ma_calc_blob_length(field_length- + maria_portable_sizeof_char_ptr, + start_pos); + /* Empty blobs are encoded with a single 1 bit. */ + if (!blob_length) + { + DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1")); + write_bits(1,1); + } + else + { + byte *blob,*blob_end; + DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1")); + write_bits(0,1); + /* Write the blob length. */ + DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u", + blob_length, count->length_bits)); + write_bits(blob_length,count->length_bits); + memcpy_fixed(&blob,end_pos-maria_portable_sizeof_char_ptr, + sizeof(char*)); + blob_end=blob+blob_length; + /* Encode the blob bytes. 
*/ + for ( ; blob < blob_end ; blob++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *blob, hexdigits(tree->code[(uchar) *blob]), + (uint) tree->code_len[(uchar) *blob], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *blob], + (uint) tree->code_len[(uchar) *blob]); + } + tot_blob_length+=blob_length; + } + start_pos= end_pos; + break; + } + case FIELD_VARCHAR: + { + uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); + ulong col_length= (pack_length == 1 ? (uint) *(uchar*) start_pos : + uint2korr(start_pos)); + /* Empty varchar are encoded with a single 1 bit. */ + if (!col_length) + { + DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1")); + write_bits(1,1); /* Empty varchar */ + } + else + { + byte *end=start_pos+pack_length+col_length; + DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1")); + write_bits(0,1); + /* Write the varchar length. */ + DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u", + col_length, count->length_bits)); + write_bits(col_length,count->length_bits); + /* Encode the varchar bytes. */ + for (start_pos+=pack_length ; start_pos < end ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + } + start_pos= end_pos; + break; + } + case FIELD_LAST: + case FIELD_enum_val_count: + abort(); /* Impossible */ + } + start_pos+=count->max_zero_fill; + DBUG_PRINT("fields", ("---")); + } + flush_bits(); + length=(ulong) ((byte*) file_buffer.pos - record_pos) - max_pack_length; + pack_length= _ma_save_pack_length(pack_version, record_pos, length); + if (pack_blob_length) + pack_length+= _ma_save_pack_length(pack_version, + record_pos + pack_length, + tot_blob_length); + DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu " + "length-bytes: %lu", (ulong) record_count, length, + tot_blob_length, pack_length)); + DBUG_PRINT("fields", ("===")); + + /* Correct file buffer if the header was smaller */ + if (pack_length != max_pack_length) + { + bmove(record_pos+pack_length,record_pos+max_pack_length,length); + file_buffer.pos-= (max_pack_length-pack_length); + } + if (length < (ulong) min_record_length) + min_record_length=(uint) length; + if (length > (ulong) max_record_length) + max_record_length=(uint) length; + record_count++; + if (write_loop && record_count % WRITE_COUNT == 0) + { + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); + } + } + else if (error != HA_ERR_RECORD_DELETED) + break; + } + if (error == HA_ERR_END_OF_FILE) + error=0; + else + { + VOID(fprintf(stderr, "%s: Got error %d reading records\n", + my_progname, error)); + } + if (verbose >= 2) + VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf))); + + my_afree((gptr) record); + mrg->ref_length=max_pack_length; + mrg->min_pack_length=max_record_length ? 
min_record_length : 0; + mrg->max_pack_length=max_record_length; + DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0)); +} + + +static char *make_new_name(char *new_name, char *old_name) +{ + return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4); +} + +static char *make_old_name(char *new_name, char *old_name) +{ + return fn_format(new_name,old_name,"",OLD_EXT,2+4); +} + + /* routines for the bit writing buffer */ + +static void init_file_buffer(File file, pbool read_buffer) +{ + file_buffer.file=file; + file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE), + MYF(MY_WME)); + file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8; + file_buffer.pos_in_file=0; + error_on_write=0; + if (read_buffer) + { + + file_buffer.pos=file_buffer.end; + file_buffer.bits=0; + } + else + { + file_buffer.pos=file_buffer.buffer; + file_buffer.bits=BITS_SAVED; + } + file_buffer.bitbucket= 0; +} + + +static int flush_buffer(ulong neaded_length) +{ + ulong length; + + /* + file_buffer.end is 8 bytes lower than the real end of the buffer. + This is done so that the end-of-buffer condition does not need to be + checked for every byte (see write_bits()). Consequently, + file_buffer.pos can become greater than file_buffer.end. The + algorithms in the other functions ensure that there will never be + more than 8 bytes written to the buffer without an end-of-buffer + check. So the buffer cannot be overrun. But we need to check for the + near-to-buffer-end condition to avoid a negative result, which is + cast to unsigned and thus becomes huge. + */ + if ((file_buffer.pos < file_buffer.end) && + ((ulong) (file_buffer.end - file_buffer.pos) > neaded_length)) + return 0; + length=(ulong) (file_buffer.pos-file_buffer.buffer); + file_buffer.pos=file_buffer.buffer; + file_buffer.pos_in_file+=length; + if (test_only) + return 0; + if (error_on_write|| my_write(file_buffer.file, + (const byte*) file_buffer.buffer, + length, + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL))) + { + error_on_write=1; + return 1; + } + + if (neaded_length != ~(ulong) 0 && + (ulong) (file_buffer.end-file_buffer.buffer) < neaded_length) + { + char *tmp; + neaded_length+=256; /* some margin */ + tmp= my_realloc((char*) file_buffer.buffer, neaded_length,MYF(MY_WME)); + if (!tmp) + return 1; + file_buffer.pos= ((uchar*) tmp + + (ulong) (file_buffer.pos - file_buffer.buffer)); + file_buffer.buffer= (uchar*) tmp; + file_buffer.end= (uchar*) (tmp+neaded_length-8); + } + return 0; +} + + +static void end_file_buffer(void) +{ + my_free((gptr) file_buffer.buffer,MYF(0)); +} + + /* output 'bits' low bits of 'value' */ + +static void write_bits(register ulonglong value, register uint bits) +{ + DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) || + (bits == 8 * sizeof(value))); + + if ((file_buffer.bits-= (int) bits) >= 0) + { + file_buffer.bitbucket|= value << file_buffer.bits; + } + else + { + reg3 ulonglong bit_buffer; + bits= (uint) -file_buffer.bits; + bit_buffer= (file_buffer.bitbucket | + ((bits != 8 * sizeof(value)) ? 
(value >> bits) : 0)); +#if BITS_SAVED == 64 + *file_buffer.pos++= (uchar) (bit_buffer >> 56); + *file_buffer.pos++= (uchar) (bit_buffer >> 48); + *file_buffer.pos++= (uchar) (bit_buffer >> 40); + *file_buffer.pos++= (uchar) (bit_buffer >> 32); +#endif + *file_buffer.pos++= (uchar) (bit_buffer >> 24); + *file_buffer.pos++= (uchar) (bit_buffer >> 16); + *file_buffer.pos++= (uchar) (bit_buffer >> 8); + *file_buffer.pos++= (uchar) (bit_buffer); + + if (bits != 8 * sizeof(value)) + value&= (((ulonglong) 1) << bits) - 1; + if (file_buffer.pos >= file_buffer.end) + VOID(flush_buffer(~ (ulong) 0)); + file_buffer.bits=(int) (BITS_SAVED - bits); + file_buffer.bitbucket= value << (BITS_SAVED - bits); + } + return; +} + + /* Flush bits in bit_buffer to buffer */ + +static void flush_bits(void) +{ + int bits; + ulonglong bit_buffer; + + bits= file_buffer.bits & ~7; + bit_buffer= file_buffer.bitbucket >> bits; + bits= BITS_SAVED - bits; + while (bits > 0) + { + bits-= 8; + *file_buffer.pos++= (uchar) (bit_buffer >> bits); + } + if (file_buffer.pos >= file_buffer.end) + VOID(flush_buffer(~ (ulong) 0)); + file_buffer.bits= BITS_SAVED; + file_buffer.bitbucket= 0; +} + + +/**************************************************************************** +** functions to handle the joined files +****************************************************************************/ + +static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg, + my_off_t new_length, + ha_checksum crc) +{ + MARIA_SHARE *share=isam_file->s; + uint options=mi_uint2korr(share->state.header.options); + uint key; + DBUG_ENTER("save_state"); + + options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA; + mi_int2store(share->state.header.options,options); + share->state.header.org_data_file_type= share->state.header.data_file_type; + share->state.header.data_file_type= COMPRESSED_RECORD; + + share->state.state.data_file_length=new_length; + share->state.state.del=0; + share->state.state.empty=0; + share->state.dellink= HA_OFFSET_ERROR; + share->state.split=(ha_rows) mrg->records; + share->state.version=(ulong) time((time_t*) 0); + if (! maria_is_all_keys_active(share->state.key_map, share->base.keys)) + { + /* + Some indexes are disabled, cannot use current key_file_length value + as an estimate of upper bound of index file size. Use packed data file + size instead. 
+ */ + share->state.state.key_file_length= new_length; + } + /* + If there are no disabled indexes, keep key_file_length value from + original file so "maria_chk -rq" can use this value (this is necessary + because index size cannot be easily calculated for fulltext keys) + */ + maria_clear_all_keys_active(share->state.key_map); + for (key=0 ; key < share->base.keys ; key++) + share->state.key_root[key]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; + isam_file->state->checksum=crc; /* Save crc here */ + share->changed=1; /* Force write of header */ + share->state.open_count=0; + share->global_changed=0; + VOID(my_chsize(share->kfile, share->base.keystart, 0, MYF(0))); + if (share->base.keys) + isamchk_neaded=1; + DBUG_RETURN(_ma_state_info_write(share->kfile,&share->state,1+2)); +} + + +static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length, + ha_checksum crc) +{ + MARIA_STATE_INFO state; + MARIA_HA *isam_file=mrg->file[0]; + uint options; + DBUG_ENTER("save_state_mrg"); + + state= isam_file->s->state; + options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD | + HA_OPTION_READ_ONLY_DATA); + mi_int2store(state.header.options,options); + state.state.data_file_length=new_length; + state.state.del=0; + state.state.empty=0; + state.state.records=state.split=(ha_rows) mrg->records; + /* See comment above in save_state about key_file_length handling. */ + if (mrg->src_file_has_indexes_disabled) + { + isam_file->s->state.state.key_file_length= + max(isam_file->s->state.state.key_file_length, new_length); + } + state.dellink= HA_OFFSET_ERROR; + state.version=(ulong) time((time_t*) 0); + maria_clear_all_keys_active(state.key_map); + state.state.checksum=crc; + if (isam_file->s->base.keys) + isamchk_neaded=1; + state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */ + DBUG_RETURN (_ma_state_info_write(file,&state,1+2)); +} + + +/* reset for mrg_rrnd */ + +static void mrg_reset(PACK_MRG_INFO *mrg) +{ + if (mrg->current) + { + maria_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0); + mrg->current=0; + } +} + +static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf) +{ + int error; + MARIA_HA *isam_info; + my_off_t filepos; + + if (!info->current) + { + isam_info= *(info->current=info->file); + info->end=info->current+info->count; + maria_reset(isam_info); + maria_extra(isam_info, HA_EXTRA_CACHE, 0); + if ((error= maria_scan_init(isam_info))) + return(error); + } + else + isam_info= *info->current; + + for (;;) + { + if (!(error= maria_scan(isam_info, buf)) || + error != HA_ERR_END_OF_FILE) + return (error); + maria_scan_end(isam_info); + maria_extra(isam_info,HA_EXTRA_NO_CACHE, 0); + if (info->current+1 == info->end) + return(HA_ERR_END_OF_FILE); + info->current++; + isam_info= *info->current; + filepos=isam_info->s->pack.header_length; + maria_reset(isam_info); + maria_extra(isam_info,HA_EXTRA_CACHE, 0); + if ((error= maria_scan_init(isam_info))) + return(error); + } +} + + +static int mrg_close(PACK_MRG_INFO *mrg) +{ + uint i; + int error=0; + DBUG_ENTER("mrg_close"); + + for (i=0 ; i < mrg->count ; i++) + error|=maria_close(mrg->file[i]); + if (mrg->free_file) + my_free((gptr) mrg->file,MYF(0)); + DBUG_RETURN(error); +} + + +#if !defined(DBUG_OFF) +/* + Fake the counts to get big Huffman codes. + + SYNOPSIS + fakebigcodes() + huff_counts A pointer to the counts array. + end_count A pointer past the counts array. 
+ + DESCRIPTION + + Huffman coding works by removing the two least frequent values from + the list of values and add a new value with the sum of their + incidences in a loop until only one value is left. Every time a + value is reused for a new value, it gets one more bit for its + encoding. Hence, the least frequent values get the longest codes. + + To get a maximum code length for a value, two of the values must + have an incidence of 1. As their sum is 2, the next infrequent value + must have at least an incidence of 2, then 4, 8, 16 and so on. This + means that one needs 2**n bytes (values) for a code length of n + bits. However, using more distinct values forces the use of longer + codes, or reaching the code length with less total bytes (values). + + To get 64(32)-bit codes, I sort the counts by decreasing incidence. + I assign counts of 1 to the two most frequent values, a count of 2 + for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All + the remaining values get 1. That way every possible byte has an + assigned code, though not all codes are used if not all byte values + are present in the column. + + This strategy would work with distinct column values too, but + requires that at least 64(32) values are present. To make things + easier here, I cancel all distinct column values and force byte + compression for all columns. + + RETURN + void +*/ + +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count) +{ + HUFF_COUNTS *count; + my_off_t *cur_count_p; + my_off_t *end_count_p; + my_off_t **cur_sort_p; + my_off_t **end_sort_p; + my_off_t *sort_counts[256]; + my_off_t total; + DBUG_ENTER("fakebigcodes"); + + for (count= huff_counts; count < end_count; count++) + { + /* + Remove distinct column values. + */ + if (huff_counts->tree_buff) + { + my_free((gptr) huff_counts->tree_buff, MYF(0)); + delete_tree(&huff_counts->int_tree); + huff_counts->tree_buff= NULL; + DBUG_PRINT("fakebigcodes", ("freed distinct column values")); + } + + /* + Sort counts by decreasing incidence. + */ + cur_count_p= count->counts; + end_count_p= cur_count_p + 256; + cur_sort_p= sort_counts; + while (cur_count_p < end_count_p) + *(cur_sort_p++)= cur_count_p++; + (void) qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp); + + /* + Assign faked counts. + */ + cur_sort_p= sort_counts; +#if SIZEOF_LONG_LONG > 4 + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1; +#else + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2; +#endif + /* Most frequent value gets a faked count of 1. */ + **(cur_sort_p++)= 1; + total= 1; + while (cur_sort_p < end_sort_p) + { + **(cur_sort_p++)= total; + total<<= 1; + } + /* Set the last value. */ + **(cur_sort_p++)= --total; + /* + Set the remaining counts. + */ + end_sort_p= sort_counts + 256; + while (cur_sort_p < end_sort_p) + **(cur_sort_p++)= 1; + } + DBUG_VOID_RETURN; +} + + +/* + Compare two counts for reverse sorting. + + SYNOPSIS + fakecmp() + count1 One count. + count2 Another count. + + RETURN + 1 count1 < count2 + 0 count1 == count2 + -1 count1 > count2 +*/ + +static int fakecmp(my_off_t **count1, my_off_t **count2) +{ + return ((**count1 < **count2) ? 1 : + (**count1 > **count2) ? 
-1 : 0); +} +#endif diff --git a/storage/maria/maria_rename.sh b/storage/maria/maria_rename.sh new file mode 100755 index 00000000000..fb20e47e635 --- /dev/null +++ b/storage/maria/maria_rename.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +replace myisam maria MYISAM MARIA MyISAM MARIA -- mysql-test/t/*maria*test mysql-test/r/*maria*result + +FILES=`echo sql/ha_maria.{cc,h} include/maria*h storage/maria/*.{c,h}` + +replace myisam maria MYISAM MARIA MyISAM MARIA myisam.h maria.h myisamdef.h maria_def.h mi_ maria_ ft_ maria_ft_ "Copyright (C) 2000" "Copyright (C) 2006" MI_ISAMINFO MARIA_INFO MI_CREATE_INFO MARIA_CREATE_INFO maria_isam_ maria_ MI_INFO MARIA_HA MI_ MARIA_ MARIACHK MARIA_CHK rt_index.h ma_rt_index.h rtree_ maria_rtree rt_key.h ma_rt_key.h rt_mbr.h ma_rt_mbr.h -- $FILES + +replace check_table_is_closed _ma_check_table_is_closed test_if_reopen _ma_test_if_reopen my_n_base_info_read maria_n_base_info_read update_auto_increment _ma_update_auto_increment save_pack_length _ma_save_packlength calc_pack_length _ma_calc_pack_length -- $FILES + +replace mi_ ma_ ft_ ma_ft_ rt_ ma_rt_ myisam maria myisamchk maria_chk myisampack maria_pack myisamlog maria_log -- storage/maria/Makefile.am + +# +# Restore wrong replaces +# + +replace maria_sint1korr mi_sint1korr maria_uint1korr mi_uint1korr maria_sint2korr mi_sint2korr maria_sint3korr mi_sint3korr maria_sint4korr mi_sint4korr maria_sint8korr mi_sint8korr maria_uint2korr mi_uint2korr maria_uint3korr mi_uint3korr maria_uint4korr mi_uint4korr maria_uint5korr mi_uint5korr maria_uint6korr mi_uint6korr maria_uint7korr mi_uint7korr maria_uint8korr mi_uint8korr maria_int1store mi_int1store maria_int2store mi_int2store maria_int3store mi_int3store maria_int4store mi_int4store maria_int5store mi_int5store maria_int6store mi_int6store maria_int7store mi_int7store maria_int8store mi_int8store maria_float4store mi_float4store maria_float4get mi_float4get maria_float8store mi_float8store maria_float8get mi_float8get maria_rowstore mi_rowstore maria_rowkorr mi_rowkorr maria_sizestore mi_sizestore maria_sizekorr mi_sizekorr _maria_maria_ _maria MARIA_MAX_POSSIBLE_KEY HA_MAX_POSSIBLE_KEY MARIA_MAX_KEY_BUFF HA_MAX_KEY_BUFF MARIA_MAX_KEY_SEG HA_MAX_KEY_SEG maria_ft_sintXkorr ft_sintXkorr maria_ft_intXstore ft_intXstore maria_ft_boolean_syntax ft_boolean_syntax maria_ft_min_word_len ft_min_word_len maria_ft_max_word_len ft_max_word_len -- $FILES diff --git a/storage/maria/plug.in b/storage/maria/plug.in new file mode 100644 index 00000000000..198f5d8c289 --- /dev/null +++ b/storage/maria/plug.in @@ -0,0 +1,8 @@ +MYSQL_STORAGE_ENGINE(maria, no, [Maria Storage Engine], + [Traditional transactional MySQL tables], [max,max-no-ndb]) +MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria]) +MYSQL_PLUGIN_ACTIONS(maria, [AC_CONFIG_FILES(storage/maria/unittest/Makefile)]) +MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) +# Maria will probably go first into max builds, not all builds, +# so we don't declare it mandatory. +MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(maria, [ha_maria.cc]) diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c new file mode 100644 index 00000000000..810c6c12ea4 --- /dev/null +++ b/storage/maria/tablockman.c @@ -0,0 +1,677 @@ +/* QQ: TODO - allocate everything from dynarrays !!! 
(benchmark) */ +/* QQ: automatically place S instead of LS if possible */ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <my_base.h> +#include <hash.h> +#include "tablockman.h" + +/* + Lock Manager for Table Locks + + The code below handles locks on resources - but it is optimized for a + case when a number of resources is not very large, and there are many of + locks per resource - that is a resource is likely to be a table or a + database, but hardly a row in a table. + + Locks belong to "lock owners". A Lock Owner is uniquely identified by a + 16-bit number - loid (lock owner identifier). A function loid_to_tlo must + be provided by the application that takes such a number as an argument + and returns a TABLE_LOCK_OWNER structure. + + Lock levels are completely defined by three tables. Lock compatibility + matrix specifies which locks can be held at the same time on a resource. + Lock combining matrix specifies what lock level has the same behaviour as + a pair of two locks of given levels. getlock_result matrix simplifies + intention locking and lock escalation for an application, basically it + defines which locks are intention locks and which locks are "loose" + locks. It is only used to provide better diagnostics for the + application, lock manager itself does not differentiate between normal, + intention, and loose locks. + + The assumptions are: few distinct resources, many locks are held at the + same time on one resource. Thus: a lock structure _per resource_ can be + rather large; a lock structure _per lock_ does not need to be very small + either; we need to optimize for _speed_. Operations we need are: place a + lock, check if a particular transaction already has a lock on this + resource, check if a conflicting lock exists, if yes - find who owns it. + + Solution: every resource has a structure with + 1. Hash of latest (see the lock upgrade section below) granted locks with + loid as a key. Thus, checking if a given transaction has a lock on + this resource is O(1) operation. + 2. Doubly-linked lists of all granted locks - one list for every lock + type. Thus, checking if a conflicting lock exists is a check whether + an appropriate list head pointer is not null, also O(1). + 3. Every lock has a loid of the owner, thus checking who owns a + conflicting lock is also O(1). + 4. Deque of waiting locks. It's a deque (double-ended queue) not a fifo, + because for lock upgrades requests are added to the queue head, not + tail. This is a single place where there it gets O(N) on number + of locks - when a transaction wakes up from waiting on a condition, + it may need to scan the queue backward to the beginning to find + a conflicting lock. It is guaranteed though that "all transactions + before it" received the same - or earlier - signal. 
In other words a + transaction needs to scan all transactions before it that received the + signal but didn't have a chance to resume the execution yet, so + practically OS scheduler won't let the scan to be O(N). + + Waiting: if there is a conflicting lock or if wait queue is not empty, a + requested lock cannot be granted at once. It is added to the end of the + wait queue. If a queue was empty and there is a conflicting lock - the + "blocker" transaction is the owner of this lock. If a queue is not empty, + an owner of the previous lock in the queue is the "blocker". But if the + previous lock is compatible with the request, then the "blocker" is the + transaction that the owner of the lock at the end of the queue is waiting + for (in other words, our lock is added to the end of the wait queue, and + our blocker is the same as of the lock right before us). + + Lock upgrades: when a thread that has a lock on a given resource, + requests a new lock on the same resource and the old lock is not enough + to satisfy new lock requirements (which is defined by + lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock + (defined by lock_combining_matrix as above) is placed. Depending on + other granted locks it is immediately granted or it has to wait. Here the + lock is added to the start of the waiting queue, not to the end. Old + lock, is removed from the hash, but not from the doubly-linked lists. + (indeed, a transaction checks "do I have a lock on this resource ?" by + looking in a hash, and it should find a latest lock, so old locks must be + removed; but a transaction checks "are there conflicting locks ?" by + checking doubly-linked lists, it doesn't matter if it will find an old + lock - if it would be removed, a new lock would be also a conflict). + So, a hash contains only "latest" locks - there can be only one latest + lock per resource per transaction. But doubly-linked lists contain all + locks, even "obsolete" ones, because it doesnt't hurt. Note that old + locks can not be freed early, in particular they stay in the + 'active_locks' list of a lock owner, because they may be "re-enabled" + on a savepoint rollback. + + To better support table-row relations where one needs to lock the table + with an intention lock before locking the row, extended diagnostics is + provided. When an intention lock (presumably on a table) is granted, + lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row, + perhaps the thread already has a normal lock on this table), + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual), + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check + whether it's possible to lock the row, but no need to lock it - perhaps + the thread has a loose lock on this table). This is defined by + getlock_result[] table. + + Instant duration locks are not supported. Though they're trivial to add, + they are normally only used on rows, not on tables. So, presumably, + they are not needed here. + + Mutexes: there're table mutexes (LOCKED_TABLE::mutex), lock owner mutexes + (TABLE_LOCK_OWNER::mutex), and a pool mutex (TABLOCKMAN::pool_mutex). + table mutex protects operations on the table lock structures, and lock + owner pointers waiting_for and waiting_for_loid. + lock owner mutex is only used to wait on lock owner condition + (TABLE_LOCK_OWNER::cond), there's no need to protect owner's lock + structures, and only lock owner itself may access them. + The pool mutex protects a pool of unused locks. 
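  (Aside, not part of this patch: to make the interface concrete, a caller
  would typically wire things up roughly as in the sketch below. The owner
  array, MAX_OWNERS and my_loid names are hypothetical; the application
  supplies its own loid_to_tlo callback and initializes each
  TABLE_LOCK_OWNER's loid, mutex and cond itself.

      static TABLE_LOCK_OWNER owners[MAX_OWNERS];          // one per lock owner
      static TABLE_LOCK_OWNER *loid_to_owner(uint16 loid)
      { return owners + loid - 1; }                        // loids start at 1

      TABLOCKMAN lm;
      LOCKED_TABLE tbl;
      tablockman_init(&lm, loid_to_owner, 1000);           // lock timeout in ms
      tablockman_init_locked_table(&tbl, 16);              // initial hash size

      switch (tablockman_getlock(&lm, &owners[my_loid - 1], &tbl, IX))
      {
      case GOT_THE_LOCK:                            break; // no row lock needed
      case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: break; // now lock the row
      case LOCK_TIMEOUT:                            break; // caller rolls back
      default:                                      break;
      }
      // ... transaction body ...
      tablockman_release_locks(&lm, &owners[my_loid - 1]); // at transaction end

  Lock levels follow the matrices below: for example, a granted S lock combined
  with a new IX request from the same owner is upgraded to SIX, while an S lock
  held by somebody else is incompatible with an IX request.)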
Note the locking order: + first the table mutex, then the owner mutex or a pool mutex. + Table mutex lock cannot be attempted when owner or pool mutex are locked. + No mutex lock can be attempted if owner or pool mutex are locked. +*/ + +/* + Lock compatibility matrix. + + It's asymmetric. Read it as "Somebody has the lock <value in the row + label>, can I set the lock <value in the column label> ?" + + ') Though you can take LS lock while somebody has S lock, it makes no + sense - it's simpler to take S lock too. + + 1 - compatible + 0 - incompatible + -1 - "impossible", so that we can assert the impossibility. +*/ +static const int lock_compatibility_matrix[10][10]= +{ /* N S X IS IX SIX LS LX SLX LSIX */ + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, /* N */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */ + { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */ + { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */ +}; + +/* + Lock combining matrix. + + It's symmetric. Read it as "what lock level L is identical to the + set of two locks A and B" + + One should never get N from it, we assert the impossibility +*/ +static const enum lock_type lock_combining_matrix[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { N, N, N, N, N, N, N, N, N, N}, /* N */ + { N, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */ + { N, X, X, X, X, X, X, X, X, X}, /* X */ + { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */ + { N, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */ + { N, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */ + { N, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */ + { N, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */ + { N, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */ + { N, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */ +}; + +/* + the return codes for lockman_getlock + + It's asymmetric. Read it as "I have the lock <value in the row label>, + what value should be returned for <value in the column label> ?" + + 0 means impossible combination (assert!) + + Defines below help to preserve the table structure. + I/L/A values are self explanatory + x means the combination is possible (assert should not crash) + but it cannot happen in row locks, only in table locks (S,X), + or lock escalations (LS,LX) +*/ +#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE +#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +#define A GOT_THE_LOCK +#define x GOT_THE_LOCK +static const enum lockman_getlock_result getlock_result[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */ + { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */ + { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */ + { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */ + { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */ + { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */ + { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */ + { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */ + { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */ + { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */ +}; +#undef I +#undef L +#undef A +#undef x + +/* + this structure is optimized for a case when there're many locks + on the same resource - e.g. a table +*/ + +struct st_table_lock { + /* QQ: do we need upgraded_from ? 
*/ + struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev; + struct st_locked_table *table; + uint16 loid; + uchar lock_type; +}; + +#define hash_insert my_hash_insert /* for consistency :) */ + +static inline +TABLE_LOCK *find_by_loid(LOCKED_TABLE *table, uint16 loid) +{ + return (TABLE_LOCK *)hash_search(& table->latest_locks, + (byte *)& loid, sizeof(loid)); +} + +static inline +void remove_from_wait_queue(TABLE_LOCK *lock, LOCKED_TABLE *table) +{ + DBUG_ASSERT(table == lock->table); + if (lock->prev) + { + DBUG_ASSERT(table->wait_queue_out != lock); + lock->prev->next= lock->next; + } + else + { + DBUG_ASSERT(table->wait_queue_out == lock); + table->wait_queue_out= lock->next; + } + if (lock->next) + { + DBUG_ASSERT(table->wait_queue_in != lock); + lock->next->prev= lock->prev; + } + else + { + DBUG_ASSERT(table->wait_queue_in == lock); + table->wait_queue_in= lock->prev; + } +} + +/* + DESCRIPTION + tries to lock a resource 'table' with a lock level 'lock'. + + RETURN + see enum lockman_getlock_result +*/ +enum lockman_getlock_result +tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, + LOCKED_TABLE *table, enum lock_type lock) +{ + TABLE_LOCK *old, *new, *blocker, *blocker2; + TABLE_LOCK_OWNER *wait_for; + ulonglong deadline; + struct timespec timeout; + enum lock_type new_lock; + enum lockman_getlock_result res; + int i; + + DBUG_ASSERT(lo->waiting_lock == 0); + DBUG_ASSERT(lo->waiting_for == 0); + DBUG_ASSERT(lo->waiting_for_loid == 0); + + pthread_mutex_lock(& table->mutex); + /* do we already have a lock on this resource ? */ + old= find_by_loid(table, lo->loid); + + /* calculate the level of the upgraded lock, if yes */ + new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock; + + /* and check if old lock is enough to satisfy the new request */ + if (old && new_lock == old->lock_type) + { + /* yes */ + res= getlock_result[old->lock_type][lock]; + goto ret; + } + + /* no, placing a new lock. first - take a free lock structure from the pool */ + pthread_mutex_lock(& lm->pool_mutex); + new= lm->pool; + if (new) + { + lm->pool= new->next; + pthread_mutex_unlock(& lm->pool_mutex); + } + else + { + pthread_mutex_unlock(& lm->pool_mutex); + new= (TABLE_LOCK *)my_malloc(sizeof(*new), MYF(MY_WME)); + if (unlikely(!new)) + { + res= NO_MEMORY_FOR_LOCK; + goto ret; + } + } + + new->loid= lo->loid; + new->lock_type= new_lock; + new->table= table; + + /* and try to place it */ + for (new->prev= table->wait_queue_in;;) + { + wait_for= 0; + if (!old) + { + /* not upgrading - a lock must be added to the _end_ of the wait queue */ + for (blocker= new->prev; blocker && !wait_for; blocker= blocker->prev) + { + TABLE_LOCK_OWNER *tmp= lm->loid_to_tlo(blocker->loid); + + /* find a blocking lock */ + DBUG_ASSERT(table->wait_queue_out); + DBUG_ASSERT(table->wait_queue_in); + if (!lock_compatibility_matrix[blocker->lock_type][lock]) + { + /* found! */ + wait_for= tmp; + break; + } + + /* + hmm, the lock before doesn't block us, let's look one step further. + the condition below means: + + if we never waited on a condition yet + OR + the lock before ours (blocker) waits on a lock (blocker2) that is + present in the hash AND and conflicts with 'blocker' + + the condition after OR may fail if 'blocker2' was removed from + the hash, its signal woke us up, but 'blocker' itself didn't see + the signal yet. 
+ */ + if (!lo->waiting_lock || + ((blocker2= find_by_loid(table, tmp->waiting_for_loid)) && + !lock_compatibility_matrix[blocker2->lock_type] + [blocker->lock_type])) + { + /* but it's waiting for a real lock. we'll wait for the same lock */ + wait_for= tmp->waiting_for; + /* + We don't really need tmp->waiting_for, as tmp->waiting_for_loid + is enough. waiting_for is just a local cache to avoid calling + loid_to_tlo(). + But it's essensial that tmp->waiting_for pointer can ONLY + be dereferenced if find_by_loid() above returns a non-null + pointer, because a TABLE_LOCK_OWNER object that it points to + may've been freed when we come here after a signal. + In particular tmp->waiting_for_loid cannot be replaced + with tmp->waiting_for->loid. + */ + DBUG_ASSERT(wait_for == lm->loid_to_tlo(tmp->waiting_for_loid)); + break; + } + + /* + otherwise - a lock it's waiting for doesn't exist. + We've no choice but to scan the wait queue backwards, looking + for a conflicting lock or a lock waiting for a real lock. + QQ is there a way to avoid this scanning ? + */ + } + } + + if (wait_for == 0) + { + /* checking for compatibility with existing locks */ + for (blocker= 0, i= 0; i < LOCK_TYPES; i++) + { + if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock]) + { + blocker= table->active_locks[i]; + /* if the first lock in the list is our own - skip it */ + if (blocker->loid == lo->loid) + blocker= blocker->next; + if (blocker) /* found a conflicting lock, need to wait */ + break; + } + } + if (!blocker) /* free to go */ + break; + wait_for= lm->loid_to_tlo(blocker->loid); + } + + /* ok, we're here - the wait is inevitable */ + lo->waiting_for= wait_for; + lo->waiting_for_loid= wait_for->loid; + if (!lo->waiting_lock) /* first iteration of the for() loop */ + { + /* lock upgrade or new lock request ? */ + if (old) + { + /* upgrade - add the lock to the _start_ of the wait queue */ + new->prev= 0; + if ((new->next= table->wait_queue_out)) + new->next->prev= new; + table->wait_queue_out= new; + if (!table->wait_queue_in) + table->wait_queue_in= table->wait_queue_out; + } + else + { + /* new lock - add the lock to the _end_ of the wait queue */ + new->next= 0; + if ((new->prev= table->wait_queue_in)) + new->prev->next= new; + table->wait_queue_in= new; + if (!table->wait_queue_out) + table->wait_queue_out= table->wait_queue_in; + } + lo->waiting_lock= new; + + deadline= my_getsystime() + lm->lock_timeout * 10000; + timeout.tv_sec= deadline/10000000; + timeout.tv_nsec= (deadline % 10000000) * 100; + } + + /* + prepare to wait. + we must lock blocker's mutex to wait on blocker's cond. + and we must release table's mutex. + note that blocker's mutex is locked _before_ table's mutex is released + */ + pthread_mutex_lock(wait_for->mutex); + pthread_mutex_unlock(& table->mutex); + + /* now really wait */ + i= pthread_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout); + + pthread_mutex_unlock(wait_for->mutex); + + if (i == ETIMEDOUT || i == ETIME) + { + /* we rely on the caller to rollback and release all locks */ + res= LOCK_TIMEOUT; + goto ret2; + } + + pthread_mutex_lock(& table->mutex); + + /* ... and repeat from the beginning */ + } + /* yeah! 
we can place the lock now */ + + /* remove the lock from the wait queue, if it was there */ + if (lo->waiting_lock) + { + remove_from_wait_queue(new, table); + lo->waiting_lock= 0; + lo->waiting_for= 0; + lo->waiting_for_loid= 0; + } + + /* add it to the list of all locks of this lock owner */ + new->next_in_lo= lo->active_locks; + lo->active_locks= new; + + /* and to the list of active locks of this lock type */ + new->prev= 0; + if ((new->next= table->active_locks[new_lock-1])) + new->next->prev= new; + table->active_locks[new_lock-1]= new; + + /* update the latest_locks hash */ + if (old) + hash_delete(& table->latest_locks, (byte *)old); + hash_insert(& table->latest_locks, (byte *)new); + + new->upgraded_from= old; + + res= getlock_result[lock][lock]; + +ret: + pthread_mutex_unlock(& table->mutex); +ret2: + DBUG_ASSERT(res); + return res; +} + +/* + DESCRIPTION + release all locks belonging to a transaction. + signal waiters to continue +*/ +void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) +{ + TABLE_LOCK *lock, *local_pool= 0, *local_pool_end; + + /* + instead of adding released locks to a pool one by one, we'll link + them in a list and add to a pool in one short action (under a mutex) + */ + local_pool_end= lo->waiting_lock ? lo->waiting_lock : lo->active_locks; + if (!local_pool_end) + return; + + /* release a waiting lock, if any */ + if ((lock= lo->waiting_lock)) + { + DBUG_ASSERT(lock->loid == lo->loid); + pthread_mutex_lock(& lock->table->mutex); + remove_from_wait_queue(lock, lock->table); + + /* + a special case: if this lock was not the last in the wait queue + and it's compatible with the next lock, than the next lock + is waiting for our blocker though really it waits for us, indirectly. + Signal our blocker to release this next lock (after we removed our + lock from the wait queue, of course). + */ + /* + An example to clarify the above: + trn1> S-lock the table. Granted. + trn2> IX-lock the table. Added to the wait queue. trn2 waits on trn1 + trn3> IS-lock the table. The queue is not empty, so IS-lock is added + to the queue. It's compatible with the waiting IX-lock, so trn3 + waits for trn2->waiting_for, that is trn1. + if trn1 releases the lock it signals trn1->cond and both waiting + transactions are awaken. But if trn2 times out, trn3 must be notified + too (as IS and S locks are compatible). So trn2 must signal trn1->cond. + */ + if (lock->next && + lock_compatibility_matrix[lock->next->lock_type][lock->lock_type]) + { + pthread_mutex_lock(lo->waiting_for->mutex); + pthread_cond_broadcast(lo->waiting_for->cond); + pthread_mutex_unlock(lo->waiting_for->mutex); + } + lo->waiting_for= 0; + lo->waiting_for_loid= 0; + pthread_mutex_unlock(& lock->table->mutex); + + lock->next= local_pool; + local_pool= lock; + } + + /* now release granted locks */ + lock= lo->active_locks; + while (lock) + { + TABLE_LOCK *cur= lock; + pthread_mutex_t *mutex= & lock->table->mutex; + DBUG_ASSERT(cur->loid == lo->loid); + + DBUG_ASSERT(lock != lock->next_in_lo); + lock= lock->next_in_lo; + + /* TODO ? 
group locks by table to reduce the number of mutex locks */ + pthread_mutex_lock(mutex); + hash_delete(& cur->table->latest_locks, (byte *)cur); + + if (cur->prev) + cur->prev->next= cur->next; + if (cur->next) + cur->next->prev= cur->prev; + if (cur->table->active_locks[cur->lock_type-1] == cur) + cur->table->active_locks[cur->lock_type-1]= cur->next; + + cur->next= local_pool; + local_pool= cur; + + pthread_mutex_unlock(mutex); + } + + lo->waiting_lock= lo->active_locks= 0; + + /* + okay, all locks released. now signal that we're leaving, + in case somebody's waiting for it + */ + pthread_mutex_lock(lo->mutex); + pthread_cond_broadcast(lo->cond); + pthread_mutex_unlock(lo->mutex); + + /* and push all freed locks to the lockman's pool */ + pthread_mutex_lock(& lm->pool_mutex); + local_pool_end->next= lm->pool; + lm->pool= local_pool; + pthread_mutex_unlock(& lm->pool_mutex); +} + +void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout) +{ + lm->pool= 0; + lm->loid_to_tlo= func; + lm->lock_timeout= timeout; + pthread_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST); + my_getsystime(); /* ensure that my_getsystime() is initialized */ +} + +void tablockman_destroy(TABLOCKMAN *lm) +{ + while (lm->pool) + { + TABLE_LOCK *tmp= lm->pool; + lm->pool= tmp->next; + my_free((void *)tmp, MYF(0)); + } + pthread_mutex_destroy(& lm->pool_mutex); +} + +/* + initialize a LOCKED_TABLE structure + + SYNOPSYS + lt a LOCKED_TABLE to initialize + initial_hash_size initial size for 'latest_locks' hash +*/ +void tablockman_init_locked_table(LOCKED_TABLE *lt, int initial_hash_size) +{ + bzero(lt, sizeof(*lt)); + pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST); + hash_init(& lt->latest_locks, & my_charset_bin, initial_hash_size, + offsetof(TABLE_LOCK, loid), + sizeof(((TABLE_LOCK*)0)->loid), 0, 0, 0); +} + +void tablockman_destroy_locked_table(LOCKED_TABLE *lt) +{ + int i; + + DBUG_ASSERT(lt->wait_queue_out == 0); + DBUG_ASSERT(lt->wait_queue_in == 0); + DBUG_ASSERT(lt->latest_locks.records == 0); + for (i= 0; i<LOCK_TYPES; i++) + DBUG_ASSERT(lt->active_locks[i] == 0); + + hash_free(& lt->latest_locks); + pthread_mutex_destroy(& lt->mutex); +} + +#ifdef EXTRA_DEBUG +static const char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX", + "LS", "LX", "SLX", "LSIX"}; + +void tablockman_print_tlo(TABLE_LOCK_OWNER *lo) +{ + TABLE_LOCK *lock; + + printf("lo%d>", lo->loid); + if ((lock= lo->waiting_lock)) + printf(" (%s.0x%lx)", lock2str[lock->lock_type], (intptr)lock->table); + for (lock= lo->active_locks; + lock && lock != lock->next_in_lo; + lock= lock->next_in_lo) + printf(" %s.0x%lx", lock2str[lock->lock_type], (intptr)lock->table); + if (lock && lock == lock->next_in_lo) + printf("!"); + printf("\n"); +} +#endif + diff --git a/storage/maria/tablockman.h b/storage/maria/tablockman.h new file mode 100644 index 00000000000..2c6fb6996a3 --- /dev/null +++ b/storage/maria/tablockman.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _tablockman_h +#define _tablockman_h + +/* + Lock levels: + ^^^^^^^^^^^ + + N - "no lock", not a lock, used sometimes internally to simplify the code + S - Shared + X - eXclusive + IS - Intention Shared + IX - Intention eXclusive + SIX - Shared + Intention eXclusive + LS - Loose Shared + LX - Loose eXclusive + SLX - Shared + Loose eXclusive + LSIX - Loose Shared + Intention eXclusive +*/ +#ifndef _lockman_h +/* QQ: TODO remove N-locks */ +enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST }; +enum lockman_getlock_result { + NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT, + GOT_THE_LOCK, + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +}; +#endif + +#define LOCK_TYPES (LOCK_TYPE_LAST-1) + +typedef struct st_table_lock TABLE_LOCK; + +typedef struct st_table_lock_owner { + TABLE_LOCK *active_locks; /* list of active locks */ + TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */ + struct st_table_lock_owner *waiting_for; /* transaction we're waiting for */ + pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */ + pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ + uint16 loid, waiting_for_loid; /* Lock Owner IDentifier */ +} TABLE_LOCK_OWNER; + +typedef struct st_locked_table { + pthread_mutex_t mutex; /* mutex for everything below */ + HASH latest_locks; /* latest locks in a hash */ + TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */ + TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque (double-end queue)*/ +} LOCKED_TABLE; + +typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16); + +typedef struct { + pthread_mutex_t pool_mutex; + TABLE_LOCK *pool; /* lifo pool of free locks */ + uint lock_timeout; /* lock timeout in milliseconds */ + loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */ +} TABLOCKMAN; + +void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint); +void tablockman_destroy(TABLOCKMAN *); +enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *, + LOCKED_TABLE *, enum lock_type); +void tablockman_release_locks(TABLOCKMAN *, TABLE_LOCK_OWNER *); +void tablockman_init_locked_table(LOCKED_TABLE *, int); +void tablockman_destroy_locked_table(LOCKED_TABLE *); + +#ifdef EXTRA_DEBUG +void tablockman_print_tlo(TABLE_LOCK_OWNER *); +#endif + +#endif + diff --git a/storage/maria/test_pack b/storage/maria/test_pack new file mode 100755 index 00000000000..689645b1661 --- /dev/null +++ b/storage/maria/test_pack @@ -0,0 +1,10 @@ +silent="-s" +suffix="" + +ma_test1$suffix -s ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -us test1 ; maria_chk$suffix -es test1 +ma_test1$suffix -s -S ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ;maria_chk$suffix -us test1 ; maria_chk$suffix -es test1 +ma_test1$suffix -s -b ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 +ma_test1$suffix -s -w ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -ros test1 ; maria_chk$suffix -es test1 + +ma_test2$suffix -s -t4 ; maria_pack$suffix --force -s 
test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2 +ma_test2$suffix -s -t4 -b ; maria_pack$suffix --force -s test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2 diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c new file mode 100644 index 00000000000..37978e4ff76 --- /dev/null +++ b/storage/maria/trnman.c @@ -0,0 +1,519 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#include <my_global.h> +#include <my_sys.h> +#include <lf.h> +#include <m_string.h> +#include "trnman.h" + +/* + status variables: + how many trns in the active list currently, + in the committed list currently, allocated since startup. +*/ +uint trnman_active_transactions, trnman_committed_transactions, + trnman_allocated_transactions; + +/* list of active transactions in the trid order */ +static TRN active_list_min, active_list_max; +/* list of committed transactions in the trid order */ +static TRN committed_list_min, committed_list_max; + +/* a counter, used to generate transaction ids */ +static TrID global_trid_generator; + +/* the mutex for everything above */ +static pthread_mutex_t LOCK_trn_list; + +/* LIFO pool of unused TRN structured for reuse */ +static TRN *pool; + +/* a hash for committed transactions that maps trid to a TRN structure */ +static LF_HASH trid_to_committed_trn; + +/* an array that maps short_trid of an active transaction to a TRN structure */ +static TRN **short_trid_to_active_trn; + +/* locks for short_trid_to_active_trn and pool */ +static my_atomic_rwlock_t LOCK_short_trid_to_trn, LOCK_pool; + +static LOCKMAN maria_lockman; + +/* + short transaction id is at the same time its identifier + for a lock manager - its lock owner identifier (loid) +*/ +#define short_id locks.loid + +/* + NOTE + Just as short_id doubles as loid, this function doubles as + short_trid_to_LOCK_OWNER. See the compile-time assert below. +*/ +static TRN *short_trid_to_TRN(uint16 short_trid) +{ + TRN *trn; + compile_time_assert(offsetof(TRN, locks) == 0); + my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); + trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]); + my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); + return (TRN *)trn; +} + +static byte *trn_get_hash_key(const byte *trn, uint* len, + my_bool unused __attribute__ ((unused))) +{ + *len= sizeof(TrID); + return (byte *) & ((*((TRN **)trn))->trid); +} + +int trnman_init() +{ + /* + Initialize lists. + active_list_max.min_read_from must be larger than any trid, + so that when an active list is empty we would could free + all committed list. 
+ And committed_list_max itself can not be freed so + committed_list_max.commit_trid must not be smaller that + active_list_max.min_read_from + */ + + active_list_max.trid= active_list_min.trid= 0; + active_list_max.min_read_from= ~0; + active_list_max.next= active_list_min.prev= 0; + active_list_max.prev= &active_list_min; + active_list_min.next= &active_list_max; + + committed_list_max.commit_trid= ~0; + committed_list_max.next= committed_list_min.prev= 0; + committed_list_max.prev= &committed_list_min; + committed_list_min.next= &committed_list_max; + + trnman_active_transactions= 0; + trnman_committed_transactions= 0; + trnman_allocated_transactions= 0; + + pool= 0; + global_trid_generator= 0; /* set later by the recovery code */ + lf_hash_init(&trid_to_committed_trn, sizeof(TRN*), LF_HASH_UNIQUE, + 0, 0, trn_get_hash_key, 0); + pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST); + my_atomic_rwlock_init(&LOCK_short_trid_to_trn); + my_atomic_rwlock_init(&LOCK_pool); + short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!short_trid_to_active_trn)) + return 1; + short_trid_to_active_trn--; /* min short_trid is 1 */ + + lockman_init(&maria_lockman, (loid_to_lo_func *)&short_trid_to_TRN, 10000); + + return 0; +} + +/* + NOTE + this could only be called in the "idle" state - no transaction can be + running. See asserts below. +*/ +void trnman_destroy() +{ + DBUG_ASSERT(trid_to_committed_trn.count == 0); + DBUG_ASSERT(trnman_active_transactions == 0); + DBUG_ASSERT(trnman_committed_transactions == 0); + DBUG_ASSERT(active_list_max.prev == &active_list_min); + DBUG_ASSERT(active_list_min.next == &active_list_max); + DBUG_ASSERT(committed_list_max.prev == &committed_list_min); + DBUG_ASSERT(committed_list_min.next == &committed_list_max); + while (pool) + { + TRN *trn= pool; + pool= pool->next; + DBUG_ASSERT(trn->locks.mutex == 0); + DBUG_ASSERT(trn->locks.cond == 0); + my_free((void *)trn, MYF(0)); + } + lf_hash_destroy(&trid_to_committed_trn); + pthread_mutex_destroy(&LOCK_trn_list); + my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn); + my_atomic_rwlock_destroy(&LOCK_pool); + my_free((void *)(short_trid_to_active_trn+1), MYF(0)); + lockman_destroy(&maria_lockman); +} + +/* + NOTE + TrID is limited to 6 bytes. Initial value of the generator + is set by the recovery code - being read from the last checkpoint + (or 1 on a first run). +*/ +static TrID new_trid() +{ + DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL); + safe_mutex_assert_owner(&LOCK_trn_list); + return ++global_trid_generator; +} + +static void set_short_trid(TRN *trn) +{ + int i= (global_trid_generator + (intptr)trn) * 312089 % SHORT_TRID_MAX + 1; + my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn); + for ( ; ; i= i % SHORT_TRID_MAX + 1) /* the range is [1..SHORT_TRID_MAX] */ + { + void *tmp= NULL; + if (short_trid_to_active_trn[i] == NULL && + my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn)) + break; + } + my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn); + trn->short_id= i; +} + +/* + DESCRIPTION + start a new transaction, allocate and initialize transaction object + mutex and cond will be used for lock waits +*/ +TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond) +{ + TRN *trn; + + /* + we have a mutex, to do simple things under it - allocate a TRN, + increment trnman_active_transactions, set trn->min_read_from. + + Note that all the above is fast. 
generating short_trid may be slow, + as it involves scanning a large array - so it's done outside of the + mutex. + */ + + pthread_mutex_lock(&LOCK_trn_list); + + /* Allocating a new TRN structure */ + trn= pool; + /* + Popping an unused TRN from the pool + (ABA isn't possible, we're behind a mutex + */ + my_atomic_rwlock_wrlock(&LOCK_pool); + while (trn && !my_atomic_casptr((void **)&pool, (void **)&trn, + (void *)trn->next)) + /* no-op */; + my_atomic_rwlock_wrunlock(&LOCK_pool); + + /* Nothing in the pool ? Allocate a new one */ + if (!trn) + { + trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME)); + if (unlikely(!trn)) + { + pthread_mutex_unlock(&LOCK_trn_list); + return 0; + } + trnman_allocated_transactions++; + } + trnman_active_transactions++; + + trn->min_read_from= active_list_min.next->trid; + + trn->trid= new_trid(); + trn->short_id= 0; + + trn->next= &active_list_max; + trn->prev= active_list_max.prev; + active_list_max.prev= trn->prev->next= trn; + pthread_mutex_unlock(&LOCK_trn_list); + + trn->pins= lf_hash_get_pins(&trid_to_committed_trn); + + if (unlikely(!trn->min_read_from)) + trn->min_read_from= trn->trid; + + trn->commit_trid= 0; + + trn->locks.mutex= mutex; + trn->locks.cond= cond; + trn->locks.waiting_for= 0; + trn->locks.all_locks= 0; + trn->locks.pins= lf_alloc_get_pins(&maria_lockman.alloc); + + /* + only after the following function TRN is considered initialized, + so it must be done the last + */ + set_short_trid(trn); + + return trn; +} + +/* + remove a trn from the active list. + if necessary - move to committed list and set commit_trid + + NOTE + Locks are released at the end. In particular, after placing the + transaction in commit list, and after setting commit_trid. It's + important, as commit_trid affects visibility. Locks don't affect + anything they simply delay execution of other threads - they could be + released arbitrarily late. In other words, when locks are released it + serves as a start banner for other threads, they start to run. So + everything they may need must be ready at that point. 
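  An illustrative call sequence (not part of this patch; the names and error
  handling are the caller's own, and the mutex/cond pair is whatever the
  caller already uses for lock waits):

      pthread_mutex_t mutex;                    // set up with pthread_mutex_init()
      pthread_cond_t  cond;                     // set up with pthread_cond_init()
      TRN *trn;

      if (trnman_init())                        // once, at startup
        return 1;
      if (!(trn= trnman_new_trn(&mutex, &cond)))
        return 1;                               // out of memory
      // ... do the work; row visibility is checked with
      // trnman_can_read_from(trn, some_row_trid)
      trnman_commit_trn(trn);                   // == trnman_end_trn(trn, TRUE)
      trnman_destroy();                         // once, at shutdown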
+*/ +void trnman_end_trn(TRN *trn, my_bool commit) +{ + TRN *free_me= 0; + LF_PINS *pins= trn->pins; + + pthread_mutex_lock(&LOCK_trn_list); + + /* remove from active list */ + trn->next->prev= trn->prev; + trn->prev->next= trn->next; + + /* + if trn was the oldest active transaction, now that it goes away there + may be committed transactions in the list which no active transaction + needs to bother about - clean up the committed list + */ + if (trn->prev == &active_list_min) + { + uint free_me_count; + TRN *t; + for (t= committed_list_min.next, free_me_count= 0; + t->commit_trid < active_list_min.next->min_read_from; + t= t->next, free_me_count++) /* no-op */; + + DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) || + (t == committed_list_min.next && free_me_count == 0)); + /* found transactions committed before the oldest active one */ + if (t != committed_list_min.next) + { + free_me= committed_list_min.next; + committed_list_min.next= t; + t->prev->next= 0; + t->prev= &committed_list_min; + trnman_committed_transactions-= free_me_count; + } + } + + /* + if transaction is committed and it was not the only active transaction - + add it to the committed list (which is used for read-from relation) + */ + if (commit && active_list_min.next != &active_list_max) + { + int res; + + trn->commit_trid= global_trid_generator; + trn->next= &committed_list_max; + trn->prev= committed_list_max.prev; + committed_list_max.prev= trn->prev->next= trn; + trnman_committed_transactions++; + + res= lf_hash_insert(&trid_to_committed_trn, pins, &trn); + DBUG_ASSERT(res == 0); + } + else /* otherwise free it right away */ + { + trn->next= free_me; + free_me= trn; + } + trnman_active_transactions--; + pthread_mutex_unlock(&LOCK_trn_list); + + /* the rest is done outside of a critical section */ + lockman_release_locks(&maria_lockman, &trn->locks); + trn->locks.mutex= 0; + trn->locks.cond= 0; + my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); + my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->short_id], 0); + my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); + + /* + we, under the mutex, removed going-in-free_me transactions from the + active and committed lists, thus nobody else may see them when it scans + those lists, and thus nobody may want to free them. Now we don't + need a mutex to access free_me list + */ + /* QQ: send them to the purge thread */ + while (free_me) + { + TRN *t= free_me; + free_me= free_me->next; + + lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID)); + + trnman_free_trn(t); + } + + lf_hash_put_pins(pins); + lf_pinbox_put_pins(trn->locks.pins); +} + +/* + free a trn (add to the pool, that is) + note - we can never really free() a TRN if there's at least one other + running transaction - see, e.g., how lock waits are implemented in + lockman.c + The same is true for other lock-free data structures too. We may need some + kind of FLUSH command to reset them all - ensuring that no transactions are + running. It may even be called automatically on checkpoints if no + transactions are running. +*/ +void trnman_free_trn(TRN *trn) +{ + TRN *tmp= pool; + + my_atomic_rwlock_wrlock(&LOCK_pool); + do + { + /* + without this volatile cast gcc-3.4.4 moved the assignment + down after the loop at -O2 + */ + *(TRN * volatile *)&(trn->next)= tmp; + } while (!my_atomic_casptr((void **)&pool, (void **)&tmp, trn)); + my_atomic_rwlock_wrunlock(&LOCK_pool); +} + +/* + NOTE + here we access the hash in a lock-free manner. 
+ It's safe, a 'found' TRN can never be freed/reused before we access it. + In fact, it cannot be freed before 'trn' ends, because a 'found' TRN + can only be removed from the hash when: + found->commit_trid < ALL (trn->min_read_from) + that is, at least + found->commit_trid < trn->min_read_from + but + found->trid >= trn->min_read_from + and + found->commit_trid > found->trid +*/ +my_bool trnman_can_read_from(TRN *trn, TrID trid) +{ + TRN **found; + my_bool can; + LF_REQUIRE_PINS(3); + + if (trid < trn->min_read_from) + return TRUE; /* can read */ + if (trid > trn->trid) + return FALSE; /* cannot read */ + + found= lf_hash_search(&trid_to_committed_trn, trn->pins, &trid, sizeof(trid)); + if (!found) + return FALSE; /* not in the hash of committed transactions = cannot read */ + + can= (*found)->commit_trid < trn->trid; + lf_unpin(trn->pins, 2); + return can; +} + + +/* + Allocates two buffers and stores in them some information about transactions + of the active list (into the first buffer) and of the committed list (into + the second buffer). + + SYNOPSIS + trnman_collect_transactions() + str_act (OUT) pointer to a LEX_STRING where the allocated buffer, and + its size, will be put + str_com (OUT) pointer to a LEX_STRING where the allocated buffer, and + its size, will be put + + + DESCRIPTION + Does the allocation because the caller cannot know the size itself. + Memory freeing is to be done by the caller (if the "str" member of the + LEX_STRING is not NULL). + The caller has the intention of doing checkpoints. + + RETURN + 0 on success + 1 on error +*/ +my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com) +{ + my_bool error; + TRN *trn; + char *ptr; + DBUG_ENTER("trnman_collect_transactions"); + + DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str)); + + pthread_mutex_lock(&LOCK_trn_list); + str_act->length= 8+(6+2+7+7+7)*trnman_active_transactions; + str_com->length= 8+(6+7+7)*trnman_committed_transactions; + if ((NULL == (str_act->str= my_malloc(str_act->length, MYF(MY_WME)))) || + (NULL == (str_com->str= my_malloc(str_com->length, MYF(MY_WME))))) + goto err; + /* First, the active transactions */ + ptr= str_act->str; + int8store(ptr, (ulonglong)trnman_active_transactions); + ptr+= 8; + for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) + { + /* + trns with a short trid of 0 are not initialized; Recovery will recognize + this and ignore them. + State is not needed for now (only when we supported prepared trns). + For LSNs, Sanja will soon push lsn7store. + */ + int6store(ptr, trn->trid); + ptr+= 6; + int2store(ptr, trn->short_id); + ptr+= 2; + /* needed for rollback */ + /* lsn7store(ptr, trn->undo_lsn); */ + ptr+= 7; + /* needed for purge */ + /* lsn7store(ptr, trn->undo_purge_lsn); */ + ptr+= 7; + /* needed for low-water mark calculation */ + /* lsn7store(ptr, read_non_atomic(&trn->first_undo_lsn)); */ + ptr+= 7; + } + /* do the same for committed ones */ + ptr= str_com->str; + int8store(ptr, (ulonglong)trnman_committed_transactions); + ptr+= 8; + for (trn= committed_list_min.next; trn != &committed_list_max; + trn= trn->next) + { + int6store(ptr, trn->trid); + ptr+= 6; + /* mi_int7store(ptr, trn->undo_purge_lsn); */ + ptr+= 7; + /* mi_int7store(ptr, read_non_atomic(&trn->first_undo_lsn)); */ + ptr+= 7; + } + /* + TODO: if we see there exists no transaction (active and committed) we can + tell the lock-free structures to do some freeing (my_free()). 
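  For reference only (not part of this patch): a hypothetical consumer of
  str_act would walk the buffer like this, assuming the usual uintNkorr
  counterparts of the intNstore macros used above:

      uchar *p= (uchar*) str_act->str;
      ulonglong n_active= uint8korr(p);             // number of entries
      p+= 8;
      while (n_active--)
      {
        ulonglong trid=     uint6korr(p);  p+= 6;   // 6-byte transaction id
        uint      short_id= uint2korr(p);  p+= 2;   // 2-byte short id
        p+= 7 + 7 + 7;                              // three reserved lsn slots
      }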
+ */ + error= 0; + goto end; +err: + error= 1; +end: + pthread_mutex_unlock(&LOCK_trn_list); + DBUG_RETURN(error); +} diff --git a/storage/maria/trnman.h b/storage/maria/trnman.h new file mode 100644 index 00000000000..267e3cabd7a --- /dev/null +++ b/storage/maria/trnman.h @@ -0,0 +1,57 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _trnman_h +#define _trnman_h + +#include "lockman.h" + +typedef uint64 TrID; /* our TrID is 6 bytes */ +typedef struct st_transaction TRN; + +/* + trid - 6 byte transaction identifier. Assigned when a transaction + is created. Transaction can always be identified by its trid, + even after transaction has ended. + + short_trid - 2-byte transaction identifier, identifies a running + transaction, is reassigned when transaction ends. +*/ +struct st_transaction +{ + LOCK_OWNER locks; /* must be the first! see short_trid_to_TRN() */ + LF_PINS *pins; + TrID trid, min_read_from, commit_trid; + TRN *next, *prev; + /* Note! if locks.loid is 0, trn is NOT initialized */ +}; + +#define SHORT_TRID_MAX 65535 + +extern uint trnman_active_transactions, trnman_allocated_transactions; + +int trnman_init(void); +void trnman_destroy(void); +TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond); +void trnman_end_trn(TRN *trn, my_bool commit); +#define trnman_commit_trn(T) trnman_end_trn(T, TRUE) +#define trnman_abort_trn(T) trnman_end_trn(T, FALSE) +void trnman_free_trn(TRN *trn); +my_bool trnman_can_read_from(TRN *trn, TrID trid); +my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com); + +#endif + diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am new file mode 100644 index 00000000000..33f1bfed560 --- /dev/null +++ b/storage/maria/unittest/Makefile.am @@ -0,0 +1,87 @@ +# Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ + -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap +INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ + -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap + +# Only reason to link with libmyisam.a here is that it's where some fulltext +# pieces are (but soon we'll remove fulltext dependencies from Maria). +LDADD= $(top_builddir)/unittest/mytap/libmytap.a \ + $(top_builddir)/storage/maria/libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ \ + $(top_builddir)/storage/maria/ma_loghandler.o +noinst_PROGRAMS = ma_control_file-t trnman-t lockman2-t \ + mf_pagecache_single_1k-t mf_pagecache_single_8k-t \ + mf_pagecache_single_64k-t-big \ + mf_pagecache_consist_1k-t-big \ + mf_pagecache_consist_64k-t-big \ + mf_pagecache_consist_1kHC-t-big \ + mf_pagecache_consist_64kHC-t-big \ + mf_pagecache_consist_1kRD-t-big \ + mf_pagecache_consist_64kRD-t-big \ + mf_pagecache_consist_1kWR-t-big \ + mf_pagecache_consist_64kWR-t-big \ + ma_test_loghandler-t \ + ma_test_loghandler_multigroup-t \ + ma_test_loghandler_multithread-t \ + ma_test_loghandler_pagecache-t + +ma_test_loghandler_t_SOURCES= ma_test_loghandler-t.c ma_maria_log_cleanup.c +ma_test_loghandler_multigroup_t_SOURCES= ma_test_loghandler_multigroup-t.c ma_maria_log_cleanup.c +ma_test_loghandler_multithread_t_SOURCES= ma_test_loghandler_multithread-t.c ma_maria_log_cleanup.c +ma_test_loghandler_pagecache_t_SOURCES= ma_test_loghandler_pagecache-t.c ma_maria_log_cleanup.c + +mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c +mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c +mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN + +mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_64k_t_big_SOURCES = $(mf_pagecache_single_src) +mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192 +mf_pagecache_single_64k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 + +mf_pagecache_consist_1k_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 +mf_pagecache_consist_64k_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 + +mf_pagecache_consist_1kHC_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kHC_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY +mf_pagecache_consist_64kHC_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kHC_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY + +mf_pagecache_consist_1kRD_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kRD_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS +mf_pagecache_consist_64kRD_t_big_SOURCES = $(mf_pagecache_consist_src) 
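# Note: every mf_pagecache_consist_* binary in this file compiles the same
# $(mf_pagecache_consist_src) files; only the CPPFLAGS differ.
# -DPAGE_SIZE selects the page size, and (judging by the flag names)
# -DTEST_HIGH_CONCURENCY / -DTEST_READERS / -DTEST_WRITERS select the
# workload mix that mf_pagecache_consist.c compiles in.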
+mf_pagecache_consist_64kRD_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS + +mf_pagecache_consist_1kWR_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_1kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS +mf_pagecache_consist_64kWR_t_big_SOURCES = $(mf_pagecache_consist_src) +mf_pagecache_consist_64kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS + +# the generic lock manager may not be used in the end and lockman1-t crashes, +# so we don't build lockman-t and lockman1-t +CLEANFILES = maria_control page_cache_test_file_1 \ + maria_log.???????? + diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c new file mode 100644 index 00000000000..8c0f71175e7 --- /dev/null +++ b/storage/maria/unittest/lockman-t.c @@ -0,0 +1,309 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + lockman for row and table locks +*/ + +/* #define EXTRA_VERBOSE */ + +#include <tap.h> + +#include <my_global.h> +#include <my_sys.h> +#include <my_atomic.h> +#include <lf.h> +#include "../lockman.h" + +#define Nlos 100 +LOCK_OWNER loarray[Nlos]; +pthread_mutex_t mutexes[Nlos]; +pthread_cond_t conds[Nlos]; +LOCKMAN lockman; + +#ifndef EXTRA_VERBOSE +#define print_lockhash(X) /* no-op */ +#define DIAG(X) /* no-op */ +#else +#define DIAG(X) diag X +#endif + +LOCK_OWNER *loid2lo(uint16 loid) +{ + return loarray+loid-1; +} + +#define unlock_all(O) diag("lo" #O "> release all locks"); \ + lockman_release_locks(&lockman, loid2lo(O));print_lockhash(&lockman) +#define test_lock(O, R, L, S, RES) \ + ok(lockman_getlock(&lockman, loid2lo(O), R, L) == RES, \ + "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \ + print_lockhash(&lockman) +#define lock_ok_a(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK) +#define lock_ok_i(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O, R, L) \ + test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK); + +void test_lockman_simple() +{ + /* simple */ + lock_ok_a(1, 1, S); + lock_ok_i(2, 2, IS); + lock_ok_i(1, 2, IX); + /* lock escalation */ + lock_ok_a(1, 1, X); + lock_ok_i(2, 2, IX); + /* failures */ + lock_conflict(2, 1, X); + unlock_all(2); + lock_ok_a(1, 2, S); + lock_ok_a(1, 2, IS); + lock_ok_a(1, 2, LS); + lock_ok_i(1, 3, IX); + lock_ok_a(2, 3, LS); + lock_ok_i(1, 3, IX); + lock_ok_l(2, 3, IS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_conflict(2, 1, S); + lock_ok_a(1, 1, LS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_ok_a(2, 1, LS); + lock_ok_a(1, 1, LS); + lock_ok_i(1, 1, IX); + lock_ok_i(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + 
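
  /*
    The expected results above and below are consistent with the classic
    multigranularity compatibility matrix (left: mode already held by
    another owner, top: mode requested; the lockman-specific LS/LX modes
    are exercised here but not shown):

               IS    IX    S     X
        IS     yes   yes   yes   no
        IX     yes   yes   no    no
        S      yes   no    no    no
        X      no    no    no    no

    lock_ok_a() expects a plain grant, lock_ok_i() a grant that still
    requires locking the subresource (a row under a table intention
    lock), and lock_ok_l() a grant that only needs an instant lock on
    the subresource.
  */
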
lock_ok_i(1, 4, IS); + lock_ok_i(2, 4, IS); + lock_ok_i(3, 4, IS); + lock_ok_a(3, 4, LS); + lock_ok_i(4, 4, IS); + lock_conflict(4, 4, IX); + lock_conflict(2, 4, IX); + lock_ok_a(1, 4, LS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + unlock_all(4); + + lock_ok_i(1, 1, IX); + lock_ok_i(2, 1, IX); + lock_conflict(1, 1, S); + lock_conflict(2, 1, X); + unlock_all(1); + unlock_all(2); +} + +int rt_num_threads; +int litmus; +int thread_number= 0, timeouts= 0; +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + thread_number= timeouts= 0; + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Running %s with %d threads, %d iterations... ", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +pthread_mutex_t rt_mutex; +int Nrows= 100; +int Ntables= 10; +int table_lock_ratio= 10; +enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; +char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +char *res2str[4]= { + "DIDN'T GET THE LOCK", + "GOT THE LOCK", + "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", + "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; +pthread_handler_t test_lockman(void *arg) +{ + int m= (*(int *)arg); + uint x, loid, row, table, res, locklevel, timeout= 0; + LOCK_OWNER *lo; + + pthread_mutex_lock(&rt_mutex); + loid= ++thread_number; + pthread_mutex_unlock(&rt_mutex); + lo= loid2lo(loid); + + for (x= ((int)(intptr)(&m)); m > 0; m--) + { + x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + row= x % Nrows + Ntables; + table= row % Ntables; + locklevel= (x/Nrows) & 3; + if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) + { /* table lock */ + res= lockman_getlock(&lockman, lo, table, lock_array[locklevel]); + DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + } + else + { /* row lock */ + locklevel&= 1; + res= lockman_getlock(&lockman, lo, table, lock_array[locklevel + 4]); + DIAG(("loid %2d, row %d, lock %s, res %s", loid, row, + lock2str[locklevel+4], res2str[res])); + switch (res) + { + case DIDNT_GET_THE_LOCK: + lockman_release_locks(&lockman, lo); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + case GOT_THE_LOCK: + continue; + case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: + /* not implemented, so take a regular lock */ + case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: + res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]); + DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + continue; + default: + DBUG_ASSERT(0); + } + } + } + + lockman_release_locks(&lockman, lo); + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + timeouts+= timeout; + if 
(!rt_num_threads) + diag("number of timeouts: %d", timeouts); + pthread_mutex_unlock(&rt_mutex); + + return 0; +} + +int main() +{ + int i; + + my_init(); + pthread_mutex_init(&rt_mutex, 0); + + plan(35); + + if (my_atomic_initialize()) + return exit_status(); + + + lockman_init(&lockman, &loid2lo, 50); + + for (i= 0; i < Nlos; i++) + { + loarray[i].pins= lf_alloc_get_pins(&lockman.alloc); + loarray[i].all_locks= 0; + loarray[i].waiting_for= 0; + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init (&conds[i], 0); + loarray[i].mutex= &mutexes[i]; + loarray[i].cond= &conds[i]; + loarray[i].loid= i+1; + } + + test_lockman_simple(); + +#define CYCLES 10000 +#define THREADS Nlos /* don't change this line */ + + /* mixed load, stress-test with random locks */ + Nrows= 100; + Ntables= 10; + table_lock_ratio= 10; + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); + + /* "real-life" simulation - many rows, no table locks */ + Nrows= 1000000; + Ntables= 10; + table_lock_ratio= 0; + run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10); + + for (i= 0; i < Nlos; i++) + { + lockman_release_locks(&lockman, &loarray[i]); + pthread_mutex_destroy(loarray[i].mutex); + pthread_cond_destroy(loarray[i].cond); + lf_pinbox_put_pins(loarray[i].pins); + } + + { + ulonglong now= my_getsystime(); + lockman_destroy(&lockman); + now= my_getsystime()-now; + diag("lockman_destroy: %g secs", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + diff --git a/storage/maria/unittest/lockman1-t.c b/storage/maria/unittest/lockman1-t.c new file mode 100644 index 00000000000..41a1f0fd2f4 --- /dev/null +++ b/storage/maria/unittest/lockman1-t.c @@ -0,0 +1,335 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + lockman for row locks, tablockman for table locks +*/ + +/* #define EXTRA_VERBOSE */ + +#include <tap.h> + +#include <my_global.h> +#include <my_sys.h> +#include <my_atomic.h> +#include <lf.h> +#include "../lockman.h" +#include "../tablockman.h" + +#define Nlos 100 +#define Ntbls 10 +LOCK_OWNER loarray[Nlos]; +TABLE_LOCK_OWNER loarray1[Nlos]; +pthread_mutex_t mutexes[Nlos]; +pthread_cond_t conds[Nlos]; +LOCKED_TABLE ltarray[Ntbls]; +LOCKMAN lockman; +TABLOCKMAN tablockman; + +#ifndef EXTRA_VERBOSE +#define print_lo1(X) /* no-op */ +#define DIAG(X) /* no-op */ +#else +#define DIAG(X) diag X +#endif + +LOCK_OWNER *loid2lo(uint16 loid) +{ + return loarray+loid-1; +} +TABLE_LOCK_OWNER *loid2lo1(uint16 loid) +{ + return loarray1+loid-1; +} + +#define unlock_all(O) diag("lo" #O "> release all locks"); \ + tablockman_release_locks(&tablockman, loid2lo1(O)); +#define test_lock(O, R, L, S, RES) \ + ok(tablockman_getlock(&tablockman, loid2lo1(O), <array[R], L) == RES, \ + "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \ + print_lo1(loid2lo1(O)); +#define lock_ok_a(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK) +#define lock_ok_i(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O, R, L) \ + test_lock(O, R, L, "cannot ", LOCK_TIMEOUT); + +void test_tablockman_simple() +{ + /* simple */ + lock_ok_a(1, 1, S); + lock_ok_i(2, 2, IS); + lock_ok_i(1, 2, IX); + /* lock escalation */ + lock_ok_a(1, 1, X); + lock_ok_i(2, 2, IX); + /* failures */ + lock_conflict(2, 1, X); + unlock_all(2); + lock_ok_a(1, 2, S); + lock_ok_a(1, 2, IS); + lock_ok_a(1, 2, LS); + lock_ok_i(1, 3, IX); + lock_ok_a(2, 3, LS); + lock_ok_i(1, 3, IX); + lock_ok_l(2, 3, IS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_conflict(2, 1, S); + lock_ok_a(1, 1, LS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_ok_a(2, 1, LS); + lock_ok_a(1, 1, LS); + lock_ok_i(1, 1, IX); + lock_ok_i(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_i(1, 4, IS); + lock_ok_i(2, 4, IS); + lock_ok_i(3, 4, IS); + lock_ok_a(3, 4, LS); + lock_ok_i(4, 4, IS); + lock_conflict(4, 4, IX); + lock_conflict(2, 4, IX); + lock_ok_a(1, 4, LS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + unlock_all(4); + + lock_ok_i(1, 1, IX); + lock_ok_i(2, 1, IX); + lock_conflict(1, 1, S); + lock_conflict(2, 1, X); + unlock_all(1); + unlock_all(2); +} + +int rt_num_threads; +int litmus; +int thread_number= 0, timeouts= 0; +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + thread_number= timeouts= 0; + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Running %s with %d threads, %d iterations... 
", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +pthread_mutex_t rt_mutex; +int Nrows= 100; +int Ntables= 10; +int table_lock_ratio= 10; +enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; +char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +char *res2str[]= { + "DIDN'T GET THE LOCK", + "OUT OF MEMORY", + "DEADLOCK", + "LOCK TIMEOUT", + "GOT THE LOCK", + "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", + "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; +pthread_handler_t test_lockman(void *arg) +{ + int m= (*(int *)arg); + uint x, loid, row, table, res, locklevel, timeout= 0; + LOCK_OWNER *lo; TABLE_LOCK_OWNER *lo1; DBUG_ASSERT(Ntables <= Ntbls); + + pthread_mutex_lock(&rt_mutex); + loid= ++thread_number; + pthread_mutex_unlock(&rt_mutex); + lo= loid2lo(loid); lo1= loid2lo1(loid); + + for (x= ((int)(intptr)(&m)); m > 0; m--) + { + x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + row= x % Nrows + Ntables; + table= row % Ntables; + locklevel= (x/Nrows) & 3; + if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) + { /* table lock */ + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]); + DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, + lock2str[locklevel], res2str[res])); + if (res < GOT_THE_LOCK) + { + lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + } + else + { /* row lock */ + locklevel&= 1; + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]); + DIAG(("loid %2d, row %d, lock %s, res %s", loid, row, + lock2str[locklevel+4], res2str[res])); + switch (res) + { + case GOT_THE_LOCK: + continue; + case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: + /* not implemented, so take a regular lock */ + case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: + res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]); + DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); + tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + continue; + default: + lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + } + } + + lockman_release_locks(&lockman, lo); + tablockman_release_locks(&tablockman, lo1); + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + timeouts+= timeout; + if (!rt_num_threads) + diag("number of timeouts: %d", timeouts); + pthread_mutex_unlock(&rt_mutex); + + return 0; +} + +int main() +{ + int i; + + my_init(); + pthread_mutex_init(&rt_mutex, 0); + + plan(35); + + if (my_atomic_initialize()) + return exit_status(); + + + lockman_init(&lockman, &loid2lo, 50); + tablockman_init(&tablockman, &loid2lo1, 50); + + for (i= 0; i < Nlos; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init (&conds[i], 0); + + loarray[i].pins= lf_alloc_get_pins(&lockman.alloc); + 
loarray[i].all_locks= 0; + loarray[i].waiting_for= 0; + loarray[i].mutex= &mutexes[i]; + loarray[i].cond= &conds[i]; + loarray[i].loid= i+1; + + loarray1[i].active_locks= 0; + loarray1[i].waiting_lock= 0; + loarray1[i].waiting_for= 0; + loarray1[i].mutex= &mutexes[i]; + loarray1[i].cond= &conds[i]; + loarray1[i].loid= i+1; + } + + for (i= 0; i < Ntbls; i++) + { + tablockman_init_locked_table(ltarray+i, Nlos); + } + + test_tablockman_simple(); + +#define CYCLES 10000 +#define THREADS Nlos /* don't change this line */ + + /* mixed load, stress-test with random locks */ + Nrows= 100; + Ntables= 10; + table_lock_ratio= 10; + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); + + /* "real-life" simulation - many rows, no table locks */ + Nrows= 1000000; + Ntables= 10; + table_lock_ratio= 0; + run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10); + + for (i= 0; i < Nlos; i++) + { + lockman_release_locks(&lockman, &loarray[i]); + pthread_mutex_destroy(loarray[i].mutex); + pthread_cond_destroy(loarray[i].cond); + lf_pinbox_put_pins(loarray[i].pins); + } + + { + ulonglong now= my_getsystime(); + lockman_destroy(&lockman); + now= my_getsystime()-now; + diag("lockman_destroy: %g secs", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c new file mode 100644 index 00000000000..01af1a03d22 --- /dev/null +++ b/storage/maria/unittest/lockman2-t.c @@ -0,0 +1,361 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + tablockman for row and table locks +*/ + +/* #define EXTRA_VERBOSE */ + +#include <tap.h> + +#include <my_global.h> +#include <my_sys.h> +#include <my_atomic.h> +#include <lf.h> +#include "../tablockman.h" + +#define Nlos 100 +#define Ntbls 110 +TABLE_LOCK_OWNER loarray1[Nlos]; +pthread_mutex_t mutexes[Nlos]; +pthread_cond_t conds[Nlos]; +LOCKED_TABLE ltarray[Ntbls]; +TABLOCKMAN tablockman; + +#ifndef EXTRA_VERBOSE +#define print_lo1(X) /* no-op */ +#define DIAG(X) /* no-op */ +#else +#define DIAG(X) diag X +#endif + +TABLE_LOCK_OWNER *loid2lo1(uint16 loid) +{ + return loarray1+loid-1; +} + +#define unlock_all(O) diag("lo" #O "> release all locks"); \ + tablockman_release_locks(&tablockman, loid2lo1(O)); +#define test_lock(O, R, L, S, RES) \ + ok(tablockman_getlock(&tablockman, loid2lo1(O), <array[R], L) == RES, \ + "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \ + print_lo1(loid2lo1(O)); +#define lock_ok_a(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK) +#define lock_ok_i(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O, R, L) \ + test_lock(O, R, L, "cannot ", LOCK_TIMEOUT); + +void test_tablockman_simple() +{ + /* simple */ + lock_ok_a(1, 1, S); + lock_ok_i(2, 2, IS); + lock_ok_i(1, 2, IX); + /* lock escalation */ + lock_ok_a(1, 1, X); + lock_ok_i(2, 2, IX); + /* failures */ + lock_conflict(2, 1, X); + unlock_all(2); + lock_ok_a(1, 2, S); + lock_ok_a(1, 2, IS); + lock_ok_a(1, 2, LS); + lock_ok_i(1, 3, IX); + lock_ok_a(2, 3, LS); + lock_ok_i(1, 3, IX); + lock_ok_l(2, 3, IS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_conflict(2, 1, S); + lock_ok_a(1, 1, LS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_ok_a(2, 1, LS); + lock_ok_a(1, 1, LS); + lock_ok_i(1, 1, IX); + lock_ok_i(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_i(1, 4, IS); + lock_ok_i(2, 4, IS); + lock_ok_i(3, 4, IS); + lock_ok_a(3, 4, LS); + lock_ok_i(4, 4, IS); + lock_conflict(4, 4, IX); + lock_conflict(2, 4, IX); + lock_ok_a(1, 4, LS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + unlock_all(4); + + lock_ok_i(1, 1, IX); + lock_ok_i(2, 1, IX); + lock_conflict(1, 1, S); + lock_conflict(2, 1, X); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IS); + lock_conflict(2, 1, X); + lock_conflict(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_a(1, 1, S); + lock_conflict(2, 1, IX); + lock_conflict(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); +} + +int rt_num_threads; +int litmus; +int thread_number= 0, timeouts= 0; +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + thread_number= timeouts= 0; + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Running %s with %d threads, %d iterations... 
", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +static void reinit_tlo(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) +{ +#ifdef NOT_USED_YET + TABLE_LOCK_OWNER backup= *lo; +#endif + + tablockman_release_locks(lm, lo); +#ifdef NOT_USED_YET + pthread_mutex_destroy(lo->mutex); + pthread_cond_destroy(lo->cond); + bzero(lo, sizeof(*lo)); + + lo->mutex= backup.mutex; + lo->cond= backup.cond; + lo->loid= backup.loid; + pthread_mutex_init(lo->mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(lo->cond, 0); +#endif +} + +pthread_mutex_t rt_mutex; +int Nrows= 100; +int Ntables= 10; +int table_lock_ratio= 10; +enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; +const char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +const char *res2str[]= { + 0, + "OUT OF MEMORY", + "DEADLOCK", + "LOCK TIMEOUT", + "GOT THE LOCK", + "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", + "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; + +pthread_handler_t test_lockman(void *arg) +{ + int m= (*(int *)arg); + uint x, loid, row, table, res, locklevel, timeout= 0; + TABLE_LOCK_OWNER *lo1; + DBUG_ASSERT(Ntables <= Ntbls); + DBUG_ASSERT(Nrows + Ntables <= Ntbls); + + pthread_mutex_lock(&rt_mutex); + loid= ++thread_number; + pthread_mutex_unlock(&rt_mutex); + lo1= loid2lo1(loid); + + for (x= ((int)(intptr)(&m)); m > 0; m--) + { + /* three prime numbers */ + x= (uint) ((x*LL(3628273133) + LL(1500450271)) % LL(9576890767)); + row= x % Nrows + Ntables; + table= row % Ntables; + locklevel= (x/Nrows) & 3; + if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) + { + /* table lock */ + res= tablockman_getlock(&tablockman, lo1, ltarray+table, + lock_array[locklevel]); + DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, + lock2str[locklevel], res2str[res])); + if (res < GOT_THE_LOCK) + { + reinit_tlo(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + } + else + { /* row lock */ + locklevel&= 1; + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]); + DIAG(("loid %2d, row %d, lock %s, res %s", loid, row, + lock2str[locklevel+4], res2str[res])); + switch (res) + { + case GOT_THE_LOCK: + continue; + case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: + /* not implemented, so take a regular lock */ + case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: + res= tablockman_getlock(&tablockman, lo1, ltarray+row, lock_array[locklevel]); + DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, + lock2str[locklevel], res2str[res])); + if (res < GOT_THE_LOCK) + { + reinit_tlo(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + continue; + default: + reinit_tlo(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + } + } + + reinit_tlo(&tablockman, lo1); + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + timeouts+= timeout; + if (!rt_num_threads) + diag("number of timeouts: %d", timeouts); + pthread_mutex_unlock(&rt_mutex); + + return 0; +} + +int main() +{ + int i; + + my_init(); + pthread_mutex_init(&rt_mutex, 0); + + plan(40); + + if (my_atomic_initialize()) + return exit_status(); 
+ + + tablockman_init(&tablockman, &loid2lo1, 50); + + for (i= 0; i < Nlos; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init (&conds[i], 0); + + loarray1[i].active_locks= 0; + loarray1[i].waiting_lock= 0; + loarray1[i].waiting_for= 0; + loarray1[i].mutex= &mutexes[i]; + loarray1[i].cond= &conds[i]; + loarray1[i].loid= i+1; + } + + for (i= 0; i < Ntbls; i++) + { + tablockman_init_locked_table(ltarray+i, Nlos); + } + + test_tablockman_simple(); + +#define CYCLES 10000 +#define THREADS Nlos /* don't change this line */ + + /* mixed load, stress-test with random locks */ + Nrows= 100; + Ntables= 10; + table_lock_ratio= 10; + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); +#if 0 + /* "real-life" simulation - many rows, no table locks */ + Nrows= 1000000; + Ntables= 10; + table_lock_ratio= 0; + run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10); +#endif + for (i= 0; i < Nlos; i++) + { + tablockman_release_locks(&tablockman, &loarray1[i]); + pthread_mutex_destroy(loarray1[i].mutex); + pthread_cond_destroy(loarray1[i].cond); + } + + { + ulonglong now= my_getsystime(); + for (i= 0; i < Ntbls; i++) + { + tablockman_destroy_locked_table(ltarray+i); + } + tablockman_destroy(&tablockman); + now= my_getsystime()-now; + diag("lockman_destroy: %g secs", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c new file mode 100644 index 00000000000..1b37ee3f53c --- /dev/null +++ b/storage/maria/unittest/ma_control_file-t.c @@ -0,0 +1,446 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Unit test of the control file module of the Maria engine WL#3234 */ + +/* + Note that it is not possible to test the durability of the write (can't + pull the plug programmatically :) +*/ + +#include <my_global.h> +#include <my_sys.h> +#include <tap.h> + +#ifndef WITH_MARIA_STORAGE_ENGINE +/* + If Maria is not compiled in, normally we don't come to building this test. +*/ +#error "Maria engine is not compiled in, test cannot be built" +#endif + +#include "maria.h" +#include "../../../storage/maria/maria_def.h" +#include <my_getopt.h> + +char file_name[FN_REFLEN]; + +/* The values we'll set and expect the control file module to return */ +LSN expect_checkpoint_lsn; +uint32 expect_logno; + +static int delete_file(myf my_flags); +/* + Those are test-specific wrappers around the module's API functions: after + calling the module's API functions they perform checks on the result. 
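  Specifically, create_or_open_file() and write_file() cross-check the
  module's last_checkpoint_lsn / last_logno against the test's expected
  copies via verify_module_values_match_expected(), while close_file()
  checks that the module has forgotten them via
  verify_module_values_are_impossible().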
+*/ +static int close_file(); /* wraps ma_control_file_end */ +static int create_or_open_file(); /* wraps ma_control_file_open_or_create */ +static int write_file(); /* wraps ma_control_file_write_and_force */ + +/* Tests */ +static int test_one_log(); +static int test_five_logs(); +static int test_3_checkpoints_and_2_logs(); +static int test_binary_content(); +static int test_start_stop(); +static int test_2_open_and_2_close(); +static int test_bad_magic_string(); +static int test_bad_checksum(); +static int test_bad_size(); + +/* Utility */ +static int verify_module_values_match_expected(); +static int verify_module_values_are_impossible(); +static void usage(); +static void get_options(int argc, char *argv[]); + +/* + If "expr" is FALSE, this macro will make the function print a diagnostic + message and immediately return 1. + This is inspired from assert() but does not crash the binary (sometimes we + may want to see how other tests go even if one fails). + RET_ERR means "return error". +*/ + +#define RET_ERR_UNLESS(expr) \ + {if (!(expr)) {diag("line %d: failure: '%s'", __LINE__, #expr); return 1;}} + + +int main(int argc,char *argv[]) +{ + MY_INIT(argv[0]); + maria_data_root= "."; + + plan(9); + + diag("Unit tests for control file"); + + get_options(argc,argv); + + diag("Deleting control file at startup, if there is an old one"); + RET_ERR_UNLESS(0 == delete_file(0)); /* if fails, can't continue */ + + diag("Tests of normal conditions"); + ok(0 == test_one_log(), "test of creating one log"); + ok(0 == test_five_logs(), "test of creating five logs"); + ok(0 == test_3_checkpoints_and_2_logs(), + "test of creating three checkpoints and two logs"); + ok(0 == test_binary_content(), "test of the binary content of the file"); + ok(0 == test_start_stop(), "test of multiple starts and stops"); + diag("Tests of abnormal conditions"); + ok(0 == test_2_open_and_2_close(), + "test of two open and two close (strange call sequence)"); + ok(0 == test_bad_magic_string(), "test of bad magic string"); + ok(0 == test_bad_checksum(), "test of bad checksum"); + ok(0 == test_bad_size(), "test of too small/big file"); + + return exit_status(); +} + + +static int delete_file(myf my_flags) +{ + RET_ERR_UNLESS(fn_format(file_name, CONTROL_FILE_BASE_NAME, + maria_data_root, "", MYF(MY_WME)) != NullS); + /* + Maybe file does not exist, ignore error. + The error will however be printed on stderr. + */ + my_delete(file_name, my_flags); + expect_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + expect_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + + return 0; +} + +/* + Verifies that global values last_checkpoint_lsn and last_logno (belonging + to the module) match what we expect. +*/ +static int verify_module_values_match_expected() +{ + RET_ERR_UNLESS(last_logno == expect_logno); + RET_ERR_UNLESS(last_checkpoint_lsn == + expect_checkpoint_lsn); + return 0; +} + + +/* + Verifies that global values last_checkpoint_lsn and last_logno (belonging + to the module) are impossible (this is used when the file has been closed). 
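  "Impossible" here means the CONTROL_FILE_IMPOSSIBLE_FILENO and
  CONTROL_FILE_IMPOSSIBLE_LSN sentinels checked below.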
+*/ +static int verify_module_values_are_impossible() +{ + RET_ERR_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + RET_ERR_UNLESS(last_checkpoint_lsn == + CONTROL_FILE_IMPOSSIBLE_LSN); + return 0; +} + + +static int close_file() +{ + /* Simulate shutdown */ + ma_control_file_end(); + /* Verify amnesia */ + RET_ERR_UNLESS(verify_module_values_are_impossible() == 0); + return 0; +} + +static int create_or_open_file() +{ + RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_OK); + /* Check that the module reports expected information */ + RET_ERR_UNLESS(verify_module_values_match_expected() == 0); + return 0; +} + +static int write_file(const LSN checkpoint_lsn, + uint32 logno, + uint objs_to_write) +{ + RET_ERR_UNLESS(ma_control_file_write_and_force(checkpoint_lsn, logno, + objs_to_write) == 0); + /* Check that the module reports expected information */ + RET_ERR_UNLESS(verify_module_values_match_expected() == 0); + return 0; +} + +static int test_one_log() +{ + uint objs_to_write; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 123; + RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, + expect_logno, + objs_to_write) == 0); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_five_logs() +{ + uint objs_to_write; + uint i; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 100; + for (i= 0; i<5; i++) + { + expect_logno*= 3; + RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, expect_logno, + objs_to_write) == 0); + } + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_3_checkpoints_and_2_logs() +{ + uint objs_to_write; + /* + Simulate one checkpoint, one log creation, two checkpoints, one + log creation. + */ + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; + expect_checkpoint_lsn= MAKE_LSN(5, 10000); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 17; + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; + expect_checkpoint_lsn= MAKE_LSN(17, 20000); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; + expect_checkpoint_lsn= MAKE_LSN(17, 45000); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 19; + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_binary_content() +{ + uint i; + int fd; + + /* + TEST4: actually check by ourselves the content of the file. + Note that constants (offsets) are hard-coded here, precisely to prevent + someone from changing them in the control file module and breaking + backward-compatibility. + TODO: when we reach the format-freeze state, we may even just do a + comparison with a raw binary string, to not depend on any uint4korr + future change/breakage. 
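
    Concretely, the checks below (together with the corruption tests
    further down) rely on: the magic string occupying at least the first
    4 bytes of the file, the checkpoint LSN being stored as a 3-byte file
    number at offset 9 followed by a 4-byte offset-within-file at offset
    12, and last_logno being a 4-byte integer at offset 16.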
+ */ + + char buffer[20]; + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_read(fd, buffer, 20, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + i= uint3korr(buffer+9); + RET_ERR_UNLESS(i == LSN_FILE_NO(last_checkpoint_lsn)); + i= uint4korr(buffer+12); + RET_ERR_UNLESS(i == LSN_OFFSET(last_checkpoint_lsn)); + i= uint4korr(buffer+16); + RET_ERR_UNLESS(i == last_logno); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_start_stop() +{ + /* TEST5: Simulate start/nothing/stop/start/nothing/stop/start */ + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_2_open_and_2_close() +{ + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + + +static int test_bad_magic_string() +{ + char buffer[4]; + int fd; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + + /* Corrupt magic string */ + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_pread(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_pwrite(fd, "papa", 4, 0, MYF(MY_FNABP | MY_WME)) == 0); + + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == + CONTROL_FILE_BAD_MAGIC_STRING); + /* Restore magic string */ + RET_ERR_UNLESS(my_pwrite(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_bad_checksum() +{ + char buffer[4]; + int fd; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + + /* Corrupt checksum */ + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_pread(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0); + buffer[0]+= 3; /* mangle checksum */ + RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0); + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == + CONTROL_FILE_BAD_CHECKSUM); + /* Restore checksum */ + buffer[0]-= 3; + RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + + return 0; +} + + +static int test_bad_size() +{ + char buffer[]="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + int fd; + + /* A too short file */ + RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0); + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR | O_CREAT, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_write(fd, buffer, 10, MYF(MY_FNABP | MY_WME)) == 0); + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_SMALL); + RET_ERR_UNLESS(my_write(fd, buffer, 30, MYF(MY_FNABP | MY_WME)) == 0); + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == 
CONTROL_FILE_TOO_BIG); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + + /* Leave a correct control file */ + RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + + return 0; +} + + +static struct my_option my_long_options[] = +{ +#ifndef DBUG_OFF + {"debug", '#', "Debug log.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"help", '?', "Display help and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Print version number and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static void version() +{ + printf("ma_control_file_test: unit test for the control file " + "module of the Maria storage engine. Ver 1.0 \n"); +} + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch(optid) { + case 'V': + version(); + exit(0); + case '#': + DBUG_PUSH (argument); + break; + case '?': + version(); + usage(); + exit(0); + } + return 0; +} + + +/* Read options */ + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, + get_one_option))) + exit(ho_error); + + return; +} /* get options */ + + +static void usage() +{ + printf("Usage: %s [options]\n\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/unittest/ma_maria_log_cleanup.c b/storage/maria/unittest/ma_maria_log_cleanup.c new file mode 100644 index 00000000000..c5917764b9b --- /dev/null +++ b/storage/maria/unittest/ma_maria_log_cleanup.c @@ -0,0 +1,45 @@ +#include "../maria_def.h" +#include <my_dir.h> + +my_bool maria_log_remove() +{ + MY_DIR *dirp; + uint i; + MY_STAT stat_buff; + char file_name[FN_REFLEN]; + + /* Removes control file */ + if (fn_format(file_name, CONTROL_FILE_BASE_NAME, + maria_data_root, "", MYF(MY_WME)) == NullS) + return 1; + if (my_stat(file_name, &stat_buff, MYF(0)) && + my_delete(file_name, MYF(MY_WME)) != 0) + return 1; + + /* Finds and removes transaction log files */ + if (!(dirp = my_dir(maria_data_root, MYF(MY_DONT_SORT)))) + return 1; + + for (i= 0; i < dirp->number_off_files; i++) + { + char *file= dirp->dir_entry[i].name; + if (strncmp(file, "maria_log.", 10) == 0 && + file[10] >= '0' && file[10] <= '9' && + file[11] >= '0' && file[11] <= '9' && + file[12] >= '0' && file[12] <= '9' && + file[13] >= '0' && file[13] <= '9' && + file[14] >= '0' && file[14] <= '9' && + file[15] >= '0' && file[15] <= '9' && + file[16] >= '0' && file[16] <= '9' && + file[17] >= '0' && file[17] <= '9' && + file[18] == '\0') + { + if (fn_format(file_name, file, + maria_data_root, "", MYF(MY_WME)) == NullS || + my_delete(file_name, MYF(MY_WME)) != 0) + return 1; + } + } + return 0; +} + diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c new file mode 100644 index 00000000000..757520322c8 --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -0,0 +1,578 @@ +#include "../maria_def.h" +#include <stdio.h> +#include <errno.h> +#include <tap.h> + +extern my_bool maria_log_remove(); + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) + +#define LONG_BUFFER_SIZE (100 * 1024) + + +#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC +#define LOG_FILE_SIZE 1024L*1024L*3L +#define 
ITERATIONS 1600 + +/* +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE 1024L*1024L*1024L +#define ITERATIONS 181000 +*/ + +/* +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE 1024L*1024L*3L +#define ITERATIONS 1600 +*/ + +/* +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE 1024L*1024L*100L +#define ITERATIONS 65000 +*/ + +/* + Check that the buffer filled correctly + + SYNOPSIS + check_content() + ptr Pointer to the buffer + length length of the buffer + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool check_content(byte *ptr, ulong length) +{ + ulong i; + byte buff[2]; + for (i= 0; i < length; i++) + { + if (i % 2 == 0) + int2store(buff, i >> 1); + if (ptr[i] != buff[i % 2]) + { + fprintf(stderr, "Byte # %lu is %x instead of %x", + i, (uint) ptr[i], (uint) buff[i % 2]); + return 1; + } + } + return 0; +} + + +/* + Read whole record content, and check content (put with offset) + + SYNOPSIS + read_and_check_content() + rec The record header buffer + buffer The buffer to read the record in + skip Skip this number of bytes ot the record content + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, + byte *buffer, uint skip) +{ + DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE * 2 + 7 * 2 + 2); + if (translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL) != + rec->record_length) + return 1; + return check_content(buffer + skip, rec->record_length - skip); +} + +int main(int argc, char *argv[]) +{ + uint32 i; + uint32 rec_len; + uint pagen; + byte long_tr_id[6]; + byte lsn_buff[23]= + { + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55 + }; + byte long_buffer[LONG_BUFFER_SIZE * 2 + LSN_STORE_SIZE * 2 + 2]; + PAGECACHE pagecache; + LSN lsn, lsn_base, first_lsn; + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + int rc; + + MY_INIT(argv[0]); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + if (maria_log_remove()) + exit(1); + + for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i+= 2) + { + int2store(long_buffer + i, (i >> 1)); + /* long_buffer[i]= (i & 0xFF); */ + } + + bzero(long_tr_id, 6); +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + plan(((ITERATIONS - 1) * 4 + 1)*2 + ITERATIONS - 1); + + srandom(122334817L); + + long_tr_id[5]= 0xff; + + int4store(long_tr_id, 0); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + 0, NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); + translog_destroy(); + ok(0, "write LOGREC_LONG_TRANSACTION_ID"); + exit(1); + } + ok(1, "write LOGREC_LONG_TRANSACTION_ID"); + lsn_base= first_lsn= lsn; + + for (i= 1; i < ITERATIONS; i++) + { + if (i % 2) + { + lsn_store(lsn_buff, lsn_base); + if (translog_write_record(&lsn, + 
LOGREC_CLR_END, + (i % 0xFFFF), NULL, 7, lsn_buff, 0)) + { + fprintf(stderr, "1 Can't write reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_CLR_END"); + exit(1); + } + ok(1, "write LOGREC_CLR_END"); + lsn_store(lsn_buff, lsn_base); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) + rec_len= 12; + if (translog_write_record(&lsn, + LOGREC_UNDO_KEY_INSERT, + (i % 0xFFFF), + NULL, 7, lsn_buff, rec_len, long_buffer, 0)) + { + fprintf(stderr, "1 Can't write var reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_UNDO_KEY_INSERT"); + exit(1); + } + ok(1, "write LOGREC_UNDO_KEY_INSERT"); + } + else + { + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); + if (translog_write_record(&lsn, + LOGREC_UNDO_ROW_DELETE, + (i % 0xFFFF), NULL, 23, lsn_buff, 0)) + { + fprintf(stderr, "0 Can't write reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_UNDO_ROW_DELETE"); + exit(1); + } + ok(1, "write LOGREC_UNDO_ROW_DELETE"); + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) + rec_len= 19; + if (translog_write_record(&lsn, + LOGREC_UNDO_KEY_DELETE, + (i % 0xFFFF), + NULL, 14, lsn_buff, rec_len, long_buffer, 0)) + { + fprintf(stderr, "0 Can't write var reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_UNDO_KEY_DELETE"); + exit(1); + } + ok(1, "write LOGREC_UNDO_KEY_DELETE"); + } + int4store(long_tr_id, i); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + (i % 0xFFFF), NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_LONG_TRANSACTION_ID"); + exit(1); + } + ok(1, "write LOGREC_LONG_TRANSACTION_ID"); + + lsn_base= lsn; + + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9) + rec_len= 9; + if (translog_write_record(&lsn, + LOGREC_REDO_INSERT_ROW_HEAD, + (i % 0xFFFF), NULL, rec_len, long_buffer, 0)) + { + fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_REDO_INSERT_ROW_HEAD"); + exit(1); + } + ok(1, "write LOGREC_REDO_INSERT_ROW_HEAD"); + if (translog_flush(lsn)) + { + fprintf(stderr, "Can't flush #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "flush"); + exit(1); + } + ok(1, "flush"); + } + + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "pass2: Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + srandom(122334817L); + + + rc= 1; + + { + translog_size_t len= translog_read_record_header(first_lsn, &rec); + if (len == 0) + { + fprintf(stderr, "translog_read_record_header failed (%d)\n", errno); + goto err; + } + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 || + rec.record_length != 6 || uint4korr(rec.header) != 0 || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF || + first_lsn != rec.lsn) + { + fprintf(stderr, "Incorrect 
LOGREC_LONG_TRANSACTION_ID data read(0)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, " + "lsn(%lu,0x%lx)\n", + (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, + (uint) uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + ok(1, "read record"); + translog_free_record_header(&rec); + lsn= first_lsn; + if (translog_init_scanner(first_lsn, 1, &scanner)) + { + fprintf(stderr, "scanner init failed\n"); + goto err; + } + for (i= 1;; i++) + { + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + if (i != ITERATIONS) + { + fprintf(stderr, "EOL met at iteration %u instead of %u\n", + i, ITERATIONS); + goto err; + } + break; + } + if (i % 2) + { + LSN ref; + ref= lsn_korr(rec.header); + if (rec.type !=LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) || + rec.record_length != 7 || ref != lsn) + { + fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d) " + "type: %u strid: %u len: %u" + "ref: (%lu,0x%lx) (%lu,0x%lx) " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + } + else + { + LSN ref1, ref2; + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); + if (rec.type != LOGREC_UNDO_ROW_DELETE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 23 || + ref1 != lsn || + ref2 != first_lsn || + ((uchar)rec.header[22]) != 0x55 || + ((uchar)rec.header[21]) != 0xAA || + ((uchar)rec.header[20]) != 0x55 || + ((uchar)rec.header[19]) != 0xAA || + ((uchar)rec.header[18]) != 0x55 || + ((uchar)rec.header[17]) != 0xAA || + ((uchar)rec.header[16]) != 0x55 || + ((uchar)rec.header[15]) != 0xAA || + ((uchar)rec.header[14]) != 0x55) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)" + "type %u, strid %u, len %u, ref1(%lu,0x%lx), " + "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (uint) rec.header[14], (uint) rec.header[15], + (uint) rec.header[16], (uint) rec.header[17], + (uint) rec.header[18], (uint) rec.header[19], + (uint) rec.header[20], (uint) rec.header[21], + (uint) rec.header[22], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + } + ok(1, "read record"); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header (var) " + "failed (%d)\n", i, errno); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + fprintf(stderr, "EOL met at the middle of iteration (first var) %u " + "instead of beginning of %u\n", i, ITERATIONS); + goto err; + } + if (i % 2) + { + LSN ref; + ref= lsn_korr(rec.header); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) + rec_len= 12; + if (rec.type !=LOGREC_UNDO_KEY_INSERT || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + LSN_STORE_SIZE || + len != 12 || ref != lsn || + check_content(rec.header + LSN_STORE_SIZE, len - 
LSN_STORE_SIZE)) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)" + "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " + "hdr len: %u (%d), " + "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n", + i, (uint) rec.type, + rec.type !=LOGREC_UNDO_KEY_INSERT, + (uint) rec.short_trid, + rec.short_trid != (i % 0xFFFF), + (ulong) rec.record_length, (ulong) rec_len, + rec.record_length != rec_len + LSN_STORE_SIZE, + (uint) len, + len != 12, + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn), + (len != 12 || ref != lsn), + check_content(rec.header + LSN_STORE_SIZE, + len - LSN_STORE_SIZE)); + goto err; + } + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read " + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + } + else + { + LSN ref1, ref2; + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) + rec_len= 19; + if (rec.type !=LOGREC_UNDO_KEY_DELETE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + LSN_STORE_SIZE * 2 || + len != 19 || + ref1 != lsn || + ref2 != first_lsn || + check_content(rec.header + LSN_STORE_SIZE * 2, + len - LSN_STORE_SIZE * 2)) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)" + "type %u, strid %u, len %lu != %lu + 14, hdr len: %u, " + "ref1(%lu,0x%lx), ref2(%lu,0x%lx), " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + } + ok(1, "read record"); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + fprintf(stderr, "EOL met at the middle of iteration %u " + "instead of beginning of %u\n", i, ITERATIONS); + goto err; + } + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 6 || uint4korr(rec.header) != i || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF) + { + fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (uint) uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + lsn= rec.lsn; + ok(1, "read record"); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9) + rec_len= 9; + if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len || + len != 9 || check_content(rec.header, len)) + { + fprintf(stderr, "Incorrect 
LOGREC_REDO_INSERT_ROW_HEAD data read(%d)" + "type %u, strid %u, len %lu != %lu, hdr len: %u, " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 0)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + ok(1, "read record"); + translog_free_record_header(&rec); + } + } + + rc= 0; +err: + if (rc) + ok(0, "read record"); + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + if (maria_log_remove()) + exit(1); + return(test(exit_status())); +} diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c new file mode 100644 index 00000000000..4aaf30bd9a3 --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -0,0 +1,600 @@ +#include "../maria_def.h" +#include <stdio.h> +#include <errno.h> +#include <tap.h> + +extern my_bool maria_log_remove(); + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) + +#define LONG_BUFFER_SIZE ((1024L*1024L*1024L) + (1024L*1024L*512)) + +#define MIN_REC_LENGTH (1024L*1024L + 1024L*512L + 1) + +#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) +#define ITERATIONS 2 +/*#define ITERATIONS 63 */ + +/* +#define LOG_FILE_SIZE 1024L*1024L*3L +#define ITERATIONS 1600 +*/ +/* +#define LOG_FILE_SIZE 1024L*1024L*100L +#define ITERATIONS 65000 +*/ + + +/* + Check that the buffer filled correctly + + SYNOPSIS + check_content() + ptr Pointer to the buffer + length length of the buffer + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool check_content(byte *ptr, ulong length) +{ + ulong i; + byte buff[4]; + DBUG_ENTER("check_content"); + for (i= 0; i < length; i++) + { + if (i % 4 == 0) + int4store(buff, (i >> 2)); + if (ptr[i] != buff[i % 4]) + { + fprintf(stderr, "Byte # %lu is %x instead of %x", + i, (uint) ptr[i], (uint) buff[i % 4]); + DBUG_DUMP("mem", ptr +(ulong) (i > 16 ? i - 16 : 0), + (i > 16 ? 16 : i) + (i + 16 < length ? 
16 : length - i)); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + Read whole record content, and check content (put with offset) + + SYNOPSIS + read_and_check_content() + rec The record header buffer + buffer The buffer to read the record in + skip Skip this number of bytes ot the record content + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, + byte *buffer, uint skip) +{ + int res= 0; + translog_size_t len; + DBUG_ENTER("read_and_check_content"); + DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); + if ((len= translog_read_record(rec->lsn, 0, rec->record_length, + buffer, NULL)) != rec->record_length) + { + fprintf(stderr, "Requested %lu byte, read %lu\n", + (ulong) rec->record_length, (ulong) len); + res= 1; + } + res|= check_content(buffer + skip, rec->record_length - skip); + DBUG_RETURN(res); +} + + +static uint32 get_len() +{ + uint32 rec_len; + do + { + rec_len= random() / + (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH; + } while (rec_len >= LONG_BUFFER_SIZE); + return rec_len; +} + +int main(int argc, char *argv[]) +{ + uint32 i; + uint32 rec_len; + uint pagen; + byte long_tr_id[6]; + byte lsn_buff[23]= + { + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55 + }; + byte *long_buffer= malloc(LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); + PAGECACHE pagecache; + LSN lsn, lsn_base, first_lsn; + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + int rc; + + MY_INIT(argv[0]); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + if (maria_log_remove()) + exit(1); + + { + byte buff[4]; + for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i++) + { + if (i % 4 == 0) + int4store(buff, (i >> 2)); + long_buffer[i]= buff[i % 4]; + } + } + + bzero(long_tr_id, 6); +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + plan(((ITERATIONS - 1) * 4 + 1) * 2); + + srandom(122334817L); + + long_tr_id[5]= 0xff; + + int4store(long_tr_id, 0); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + 0, NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); + translog_destroy(); + ok(0, "write LOGREC_LONG_TRANSACTION_ID"); + exit(1); + } + ok(1, "write LOGREC_LONG_TRANSACTION_ID"); + lsn_base= first_lsn= lsn; + + for (i= 1; i < ITERATIONS; i++) + { + if (i % 2) + { + lsn_store(lsn_buff, lsn_base); + if (translog_write_record(&lsn, + LOGREC_CLR_END, + (i % 0xFFFF), NULL, + LSN_STORE_SIZE, lsn_buff, 0)) + { + fprintf(stderr, "1 Can't write reference before record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_CLR_END"); + exit(1); + } + ok(1, "write LOGREC_CLR_END"); + lsn_store(lsn_buff, lsn_base); + rec_len= 
get_len(); + if (translog_write_record(&lsn, + LOGREC_UNDO_KEY_INSERT, + (i % 0xFFFF), + NULL, LSN_STORE_SIZE, lsn_buff, + rec_len, long_buffer, 0)) + { + fprintf(stderr, "1 Can't write var reference before record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_UNDO_KEY_INSERT"); + exit(1); + } + ok(1, "write LOGREC_UNDO_KEY_INSERT"); + } + else + { + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); + if (translog_write_record(&lsn, + LOGREC_UNDO_ROW_DELETE, + (i % 0xFFFF), NULL, 23, lsn_buff, 0)) + { + fprintf(stderr, "0 Can't write reference before record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_UNDO_ROW_DELETE"); + exit(1); + } + ok(1, "write LOGREC_UNDO_ROW_DELETE"); + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); + rec_len= get_len(); + if (translog_write_record(&lsn, + LOGREC_UNDO_KEY_DELETE, + (i % 0xFFFF), + NULL, LSN_STORE_SIZE * 2, lsn_buff, + rec_len, long_buffer, 0)) + { + fprintf(stderr, "0 Can't write var reference before record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_UNDO_KEY_DELETE"); + exit(1); + } + ok(1, "write LOGREC_UNDO_KEY_DELETE"); + } + int4store(long_tr_id, i); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + (i % 0xFFFF), NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_LONG_TRANSACTION_ID"); + exit(1); + } + ok(1, "write LOGREC_LONG_TRANSACTION_ID"); + + lsn_base= lsn; + + rec_len= get_len(); + if (translog_write_record(&lsn, + LOGREC_REDO_INSERT_ROW_HEAD, + (i % 0xFFFF), NULL, rec_len, long_buffer, 0)) + { + fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_REDO_INSERT_ROW_HEAD"); + exit(1); + } + ok(1, "write LOGREC_REDO_INSERT_ROW_HEAD"); + } + + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "pass2: Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0)) + { + fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + srandom(122334817L); + + rc= 1; + + { + translog_size_t len= translog_read_record_header(first_lsn, &rec); + if (len == 0) + { + fprintf(stderr, "translog_read_record_header failed (%d)\n", errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 || + rec.record_length != 6 || uint4korr(rec.header) != 0 || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF || + first_lsn != rec.lsn) + { + fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, " + "lsn(0x%lu,0x%lx)\n", + (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, + (uint)uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + ok(1, "read record"); + translog_free_record_header(&rec); + lsn= first_lsn; + if (translog_init_scanner(first_lsn, 1, &scanner)) + { + fprintf(stderr, "scanner init failed\n"); + goto err; + } + 
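The scan loop that follows re-reads every record written in the first pass: translog_read_next_record_header() returns the size of the header it read (0 on failure), a returned LSN equal to CONTROL_FILE_IMPOSSIBLE_LSN marks end-of-log, and each returned header is released with translog_free_record_header(). A condensed sketch of that scanner pattern, built only from the calls already used in this test (count_log_records() itself is a hypothetical helper, not part of the patch):

/*
  Illustrative sketch only (not part of the patch): the generic scanner
  read-back pattern the verification loop below relies on.
*/
static uint count_log_records(LSN start_lsn)
{
  struct st_translog_scanner_data scanner;
  TRANSLOG_HEADER_BUFFER rec;
  translog_size_t len;
  uint count= 0;

  if (translog_init_scanner(start_lsn, 1, &scanner))
    return 0;                                   /* scanner init failed */
  for (;;)
  {
    if ((len= translog_read_next_record_header(&scanner, &rec)) == 0)
      break;                                    /* read error */
    if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN)
      break;                                    /* end of log reached */
    count++;
    translog_free_record_header(&rec);          /* released after every read, as in the test */
  }
  return count;
}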
for (i= 1;; i++) + { + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + if (i != ITERATIONS) + { + fprintf(stderr, "EOL met at iteration %u instead of %u\n", + i, ITERATIONS); + translog_free_record_header(&rec); + goto err; + } + break; + } + + if (i % 2) + { + LSN ref; + ref= lsn_korr(rec.header); + if (rec.type != LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) || + rec.record_length != LSN_STORE_SIZE || ref != lsn) + { + fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d)" + "type %u, strid %u, len %u, ref(%lu,0x%lx), lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + else + { + LSN ref1, ref2; + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); + if (rec.type !=LOGREC_UNDO_ROW_DELETE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 23 || + ref1 != lsn || + ref2 != first_lsn || + ((uchar)rec.header[22]) != 0x55 || + ((uchar)rec.header[21]) != 0xAA || + ((uchar)rec.header[20]) != 0x55 || + ((uchar)rec.header[19]) != 0xAA || + ((uchar)rec.header[18]) != 0x55 || + ((uchar)rec.header[17]) != 0xAA || + ((uchar)rec.header[16]) != 0x55 || + ((uchar)rec.header[15]) != 0xAA || + ((uchar)rec.header[14]) != 0x55) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)" + "type %u, strid %u, len %u, ref1(%lu,0x%lx), " + "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (uint) rec.header[14], (uint) rec.header[15], + (uint) rec.header[16], (uint) rec.header[17], + (uint) rec.header[18], (uint) rec.header[19], + (uint) rec.header[20], (uint) rec.header[21], + (uint) rec.header[22], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + ok(1, "read record"); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header (var) " + "failed (%d)\n", i, errno); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + fprintf(stderr, "EOL met at the middle of iteration (first var) %u " + "instead of beginning of %u\n", i, ITERATIONS); + goto err; + } + if (i % 2) + { + LSN ref; + ref= lsn_korr(rec.header); + rec_len= get_len(); + if (rec.type !=LOGREC_UNDO_KEY_INSERT || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + LSN_STORE_SIZE || + len != 12 || ref != lsn || + check_content(rec.header + LSN_STORE_SIZE, len - LSN_STORE_SIZE)) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)" + "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " + "hdr len: %u (%d), " + "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n", + i, (uint) rec.type, + rec.type !=LOGREC_UNDO_KEY_INSERT, + (uint) rec.short_trid, + rec.short_trid != (i % 0xFFFF), + (ulong) rec.record_length, (ulong) rec_len, + rec.record_length != rec_len + LSN_STORE_SIZE, + (uint) len, + len != 12, + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) 
LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn), + (ref != lsn), + check_content(rec.header + LSN_STORE_SIZE, + len - LSN_STORE_SIZE)); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read " + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + else + { + LSN ref1, ref2; + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); + rec_len= get_len(); + if (rec.type !=LOGREC_UNDO_KEY_DELETE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + LSN_STORE_SIZE * 2 || + len != 19 || + ref1 != lsn || + ref2 != first_lsn || + check_content(rec.header + LSN_STORE_SIZE * 2, + len - LSN_STORE_SIZE * 2)) + { + fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)" + "type %u, strid %u, len %lu != %lu + 14, hdr len: %u, " + "ref1(%lu,0x%lx), ref2(%lu,0x%lx), " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2)) + { + fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + ok(1, "read record"); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + fprintf(stderr, "EOL met at the middle of iteration %u " + "instead of beginning of %u\n", i, ITERATIONS); + translog_free_record_header(&rec); + goto err; + } + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 6 || uint4korr(rec.header) != i || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF) + { + fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (uint)uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + ok(1, "read record"); + translog_free_record_header(&rec); + + lsn= rec.lsn; + + len= translog_read_next_record_header(&scanner, &rec); + rec_len= get_len(); + if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len || + len != 9 || check_content(rec.header, len)) + { + fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d)" + "type %u, strid %u, len %lu != %lu, hdr len: %u, " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 0)) + { + 
fprintf(stderr, + "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read " + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + ok(1, "read record"); + translog_free_record_header(&rec); + } + } + + rc= 0; +err: + if (rc) + ok(0, "read record"); + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + if (maria_log_remove()) + exit(1); + + return (test(exit_status())); +} diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c new file mode 100644 index 00000000000..1834e720328 --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -0,0 +1,457 @@ +#include "../maria_def.h" +#include <stdio.h> +#include <errno.h> +#include <tap.h> +extern my_bool maria_log_remove(); + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) + +/*#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC */ +#define LOG_FLAGS 0 +/*#define LONG_BUFFER_SIZE (1024L*1024L*1024L + 1024L*1024L*512)*/ +#define LONG_BUFFER_SIZE (1024L*1024L*1024L) +#define MIN_REC_LENGTH 30 +#define SHOW_DIVIDER 10 +#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) +#define ITERATIONS 3 +#define WRITERS 3 +static uint number_of_writers= WRITERS; + +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; + +static ulong lens[WRITERS][ITERATIONS]; +static LSN lsns1[WRITERS][ITERATIONS]; +static LSN lsns2[WRITERS][ITERATIONS]; +static byte *long_buffer; + +/* + Get pseudo-random length of the field in + limits [MIN_REC_LENGTH..LONG_BUFFER_SIZE] + + SYNOPSIS + get_len() + + RETURN + length - length >= 0 length <= LONG_BUFFER_SIZE +*/ + +static uint32 get_len() +{ + uint32 rec_len; + do + { + rec_len= random() / + (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH; + } while (rec_len >= LONG_BUFFER_SIZE); + return rec_len; +} + + +/* + Check that the buffer filled correctly + + SYNOPSIS + check_content() + ptr Pointer to the buffer + length length of the buffer + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool check_content(byte *ptr, ulong length) +{ + ulong i; + for (i= 0; i < length; i++) + { + if (((uchar)ptr[i]) != (i & 0xFF)) + { + fprintf(stderr, "Byte # %lu is %x instead of %x", + i, (uint) ptr[i], (uint) (i & 0xFF)); + return 1; + } + } + return 0; +} + + +/* + Read whole record content, and check content (put with offset) + + SYNOPSIS + read_and_check_content() + rec The record header buffer + buffer The buffer to read the record in + skip Skip this number of bytes ot the record content + + RETURN + 0 - OK + 1 - Error +*/ + + +static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, + byte *buffer, uint skip) +{ + int res= 0; + translog_size_t len; + + if ((len= translog_read_record(rec->lsn, 0, rec->record_length, + buffer, NULL)) != rec->record_length) + { + fprintf(stderr, "Requested %lu byte, read %lu\n", + (ulong) rec->record_length, (ulong) len); + res= 1; + } + res|= check_content(buffer + skip, rec->record_length - skip); + return(res); +} + +void writer(int num) +{ + LSN lsn; + byte long_tr_id[6]; + uint i; + + for (i= 0; i < ITERATIONS; i++) + { + uint len= get_len(); + lens[num][i]= len; + + int2store(long_tr_id, num); + int4store(long_tr_id + 2, i); + if (translog_write_record(&lsn, + LOGREC_LONG_TRANSACTION_ID, + num, NULL, 6, long_tr_id, 0)) + { + 
fprintf(stderr, "Can't write LOGREC_LONG_TRANSACTION_ID record #%lu " + "thread %i\n", (ulong) i, num); + translog_destroy(); + pthread_mutex_lock(&LOCK_thread_count); + ok(0, "write records"); + pthread_mutex_unlock(&LOCK_thread_count); + return; + } + lsns1[num][i]= lsn; + if (translog_write_record(&lsn, + LOGREC_REDO_INSERT_ROW_HEAD, + num, NULL, len, long_buffer, 0)) + { + fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); + translog_destroy(); + pthread_mutex_lock(&LOCK_thread_count); + ok(0, "write records"); + pthread_mutex_unlock(&LOCK_thread_count); + return; + } + lsns2[num][i]= lsn; + pthread_mutex_lock(&LOCK_thread_count); + ok(1, "write records"); + pthread_mutex_unlock(&LOCK_thread_count); + } + return; +} + + +static void *test_thread_writer(void *arg) +{ + int param= *((int*) arg); + + my_thread_init(); + + writer(param); + + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + ok(1, "writer finished"); /* just to show progress */ + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are + ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + return(0); +} + + +int main(int argc, char **argv __attribute__ ((unused))) +{ + uint32 i; + uint pagen; + PAGECACHE pagecache; + LSN first_lsn; + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error; + int rc; + + plan(WRITERS + ITERATIONS * WRITERS * 3); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2); + if (long_buffer == 0) + { + fprintf(stderr, "End of memory\n"); + exit(1); + } + for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++) + long_buffer[i]= (i & 0xFF); + + MY_INIT(argv[0]); + if (maria_log_remove()) + exit(1); + + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "COND_thread_count: %d from pthread_cond_init " + "(errno: %d)\n", error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init " + "(errno: %d)\n", error, errno); + exit(1); + } + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr, "Got error: %d from pthread_attr_init " + "(errno: %d)\n", error, errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error, errno); + exit(1); + } + +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + my_thread_global_init(); + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + srandom(122334817L); + { + byte long_tr_id[6]= + { + 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 + }; + + if 
(translog_write_record(&first_lsn, + LOGREC_LONG_TRANSACTION_ID, + 0, NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write the first record\n"); + translog_destroy(); + exit(1); + } + } + + + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock " + "(errno: %d)\n", error, errno); + exit(1); + } + + while (number_of_writers != 0) + { + param= (int*) malloc(sizeof(int)); + *param= number_of_writers - 1; + if ((error= pthread_create(&tid, &thr_attr, test_thread_writer, + (void*) param))) + { + fprintf(stderr, "Got error: %d from pthread_create (errno: %d)\n", + error, errno); + exit(1); + } + thread_count++; + number_of_writers--; + } + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + /* wait finishing */ + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock\n", error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count, &LOCK_thread_count))) + fprintf(stderr, "COND_thread_count: %d from pthread_cond_wait\n", error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_unlock\n", error); + + /* Find last LSN and flush up to it (all our log) */ + { + LSN max= 0; + for (i= 0; i < WRITERS; i++) + { + if (cmp_translog_addr(lsns2[i][ITERATIONS - 1], max) > 0) + max= lsns2[i][ITERATIONS - 1]; + } + translog_flush(max); + } + + rc= 1; + + { + uint indeces[WRITERS]; + uint index, len, stage; + bzero(indeces, sizeof(uint) * WRITERS); + + bzero(indeces, sizeof(indeces)); + + if (translog_init_scanner(first_lsn, 1, &scanner)) + { + fprintf(stderr, "scanner init failed\n"); + goto err; + } + for (i= 0;; i++) + { + len= translog_read_next_record_header(&scanner, &rec); + + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + if (i != WRITERS * ITERATIONS * 2) + { + fprintf(stderr, "EOL met at iteration %u instead of %u\n", + i, ITERATIONS * WRITERS * 2); + translog_free_record_header(&rec); + goto err; + } + break; + } + index= indeces[rec.short_trid] / 2; + stage= indeces[rec.short_trid] % 2; + if (stage == 0) + { + if (rec.type !=LOGREC_LONG_TRANSACTION_ID || + rec.record_length != 6 || + uint2korr(rec.header) != rec.short_trid || + index != uint4korr(rec.header + 2) || + cmp_translog_addr(lsns1[rec.short_trid][index], rec.lsn) != 0) + { + fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n" + "type %u, strid %u %u, len %u, i: %u %u, " + "lsn(%lu,0x%lx) (%lu,0x%lx)\n", + i, (uint) rec.type, + (uint) rec.short_trid, (uint) uint2korr(rec.header), + (uint) rec.record_length, + (uint) index, (uint) uint4korr(rec.header + 2), + (ulong) LSN_FILE_NO(rec.lsn), + (ulong) LSN_OFFSET(rec.lsn), + (ulong) LSN_FILE_NO(lsns1[rec.short_trid][index]), + (ulong) LSN_OFFSET(lsns1[rec.short_trid][index])); + translog_free_record_header(&rec); + goto err; + } + } + else + { + if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD || + len != 9 || + rec.record_length != lens[rec.short_trid][index] || + cmp_translog_addr(lsns2[rec.short_trid][index], rec.lsn) != 0 || + check_content(rec.header, len)) + { + fprintf(stderr, + "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d) " + " thread: %d, iteration %d, stage %d\n" + "type %u (%d), len %u, length %lu %lu (%d) " + "lsn(%lu,0x%lx) (%lu,0x%lx)\n", + 
i, (uint) rec.short_trid, index, stage, + (uint) rec.type, (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD), + (uint) len, + (ulong) rec.record_length, lens[rec.short_trid][index], + (rec.record_length != lens[rec.short_trid][index]), + (ulong) LSN_FILE_NO(rec.lsn), + (ulong) LSN_OFFSET(rec.lsn), + (ulong) LSN_FILE_NO(lsns2[rec.short_trid][index]), + (ulong) LSN_OFFSET(lsns2[rec.short_trid][index])); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 0)) + { + fprintf(stderr, + "Incorrect LOGREC_REDO_INSERT_ROW_HEAD in whole rec read " + "lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), + (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + ok(1, "record read"); + translog_free_record_header(&rec); + indeces[rec.short_trid]++; + } + } + + rc= 0; +err: + if (rc) + ok(0, "record read"); + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + if (maria_log_remove()) + exit(1); + + return(exit_status()); +} diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c new file mode 100644 index 00000000000..211eb6dea9e --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -0,0 +1,149 @@ +#include "../maria_def.h" +#include <stdio.h> +#include <errno.h> +#include <tap.h> + +extern my_bool maria_log_remove(); + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) +#define PCACHE_PAGE TRANSLOG_PAGE_SIZE +#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) +#define LOG_FLAGS 0 + +static char *first_translog_file= (char*)"maria_log.00000001"; +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; + +int main(int argc, char *argv[]) +{ + uint pagen; + uchar long_tr_id[6]; + PAGECACHE pagecache; + LSN lsn; + MY_STAT st, *stat; + + MY_INIT(argv[0]); + + plan(1); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + if (maria_log_remove()) + exit(1); + /* be sure that we have no logs in the directory*/ + if (my_stat(CONTROL_FILE_BASE_NAME, &st, MYF(0))) + my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); + if (my_stat(first_translog_file, &st, MYF(0))) + my_delete(first_translog_file, MYF(0)); + + bzero(long_tr_id, 6); +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler_pagecache.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler_pagecache.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PCACHE_PAGE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + + if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0) + { + fprintf(stderr, "There is no %s (%d)\n", first_translog_file, errno); + exit(1); + } + if (st.st_size != TRANSLOG_PAGE_SIZE) + { + fprintf(stderr, + "incorrect initial size of %s: %ld instead of %ld\n", + first_translog_file, (long)st.st_size, (long)TRANSLOG_PAGE_SIZE); + exit(1); + } + int4store(long_tr_id, 0); + if (translog_write_record(&lsn, + 
LOGREC_LONG_TRANSACTION_ID, + 0, NULL, 6, long_tr_id, 0)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); + translog_destroy(); + exit(1); + } + + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + + { + uchar page[PCACHE_PAGE]; + + bzero(page, PCACHE_PAGE); +#define PAGE_LSN_OFFSET 0 + lsn_store(page + PAGE_LSN_OFFSET, lsn); + pagecache_write(&pagecache, &file1, 0, 3, (char*)page, + PAGECACHE_LSN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + } + if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0) + { + fprintf(stderr, "can't stat %s (%d)\n", first_translog_file, errno); + exit(1); + } + if (st.st_size != TRANSLOG_PAGE_SIZE * 2) + { + fprintf(stderr, + "incorrect initial size of %s: %ld instead of %ld\n", + first_translog_file, + (long)st.st_size, (long)(TRANSLOG_PAGE_SIZE * 2)); + ok(0, "log triggered"); + exit(1); + } + ok(1, "log triggered"); + + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); + my_delete(first_translog_file, MYF(0)); + my_delete(file1_name, MYF(0)); + + exit(0); +} diff --git a/storage/maria/unittest/mf_pagecache_consist.c b/storage/maria/unittest/mf_pagecache_consist.c new file mode 100755 index 00000000000..8ea0094762c --- /dev/null +++ b/storage/maria/unittest/mf_pagecache_consist.c @@ -0,0 +1,458 @@ +/* + TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in + my_atomic-t.c (see BUG#22320). + Use diag() instead of fprintf(stderr). Use ok() and plan(). 
+*/ + +#include <tap.h> +#include <my_sys.h> +#include <m_string.h> +#include "test_file.h" +#include <tap.h> + +#define PCACHE_SIZE (PAGE_SIZE*1024*8) + +#ifndef DBUG_OFF +static const char* default_dbug_option; +#endif + +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; +static PAGECACHE pagecache; + +#ifdef TEST_HIGH_CONCURENCY +static uint number_of_readers= 10; +static uint number_of_writers= 20; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_HIGH_CONCURENCY*/ +#ifdef TEST_READERS +static uint number_of_readers= 10; +static uint number_of_writers= 1; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_READERS*/ +#ifdef TEST_WRITERS +static uint number_of_readers= 0; +static uint number_of_writers= 10; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_WRITERS*/ +static uint number_of_readers= 10; +static uint number_of_writers= 10; +static uint number_of_tests= 50000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20000; +static uint flush_divider= 1000; +#endif /*TEST_WRITERS*/ +#endif /*TEST_READERS*/ +#endif /*TEST_HIGH_CONCURENCY*/ + + +/* + Get pseudo-random length of the field in (0;limit) + + SYNOPSYS + get_len() + limit limit for generated value + + RETURN + length where length >= 0 & length < limit +*/ + +static uint get_len(uint limit) +{ + uint32 rec_len; + do + { + rec_len= random() / + (RAND_MAX / limit); + } while (rec_len >= limit || rec_len == 0); + return rec_len; +} + + +/* check page consistency */ +uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, + int tag) +{ + uint end= sizeof(uint); + uint num= *((uint *)buff); + uint i; + DBUG_ENTER("check_page"); + + for (i= 0; i < num; i++) + { + uint len= *((uint *)(buff + end)); + uint j; + end+= sizeof(uint) + sizeof(uint); + if (len + end > PAGE_SIZE) + { + diag("incorrect field header #%u by offset %lu\n", i, offset + end + j); + goto err; + } + for(j= 0; j < len; j++) + { + if (buff[end + j] != (uchar)((i+1) % 256)) + { + diag("incorrect %lu byte\n", offset + end + j); + goto err; + } + } + end+= len; + } + for(i= end; i < PAGE_SIZE; i++) + { + if (buff[i] != 0) + { + int h; + DBUG_PRINT("err", + ("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", + offset + i, offset, i, page_no, + (page_locked ? "locked" : "unlocked"), + end, num, tag)); + diag("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", + offset + i, offset, i, page_no, + (page_locked ? 
"locked" : "unlocked"), + end, num, tag); + h= my_open("wrong_page", O_CREAT | O_TRUNC | O_RDWR, MYF(0)); + my_pwrite(h, (byte*) buff, PAGE_SIZE, 0, MYF(0)); + my_close(h, MYF(0)); + goto err; + } + } + DBUG_RETURN(end); +err: + DBUG_PRINT("err", ("try to flush")); + if (page_locked) + { + pagecache_delete_page(&pagecache, &file1, page_no, + PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); + } + else + { + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + } + exit(1); +} + +void put_rec(uchar *buff, uint end, uint len, uint tag) +{ + uint i; + uint num= *((uint *)buff); + if (!len) + len= 1; + if (end + sizeof(uint)*2 + len > PAGE_SIZE) + return; + *((uint *)(buff + end))= len; + end+= sizeof(uint); + *((uint *)(buff + end))= tag; + end+= sizeof(uint); + num++; + *((uint *)buff)= num; + *((uint*)(buff + end))= len; + for (i= end; i < (len + end); i++) + { + buff[i]= (uchar) num % 256; + } +} + +/* + Recreate and reopen a file for test + + SYNOPSIS + reset_file() + file File to reset + file_name Path (and name) of file which should be reset +*/ + +void reset_file(PAGECACHE_FILE file, char *file_name) +{ + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + if (my_close(file1.file, MYF(0)) != 0) + { + diag("Got error during %s closing from close() (errno: %d)\n", + file_name, errno); + exit(1); + } + my_delete(file_name, MYF(0)); + if ((file.file= my_open(file_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + diag("Got error during %s creation from open() (errno: %d)\n", + file_name, errno); + exit(1); + } +} + + +void reader(int num) +{ + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + + for (i= 0; i < number_of_tests; i++) + { + uint page= get_len(number_of_pages); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + check_page(buffr, page * PAGE_SIZE, 0, page, -num); + if (i % 500 == 0) + printf("reader%d: %d\n", num, i); + + } + printf("reader%d: done\n", num); + free(buffr); +} + + +void writer(int num) +{ + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + + for (i= 0; i < number_of_tests; i++) + { + uint end; + uint page= get_len(number_of_pages); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + end= check_page(buffr, page * PAGE_SIZE, 1, page, num); + put_rec(buffr, end, get_len(record_length_limit), num); + pagecache_write(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, + PAGECACHE_WRITE_DELAY, + 0); + + if (i % flush_divider == 0) + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + if (i % 500 == 0) + printf("writer%d: %d\n", num, i); + } + printf("writer%d: done\n", num); + free(buffr); +} + + +static void *test_thread_reader(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_reader"); + DBUG_PRINT("enter", ("param: %d", param)); + + reader(param); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + +static void *test_thread_writer(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_writer"); + DBUG_PRINT("enter", ("param: %d", param)); + + writer(param); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + 
pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + +int main(int argc, char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error, pagen; + + MY_INIT(argv[0]); + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\test_pagecache_consist.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/test_pagecache_consist.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + DBUG_ENTER("main"); + DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("file1: %d", file1.file)); + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + my_pwrite(file1.file, "test file", 9, 0, MYF(0)); + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "COND_thread_count: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", + error,errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error,errno); + exit(1); + } + +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PAGE_SIZE)) == 0) + { + fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("Page cache %d pages", pagen)); + { + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + memset(buffr, '\0', PAGE_SIZE); + for (i= 0; i < number_of_pages; i++) + { + pagecache_write(&pagecache, &file1, i, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + } + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + free(buffr); + } + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock (errno: %d)\n", + error,errno); + exit(1); + } + while (number_of_readers != 0 || number_of_writers != 0) + { + if (number_of_readers != 0) + { + param=(int*) malloc(sizeof(int)); + *param= number_of_readers; + if ((error= pthread_create(&tid, &thr_attr, test_thread_reader, + (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + number_of_readers--; + } + if (number_of_writers != 0) + { + param=(int*) malloc(sizeof(int)); + *param= number_of_writers; + if ((error= pthread_create(&tid, &thr_attr, test_thread_writer, + (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + number_of_writers--; + } + } + 
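The writer threads started above fill pages in the layout that put_rec() and check_page() earlier in this file agree on: the first uint of a page holds the record count, each record is prefixed by two uints (payload length and writer tag), every payload byte equals the record's 1-based ordinal modulo 256, and the tail of the page must stay zero. A stand-alone sketch of a verifier for that layout (parse_page() is hypothetical; only the format is taken from the functions above):

#include <string.h>

/*
  Hypothetical verifier for the page layout used by put_rec()/check_page():
  [uint nrecs][uint len][uint tag][len payload bytes] ... [zero-filled tail]
  Returns the number of records, or -1 if the page is inconsistent.
*/
static int parse_page(const unsigned char *page, unsigned int page_size)
{
  unsigned int nrecs, i, end= sizeof(unsigned int);

  memcpy(&nrecs, page, sizeof(unsigned int));            /* record count */
  for (i= 0; i < nrecs; i++)
  {
    unsigned int len, tag, j;
    memcpy(&len, page + end, sizeof(unsigned int));      /* payload length */
    memcpy(&tag, page + end + sizeof(unsigned int),
           sizeof(unsigned int));                        /* writer tag (not checked here) */
    end+= 2 * sizeof(unsigned int);
    if (end + len > page_size)
      return -1;                                         /* header runs past the page */
    for (j= 0; j < len; j++)
      if (page[end + j] != (unsigned char) ((i + 1) % 256))
        return -1;                                       /* payload byte corrupted */
    end+= len;
  }
  for (; end < page_size; end++)
    if (page[end] != 0)
      return -1;                                         /* tail must remain zero */
  return (int) nrecs;
}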
DBUG_PRINT("info", ("Thread started")); + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + /* wait finishing */ + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock\n",error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) + fprintf(stderr,"COND_thread_count: %d from pthread_cond_wait\n",error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_unlock\n",error); + DBUG_PRINT("info", ("thread ended")); + + end_pagecache(&pagecache, 1); + DBUG_PRINT("info", ("Page cache ended")); + + if (my_close(file1.file, MYF(0)) != 0) + { + fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", + errno); + exit(1); + } + /*my_delete(file1_name, MYF(0));*/ + my_end(0); + + DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); + + DBUG_PRINT("info", ("Program end")); + + DBUG_RETURN(exit_status()); +} diff --git a/storage/maria/unittest/mf_pagecache_single.c b/storage/maria/unittest/mf_pagecache_single.c new file mode 100644 index 00000000000..91cceee618d --- /dev/null +++ b/storage/maria/unittest/mf_pagecache_single.c @@ -0,0 +1,580 @@ +/* + TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in + my_atomic-t.c (see BUG#22320). + Use diag() instead of fprintf(stderr). +*/ +#include <tap.h> +#include <my_sys.h> +#include <m_string.h> +#include "test_file.h" +#include <tap.h> + +#define PCACHE_SIZE (PAGE_SIZE*1024*10) + +#ifndef DBUG_OFF +static const char* default_dbug_option; +#endif + +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; +static PAGECACHE pagecache; + +/* + File contance descriptors +*/ +static struct file_desc simple_read_write_test_file[]= +{ + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_read_change_write_read_test_file[]= +{ + {PAGE_SIZE/2, '\65'}, + {PAGE_SIZE/2, '\1'}, + { 0, 0} +}; +static struct file_desc simple_pin_test_file1[]= +{ + {PAGE_SIZE*2, '\1'}, + { 0, 0} +}; +static struct file_desc simple_pin_test_file2[]= +{ + {PAGE_SIZE/2, '\1'}, + {PAGE_SIZE/2, (unsigned char)129}, + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_delete_forget_test_file[]= +{ + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_delete_flush_test_file[]= +{ + {PAGE_SIZE, '\2'}, + { 0, 0} +}; + + +/* + Recreate and reopen a file for test + + SYNOPSIS + reset_file() + file File to reset + file_name Path (and name) of file which should be reset +*/ + +void reset_file(PAGECACHE_FILE file, char *file_name) +{ + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + if (my_close(file1.file, MYF(0)) != 0) + { + diag("Got error during %s closing from close() (errno: %d)\n", + file_name, errno); + exit(1); + } + my_delete(file_name, MYF(0)); + if ((file.file= my_open(file_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + diag("Got error during %s creation from open() (errno: %d)\n", + file_name, errno); + exit(1); + } +} + +/* + Write then read page, check file on disk +*/ + +int simple_read_write_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_read_write_test"); + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + 
PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), + "Simple write-read page "); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_read_write_test_file))), + "Simple write-read page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + Prepare page, then read (and lock), change (write new value and unlock), + then check the page in the cache and on the disk +*/ +int simple_read_change_write_read_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_read_change_write_read_test"); + /* prepare the file */ + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + bfill(buffw, PAGE_SIZE/2, '\65'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, + PAGECACHE_WRITE_DELAY, + 0); + + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), + "Simple read-change-write-read page "); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_read_change_write_read_test_file))), + "Simple read-change-write-read page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + Prepare page, read page 0 (and pin) then write page 1 and page 0. + Flush the file (shold flush only page 1 and return 1 (page 0 is + still pinned). + Check file on the disk. + Unpin and flush. + Check file on the disk. 
+*/ +int simple_pin_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_pin_test"); + /* prepare the file */ + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + /* test */ + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("error in flush_pagecache_blocks\n"); + exit(1); + } + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + pagecache_write(&pagecache, &file1, 1, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + bfill(buffw + PAGE_SIZE/2, PAGE_SIZE/2, ((unsigned char) 129)); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, + 0); + /* + We have to get error because one page of the file is pinned, + other page should be flushed + */ + if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Did not get error in flush_pagecache_blocks\n"); + res= 0; + goto err; + } + ok((res= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE*2, + simple_pin_test_file1))), + "Simple pin page file with pin"); + pagecache_unlock_page(&pagecache, + &file1, + 0, + PAGECACHE_LOCK_READ_UNLOCK, + PAGECACHE_UNPIN, + 0); + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Got error in flush_pagecache_blocks\n"); + res= 0; + goto err; + } + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE, + simple_pin_test_file2))), + "Simple pin page result file"); + if (res) + reset_file(file1, file1_name); +err: + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + +/* + Prepare page, write new value, then delete page from cache without flush, + on the disk should be page with old content written during preparation +*/ + +int simple_delete_forget_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_delete_forget_test"); + /* prepare the file */ + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + bfill(buffw, PAGE_SIZE, '\2'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_delete_page(&pagecache, &file1, 0, + PAGECACHE_LOCK_WRITE, 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_delete_forget_test_file))), + "Simple delete-forget page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + +/* + Prepare page with locking, write new content to the page, + delete page with flush and on existing lock, + check that page on disk contain new value. 
+*/ + +int simple_delete_flush_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_delete_flush_test"); + /* prepare the file */ + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + PAGECACHE_PIN, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + bfill(buffw, PAGE_SIZE, '\2'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_delete_page(&pagecache, &file1, 0, + PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_delete_flush_test_file))), + "Simple delete-forget page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + write then read file bigger then cache +*/ + +int simple_big_test() +{ + unsigned char *buffw= (unsigned char *)malloc(PAGE_SIZE); + unsigned char *buffr= (unsigned char *)malloc(PAGE_SIZE); + struct file_desc *desc= + (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2) + 1) * + sizeof(struct file_desc)); + int res, i; + DBUG_ENTER("simple_big_test"); + /* prepare the file twice larger then cache */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) + { + bfill(buffw, PAGE_SIZE, (unsigned char) (i & 0xff)); + desc[i].length= PAGE_SIZE; + desc[i].content= (i & 0xff); + pagecache_write(&pagecache, &file1, i, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + } + desc[i].length= 0; + desc[i].content= '\0'; + ok(1, "Simple big file write"); + /* check written pages sequentally read */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) + { + int j; + pagecache_read(&pagecache, &file1, i, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + for(j= 0; j < PAGE_SIZE; j++) + { + if (buffr[j] != (i & 0xff)) + { + diag("simple_big_test seq: page %u byte %u mismatch\n", i, j); + return 0; + } + } + } + ok(1, "simple big file sequentally read"); + /* chack random reads */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE); i++) + { + int j, page; + page= rand() % (PCACHE_SIZE/(PAGE_SIZE/2)); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + for(j= 0; j < PAGE_SIZE; j++) + { + if (buffr[j] != (page & 0xff)) + { + diag("simple_big_test rnd: page %u byte %u mismatch\n", page, j); + return 0; + } + } + } + ok(1, "simple big file random read"); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + + ok((res= test(test_file(file1, file1_name, PCACHE_SIZE*2, PAGE_SIZE, + desc))), + "Simple big file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} +/* + Thread function +*/ + +static void *test_thread(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_thread"); + + DBUG_PRINT("enter", ("param: %d", param)); + + if (!simple_read_write_test() || + !simple_read_change_write_read_test() || + !simple_pin_test() || + !simple_delete_forget_test() || + !simple_delete_flush_test() || + !simple_big_test()) + exit(1); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + 
pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + + +int main(int argc, char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error, pagen; + + MY_INIT(argv[0]); + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\test_pagecache_single.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/test_pagecache_single.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + DBUG_ENTER("main"); + DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("file1: %d", file1.file)); + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + my_pwrite(file1.file, "test file", 9, 0, MYF(0)); + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", + error,errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error,errno); + exit(1); + } + +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + plan(12); + + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PAGE_SIZE)) == 0) + { + fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("Page cache %d pages", pagen)); + + if ((error=pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr,"Got error: %d from pthread_mutex_lock (errno: %d)\n", + error,errno); + exit(1); + } + param=(int*) malloc(sizeof(int)); + *param= 1; + if ((error= pthread_create(&tid, &thr_attr, test_thread, (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + DBUG_PRINT("info", ("Thread started")); + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_mutex_lock\n",error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_cond_wait\n",error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_mutex_unlock\n",error); + DBUG_PRINT("info", ("thread ended")); + + end_pagecache(&pagecache, 1); + DBUG_PRINT("info", ("Page cache ended")); + + if (my_close(file1.file, MYF(0)) != 0) + { + fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", + errno); + exit(1); + } + /*my_delete(file1_name, MYF(0));*/ + my_end(0); + + DBUG_PRINT("info", ("file1 (%d) 
closed", file1.file)); + + DBUG_PRINT("info", ("Program end")); + + DBUG_RETURN(exit_status()); +} diff --git a/storage/maria/unittest/test_file.c b/storage/maria/unittest/test_file.c new file mode 100644 index 00000000000..758d0bfa81b --- /dev/null +++ b/storage/maria/unittest/test_file.c @@ -0,0 +1,68 @@ +#include <tap.h> +#include <my_sys.h> +#include <my_dir.h> +#include "test_file.h" + + +/* + Check that file contance correspond to descriptor + + SYNOPSIS + test_file() + file File to test + file_name Path (and name) of file which is tested + size size of file + buff_size size of buffer which is enought to check the file + desc file descriptor to check with + + RETURN + 1 file if OK + 0 error +*/ + +int test_file(PAGECACHE_FILE file, char *file_name, + off_t size, size_t buff_size, struct file_desc *desc) +{ + MY_STAT stat_buff, *stat; + unsigned char *buffr= malloc(buff_size); + off_t pos= 0; + size_t byte; + int step= 0; + + if ((stat= my_stat(file_name, &stat_buff, MYF(0))) == NULL) + { + diag("Can't stat() %s (errno: %d)\n", file_name, errno); + return 0; + } + if (stat->st_size != size) + { + diag("file %s size is %lu (should be %lu)\n", + file_name, (ulong) stat->st_size, (ulong) size); + return 0; + } + /* check content */ + my_seek(file.file, 0, SEEK_SET, MYF(0)); + while (desc[step].length != 0) + { + if (my_read(file.file, (char*)buffr, desc[step].length, MYF(0)) != + desc[step].length) + { + diag("Can't read %u bytes from %s (errno: %d)\n", + (uint)desc[step].length, file_name, errno); + return 0; + } + for (byte= 0; byte < desc[step].length; byte++) + { + if (buffr[byte] != desc[step].content) + { + diag("content of %s mismatch 0x%x in position %lu instead of 0x%x\n", + file_name, (uint) buffr[byte], (ulong) (pos + byte), + desc[step].content); + return 0; + } + } + pos+= desc[step].length; + step++; + } + return 1; +} diff --git a/storage/maria/unittest/test_file.h b/storage/maria/unittest/test_file.h new file mode 100644 index 00000000000..bfc660b13d0 --- /dev/null +++ b/storage/maria/unittest/test_file.h @@ -0,0 +1,14 @@ +#include <m_string.h> +#include <pagecache.h> + +/* + File content descriptor +*/ +struct file_desc +{ + unsigned int length; + unsigned char content; +}; + +int test_file(PAGECACHE_FILE file, char *file_name, + off_t size, size_t buff_size, struct file_desc *desc); diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c new file mode 100644 index 00000000000..7d97794b685 --- /dev/null +++ b/storage/maria/unittest/trnman-t.c @@ -0,0 +1,185 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <tap.h> + +#include <my_global.h> +#include <my_sys.h> +#include <my_atomic.h> +#include <lf.h> +#include <m_string.h> +#include "../trnman.h" + +pthread_mutex_t rt_mutex; +int rt_num_threads; + +int litmus; + +/* + create and end (commit or rollback) transactions randomly +*/ +#define MAX_ITER 100 +pthread_handler_t test_trnman(void *arg) +{ + int m= (*(int *)arg); + uint x, y, i, n; + TRN *trn[MAX_ITER]; + pthread_mutex_t mutexes[MAX_ITER]; + pthread_cond_t conds[MAX_ITER]; + + for (i= 0; i < MAX_ITER; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init(&conds[i], 0); + } + + for (x= ((int)(intptr)(&m)); m > 0; ) + { + y= x= (x*LL(3628273133) + LL(1500450271)) % LL(9576890767); /* three prime numbers */ + m-= n= x % MAX_ITER; + for (i= 0; i < n; i++) + { + trn[i]= trnman_new_trn(&mutexes[i], &conds[i]); + if (!trn[i]) + { + diag("trnman_new_trn() failed"); + litmus++; + } + } + for (i= 0; i < n; i++) + { + y= (y*19 + 7) % 31; + trnman_end_trn(trn[i], y & 1); + } + } + for (i= 0; i < MAX_ITER; i++) + { + pthread_mutex_destroy(&mutexes[i]); + pthread_cond_destroy(&conds[i]); + } + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + pthread_mutex_unlock(&rt_mutex); + + return 0; +} +#undef MAX_ITER + +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Testing %s with %d threads, %d iterations... ", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +#define ok_read_from(T1, T2, RES) \ + i= trnman_can_read_from(trn[T1], trn[T2]->trid); \ + ok(i == RES, "trn" #T1 " %s read from trn" #T2, i ? 
"can" : "cannot") +#define start_transaction(T) \ + trn[T]= trnman_new_trn(&mutexes[T], &conds[T]) +#define commit(T) trnman_commit_trn(trn[T]) +#define abort(T) trnman_abort_trn(trn[T]) + +#define Ntrns 4 +void test_trnman_read_from() +{ + TRN *trn[Ntrns]; + pthread_mutex_t mutexes[Ntrns]; + pthread_cond_t conds[Ntrns]; + int i; + + for (i= 0; i < Ntrns; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init(&conds[i], 0); + } + + start_transaction(0); /* start trn1 */ + start_transaction(1); /* start trn2 */ + ok_read_from(1, 0, 0); + commit(0); /* commit trn1 */ + start_transaction(2); /* start trn4 */ + abort(2); /* abort trn4 */ + start_transaction(3); /* start trn5 */ + ok_read_from(3, 0, 1); + ok_read_from(3, 1, 0); + ok_read_from(3, 2, 0); + commit(1); /* commit trn2 */ + ok_read_from(3, 1, 0); + commit(3); /* commit trn5 */ + + for (i= 0; i < Ntrns; i++) + { + pthread_mutex_destroy(&mutexes[i]); + pthread_cond_destroy(&conds[i]); + } +} + +int main() +{ + my_init(); + + plan(6); + + if (my_atomic_initialize()) + return exit_status(); + + pthread_mutex_init(&rt_mutex, 0); + +#define CYCLES 10000 +#define THREADS 10 + + trnman_init(); + + test_trnman_read_from(); + run_test("trnman", test_trnman, THREADS, CYCLES); + + diag("mallocs: %d", trnman_allocated_transactions); + { + ulonglong now= my_getsystime(); + trnman_destroy(); + now= my_getsystime()-now; + diag("trnman_destroy: %g", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am index f50c312b8e4..4bd0b177daa 100644 --- a/storage/myisam/Makefile.am +++ b/storage/myisam/Makefile.am @@ -97,8 +97,8 @@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \ mi_delete_table.c mi_rename.c mi_check.c \ mi_keycache.c mi_preload.c \ ft_parser.c ft_stopwords.c ft_static.c \ - ft_update.c ft_boolean_search.c ft_nlq_search.c sort.c \ - ha_myisam.cc \ + ft_update.c ft_boolean_search.c ft_nlq_search.c \ + sort.c ha_myisam.cc ft_myisam.c \ rt_index.c rt_key.c rt_mbr.c rt_split.c sp_key.c CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all rt_test.MY? sp_test.MY? diff --git a/storage/myisam/ft_boolean_search.c b/storage/myisam/ft_boolean_search.c index 68076d7e401..b1b67099b22 100644 --- a/storage/myisam/ft_boolean_search.c +++ b/storage/myisam/ft_boolean_search.c @@ -149,7 +149,7 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { /* ORDER BY word DESC, ndepth DESC */ - int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, + int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, (uchar*) (*a)->word+1,(*a)->len-1,0,0); if (!i) i=CMP_NUM((*b)->ndepth,(*a)->ndepth); @@ -183,7 +183,7 @@ static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param, case FT_TOKEN_WORD: ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_WORD) + - (info->trunc ? MI_MAX_KEY_BUFF : + (info->trunc ? 
HA_MAX_KEY_BUFF : word_len * ftb_param->ftb->charset->mbmaxlen + HA_FT_WLEN + ftb_param->ftb->info->s->rec_reflength)); @@ -333,7 +333,6 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) uint off, extra=HA_FT_WLEN+info->s->base.rec_reflength; byte *lastkey_buf=ftbw->word+ftbw->off; - LINT_INIT(off); if (ftbw->flags & FTB_FLAG_TRUNC) lastkey_buf+=ftbw->len; @@ -377,7 +376,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) if (!r && !ftbw->off) { - r= mi_compare_text(ftb->charset, + r= ha_compare_text(ftb->charset, info->lastkey+1, info->lastkey_length-extra-1, (uchar*) ftbw->word+1, @@ -837,7 +836,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param, for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2) { ftbw= ftb->list[c]; - if (mi_compare_text(ftb->charset, (uchar*)word, len, + if (ha_compare_text(ftb->charset, (uchar*)word, len, (uchar*)ftbw->word+1, ftbw->len-1, (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0) b= c; @@ -847,7 +846,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param, for (; c >= 0; c--) { ftbw= ftb->list[c]; - if (mi_compare_text(ftb->charset, (uchar*)word, len, + if (ha_compare_text(ftb->charset, (uchar*)word, len, (uchar*)ftbw->word + 1,ftbw->len - 1, (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0)) break; diff --git a/storage/myisam/ft_myisam.c b/storage/myisam/ft_myisam.c new file mode 100644 index 00000000000..76c04ba4c0b --- /dev/null +++ b/storage/myisam/ft_myisam.c @@ -0,0 +1,36 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. 
Golubchik, who has a shared copyright to this code */ + +/* + This function is for interface functions between fulltext and myisam +*/ + +#include "ftdefs.h" + +FT_INFO *ft_init_search(uint flags, void *info, uint keynr, + byte *query, uint query_len, CHARSET_INFO *cs, + byte *record) +{ + FT_INFO *res; + if (flags & FT_BOOL) + res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs); + else + res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, + record); + return res; +} diff --git a/storage/myisam/ft_nlq_search.c b/storage/myisam/ft_nlq_search.c index 5c6f66897ee..58d88731201 100644 --- a/storage/myisam/ft_nlq_search.c +++ b/storage/myisam/ft_nlq_search.c @@ -103,7 +103,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) { if (keylen && - mi_compare_text(aio->charset,info->lastkey+1, + ha_compare_text(aio->charset,info->lastkey+1, info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0)) break; diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c index 5992d9c118e..59fa25469f1 100644 --- a/storage/myisam/ft_parser.c +++ b/storage/myisam/ft_parser.c @@ -31,7 +31,7 @@ typedef struct st_my_ft_parser_param static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) { - return mi_compare_text(cs, (uchar*) w1->pos, w1->len, + return ha_compare_text(cs, (uchar*) w1->pos, w1->len, (uchar*) w2->pos, w2->len, 0, 0); } diff --git a/storage/myisam/ft_static.c b/storage/myisam/ft_static.c index 34608be1721..ea3c336cc7a 100644 --- a/storage/myisam/ft_static.c +++ b/storage/myisam/ft_static.c @@ -55,19 +55,6 @@ const struct _ft_vft _ft_vft_boolean = { }; -FT_INFO *ft_init_search(uint flags, void *info, uint keynr, - byte *query, uint query_len, CHARSET_INFO *cs, - byte *record) -{ - FT_INFO *res; - if (flags & FT_BOOL) - res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs); - else - res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, - record); - return res; -} - const char *ft_stopword_file = 0; const char *ft_precompiled_stopwords[] = { diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c index 63732ebadc9..1b6cff5e903 100644 --- a/storage/myisam/ft_stopwords.c +++ b/storage/myisam/ft_stopwords.c @@ -29,7 +29,7 @@ static TREE *stopwords3=NULL; static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), FT_STOPWORD *w1, FT_STOPWORD *w2) { - return mi_compare_text(default_charset_info, + return ha_compare_text(default_charset_info, (uchar *)w1->pos,w1->len, (uchar *)w2->pos,w2->len,0,0); } diff --git a/storage/myisam/ft_update.c b/storage/myisam/ft_update.c index e176d550b1d..a5a17ff4bd2 100644 --- a/storage/myisam/ft_update.c +++ b/storage/myisam/ft_update.c @@ -180,7 +180,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2) { if ((ftsi1.pos != ftsi2.pos) && (!ftsi1.pos || !ftsi2.pos || - mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, + ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, (uchar*) ftsi2.pos,ftsi2.len,0,0))) DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); } @@ -209,7 +209,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, error=0; while(old_word->pos && new_word->pos) { - cmp= mi_compare_text(cs, (uchar*) old_word->pos,old_word->len, + cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len, (uchar*) new_word->pos,new_word->len,0,0); cmp2= cmp ? 
0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); diff --git a/storage/myisam/fulltext.h b/storage/myisam/fulltext.h index bea2fa96969..56cca3257a2 100644 --- a/storage/myisam/fulltext.h +++ b/storage/myisam/fulltext.h @@ -20,18 +20,8 @@ #include "myisamdef.h" #include "ft_global.h" -#define HA_FT_WTYPE HA_KEYTYPE_FLOAT -#define HA_FT_WLEN 4 -#define FT_SEGS 2 - -#define ft_sintXkorr(A) mi_sint4korr(A) -#define ft_intXstore(T,A) mi_int4store(T,A) - -extern const HA_KEYSEG ft_keysegs[FT_SEGS]; - int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *); int _mi_ft_add(MI_INFO *, uint, byte *, const byte *, my_off_t); int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t); uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *); - diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index 06ec5c4b44e..0113fe80a08 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -22,6 +22,7 @@ #include "mysql_priv.h" #include <mysql/plugin.h> #include <m_ctype.h> +#include <my_bit.h> #include <myisampack.h> #include "ha_myisam.h" #include <stdarg.h> @@ -56,7 +57,7 @@ static handler *myisam_create_handler(handlerton *hton, // collect errors printed by mi_check routines -static void mi_check_print_msg(MI_CHECK *param, const char* msg_type, +static void mi_check_print_msg(HA_CHECK *param, const char* msg_type, const char *fmt, va_list args) { THD* thd = (THD*)param->thd; @@ -399,13 +400,13 @@ int check_definition(MI_KEYDEF *t1_keyinfo, MI_COLUMNDEF *t1_recinfo, extern "C" { -volatile int *killed_ptr(MI_CHECK *param) +volatile int *killed_ptr(HA_CHECK *param) { /* In theory Unsafe conversion, but should be ok for now */ return (int*) &(((THD *)(param->thd))->killed); } -void mi_check_print_error(MI_CHECK *param, const char *fmt,...) +void mi_check_print_error(HA_CHECK *param, const char *fmt,...) { param->error_printed|=1; param->out_flag|= O_DATA_LOST; @@ -415,7 +416,7 @@ void mi_check_print_error(MI_CHECK *param, const char *fmt,...) va_end(args); } -void mi_check_print_info(MI_CHECK *param, const char *fmt,...) +void mi_check_print_info(HA_CHECK *param, const char *fmt,...) { va_list args; va_start(args, fmt); @@ -423,7 +424,7 @@ void mi_check_print_info(MI_CHECK *param, const char *fmt,...) va_end(args); } -void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) +void mi_check_print_warning(HA_CHECK *param, const char *fmt,...) 
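/*
  Editorial aside, not part of the changeset: these extern "C" hooks are
  the bridge between the generic check/repair code and the SQL layer, and
  the MI_CHECK -> HA_CHECK rename apparently exists so the same structure
  can be shared with the new Maria engine.  killed_ptr() above lets the
  long-running check loops poll THD::killed so a CHECK/REPAIR can be
  aborted, e.g. (as chk_data_link() does further down in mi_check.c):

      if (*killed_ptr(param))
        goto err2;
*/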
{ param->warning_printed=1; param->out_flag|= O_DATA_LOST; @@ -674,7 +675,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) { if (!file) return HA_ADMIN_INTERNAL_ERROR; int error; - MI_CHECK param; + HA_CHECK param; MYISAM_SHARE* share = file->s; const char *old_proc_info=thd->proc_info; @@ -685,7 +686,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) param.db_name= table->s->db.str; param.table_name= table->alias; param.testflag = check_opt->flags | T_CHECK | T_SILENT; - param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method; if (!(table->db_stat & HA_READ_ONLY)) param.testflag|= T_STATISTICS; @@ -766,7 +767,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt) { int error=0; - MI_CHECK param; + HA_CHECK param; MYISAM_SHARE* share = file->s; myisamchk_init(¶m); @@ -777,7 +778,7 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt) param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS | T_DONT_CHECK_CHECKSUM); param.using_global_keycache = 1; - param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method; if (!(share->state.changed & STATE_NOT_ANALYZED)) return HA_ADMIN_ALREADY_DONE; @@ -826,7 +827,7 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) err: { - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "restore"; @@ -889,7 +890,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) err: { - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "backup"; @@ -905,7 +906,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt) { int error; - MI_CHECK param; + HA_CHECK param; ha_rows start_records; if (!file) return HA_ADMIN_INTERNAL_ERROR; @@ -955,7 +956,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt) { int error; if (!file) return HA_ADMIN_INTERNAL_ERROR; - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd = thd; @@ -974,7 +975,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt) } -int ha_myisam::repair(THD *thd, MI_CHECK ¶m, bool do_optimize) +int ha_myisam::repair(THD *thd, HA_CHECK ¶m, bool do_optimize) { int error=0; uint local_testflag=param.testflag; @@ -1152,7 +1153,7 @@ int ha_myisam::assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt) if (error != HA_ADMIN_OK) { /* Send error to user */ - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "assign_to_keycache"; @@ -1220,7 +1221,7 @@ int ha_myisam::preload_keys(THD* thd, HA_CHECK_OPT *check_opt) err: { - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "preload_keys"; @@ -1327,7 +1328,7 @@ int ha_myisam::enable_indexes(uint mode) else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE) { THD *thd=current_thd; - MI_CHECK param; + HA_CHECK param; const char *save_proc_info=thd->proc_info; thd->proc_info="Creating index"; myisamchk_init(¶m); @@ -1336,7 +1337,7 @@ int ha_myisam::enable_indexes(uint mode) T_CREATE_MISSING_KEYS); param.myf_rw&= ~MY_WAIT_IF_FULL; param.sort_buffer_length= thd->variables.myisam_sort_buff_size; - param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method; 
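/*
  Editorial sketch, not part of the changeset: after the rename, every
  admin entry point in this file prepares the HA_CHECK object the same
  way.  Assuming the HA_CHECK / myisamchk_init() declarations this
  changeset relies on, the minimal sequence (field names taken from the
  ha_myisam::check() hunk above) is:

      HA_CHECK param;
      myisamchk_init(&param);                  /* zero-fill + defaults */
      param.thd= thd;
      param.op_name= "check";
      param.testflag= check_opt->flags | T_CHECK | T_SILENT;
      param.stats_method=
        (enum_handler_stats_method) thd->variables.myisam_stats_method;

  after which the chk_*() / mi_repair*() routines in mi_check.c are
  called with &param.
*/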
param.tmpdir=&mysql_tmpdir_list; if ((error= (repair(thd,param,0) != HA_ADMIN_OK)) && param.retry_repair) { @@ -1839,7 +1840,7 @@ void ha_myisam::get_auto_increment(ulonglong offset, ulonglong increment, { ulonglong nr; int error; - byte key[MI_MAX_KEY_LENGTH]; + byte key[HA_MAX_KEY_LENGTH]; if (!table->s->next_number_key_offset) { // Autoincrement at key-start diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index 882900bd35f..4eaeb11fece 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -21,6 +21,7 @@ /* class for the the myisam handler */ #include <myisam.h> +#include <myisamchk.h> #include <ft_global.h> #define HA_RECOVER_NONE 0 /* No automatic recover */ @@ -39,7 +40,7 @@ class ha_myisam: public handler ulonglong int_table_flags; char *data_file_name, *index_file_name; bool can_enable_indexes; - int repair(THD *thd, MI_CHECK ¶m, bool optimize); + int repair(THD *thd, HA_CHECK ¶m, bool optimize); public: ha_myisam(handlerton *hton, TABLE_SHARE *table_arg); @@ -56,8 +57,8 @@ class ha_myisam: public handler HA_READ_ORDER | HA_KEYREAD_ONLY); } uint max_supported_keys() const { return MI_MAX_KEY; } - uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; } - uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; } + uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; } + uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; } uint checksum() const; virtual bool check_if_locking_is_allowed(uint sql_command, diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index 7bcb8041fe0..8c955acf011 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -59,14 +59,14 @@ /* Functions defined in this file */ -static int check_k_link(MI_CHECK *param, MI_INFO *info,uint nr); -static int chk_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, +static int check_k_link(HA_CHECK *param, MI_INFO *info,uint nr); +static int chk_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, my_off_t page, uchar *buff, ha_rows *keys, ha_checksum *key_checksum, uint level); static uint isam_key_length(MI_INFO *info,MI_KEYDEF *keyinfo); static ha_checksum calc_checksum(ha_rows count); static int writekeys(MI_SORT_PARAM *sort_param); -static int sort_one_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, +static int sort_one_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, my_off_t pagepos, File new_file); static int sort_key_read(MI_SORT_PARAM *sort_param,void *key); static int sort_ft_key_read(MI_SORT_PARAM *sort_param,void *key); @@ -80,13 +80,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, reg1 SORT_KEY_BLOCKS *key_block, uchar *key, my_off_t prev_block); static int sort_delete_record(MI_SORT_PARAM *sort_param); -/*static int flush_pending_blocks(MI_CHECK *param);*/ -static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, +/*static int flush_pending_blocks(HA_CHECK *param);*/ +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, uint buffer_length); static ha_checksum mi_byte_checksum(const byte *buf, uint length); -static void set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share); +static void set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share); -void myisamchk_init(MI_CHECK *param) +void myisamchk_init(HA_CHECK *param) { bzero((gptr) param,sizeof(*param)); param->opt_follow_links=1; @@ -108,7 +108,7 @@ void myisamchk_init(MI_CHECK *param) /* Check the status flags for the table */ -int 
chk_status(MI_CHECK *param, register MI_INFO *info) +int chk_status(HA_CHECK *param, register MI_INFO *info) { MYISAM_SHARE *share=info->s; @@ -136,7 +136,7 @@ int chk_status(MI_CHECK *param, register MI_INFO *info) /* Check delete links */ -int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag) +int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag) { reg2 ha_rows i; uint delete_link_length; @@ -245,7 +245,7 @@ wrong: /* Check delete links in index file */ -static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) +static int check_k_link(HA_CHECK *param, register MI_INFO *info, uint nr) { my_off_t next_link; uint block_size=(nr+1)*MI_MIN_KEY_BLOCK_LENGTH; @@ -322,7 +322,7 @@ static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) /* Check sizes of files */ -int chk_size(MI_CHECK *param, register MI_INFO *info) +int chk_size(HA_CHECK *param, register MI_INFO *info) { int error=0; register my_off_t skr,size; @@ -398,7 +398,7 @@ int chk_size(MI_CHECK *param, register MI_INFO *info) /* Check keys */ -int chk_key(MI_CHECK *param, register MI_INFO *info) +int chk_key(HA_CHECK *param, register MI_INFO *info) { uint key,found_keys=0,full_text_keys=0,result=0; ha_rows keys; @@ -583,7 +583,7 @@ do_stat: } /* chk_key */ -static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, +static int chk_index_down(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page, uchar *buff, ha_rows *keys, ha_checksum *key_checksum, uint level) { @@ -729,13 +729,13 @@ int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, /* Check if index is ok */ -static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, +static int chk_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page, uchar *buff, ha_rows *keys, ha_checksum *key_checksum, uint level) { int flag; uint used_length,comp_flag,nod_flag,key_length=0; - uchar key[MI_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; + uchar key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; my_off_t next_page,record; char llbuff[22]; uint diff_pos[2]; @@ -932,7 +932,7 @@ static uint isam_key_length(MI_INFO *info, register MI_KEYDEF *keyinfo) /* Check that record-link is ok */ -int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) +int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend) { int error,got_error,flag; uint key,left_length,b_type,field; @@ -942,7 +942,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) byte *record,*to; char llbuff[22],llbuff2[22],llbuff3[22]; ha_checksum intern_record_checksum; - ha_checksum key_checksum[MI_MAX_POSSIBLE_KEY]; + ha_checksum key_checksum[HA_MAX_POSSIBLE_KEY]; my_bool static_row_size; MI_KEYDEF *keyinfo; MI_BLOCK_INFO block_info; @@ -990,6 +990,9 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) if (*killed_ptr(param)) goto err2; switch (info->s->data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); /* Impossible */ + break; case STATIC_RECORD: if (my_b_read(¶m->read_cache,(byte*) record, info->s->base.pack_reclength)) @@ -1377,7 +1380,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) /* Recover old table by reading each record and writing all keys */ /* Save new datafile-name in temp_filename */ -int mi_repair(MI_CHECK *param, register MI_INFO *info, +int mi_repair(HA_CHECK *param, register MI_INFO *info, my_string name, int rep_quick) { int error,got_error; @@ -1387,7 +1390,7 @@ int mi_repair(MI_CHECK *param, 
register MI_INFO *info, File new_file; MYISAM_SHARE *share=info->s; char llbuff[22],llbuff2[22]; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; MI_SORT_PARAM sort_param; DBUG_ENTER("mi_repair"); @@ -1770,7 +1773,7 @@ int movepoint(register MI_INFO *info, byte *record, my_off_t oldpos, /* Tell system that we want all memory for our cache */ -void lock_memory(MI_CHECK *param __attribute__((unused))) +void lock_memory(HA_CHECK *param __attribute__((unused))) { #ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */ if (param->opt_lock_memory) @@ -1786,7 +1789,7 @@ void lock_memory(MI_CHECK *param __attribute__((unused))) /* Flush all changed blocks to disk */ -int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file) +int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file) { if (flush_key_blocks(key_cache, file, FLUSH_RELEASE)) { @@ -1801,12 +1804,12 @@ int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file) /* Sort index for more efficent reads */ -int mi_sort_index(MI_CHECK *param, register MI_INFO *info, my_string name) +int mi_sort_index(HA_CHECK *param, register MI_INFO *info, my_string name) { reg2 uint key; reg1 MI_KEYDEF *keyinfo; File new_file; - my_off_t index_pos[MI_MAX_POSSIBLE_KEY]; + my_off_t index_pos[HA_MAX_POSSIBLE_KEY]; uint r_locks,w_locks; int old_lock; MYISAM_SHARE *share=info->s; @@ -1901,12 +1904,12 @@ err2: /* Sort records recursive using one index */ -static int sort_one_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, +static int sort_one_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pagepos, File new_file) { uint length,nod_flag,used_length, key_length; uchar *buff,*keypos,*endpos; - uchar key[MI_MAX_POSSIBLE_KEY_BUFF]; + uchar key[HA_MAX_POSSIBLE_KEY_BUFF]; my_off_t new_page_pos,next_page; char llbuff[22]; DBUG_ENTER("sort_one_index"); @@ -2021,7 +2024,7 @@ int change_to_newfile(const char * filename, const char * old_ext, /* Locks a whole file */ /* Gives an error-message if file can't be locked */ -int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, +int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, const char *filetype, const char *filename) { if (my_lock(file,lock_type,start,F_TO_EOF, @@ -2038,7 +2041,7 @@ int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, /* Copy a block between two files */ -int filecopy(MI_CHECK *param, File to,File from,my_off_t start, +int filecopy(HA_CHECK *param, File to,File from,my_off_t start, my_off_t length, const char *type) { char tmp_buff[IO_SIZE],*buff; @@ -2089,7 +2092,7 @@ err: <>0 Error */ -int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, +int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick) { int got_error; @@ -2103,7 +2106,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, HA_KEYSEG *keyseg; ulong *rec_per_key_part; char llbuff[22]; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; DBUG_ENTER("mi_repair_by_sort"); @@ -2509,7 +2512,7 @@ err: <>0 Error */ -int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, +int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick) { #ifndef THREAD @@ -2528,7 +2531,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, char llbuff[22]; IO_CACHE new_data_cache; /* For non-quick repair. 
*/ IO_CACHE_SHARE io_share; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; pthread_attr_t thr_attr; DBUG_ENTER("mi_repair_parallel"); @@ -3007,7 +3010,7 @@ err: static int sort_key_read(MI_SORT_PARAM *sort_param, void *key) { int error; - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; MI_INFO *info=sort_info->info; DBUG_ENTER("sort_key_read"); @@ -3034,7 +3037,7 @@ static int sort_key_read(MI_SORT_PARAM *sort_param, void *key) static int sort_ft_key_read(MI_SORT_PARAM *sort_param, void *key) { int error; - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; MI_INFO *info=sort_info->info; FT_WORD *wptr=0; DBUG_ENTER("sort_ft_key_read"); @@ -3121,8 +3124,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) my_off_t pos; byte *to; MI_BLOCK_INFO block_info; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; MI_INFO *info=sort_info->info; MYISAM_SHARE *share=info->s; char llbuff[22],llbuff2[22]; @@ -3132,6 +3135,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) DBUG_RETURN(1); switch (share->data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); /* Impossible */ + break; case STATIC_RECORD: for (;;) { @@ -3547,8 +3553,8 @@ int sort_write_record(MI_SORT_PARAM *sort_param) ulong block_length,reclength; byte *from; byte block_buff[8]; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; MI_INFO *info=sort_info->info; MYISAM_SHARE *share=info->s; DBUG_ENTER("sort_write_record"); @@ -3556,6 +3562,9 @@ int sort_write_record(MI_SORT_PARAM *sort_param) if (sort_param->fix_datafile) { switch (sort_info->new_data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); /* Impossible */ + break; case STATIC_RECORD: if (my_b_write(&info->rec_cache,sort_param->record, share->base.pack_reclength)) @@ -3574,7 +3583,7 @@ int sort_write_record(MI_SORT_PARAM *sort_param) { /* must be sure that local buffer is big enough */ reclength=info->s->base.pack_reclength+ - _my_calc_total_blob_length(info,sort_param->record)+ + _mi_calc_total_blob_length(info,sort_param->record)+ ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+ MI_DYN_DELETE_BLOCK_HEADER; if (sort_info->buff_length < reclength) @@ -3663,24 +3672,25 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) { uint diff_pos[2]; char llbuff[22],llbuff2[22]; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param= sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param= sort_info->param; int cmp; if (sort_info->key_block->inited) { - cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE, diff_pos); if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) - ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos); else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) { diff_pos[0]= mi_collect_stats_nonulls_next(sort_param->seg, sort_param->notnull, - sort_info->key_block->lastkey, + (uchar*) sort_info-> + key_block->lastkey, (uchar*)a); } 
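/*
  Editorial note, not part of the changeset: our reading of this block is
  that ha_key_cmp() stores in diff_pos[0] the 1-based number of the first
  key part that differs from the previously written key, so the statement
  below simply tallies, per prefix length, how often a new value starts:

      sort_param->unique[diff_pos[0] - 1]++;

  These counters are the raw material for the rec_per_key statistics
  updated once the repair-by-sort finishes.
*/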
sort_param->unique[diff_pos[0]-1]++; @@ -3703,8 +3713,8 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) llstr(sort_info->info->lastpos,llbuff), llstr(get_record_for_key(sort_info->info, sort_param->keyinfo, - sort_info->key_block-> - lastkey), + (uchar*) sort_info-> + key_block->lastkey), llbuff2)); param->testflag|=T_RETRY_WITHOUT_QUICK; if (sort_info->param->testflag & T_VERBOSE) @@ -3725,7 +3735,7 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) { - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; SORT_KEY_BLOCKS *key_block=sort_info->key_block; MYISAM_SHARE *share=sort_info->info->s; uint val_off, val_len; @@ -3735,19 +3745,19 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) val_len=share->ft2_keyinfo.keylength; get_key_full_length_rdonly(val_off, ft_buf->lastkey); - to=ft_buf->lastkey+val_off; + to= (uchar*) ft_buf->lastkey+val_off; if (ft_buf->buf) { /* flushing first-level tree */ - error=sort_insert_key(sort_param,key_block,ft_buf->lastkey, + error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey, HA_OFFSET_ERROR); for (from=to+val_len; - !error && from < ft_buf->buf; + !error && from < (uchar*) ft_buf->buf; from+= val_len) { memcpy(to, from, val_len); - error=sort_insert_key(sort_param,key_block,ft_buf->lastkey, + error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey, HA_OFFSET_ERROR); } return error; @@ -3756,8 +3766,8 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) error=flush_pending_blocks(sort_param); /* updating lastkey with second-level tree info */ ft_intXstore(ft_buf->lastkey+val_off, -ft_buf->count); - _mi_dpointer(sort_info->info, ft_buf->lastkey+val_off+HA_FT_WLEN, - share->state.key_root[sort_param->key]); + _mi_dpointer(sort_info->info, (uchar*) ft_buf->lastkey+val_off+HA_FT_WLEN, + share->state.key_root[sort_param->key]); /* restoring first level tree data in sort_info/sort_param */ sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks; sort_param->keyinfo=share->keyinfo+sort_param->key; @@ -3765,14 +3775,14 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) /* writing lastkey in first-level tree */ return error ? 
error : sort_insert_key(sort_param,sort_info->key_block, - ft_buf->lastkey,HA_OFFSET_ERROR); + (uchar*) ft_buf->lastkey,HA_OFFSET_ERROR); } static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) { uint a_len, val_off, val_len, error; uchar *p; - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; SORT_FT_BUF *ft_buf=sort_info->ft_buf; SORT_KEY_BLOCKS *key_block=sort_info->key_block; @@ -3802,9 +3812,9 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) } get_key_full_length_rdonly(val_off, ft_buf->lastkey); - if (mi_compare_text(sort_param->seg->charset, + if (ha_compare_text(sort_param->seg->charset, ((uchar *)a)+1,a_len-1, - ft_buf->lastkey+1,val_off-1, 0, 0)==0) + (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0) { if (!ft_buf->buf) /* store in second-level tree */ { @@ -3820,16 +3830,16 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) return 0; /* converting to two-level tree */ - p=ft_buf->lastkey+val_off; + p= (uchar*) ft_buf->lastkey+val_off; while (key_block->inited) key_block++; sort_info->key_block=key_block; sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo; - ft_buf->count=(ft_buf->buf - p)/val_len; + ft_buf->count=((uchar*) ft_buf->buf - p)/val_len; /* flushing buffer to second-level tree */ - for (error=0; !error && p < ft_buf->buf; p+= val_len) + for (error=0; !error && p < (uchar*) ft_buf->buf; p+= val_len) error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR); ft_buf->buf=0; return error; @@ -3877,13 +3887,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, MI_KEY_PARAM s_temp; MI_INFO *info; MI_KEYDEF *keyinfo=sort_param->keyinfo; - SORT_INFO *sort_info= sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; DBUG_ENTER("sort_insert_key"); - anc_buff=key_block->buff; + anc_buff= (uchar*) key_block->buff; info=sort_info->info; - lastkey=key_block->lastkey; + lastkey= (uchar*) key_block->lastkey; nod_flag= (key_block == sort_info->key_block ? 
0 : info->s->base.key_reflength); @@ -3896,7 +3906,7 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, DBUG_RETURN(1); } a_length=2+nod_flag; - key_block->end_pos=anc_buff+2; + key_block->end_pos= (char*) anc_buff+2; lastkey=0; /* No previous key in block */ } else @@ -3904,18 +3914,18 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, /* Save pointer to previous block */ if (nod_flag) - _mi_kpointer(info,key_block->end_pos,prev_block); + _mi_kpointer(info,(uchar*) key_block->end_pos,prev_block); t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (uchar*) 0,lastkey,lastkey,key, &s_temp); - (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp); + (*keyinfo->store_key)(keyinfo, (uchar*) key_block->end_pos+nod_flag,&s_temp); a_length+=t_length; mi_putint(anc_buff,a_length,nod_flag); key_block->end_pos+=t_length; if (a_length <= keyinfo->block_length) { - VOID(_mi_move_key(keyinfo,key_block->lastkey,key)); + VOID(_mi_move_key(keyinfo,(uchar*) key_block->lastkey,key)); key_block->last_length=a_length-t_length; DBUG_RETURN(0); } @@ -3940,7 +3950,8 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, DBUG_DUMP("buff",(byte*) anc_buff,mi_getint(anc_buff)); /* Write separator-key to block in next level */ - if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos)) + if (sort_insert_key(sort_param,key_block+1,(uchar*) key_block->lastkey, + filepos)) DBUG_RETURN(1); /* clear old block and write new key in it */ @@ -3956,8 +3967,8 @@ static int sort_delete_record(MI_SORT_PARAM *sort_param) uint i; int old_file,error; uchar *key; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; MI_INFO *info=sort_info->info; DBUG_ENTER("sort_delete_record"); @@ -4013,7 +4024,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) uint nod_flag,length; my_off_t filepos,key_file_length; SORT_KEY_BLOCKS *key_block; - SORT_INFO *sort_info= sort_param->sort_info; + MI_SORT_INFO *sort_info= sort_param->sort_info; myf myf_rw=sort_info->param->myf_rw; MI_INFO *info=sort_info->info; MI_KEYDEF *keyinfo=sort_param->keyinfo; @@ -4026,7 +4037,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) key_block->inited=0; length=mi_getint(key_block->buff); if (nod_flag) - _mi_kpointer(info,key_block->end_pos,filepos); + _mi_kpointer(info,(uchar*) key_block->end_pos,filepos); key_file_length=info->state->key_file_length; bzero((byte*) key_block->buff+length, keyinfo->block_length-length); if ((filepos=_mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) @@ -4036,7 +4047,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) if (key_file_length == info->state->key_file_length) { if (_mi_write_keypage(info, keyinfo, filepos, - DFLT_INIT_HITS, key_block->buff)) + DFLT_INIT_HITS, (uchar*) key_block->buff)) DBUG_RETURN(1); } else if (my_pwrite(info->s->kfile,(byte*) key_block->buff, @@ -4051,7 +4062,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) /* alloc space and pointers for key_blocks */ -static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, uint buffer_length) { reg1 uint i; @@ -4068,7 +4079,7 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, for (i=0 ; i < blocks ; i++) { block[i].inited=0; - block[i].buff=(uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i; + block[i].buff=(byte*) (block+blocks)+(buffer_length+IO_SIZE)*i; } DBUG_RETURN(block); } /* 
alloc_key_blocks */ @@ -4088,7 +4099,7 @@ int test_if_almost_full(MI_INFO *info) /* Recreate table with bigger more alloced record-data */ -int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename) +int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename) { int error; MI_INFO info; @@ -4261,7 +4272,7 @@ end: /* write suffix to data file if neaded */ -int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile) +int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile) { MI_INFO *info=sort_info->info; @@ -4282,7 +4293,7 @@ int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile) /* Update state and myisamchk_time of indexfile */ -int update_state_info(MI_CHECK *param, MI_INFO *info,uint update) +int update_state_info(HA_CHECK *param, MI_INFO *info,uint update) { MYISAM_SHARE *share=info->s; @@ -4354,7 +4365,7 @@ err: param->auto_increment is bigger than the biggest key. */ -void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, +void update_auto_increment_key(HA_CHECK *param, MI_INFO *info, my_bool repair_only) { byte *record; @@ -4587,7 +4598,7 @@ my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows, static void -set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share) +set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share) { if ((sort_info->new_data_file_type=share->data_file_type) == COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK) diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c index 2b8cbcc7da5..e9f5095bdc4 100644 --- a/storage/myisam/mi_create.c +++ b/storage/myisam/mi_create.c @@ -17,6 +17,7 @@ #include "ftdefs.h" #include "sp_defs.h" +#include <my_bit.h> #if defined(MSDOS) || defined(__WIN__) #ifdef __WIN__ @@ -40,7 +41,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, File dfile,file; int errpos,save_errno, create_mode= O_RDWR | O_TRUNC; myf create_flag; - uint fields,length,max_key_length,packed,pointer,real_length_diff, + uint fields,length,max_key_length,packed,pack_bytes,pointer,real_length_diff, key_length,info_length,key_segs,options,min_key_length_skip, base_pos,long_varchar_count,varchar_length, max_key_block_length,unique_key_parts,fulltext_keys,offset; @@ -56,7 +57,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, HA_KEYSEG *keyseg,tmp_keyseg; MI_COLUMNDEF *rec; ulong *rec_per_key_part; - my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; MI_CREATE_INFO tmp_create_info; DBUG_ENTER("mi_create"); DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u", @@ -94,7 +95,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */ if (!(rec_per_key_part= - (ulong*) my_malloc((keys + uniques)*MI_MAX_KEY_SEG*sizeof(long), + (ulong*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(long), MYF(MY_WME | MY_ZEROFILL)))) DBUG_RETURN(my_errno); @@ -188,11 +189,11 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, if (flags & HA_CREATE_RELIES_ON_SQL_LAYER) options|= HA_OPTION_RELIES_ON_SQL_LAYER; - packed=(packed+7)/8; + pack_bytes= (packed+7)/8; if (pack_reclength != INT_MAX32) pack_reclength+= reclength+packed + test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD)); - min_pack_length+=packed; + min_pack_length+= pack_bytes; if (!ci->data_file_length && ci->max_rows) { @@ -416,7 +417,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, } } /* 
if HA_FULLTEXT */ key_segs+=keydef->keysegs; - if (keydef->keysegs > MI_MAX_KEY_SEG) + if (keydef->keysegs > HA_MAX_KEY_SEG) { my_errno=HA_WRONG_CREATE_OPTION; goto err; @@ -431,7 +432,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, share.state.rec_per_key_part[key_segs-1]=1L; length+=key_length; /* Get block length for key, if defined by user */ - block_length= (keydef->block_length ? + block_length= (keydef->block_length ? my_round_up_to_next_power(keydef->block_length) : myisam_block_size); block_length= max(block_length, MI_MIN_KEY_BLOCK_LENGTH); @@ -441,7 +442,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, pointer,MI_MAX_KEYPTR_SIZE, block_length); if (keydef->block_length > MI_MAX_KEY_BLOCK_LENGTH || - length >= MI_MAX_KEY_BUFF) + length >= HA_MAX_KEY_BUFF) { my_errno=HA_WRONG_CREATE_OPTION; goto err; @@ -546,9 +547,9 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM); share.base.max_pack_length=pack_reclength; share.base.min_pack_length=min_pack_length; - share.base.pack_bits=packed; + share.base.pack_bits= pack_bytes; share.base.fields=fields; - share.base.pack_fields=packed; + share.base.pack_fields= packed; #ifdef USE_RAID share.base.raid_type=ci->raid_type; share.base.raid_chunks=ci->raid_chunks; diff --git a/storage/myisam/mi_delete.c b/storage/myisam/mi_delete.c index 409930ff7fb..cec483f8056 100644 --- a/storage/myisam/mi_delete.c +++ b/storage/myisam/mi_delete.c @@ -159,7 +159,7 @@ static int _mi_ck_real_delete(register MI_INFO *info, MI_KEYDEF *keyinfo, DBUG_RETURN(my_errno=HA_ERR_CRASHED); } if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) { DBUG_PRINT("error",("Couldn't allocate memory")); DBUG_RETURN(my_errno=ENOMEM); @@ -221,7 +221,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, my_bool last_key; uchar *leaf_buff,*keypos; my_off_t leaf_page,next_block; - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; DBUG_ENTER("d_search"); DBUG_DUMP("page",(byte*) anc_buff,mi_getint(anc_buff)); @@ -306,7 +306,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, { leaf_page=_mi_kpos(nod_flag,keypos); if (!(leaf_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) { DBUG_PRINT("error",("Couldn't allocate memory")); my_errno=ENOMEM; @@ -365,9 +365,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, { /* This happens only with packed keys */ DBUG_PRINT("test",("Enlarging of key when deleting")); if (!_mi_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length)) - { goto err; - } ret_value=_mi_insert(info,keyinfo,key,anc_buff,keypos,lastkey, (uchar*) 0,(uchar*) 0,(my_off_t) 0,(my_bool) 0); } @@ -405,7 +403,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, int ret_value,length; uint a_length,nod_flag,tmp; my_off_t next_page; - uchar keybuff[MI_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; + uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; MYISAM_SHARE *share=info->s; MI_KEY_PARAM s_temp; DBUG_ENTER("del"); @@ -422,7 +420,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, { next_page= _mi_kpos(nod_flag,endpos); if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); if 
(!_mi_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0)) ret_value= -1; @@ -509,7 +507,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag, key_reflength,key_length; my_off_t next_page; - uchar anc_key[MI_MAX_KEY_BUFF],leaf_key[MI_MAX_KEY_BUFF], + uchar anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF], *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key, *after_key; MI_KEY_PARAM s_temp; diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c index 5342619c79b..d56df7b269b 100644 --- a/storage/myisam/mi_dynrec.c +++ b/storage/myisam/mi_dynrec.c @@ -252,7 +252,7 @@ int _mi_write_blob_record(MI_INFO *info, const byte *record) extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+ MI_DYN_DELETE_BLOCK_HEADER+1); reclength= (info->s->base.pack_reclength + - _my_calc_total_blob_length(info,record)+ extra); + _mi_calc_total_blob_length(info,record)+ extra); #ifdef NOT_USED /* We now support big rows */ if (reclength > MI_DYN_MAX_ROW_LENGTH) { @@ -286,7 +286,7 @@ int _mi_update_blob_record(MI_INFO *info, my_off_t pos, const byte *record) extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+ MI_DYN_DELETE_BLOCK_HEADER); reclength= (info->s->base.pack_reclength+ - _my_calc_total_blob_length(info,record)+ extra); + _mi_calc_total_blob_length(info,record)+ extra); #ifdef NOT_USED /* We now support big rows */ if (reclength > MI_DYN_MAX_ROW_LENGTH) { @@ -1258,7 +1258,7 @@ err: /* Calc length of blob. Update info in blobs->length */ -ulong _my_calc_total_blob_length(MI_INFO *info, const byte *record) +ulong _mi_calc_total_blob_length(MI_INFO *info, const byte *record) { ulong length; MI_BLOB *blob,*end; @@ -1292,7 +1292,7 @@ ulong _mi_calc_blob_length(uint length, const byte *pos) } -void _my_store_blob_length(byte *pos,uint pack_length,uint length) +void _mi_store_blob_length(byte *pos,uint pack_length,uint length) { switch (pack_length) { case 1: @@ -1505,7 +1505,7 @@ int _mi_cmp_dynamic_record(register MI_INFO *info, register const byte *record) if (info->s->base.blobs) { if (!(buffer=(byte*) my_alloca(info->s->base.pack_reclength+ - _my_calc_total_blob_length(info,record)))) + _mi_calc_total_blob_length(info,record)))) DBUG_RETURN(-1); } reclength=_mi_rec_pack(info,buffer,record); diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c index b203286d544..03ac511df61 100644 --- a/storage/myisam/mi_key.c +++ b/storage/myisam/mi_key.c @@ -448,7 +448,7 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr, /* The above changed info->lastkey2. Inform mi_rnext_same(). 
*/ info->update&= ~HA_STATE_RNEXT_SAME; - _my_store_blob_length(record+keyseg->start, + _mi_store_blob_length(record+keyseg->start, (uint) keyseg->bit_start,length); key+=length; } diff --git a/storage/myisam/mi_log.c b/storage/myisam/mi_log.c index f720f752a06..e09899cd1bf 100644 --- a/storage/myisam/mi_log.c +++ b/storage/myisam/mi_log.c @@ -133,7 +133,7 @@ void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info, if (!info->s->base.blobs) length=info->s->base.reclength; else - length=info->s->base.reclength+ _my_calc_total_blob_length(info,record); + length=info->s->base.reclength+ _mi_calc_total_blob_length(info,record); buff[0]=(char) command; mi_int2store(buff+1,info->dfile); mi_int4store(buff+3,pid); diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c index 830332fe0c1..6fd7d7a571f 100644 --- a/storage/myisam/mi_open.c +++ b/storage/myisam/mi_open.c @@ -82,8 +82,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) char *disk_cache, *disk_pos, *end_pos; MI_INFO info,*m_info,*old_info; MYISAM_SHARE share_buff,*share; - ulong rec_per_key_part[MI_MAX_POSSIBLE_KEY*MI_MAX_KEY_SEG]; - my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; + ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; ulonglong max_key_file_length, max_data_file_length; DBUG_ENTER("mi_open"); @@ -104,7 +104,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) share_buff.state.rec_per_key_part=rec_per_key_part; share_buff.state.key_root=key_root; share_buff.state.key_del=key_del; - share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff)); + share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff), + dflt_key_cache); DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_open", if (strstr(name, "/t1")) @@ -210,7 +211,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) len,MI_BASE_INFO_SIZE)); } disk_pos= (char*) - my_n_base_info_read((uchar*) disk_cache + base_pos, &share->base); + mi_n_base_info_read((uchar*) disk_cache + base_pos, &share->base); share->state.state_length=base_pos; if (!(open_flags & HA_OPEN_FOR_REPAIR) && @@ -235,8 +236,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) } key_parts+=fulltext_keys*FT_SEGS; - if (share->base.max_key_length > MI_MAX_KEY_BUFF || keys > MI_MAX_KEY || - key_parts >= MI_MAX_KEY * MI_MAX_KEY_SEG) + if (share->base.max_key_length > HA_MAX_KEY_BUFF || keys > MI_MAX_KEY || + key_parts >= MI_MAX_KEY * HA_MAX_KEY_SEG) { DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts)); my_errno=HA_ERR_UNSUPPORTED; @@ -1025,7 +1026,7 @@ uint mi_base_info_write(File file, MI_BASE_INFO *base) } -uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base) +uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base) { base->keystart = mi_sizekorr(ptr); ptr +=8; base->max_data_file_length = mi_sizekorr(ptr); ptr +=8; diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c index a5a9aaededd..c74bfb5af41 100644 --- a/storage/myisam/mi_packrec.c +++ b/storage/myisam/mi_packrec.c @@ -105,6 +105,7 @@ static void init_bit_buffer(MI_BIT_BUFF *bit_buff,uchar *buffer,uint length); static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count); static void fill_buffer(MI_BIT_BUFF *bit_buff); static uint max_bit(uint value); +static uint read_pack_length(uint version, const uchar *buf, ulong *length); #ifdef 
HAVE_MMAP static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, MI_BLOCK_INFO *info, byte **rec_buff_p, @@ -1043,7 +1044,7 @@ static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, return; } decode_bytes(rec,bit_buff,bit_buff->blob_pos,bit_buff->blob_pos+length); - _my_store_blob_length((byte*) to,pack_length,length); + _mi_store_blob_length((byte*) to,pack_length,length); memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos, sizeof(char*)); bit_buff->blob_pos+=length; @@ -1624,7 +1625,7 @@ uint save_pack_length(uint version, byte *block_buff, ulong length) } -uint read_pack_length(uint version, const uchar *buf, ulong *length) +static uint read_pack_length(uint version, const uchar *buf, ulong *length) { if (buf[0] < 254) { diff --git a/storage/myisam/mi_range.c b/storage/myisam/mi_range.c index 6655f5a7de6..5c1278637b5 100644 --- a/storage/myisam/mi_range.c +++ b/storage/myisam/mi_range.c @@ -232,7 +232,7 @@ static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, uchar *keypos, uint *ret_max_key) { uint nod_flag,keynr,max_key; - uchar t_buff[MI_MAX_KEY_BUFF],*end; + uchar t_buff[HA_MAX_KEY_BUFF],*end; end= page+mi_getint(page); nod_flag=mi_test_if_nod(page); diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c index 8d2b68a97f0..735bfd3fd80 100644 --- a/storage/myisam/mi_search.c +++ b/storage/myisam/mi_search.c @@ -60,7 +60,7 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, int error,flag; uint nod_flag; uchar *keypos,*maxpos; - uchar lastkey[MI_MAX_KEY_BUFF],*buff; + uchar lastkey[HA_MAX_KEY_BUFF],*buff; DBUG_ENTER("_mi_search"); DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu", (ulong) pos, nextflag, (ulong) info->lastpos)); @@ -242,7 +242,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, { int flag; uint nod_flag,length,not_used[2]; - uchar t_buff[MI_MAX_KEY_BUFF],*end; + uchar t_buff[HA_MAX_KEY_BUFF],*end; DBUG_ENTER("_mi_seq_search"); LINT_INIT(flag); LINT_INIT(length); @@ -296,7 +296,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, int key_len_skip, seg_len_pack, key_len_left; uchar *end, *kseg, *vseg; uchar *sort_order=keyinfo->seg->charset->sort_order; - uchar tt_buff[MI_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; + uchar tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; uchar *saved_from, *saved_to, *saved_vseg; uint saved_length=0, saved_prefix_len=0; uint length_pack; @@ -920,7 +920,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_ENTER("_mi_get_binary_pack_key"); page= *page_pos; - page_end=page+MI_MAX_KEY_BUFF+1; + page_end=page+HA_MAX_KEY_BUFF+1; start_key=key; /* @@ -1210,7 +1210,7 @@ int _mi_search_next(register MI_INFO *info, register MI_KEYDEF *keyinfo, { int error; uint nod_flag; - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; DBUG_ENTER("_mi_search_next"); DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu", nextflag, (ulong) info->lastpos, diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c index ebb9cdcb2f7..c5a1ffcd5d7 100644 --- a/storage/myisam/mi_test1.c +++ b/storage/myisam/mi_test1.c @@ -628,7 +628,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), key_type= HA_KEYTYPE_VARTEXT1; break; case 'k': - if (key_length < 4 || key_length > MI_MAX_KEY_LENGTH) + if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH) { fprintf(stderr,"Wrong key length\n"); exit(1); diff --git 
a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c index 878bba31ea8..894e48d863c 100644 --- a/storage/myisam/mi_test2.c +++ b/storage/myisam/mi_test2.c @@ -26,6 +26,7 @@ #endif #include "myisamdef.h" #include <m_ctype.h> +#include <my_bit.h> #define STANDARD_LENGTH 37 #define MYISAM_KEYS 6 @@ -602,7 +603,7 @@ int main(int argc, char *argv[]) if (mi_rsame(file,read_record2,(int) i)) goto err; if (bcmp(read_record,read_record2,reclength) != 0) { - printf("is_rsame didn't find same record\n"); + printf("mi_rsame didn't find same record\n"); goto end; } } diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c index 635f6c18247..342a9c910ca 100644 --- a/storage/myisam/mi_unique.c +++ b/storage/myisam/mi_unique.c @@ -212,7 +212,7 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b, if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || type == HA_KEYTYPE_VARTEXT2) { - if (mi_compare_text(keyseg->charset, (uchar *) pos_a, a_length, + if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length, (uchar *) pos_b, b_length, 0, 1)) return 1; } diff --git a/storage/myisam/mi_update.c b/storage/myisam/mi_update.c index bea457d2e9a..2ef4df71fc7 100644 --- a/storage/myisam/mi_update.c +++ b/storage/myisam/mi_update.c @@ -23,7 +23,7 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec) int flag,key_changed,save_errno; reg3 my_off_t pos; uint i; - uchar old_key[MI_MAX_KEY_BUFF],*new_key; + uchar old_key[HA_MAX_KEY_BUFF],*new_key; bool auto_key_changed=0; ulonglong changed; MYISAM_SHARE *share=info->s; diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c index 57c054f2de8..1ae7e179b29 100644 --- a/storage/myisam/mi_write.c +++ b/storage/myisam/mi_write.c @@ -346,7 +346,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, int error,flag; uint nod_flag, search_key_length; uchar *temp_buff,*keypos; - uchar keybuff[MI_MAX_KEY_BUFF]; + uchar keybuff[HA_MAX_KEY_BUFF]; my_bool was_last_key; my_off_t next_page, dupp_key_pos; DBUG_ENTER("w_search"); @@ -354,7 +354,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); if (!_mi_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0)) goto err; @@ -545,7 +545,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, get_key_length(alen,a); DBUG_ASSERT(info->ft1_to_ft2==0); if (alen == blen && - mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) + ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) { /* yup. 
converting */ info->ft1_to_ft2=(DYNAMIC_ARRAY *) @@ -707,7 +707,7 @@ static uchar *_mi_find_last_pos(MI_KEYDEF *keyinfo, uchar *page, { uint keys,length,last_length,key_ref_length; uchar *end,*lastpos,*prevpos; - uchar key_buff[MI_MAX_KEY_BUFF]; + uchar key_buff[HA_MAX_KEY_BUFF]; DBUG_ENTER("_mi_find_last_pos"); key_ref_length=2; @@ -764,7 +764,7 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo, length,keys; uchar *pos,*buff,*extra_buff; my_off_t next_page,new_pos; - byte tmp_part_key[MI_MAX_KEY_BUFF]; + byte tmp_part_key[HA_MAX_KEY_BUFF]; DBUG_ENTER("_mi_balance_page"); k_length=keyinfo->keylength; @@ -930,7 +930,7 @@ static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param) Probably I can use info->lastkey here, but I'm not sure, and to be safe I'd better use local lastkey. */ - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; uint keylen; MI_KEYDEF *keyinfo; diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c index 066e6cdb81b..ebe46e7c6e5 100644 --- a/storage/myisam/myisamchk.c +++ b/storage/myisam/myisamchk.c @@ -16,10 +16,10 @@ /* Describe, check and repair of MyISAM tables */ #include "fulltext.h" - #include <m_ctype.h> #include <stdarg.h> #include <my_getopt.h> +#include <my_bit.h> #ifdef HAVE_SYS_VADVICE_H #include <sys/vadvise.h> #endif @@ -67,9 +67,9 @@ static const char *myisam_stats_method_str="nulls_unequal"; static void get_options(int *argc,char * * *argv); static void print_version(void); static void usage(void); -static int myisamchk(MI_CHECK *param, char *filename); -static void descript(MI_CHECK *param, register MI_INFO *info, my_string name); -static int mi_sort_records(MI_CHECK *param, register MI_INFO *info, +static int myisamchk(HA_CHECK *param, char *filename); +static void descript(HA_CHECK *param, register MI_INFO *info, my_string name); +static int mi_sort_records(HA_CHECK *param, register MI_INFO *info, my_string name, uint sort_key, my_bool write_info, my_bool update_index); static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info, @@ -77,7 +77,7 @@ static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info, my_off_t page,uchar *buff,uint sortkey, File new_file, my_bool update_index); -MI_CHECK check_param; +HA_CHECK check_param; /* Main program */ @@ -695,7 +695,7 @@ get_one_option(int optid, case OPT_STATS_METHOD: { int method; - enum_mi_stats_method method_conv; + enum_handler_stats_method method_conv; LINT_INIT(method_conv); myisam_stats_method_str= argument; if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) @@ -794,7 +794,7 @@ static void get_options(register int *argc,register char ***argv) /* Check table */ -static int myisamchk(MI_CHECK *param, my_string filename) +static int myisamchk(HA_CHECK *param, my_string filename) { int error,lock_type,recreate; int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS); @@ -1199,7 +1199,7 @@ end2: /* Write info about table */ -static void descript(MI_CHECK *param, register MI_INFO *info, my_string name) +static void descript(HA_CHECK *param, register MI_INFO *info, my_string name) { uint key,keyseg_nr,field,start; reg3 MI_KEYDEF *keyinfo; @@ -1464,7 +1464,7 @@ static void descript(MI_CHECK *param, register MI_INFO *info, my_string name) /* Sort records according to one key */ -static int mi_sort_records(MI_CHECK *param, +static int mi_sort_records(HA_CHECK *param, register MI_INFO *info, my_string name, uint sort_key, my_bool write_info, @@ -1478,7 +1478,7 @@ static int 
mi_sort_records(MI_CHECK *param, ha_rows old_record_count; MYISAM_SHARE *share=info->s; char llbuff[22],llbuff2[22]; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; MI_SORT_PARAM sort_param; DBUG_ENTER("sort_records"); @@ -1653,10 +1653,10 @@ static int sort_record_index(MI_SORT_PARAM *sort_param,MI_INFO *info, uint nod_flag,used_length,key_length; uchar *temp_buff,*keypos,*endpos; my_off_t next_page,rec_pos; - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; char llbuff[22]; - SORT_INFO *sort_info= sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; DBUG_ENTER("sort_record_index"); nod_flag=mi_test_if_nod(buff); @@ -1744,7 +1744,7 @@ err: static int not_killed= 0; -volatile int *killed_ptr(MI_CHECK *param __attribute__((unused))) +volatile int *killed_ptr(HA_CHECK *param __attribute__((unused))) { return &not_killed; /* always NULL */ } @@ -1752,7 +1752,7 @@ volatile int *killed_ptr(MI_CHECK *param __attribute__((unused))) /* print warnings and errors */ /* VARARGS */ -void mi_check_print_info(MI_CHECK *param __attribute__((unused)), +void mi_check_print_info(HA_CHECK *param __attribute__((unused)), const char *fmt,...) { va_list args; @@ -1765,7 +1765,7 @@ void mi_check_print_info(MI_CHECK *param __attribute__((unused)), /* VARARGS */ -void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) +void mi_check_print_warning(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("mi_check_print_warning"); @@ -1790,7 +1790,7 @@ void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) /* VARARGS */ -void mi_check_print_error(MI_CHECK *param, const char *fmt,...) +void mi_check_print_error(HA_CHECK *param, const char *fmt,...) 
{ va_list args; DBUG_ENTER("mi_check_print_error"); diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h index dceccd10ae2..9686af00bc6 100644 --- a/storage/myisam/myisamdef.h +++ b/storage/myisam/myisamdef.h @@ -15,8 +15,8 @@ /* This file is included by all internal myisam files */ -#include "myisam.h" /* Structs & some defines */ -#include "myisampack.h" /* packing of keys */ +#include "myisam.h" /* Structs & some defines */ +#include "myisampack.h" /* packing of keys */ #include <my_tree.h> #ifdef THREAD #include <my_pthread.h> @@ -26,15 +26,16 @@ #endif #if defined(my_write) && !defined(MAP_TO_USE_RAID) -#undef my_write /* undef map from my_nosys; We need test-if-disk full */ +/* undef map from my_nosys; We need test-if-disk full */ +#undef my_write #endif typedef struct st_mi_status_info { - ha_rows records; /* Rows in table */ - ha_rows del; /* Removed rows */ - my_off_t empty; /* lost space in datafile */ - my_off_t key_empty; /* lost space in indexfile */ + ha_rows records; /* Rows in table */ + ha_rows del; /* Removed rows */ + my_off_t empty; /* lost space in datafile */ + my_off_t key_empty; /* lost space in indexfile */ my_off_t key_file_length; my_off_t data_file_length; ha_checksum checksum; @@ -42,345 +43,291 @@ typedef struct st_mi_status_info typedef struct st_mi_state_info { - struct { /* Fileheader */ + struct + { /* Fileheader */ uchar file_version[4]; uchar options[2]; uchar header_length[2]; uchar state_info_length[2]; uchar base_info_length[2]; uchar base_pos[2]; - uchar key_parts[2]; /* Key parts */ - uchar unique_key_parts[2]; /* Key parts + unique parts */ - uchar keys; /* number of keys in file */ - uchar uniques; /* number of UNIQUE definitions */ - uchar language; /* Language for indexes */ - uchar max_block_size_index; /* max keyblock size */ + uchar key_parts[2]; /* Key parts */ + uchar unique_key_parts[2]; /* Key parts + unique parts */ + uchar keys; /* number of keys in file */ + uchar uniques; /* number of UNIQUE definitions */ + uchar language; /* Language for indexes */ + uchar max_block_size_index; /* max keyblock size */ uchar fulltext_keys; uchar not_used; /* To align to 8 */ } header; MI_STATUS_INFO state; - ha_rows split; /* number of split blocks */ - my_off_t dellink; /* Link to next removed block */ + ha_rows split; /* number of split blocks */ + my_off_t dellink; /* Link to next removed block */ ulonglong auto_increment; - ulong process; /* process that updated table last */ - ulong unique; /* Unique number for this process */ - ulong update_count; /* Updated for each write lock */ + ulong process; /* process that updated table last */ + ulong unique; /* Unique number for this process */ + ulong update_count; /* Updated for each write lock */ ulong status; ulong *rec_per_key_part; - my_off_t *key_root; /* Start of key trees */ - my_off_t *key_del; /* delete links for trees */ - my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ - - ulong sec_index_changed; /* Updated when new sec_index */ - ulong sec_index_used; /* which extra index are in use */ - ulonglong key_map; /* Which keys are in use */ ha_checksum checksum; /* Table checksum */ - ulong version; /* timestamp of create */ - time_t create_time; /* Time when created database */ - time_t recover_time; /* Time for last recover */ - time_t check_time; /* Time for last check */ - uint sortkey; /* sorted by this key (not used) */ + my_off_t *key_root; /* Start of key trees */ + my_off_t *key_del; /* delete links for trees */ + my_off_t rec_per_key_rows; /* Rows 
when calculating rec_per_key */ + + ulong sec_index_changed; /* Updated when new sec_index */ + ulong sec_index_used; /* which extra index are in use */ + ulonglong key_map; /* Which keys are in use */ + ulong version; /* timestamp of create */ + time_t create_time; /* Time when created database */ + time_t recover_time; /* Time for last recover */ + time_t check_time; /* Time for last check */ + uint sortkey; /* sorted by this key (not used) */ uint open_count; - uint8 changed; /* Changed since myisamchk */ + uint8 changed; /* Changed since myisamchk */ /* the following isn't saved on disk */ - uint state_diff_length; /* Should be 0 */ - uint state_length; /* Length of state header in file */ + uint state_diff_length; /* Should be 0 */ + uint state_length; /* Length of state header in file */ ulong *key_info; } MI_STATE_INFO; -#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8) -#define MI_STATE_KEY_SIZE 8 +#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8) +#define MI_STATE_KEY_SIZE 8 #define MI_STATE_KEYBLOCK_SIZE 8 -#define MI_STATE_KEYSEG_SIZE 4 -#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*MI_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE) -#define MI_KEYDEF_SIZE (2+ 5*2) -#define MI_UNIQUEDEF_SIZE (2+1+1) -#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) -#define MI_COLUMNDEF_SIZE (2*3+1) -#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16) -#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ +#define MI_STATE_KEYSEG_SIZE 4 +#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*HA_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE) +#define MI_KEYDEF_SIZE (2+ 5*2) +#define MI_UNIQUEDEF_SIZE (2+1+1) +#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) +#define MI_COLUMNDEF_SIZE (2*3+1) +#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16) +#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ typedef struct st_mi_base_info { - my_off_t keystart; /* Start of keys */ + my_off_t keystart; /* Start of keys */ my_off_t max_data_file_length; my_off_t max_key_file_length; my_off_t margin_key_file_length; - ha_rows records,reloc; /* Create information */ - ulong mean_row_length; /* Create information */ - ulong reclength; /* length of unpacked record */ - ulong pack_reclength; /* Length of full packed rec. */ + ha_rows records, reloc; /* Create information */ + ulong mean_row_length; /* Create information */ + ulong reclength; /* length of unpacked record */ + ulong pack_reclength; /* Length of full packed rec. 
*/ ulong min_pack_length; - ulong max_pack_length; /* Max possibly length of packed rec.*/ + ulong max_pack_length; /* Max possibly length of packed rec.*/ ulong min_block_length; - ulong fields, /* fields in table */ - pack_fields; /* packed fields in table */ - uint rec_reflength; /* = 2-8 */ - uint key_reflength; /* = 2-8 */ - uint keys; /* same as in state.header */ - uint auto_key; /* Which key-1 is a auto key */ - uint blobs; /* Number of blobs */ - uint pack_bits; /* Length of packed bits */ - uint max_key_block_length; /* Max block length */ - uint max_key_length; /* Max key length */ + ulong fields, /* fields in table */ + pack_fields; /* packed fields in table */ + uint rec_reflength; /* = 2-8 */ + uint key_reflength; /* = 2-8 */ + uint keys; /* same as in state.header */ + uint auto_key; /* Which key-1 is a auto key */ + uint blobs; /* Number of blobs */ + uint pack_bits; /* Length of packed bits */ + uint max_key_block_length; /* Max block length */ + uint max_key_length; /* Max key length */ /* Extra allocation when using dynamic record format */ uint extra_alloc_bytes; uint extra_alloc_procent; /* Info about raid */ - uint raid_type,raid_chunks; + uint raid_type, raid_chunks; ulong raid_chunksize; /* The following are from the header */ - uint key_parts,all_key_parts; + uint key_parts, all_key_parts; } MI_BASE_INFO; - /* Structs used intern in database */ + /* Structs used intern in database */ -typedef struct st_mi_blob /* Info of record */ +typedef struct st_mi_blob /* Info of record */ { - ulong offset; /* Offset to blob in record */ - uint pack_length; /* Type of packed length */ - ulong length; /* Calc:ed for each record */ + ulong offset; /* Offset to blob in record */ + uint pack_length; /* Type of packed length */ + ulong length; /* Calc:ed for each record */ } MI_BLOB; -typedef struct st_mi_isam_pack { +typedef struct st_mi_isam_pack +{ ulong header_length; uint ref_length; uchar version; } MI_PACK; -#define MAX_NONMAPPED_INSERTS 1000 +#define MAX_NONMAPPED_INSERTS 1000 -typedef struct st_mi_isam_share { /* Shared between opens */ +typedef struct st_mi_isam_share +{ /* Shared between opens */ MI_STATE_INFO state; MI_BASE_INFO base; - MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */ - MI_KEYDEF *keyinfo; /* Key definitions */ - MI_UNIQUEDEF *uniqueinfo; /* unique definitions */ - HA_KEYSEG *keyparts; /* key part info */ - MI_COLUMNDEF *rec; /* Pointer to field information */ - MI_PACK pack; /* Data about packed records */ - MI_BLOB *blobs; /* Pointer to blobs */ - char *unique_file_name; /* realpath() of index file */ - char *data_file_name, /* Resolved path names from symlinks */ - *index_file_name; - byte *file_map; /* mem-map of file if possible */ - KEY_CACHE *key_cache; /* ref to the current key cache */ + MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */ + MI_KEYDEF *keyinfo; /* Key definitions */ + MI_UNIQUEDEF *uniqueinfo; /* unique definitions */ + HA_KEYSEG *keyparts; /* key part info */ + MI_COLUMNDEF *rec; /* Pointer to field information */ + MI_PACK pack; /* Data about packed records */ + MI_BLOB *blobs; /* Pointer to blobs */ + char *unique_file_name; /* realpath() of index file */ + char *data_file_name, /* Resolved path names from symlinks */ + *index_file_name; + byte *file_map; /* mem-map of file if possible */ + KEY_CACHE *key_cache; /* ref to the current key cache */ MI_DECODE_TREE *decode_trees; uint16 *decode_tables; - int (*read_record)(struct st_myisam_info*, my_off_t, byte*); - int (*write_record)(struct st_myisam_info*, 
const byte*); - int (*update_record)(struct st_myisam_info*, my_off_t, const byte*); - int (*delete_record)(struct st_myisam_info*); - int (*read_rnd)(struct st_myisam_info*, byte*, my_off_t, my_bool); - int (*compare_record)(struct st_myisam_info*, const byte *); /* Function to use for a row checksum. */ - ha_checksum (*calc_checksum)(struct st_myisam_info*, const byte *); - int (*compare_unique)(struct st_myisam_info*, MI_UNIQUEDEF *, - const byte *record, my_off_t pos); - uint (*file_read)(MI_INFO *, byte *, uint, my_off_t, myf); - uint (*file_write)(MI_INFO *, byte *, uint, my_off_t, myf); + int(*read_record) (struct st_myisam_info *, my_off_t, byte *); + int(*write_record) (struct st_myisam_info *, const byte *); + int(*update_record) (struct st_myisam_info *, my_off_t, const byte *); + int(*delete_record) (struct st_myisam_info *); + int(*read_rnd) (struct st_myisam_info *, byte *, my_off_t, my_bool); + int(*compare_record) (struct st_myisam_info *, const byte *); + ha_checksum(*calc_checksum) (struct st_myisam_info *, const byte *); + int(*compare_unique) (struct st_myisam_info *, MI_UNIQUEDEF *, + const byte * record, my_off_t pos); + uint(*file_read) (MI_INFO *, byte *, uint, my_off_t, myf); + uint(*file_write) (MI_INFO *, byte *, uint, my_off_t, myf); invalidator_by_filename invalidator; /* query cache invalidator */ - ulong this_process; /* processid */ - ulong last_process; /* For table-change-check */ - ulong last_version; /* Version on start */ - ulong options; /* Options used */ - ulong min_pack_length; /* Theese are used by packed data */ + ulong this_process; /* processid */ + ulong last_process; /* For table-change-check */ + ulong last_version; /* Version on start */ + ulong options; /* Options used */ + ulong min_pack_length; /* Theese are used by packed data */ ulong max_pack_length; ulong state_diff_length; - uint rec_reflength; /* rec_reflength in use now */ - uint unique_name_length; + uint rec_reflength; /* rec_reflength in use now */ + uint unique_name_length; uint32 ftparsers; /* Number of distinct ftparsers + 1 */ - File kfile; /* Shared keyfile */ - File data_file; /* Shared data file */ - int mode; /* mode of file on open */ - uint reopen; /* How many times reopened */ - uint w_locks,r_locks,tot_locks; /* Number of read/write locks */ - uint blocksize; /* blocksize of keyfile */ + File kfile; /* Shared keyfile */ + File data_file; /* Shared data file */ + int mode; /* mode of file on open */ + uint reopen; /* How many times reopened */ + uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ + uint blocksize; /* blocksize of keyfile */ myf write_flag; enum data_file_type data_file_type; /* Below flag is needed to make log tables work with concurrent insert */ my_bool is_log_table; - my_bool changed, /* If changed since lock */ - global_changed, /* If changed since open */ - not_flushed, - temporary,delay_key_write, - concurrent_insert; + my_bool changed, /* If changed since lock */ + global_changed, /* If changed since open */ + not_flushed, temporary, delay_key_write, concurrent_insert; #ifdef THREAD THR_LOCK lock; - pthread_mutex_t intern_lock; /* Locking for use with _locking */ + pthread_mutex_t intern_lock; /* Locking for use with _locking */ rw_lock_t *key_root_lock; #endif my_off_t mmaped_length; - uint nonmmaped_inserts; /* counter of writing in non-mmaped - area */ + /* counter of writing in non-mmaped area */ + uint nonmmaped_inserts; rw_lock_t mmap_lock; } MYISAM_SHARE; -typedef uint mi_bit_type; - -typedef struct st_mi_bit_buff { /* 
Used for packing of record */ - mi_bit_type current_byte; - uint bits; - uchar *pos,*end,*blob_pos,*blob_end; - uint error; -} MI_BIT_BUFF; - -struct st_myisam_info { - MYISAM_SHARE *s; /* Shared between open:s */ - MI_STATUS_INFO *state,save_state; - MI_BLOB *blobs; /* Pointer to blobs */ - MI_BIT_BUFF bit_buff; +struct st_myisam_info +{ + MYISAM_SHARE *s; /* Shared between open:s */ + MI_STATUS_INFO *state, save_state; + MI_BLOB *blobs; /* Pointer to blobs */ + MI_BIT_BUFF bit_buff; /* accumulate indexfile changes between write's */ - TREE *bulk_insert; + TREE *bulk_insert; DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ MEM_ROOT ft_memroot; /* used by the parser */ - MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ - char *filename; /* parameter to open filename */ - uchar *buff, /* Temp area for key */ - *lastkey,*lastkey2; /* Last used search key */ - uchar *first_mbr_key; /* Searhed spatial key */ - byte *rec_buff; /* Tempbuff for recordpack */ - uchar *int_keypos, /* Save position for next/previous */ - *int_maxpos; /* -""- */ - uint int_nod_flag; /* -""- */ - uint32 int_keytree_version; /* -""- */ - int (*read_record)(struct st_myisam_info*, my_off_t, byte*); + MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ + char *filename; /* parameter to open filename */ + uchar *buff, /* Temp area for key */ + *lastkey, *lastkey2; /* Last used search key */ + uchar *first_mbr_key; /* Searhed spatial key */ + byte *rec_buff; /* Tempbuff for recordpack */ + uchar *int_keypos, /* Save position for next/previous */ + *int_maxpos; /* -""- */ + uint int_nod_flag; /* -""- */ + uint32 int_keytree_version; /* -""- */ + int(*read_record) (struct st_myisam_info *, my_off_t, byte *); invalidator_by_filename invalidator; /* query cache invalidator */ - ulong this_unique; /* uniq filenumber or thread */ - ulong last_unique; /* last unique number */ - ulong this_loop; /* counter for this open */ - ulong last_loop; /* last used counter */ - my_off_t lastpos, /* Last record position */ - nextpos; /* Position to next record */ + ulong this_unique; /* uniq filenumber or thread */ + ulong last_unique; /* last unique number */ + ulong this_loop; /* counter for this open */ + ulong last_loop; /* last used counter */ + my_off_t lastpos, /* Last record position */ + nextpos; /* Position to next record */ my_off_t save_lastpos; - my_off_t pos; /* Intern variable */ - my_off_t last_keypage; /* Last key page read */ - my_off_t last_search_keypage; /* Last keypage when searching */ + my_off_t pos; /* Intern variable */ + my_off_t last_keypage; /* Last key page read */ + my_off_t last_search_keypage; /* Last keypage when searching */ my_off_t dupp_key_pos; ha_checksum checksum; /* Temp storage for row checksum */ - /* QQ: the folloing two xxx_length fields should be removed, - as they are not compatible with parallel repair */ - ulong packed_length,blob_length; /* Length of found, packed record */ - int dfile; /* The datafile */ - uint opt_flag; /* Optim. for space/speed */ - uint update; /* If file changed since open */ - int lastinx; /* Last used index */ - uint lastkey_length; /* Length of key in lastkey */ - uint last_rkey_length; /* Last length in mi_rkey() */ + /* + QQ: the folloing two xxx_length fields should be removed, + as they are not compatible with parallel repair + */ + ulong packed_length, blob_length; /* Length of found, packed record */ + int dfile; /* The datafile */ + uint opt_flag; /* Optim. 
for space/speed */ + uint update; /* If file changed since open */ + int lastinx; /* Last used index */ + uint lastkey_length; /* Length of key in lastkey */ + uint last_rkey_length; /* Last length in mi_rkey() */ enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */ - uint save_lastkey_length; - uint pack_key_length; /* For MYISAMMRG */ - int errkey; /* Got last error on this key */ - int lock_type; /* How database was locked */ - int tmp_lock_type; /* When locked by readinfo */ - uint data_changed; /* Somebody has changed data */ - uint save_update; /* When using KEY_READ */ - int save_lastinx; - LIST open_list; - IO_CACHE rec_cache; /* When cacheing records */ - uint preload_buff_size; /* When preloading indexes */ - myf lock_wait; /* is 0 or MY_DONT_WAIT */ - my_bool was_locked; /* Was locked in panic */ - my_bool append_insert_at_end; /* Set if concurrent insert */ + uint save_lastkey_length; + uint pack_key_length; /* For MYISAMMRG */ + int errkey; /* Got last error on this key */ + int lock_type; /* How database was locked */ + int tmp_lock_type; /* When locked by readinfo */ + uint data_changed; /* Somebody has changed data */ + uint save_update; /* When using KEY_READ */ + int save_lastinx; + LIST open_list; + IO_CACHE rec_cache; /* When cacheing records */ + uint preload_buff_size; /* When preloading indexes */ + myf lock_wait; /* is 0 or MY_DONT_WAIT */ + my_bool was_locked; /* Was locked in panic */ + my_bool append_insert_at_end; /* Set if concurrent insert */ my_bool quick_mode; - my_bool page_changed; /* If info->buff can't be used for rnext */ - my_bool buff_used; /* If info->buff has to be reread for rnext */ - my_bool once_flags; /* For MYISAMMRG */ + /* If info->buff can't be used for rnext */ + my_bool page_changed; + /* If info->buff has to be reread for rnext */ + my_bool buff_used; + my_bool once_flags; /* For MYISAMMRG */ #ifdef __WIN__ my_bool owned_by_merge; /* This MyISAM table is part of a merge union */ #endif #ifdef THREAD THR_LOCK_DATA lock; #endif - uchar *rtree_recursion_state; /* For RTREE */ - int rtree_recursion_depth; + uchar *rtree_recursion_state; /* For RTREE */ + int rtree_recursion_depth; }; -typedef struct st_buffpek { - my_off_t file_pos; /* Where we are in the sort file */ - uchar *base,*key; /* Key pointers */ - ha_rows count; /* Number of rows in table */ - ulong mem_count; /* numbers of keys in memory */ - ulong max_keys; /* Max keys in buffert */ -} BUFFPEK; - -typedef struct st_mi_sort_param -{ - pthread_t thr; - IO_CACHE read_cache, tempfile, tempfile_for_exceptions; - DYNAMIC_ARRAY buffpek; - MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ - - /* - The next two are used to collect statistics, see update_key_parts for - description. 
- */ - ulonglong unique[MI_MAX_KEY_SEG+1]; - ulonglong notnull[MI_MAX_KEY_SEG+1]; - - my_off_t pos,max_pos,filepos,start_recpos; - uint key, key_length,real_key_length,sortbuff_size; - uint maxbuffers, keys, find_length, sort_keys_length; - my_bool fix_datafile, master; - my_bool calc_checksum; /* calculate table checksum */ - MI_KEYDEF *keyinfo; - HA_KEYSEG *seg; - SORT_INFO *sort_info; - uchar **sort_keys; - byte *rec_buff; - void *wordlist, *wordptr; - MEM_ROOT wordroot; - char *record; - MY_TMPDIR *tmpdir; - int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *); - int (*key_read)(struct st_mi_sort_param *,void *); - int (*key_write)(struct st_mi_sort_param *, const void *); - void (*lock_in_memory)(MI_CHECK *); - NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **, - uint , struct st_buffpek *, IO_CACHE *); - NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); - NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,char *, - uint, uint); -} MI_SORT_PARAM; - /* Some defines used by isam-funktions */ - -#define USE_WHOLE_KEY MI_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */ -#define F_EXTRA_LCK -1 - - /* bits in opt_flag */ -#define MEMMAP_USED 32 +#define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */ +#define F_EXTRA_LCK -1 +/* bits in opt_flag */ +#define MEMMAP_USED 32 #define REMEMBER_OLD_POS 64 -#define WRITEINFO_UPDATE_KEYFILE 1 -#define WRITEINFO_NO_UNLOCK 2 +#define WRITEINFO_UPDATE_KEYFILE 1 +#define WRITEINFO_NO_UNLOCK 2 - /* once_flags */ +/* once_flags */ #define USE_PACKED_KEYS 1 #define RRND_PRESERVE_LASTINX 2 - /* bits in state.changed */ - -#define STATE_CHANGED 1 -#define STATE_CRASHED 2 +/* bits in state.changed */ +#define STATE_CHANGED 1 +#define STATE_CRASHED 2 #define STATE_CRASHED_ON_REPAIR 4 -#define STATE_NOT_ANALYZED 8 +#define STATE_NOT_ANALYZED 8 #define STATE_NOT_OPTIMIZED_KEYS 16 -#define STATE_NOT_SORTED_PAGES 32 - - /* options to mi_read_cache */ +#define STATE_NOT_SORTED_PAGES 32 -#define READING_NEXT 1 -#define READING_HEADER 2 +/* options to mi_read_cache */ +#define READING_NEXT 1 +#define READING_HEADER 2 -#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767) +#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767) #define mi_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\ - mi_int2store(x,boh); } + mi_int2store(x,boh); } #define mi_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0) #define mi_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \ DBUG_PRINT("error", ("Marked table crashed")); \ @@ -398,13 +345,6 @@ typedef struct st_mi_sort_param /* Functions to store length of space packed keys, VARCHAR or BLOB keys */ -#define store_key_length_inc(key,length) \ -{ if ((length) < 255) \ - { *(key)++=(length); } \ - else \ - { *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \ -} - #define store_key_length(key,length) \ { if ((length) < 255) \ { *(key)=(length); } \ @@ -428,39 +368,39 @@ typedef struct st_mi_sort_param #define get_pack_length(length) ((length) >= 255 ? 
3 : 1) -#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ -#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */ -#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2) -#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */ +#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ +#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */ +#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2) +#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */ #define MI_BLOCK_INFO_HEADER_LENGTH 20 -#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */ -#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L) -#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH) -#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */ -#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */ -#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1))) +#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */ +#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L) +#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH) +#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */ +#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */ +#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1))) #define MI_REC_BUFF_OFFSET ALIGN_SIZE(MI_DYN_DELETE_BLOCK_HEADER+sizeof(uint32)) -#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */ +#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */ -#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ -#define PACK_TYPE_SPACE_FIELDS 2 -#define PACK_TYPE_ZERO_FILL 4 -#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ +#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ +#define PACK_TYPE_SPACE_FIELDS 2 +#define PACK_TYPE_ZERO_FILL 4 +#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ -#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH) +#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH) #define MI_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size)) -#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ -#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ +#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ +#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ -#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */ +#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */ #define MI_MIN_ROWS_TO_USE_BULK_INSERT 100 #define MI_MIN_ROWS_TO_DISABLE_INDEXES 100 #define MI_MIN_ROWS_TO_USE_WRITE_CACHE 10 /* The UNIQUE check is done with a hashed long key */ -#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT +#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT #define mi_unique_store(A,B) mi_int4store((A),(B)) #ifdef THREAD @@ -472,174 +412,181 @@ extern pthread_mutex_t THR_LOCK_myisam; #define rw_unlock(A) {} #endif - /* Some extern variables */ +/* Some extern variables */ extern LIST *myisam_open_list; -extern uchar NEAR myisam_file_magic[],NEAR myisam_pack_file_magic[]; -extern uint NEAR myisam_read_vec[],NEAR myisam_readnext_vec[]; +extern uchar NEAR myisam_file_magic[], NEAR myisam_pack_file_magic[]; +extern uint NEAR myisam_read_vec[], NEAR myisam_readnext_vec[]; 
extern uint myisam_quick_table_bits; extern File myisam_log_file; extern ulong myisam_pid; - /* This is used by _mi_calc_xxx_key_length och _mi_store_key */ +/* This is used by _mi_calc_xxx_key_length och _mi_store_key */ typedef struct st_mi_s_param { - uint ref_length,key_length, - n_ref_length, - n_length, - totlength, - part_of_prev_key,prev_length,pack_marker; - uchar *key, *prev_key,*next_key_pos; - bool store_not_null; + uint ref_length, key_length, + n_ref_length, + n_length, totlength, part_of_prev_key, prev_length, pack_marker; + uchar *key, *prev_key, *next_key_pos; + bool store_not_null; } MI_KEY_PARAM; - /* Prototypes for intern functions */ +/* Prototypes for intern functions */ -extern int _mi_read_dynamic_record(MI_INFO *info,my_off_t filepos,byte *buf); -extern int _mi_write_dynamic_record(MI_INFO*, const byte*); -extern int _mi_update_dynamic_record(MI_INFO*, my_off_t, const byte*); +extern int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf); +extern int _mi_write_dynamic_record(MI_INFO *, const byte *); +extern int _mi_update_dynamic_record(MI_INFO *, my_off_t, const byte *); extern int _mi_delete_dynamic_record(MI_INFO *info); -extern int _mi_cmp_dynamic_record(MI_INFO *info,const byte *record); -extern int _mi_read_rnd_dynamic_record(MI_INFO *, byte *,my_off_t, my_bool); -extern int _mi_write_blob_record(MI_INFO*, const byte*); -extern int _mi_update_blob_record(MI_INFO*, my_off_t, const byte*); -extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos,byte *buf); -extern int _mi_write_static_record(MI_INFO*, const byte*); -extern int _mi_update_static_record(MI_INFO*, my_off_t, const byte*); +extern int _mi_cmp_dynamic_record(MI_INFO *info, const byte *record); +extern int _mi_read_rnd_dynamic_record(MI_INFO *, byte *, my_off_t, my_bool); +extern int _mi_write_blob_record(MI_INFO *, const byte *); +extern int _mi_update_blob_record(MI_INFO *, my_off_t, const byte *); +extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos, byte *buf); +extern int _mi_write_static_record(MI_INFO *, const byte *); +extern int _mi_update_static_record(MI_INFO *, my_off_t, const byte *); extern int _mi_delete_static_record(MI_INFO *info); -extern int _mi_cmp_static_record(MI_INFO *info,const byte *record); -extern int _mi_read_rnd_static_record(MI_INFO*, byte *,my_off_t, my_bool); -extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length); +extern int _mi_cmp_static_record(MI_INFO *info, const byte *record); +extern int _mi_read_rnd_static_record(MI_INFO *, byte *, my_off_t, my_bool); +extern int _mi_ck_write(MI_INFO *info, uint keynr, uchar *key, uint length); extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, uint key_length, my_off_t *root, uint comp_flag); -extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root); -extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uchar *anc_buff,uchar *key_pos,uchar *key_buff, - uchar *father_buff, uchar *father_keypos, - my_off_t father_page, my_bool insert_last); -extern int _mi_split_page(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uchar *buff,uchar *key_buff, my_bool insert_last); -extern uchar *_mi_find_half_pos(uint nod_flag,MI_KEYDEF *keyinfo,uchar *page, - uchar *key,uint *return_key_length, - uchar **after_key); -extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *key_buff, - uchar *key, MI_KEY_PARAM *s_temp); -extern int 
_mi_calc_var_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *key_buff, - uchar *key, MI_KEY_PARAM *s_temp); -extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *prev_key, - uchar *key, MI_KEY_PARAM *s_temp); -extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos,uchar *org_key, - uchar *prev_key, - uchar *key, MI_KEY_PARAM *s_temp); -void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); -void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); +extern int _mi_enlarge_root(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + my_off_t *root); +extern int _mi_insert(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uchar *anc_buff, uchar *key_pos, uchar *key_buff, + uchar *father_buff, uchar *father_keypos, + my_off_t father_page, my_bool insert_last); +extern int _mi_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uchar *buff, uchar *key_buff, my_bool insert_last); +extern uchar *_mi_find_half_pos(uint nod_flag, MI_KEYDEF *keyinfo, + uchar *page, uchar *key, + uint *return_key_length, uchar ** after_key); +extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *key_buff, uchar *key, + MI_KEY_PARAM *s_temp); +extern int _mi_calc_var_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *key_buff, uchar *key, + MI_KEY_PARAM *s_temp); +extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *prev_key, uchar *key, + MI_KEY_PARAM *s_temp); +extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *prev_key, uchar *key, + MI_KEY_PARAM *s_temp); +void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); +void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); #ifdef NOT_USED -void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); +void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); #endif -void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); +void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); -extern int _mi_ck_delete(MI_INFO *info,uint keynr,uchar *key,uint key_length); -extern int _mi_readinfo(MI_INFO *info,int lock_flag,int check_keybuffer); -extern int _mi_writeinfo(MI_INFO *info,uint options); +extern int _mi_ck_delete(MI_INFO *info, uint keynr, uchar *key, + uint key_length); +extern int _mi_readinfo(MI_INFO *info, int lock_flag, int check_keybuffer); +extern int _mi_writeinfo(MI_INFO *info, uint options); extern int _mi_test_if_changed(MI_INFO *info); extern int _mi_mark_file_changed(MI_INFO *info); extern int _mi_decrement_open_count(MI_INFO *info); -extern int _mi_check_index(MI_INFO *info,int inx); -extern int _mi_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,uint key_len, - uint nextflag,my_off_t pos); -extern int _mi_bin_search(struct st_myisam_info *info,MI_KEYDEF *keyinfo, - uchar *page,uchar *key,uint key_len,uint comp_flag, - uchar * *ret_pos,uchar *buff, my_bool *was_last_key); -extern int _mi_seq_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page, - uchar *key,uint key_len,uint comp_flag, - uchar **ret_pos,uchar *buff, my_bool *was_last_key); -extern int 
_mi_prefix_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page, - uchar *key,uint key_len,uint comp_flag, - uchar **ret_pos,uchar *buff, my_bool *was_last_key); -extern my_off_t _mi_kpos(uint nod_flag,uchar *after_key); -extern void _mi_kpointer(MI_INFO *info,uchar *buff,my_off_t pos); -extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag,uchar *after_key); +extern int _mi_check_index(MI_INFO *info, int inx); +extern int _mi_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_len, uint nextflag, my_off_t pos); +extern int _mi_bin_search(struct st_myisam_info *info, MI_KEYDEF *keyinfo, + uchar *page, uchar *key, uint key_len, + uint comp_flag, uchar **ret_pos, uchar *buff, + my_bool *was_last_key); +extern int _mi_seq_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_len, uint comp_flag, + uchar ** ret_pos, uchar *buff, + my_bool *was_last_key); +extern int _mi_prefix_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_len, uint comp_flag, + uchar ** ret_pos, uchar *buff, + my_bool *was_last_key); +extern my_off_t _mi_kpos(uint nod_flag, uchar *after_key); +extern void _mi_kpointer(MI_INFO *info, uchar *buff, my_off_t pos); +extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag, uchar *after_key); extern my_off_t _mi_rec_pos(MYISAM_SHARE *info, uchar *ptr); -extern void _mi_dpointer(MI_INFO *info, uchar *buff,my_off_t pos); -extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a,uchar *b, - uint key_length,uint nextflag,uint *diff_length); -extern uint _mi_get_static_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page, - uchar *key); -extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page, - uchar *key); +extern void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos); +extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a, uchar *b, + uint key_length, uint nextflag, uint *diff_length); +extern uint _mi_get_static_key(MI_KEYDEF *keyinfo, uint nod_flag, + uchar **page, uchar *key); +extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo, uint nod_flag, uchar **page, + uchar *key); extern uint _mi_get_binary_pack_key(MI_KEYDEF *keyinfo, uint nod_flag, - uchar **page_pos, uchar *key); -extern uchar *_mi_get_last_key(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *keypos, - uchar *lastkey,uchar *endpos, - uint *return_key_length); + uchar ** page_pos, uchar *key); +extern uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo, + uchar *keypos, uchar *lastkey, uchar *endpos, + uint *return_key_length); extern uchar *_mi_get_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, - uchar *key, uchar *keypos, uint *return_key_length); -extern uint _mi_keylength(MI_KEYDEF *keyinfo,uchar *key); + uchar *key, uchar *keypos, + uint *return_key_length); +extern uint _mi_keylength(MI_KEYDEF *keyinfo, uchar *key); extern uint _mi_keylength_part(MI_KEYDEF *keyinfo, register uchar *key, - HA_KEYSEG *end); -extern uchar *_mi_move_key(MI_KEYDEF *keyinfo,uchar *to,uchar *from); -extern int _mi_search_next(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uint key_length,uint nextflag,my_off_t pos); -extern int _mi_search_first(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos); -extern int _mi_search_last(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos); -extern uchar *_mi_fetch_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page, - int level,uchar *buff,int return_buffer); -extern int _mi_write_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page, - int level, uchar *buff); -extern int _mi_dispose(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos, - 
int level); -extern my_off_t _mi_new(MI_INFO *info,MI_KEYDEF *keyinfo,int level); -extern uint _mi_make_key(MI_INFO *info,uint keynr,uchar *key, - const byte *record,my_off_t filepos); -extern uint _mi_pack_key(MI_INFO *info,uint keynr,uchar *key,uchar *old, - uint key_length, HA_KEYSEG **last_used_keyseg); -extern int _mi_read_key_record(MI_INFO *info,my_off_t filepos,byte *buf); -extern int _mi_read_cache(IO_CACHE *info,byte *buff,my_off_t pos, - uint length,int re_read_if_possibly); -extern ulonglong retrieve_auto_increment(MI_INFO *info,const byte *record); - -extern byte *mi_alloc_rec_buff(MI_INFO *,ulong, byte**); + HA_KEYSEG *end); +extern uchar *_mi_move_key(MI_KEYDEF *keyinfo, uchar *to, uchar *from); +extern int _mi_search_next(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, uint nextflag, my_off_t pos); +extern int _mi_search_first(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos); +extern int _mi_search_last(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos); +extern uchar *_mi_fetch_keypage(MI_INFO *info, MI_KEYDEF *keyinfo, + my_off_t page, int level, uchar *buff, + int return_buffer); +extern int _mi_write_keypage(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page, + int level, uchar *buff); +extern int _mi_dispose(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos, + int level); +extern my_off_t _mi_new(MI_INFO *info, MI_KEYDEF *keyinfo, int level); +extern uint _mi_make_key(MI_INFO *info, uint keynr, uchar *key, + const byte *record, my_off_t filepos); +extern uint _mi_pack_key(MI_INFO *info, uint keynr, uchar *key, uchar *old, + uint key_length, HA_KEYSEG ** last_used_keyseg); +extern int _mi_read_key_record(MI_INFO *info, my_off_t filepos, byte *buf); +extern int _mi_read_cache(IO_CACHE *info, byte *buff, my_off_t pos, + uint length, int re_read_if_possibly); +extern ulonglong retrieve_auto_increment(MI_INFO *info, const byte *record); + +extern byte *mi_alloc_rec_buff(MI_INFO *, ulong, byte **); #define mi_get_rec_buff_ptr(info,buf) \ ((((info)->s->options & HA_OPTION_PACK_RECORD) && (buf)) ? 
\ (buf) - MI_REC_BUFF_OFFSET : (buf)) #define mi_get_rec_buff_len(info,buf) \ (*((uint32 *)(mi_get_rec_buff_ptr(info,buf)))) -extern ulong _mi_rec_unpack(MI_INFO *info,byte *to,byte *from, - ulong reclength); -extern my_bool _mi_rec_check(MI_INFO *info,const char *record, byte *packpos, +extern ulong _mi_rec_unpack(MI_INFO *info, byte *to, byte *from, + ulong reclength); +extern my_bool _mi_rec_check(MI_INFO *info, const char *record, byte *packpos, ulong packed_length, my_bool with_checkum); -extern int _mi_write_part_record(MI_INFO *info,my_off_t filepos,ulong length, - my_off_t next_filepos,byte **record, - ulong *reclength,int *flag); -extern void _mi_print_key(FILE *stream,HA_KEYSEG *keyseg,const uchar *key, - uint length); -extern my_bool _mi_read_pack_info(MI_INFO *info,pbool fix_keys); -extern int _mi_read_pack_record(MI_INFO *info,my_off_t filepos,byte *buf); -extern int _mi_read_rnd_pack_record(MI_INFO*, byte *,my_off_t, my_bool); +extern int _mi_write_part_record(MI_INFO *info, my_off_t filepos, ulong length, + my_off_t next_filepos, byte ** record, + ulong *reclength, int *flag); +extern void _mi_print_key(FILE *stream, HA_KEYSEG *keyseg, const uchar *key, + uint length); +extern my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys); +extern int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf); +extern int _mi_read_rnd_pack_record(MI_INFO *, byte *, my_off_t, my_bool); extern int _mi_pack_rec_unpack(MI_INFO *info, MI_BIT_BUFF *bit_buff, byte *to, byte *from, ulong reclength); -extern ulonglong mi_safe_mul(ulonglong a,ulonglong b); +extern ulonglong mi_safe_mul(ulonglong a, ulonglong b); extern int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, - const byte *oldrec, const byte *newrec, my_off_t pos); + const byte *oldrec, const byte *newrec, + my_off_t pos); struct st_sort_info; -typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */ +typedef struct st_mi_block_info /* Parameter to _mi_get_block_info */ +{ uchar header[MI_BLOCK_INFO_HEADER_LENGTH]; ulong rec_len; ulong data_len; @@ -652,35 +599,37 @@ typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */ uint offset; } MI_BLOCK_INFO; - /* bits in return from _mi_get_block_info */ - -#define BLOCK_FIRST 1 -#define BLOCK_LAST 2 -#define BLOCK_DELETED 4 -#define BLOCK_ERROR 8 /* Wrong data */ -#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */ -#define BLOCK_FATAL_ERROR 32 /* hardware-error */ - -#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */ -#define MAXERR 20 -#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */ -#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE -#define INDEX_TMP_EXT ".TMM" -#define DATA_TMP_EXT ".TMD" - -#define UPDATE_TIME 1 -#define UPDATE_STAT 2 -#define UPDATE_SORT 4 -#define UPDATE_AUTO_INC 8 -#define UPDATE_OPEN_COUNT 16 - -#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) -#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) -#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) -#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) - -enum myisam_log_commands { - MI_LOG_OPEN,MI_LOG_WRITE,MI_LOG_UPDATE,MI_LOG_DELETE,MI_LOG_CLOSE,MI_LOG_EXTRA,MI_LOG_LOCK,MI_LOG_DELETE_ALL + /* bits in return from _mi_get_block_info */ + +#define BLOCK_FIRST 1 +#define BLOCK_LAST 2 +#define BLOCK_DELETED 4 +#define BLOCK_ERROR 8 /* Wrong data */ +#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */ +#define BLOCK_FATAL_ERROR 32 /* hardware-error */ + +#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* 
Nead for recursion */ +#define MAXERR 20 +#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */ +#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE +#define INDEX_TMP_EXT ".TMM" +#define DATA_TMP_EXT ".TMD" + +#define UPDATE_TIME 1 +#define UPDATE_STAT 2 +#define UPDATE_SORT 4 +#define UPDATE_AUTO_INC 8 +#define UPDATE_OPEN_COUNT 16 + +#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) +#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) +#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) +#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) + +enum myisam_log_commands +{ + MI_LOG_OPEN, MI_LOG_WRITE, MI_LOG_UPDATE, MI_LOG_DELETE, MI_LOG_CLOSE, + MI_LOG_EXTRA, MI_LOG_LOCK, MI_LOG_DELETE_ALL }; #define myisam_log(a,b,c,d) if (myisam_log_file >= 0) _myisam_log(a,b,c,d) @@ -690,36 +639,34 @@ enum myisam_log_commands { #define fast_mi_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _mi_writeinfo((INFO),0) #define fast_mi_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _mi_readinfo((INFO),F_RDLCK,1) -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif - -extern uint _mi_get_block_info(MI_BLOCK_INFO *,File, my_off_t); -extern uint _mi_rec_pack(MI_INFO *info,byte *to,const byte *from); + extern uint _mi_get_block_info(MI_BLOCK_INFO *, File, my_off_t); +extern uint _mi_rec_pack(MI_INFO *info, byte *to, const byte *from); extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, MI_BLOCK_INFO *info, byte **rec_buff_p, File file, my_off_t filepos); -extern void _my_store_blob_length(byte *pos,uint pack_length,uint length); -extern void _myisam_log(enum myisam_log_commands command,MI_INFO *info, - const byte *buffert,uint length); +extern void _mi_store_blob_length(byte *pos, uint pack_length, uint length); +extern void _myisam_log(enum myisam_log_commands command, MI_INFO *info, + const byte *buffert, uint length); extern void _myisam_log_command(enum myisam_log_commands command, - MI_INFO *info, const byte *buffert, - uint length, int result); -extern void _myisam_log_record(enum myisam_log_commands command,MI_INFO *info, - const byte *record,my_off_t filepos, - int result); + MI_INFO *info, const byte *buffert, + uint length, int result); +extern void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info, + const byte *record, my_off_t filepos, + int result); extern void mi_report_error(int errcode, const char *file_name); extern my_bool _mi_memmap_file(MI_INFO *info); extern void _mi_unmap_file(MI_INFO *info); extern uint save_pack_length(uint version, byte *block_buff, ulong length); -extern uint read_pack_length(uint version, const uchar *buf, ulong *length); extern uint calc_pack_length(uint version, ulong length); extern uint mi_mmap_pread(MI_INFO *info, byte *Buffer, - uint Count, my_off_t offset, myf MyFlags); + uint Count, my_off_t offset, myf MyFlags); extern uint mi_mmap_pwrite(MI_INFO *info, byte *Buffer, - uint Count, my_off_t offset, myf MyFlags); + uint Count, my_off_t offset, myf MyFlags); extern uint mi_nommap_pread(MI_INFO *info, byte *Buffer, - uint Count, my_off_t offset, myf MyFlags); + uint Count, my_off_t offset, myf MyFlags); extern uint mi_nommap_pwrite(MI_INFO *info, byte *Buffer, uint Count, my_off_t offset, myf MyFlags); @@ -727,7 +674,7 @@ uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite); uchar *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state); uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead); uint mi_base_info_write(File file, MI_BASE_INFO *base); -uchar 
@@ -690,36 +639,34 @@ enum myisam_log_commands {
 #define fast_mi_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _mi_writeinfo((INFO),0)
 #define fast_mi_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _mi_readinfo((INFO),F_RDLCK,1)
-#ifdef __cplusplus
+#ifdef __cplusplus
 extern "C" {
 #endif
-
-extern uint _mi_get_block_info(MI_BLOCK_INFO *,File, my_off_t);
-extern uint _mi_rec_pack(MI_INFO *info,byte *to,const byte *from);
+ extern uint _mi_get_block_info(MI_BLOCK_INFO *, File, my_off_t);
+extern uint _mi_rec_pack(MI_INFO *info, byte *to, const byte *from);
 extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, MI_BLOCK_INFO *info, byte **rec_buff_p, File file, my_off_t filepos);
-extern void _my_store_blob_length(byte *pos,uint pack_length,uint length);
-extern void _myisam_log(enum myisam_log_commands command,MI_INFO *info,
- const byte *buffert,uint length);
+extern void _mi_store_blob_length(byte *pos, uint pack_length, uint length);
+extern void _myisam_log(enum myisam_log_commands command, MI_INFO *info,
+ const byte *buffert, uint length);
 extern void _myisam_log_command(enum myisam_log_commands command,
- MI_INFO *info, const byte *buffert,
- uint length, int result);
-extern void _myisam_log_record(enum myisam_log_commands command,MI_INFO *info,
- const byte *record,my_off_t filepos,
- int result);
+ MI_INFO *info, const byte *buffert,
+ uint length, int result);
+extern void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info,
+ const byte *record, my_off_t filepos,
+ int result);
 extern void mi_report_error(int errcode, const char *file_name);
 extern my_bool _mi_memmap_file(MI_INFO *info);
 extern void _mi_unmap_file(MI_INFO *info);
 extern uint save_pack_length(uint version, byte *block_buff, ulong length);
-extern uint read_pack_length(uint version, const uchar *buf, ulong *length);
 extern uint calc_pack_length(uint version, ulong length);
 extern uint mi_mmap_pread(MI_INFO *info, byte *Buffer,
- uint Count, my_off_t offset, myf MyFlags);
+ uint Count, my_off_t offset, myf MyFlags);
 extern uint mi_mmap_pwrite(MI_INFO *info, byte *Buffer,
- uint Count, my_off_t offset, myf MyFlags);
+ uint Count, my_off_t offset, myf MyFlags);
 extern uint mi_nommap_pread(MI_INFO *info, byte *Buffer,
- uint Count, my_off_t offset, myf MyFlags);
+ uint Count, my_off_t offset, myf MyFlags);
 extern uint mi_nommap_pwrite(MI_INFO *info, byte *Buffer, uint Count, my_off_t offset, myf MyFlags);
@@ -727,7 +674,7 @@ uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite);
 uchar *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state);
 uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead);
 uint mi_base_info_write(File file, MI_BASE_INFO *base);
-uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base);
+uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base);
 int mi_keyseg_write(File file, const HA_KEYSEG *keyseg);
 char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg);
 uint mi_keydef_write(File file, MI_KEYDEF *keydef);
@@ -739,23 +686,23 @@ char *mi_recinfo_read(char *ptr, MI_COLUMNDEF *recinfo);
 extern int mi_disable_indexes(MI_INFO *info);
 extern int mi_enable_indexes(MI_INFO *info);
 extern int mi_indexes_are_disabled(MI_INFO *info);
-ulong _my_calc_total_blob_length(MI_INFO *info, const byte *record);
+ulong _mi_calc_total_blob_length(MI_INFO *info, const byte *record);
 ha_checksum mi_checksum(MI_INFO *info, const byte *buf);
 ha_checksum mi_static_checksum(MI_INFO *info, const byte *buf);
 my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, byte *record,
- ha_checksum unique_hash, my_off_t pos);
+ ha_checksum unique_hash, my_off_t pos);
 ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *buf);
 int _mi_cmp_static_unique(MI_INFO *info, MI_UNIQUEDEF *def,
- const byte *record, my_off_t pos);
+ const byte *record, my_off_t pos);
 int _mi_cmp_dynamic_unique(MI_INFO *info, MI_UNIQUEDEF *def,
- const byte *record, my_off_t pos);
+ const byte *record, my_off_t pos);
 int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b,
- my_bool null_are_equal);
-void mi_get_status(void* param, int concurrent_insert);
-void mi_update_status(void* param);
-void mi_restore_status(void* param);
-void mi_copy_status(void* to,void *from);
-my_bool mi_check_status(void* param);
+ my_bool null_are_equal);
+void mi_get_status(void *param, int concurrent_insert);
+void mi_update_status(void *param);
+void mi_restore_status(void *param);
+void mi_copy_status(void *to, void *from);
+my_bool mi_check_status(void *param);
 void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows);
 extern MI_INFO *test_if_reopen(char *filename);
@@ -767,22 +714,14 @@ my_bool mi_dynmap_file(MI_INFO *info, my_off_t size);
 void mi_remap_file(MI_INFO *info, my_off_t size);
 /* Functions needed by mi_check */
-volatile int *killed_ptr(MI_CHECK *param);
-void mi_check_print_error _VARARGS((MI_CHECK *param, const char *fmt,...));
-void mi_check_print_warning _VARARGS((MI_CHECK *param, const char *fmt,...));
-void mi_check_print_info _VARARGS((MI_CHECK *param, const char *fmt,...));
-int flush_pending_blocks(MI_SORT_PARAM *param);
-int sort_ft_buf_flush(MI_SORT_PARAM *sort_param);
-int thr_write_keys(MI_SORT_PARAM *sort_param);
+volatile int *killed_ptr(HA_CHECK *param);
+void mi_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...));
+void mi_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...));
+void mi_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...));
 #ifdef THREAD
 pthread_handler_t thr_find_all_keys(void *arg);
 #endif
-int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file);
-
-int sort_write_record(MI_SORT_PARAM *sort_param);
-int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong);
-
+int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file);
 #ifdef __cplusplus
 }
 #endif
-
diff --git a/storage/myisam/myisamlog.c b/storage/myisam/myisamlog.c
index 0bcf74d87a4..9e039058097 100644
--- a/storage/myisam/myisamlog.c
+++ b/storage/myisam/myisamlog.c
@@ -805,7 +805,7 @@ static int find_record_with_key(struct file_info *file_info, byte *record)
 {
   uint key;
   MI_INFO *info=file_info->isam;
-  uchar tmp_key[MI_MAX_KEY_BUFF];
+  uchar tmp_key[HA_MAX_KEY_BUFF];
   for (key=0 ; key < info->s->base.keys ; key++)
   {
diff --git a/storage/myisam/rt_index.c b/storage/myisam/rt_index.c
index edb33ec10b9..c1f59e2f65d 100644
--- a/storage/myisam/rt_index.c
+++ b/storage/myisam/rt_index.c
@@ -540,7 +540,7 @@ static int rtree_insert_req(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
   int res;
   if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length +
- MI_MAX_KEY_BUFF)))
+ HA_MAX_KEY_BUFF)))
   {
     my_errno = HA_ERR_OUT_OF_MEM;
     return -1;
@@ -652,7 +652,7 @@ static int rtree_insert_level(MI_INFO *info, uint keynr, uchar *key,
   uint nod_flag = info->s->base.key_reflength;
   if (!(new_root_buf = (uchar*)my_alloca((uint)keyinfo->block_length +
- MI_MAX_KEY_BUFF)))
+ HA_MAX_KEY_BUFF)))
   {
     my_errno = HA_ERR_OUT_OF_MEM;
     return -1;
diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c
index 53eb6b2e310..23395fd9558 100644
--- a/storage/myisam/sort.c
+++ b/storage/myisam/sort.c
@@ -15,7 +15,7 @@
 /*
   Creates a index for a database by reading keys, sorting them and outputing
-  them in sorted order through SORT_INFO functions.
+  them in sorted order through MI_SORT_INFO functions.
 */
 #include "fulltext.h"
@@ -486,8 +486,8 @@ ok:
 int thr_write_keys(MI_SORT_PARAM *sort_param)
 {
-  SORT_INFO *sort_info=sort_param->sort_info;
-  MI_CHECK *param=sort_info->param;
+  MI_SORT_INFO *sort_info=sort_param->sort_info;
+  HA_CHECK *param=sort_info->param;
   ulong length, keys;
   ulong *rec_per_key_part=param->rec_per_key_part;
   int got_error=sort_info->got_error;
@@ -829,7 +829,7 @@ static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek,
   register uint count;
   uint16 length_of_key = 0;
   uint idx;
-  uchar *buffp;
+  byte *buffp;
   if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
   {
@@ -916,7 +916,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
   for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
   {
     count+= buffpek->count;
-    buffpek->base= strpos;
+    buffpek->base= (byte*) strpos;
     buffpek->max_keys=maxcount;
     strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek, sort_length));
@@ -954,7 +954,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
   {
     if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length)))
     {
-      uchar *base=buffpek->base;
+      byte *base= buffpek->base;
       uint max_keys=buffpek->max_keys;
       VOID(queue_remove(&queue,0));
@@ -986,7 +986,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
     }
   }
   buffpek=(BUFFPEK*) queue_top(&queue);
-  buffpek->base=(uchar *) sort_keys;
+  buffpek->base= (byte*) sort_keys;
   buffpek->max_keys=keys;
   do
   {
@@ -1001,7 +1001,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
   else
   {
     register uchar *end;
-    strpos= buffpek->key;
+    strpos= (uchar*) buffpek->key;
     for (end=strpos+buffpek->mem_count*sort_length; strpos != end ; strpos+=sort_length)
diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h
index ffa55673ad1..99e7b72e5fe 100644
--- a/storage/myisammrg/ha_myisammrg.h
+++ b/storage/myisammrg/ha_myisammrg.h
@@ -46,8 +46,8 @@ class ha_myisammrg: public handler
           HA_READ_ORDER | HA_KEYREAD_ONLY);
   }
   uint max_supported_keys() const { return MI_MAX_KEY; }
-  uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; }
-  uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; }
+  uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; }
+  uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; }
   double scan_time()
   { return ulonglong2double(stats.data_file_length) / IO_SIZE + file->tables; }
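The sort.c hunks above touch merge_buffers(), which builds the final key order by repeatedly pulling the smallest head key from a set of pre-sorted runs (the BUFFPEKs) kept in a mysys QUEUE and refilled from IO_CACHE files. A minimal, self-contained sketch of that k-way merge idea, using plain in-memory integer runs and a linear scan instead of the priority queue and file buffers; all names in the sketch are invented for the illustration:

/* Toy k-way merge: each 'run' is already sorted, the merge repeatedly
   emits the smallest head element among all non-exhausted runs. */
#include <stdio.h>
#include <stddef.h>

struct run { const int *keys; size_t count, pos; };

static void merge_runs(struct run *runs, size_t nruns)
{
  for (;;)
  {
    struct run *best= NULL;
    size_t i;
    for (i= 0; i < nruns; i++)          /* linear scan for the smallest head */
    {
      struct run *r= &runs[i];
      if (r->pos < r->count &&
          (!best || r->keys[r->pos] < best->keys[best->pos]))
        best= r;
    }
    if (!best)
      break;                            /* all runs exhausted */
    printf("%d\n", best->keys[best->pos++]);
  }
}

int main(void)
{
  static const int r1[]= {1, 4, 9}, r2[]= {2, 3, 10}, r3[]= {5, 6, 7};
  struct run runs[]= { {r1, 3, 0}, {r2, 3, 0}, {r3, 3, 0} };
  merge_runs(runs, 3);                  /* prints 1..10 in order */
  return 0;
}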
diff --git a/support-files/magic b/support-files/magic
index 9844142ba93..e2bd4d70fb5 100644
--- a/support-files/magic
+++ b/support-files/magic
@@ -4,12 +4,23 @@ #
 0 beshort 0xfe01 MySQL table definition file
 >2 byte x Version %d
-0 belong&0xffffff00 0xfefe0300 MySQL MISAM index file
+0 belong&0xffffff00 0xfefe0700 MySQL MISAM index file
 >3 byte x Version %d
-0 belong&0xffffff00 0xfefe0700 MySQL MISAM compressed data file
+0 belong&0xffffff00 0xfefe0800 MySQL MISAM compressed data file
+>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0900 MySQL Maria index file
+>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0A00 MySQL Maria compressed data file
 >3 byte x Version %d
 0 belong&0xffffff00 0xfefe0500 MySQL ISAM index file
 >3 byte x Version %d
 0 belong&0xffffff00 0xfefe0600 MySQL ISAM compressed data file
 >3 byte x Version %d
 0 string \376bin MySQL replication log
+0 belong&0xffffff00 0xfefe0b00
+>4 string MARIALOG MySQL Maria transaction log file
+>>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0c00
+>4 string MACF MySQL Maria control file
+>>3 byte x Version %d
+
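The new magic entries identify Maria files by a big-endian 32-bit tag whose low byte (offset 3) carries the version, and the transaction log and control file additionally carry a string at offset 4 (MARIALOG and MACF). A rough standalone equivalent of those rules, assuming nothing beyond what the entries above state; the checker program itself is hypothetical:

/* Read the first bytes of a file and report which Maria type they match.
   The 12-byte read size and the messages are arbitrary example choices;
   the magic values and offsets are the ones listed in the patch. */
#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
  unsigned char h[12];
  unsigned long magic;
  FILE *f;

  if (argc < 2 || !(f= fopen(argv[1], "rb")))
    return 1;
  if (fread(h, 1, sizeof(h), f) != sizeof(h))
  {
    fclose(f);
    return 1;
  }
  fclose(f);

  /* "belong&0xffffff00": big-endian 32-bit word with the version byte masked */
  magic= ((unsigned long) h[0] << 24 | h[1] << 16 | h[2] << 8 | h[3]) & 0xffffff00UL;

  if (magic == 0xfefe0900UL)
    printf("Maria index file, version %d\n", h[3]);
  else if (magic == 0xfefe0A00UL)
    printf("Maria compressed data file, version %d\n", h[3]);
  else if (magic == 0xfefe0b00UL && !memcmp(h + 4, "MARIALOG", 8))
    printf("Maria transaction log file, version %d\n", h[3]);
  else if (magic == 0xfefe0c00UL && !memcmp(h + 4, "MACF", 4))
    printf("Maria control file, version %d\n", h[3]);
  else
    printf("not a Maria file\n");
  return 0;
}

Once the patched magic file is installed, file(1) reports the same information directly from these patterns.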
diff --git a/unittest/Makefile.am b/unittest/Makefile.am
index 6197586b008..cc617292941 100644
--- a/unittest/Makefile.am
+++ b/unittest/Makefile.am
@@ -13,7 +13,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SUBDIRS = mytap . mysys examples
+SUBDIRS = mytap mysys examples
 EXTRA_DIST = unit.pl
 CLEANFILES = unit
diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am
index 54b3d203e10..229e8f0339b 100644
--- a/unittest/mysys/Makefile.am
+++ b/unittest/mysys/Makefile.am
@@ -13,13 +13,11 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include
-AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+ -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+
 LDADD = $(top_builddir)/unittest/mytap/libmytap.a \
 $(top_builddir)/mysys/libmysys.a \
 $(top_builddir)/dbug/libdbug.a \
 $(top_builddir)/strings/libmystrings.a
-
-noinst_PROGRAMS = bitmap-t base64-t my_atomic-t
-
diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c
index c4ba7850ae1..d3be33f4163 100644
--- a/unittest/mysys/my_atomic-t.c
+++ b/unittest/mysys/my_atomic-t.c
@@ -13,27 +13,27 @@
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-#include <my_global.h>
 #include <tap.h>
+
+#include <my_global.h>
 #include <my_sys.h>
 #include <my_atomic.h>
+#include <lf.h>
-int32 a32,b32,c32;
+volatile uint32 a32,b32;
+volatile int32 c32, N;
 my_atomic_rwlock_t rwl;
-
-pthread_attr_t thr_attr;
-pthread_mutex_t mutex;
-pthread_cond_t cond;
-int N;
+LF_ALLOCATOR lf_allocator;
+LF_HASH lf_hash;
 /* add and sub a random number in a loop. Must get 0 at the end */
 pthread_handler_t test_atomic_add_handler(void *arg)
 {
-  int m=*(int *)arg;
+  int m= (*(int *)arg)/2;
   int32 x;
-  for (x=((int)((long)(&m))); m ; m--)
+  for (x= ((int)(intptr)(&m)); m ; m--)
   {
-    x=x*m+0x87654321;
+    x= (x*m+0x87654321) & INT_MAX32;
     my_atomic_rwlock_wrlock(&rwl);
     my_atomic_add32(&a32, x);
     my_atomic_rwlock_wrunlock(&rwl);
@@ -42,10 +42,6 @@ pthread_handler_t test_atomic_add_handler(void *arg)
     my_atomic_add32(&a32, -x);
     my_atomic_rwlock_wrunlock(&rwl);
   }
-  pthread_mutex_lock(&mutex);
-  N--;
-  if (!N) pthread_cond_signal(&cond);
-  pthread_mutex_unlock(&mutex);
   return 0;
 }
@@ -57,30 +53,24 @@ pthread_handler_t test_atomic_add_handler(void *arg)
   5. subtract result from a32
   must get 0 in a32 at the end
 */
-pthread_handler_t test_atomic_swap_handler(void *arg)
+pthread_handler_t test_atomic_fas_handler(void *arg)
 {
-  int m=*(int *)arg;
-  int32 x;
-
-  my_atomic_rwlock_wrlock(&rwl);
-  x=my_atomic_add32(&b32, 1);
-  my_atomic_rwlock_wrunlock(&rwl);
+  int m= *(int *)arg;
+  uint32 x= my_atomic_add32(&b32, 1);
-  my_atomic_rwlock_wrlock(&rwl);
   my_atomic_add32(&a32, x);
-  my_atomic_rwlock_wrunlock(&rwl);
   for (; m ; m--)
   {
     my_atomic_rwlock_wrlock(&rwl);
-    x=my_atomic_swap32(&c32, x);
+    x= my_atomic_fas32(&c32, x);
     my_atomic_rwlock_wrunlock(&rwl);
   }
   if (!x)
   {
     my_atomic_rwlock_wrlock(&rwl);
-    x=my_atomic_swap32(&c32, x);
+    x= my_atomic_fas32(&c32, x);
     my_atomic_rwlock_wrunlock(&rwl);
   }
@@ -88,113 +78,222 @@ pthread_handler_t test_atomic_swap_handler(void *arg)
   my_atomic_add32(&a32, -x);
   my_atomic_rwlock_wrunlock(&rwl);
-  pthread_mutex_lock(&mutex);
-  N--;
-  if (!N) pthread_cond_signal(&cond);
-  pthread_mutex_unlock(&mutex);
   return 0;
 }
 /*
   same as test_atomic_add_handler, but my_atomic_add32 is emulated with
-  (slower) my_atomic_cas32
+  my_atomic_cas32 - notice that the slowdown is proportional to the
+  number of CPUs
 */
 pthread_handler_t test_atomic_cas_handler(void *arg)
 {
-  int m=*(int *)arg, ok;
-  int32 x,y;
-  for (x=((int)((long)(&m))); m ; m--)
+  int m= (*(int *)arg)/2, ok= 0;
+  int32 x, y;
+  for (x= ((int)(intptr)(&m)); m ; m--)
   {
     my_atomic_rwlock_wrlock(&rwl);
-    y=my_atomic_load32(&a32);
+    y= my_atomic_load32(&a32);
     my_atomic_rwlock_wrunlock(&rwl);
-
-    x=x*m+0x87654321;
+    x= (x*m+0x87654321) & INT_MAX32;
     do
     {
       my_atomic_rwlock_wrlock(&rwl);
-      ok=my_atomic_cas32(&a32, &y, y+x);
+      ok= my_atomic_cas32(&a32, &y, (uint32)y+x);
       my_atomic_rwlock_wrunlock(&rwl);
-    } while (!ok);
+    } while (!ok) ;
     do
     {
       my_atomic_rwlock_wrlock(&rwl);
-      ok=my_atomic_cas32(&a32, &y, y-x);
+      ok= my_atomic_cas32(&a32, &y, y-x);
      my_atomic_rwlock_wrunlock(&rwl);
-    } while (!ok);
+    } while (!ok) ;
   }
-  pthread_mutex_lock(&mutex);
-  N--;
-  if (!N) pthread_cond_signal(&cond);
-  pthread_mutex_unlock(&mutex);
+
   return 0;
 }
+
+/*
+  pin allocator - alloc and release an element in a loop
+*/
+pthread_handler_t test_lf_pinbox(void *arg)
+{
+  int m= *(int *)arg;
+  int32 x= 0;
+  LF_PINS *pins;
+
+  pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+
+  for (x= ((int)(intptr)(&m)); m ; m--)
+  {
+    lf_pinbox_put_pins(pins);
+    pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+  }
+  lf_pinbox_put_pins(pins);
+  return 0;
+}
+
+typedef union {
+  int32 data;
+  void *not_used;
+} TLA;
+
+pthread_handler_t test_lf_alloc(void *arg)
+{
+  int m= (*(int *)arg)/2;
+  int32 x,y= 0;
+  LF_PINS *pins;
+
+  pins= lf_alloc_get_pins(&lf_allocator);
+
+  for (x= ((int)(intptr)(&m)); m ; m--)
+  {
+    TLA *node1, *node2;
+    x= (x*m+0x87654321) & INT_MAX32;
+    node1= (TLA *)lf_alloc_new(pins);
+    node1->data= x;
+    y+= node1->data;
+    node1->data= 0;
+    node2= (TLA *)lf_alloc_new(pins);
+    node2->data= x;
+    y-= node2->data;
+    node2->data= 0;
+    lf_alloc_free(pins, node1);
+    lf_alloc_free(pins, node2);
+  }
+  lf_alloc_put_pins(pins);
+  my_atomic_rwlock_wrlock(&rwl);
+  my_atomic_add32(&a32, y);
+
+  if (my_atomic_add32(&N, -1) == 1)
+  {
+    diag("%d mallocs, %d pins in stack",
+         lf_allocator.mallocs, lf_allocator.pinbox.pins_in_stack);
+#ifdef MY_LF_EXTRA_DEBUG
+    a32|= lf_allocator.mallocs - lf_alloc_in_pool(&lf_allocator);
+#endif
+  }
+  my_atomic_rwlock_wrunlock(&rwl);
+  return 0;
+}
+
+#define N_TLH 1000
+pthread_handler_t test_lf_hash(void *arg)
+{
+  int m= (*(int *)arg)/(2*N_TLH);
+  int32 x,y,z,sum= 0, ins= 0;
+  LF_PINS *pins;
+
+  pins= lf_hash_get_pins(&lf_hash);
+
+  for (x= ((int)(intptr)(&m)); m ; m--)
+  {
+    int i;
+    y= x;
+    for (i= 0; i < N_TLH; i++)
+    {
+      x= (x*(m+i)+0x87654321) & INT_MAX32;
+      z= (x<0) ? -x : x;
+      if (lf_hash_insert(&lf_hash, pins, &z))
+      {
+        sum+= z;
+        ins++;
+      }
+    }
+    for (i= 0; i < N_TLH; i++)
+    {
+      y= (y*(m+i)+0x87654321) & INT_MAX32;
+      z= (y<0) ? -y : y;
+      if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z)))
+        sum-= z;
+    }
+  }
+  lf_hash_put_pins(pins);
+  my_atomic_rwlock_wrlock(&rwl);
+  my_atomic_add32(&a32, sum);
+  my_atomic_add32(&b32, ins);
+
+  if (my_atomic_add32(&N, -1) == 1)
+  {
+    diag("%d mallocs, %d pins in stack, %d hash size, %d inserts",
+         lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_stack,
+         lf_hash.size, b32);
+    a32|= lf_hash.count;
+  }
+  my_atomic_rwlock_wrunlock(&rwl);
   return 0;
 }
 void test_atomic(const char *test, pthread_handler handler, int n, int m)
 {
-  pthread_t t;
-  ulonglong now=my_getsystime();
+  pthread_t *threads;
+  ulonglong now= my_getsystime();
+  int i;
   a32= 0;
   b32= 0;
   c32= 0;
+  threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0));
+  if (!threads)
+  {
+    diag("Out of memory");
+    abort();
+  }
+
   diag("Testing %s with %d threads, %d iterations... ", test, n, m);
-  for (N=n ; n ; n--)
+  N= n;
+  for (i= 0 ; i < n ; i++)
   {
-    if (pthread_create(&t, &thr_attr, handler, &m) != 0)
+    if (pthread_create(threads+i, 0, handler, &m) != 0)
     {
       diag("Could not create thread");
-      a32= 1;
-      goto err;
+      abort();
     }
   }
-
-  pthread_mutex_lock(&mutex);
-  while (N)
-    pthread_cond_wait(&cond, &mutex);
-  pthread_mutex_unlock(&mutex);
-  now=my_getsystime()-now;
-err:
-  ok(a32 == 0, "tested %s in %g secs", test, ((double)now)/1e7);
+  for (i= 0 ; i < n ; i++)
+    pthread_join(threads[i], 0);
+  now= my_getsystime()-now;
+  ok(a32 == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, a32);
+  my_free((void *)threads, MYF(0));
 }
+
 int main()
 {
   int err;
-  diag("N CPUs: %d", my_getncpus());
+  my_init();
+
+  diag("N CPUs: %d, atomic ops: %s", my_getncpus(), MY_ATOMIC_MODE);
   err= my_atomic_initialize();
-  plan(4);
+  plan(7);
   ok(err == 0, "my_atomic_initialize() returned %d", err);
-  pthread_attr_init(&thr_attr);
-  pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
-  pthread_mutex_init(&mutex, 0);
-  pthread_cond_init(&cond, 0);
   my_atomic_rwlock_init(&rwl);
+  lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used));
+  lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0,
+               &my_charset_bin);
-#ifdef HPUX11
-#define CYCLES 1000
+#ifdef MY_ATOMIC_MODE_RWLOCKS
+#define CYCLES 3000
 #else
-#define CYCLES 10000
+#define CYCLES 300000
 #endif
 #define THREADS 100
-  test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS, CYCLES);
-  test_atomic("my_atomic_swap32", test_atomic_swap_handler, THREADS, CYCLES);
-  test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS, CYCLES);
+
+  test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS,CYCLES);
+  test_atomic("my_atomic_fas32", test_atomic_fas_handler, THREADS,CYCLES);
+  test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS,CYCLES);
+  test_atomic("lf_pinbox", test_lf_pinbox, THREADS,CYCLES);
+  test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES);
+  test_atomic("lf_hash", test_lf_hash, THREADS,CYCLES/10);
+
+  lf_hash_destroy(&lf_hash);
+  lf_alloc_destroy(&lf_allocator);
   /* workaround until we know why this includes dbug but not safemalloc */
   if(err)
   {
     my_thread_global_init();
     my_free(my_malloc(0, MYF(0)), MYF(0));
   }
-  /*
-    workaround until we know why it crashes randomly on some machine
-    (BUG#22320).
-  */
-  sleep(2);
-
-  pthread_mutex_destroy(&mutex);
-  pthread_cond_destroy(&cond);
-  pthread_attr_destroy(&thr_attr);
   my_atomic_rwlock_destroy(&rwl);
+  my_end(0);
   return exit_status();
 }
diff --git a/unittest/mytap/tap.c b/unittest/mytap/tap.c
index 4e053e3e745..a99da3fd975 100644
--- a/unittest/mytap/tap.c
+++ b/unittest/mytap/tap.c
@@ -169,6 +169,8 @@ static signal_entry install_signal[]= {
 void plan(int const count)
 {
+
+  setvbuf(tapout, 0, _IONBF, 0); /* provide output at once */
   /* Install signal handler */