-rw-r--r--  .bzrignore | 28
-rwxr-xr-x  BUILD/SETUP.sh | 5
-rwxr-xr-x  BitKeeper/triggers/post-commit | 51
-rw-r--r--  Makefile.am | 2
-rw-r--r--  configure.in | 5
-rw-r--r--  include/Makefile.am | 6
-rw-r--r--  include/atomic/nolock.h | 29
-rw-r--r--  include/atomic/rwlock.h | 23
-rw-r--r--  include/atomic/x86-gcc.h | 31
-rw-r--r--  include/atomic/x86-msvc.h | 12
-rw-r--r--  include/ft_global.h | 11
-rw-r--r--  include/keycache.h | 3
-rw-r--r--  include/lf.h | 258
-rw-r--r--  include/m_string.h | 7
-rw-r--r--  include/maria.h | 439
-rw-r--r--  include/my_atomic.h | 203
-rw-r--r--  include/my_base.h | 8
-rw-r--r--  include/my_bit.h | 107
-rw-r--r--  include/my_dbug.h | 2
-rw-r--r--  include/my_global.h | 26
-rw-r--r--  include/my_handler.h | 48
-rw-r--r--  include/my_sys.h | 11
-rw-r--r--  include/myisam.h | 268
-rw-r--r--  include/myisamchk.h | 164
-rw-r--r--  include/pagecache.h | 228
-rw-r--r--  include/wqueue.h | 26
-rw-r--r--  libmysql/CMakeLists.txt | 1
-rw-r--r--  libmysql/Makefile.shared | 2
-rw-r--r--  mysql-test/include/have_maria.inc | 4
-rw-r--r--  mysql-test/r/events_logs_tests.result | 2
-rw-r--r--  mysql-test/r/have_maria.require | 2
-rw-r--r--  mysql-test/r/maria.result | 1788
-rw-r--r--  mysql-test/r/ps_maria.result | 3137
-rw-r--r--  mysql-test/t/disabled.def | 3
-rw-r--r--  mysql-test/t/events_logs_tests.test | 2
-rw-r--r--  mysql-test/t/maria.test | 1082
-rw-r--r--  mysql-test/t/ps_maria.test | 46
-rw-r--r--  mysys/Makefile.am | 7
-rw-r--r--  mysys/array.c | 56
-rw-r--r--  mysys/lf_alloc-pin.c | 443
-rw-r--r--  mysys/lf_dynarray.c | 204
-rw-r--r--  mysys/lf_hash.c | 400
-rw-r--r--  mysys/mf_keycache.c | 24
-rw-r--r--  mysys/mf_keycaches.c | 14
-rwxr-xr-x  mysys/mf_pagecache.c | 4102
-rw-r--r--  mysys/my_atomic.c | 9
-rw-r--r--  mysys/my_bit.c | 100
-rw-r--r--  mysys/my_bitmap.c | 1
-rw-r--r--  mysys/my_create.c | 7
-rw-r--r--  mysys/my_delete.c | 3
-rw-r--r--  mysys/my_getsystime.c | 4
-rw-r--r--  mysys/my_handler.c | 23
-rw-r--r--  mysys/my_init.c | 1
-rw-r--r--  mysys/my_open.c | 1
-rw-r--r--  mysys/my_pread.c | 9
-rw-r--r--  mysys/my_rename.c | 16
-rw-r--r--  mysys/my_symlink.c | 2
-rw-r--r--  mysys/my_sync.c | 79
-rw-r--r--  mysys/wqueue.c | 167
-rw-r--r--  plugin/daemon_example/daemon_example.cc | 4
-rw-r--r--  sql/filesort.cc | 12
-rw-r--r--  sql/gen_lex_hash.cc | 4
-rw-r--r--  sql/handler.h | 2
-rw-r--r--  sql/item_func.cc | 1
-rw-r--r--  sql/item_func.h | 1
-rw-r--r--  sql/log.cc | 5
-rw-r--r--  sql/mysql_priv.h | 4
-rw-r--r--  sql/mysqld.cc | 113
-rw-r--r--  sql/set_var.cc | 40
-rw-r--r--  sql/set_var.h | 1
-rw-r--r--  sql/sql_class.h | 5
-rw-r--r--  sql/sql_parse.cc | 4
-rw-r--r--  sql/sql_select.cc | 1
-rw-r--r--  sql/sql_sort.h | 11
-rw-r--r--  sql/sql_test.cc | 4
-rw-r--r--  sql/uniques.cc | 2
-rw-r--r--  sql/unireg.cc | 6
-rw-r--r--  storage/Makefile.am | 7
-rw-r--r--  storage/maria/CMakeLists.txt | 1
-rw-r--r--  storage/maria/Makefile.am | 162
-rw-r--r--  storage/maria/ft_maria.c | 49
-rw-r--r--  storage/maria/ha_maria.cc | 1896
-rw-r--r--  storage/maria/ha_maria.h | 145
-rw-r--r--  storage/maria/lockman.c | 787
-rw-r--r--  storage/maria/lockman.h | 77
-rw-r--r--  storage/maria/ma_bitmap.c | 1704
-rw-r--r--  storage/maria/ma_blockrec.c | 2743
-rw-r--r--  storage/maria/ma_blockrec.h | 160
-rw-r--r--  storage/maria/ma_cache.c | 108
-rw-r--r--  storage/maria/ma_changed.c | 34
-rw-r--r--  storage/maria/ma_check.c | 5096
-rw-r--r--  storage/maria/ma_checkpoint.c | 429
-rw-r--r--  storage/maria/ma_checkpoint.h | 35
-rw-r--r--  storage/maria/ma_checksum.c | 68
-rw-r--r--  storage/maria/ma_close.c | 133
-rw-r--r--  storage/maria/ma_control_file.c | 320
-rw-r--r--  storage/maria/ma_control_file.h | 77
-rw-r--r--  storage/maria/ma_create.c | 1043
-rw-r--r--  storage/maria/ma_dbug.c | 193
-rw-r--r--  storage/maria/ma_delete.c | 891
-rw-r--r--  storage/maria/ma_delete_all.c | 102
-rw-r--r--  storage/maria/ma_delete_table.c | 81
-rw-r--r--  storage/maria/ma_dynrec.c | 1954
-rw-r--r--  storage/maria/ma_extra.c | 458
-rw-r--r--  storage/maria/ma_ft_boolean_search.c | 964
-rw-r--r--  storage/maria/ma_ft_eval.c | 255
-rw-r--r--  storage/maria/ma_ft_eval.h | 42
-rw-r--r--  storage/maria/ma_ft_nlq_search.c | 371
-rw-r--r--  storage/maria/ma_ft_parser.c | 425
-rw-r--r--  storage/maria/ma_ft_stem.c | 19
-rw-r--r--  storage/maria/ma_ft_test1.c | 318
-rw-r--r--  storage/maria/ma_ft_test1.h | 421
-rw-r--r--  storage/maria/ma_ft_update.c | 353
-rw-r--r--  storage/maria/ma_ftdefs.h | 153
-rw-r--r--  storage/maria/ma_fulltext.h | 28
-rw-r--r--  storage/maria/ma_info.c | 137
-rw-r--r--  storage/maria/ma_init.c | 60
-rw-r--r--  storage/maria/ma_key.c | 591
-rw-r--r--  storage/maria/ma_keycache.c | 164
-rw-r--r--  storage/maria/ma_least_recently_dirtied.c | 106
-rw-r--r--  storage/maria/ma_least_recently_dirtied.h | 26
-rw-r--r--  storage/maria/ma_locking.c | 512
-rw-r--r--  storage/maria/ma_loghandler.c | 5296
-rw-r--r--  storage/maria/ma_loghandler.h | 182
-rw-r--r--  storage/maria/ma_loghandler_lsn.h | 56
-rw-r--r--  storage/maria/ma_open.c | 1324
-rw-r--r--  storage/maria/ma_packrec.c | 1426
-rw-r--r--  storage/maria/ma_page.c | 158
-rw-r--r--  storage/maria/ma_panic.c | 126
-rw-r--r--  storage/maria/ma_preload.c | 117
-rw-r--r--  storage/maria/ma_range.c | 258
-rw-r--r--  storage/maria/ma_recovery.c | 268
-rw-r--r--  storage/maria/ma_recovery.h | 26
-rw-r--r--  storage/maria/ma_rename.c | 89
-rw-r--r--  storage/maria/ma_rfirst.c | 27
-rw-r--r--  storage/maria/ma_rkey.c | 179
-rw-r--r--  storage/maria/ma_rlast.c | 27
-rw-r--r--  storage/maria/ma_rnext.c | 123
-rw-r--r--  storage/maria/ma_rnext_same.c | 108
-rw-r--r--  storage/maria/ma_rprev.c | 89
-rw-r--r--  storage/maria/ma_rrnd.c | 54
-rw-r--r--  storage/maria/ma_rsame.c | 70
-rw-r--r--  storage/maria/ma_rsamepos.c | 59
-rw-r--r--  storage/maria/ma_rt_index.c | 1093
-rw-r--r--  storage/maria/ma_rt_index.h | 48
-rw-r--r--  storage/maria/ma_rt_key.c | 102
-rw-r--r--  storage/maria/ma_rt_key.h | 33
-rw-r--r--  storage/maria/ma_rt_mbr.c | 807
-rw-r--r--  storage/maria/ma_rt_mbr.h | 39
-rw-r--r--  storage/maria/ma_rt_split.c | 351
-rw-r--r--  storage/maria/ma_rt_test.c | 474
-rw-r--r--  storage/maria/ma_scan.c | 61
-rw-r--r--  storage/maria/ma_search.c | 1906
-rw-r--r--  storage/maria/ma_sort.c | 1056
-rw-r--r--  storage/maria/ma_sp_defs.h | 48
-rw-r--r--  storage/maria/ma_sp_key.c | 300
-rw-r--r--  storage/maria/ma_sp_test.c | 569
-rw-r--r--  storage/maria/ma_static.c | 67
-rw-r--r--  storage/maria/ma_statrec.c | 300
-rw-r--r--  storage/maria/ma_test1.c | 714
-rw-r--r--  storage/maria/ma_test2.c | 1100
-rw-r--r--  storage/maria/ma_test3.c | 501
-rw-r--r--  storage/maria/ma_test_all.res | 62
-rwxr-xr-x  storage/maria/ma_test_all.sh | 192
-rw-r--r--  storage/maria/ma_unique.c | 236
-rw-r--r--  storage/maria/ma_update.c | 251
-rw-r--r--  storage/maria/ma_write.c | 1081
-rw-r--r--  storage/maria/maria_chk.c | 1823
-rw-r--r--  storage/maria/maria_def.h | 853
-rw-r--r--  storage/maria/maria_ftdump.c | 279
-rw-r--r--  storage/maria/maria_pack.c | 3212
-rwxr-xr-x  storage/maria/maria_rename.sh | 17
-rw-r--r--  storage/maria/plug.in | 8
-rw-r--r--  storage/maria/tablockman.c | 677
-rw-r--r--  storage/maria/tablockman.h | 88
-rwxr-xr-x  storage/maria/test_pack | 10
-rw-r--r--  storage/maria/trnman.c | 519
-rw-r--r--  storage/maria/trnman.h | 57
-rw-r--r--  storage/maria/unittest/Makefile.am | 87
-rw-r--r--  storage/maria/unittest/lockman-t.c | 309
-rw-r--r--  storage/maria/unittest/lockman1-t.c | 335
-rw-r--r--  storage/maria/unittest/lockman2-t.c | 361
-rw-r--r--  storage/maria/unittest/ma_control_file-t.c | 446
-rw-r--r--  storage/maria/unittest/ma_maria_log_cleanup.c | 45
-rw-r--r--  storage/maria/unittest/ma_test_loghandler-t.c | 578
-rw-r--r--  storage/maria/unittest/ma_test_loghandler_multigroup-t.c | 600
-rw-r--r--  storage/maria/unittest/ma_test_loghandler_multithread-t.c | 457
-rw-r--r--  storage/maria/unittest/ma_test_loghandler_pagecache-t.c | 149
-rwxr-xr-x  storage/maria/unittest/mf_pagecache_consist.c | 458
-rw-r--r--  storage/maria/unittest/mf_pagecache_single.c | 580
-rw-r--r--  storage/maria/unittest/test_file.c | 68
-rw-r--r--  storage/maria/unittest/test_file.h | 14
-rw-r--r--  storage/maria/unittest/trnman-t.c | 185
-rw-r--r--  storage/myisam/Makefile.am | 4
-rw-r--r--  storage/myisam/ft_boolean_search.c | 11
-rw-r--r--  storage/myisam/ft_myisam.c | 36
-rw-r--r--  storage/myisam/ft_nlq_search.c | 2
-rw-r--r--  storage/myisam/ft_parser.c | 2
-rw-r--r--  storage/myisam/ft_static.c | 13
-rw-r--r--  storage/myisam/ft_stopwords.c | 2
-rw-r--r--  storage/myisam/ft_update.c | 4
-rw-r--r--  storage/myisam/fulltext.h | 10
-rw-r--r--  storage/myisam/ha_myisam.cc | 39
-rw-r--r--  storage/myisam/ha_myisam.h | 7
-rw-r--r--  storage/myisam/mi_check.c | 171
-rw-r--r--  storage/myisam/mi_create.c | 21
-rw-r--r--  storage/myisam/mi_delete.c | 14
-rw-r--r--  storage/myisam/mi_dynrec.c | 10
-rw-r--r--  storage/myisam/mi_key.c | 2
-rw-r--r--  storage/myisam/mi_log.c | 2
-rw-r--r--  storage/myisam/mi_open.c | 15
-rw-r--r--  storage/myisam/mi_packrec.c | 5
-rw-r--r--  storage/myisam/mi_range.c | 2
-rw-r--r--  storage/myisam/mi_search.c | 10
-rw-r--r--  storage/myisam/mi_test1.c | 2
-rw-r--r--  storage/myisam/mi_test2.c | 3
-rw-r--r--  storage/myisam/mi_unique.c | 2
-rw-r--r--  storage/myisam/mi_update.c | 2
-rw-r--r--  storage/myisam/mi_write.c | 12
-rw-r--r--  storage/myisam/myisamchk.c | 34
-rw-r--r--  storage/myisam/myisamdef.h | 875
-rw-r--r--  storage/myisam/myisamlog.c | 2
-rw-r--r--  storage/myisam/rt_index.c | 4
-rw-r--r--  storage/myisam/sort.c | 16
-rw-r--r--  storage/myisammrg/ha_myisammrg.h | 4
-rw-r--r--  support-files/magic | 15
-rw-r--r--  unittest/Makefile.am | 2
-rw-r--r--  unittest/mysys/Makefile.am | 8
-rw-r--r--  unittest/mysys/my_atomic-t.c | 257
-rw-r--r--  unittest/mytap/tap.c | 2
230 files changed, 71635 insertions, 1315 deletions
diff --git a/.bzrignore b/.bzrignore
index 9f59f057f4f..297338d08dc 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -1,6 +1,7 @@
*-t
*.Plo
*.Po
+*.Tpo
*.a
*.bb
*.bbg
@@ -1058,6 +1059,7 @@ libmysqld/ha_innobase.cc
libmysqld/ha_innodb.cc
libmysqld/ha_isam.cc
libmysqld/ha_isammrg.cc
+libmysqld/ha_maria.cc
libmysqld/ha_myisam.cc
libmysqld/ha_myisammrg.cc
libmysqld/ha_ndbcluster.cc
@@ -2385,6 +2387,20 @@ storage/innobase/ut/.deps/ut0rnd.Po
storage/innobase/ut/.deps/ut0ut.Po
storage/innobase/ut/.deps/ut0vec.Po
storage/innobase/ut/.deps/ut0wqueue.Po
+storage/maria/*.MAD
+storage/maria/*.MAI
+storage/maria/ma_rt_test
+storage/maria/ma_sp_test
+storage/maria/ma_test1
+storage/maria/ma_test2
+storage/maria/ma_test3
+storage/maria/ma_test_all
+storage/maria/maria.log
+storage/maria/maria_chk
+storage/maria/maria_ftdump
+storage/maria/maria_log
+storage/maria/maria_pack
+storage/maria/unittest/maria_control
storage/myisam/.deps/ft_boolean_search.Po
storage/myisam/.deps/ft_nlq_search.Po
storage/myisam/.deps/ft_parser.Po
@@ -2922,13 +2938,25 @@ unittest/examples/.deps/simple-t.Po
unittest/examples/.deps/skip-t.Po
unittest/examples/.deps/skip_all-t.Po
unittest/examples/.deps/todo-t.Po
+unittest/maria_control
unittest/mysys/*.t
unittest/mysys/.deps/base64-t.Po
unittest/mysys/.deps/bitmap-t.Po
unittest/mysys/.deps/my_atomic-t.Po
+unittest/mysys/mf_pagecache_consist_1k-t-big
+unittest/mysys/mf_pagecache_consist_1kHC-t-big
+unittest/mysys/mf_pagecache_consist_1kRD-t-big
+unittest/mysys/mf_pagecache_consist_1kWR-t-big
+unittest/mysys/mf_pagecache_consist_64k-t-big
+unittest/mysys/mf_pagecache_consist_64kHC-t-big
+unittest/mysys/mf_pagecache_consist_64kRD-t-big
+unittest/mysys/mf_pagecache_consist_64kWR-t-big
+unittest/mysys/mf_pagecache_single_64k-t-big
unittest/mytap/.deps/tap.Po
unittest/mytap/t/*.t
unittest/mytap/t/.deps/basic-t.Po
+unittest/page_cache_test_file_1
+unittest/pagecache_debug.log
unittest/unit
vi.h
vio/*.ds?
diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh
index 11dd67d5de0..5dca590849e 100755
--- a/BUILD/SETUP.sh
+++ b/BUILD/SETUP.sh
@@ -121,8 +121,9 @@ valgrind_flags="-USAFEMALLOC -UFORCE_INIT_OF_VARS -DHAVE_purify "
valgrind_flags="$valgrind_flags -DMYSQL_SERVER_SUFFIX=-valgrind-max"
#
# Used in -debug builds
-debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS "
+debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS"
debug_cflags="$debug_cflags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC -DSAFE_MUTEX"
+debug_cflags="$debug_cflags -DMY_LF_EXTRA_DEBUG"
error_inject="--with-error-inject "
#
# Base C++ flags for all builds
@@ -154,8 +155,6 @@ then
base_configs="$base_configs --with-libedit"
fi
-static_link="--with-mysqld-ldflags=-all-static "
-static_link="$static_link --with-client-ldflags=-all-static"
# we need local-infile in all binaries for rpl000001
# if you need to disable local-infile in the client, write a build script
# and unset local_infile_configs
diff --git a/BitKeeper/triggers/post-commit b/BitKeeper/triggers/post-commit
index 86f3db012ee..0830070ccf9 100755
--- a/BitKeeper/triggers/post-commit
+++ b/BitKeeper/triggers/post-commit
@@ -5,7 +5,7 @@ FROM=$USER@mysql.com
COMMITS=commits@lists.mysql.com
DOCS=docs-commit@mysql.com
LIMIT=10000
-VERSION="5.1"
+VERSION="maria"
BKROOT=`bk root`
if [ -x /usr/sbin/sendmail ]; then
@@ -76,55 +76,6 @@ EOF
) > $BKROOT/BitKeeper/tmp/dev_public.txt
$SENDMAIL -t < $BKROOT/BitKeeper/tmp/dev_public.txt
-
-#++
-# commits@ mail
-#--
- echo "Notifying commits list at $COMMITS"
- (
- cat <<EOF
-List-ID: <bk.mysql-$VERSION>
-From: $FROM
-To: $COMMITS
-Subject: bk commit into $VERSION tree ($CHANGESET)$BS
-X-CSetKey: <$CSETKEY>
-$BH
-Below is the list of changes that have just been committed into a local
-$VERSION repository of $USER. When $USER does a push these changes will
-be propagated to the main repository and, within 24 hours after the
-push, to the public repository.
-For information on how to access the public repository
-see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
-
-EOF
- bk changes -v -r+
- bk cset -r+ -d
- ) | bk sed -e ${LIMIT}q > $BKROOT/BitKeeper/tmp/commits.txt
-
-$SENDMAIL -t < $BKROOT/BitKeeper/tmp/commits.txt
-
-#++
-# docs-commit@ mail
-# Picks up anything under the Docs subdirectory (relevant for docs team).
-#--
- bk changes -v -r+ | grep -q " Docs/"
- if [ $? -eq 0 ]
- then
- echo "Notifying docs list at $DOCS"
- (
- cat <<EOF
-List-ID: <bk.mysql-$VERSION>
-From: $FROM
-To: $DOCS
-Subject: bk commit - $VERSION tree (Manual) ($CHANGESET)$BS
-
-EOF
- bk changes -v -r+
- bk cset -r+ -d
- ) > $BKROOT/BitKeeper/tmp/docs.txt
- $SENDMAIL -t < $BKROOT/BitKeeper/tmp/docs.txt
- fi
-
else
echo "commit failed because '$BK_STATUS', you may need to re-clone..."
fi
diff --git a/Makefile.am b/Makefile.am
index 6d435bff85a..31946b18987 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -130,7 +130,7 @@ test-binlog-statement:
cd mysql-test ; \
@PERL@ ./mysql-test-run.pl $(force) --mysqld=--binlog-format=statement
-test: test-unit test-ns test-pr
+test: test-ns test-pr
test-full: test test-nr test-ps
diff --git a/configure.in b/configure.in
index 9581b5ac46a..1a62c3129cb 100644
--- a/configure.in
+++ b/configure.in
@@ -2298,9 +2298,9 @@ AC_ARG_WITH(man,
if test "$with_man" = "yes"
then
man_dirs="man"
- man1_files=`ls -1 $srcdir/man/*.1 | sed -e 's;^.*man/;;'`
+ man1_files=`ls -1 $srcdir/man/*.1 2>/dev/null| sed -e 's;^.*man/;;'`
man1_files=`echo $man1_files`
- man8_files=`ls -1 $srcdir/man/*.8 | sed -e 's;^.*man/;;'`
+ man8_files=`ls -1 $srcdir/man/*.8 2>/dev/null| sed -e 's;^.*man/;;'`
man8_files=`echo $man8_files`
else
man_dirs=""
@@ -2436,7 +2436,6 @@ AC_SUBST(readline_basedir)
AC_SUBST(readline_link)
AC_SUBST(readline_h_ln_cmd)
-
# If we have threads generate some library functions and test programs
sql_server_dirs=
sql_server=
diff --git a/include/Makefile.am b/include/Makefile.am
index d40df7d8343..f92110cb4ce 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -27,8 +27,8 @@ pkginclude_HEADERS = $(HEADERS_ABI) my_dbug.h m_string.h my_sys.h \
my_getopt.h sslopt-longopts.h my_dir.h \
sslopt-vars.h sslopt-case.h sql_common.h keycache.h \
m_ctype.h mysql/plugin.h $(HEADERS_GEN)
-noinst_HEADERS = config-win.h config-netware.h \
- heap.h my_bitmap.h my_uctype.h \
+noinst_HEADERS = config-win.h config-netware.h lf.h my_bit.h \
+ heap.h maria.h myisamchk.h my_bitmap.h my_uctype.h \
myisam.h myisampack.h myisammrg.h ft_global.h\
mysys_err.h my_base.h help_start.h help_end.h \
my_nosys.h my_alarm.h queues.h rijndael.h sha1.h \
@@ -37,7 +37,7 @@ noinst_HEADERS = config-win.h config-netware.h \
mysql_version.h.in my_handler.h my_time.h decimal.h \
my_vle.h my_user.h my_atomic.h atomic/nolock.h \
atomic/rwlock.h atomic/x86-gcc.h atomic/x86-msvc.h \
- my_libwrap.h
+ my_libwrap.h pagecache.h
# Remove built files and the symlinked directories
CLEANFILES = $(BUILT_SOURCES) readline openssl
diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h
index f15c8b13b7f..9b5cbecbd0a 100644
--- a/include/atomic/nolock.h
+++ b/include/atomic/nolock.h
@@ -13,24 +13,25 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-#if defined(__i386__) || defined(_M_IX86)
-
-#ifdef MY_ATOMIC_MODE_DUMMY
-# define LOCK ""
-#else
-# define LOCK "lock"
-#endif
-
-#ifdef __GNUC__
-#include "x86-gcc.h"
-#elif defined(_MSC_VER)
-#include "x86-msvc.h"
-#endif
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__)
+
+# ifdef MY_ATOMIC_MODE_DUMMY
+# define LOCK_prefix ""
+# else
+# define LOCK_prefix "lock"
+# endif
+
+# ifdef __GNUC__
+# include "x86-gcc.h"
+# elif defined(_MSC_VER)
+# error Broken!
+# include "x86-msvc.h"
+# endif
#endif
#ifdef make_atomic_cas_body
-typedef struct { } my_atomic_rwlock_t;
+typedef struct { } my_atomic_rwlock_t __attribute__ ((unused));
#define my_atomic_rwlock_destroy(name)
#define my_atomic_rwlock_init(name)
#define my_atomic_rwlock_rdlock(name)
diff --git a/include/atomic/rwlock.h b/include/atomic/rwlock.h
index 18b77e93d80..cb41952b70c 100644
--- a/include/atomic/rwlock.h
+++ b/include/atomic/rwlock.h
@@ -13,7 +13,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t;
+typedef struct {pthread_mutex_t rw;} my_atomic_rwlock_t;
#ifdef MY_ATOMIC_MODE_DUMMY
/*
@@ -31,17 +31,22 @@ typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t;
#define my_atomic_rwlock_wrunlock(name)
#define MY_ATOMIC_MODE "dummy (non-atomic)"
#else
-#define my_atomic_rwlock_destroy(name) pthread_rwlock_destroy(& (name)->rw)
-#define my_atomic_rwlock_init(name) pthread_rwlock_init(& (name)->rw, 0)
-#define my_atomic_rwlock_rdlock(name) pthread_rwlock_rdlock(& (name)->rw)
-#define my_atomic_rwlock_wrlock(name) pthread_rwlock_wrlock(& (name)->rw)
-#define my_atomic_rwlock_rdunlock(name) pthread_rwlock_unlock(& (name)->rw)
-#define my_atomic_rwlock_wrunlock(name) pthread_rwlock_unlock(& (name)->rw)
-#define MY_ATOMIC_MODE "rwlocks"
+/*
+ we're using read-write lock macros but map them to mutex locks, and they're
+ faster. Still, having semantically rich API we can change the
+ underlying implementation, if necessary.
+*/
+#define my_atomic_rwlock_destroy(name) pthread_mutex_destroy(& (name)->rw)
+#define my_atomic_rwlock_init(name) pthread_mutex_init(& (name)->rw, 0)
+#define my_atomic_rwlock_rdlock(name) pthread_mutex_lock(& (name)->rw)
+#define my_atomic_rwlock_wrlock(name) pthread_mutex_lock(& (name)->rw)
+#define my_atomic_rwlock_rdunlock(name) pthread_mutex_unlock(& (name)->rw)
+#define my_atomic_rwlock_wrunlock(name) pthread_mutex_unlock(& (name)->rw)
+#define MY_ATOMIC_MODE "mutex"
#endif
#define make_atomic_add_body(S) int ## S sav; sav= *a; *a+= v; v=sav;
-#define make_atomic_swap_body(S) int ## S sav; sav= *a; *a= v; v=sav;
+#define make_atomic_fas_body(S) int ## S sav; sav= *a; *a= v; v=sav;
#define make_atomic_cas_body(S) if ((ret= (*a == *cmp))) *a= set; else *cmp=*a;
#define make_atomic_load_body(S) ret= *a;
#define make_atomic_store_body(S) *a= v;
diff --git a/include/atomic/x86-gcc.h b/include/atomic/x86-gcc.h
index d79dadbf05e..5a34bc22f9e 100644
--- a/include/atomic/x86-gcc.h
+++ b/include/atomic/x86-gcc.h
@@ -19,10 +19,18 @@
architectures support double-word (128-bit) cas.
*/
-#ifdef MY_ATOMIC_NO_XADD
-#define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd"
+#ifdef __x86_64__
+# ifdef MY_ATOMIC_NO_XADD
+# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix "-no-xadd"
+# else
+# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix
+# endif
#else
-#define MY_ATOMIC_MODE "gcc-x86" LOCK
+# ifdef MY_ATOMIC_NO_XADD
+# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix "-no-xadd"
+# else
+# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix
+# endif
#endif
/* fix -ansi errors while maintaining readability */
@@ -32,12 +40,12 @@
#ifndef MY_ATOMIC_NO_XADD
#define make_atomic_add_body(S) \
- asm volatile (LOCK "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
+ asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
#endif
-#define make_atomic_swap_body(S) \
- asm volatile ("; xchg %0, %1;" : "+r" (v) , "+m" (*a))
+#define make_atomic_fas_body(S) \
+ asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a))
#define make_atomic_cas_body(S) \
- asm volatile (LOCK "; cmpxchg %3, %0; setz %2;" \
+ asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \
: "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set))
#ifdef MY_ATOMIC_MODE_DUMMY
@@ -46,13 +54,16 @@
#else
/*
Actually 32-bit reads/writes are always atomic on x86
- But we add LOCK here anyway to force memory barriers
+ But we add LOCK_prefix here anyway to force memory barriers
*/
#define make_atomic_load_body(S) \
ret=0; \
- asm volatile (LOCK "; cmpxchg %2, %0" \
+ asm volatile (LOCK_prefix "; cmpxchg %2, %0" \
: "+m" (*a), "+a" (ret): "r" (ret))
#define make_atomic_store_body(S) \
- asm volatile ("; xchg %0, %1;" : "+m" (*a) : "r" (v))
+ asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v))
#endif
+/* TODO test on intel whether the below helps. on AMD it makes no difference */
+//#define LF_BACKOFF ({asm volatile ("rep; nop"); 1; })
+
diff --git a/include/atomic/x86-msvc.h b/include/atomic/x86-msvc.h
index c4885bb8451..2a2cfe70de9 100644
--- a/include/atomic/x86-msvc.h
+++ b/include/atomic/x86-msvc.h
@@ -25,24 +25,24 @@
#ifndef _atomic_h_cleanup_
#define _atomic_h_cleanup_ "atomic/x86-msvc.h"
-#define MY_ATOMIC_MODE "msvc-x86" LOCK
+#define MY_ATOMIC_MODE "msvc-x86" LOCK_prefix
#define make_atomic_add_body(S) \
_asm { \
_asm mov reg_ ## S, v \
- _asm LOCK xadd *a, reg_ ## S \
+ _asm LOCK_prefix xadd *a, reg_ ## S \
_asm movzx v, reg_ ## S \
}
#define make_atomic_cas_body(S) \
_asm { \
_asm mov areg_ ## S, *cmp \
_asm mov reg2_ ## S, set \
- _asm LOCK cmpxchg *a, reg2_ ## S \
+ _asm LOCK_prefix cmpxchg *a, reg2_ ## S \
_asm mov *cmp, areg_ ## S \
_asm setz al \
_asm movzx ret, al \
}
-#define make_atomic_swap_body(S) \
+#define make_atomic_fas_body(S) \
_asm { \
_asm mov reg_ ## S, v \
_asm xchg *a, reg_ ## S \
@@ -55,13 +55,13 @@
#else
/*
Actually 32-bit reads/writes are always atomic on x86
- But we add LOCK here anyway to force memory barriers
+ But we add LOCK_prefix here anyway to force memory barriers
*/
#define make_atomic_load_body(S) \
_asm { \
_asm mov areg_ ## S, 0 \
_asm mov reg2_ ## S, areg_ ## S \
- _asm LOCK cmpxchg *a, reg2_ ## S \
+ _asm LOCK_prefix cmpxchg *a, reg2_ ## S \
_asm mov ret, areg_ ## S \
}
#define make_atomic_store_body(S) \
diff --git a/include/ft_global.h b/include/ft_global.h
index 6c3457541a5..fd1397d1d25 100644
--- a/include/ft_global.h
+++ b/include/ft_global.h
@@ -65,6 +65,17 @@ void ft_free_stopwords(void);
FT_INFO *ft_init_search(uint,void *, uint, byte *, uint,CHARSET_INFO *, byte *);
my_bool ft_boolean_check_syntax_string(const byte *);
+/* Internal symbols for fulltext between maria and MyISAM */
+
+#define HA_FT_WTYPE HA_KEYTYPE_FLOAT
+#define HA_FT_WLEN 4
+#define FT_SEGS 2
+
+#define ft_sintXkorr(A) mi_sint4korr(A)
+#define ft_intXstore(T,A) mi_int4store(T,A)
+
+extern const HA_KEYSEG ft_keysegs[FT_SEGS];
+
#ifdef __cplusplus
}
#endif
diff --git a/include/keycache.h b/include/keycache.h
index dc763b8cc08..90e86f7e8f9 100644
--- a/include/keycache.h
+++ b/include/keycache.h
@@ -127,7 +127,8 @@ extern void end_key_cache(KEY_CACHE *keycache, my_bool cleanup);
/* Functions to handle multiple key caches */
extern my_bool multi_keycache_init(void);
extern void multi_keycache_free(void);
-extern KEY_CACHE *multi_key_cache_search(byte *key, uint length);
+extern KEY_CACHE *multi_key_cache_search(byte *key, uint length,
+ KEY_CACHE *def);
extern my_bool multi_key_cache_set(const byte *key, uint length,
KEY_CACHE *key_cache);
extern void multi_key_cache_change(KEY_CACHE *old_data,
diff --git a/include/lf.h b/include/lf.h
new file mode 100644
index 00000000000..39a47e6568a
--- /dev/null
+++ b/include/lf.h
@@ -0,0 +1,258 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _lf_h
+#define _lf_h
+
+#include <my_atomic.h>
+
+/*
+ Helpers to define both func() and _func(), where
+ func() is a _func() protected by my_atomic_rwlock_wrlock()
+*/
+
+#define lock_wrap(f, t, proto_args, args, lock) \
+t _ ## f proto_args; \
+static inline t f proto_args \
+{ \
+ t ret; \
+ my_atomic_rwlock_wrlock(lock); \
+ ret= _ ## f args; \
+ my_atomic_rwlock_wrunlock(lock); \
+ return ret; \
+}
+
+#define lock_wrap_void(f, proto_args, args, lock) \
+void _ ## f proto_args; \
+static inline void f proto_args \
+{ \
+ my_atomic_rwlock_wrlock(lock); \
+ _ ## f args; \
+ my_atomic_rwlock_wrunlock(lock); \
+}
+
+#define nolock_wrap(f, t, proto_args, args) \
+t _ ## f proto_args; \
+static inline t f proto_args \
+{ \
+ return _ ## f args; \
+}
+
+#define nolock_wrap_void(f, proto_args, args) \
+void _ ## f proto_args; \
+static inline void f proto_args \
+{ \
+ _ ## f args; \
+}
+
+/*
+ wait-free dynamic array, see lf_dynarray.c
+
+ 4 levels of 256 elements each mean 4311810304 elements in an array - it
+ should be enough for a while
+*/
+#define LF_DYNARRAY_LEVEL_LENGTH 256
+#define LF_DYNARRAY_LEVELS 4
+
+typedef struct {
+ void * volatile level[LF_DYNARRAY_LEVELS];
+ uint size_of_element;
+ my_atomic_rwlock_t lock;
+} LF_DYNARRAY;
+
+typedef int (*lf_dynarray_func)(void *, void *);
+
+void lf_dynarray_init(LF_DYNARRAY *array, uint element_size);
+void lf_dynarray_destroy(LF_DYNARRAY *array);
+
+nolock_wrap(lf_dynarray_value, void *,
+ (LF_DYNARRAY *array, uint idx),
+ (array, idx))
+lock_wrap(lf_dynarray_lvalue, void *,
+ (LF_DYNARRAY *array, uint idx),
+ (array, idx),
+ &array->lock)
+nolock_wrap(lf_dynarray_iterate, int,
+ (LF_DYNARRAY *array, lf_dynarray_func func, void *arg),
+ (array, func, arg))
+
+/*
+ pin manager for memory allocator, lf_alloc-pin.c
+*/
+
+#define LF_PINBOX_PINS 4
+#define LF_PURGATORY_SIZE 10
+
+typedef void lf_pinbox_free_func(void *, void *);
+
+typedef struct {
+ LF_DYNARRAY pinstack;
+ lf_pinbox_free_func *free_func;
+ void *free_func_arg;
+ uint free_ptr_offset;
+ uint32 volatile pinstack_top_ver; /* this is a versioned pointer */
+ uint32 volatile pins_in_stack; /* number of elements in array */
+} LF_PINBOX;
+
+typedef struct {
+ void * volatile pin[LF_PINBOX_PINS];
+ LF_PINBOX *pinbox;
+ void *purgatory;
+ uint32 purgatory_count;
+ uint32 volatile link;
+/* we want sizeof(LF_PINS) to be 128 to avoid false sharing */
+ char pad[128-sizeof(uint32)*2
+ -sizeof(LF_PINBOX *)
+ -sizeof(void *)*(LF_PINBOX_PINS+1)];
+} LF_PINS;
+
+/*
+ shortcut macros to do an atomic_wrlock on a structure that uses pins
+ (e.g. lf_hash).
+*/
+#define lf_rwlock_by_pins(PINS) \
+ my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinstack.lock)
+#define lf_rwunlock_by_pins(PINS) \
+ my_atomic_rwlock_wrunlock(&(PINS)->pinbox->pinstack.lock)
+
+/*
+ compile-time assert, to require "no less than N" pins
+ it's enough if it'll fail on at least one compiler, so
+ we'll enable it on GCC only, which supports zero-length arrays.
+*/
+#if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG)
+#define LF_REQUIRE_PINS(N) \
+ static const char require_pins[LF_PINBOX_PINS-N] __attribute__ ((unused)); \
+ static const int LF_NUM_PINS_IN_THIS_FILE= N
+
+#define _lf_pin(PINS, PIN, ADDR) \
+ ( \
+ assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \
+ my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) \
+ )
+#else
+#define LF_REQUIRE_PINS(N)
+#define _lf_pin(PINS, PIN, ADDR) my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR))
+#endif
+
+#define _lf_unpin(PINS, PIN) _lf_pin(PINS, PIN, NULL)
+#define lf_pin(PINS, PIN, ADDR) \
+ do { \
+ lf_rwlock_by_pins(PINS); \
+ _lf_pin(PINS, PIN, ADDR); \
+ lf_rwunlock_by_pins(PINS); \
+ } while (0)
+#define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL)
+#define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0)
+#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN] == 0)
+
+void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
+ lf_pinbox_free_func *free_func, void * free_func_arg);
+void lf_pinbox_destroy(LF_PINBOX *pinbox);
+
+lock_wrap(lf_pinbox_get_pins, LF_PINS *,
+ (LF_PINBOX *pinbox),
+ (pinbox),
+ &pinbox->pinstack.lock)
+lock_wrap_void(lf_pinbox_put_pins,
+ (LF_PINS *pins),
+ (pins),
+ &pins->pinbox->pinstack.lock)
+lock_wrap_void(lf_pinbox_free,
+ (LF_PINS *pins, void *addr),
+ (pins, addr),
+ &pins->pinbox->pinstack.lock)
+
+/*
+ memory allocator, lf_alloc-pin.c
+*/
+
+struct st_lf_alloc_node {
+ struct st_lf_alloc_node *next;
+};
+
+typedef struct st_lf_allocator {
+ LF_PINBOX pinbox;
+ struct st_lf_alloc_node * volatile top;
+ uint element_size;
+ uint32 volatile mallocs;
+} LF_ALLOCATOR;
+
+void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset);
+void lf_alloc_destroy(LF_ALLOCATOR *allocator);
+uint lf_alloc_in_pool(LF_ALLOCATOR *allocator);
+/*
+ shortcut macros to access underlying pinbox functions from an LF_ALLOCATOR
+ see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
+*/
+#define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR))
+#define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR))
+#define _lf_alloc_get_pins(ALLOC) _lf_pinbox_get_pins(&(ALLOC)->pinbox)
+#define lf_alloc_get_pins(ALLOC) lf_pinbox_get_pins(&(ALLOC)->pinbox)
+#define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS)
+#define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS)
+#define lf_alloc_real_free(ALLOC, ADDR) my_free((gptr)(ADDR), MYF(0))
+
+lock_wrap(lf_alloc_new, void *,
+ (LF_PINS *pins),
+ (pins),
+ &pins->pinbox->pinstack.lock)
+
+/*
+ extendible hash, lf_hash.c
+*/
+#include <hash.h>
+
+#define LF_HASH_UNIQUE 1
+
+typedef struct {
+ LF_DYNARRAY array; /* hash itself */
+ LF_ALLOCATOR alloc; /* allocator for elements */
+ hash_get_key get_key; /* see HASH */
+ CHARSET_INFO *charset; /* see HASH */
+ uint key_offset, key_length; /* see HASH */
+ uint element_size, flags; /* LF_HASH_UNIQUE, etc */
+ int32 volatile size; /* size of array */
+ int32 volatile count; /* number of elements in the hash */
+} LF_HASH;
+
+void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
+ uint key_offset, uint key_length, hash_get_key get_key,
+ CHARSET_INFO *charset);
+void lf_hash_destroy(LF_HASH *hash);
+int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data);
+void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
+int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
+/*
+ shortcut macros to access underlying pinbox functions from an LF_HASH
+ see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
+*/
+#define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc)
+#define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc)
+#define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS)
+#define lf_hash_put_pins(PINS) lf_pinbox_put_pins(PINS)
+
+/*
+ cleanup
+*/
+
+#undef lock_wrap_void
+#undef lock_wrap
+#undef nolock_wrap_void
+#undef nolock_wrap
+
+#endif
+
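For context, a minimal usage sketch of the LF_HASH API declared in the new include/lf.h above. This sketch is not part of the changeset; the element layout, the use of my_charset_bin, and the simplified pin handling after lf_hash_search() are illustrative assumptions.

#include <my_global.h>
#include <my_sys.h>
#include <lf.h>

typedef struct { uint32 id; uint32 value; } ELEM;   /* hypothetical element type */

static void lf_hash_example(void)
{
  LF_HASH hash;
  LF_PINS *pins;
  ELEM elem= { 1, 42 }, *found;
  uint32 key= 1;

  /* key is the first 4 bytes of every element, duplicates not allowed */
  lf_hash_init(&hash, sizeof(ELEM), LF_HASH_UNIQUE, 0, sizeof(uint32),
               NULL, &my_charset_bin);
  pins= lf_hash_get_pins(&hash);               /* per-thread set of pins */

  lf_hash_insert(&hash, pins, &elem);           /* copies element_size bytes */
  found= (ELEM*) lf_hash_search(&hash, pins, &key, sizeof(key));
  if (found)
    lf_hash_delete(&hash, pins, &key, sizeof(key));

  lf_hash_put_pins(pins);                       /* release pins before thread exit */
  lf_hash_destroy(&hash);
}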
diff --git a/include/m_string.h b/include/m_string.h
index 4f098c3206d..407dd68c699 100644
--- a/include/m_string.h
+++ b/include/m_string.h
@@ -67,7 +67,7 @@
# define bcopy(s, d, n) memcpy((d), (s), (n))
# define bcmp(A,B,C) memcmp((A),(B),(C))
# define bzero(A,B) memset((A),0,(B))
-# define bmove_align(A,B,C) memcpy((A),(B),(C))
+# define bmove_align(A,B,C) memcpy((A),(B),(C))
#endif
#if defined(__cplusplus)
@@ -126,7 +126,10 @@ extern int bcmp(const char *s1,const char *s2,uint len);
extern int my_bcmp(const char *s1,const char *s2,uint len);
#undef bcmp
#define bcmp(A,B,C) my_bcmp((A),(B),(C))
-#endif
+#define bzero_if_purify(A,B) bzero(A,B)
+#else
+#define bzero_if_purify(A,B)
+#endif /* HAVE_purify */
#ifndef bmove512
extern void bmove512(gptr dst,const gptr src,uint len);
diff --git a/include/maria.h b/include/maria.h
new file mode 100644
index 00000000000..94152ce9bfa
--- /dev/null
+++ b/include/maria.h
@@ -0,0 +1,439 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* This file should be included when using maria functions */
+
+#ifndef _maria_h
+#define _maria_h
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifndef _my_base_h
+#include <my_base.h>
+#endif
+#ifndef _m_ctype_h
+#include <m_ctype.h>
+#endif
+#ifndef _keycache_h
+#include "keycache.h"
+#endif
+#include "my_handler.h"
+#include "ft_global.h"
+#include <myisamchk.h>
+#include <mysql/plugin.h>
+
+/*
+ Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details
+*/
+
+#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY
+#define MARIA_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */
+#else
+#define MARIA_MAX_KEY MAX_INDEXES /* Max allowed keys */
+#endif
+
+#define MARIA_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */
+#define MARIA_NAME_IEXT ".MAI"
+#define MARIA_NAME_DEXT ".MAD"
+/* Max extra space to use when sorting keys */
+#define MARIA_MAX_TEMP_LENGTH 2*1024L*1024L*1024L
+/* Possible values for maria_block_size (must be power of 2) */
+#define MARIA_KEY_BLOCK_LENGTH 8192 /* default key block length */
+#define MARIA_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */
+#define MARIA_MAX_KEY_BLOCK_LENGTH 32768
+#define maria_portable_sizeof_char_ptr 8
+#define MARIA_MAX_KEY_LENGTH 1000 /* Max length in bytes */
+
+/*
+ In the following macros '_keyno_' is 0 .. keys-1.
+ If there can be more keys than bits in the key_map, the highest bit
+ is for all upper keys. They cannot be switched individually.
+ This means that clearing of high keys is ignored, setting one high key
+ sets all high keys.
+*/
+#define MARIA_KEYMAP_BITS (8 * SIZEOF_LONG_LONG)
+#define MARIA_KEYMAP_HIGH_MASK (ULL(1) << (MARIA_KEYMAP_BITS - 1))
+#define maria_get_mask_all_keys_active(_keys_) \
+ (((_keys_) < MARIA_KEYMAP_BITS) ? \
+ ((ULL(1) << (_keys_)) - ULL(1)) : \
+ (~ ULL(0)))
+#if MARIA_MAX_KEY > MARIA_KEYMAP_BITS
+#define maria_is_key_active(_keymap_,_keyno_) \
+ (((_keyno_) < MARIA_KEYMAP_BITS) ? \
+ test((_keymap_) & (ULL(1) << (_keyno_))) : \
+ test((_keymap_) & MARIA_KEYMAP_HIGH_MASK))
+#define maria_set_key_active(_keymap_,_keyno_) \
+ (_keymap_)|= (((_keyno_) < MARIA_KEYMAP_BITS) ? \
+ (ULL(1) << (_keyno_)) : \
+ MARIA_KEYMAP_HIGH_MASK)
+#define maria_clear_key_active(_keymap_,_keyno_) \
+ (_keymap_)&= (((_keyno_) < MARIA_KEYMAP_BITS) ? \
+ (~ (ULL(1) << (_keyno_))) : \
+ (~ (ULL(0))) /*ignore*/ )
+#else
+#define maria_is_key_active(_keymap_,_keyno_) \
+ test((_keymap_) & (ULL(1) << (_keyno_)))
+#define maria_set_key_active(_keymap_,_keyno_) \
+ (_keymap_)|= (ULL(1) << (_keyno_))
+#define maria_clear_key_active(_keymap_,_keyno_) \
+ (_keymap_)&= (~ (ULL(1) << (_keyno_)))
+#endif
+#define maria_is_any_key_active(_keymap_) \
+ test((_keymap_))
+#define maria_is_all_keys_active(_keymap_,_keys_) \
+ ((_keymap_) == maria_get_mask_all_keys_active(_keys_))
+#define maria_set_all_keys_active(_keymap_,_keys_) \
+ (_keymap_)= maria_get_mask_all_keys_active(_keys_)
+#define maria_clear_all_keys_active(_keymap_) \
+ (_keymap_)= 0
+#define maria_intersect_keys_active(_to_,_from_) \
+ (_to_)&= (_from_)
+#define maria_is_any_intersect_keys_active(_keymap1_,_keys_,_keymap2_) \
+ ((_keymap1_) & (_keymap2_) & \
+ maria_get_mask_all_keys_active(_keys_))
+#define maria_copy_keys_active(_to_,_maxkeys_,_from_) \
+ (_to_)= (maria_get_mask_all_keys_active(_maxkeys_) & \
+ (_from_))
+
+ /* Param to/from maria_info */
+
+typedef ulonglong MARIA_RECORD_POS;
+
+typedef struct st_maria_isaminfo /* Struct from h_info */
+{
+ ha_rows records; /* Records in database */
+ ha_rows deleted; /* Deleted records in database */
+ MARIA_RECORD_POS recpos; /* Pos for last used record */
+ MARIA_RECORD_POS newrecpos; /* Pos if we write new record */
+ MARIA_RECORD_POS dup_key_pos; /* Position to record with dup key */
+ my_off_t data_file_length; /* Length of data file */
+ my_off_t max_data_file_length, index_file_length;
+ my_off_t max_index_file_length, delete_length;
+ ulong reclength; /* Recordlength */
+ ulong mean_reclength; /* Mean recordlength (if packed) */
+ ulonglong auto_increment;
+ ulonglong key_map; /* Which keys are used */
+ char *data_file_name, *index_file_name;
+ uint keys; /* Number of keys in use */
+ uint options; /* HA_OPTION_... used */
+ int errkey, /* Which key was duplicated on error */
+ sortkey; /* clustered by this key */
+ File filenr; /* (uniq) filenr for datafile */
+ time_t create_time; /* When table was created */
+ time_t check_time;
+ time_t update_time;
+ uint reflength;
+ ulong record_offset;
+ ulong *rec_per_key; /* for sql optimizing */
+} MARIA_INFO;
+
+
+typedef struct st_maria_create_info
+{
+ const char *index_file_name, *data_file_name; /* If using symlinks */
+ ha_rows max_rows;
+ ha_rows reloc_rows;
+ ulonglong auto_increment;
+ ulonglong data_file_length;
+ ulonglong key_file_length;
+ uint null_bytes;
+ uint old_options;
+ enum data_file_type org_data_file_type;
+ uint8 language;
+ my_bool with_auto_increment, transactional;
+} MARIA_CREATE_INFO;
+
+struct st_maria_info; /* For reference */
+struct st_maria_share;
+typedef struct st_maria_info MARIA_HA;
+struct st_maria_s_param;
+
+typedef struct st_maria_keydef /* Key definition with open & info */
+{
+ struct st_maria_share *share; /* Pointer to base (set in open) */
+ uint16 keysegs; /* Number of key-segment */
+ uint16 flag; /* NOSAME, PACK_USED */
+
+ uint8 key_alg; /* BTREE, RTREE */
+ uint16 block_length; /* Length of keyblock (auto) */
+ uint16 underflow_block_length; /* When to execute underflow */
+ uint16 keylength; /* Tot length of keyparts (auto) */
+ uint16 minlength; /* min length of (packed) key (auto) */
+ uint16 maxlength; /* max length of (packed) key (auto) */
+ uint32 version; /* For concurrent read/write */
+ uint32 ftparser_nr; /* distinct ftparser number */
+
+ HA_KEYSEG *seg, *end;
+ struct st_mysql_ftparser *parser; /* Fulltext [pre]parser */
+ int (*bin_search)(struct st_maria_info *info,
+ struct st_maria_keydef *keyinfo, byte *page, byte *key,
+ uint key_len, uint comp_flag, byte **ret_pos,
+ byte *buff, my_bool *was_last_key);
+ uint(*get_key)(struct st_maria_keydef *keyinfo, uint nod_flag,
+ byte **page, byte *key);
+ int (*pack_key)(struct st_maria_keydef *keyinfo, uint nod_flag,
+ byte *next_key, byte *org_key, byte *prev_key,
+ const byte *key, struct st_maria_s_param *s_temp);
+ void (*store_key)(struct st_maria_keydef *keyinfo, byte *key_pos,
+ struct st_maria_s_param *s_temp);
+ int (*ck_insert)(struct st_maria_info *inf, uint k_nr, byte *k, uint klen);
+ int (*ck_delete)(struct st_maria_info *inf, uint k_nr, byte *k, uint klen);
+} MARIA_KEYDEF;
+
+
+#define MARIA_UNIQUE_HASH_LENGTH 4
+
+typedef struct st_maria_unique_def /* Segment definition of unique */
+{
+ uint16 keysegs; /* Number of key-segment */
+ uint8 key; /* Mapped to which key */
+ uint8 null_are_equal;
+ HA_KEYSEG *seg, *end;
+} MARIA_UNIQUEDEF;
+
+typedef struct st_maria_decode_tree /* Decode huff-table */
+{
+ uint16 *table;
+ uint quick_table_bits;
+ byte *intervalls;
+} MARIA_DECODE_TREE;
+
+
+struct st_maria_bit_buff;
+
+/*
+ Note that null markers should always be first in a row !
+ When creating a column, one should only specify:
+ type, length, null_bit and null_pos
+*/
+
+typedef struct st_maria_columndef /* column information */
+{
+ uint64 offset; /* Offset to position in row */
+ enum en_fieldtype type;
+ uint16 length; /* length of field */
+ uint16 fill_length;
+ uint16 null_pos; /* Position for null marker */
+ uint16 empty_pos; /* Position for empty marker */
+ uint8 null_bit; /* If column may be NULL */
+ uint8 empty_bit; /* If column may be empty */
+
+#ifndef NOT_PACKED_DATABASES
+ void(*unpack)(struct st_maria_columndef *rec,
+ struct st_maria_bit_buff *buff,
+ byte *start, byte *end);
+ enum en_fieldtype base_type;
+ uint space_length_bits, pack_type;
+ MARIA_DECODE_TREE *huff_tree;
+#endif
+} MARIA_COLUMNDEF;
+
+
+extern ulong maria_block_size;
+extern ulong maria_concurrent_insert;
+extern my_bool maria_flush, maria_single_user;
+extern my_bool maria_delay_key_write, maria_delay_rec_write;
+extern my_off_t maria_max_temp_length;
+extern ulong maria_bulk_insert_tree_size, maria_data_pointer_size;
+extern KEY_CACHE maria_key_cache_var, *maria_key_cache;
+
+
+ /* Prototypes for maria-functions */
+
+extern int maria_init(void);
+extern void maria_end(void);
+extern int maria_close(struct st_maria_info *file);
+extern int maria_delete(struct st_maria_info *file, const byte *buff);
+extern struct st_maria_info *maria_open(const char *name, int mode,
+ uint wait_if_locked);
+extern int maria_panic(enum ha_panic_function function);
+extern int maria_rfirst(struct st_maria_info *file, byte *buf, int inx);
+extern int maria_rkey(struct st_maria_info *file, byte *buf, int inx,
+ const byte *key,
+ uint key_len, enum ha_rkey_function search_flag);
+extern int maria_rlast(struct st_maria_info *file, byte *buf, int inx);
+extern int maria_rnext(struct st_maria_info *file, byte *buf, int inx);
+extern int maria_rnext_same(struct st_maria_info *info, byte *buf);
+extern int maria_rprev(struct st_maria_info *file, byte *buf, int inx);
+extern int maria_rrnd(struct st_maria_info *file, byte *buf,
+ MARIA_RECORD_POS pos);
+extern int maria_scan_init(struct st_maria_info *file);
+extern int maria_scan(struct st_maria_info *file, byte *buf);
+extern void maria_scan_end(struct st_maria_info *file);
+extern int maria_rsame(struct st_maria_info *file, byte *record, int inx);
+extern int maria_rsame_with_pos(struct st_maria_info *file, byte *record,
+ int inx, MARIA_RECORD_POS pos);
+extern int maria_update(struct st_maria_info *file, const byte *old,
+ byte *new_record);
+extern int maria_write(struct st_maria_info *file, byte *buff);
+extern MARIA_RECORD_POS maria_position(struct st_maria_info *file);
+extern int maria_status(struct st_maria_info *info, MARIA_INFO *x, uint flag);
+extern int maria_lock_database(struct st_maria_info *file, int lock_type);
+extern int maria_create(const char *name, enum data_file_type record_type,
+ uint keys, MARIA_KEYDEF *keydef,
+ uint columns, MARIA_COLUMNDEF *columndef,
+ uint uniques, MARIA_UNIQUEDEF *uniquedef,
+ MARIA_CREATE_INFO *create_info, uint flags);
+extern int maria_delete_table(const char *name);
+extern int maria_rename(const char *from, const char *to);
+extern int maria_extra(struct st_maria_info *file,
+ enum ha_extra_function function, void *extra_arg);
+extern int maria_reset(struct st_maria_info *file);
+extern ha_rows maria_records_in_range(struct st_maria_info *info, int inx,
+ key_range *min_key, key_range *max_key);
+extern int maria_is_changed(struct st_maria_info *info);
+extern int maria_delete_all_rows(struct st_maria_info *info);
+extern uint maria_get_pointer_length(ulonglong file_length, uint def);
+
+
+/* this is used to pass to mysql_mariachk_table */
+
+#define MARIA_CHK_REPAIR 1 /* equivalent to mariachk -r */
+#define MARIA_CHK_VERIFY 2 /* Verify, run repair if failure */
+
+typedef uint maria_bit_type;
+
+typedef struct st_maria_bit_buff
+{ /* Used for packing of record */
+ maria_bit_type current_byte;
+ uint bits;
+ uchar *pos, *end, *blob_pos, *blob_end;
+ uint error;
+} MARIA_BIT_BUFF;
+
+
+typedef struct st_maria_sort_info
+{
+#ifdef THREAD
+ /* sync things */
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+#endif
+ MARIA_HA *info;
+ HA_CHECK *param;
+ char *buff;
+ SORT_KEY_BLOCKS *key_block, *key_block_end;
+ SORT_FT_BUF *ft_buf;
+ my_off_t filelength, dupp, buff_length;
+ ha_rows max_records;
+ uint current_key, total_keys;
+ uint got_error, threads_running;
+ myf myf_rw;
+ enum data_file_type new_data_file_type;
+} MARIA_SORT_INFO;
+
+typedef struct st_maria_sort_param
+{
+ pthread_t thr;
+ IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
+ DYNAMIC_ARRAY buffpek;
+ MARIA_BIT_BUFF bit_buff; /* For parallel repair of packrec. */
+
+ MARIA_KEYDEF *keyinfo;
+ MARIA_SORT_INFO *sort_info;
+ HA_KEYSEG *seg;
+ byte **sort_keys;
+ byte *rec_buff;
+ void *wordlist, *wordptr;
+ MEM_ROOT wordroot;
+ char *record;
+ MY_TMPDIR *tmpdir;
+
+ /*
+ The next two are used to collect statistics, see maria_update_key_parts for
+ description.
+ */
+ ulonglong unique[HA_MAX_KEY_SEG+1];
+ ulonglong notnull[HA_MAX_KEY_SEG+1];
+
+ MARIA_RECORD_POS pos,max_pos,filepos,start_recpos;
+ uint key, key_length,real_key_length,sortbuff_size;
+ uint maxbuffers, keys, find_length, sort_keys_length;
+ my_bool fix_datafile, master;
+ my_bool calc_checksum; /* calculate table checksum */
+ my_size_t rec_buff_size;
+
+ int (*key_cmp)(struct st_maria_sort_param *, const void *, const void *);
+ int (*key_read)(struct st_maria_sort_param *, byte *);
+ int (*key_write)(struct st_maria_sort_param *, const byte *);
+ void (*lock_in_memory)(HA_CHECK *);
+ NEAR int (*write_keys)(struct st_maria_sort_param *, register byte **,
+ uint , struct st_buffpek *, IO_CACHE *);
+ NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint);
+ NEAR int (*write_key)(struct st_maria_sort_param *, IO_CACHE *,char *,
+ uint, uint);
+} MARIA_SORT_PARAM;
+
+
+/* functions in maria_check */
+void maria_chk_init(HA_CHECK *param);
+int maria_chk_status(HA_CHECK *param, MARIA_HA *info);
+int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag);
+int maria_chk_size(HA_CHECK *param, MARIA_HA *info);
+int maria_chk_key(HA_CHECK *param, MARIA_HA *info);
+int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, int extend);
+int maria_repair(HA_CHECK *param, register MARIA_HA *info,
+ my_string name, int rep_quick);
+int maria_sort_index(HA_CHECK *param, register MARIA_HA *info,
+ my_string name);
+int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
+ const char *name, int rep_quick);
+int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
+ const char *name, int rep_quick);
+int maria_change_to_newfile(const char *filename, const char *old_ext,
+ const char *new_ext, myf myflags);
+void maria_lock_memory(HA_CHECK *param);
+int maria_update_state_info(HA_CHECK *param, MARIA_HA *info, uint update);
+void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part,
+ ulonglong *unique, ulonglong *notnull,
+ ulonglong records);
+int maria_filecopy(HA_CHECK *param, File to, File from, my_off_t start,
+ my_off_t length, const char *type);
+int maria_movepoint(MARIA_HA *info, byte *record, my_off_t oldpos,
+ my_off_t newpos, uint prot_key);
+int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile);
+int maria_test_if_almost_full(MARIA_HA *info);
+int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename);
+int maria_disable_indexes(MARIA_HA *info);
+int maria_enable_indexes(MARIA_HA *info);
+int maria_indexes_are_disabled(MARIA_HA *info);
+void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows);
+my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, ulonglong key_map,
+ my_bool force);
+
+int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows);
+void maria_flush_bulk_insert(MARIA_HA *info, uint inx);
+void maria_end_bulk_insert(MARIA_HA *info);
+int maria_assign_to_key_cache(MARIA_HA *info, ulonglong key_map,
+ KEY_CACHE *key_cache);
+void maria_change_key_cache(KEY_CACHE *old_key_cache,
+ KEY_CACHE *new_key_cache);
+int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves);
+
+/* fulltext functions */
+FT_INFO *maria_ft_init_search(uint,void *, uint, byte *, uint,
+ CHARSET_INFO *, byte *);
+
+/* 'Almost-internal' Maria functions */
+
+void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
+ my_bool repair);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
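For context, a minimal sketch of the table-level call sequence implied by the maria_* declarations above. This is not part of the changeset; the O_RDONLY open mode, the caller-provided record buffer, and the error handling are simplified assumptions.

#include <maria.h>

static int maria_scan_example(const char *name, byte *record)
{
  MARIA_HA *file;
  int error;

  if (maria_init())                                   /* once per process */
    return 1;
  if (!(file= maria_open(name, O_RDONLY, HA_OPEN_WAIT_IF_LOCKED)))
    return 1;

  maria_scan_init(file);
  while (!(error= maria_scan(file, record)))
  {
    /* one row is now in 'record'; process it here */
  }
  maria_scan_end(file);

  maria_close(file);
  return error == HA_ERR_END_OF_FILE ? 0 : 1;
}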
diff --git a/include/my_atomic.h b/include/my_atomic.h
index 7b066e9b529..5f328f32f3f 100644
--- a/include/my_atomic.h
+++ b/include/my_atomic.h
@@ -13,6 +13,40 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+/*
+ This header defines five atomic operations:
+
+ my_atomic_add#(&var, what)
+ add 'what' to *var, and return the old value of *var
+
+ my_atomic_fas#(&var, what)
+ 'Fetch And Store'
+ store 'what' in *var, and return the old value of *var
+
+ my_atomic_cas#(&var, &old, new)
+ 'Compare And Swap'
+ if *var is equal to *old, then store 'new' in *var, and return TRUE
+ otherwise store *var in *old, and return FALSE
+
+ my_atomic_load#(&var)
+ return *var
+
+ my_atomic_store#(&var, what)
+ store 'what' in *var
+
+ '#' is substituted by a size suffix - 8, 16, 32, or ptr
+ (e.g. my_atomic_add8, my_atomic_fas32, my_atomic_casptr).
+
+ NOTE: These operations are not always atomic, so they must always be
+ enclosed in my_atomic_rwlock_rdlock(lock)/my_atomic_rwlock_rdunlock(lock)
+ or my_atomic_rwlock_wrlock(lock)/my_atomic_rwlock_wrunlock(lock).
+ Hint: if a code block makes intensive use of atomic ops, it makes sense
+ to take/release the rwlock once for the whole block, not for every statement.
+
+ On architectures where these operations are really atomic, rwlocks will
+ be optimized away.
+*/
+
#ifndef my_atomic_rwlock_init
#define intptr void *
@@ -26,70 +60,115 @@
#endif
#ifndef make_atomic_add_body
-#define make_atomic_add_body(S) \
+#define make_atomic_add_body(S) \
int ## S tmp=*a; \
while (!my_atomic_cas ## S(a, &tmp, tmp+v)); \
v=tmp;
#endif
+#ifdef __GNUC__
+/*
+ we want to be able to use my_atomic_xxx functions with
+ both signed and unsigned integers. But gcc will issue a warning
+ "passing arg N of `my_atomic_XXX' as [un]signed due to prototype"
+ if the signedness of the argument doesn't match the prototype, or
+ "pointer targets in passing argument N of my_atomic_XXX differ in signedness"
+ if int* is used where uint* is expected (or vice versa).
+ Let's shut these warnings up
+*/
+#define make_transparent_unions(S) \
+ typedef union { \
+ int ## S i; \
+ uint ## S u; \
+ } U_ ## S __attribute__ ((transparent_union)); \
+ typedef union { \
+ int ## S volatile *i; \
+ uint ## S volatile *u; \
+ } Uv_ ## S __attribute__ ((transparent_union));
+#define uintptr intptr
+make_transparent_unions(8)
+make_transparent_unions(16)
+make_transparent_unions(32)
+make_transparent_unions(ptr)
+#undef uintptr
+#undef make_transparent_unions
+#define a U_a.i
+#define cmp U_cmp.i
+#define v U_v.i
+#define set U_set.i
+#else
+#define U_8 int8
+#define U_16 int16
+#define U_32 int32
+#define U_ptr intptr
+#define Uv_8 int8
+#define Uv_16 int16
+#define Uv_32 int32
+#define Uv_ptr intptr
+#define U_a volatile *a
+#define U_cmp *cmp
+#define U_v v
+#define U_set set
+#endif /* __GCC__ transparent_union magic */
+
#ifdef HAVE_INLINE
-#define make_atomic_add(S) \
-static inline int ## S my_atomic_add ## S( \
- int ## S volatile *a, int ## S v) \
-{ \
- make_atomic_add_body(S); \
- return v; \
+#define make_atomic_add(S) \
+STATIC_INLINE int ## S my_atomic_add ## S( \
+ Uv_ ## S U_a, U_ ## S U_v) \
+{ \
+ make_atomic_add_body(S); \
+ return v; \
}
-#define make_atomic_swap(S) \
-static inline int ## S my_atomic_swap ## S( \
- int ## S volatile *a, int ## S v) \
-{ \
- make_atomic_swap_body(S); \
- return v; \
+#define make_atomic_fas(S) \
+STATIC_INLINE int ## S my_atomic_fas ## S( \
+ Uv_ ## S U_a, U_ ## S U_v) \
+{ \
+ make_atomic_fas_body(S); \
+ return v; \
}
-#define make_atomic_cas(S) \
-static inline int my_atomic_cas ## S(int ## S volatile *a, \
- int ## S *cmp, int ## S set) \
-{ \
- int8 ret; \
- make_atomic_cas_body(S); \
- return ret; \
+#define make_atomic_cas(S) \
+STATIC_INLINE int my_atomic_cas ## S(Uv_ ## S U_a, \
+ Uv_ ## S U_cmp, U_ ## S U_set) \
+{ \
+ int8 ret; \
+ make_atomic_cas_body(S); \
+ return ret; \
}
-#define make_atomic_load(S) \
-static inline int ## S my_atomic_load ## S(int ## S volatile *a) \
-{ \
- int ## S ret; \
- make_atomic_load_body(S); \
- return ret; \
+#define make_atomic_load(S) \
+STATIC_INLINE int ## S my_atomic_load ## S(Uv_ ## S U_a) \
+{ \
+ int ## S ret; \
+ make_atomic_load_body(S); \
+ return ret; \
}
-#define make_atomic_store(S) \
-static inline void my_atomic_store ## S( \
- int ## S volatile *a, int ## S v) \
-{ \
- make_atomic_store_body(S); \
+#define make_atomic_store(S) \
+STATIC_INLINE void my_atomic_store ## S( \
+ Uv_ ## S U_a, U_ ## S U_v) \
+{ \
+ make_atomic_store_body(S); \
}
#else /* no inline functions */
-#define make_atomic_add(S) \
-extern int ## S my_atomic_add ## S(int ## S volatile *a, int ## S v);
+#define make_atomic_add(S) \
+extern int ## S my_atomic_add ## S(Uv_ ## S, U_ ## S);
-#define make_atomic_swap(S) \
-extern int ## S my_atomic_swap ## S(int ## S volatile *a, int ## S v);
+#define make_atomic_fas(S) \
+extern int ## S my_atomic_fas ## S(Uv_ ## S, U_ ## S);
-#define make_atomic_cas(S) \
-extern int my_atomic_cas ## S(int ## S volatile *a, int ## S *cmp, int ## S set);
+#define make_atomic_cas(S) \
+extern int my_atomic_cas ## S(Uv_ ## S, Uv_ ## S, U_ ## S);
-#define make_atomic_load(S) \
-extern int ## S my_atomic_load ## S(int ## S volatile *a);
+#define make_atomic_load(S) \
+extern int ## S my_atomic_load ## S(Uv_ ## S);
-#define make_atomic_store(S) \
-extern void my_atomic_store ## S(int ## S volatile *a, int ## S v);
+#define make_atomic_store(S) \
+extern void my_atomic_store ## S(Uv_ ## S, U_ ## S);
#endif
@@ -112,34 +191,42 @@ make_atomic_store(16)
make_atomic_store(32)
make_atomic_store(ptr)
-make_atomic_swap( 8)
-make_atomic_swap(16)
-make_atomic_swap(32)
-make_atomic_swap(ptr)
+make_atomic_fas( 8)
+make_atomic_fas(16)
+make_atomic_fas(32)
+make_atomic_fas(ptr)
+#ifdef _atomic_h_cleanup_
+#include _atomic_h_cleanup_
+#undef _atomic_h_cleanup_
+#endif
+
+#undef U_8
+#undef U_16
+#undef U_32
+#undef U_ptr
+#undef a
+#undef cmp
+#undef v
+#undef set
+#undef U_a
+#undef U_cmp
+#undef U_v
+#undef U_set
#undef make_atomic_add
#undef make_atomic_cas
#undef make_atomic_load
#undef make_atomic_store
-#undef make_atomic_swap
+#undef make_atomic_fas
#undef make_atomic_add_body
#undef make_atomic_cas_body
#undef make_atomic_load_body
#undef make_atomic_store_body
-#undef make_atomic_swap_body
+#undef make_atomic_fas_body
#undef intptr
-#ifdef _atomic_h_cleanup_
-#include _atomic_h_cleanup_
-#undef _atomic_h_cleanup_
-#endif
-
-#if SIZEOF_CHARP == SIZEOF_INT
-typedef int intptr;
-#elif SIZEOF_CHARP == SIZEOF_LONG
-typedef long intptr;
-#else
-#error
+#ifndef LF_BACKOFF
+#define LF_BACKOFF (1)
#endif
#define MY_ATOMIC_OK 0
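For context, a short sketch of how the five operations documented above are meant to be used together with the mandatory rwlock bracketing. This is not part of the changeset; the counter variable and its lock are hypothetical, and my_atomic_rwlock_init() is assumed to have run once at startup.

#include <my_global.h>
#include <my_atomic.h>

static my_atomic_rwlock_t counter_lock;   /* my_atomic_rwlock_init()'ed at startup */
static int32 counter= 0;

static int32 counter_increment(void)
{
  int32 old;
  my_atomic_rwlock_wrlock(&counter_lock);
  old= my_atomic_add32(&counter, 1);      /* returns the value before the add */
  my_atomic_rwlock_wrunlock(&counter_lock);
  return old;
}

static int32 counter_read(void)
{
  int32 val;
  my_atomic_rwlock_rdlock(&counter_lock);
  val= my_atomic_load32(&counter);
  my_atomic_rwlock_rdunlock(&counter_lock);
  return val;
}

On platforms where the operations compile to real atomic instructions (e.g. the x86-gcc.h implementation above), the rwlock macros expand to nothing, so the bracketing costs nothing there.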
diff --git a/include/my_base.h b/include/my_base.h
index 26d513ba2a7..38376adfe85 100644
--- a/include/my_base.h
+++ b/include/my_base.h
@@ -14,7 +14,6 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* This file includes constants used with all databases */
-/* Author: Michael Widenius */
#ifndef _my_base_h
#define _my_base_h
@@ -49,7 +48,7 @@
#define HA_OPEN_FROM_SQL_LAYER 64
#define HA_OPEN_MMAP 128 /* open memory mapped */
- /* The following is parameter to ha_rkey() how to use key */
+/* The following is parameter to ha_rkey() how to use key */
/*
We define a complete-field prefix of a key value as a prefix where
@@ -449,7 +448,7 @@ enum en_fieldtype {
};
enum data_file_type {
- STATIC_RECORD,DYNAMIC_RECORD,COMPRESSED_RECORD
+ STATIC_RECORD, DYNAMIC_RECORD, COMPRESSED_RECORD, BLOCK_RECORD
};
/* For key ranges */
@@ -500,4 +499,7 @@ typedef ulong ha_rows;
#define HA_VARCHAR_PACKLENGTH(field_length) ((field_length) < 256 ? 1 :2)
+/* invalidator function reference for Query Cache */
+typedef void (* invalidator_by_filename)(const char * filename);
+
#endif /* _my_base_h */
diff --git a/include/my_bit.h b/include/my_bit.h
new file mode 100644
index 00000000000..58e8bb39683
--- /dev/null
+++ b/include/my_bit.h
@@ -0,0 +1,107 @@
+/*
+ Some useful bit functions
+*/
+
+#ifdef HAVE_INLINE
+
+extern const char _my_bits_nbits[256];
+extern const uchar _my_bits_reverse_table[256];
+
+/*
+ Find the smallest X such that 2^X >= value
+ This can be used to divide a number by 'value' with a shift instead
+*/
+
+STATIC_INLINE uint my_bit_log2(ulong value)
+{
+ uint bit;
+ for (bit=0 ; value > 1 ; value>>=1, bit++) ;
+ return bit;
+}
+
+STATIC_INLINE uint my_count_bits(ulonglong v)
+{
+#if SIZEOF_LONG_LONG > 4
+ /* The following code is a bit faster on 16 bit machines than if we would
+ only shift v */
+ ulong v2=(ulong) (v >> 32);
+ return (uint) (uchar) (_my_bits_nbits[(uchar) v] +
+ _my_bits_nbits[(uchar) (v >> 8)] +
+ _my_bits_nbits[(uchar) (v >> 16)] +
+ _my_bits_nbits[(uchar) (v >> 24)] +
+ _my_bits_nbits[(uchar) (v2)] +
+ _my_bits_nbits[(uchar) (v2 >> 8)] +
+ _my_bits_nbits[(uchar) (v2 >> 16)] +
+ _my_bits_nbits[(uchar) (v2 >> 24)]);
+#else
+ return (uint) (uchar) (_my_bits_nbits[(uchar) v] +
+ _my_bits_nbits[(uchar) (v >> 8)] +
+ _my_bits_nbits[(uchar) (v >> 16)] +
+ _my_bits_nbits[(uchar) (v >> 24)]);
+#endif
+}
+
+STATIC_INLINE uint my_count_bits_ushort(ushort v)
+{
+ return _my_bits_nbits[v];
+}
+
+
+/*
+ Next highest power of two
+
+ SYNOPSIS
+ my_round_up_to_next_power()
+ v Value to check
+
+ RETURN
+ Next or equal power of 2
+ Note: 0 will return 0
+
+ NOTES
+ Algorithm by Sean Anderson, according to:
+ http://graphics.stanford.edu/~seander/bithacks.html
+ (Original code public domain)
+
+ Comments show how this works with 01100000000000000000000000001011
+*/
+
+STATIC_INLINE uint32 my_round_up_to_next_power(uint32 v)
+{
+ v--; /* 01100000000000000000000000001010 */
+ v|= v >> 1; /* 01110000000000000000000000001111 */
+ v|= v >> 2; /* 01111100000000000000000000001111 */
+ v|= v >> 4; /* 01111111110000000000000000001111 */
+ v|= v >> 8; /* 01111111111111111100000000001111 */
+ v|= v >> 16; /* 01111111111111111111111111111111 */
+ return v+1; /* 10000000000000000000000000000000 */
+}
+
+STATIC_INLINE uint32 my_clear_highest_bit(uint32 v)
+{
+ uint32 w=v >> 1;
+ w|= w >> 1;
+ w|= w >> 2;
+ w|= w >> 4;
+ w|= w >> 8;
+ w|= w >> 16;
+ return v & w;
+}
+
+STATIC_INLINE uint32 my_reverse_bits(uint32 key)
+{
+ return
+ (_my_bits_reverse_table[ key & 255] << 24) |
+ (_my_bits_reverse_table[(key>> 8) & 255] << 16) |
+ (_my_bits_reverse_table[(key>>16) & 255] << 8) |
+ _my_bits_reverse_table[(key>>24) ];
+}
+
+#else
+extern uint my_bit_log2(ulong value);
+extern uint32 my_round_up_to_next_power(uint32 v);
+uint32 my_clear_highest_bit(uint32 v);
+uint32 my_reverse_bits(uint32 key);
+extern uint my_count_bits(ulonglong v);
+extern uint my_count_bits_ushort(ushort v);
+#endif
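A standalone check of the bit-smearing trick behind my_round_up_to_next_power(), assuming plain 32-bit unsigned arithmetic:

#include <stdio.h>
#include <stdint.h>

static uint32_t round_up_to_next_power(uint32_t v)
{
  v--;                  /* so exact powers of two map to themselves */
  v|= v >> 1;           /* smear the highest set bit downwards...   */
  v|= v >> 2;
  v|= v >> 4;
  v|= v >> 8;
  v|= v >> 16;          /* ...until all lower bits are set          */
  return v + 1;         /* 0 stays 0, as noted in the header        */
}

int main(void)
{
  printf("%u -> %u\n", 5u,  round_up_to_next_power(5));    /* 8  */
  printf("%u -> %u\n", 64u, round_up_to_next_power(64));   /* 64 */
  printf("%u -> %u\n", 0u,  round_up_to_next_power(0));    /* 0  */
  return 0;
}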
diff --git a/include/my_dbug.h b/include/my_dbug.h
index 0541cf00c29..080f0cb7c54 100644
--- a/include/my_dbug.h
+++ b/include/my_dbug.h
@@ -101,7 +101,7 @@ extern FILE *_db_fp_(void);
#define DBUG_LONGJMP(a1) longjmp(a1)
#define DBUG_DUMP(keyword,a1,a2)
#define DBUG_END()
-#define DBUG_ASSERT(A)
+#define DBUG_ASSERT(A) do { } while(0)
#define DBUG_LOCK_FILE
#define DBUG_FILE (stderr)
#define DBUG_UNLOCK_FILE
diff --git a/include/my_global.h b/include/my_global.h
index e25752b8ed8..68f47a75e4b 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -224,6 +224,8 @@
#endif
#undef inline_test_2
#undef inline_test_1
+/* helper macro for "instantiating" inline functions */
+#define STATIC_INLINE static inline
/*
The following macros are used to control inlining a bit more than
@@ -458,6 +460,14 @@ C_MODE_END
*/
#include <assert.h>
+/* an assert that works at compile time; only for constant expressions */
+#define compile_time_assert(X) \
+ do \
+ { \
+ char compile_time_assert[(X) ? 1 : -1] \
+ __attribute__ ((unused)); \
+ } while(0)
+
/* Go around some bugs in different OS and compilers */
#if defined (HPUX11) && defined(_LARGEFILE_SOURCE)
#define _LARGEFILE64_SOURCE
@@ -1016,6 +1026,14 @@ typedef unsigned __int64 my_ulonglong;
typedef unsigned long long my_ulonglong;
#endif
+#if SIZEOF_CHARP == SIZEOF_INT
+typedef int intptr;
+#elif SIZEOF_CHARP == SIZEOF_LONG
+typedef long intptr;
+#else
+#error
+#endif
+
#ifdef USE_RAID
/*
The following is done with a if to not get problems with pre-processors
@@ -1479,4 +1497,12 @@ do { doubleget_union _tmp; \
#define dlerror() ""
#endif
+/*
+ Only Linux is known to need an explicit sync of the directory to make sure a
+ file creation/deletion/renaming in (from, to) this directory is durable.
+*/
+#ifdef TARGET_OS_LINUX
+#define NEED_EXPLICIT_SYNC_DIR 1
+#endif
+
#endif /* my_global_h */
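A standalone sketch of the compile_time_assert() idea added above: a false constant expression yields a negative array size, which the compiler rejects. The macro is renamed here to avoid clashing with the real one, and a (void) cast replaces __attribute__((unused)) so it builds with any compiler:

#include <stdio.h>

#define my_compile_time_assert(X)                      \
  do                                                   \
  {                                                    \
    char my_compile_time_assert_arr[(X) ? 1 : -1];     \
    (void) my_compile_time_assert_arr;                 \
  } while (0)

int main(void)
{
  my_compile_time_assert(sizeof(int) >= 2);      /* compiles           */
  /* my_compile_time_assert(sizeof(int) == 1);      would not compile  */
  printf("compile-time checks passed\n");
  return 0;
}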
diff --git a/include/my_handler.h b/include/my_handler.h
index d7cd0567f9c..72caf67398f 100644
--- a/include/my_handler.h
+++ b/include/my_handler.h
@@ -18,10 +18,30 @@
#ifndef _my_handler_h
#define _my_handler_h
-#include "my_base.h"
-#include "m_ctype.h"
#include "myisampack.h"
+/*
+ There is a hard limit for the maximum number of keys as there are only
+ 8 bits in the index file header for the number of keys in a table.
+ This means that 0..255 keys can exist for a table. The idea of
+ HA_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on
+ a MyISAM table for which one has more keys than MyISAM is normally
+ compiled for. If you don't have this, you will get a core dump when
+ running myisamchk compiled for 128 keys on a table with 255 keys.
+*/
+
+#define HA_MAX_POSSIBLE_KEY 255 /* For myisamchk */
+/*
+ The following defines can be increased if necessary.
+ But beware the dependency of HA_MAX_POSSIBLE_KEY_BUFF and HA_MAX_KEY_LENGTH.
+*/
+
+#define HA_MAX_KEY_LENGTH 1000 /* Max length in bytes */
+#define HA_MAX_KEY_SEG 16 /* Max segments for key */
+
+#define HA_MAX_POSSIBLE_KEY_BUFF (HA_MAX_KEY_LENGTH + 24+ 6+6)
+#define HA_MAX_KEY_BUFF (HA_MAX_KEY_LENGTH+HA_MAX_KEY_SEG*6+8+8)
+
typedef struct st_HA_KEYSEG /* Key-portion */
{
CHARSET_INFO *charset;
@@ -38,33 +58,35 @@ typedef struct st_HA_KEYSEG /* Key-portion */
} HA_KEYSEG;
#define get_key_length(length,key) \
-{ if ((uchar) *(key) != 255) \
- length= (uint) (uchar) *((key)++); \
+{ if (*(uchar*) (key) != 255) \
+ length= (uint) *(uchar*) ((key)++); \
else \
- { length=mi_uint2korr((key)+1); (key)+=3; } \
+ { length= mi_uint2korr((key)+1); (key)+=3; } \
}
#define get_key_length_rdonly(length,key) \
-{ if ((uchar) *(key) != 255) \
- length= ((uint) (uchar) *((key))); \
+{ if (*(uchar*) (key) != 255) \
+ length= ((uint) *(uchar*) ((key))); \
else \
- { length=mi_uint2korr((key)+1); } \
+ { length= mi_uint2korr((key)+1); } \
}
#define get_key_pack_length(length,length_pack,key) \
-{ if ((uchar) *(key) != 255) \
- { length= (uint) (uchar) *((key)++); length_pack=1; }\
+{ if (*(uchar*) (key) != 255) \
+ { length= (uint) *(uchar*) ((key)++); length_pack= 1; }\
else \
- { length=mi_uint2korr((key)+1); (key)+=3; length_pack=3; } \
+ { length=mi_uint2korr((key)+1); (key)+= 3; length_pack= 3; } \
}
#define store_key_length_inc(key,length) \
{ if ((length) < 255) \
- { *(key)++=(length); } \
+ { *(key)++= (length); } \
else \
{ *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \
}
+#define size_to_store_key_length(length) ((length) < 255 ? 1 : 3)
+
#define get_rec_bits(bit_ptr, bit_ofs, bit_len) \
(((((uint16) (bit_ptr)[1] << 8) | (uint16) (bit_ptr)[0]) >> (bit_ofs)) & \
((1 << (bit_len)) - 1))
@@ -81,7 +103,7 @@ typedef struct st_HA_KEYSEG /* Key-portion */
#define clr_rec_bits(bit_ptr, bit_ofs, bit_len) \
set_rec_bits(0, bit_ptr, bit_ofs, bit_len)
-extern int mi_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint ,
+extern int ha_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint ,
my_bool, my_bool);
extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
register uchar *b, uint key_length, uint nextflag,
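A standalone sketch of the 1-or-3-byte length encoding that get_key_length()/store_key_length_inc() and size_to_store_key_length() operate on: lengths below 255 take one byte, longer ones a 0xFF marker plus a two-byte length. The byte order of the two-byte form is illustrative only; the real macros delegate to mi_int2store()/mi_uint2korr():

#include <stdio.h>

static unsigned char *store_length(unsigned char *p, unsigned int len)
{
  if (len < 255)
    *p++= (unsigned char) len;                 /* short form: one byte */
  else
  {
    *p++= 255;                                 /* marker byte          */
    *p++= (unsigned char) (len >> 8);          /* two-byte length      */
    *p++= (unsigned char) (len & 0xFF);
  }
  return p;
}

static const unsigned char *read_length(const unsigned char *p,
                                        unsigned int *len)
{
  if (*p != 255)
    *len= *p++;
  else
  {
    *len= ((unsigned int) p[1] << 8) | p[2];
    p+= 3;
  }
  return p;
}

int main(void)
{
  unsigned char buff[8];
  unsigned int len;
  unsigned char *end= store_length(buff, 300);
  read_length(buff, &len);
  printf("decoded %u from %d bytes\n", len, (int) (end - buff)); /* 300, 3 */
  return 0;
}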
diff --git a/include/my_sys.h b/include/my_sys.h
index 8e831b448d7..c8362455970 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -54,6 +54,7 @@ extern int NEAR my_errno; /* Last error in mysys */
#define MY_WME 16 /* Write message on error */
#define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */
#define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors */
+#define MY_SYNC_DIR 1024 /* my_create/delete/rename: sync directory */
#define MY_RAID 64 /* Support for RAID */
#define MY_FULL_IO 512 /* For my_read - loop intil I/O is complete */
#define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */
@@ -215,6 +216,7 @@ extern int (*error_handler_hook)(uint my_err, const char *str,myf MyFlags);
extern int (*fatal_error_handler_hook)(uint my_err, const char *str,
myf MyFlags);
extern uint my_file_limit;
+extern ulong my_thread_stack_size;
#ifdef HAVE_LARGE_PAGES
extern my_bool my_use_large_pages;
@@ -628,6 +630,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags);
extern int my_fclose(FILE *fd,myf MyFlags);
extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags);
extern int my_sync(File fd, myf my_flags);
+extern int my_sync_dir(const char *dir_name, myf my_flags);
+extern int my_sync_dir_by_file(const char *file_name, myf my_flags);
extern int my_error _VARARGS((int nr,myf MyFlags, ...));
extern int my_printf_error _VARARGS((uint my_err, const char *format,
myf MyFlags, ...))
@@ -767,6 +771,7 @@ extern my_bool insert_dynamic(DYNAMIC_ARRAY *array,gptr element);
extern byte *alloc_dynamic(DYNAMIC_ARRAY *array);
extern byte *pop_dynamic(DYNAMIC_ARRAY*);
extern my_bool set_dynamic(DYNAMIC_ARRAY *array,gptr element,uint array_index);
+extern my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements);
extern void get_dynamic(DYNAMIC_ARRAY *array,gptr element,uint array_index);
extern void delete_dynamic(DYNAMIC_ARRAY *array);
extern void delete_dynamic_element(DYNAMIC_ARRAY *array, uint array_index);
@@ -831,10 +836,6 @@ extern int packfrm(const void *, uint, const void **, uint *);
extern int unpackfrm(const void **, uint *, const void *);
extern ha_checksum my_checksum(ha_checksum crc, const byte *mem, uint count);
-extern uint my_bit_log2(ulong value);
-extern uint32 my_round_up_to_next_power(uint32 v);
-extern uint my_count_bits(ulonglong v);
-extern uint my_count_bits_ushort(ushort v);
extern void my_sleep(ulong m_seconds);
extern ulong crc32(ulong crc, const uchar *buf, uint len);
extern uint my_set_max_open_files(uint files);
@@ -850,7 +851,7 @@ extern int my_getncpus();
#ifndef MAP_NOSYNC
#define MAP_NOSYNC 0
#endif
-#ifndef MAP_NORESERVE
+#ifndef MAP_NORESERVE
#define MAP_NORESERVE 0 /* For irix and AIX */
#endif
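On systems where NEED_EXPLICIT_SYNC_DIR is set (Linux, per the my_global.h hunk earlier), the new my_sync_dir()/my_sync_dir_by_file() calls and the MY_SYNC_DIR flag exist to make file creation/deletion/renaming durable by syncing the containing directory. A standalone POSIX sketch of the underlying idea, independent of mysys:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

/* after creating/deleting/renaming a file, fsync() its directory so the
   directory-entry change itself reaches disk */
static int sync_dir(const char *dir_name)
{
  int res;
  int fd= open(dir_name, O_RDONLY);   /* directories open read-only */
  if (fd < 0)
    return -1;
  res= fsync(fd);
  close(fd);
  return res;
}

int main(void)
{
  if (sync_dir(".") == 0)
    printf("directory synced\n");
  else
    perror("sync_dir");
  return 0;
}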
diff --git a/include/myisam.h b/include/myisam.h
index f763bf07719..d6d6426adf7 100644
--- a/include/myisam.h
+++ b/include/myisam.h
@@ -31,33 +31,19 @@ extern "C" {
#include "keycache.h"
#endif
#include "my_handler.h"
+#include <myisamchk.h>
#include <mysql/plugin.h>
/*
- There is a hard limit for the maximum number of keys as there are only
- 8 bits in the index file header for the number of keys in a table.
- This means that 0..255 keys can exist for a table. The idea of
- MI_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on
- a MyISAM table for which one has more keys than MyISAM is normally
- compiled for. If you don't have this, you will get a core dump when
- running myisamchk compiled for 128 keys on a table with 255 keys.
+ Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details
*/
-#define MI_MAX_POSSIBLE_KEY 255 /* For myisam_chk */
-#if MAX_INDEXES > MI_MAX_POSSIBLE_KEY
-#define MI_MAX_KEY MI_MAX_POSSIBLE_KEY /* Max allowed keys */
+
+#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY
+#define MI_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */
#else
#define MI_MAX_KEY MAX_INDEXES /* Max allowed keys */
#endif
-#define MI_MAX_POSSIBLE_KEY_BUFF (1024+6+6) /* For myisam_chk */
-/*
- The following defines can be increased if necessary.
- But beware the dependency of MI_MAX_POSSIBLE_KEY_BUFF and MI_MAX_KEY_LENGTH.
-*/
-#define MI_MAX_KEY_LENGTH 1000 /* Max length in bytes */
-#define MI_MAX_KEY_SEG 16 /* Max segments for key */
-
-#define MI_MAX_KEY_BUFF (MI_MAX_KEY_LENGTH+MI_MAX_KEY_SEG*6+8+8)
#define MI_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */
#define MI_NAME_IEXT ".MYI"
#define MI_NAME_DEXT ".MYD"
@@ -256,9 +242,6 @@ typedef struct st_columndef /* column information */
#endif
} MI_COLUMNDEF;
-/* invalidator function reference for Query Cache */
-typedef void (* invalidator_by_filename)(const char * filename);
-
extern my_string myisam_log_filename; /* Name of logfile */
extern ulong myisam_block_size;
extern ulong myisam_concurrent_insert;
@@ -311,195 +294,117 @@ extern int mi_delete_all_rows(struct st_myisam_info *info);
extern ulong _mi_calc_blob_length(uint length , const byte *pos);
extern uint mi_get_pointer_length(ulonglong file_length, uint def);
-/* this is used to pass to mysql_myisamchk_table -- by Sasha Pachev */
+/* this is used to pass to mysql_myisamchk_table */
#define MYISAMCHK_REPAIR 1 /* equivalent to myisamchk -r */
#define MYISAMCHK_VERIFY 2 /* Verify, run repair if failure */
-/*
- Definitions needed for myisamchk.c
-
- Entries marked as "QQ to be removed" are NOT used to
- pass check/repair options to mi_check.c. They are used
- internally by myisamchk.c or/and ha_myisam.cc and should NOT
- be stored together with other flags. They should be removed
- from the following list to make addition of new flags possible.
-*/
-
-#define T_AUTO_INC 1
-#define T_AUTO_REPAIR 2 /* QQ to be removed */
-#define T_BACKUP_DATA 4
-#define T_CALC_CHECKSUM 8
-#define T_CHECK 16 /* QQ to be removed */
-#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */
-#define T_CREATE_MISSING_KEYS 64
-#define T_DESCRIPT 128
-#define T_DONT_CHECK_CHECKSUM 256
-#define T_EXTEND 512
-#define T_FAST (1L << 10) /* QQ to be removed */
-#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */
-#define T_FORCE_UNIQUENESS (1L << 12)
-#define T_INFO (1L << 13)
-#define T_MEDIUM (1L << 14)
-#define T_QUICK (1L << 15) /* QQ to be removed */
-#define T_READONLY (1L << 16) /* QQ to be removed */
-#define T_REP (1L << 17)
-#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */
-#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */
-#define T_RETRY_WITHOUT_QUICK (1L << 20)
-#define T_SAFE_REPAIR (1L << 21)
-#define T_SILENT (1L << 22)
-#define T_SORT_INDEX (1L << 23) /* QQ to be removed */
-#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */
-#define T_STATISTICS (1L << 25)
-#define T_UNPACK (1L << 26)
-#define T_UPDATE_STATE (1L << 27)
-#define T_VERBOSE (1L << 28)
-#define T_VERY_SILENT (1L << 29)
-#define T_WAIT_FOREVER (1L << 30)
-#define T_WRITE_LOOP ((ulong) 1L << 31)
-
-#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL)
-
-/*
- Flags used by myisamchk.c or/and ha_myisam.cc that are NOT passed
- to mi_check.c follows:
-*/
-
-#define TT_USEFRM 1
-#define TT_FOR_UPGRADE 2
+typedef uint mi_bit_type;
-#define O_NEW_INDEX 1 /* Bits set in out_flag */
-#define O_NEW_DATA 2
-#define O_DATA_LOST 4
+typedef struct st_mi_bit_buff
+{ /* Used for packing of record */
+ mi_bit_type current_byte;
+ uint bits;
+ uchar *pos, *end, *blob_pos, *blob_end;
+ uint error;
+} MI_BIT_BUFF;
-/* these struct is used by my_check to tell it what to do */
-typedef struct st_sort_key_blocks /* Used when sorting */
+typedef struct st_sort_info
{
- uchar *buff,*end_pos;
- uchar lastkey[MI_MAX_POSSIBLE_KEY_BUFF];
- uint last_length;
- int inited;
-} SORT_KEY_BLOCKS;
-
-
-/*
- MyISAM supports several statistics collection methods. Currently statistics
- collection method is not stored in MyISAM file and has to be specified for
- each table analyze/repair operation in MI_CHECK::stats_method.
-*/
+#ifdef THREAD
+ /* sync things */
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+#endif
+ MI_INFO *info;
+ HA_CHECK *param;
+ char *buff;
+ SORT_KEY_BLOCKS *key_block, *key_block_end;
+ SORT_FT_BUF *ft_buf;
+ my_off_t filelength, dupp, buff_length;
+ ha_rows max_records;
+ uint current_key, total_keys;
+ uint got_error, threads_running;
+ myf myf_rw;
+ enum data_file_type new_data_file_type;
+} MI_SORT_INFO;
-typedef enum
-{
- /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */
- MI_STATS_METHOD_NULLS_NOT_EQUAL,
- /* Treat NULLs as equal when collecting statistics (like 4.0 did) */
- MI_STATS_METHOD_NULLS_EQUAL,
- /* Ignore NULLs - count only tuples without NULLs in the index components */
- MI_STATS_METHOD_IGNORE_NULLS
-} enum_mi_stats_method;
-
-typedef struct st_mi_check_param
+typedef struct st_mi_sort_param
{
- ulonglong auto_increment_value;
- ulonglong max_data_file_length;
- ulonglong keys_in_use;
- ulonglong max_record_length;
- my_off_t search_after_block;
- my_off_t new_file_pos,key_file_blocks;
- my_off_t keydata,totaldata,key_blocks,start_check_pos;
- ha_rows total_records,total_deleted;
- ha_checksum record_checksum,glob_crc;
- ulong use_buffers,read_buffer_length,write_buffer_length,
- sort_buffer_length,sort_key_blocks;
- uint out_flag,warning_printed,error_printed,verbose;
- uint opt_sort_key,total_files,max_level;
- uint testflag, key_cache_block_size;
- uint8 language;
- my_bool using_global_keycache, opt_lock_memory, opt_follow_links;
- my_bool retry_repair, force_sort;
- char temp_filename[FN_REFLEN],*isam_file_name;
- MY_TMPDIR *tmpdir;
- int tmpfile_createflag;
- myf myf_rw;
- IO_CACHE read_cache;
+ pthread_t thr;
+ IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
+ DYNAMIC_ARRAY buffpek;
+ MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */
+ MI_KEYDEF *keyinfo;
+ MI_SORT_INFO *sort_info;
+ HA_KEYSEG *seg;
+ uchar **sort_keys;
+ byte *rec_buff;
+ void *wordlist, *wordptr;
+ MEM_ROOT wordroot;
+ char *record;
+ MY_TMPDIR *tmpdir;
+
/*
The next two are used to collect statistics, see update_key_parts for
description.
*/
- ulonglong unique_count[MI_MAX_KEY_SEG+1];
- ulonglong notnull_count[MI_MAX_KEY_SEG+1];
-
- ha_checksum key_crc[MI_MAX_POSSIBLE_KEY];
- ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY];
- void *thd;
- const char *db_name, *table_name;
- const char *op_name;
- enum_mi_stats_method stats_method;
-} MI_CHECK;
-
-typedef struct st_sort_ft_buf
-{
- uchar *buf, *end;
- int count;
- uchar lastkey[MI_MAX_KEY_BUFF];
-} SORT_FT_BUF;
+ ulonglong unique[HA_MAX_KEY_SEG+1];
+ ulonglong notnull[HA_MAX_KEY_SEG+1];
+
+ my_off_t pos,max_pos,filepos,start_recpos;
+ uint key, key_length,real_key_length,sortbuff_size;
+ uint maxbuffers, keys, find_length, sort_keys_length;
+ my_bool fix_datafile, master;
+ my_bool calc_checksum; /* calculate table checksum */
+
+ int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *);
+ int (*key_read)(struct st_mi_sort_param *,void *);
+ int (*key_write)(struct st_mi_sort_param *, const void *);
+ void (*lock_in_memory)(HA_CHECK *);
+ NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **,
+ uint , struct st_buffpek *, IO_CACHE *);
+ NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint);
+ NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,char *,
+ uint, uint);
+} MI_SORT_PARAM;
-typedef struct st_sort_info
-{
- my_off_t filelength,dupp,buff_length;
- ha_rows max_records;
- uint current_key, total_keys;
- myf myf_rw;
- enum data_file_type new_data_file_type;
- MI_INFO *info;
- MI_CHECK *param;
- char *buff;
- SORT_KEY_BLOCKS *key_block,*key_block_end;
- SORT_FT_BUF *ft_buf;
- /* sync things */
- uint got_error, threads_running;
-#ifdef THREAD
- pthread_mutex_t mutex;
- pthread_cond_t cond;
-#endif
-} SORT_INFO;
/* functions in mi_check */
-void myisamchk_init(MI_CHECK *param);
-int chk_status(MI_CHECK *param, MI_INFO *info);
-int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag);
-int chk_size(MI_CHECK *param, MI_INFO *info);
-int chk_key(MI_CHECK *param, MI_INFO *info);
-int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend);
-int mi_repair(MI_CHECK *param, register MI_INFO *info,
+void myisamchk_init(HA_CHECK *param);
+int chk_status(HA_CHECK *param, MI_INFO *info);
+int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag);
+int chk_size(HA_CHECK *param, MI_INFO *info);
+int chk_key(HA_CHECK *param, MI_INFO *info);
+int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend);
+int mi_repair(HA_CHECK *param, register MI_INFO *info,
my_string name, int rep_quick);
-int mi_sort_index(MI_CHECK *param, register MI_INFO *info, my_string name);
-int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
+int mi_sort_index(HA_CHECK *param, register MI_INFO *info, my_string name);
+int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
const char * name, int rep_quick);
-int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
+int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info,
const char * name, int rep_quick);
int change_to_newfile(const char * filename, const char * old_ext,
const char * new_ext, uint raid_chunks,
myf myflags);
-int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type,
+int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type,
const char *filetype, const char *filename);
-void lock_memory(MI_CHECK *param);
-void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
+void lock_memory(HA_CHECK *param);
+void update_auto_increment_key(HA_CHECK *param, MI_INFO *info,
my_bool repair);
-int update_state_info(MI_CHECK *param, MI_INFO *info,uint update);
+int update_state_info(HA_CHECK *param, MI_INFO *info,uint update);
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
ulonglong *unique, ulonglong *notnull,
ulonglong records);
-int filecopy(MI_CHECK *param, File to,File from,my_off_t start,
+int filecopy(HA_CHECK *param, File to,File from,my_off_t start,
my_off_t length, const char *type);
int movepoint(MI_INFO *info,byte *record,my_off_t oldpos,
my_off_t newpos, uint prot_key);
-int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile);
+int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile);
int test_if_almost_full(MI_INFO *info);
-int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename);
+int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename);
void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows);
my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows, ulonglong key_map,
my_bool force);
@@ -513,6 +418,13 @@ void mi_change_key_cache(KEY_CACHE *old_key_cache,
KEY_CACHE *new_key_cache);
int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves);
+int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile);
+int flush_pending_blocks(MI_SORT_PARAM *param);
+int sort_ft_buf_flush(MI_SORT_PARAM *sort_param);
+int thr_write_keys(MI_SORT_PARAM *sort_param);
+int sort_write_record(MI_SORT_PARAM *sort_param);
+int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong);
+
#ifdef __cplusplus
}
#endif
diff --git a/include/myisamchk.h b/include/myisamchk.h
new file mode 100644
index 00000000000..66d0a77b62f
--- /dev/null
+++ b/include/myisamchk.h
@@ -0,0 +1,164 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Definitions needed for myisamchk/mariachk.c */
+
+/*
+ Entries marked as "QQ to be removed" are NOT used to
+ pass check/repair options to xxx_check.c. They are used
+ internally by xxxchk.c or/and ha_xxxx.cc and should NOT
+ be stored together with other flags. They should be removed
+ from the following list to make addition of new flags possible.
+*/
+
+#ifndef _myisamchk_h
+#define _myisamchk_h
+
+#define T_AUTO_INC 1
+#define T_AUTO_REPAIR 2 /* QQ to be removed */
+#define T_BACKUP_DATA 4
+#define T_CALC_CHECKSUM 8
+#define T_CHECK 16 /* QQ to be removed */
+#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */
+#define T_CREATE_MISSING_KEYS 64
+#define T_DESCRIPT 128
+#define T_DONT_CHECK_CHECKSUM 256
+#define T_EXTEND 512
+#define T_FAST (1L << 10) /* QQ to be removed */
+#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */
+#define T_FORCE_UNIQUENESS (1L << 12)
+#define T_INFO (1L << 13)
+#define T_MEDIUM (1L << 14)
+#define T_QUICK (1L << 15) /* QQ to be removed */
+#define T_READONLY (1L << 16) /* QQ to be removed */
+#define T_REP (1L << 17)
+#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */
+#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */
+#define T_RETRY_WITHOUT_QUICK (1L << 20)
+#define T_SAFE_REPAIR (1L << 21)
+#define T_SILENT (1L << 22)
+#define T_SORT_INDEX (1L << 23) /* QQ to be removed */
+#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */
+#define T_STATISTICS (1L << 25)
+#define T_UNPACK (1L << 26)
+#define T_UPDATE_STATE (1L << 27)
+#define T_VERBOSE (1L << 28)
+#define T_VERY_SILENT (1L << 29)
+#define T_WAIT_FOREVER (1L << 30)
+#define T_WRITE_LOOP ((ulong) 1L << 31)
+
+#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL)
+
+/*
+ Flags used by xxxxchk.c or/and ha_xxxx.cc that are NOT passed
+ to xxxcheck.c follow:
+*/
+
+#define TT_USEFRM 1
+#define TT_FOR_UPGRADE 2
+
+#define O_NEW_INDEX 1 /* Bits set in out_flag */
+#define O_NEW_DATA 2
+#define O_DATA_LOST 4
+
+typedef struct st_sort_key_blocks /* Used when sorting */
+{
+ byte *buff, *end_pos;
+ byte lastkey[HA_MAX_POSSIBLE_KEY_BUFF];
+ uint last_length;
+ int inited;
+} SORT_KEY_BLOCKS;
+
+
+/*
+ MARIA/MYISAM supports several statistics collection
+ methods. Currently the statistics collection method is not stored in
+ the MARIA file and has to be specified for each table analyze/repair
+ operation in HA_CHECK::stats_method.
+*/
+
+typedef enum
+{
+ /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */
+ MI_STATS_METHOD_NULLS_NOT_EQUAL,
+ /* Treat NULLs as equal when collecting statistics (like 4.0 did) */
+ MI_STATS_METHOD_NULLS_EQUAL,
+ /* Ignore NULLs - count only tuples without NULLs in the index components */
+ MI_STATS_METHOD_IGNORE_NULLS
+} enum_handler_stats_method;
+
+
+typedef struct st_handler_check_param
+{
+ char *isam_file_name;
+ MY_TMPDIR *tmpdir;
+ void *thd;
+ const char *db_name, *table_name, *op_name;
+ ulonglong auto_increment_value;
+ ulonglong max_data_file_length;
+ ulonglong keys_in_use;
+ ulonglong max_record_length;
+ /*
+ The next two are used to collect statistics, see update_key_parts for
+ description.
+ */
+ ulonglong unique_count[HA_MAX_KEY_SEG + 1];
+ ulonglong notnull_count[HA_MAX_KEY_SEG + 1];
+
+ my_off_t search_after_block;
+ my_off_t new_file_pos, key_file_blocks;
+ my_off_t keydata, totaldata, key_blocks, start_check_pos;
+ my_off_t used, empty, splits, del_length, link_used;
+ ha_rows total_records, total_deleted, records,del_blocks;
+ ha_rows full_page_count, tail_count;
+ ha_checksum record_checksum, glob_crc;
+ ha_checksum key_crc[HA_MAX_POSSIBLE_KEY];
+ ha_checksum tmp_key_crc[HA_MAX_POSSIBLE_KEY];
+ ha_checksum tmp_record_checksum;
+ ulong use_buffers, read_buffer_length, write_buffer_length;
+ ulong sort_buffer_length, sort_key_blocks;
+ ulong rec_per_key_part[HA_MAX_KEY_SEG * HA_MAX_POSSIBLE_KEY];
+ uint out_flag, warning_printed, error_printed, verbose;
+ uint opt_sort_key, total_files, max_level;
+ uint testflag, key_cache_block_size;
+ int tmpfile_createflag, err_count;
+ myf myf_rw;
+ uint8 language;
+ my_bool using_global_keycache, opt_lock_memory, opt_follow_links;
+ my_bool retry_repair, force_sort, calc_checksum, static_row_size;
+ char temp_filename[FN_REFLEN];
+ IO_CACHE read_cache;
+ enum_handler_stats_method stats_method;
+} HA_CHECK;
+
+
+typedef struct st_sort_ftbuf
+{
+ byte *buf, *end;
+ int count;
+ byte lastkey[HA_MAX_KEY_BUFF];
+} SORT_FT_BUF;
+
+
+typedef struct st_buffpek {
+ my_off_t file_pos; /* Where we are in the sort file */
+ byte *base, *key; /* Key pointers */
+ ha_rows count; /* Number of rows in table */
+ ulong mem_count; /* number of keys in memory */
+ ulong max_keys; /* Max keys in buffer */
+} BUFFPEK;
+
+#endif /* _myisamchk_h */
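The T_* constants above are single bits meant to be OR-ed into and tested against one testflag word (HA_CHECK::testflag); a standalone sketch using a few of the values defined in this header:

#include <stdio.h>

#define T_CALC_CHECKSUM        8
#define T_REP                  (1L << 17)
#define T_REP_BY_SORT          (1L << 18)
#define T_REP_PARALLEL         (1L << 19)
#define T_REP_ANY              (T_REP | T_REP_BY_SORT | T_REP_PARALLEL)

int main(void)
{
  unsigned long testflag= T_REP_BY_SORT | T_CALC_CHECKSUM;

  if (testflag & T_REP_ANY)               /* any repair method requested? */
    printf("repair requested\n");
  if (testflag & T_CALC_CHECKSUM)
    printf("checksum will be calculated\n");
  return 0;
}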
diff --git a/include/pagecache.h b/include/pagecache.h
new file mode 100644
index 00000000000..32e23fb54ea
--- /dev/null
+++ b/include/pagecache.h
@@ -0,0 +1,228 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Page cache variable structures */
+
+#ifndef _pagecache_h
+#define _pagecache_h
+C_MODE_START
+
+#include "../storage/maria/ma_loghandler_lsn.h"
+#include <m_string.h>
+
+/* Type of the page */
+enum pagecache_page_type
+{
+#ifndef DBUG_OFF
+ /* used only to control page type changes during debugging */
+ PAGECACHE_EMPTY_PAGE,
+#endif
+ /* the page does not contain LSN */
+ PAGECACHE_PLAIN_PAGE,
+ /* the page contains an LSN (maria tablespace page) */
+ PAGECACHE_LSN_PAGE
+};
+
+/*
+ This enum describes lock status changes. Every type of page cache will
+ interpret a WRITE/READ lock as it needs.
+*/
+enum pagecache_page_lock
+{
+ PAGECACHE_LOCK_LEFT_UNLOCKED, /* free -> free */
+ PAGECACHE_LOCK_LEFT_READLOCKED, /* read -> read */
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, /* write -> write */
+ PAGECACHE_LOCK_READ, /* free -> read */
+ PAGECACHE_LOCK_WRITE, /* free -> write */
+ PAGECACHE_LOCK_READ_UNLOCK, /* read -> free */
+ PAGECACHE_LOCK_WRITE_UNLOCK, /* write -> free */
+ PAGECACHE_LOCK_WRITE_TO_READ /* write -> read */
+};
+/*
+ This enum describes pin status changes
+*/
+enum pagecache_page_pin
+{
+ PAGECACHE_PIN_LEFT_PINNED, /* pinned -> pinned */
+ PAGECACHE_PIN_LEFT_UNPINNED, /* unpinned -> unpinned */
+ PAGECACHE_PIN, /* unpinned -> pinned */
+ PAGECACHE_UNPIN /* pinned -> unpinned */
+};
+/* How to write the page */
+enum pagecache_write_mode
+{
+ /* do not write immediately, i.e. it will be a dirty page */
+ PAGECACHE_WRITE_DELAY,
+ /* write page to the file and put it to the cache */
+ PAGECACHE_WRITE_NOW,
+ /* page already is in the file. (key cache insert analogue) */
+ PAGECACHE_WRITE_DONE
+};
+
+typedef void *PAGECACHE_PAGE_LINK;
+
+/* file descriptor for Maria */
+typedef struct st_pagecache_file
+{
+ int file; /* int for debugging purposes for now; later it will be uint32 file_no */
+} PAGECACHE_FILE;
+
+/* page number for maria */
+typedef uint32 pgcache_page_no_t;
+
+/* declare structures that are used by st_pagecache */
+
+struct st_pagecache_block_link;
+typedef struct st_pagecache_block_link PAGECACHE_BLOCK_LINK;
+struct st_pagecache_page;
+typedef struct st_pagecache_page PAGECACHE_PAGE;
+struct st_pagecache_hash_link;
+typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK;
+
+#include <wqueue.h>
+
+typedef my_bool (*pagecache_disk_read_validator)(byte *page, gptr data);
+
+#define PAGECACHE_CHANGED_BLOCKS_HASH 128 /* must be power of 2 */
+
+/*
+ The page cache structure
+ It also contains read-only statistics parameters.
+*/
+
+typedef struct st_pagecache
+{
+ my_bool inited;
+ my_bool resize_in_flush; /* true during flush of resize operation */
+ my_bool can_be_used; /* usage of cache for read/write is allowed */
+ uint shift; /* block size = 2 ^ shift */
+ my_size_t mem_size; /* specified size of the cache memory */
+ uint32 block_size; /* size of the page buffer of a cache block */
+ ulong min_warm_blocks; /* min number of warm blocks; */
+ ulong age_threshold; /* age threshold for hot blocks */
+ ulonglong time; /* total number of block link operations */
+ uint hash_entries; /* max number of entries in the hash table */
+ int hash_links; /* max number of hash links */
+ int hash_links_used; /* number of hash links taken from free links pool */
+ int disk_blocks; /* max number of blocks in the cache */
+ ulong blocks_used; /* maximum number of concurrently used blocks */
+ ulong blocks_unused; /* number of currently unused blocks */
+ ulong blocks_changed; /* number of currently dirty blocks */
+ ulong warm_blocks; /* number of blocks in warm sub-chain */
+ ulong cnt_for_resize_op; /* counter to block resize operation */
+ ulong blocks_available; /* number of blocks available in the LRU chain */
+ PAGECACHE_HASH_LINK **hash_root;/* arr. of entries into hash table buckets */
+ PAGECACHE_HASH_LINK *hash_link_root;/* memory for hash table links */
+ PAGECACHE_HASH_LINK *free_hash_list;/* list of free hash links */
+ PAGECACHE_BLOCK_LINK *free_block_list;/* list of free blocks */
+ PAGECACHE_BLOCK_LINK *block_root;/* memory for block links */
+ byte HUGE_PTR *block_mem; /* memory for block buffers */
+ PAGECACHE_BLOCK_LINK *used_last;/* ptr to the last block of the LRU chain */
+ PAGECACHE_BLOCK_LINK *used_ins;/* ptr to the insertion block in LRU chain */
+ pthread_mutex_t cache_lock; /* to lock access to the cache structure */
+ WQUEUE resize_queue; /* threads waiting during resize operation */
+ WQUEUE waiting_for_hash_link;/* waiting for a free hash link */
+ WQUEUE waiting_for_block; /* requests waiting for a free block */
+ /* hash for dirty file bl.*/
+ PAGECACHE_BLOCK_LINK *changed_blocks[PAGECACHE_CHANGED_BLOCKS_HASH];
+ /* hash for other file bl.*/
+ PAGECACHE_BLOCK_LINK *file_blocks[PAGECACHE_CHANGED_BLOCKS_HASH];
+
+ /*
+ The following variables are used to hold parameters for
+ initializing the page cache.
+ */
+
+ ulonglong param_buff_size; /* size the memory allocated for the cache */
+ ulong param_block_size; /* size of the blocks in the key cache */
+ ulong param_division_limit; /* min. percentage of warm blocks */
+ ulong param_age_threshold; /* determines when hot block is downgraded */
+
+ /* Statistics variables. These are reset in reset_pagecache_counters(). */
+ ulong global_blocks_changed; /* number of currently dirty blocks */
+ ulonglong global_cache_w_requests;/* number of write requests (write hits) */
+ ulonglong global_cache_write; /* number of writes from cache to files */
+ ulonglong global_cache_r_requests;/* number of read requests (read hits) */
+ ulonglong global_cache_read; /* number of reads from files to cache */
+
+ int blocks; /* max number of blocks in the cache */
+ my_bool in_init; /* Set to 1 in MySQL during init/resize */
+} PAGECACHE;
+
+extern int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem,
+ uint division_limit, uint age_threshold,
+ uint block_size);
+extern int resize_pagecache(PAGECACHE *pagecache,
+ my_size_t use_mem, uint division_limit,
+ uint age_threshold);
+extern void change_pagecache_param(PAGECACHE *pagecache, uint division_limit,
+ uint age_threshold);
+
+#define pagecache_read(P,F,N,L,B,T,K,I) \
+ pagecache_valid_read(P,F,N,L,B,T,K,I,0,0)
+
+extern byte *pagecache_valid_read(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ byte *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ PAGECACHE_PAGE_LINK *link,
+ pagecache_disk_read_validator validator,
+ gptr validator_data);
+extern my_bool pagecache_write(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ byte *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ enum pagecache_write_mode write_mode,
+ PAGECACHE_PAGE_LINK *link);
+extern void pagecache_unlock_page(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ LSN first_REDO_LSN_for_page);
+extern void pagecache_unlock(PAGECACHE *pagecache,
+ PAGECACHE_PAGE_LINK *link,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ LSN first_REDO_LSN_for_page);
+extern void pagecache_unpin_page(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno);
+extern void pagecache_unpin(PAGECACHE *pagecache,
+ PAGECACHE_PAGE_LINK *link);
+extern int flush_pagecache_blocks(PAGECACHE *keycache,
+ PAGECACHE_FILE *file,
+ enum flush_type type);
+extern my_bool pagecache_delete_page(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_lock lock,
+ my_bool flush);
+extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup);
+extern my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
+ LEX_STRING *str,
+ LSN *max_lsn);
+extern int reset_pagecache_counters(const char *name, PAGECACHE *pagecache);
+
+C_MODE_END
+#endif /* _pagecache_h */
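The read/write/flush entry points above pair with the lock/pin enums; the following is only a rough in-tree usage sketch. It assumes this tree's headers (my_global.h, my_sys.h, keycache.h for enum flush_type, pagecache.h) and therefore does not build standalone; the file name, cache size, block size, page number and the lock/pin combination are illustrative and error handling is omitted:

#include <my_global.h>
#include <my_sys.h>
#include <keycache.h>                    /* enum flush_type (FLUSH_KEEP) */
#include <pagecache.h>

void pagecache_usage_sketch(void)
{
  PAGECACHE pagecache;
  PAGECACHE_FILE file;
  byte buff[8192];

  /* 1M of cache memory, default division limit/age threshold, 8K pages */
  init_pagecache(&pagecache, 1024*1024, 0, 0, 8192);
  file.file= my_create("./pagecache_demo.dat", 0, O_RDWR, MYF(MY_WME));

  /* write page 0 through the cache and leave it dirty in memory */
  pagecache_write(&pagecache, &file, 0, 0, buff,
                  PAGECACHE_PLAIN_PAGE,
                  PAGECACHE_LOCK_LEFT_UNLOCKED,
                  PAGECACHE_PIN_LEFT_UNPINNED,
                  PAGECACHE_WRITE_DELAY, 0);

  /* read it back; pagecache_read() expands to pagecache_valid_read() */
  pagecache_read(&pagecache, &file, 0, 0, buff,
                 PAGECACHE_PLAIN_PAGE,
                 PAGECACHE_LOCK_LEFT_UNLOCKED, 0);

  /* flush dirty pages of this file, then shut the cache down */
  flush_pagecache_blocks(&pagecache, &file, FLUSH_KEEP);
  end_pagecache(&pagecache, 1);
  my_close(file.file, MYF(0));
}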
diff --git a/include/wqueue.h b/include/wqueue.h
new file mode 100644
index 00000000000..bacabb8c401
--- /dev/null
+++ b/include/wqueue.h
@@ -0,0 +1,26 @@
+
+#ifndef _wqueue_h
+#define _wqueue_h
+
+#include <my_global.h>
+#include <my_pthread.h>
+
+/* info about requests in a waiting queue */
+typedef struct st_pagecache_wqueue
+{
+ struct st_my_thread_var *last_thread; /* circular list of waiting
+ threads */
+} WQUEUE;
+
+#ifdef THREAD
+void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread);
+void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread);
+void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread);
+void wqueue_add_and_wait(WQUEUE *wqueue,
+ struct st_my_thread_var *thread,
+ pthread_mutex_t *lock);
+void wqueue_release_queue(WQUEUE *wqueue);
+
+#endif
+
+#endif
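The queue stores st_my_thread_var entries in a circular list; a rough in-tree sketch of the intended wait/wake pattern follows (THREAD builds only, my_thread_var comes from my_pthread.h, and initialization of 'lock' with pthread_mutex_init() is assumed to happen elsewhere):

#include <my_global.h>
#include <my_pthread.h>
#include <wqueue.h>

static WQUEUE queue;                  /* zero-initialized: empty queue */
static pthread_mutex_t lock;          /* assumed initialized elsewhere */
static int resource_ready= 0;

void wait_for_resource(void)
{
  pthread_mutex_lock(&lock);
  while (!resource_ready)
  {
    /* links the current thread into 'queue'; the mutex is released
       while the thread is suspended and re-acquired before returning */
    wqueue_add_and_wait(&queue, my_thread_var, &lock);
  }
  pthread_mutex_unlock(&lock);
}

void publish_resource(void)
{
  pthread_mutex_lock(&lock);
  resource_ready= 1;
  wqueue_release_queue(&queue);       /* wake every waiting thread */
  pthread_mutex_unlock(&lock);
}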
diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt
index db4368a3534..68bf129551b 100644
--- a/libmysql/CMakeLists.txt
+++ b/libmysql/CMakeLists.txt
@@ -52,6 +52,7 @@ ADD_LIBRARY(libmysql SHARED dll.c libmysql.def
../mysys/my_open.c ../mysys/my_pread.c ../mysys/my_pthread.c ../mysys/my_read.c
../mysys/my_realloc.c ../mysys/my_rename.c ../mysys/my_seek.c
../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c
+ ../mysys/my_sync.c
../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c
../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c
../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c
diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared
index dc6d658fcdf..bf8600e3ce7 100644
--- a/libmysql/Makefile.shared
+++ b/libmysql/Makefile.shared
@@ -68,7 +68,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \
mf_iocache2.lo my_seek.lo my_sleep.lo \
my_pread.lo mf_cache.lo md5.lo sha1.lo \
my_getopt.lo my_gethostbyname.lo my_port.lo \
- my_rename.lo my_chsize.lo
+ my_rename.lo my_chsize.lo my_sync.lo
sqlobjects = net.lo
sql_cmn_objects = pack.lo client.lo my_time.lo
diff --git a/mysql-test/include/have_maria.inc b/mysql-test/include/have_maria.inc
new file mode 100644
index 00000000000..955e2305ca5
--- /dev/null
+++ b/mysql-test/include/have_maria.inc
@@ -0,0 +1,4 @@
+-- require r/have_maria.require
+disable_query_log;
+show variables like "have_maria";
+enable_query_log;
diff --git a/mysql-test/r/events_logs_tests.result b/mysql-test/r/events_logs_tests.result
index 1efd3fa602e..7b80a1bc21c 100644
--- a/mysql-test/r/events_logs_tests.result
+++ b/mysql-test/r/events_logs_tests.result
@@ -86,7 +86,7 @@ slo_val val
20 0
1 0
"Check slow log. Should see 1 row because 4 is over the threshold of 3 for GLOBAL, though under SESSION which is 10"
-SELECT user_host, query_time, db, sql_text FROM mysql.slow_log;
+SELECT user_host, query_time, db, sql_text FROM mysql.slow_log where sql_text <> "DROP EVENT long_event";
user_host query_time db sql_text
USER_HOST SLEEPVAL events_test INSERT INTO slow_event_test SELECT @@long_query_time, SLEEP(2)
DROP EVENT long_event2;
diff --git a/mysql-test/r/have_maria.require b/mysql-test/r/have_maria.require
new file mode 100644
index 00000000000..02988af6976
--- /dev/null
+++ b/mysql-test/r/have_maria.require
@@ -0,0 +1,2 @@
+Variable_name Value
+have_maria YES
diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result
new file mode 100644
index 00000000000..7812f04336a
--- /dev/null
+++ b/mysql-test/r/maria.result
@@ -0,0 +1,1788 @@
+set global storage_engine=maria;
+set session storage_engine=maria;
+drop table if exists t1,t2;
+SET SQL_WARNINGS=1;
+CREATE TABLE t1 (
+STRING_DATA char(255) default NULL,
+KEY string_data (STRING_DATA)
+);
+INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA');
+INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD');
+INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF');
+INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG');
+INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH');
+INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW');
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a));
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+repair table t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+delete from t1 where (a & 1);
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+repair table t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b));
+insert into t1 (b) values (1),(2),(2),(2),(2);
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 0 PRIMARY 1 a A 5 NULL NULL BTREE
+t1 1 b 1 b A 1 NULL NULL BTREE
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status Table is already up to date
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 0 PRIMARY 1 a A 5 NULL NULL BTREE
+t1 1 b 1 b A 1 NULL NULL BTREE
+drop table t1;
+create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b));
+insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4);
+explain select * from t1 order by a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort
+explain select * from t1 order by b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort
+explain select * from t1 order by c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort
+explain select a from t1 order by a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL PRIMARY 4 NULL 4 Using index
+explain select b from t1 order by b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL b 4 NULL 4 Using index
+explain select a,b from t1 order by b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort
+explain select a,b from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4
+explain select a,b,c from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4
+drop table t1;
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1), (2), (3);
+LOCK TABLES t1 WRITE;
+INSERT INTO t1 VALUES (1), (2), (3);
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+DROP TABLE t1;
+create table t1 ( t1 char(255), key(t1(250)));
+insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169');
+insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203');
+insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767');
+insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907');
+insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011');
+insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101');
+insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564');
+insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002');
+insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834');
+insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016');
+insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899');
+insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482');
+insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139');
+insert t1 values ('485448544854485448544854485448544854485477847784778477847784778477847784778477');
+insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755');
+insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188');
+insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454');
+insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656');
+insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741');
+insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178');
+insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049');
+insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635');
+insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573');
+insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878');
+insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077');
+delete from t1 where t1>'2';
+insert t1 values ('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'),
+('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'),
+('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47');
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8
+int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17
+int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int,
+i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34
+int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int,
+i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51
+int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int,
+i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68
+int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int,
+i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85
+int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int,
+i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102
+int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110
+int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118
+int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126
+int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134
+int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142
+int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150
+int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158
+int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166
+int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174
+int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182
+int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190
+int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198
+int, i199 int, i200 int, i201 int, i202 int, i203 int, i204 int, i205 int, i206
+int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214
+int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222
+int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230
+int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238
+int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246
+int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254
+int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262
+int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270
+int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278
+int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286
+int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294
+int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302
+int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310
+int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318
+int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326
+int, i327 int, i328 int, i329 int, i330 int, i331 int, i332 int, i333 int, i334
+int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342
+int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350
+int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358
+int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366
+int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374
+int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382
+int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390
+int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398
+int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406
+int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414
+int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422
+int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430
+int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438
+int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446
+int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454
+int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462
+int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470
+int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478
+int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486
+int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494
+int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502
+int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510
+int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518
+int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526
+int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534
+int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542
+int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550
+int, i551 int, i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558
+int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566
+int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574
+int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582
+int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590
+int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598
+int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606
+int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614
+int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622
+int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630
+int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638
+int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646
+int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654
+int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662
+int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670
+int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678
+int, i679 int, i680 int, i681 int, i682 int, i683 int, i684 int, i685 int, i686
+int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694
+int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702
+int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710
+int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718
+int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726
+int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734
+int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742
+int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750
+int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758
+int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766
+int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774
+int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782
+int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790
+int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798
+int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806
+int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814
+int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822
+int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830
+int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838
+int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846
+int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854
+int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862
+int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870
+int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878
+int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886
+int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894
+int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902
+int, i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910
+int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918
+int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926
+int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934
+int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942
+int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950
+int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958
+int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966
+int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974
+int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982
+int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990
+int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998
+int, i999 int, i1000 int, b blob) row_format=dynamic;
+insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei");
+update t1 set b=repeat('a',256);
+update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+delete from t1 where i8=1;
+select i1,i2 from t1;
+i1 i2
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+CREATE TABLE `t1` (
+`post_id` mediumint(8) unsigned NOT NULL auto_increment,
+`topic_id` mediumint(8) unsigned NOT NULL default '0',
+`post_time` datetime NOT NULL default '0000-00-00 00:00:00',
+`post_text` text NOT NULL,
+`icon_url` varchar(10) NOT NULL default '',
+`sign` tinyint(1) unsigned NOT NULL default '0',
+`post_edit` varchar(150) NOT NULL default '',
+`poster_login` varchar(35) NOT NULL default '',
+`ip` varchar(15) NOT NULL default '',
+PRIMARY KEY (`post_id`),
+KEY `post_time` (`post_time`),
+KEY `ip` (`ip`),
+KEY `poster_login` (`poster_login`),
+KEY `topic_id` (`topic_id`),
+FULLTEXT KEY `post_text` (`post_text`)
+);
+INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test');
+REPAIR TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e));
+ERROR 42000: Specified key was too long; max key length is 1000 bytes
+CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255));
+ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e);
+ERROR 42000: Specified key was too long; max key length is 1000 bytes
+DROP TABLE t1;
+CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a));
+INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4);
+create table t2 (a int not null, b int, c int, key(b), key(c), key(a));
+INSERT into t2 values (1,1,1), (2,2,2);
+optimize table t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 b 1 b A 5 NULL NULL YES BTREE
+t1 1 c 1 c A 5 NULL NULL YES BTREE
+t1 1 a 1 a A 1 NULL NULL BTREE
+t1 1 a 2 b A 5 NULL NULL YES BTREE
+t1 1 c_2 1 c A 5 NULL NULL YES BTREE
+t1 1 c_2 2 a A 5 NULL NULL BTREE
+explain select * from t1,t2 where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL a NULL NULL NULL 2
+1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where
+explain select * from t1,t2 force index(a) where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL a NULL NULL NULL 2
+1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where
+explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL a NULL NULL NULL 2
+1 SIMPLE t1 ref a a 4 test.t2.a 3
+explain select * from t1,t2 where t1.b=t2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL b NULL NULL NULL 2
+1 SIMPLE t1 ref b b 5 test.t2.b 1 Using where
+explain select * from t1,t2 force index(c) where t1.a=t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 2
+1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where
+explain select * from t1 where a=0 or a=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where
+explain select * from t1 force index (a) where a=0 or a=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 4 NULL 4 Using where
+explain select * from t1 where c=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref c,c_2 c 5 const 1 Using where
+explain select * from t1 use index() where c=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where
+drop table t1,t2;
+create table t1 (a int not null auto_increment primary key, b varchar(255));
+insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100));
+update t1 set b=repeat(left(b,1),200) where a=1;
+delete from t1 where (a & 1)= 0;
+update t1 set b=repeat('e',200) where a=1;
+flush tables;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+update t1 set b=repeat(left(b,1),255) where a between 1 and 5;
+update t1 set b=repeat(left(b,1),10) where a between 32 and 43;
+update t1 set b=repeat(left(b,1),2) where a between 64 and 66;
+update t1 set b=repeat(left(b,1),65) where a between 67 and 70;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+insert into t1 (b) values (repeat('z',100));
+update t1 set b="test" where left(b,1) > 'n';
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 ( a text not null, key a (a(20)));
+insert into t1 values ('aaa '),('aaa'),('aa');
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+repair table t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+select concat(a,'.') from t1 where a='aaa';
+concat(a,'.')
+aaa .
+aaa.
+select concat(a,'.') from t1 where binary a='aaa';
+concat(a,'.')
+aaa.
+update t1 set a='bbb' where a='aaa';
+select concat(a,'.') from t1;
+concat(a,'.')
+bbb.
+bbb.
+aa.
+drop table t1;
+create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10)));
+insert into t1 values('807780', '477', '165');
+insert into t1 values('807780', '477', '162');
+insert into t1 values('807780', '472', '162');
+select * from t1 where a='807780' and b='477' and c='165';
+a b c
+807780 477 165
+drop table t1;
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note 1051 Unknown table 't1'
+CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a));
+INSERT t1 VALUES ("can \tcan");
+INSERT t1 VALUES ("can can");
+INSERT t1 VALUES ("can");
+SELECT * FROM t1;
+a
+can can
+can
+can can
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+DROP TABLE t1;
+create table t1 (a blob);
+insert into t1 values('a '),('a');
+select concat(a,'.') from t1 where a='a';
+concat(a,'.')
+a.
+select concat(a,'.') from t1 where a='a ';
+concat(a,'.')
+a .
+alter table t1 add key(a(2));
+select concat(a,'.') from t1 where a='a';
+concat(a,'.')
+a.
+select concat(a,'.') from t1 where a='a ';
+concat(a,'.')
+a .
+drop table t1;
+create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20)));
+insert into t1 (b) values ('a'),('b'),('c');
+select concat(b,'.') from t1;
+concat(b,'.')
+a.
+b.
+c.
+update t1 set b='b ' where a=2;
+update t1 set b='b ' where a > 1;
+ERROR 23000: Duplicate entry 'b ' for key 'b'
+insert into t1 (b) values ('b');
+ERROR 23000: Duplicate entry 'b' for key 'b'
+select * from t1;
+a b
+1 a
+2 b
+3 c
+delete from t1 where b='b';
+select a,concat(b,'.') from t1;
+a concat(b,'.')
+1 a.
+3 c.
+drop table t1;
+create table t1 (a int not null);
+create table t2 (a int not null, primary key (a));
+insert into t1 values (1);
+insert into t2 values (1),(2);
+select sql_big_result distinct t1.a from t1,t2 order by t2.a;
+a
+1
+select distinct t1.a from t1,t2 order by t2.a;
+a
+1
+select sql_big_result distinct t1.a from t1,t2;
+a
+1
+explain select sql_big_result distinct t1.a from t1,t2 order by t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary
+1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct
+explain select distinct t1.a from t1,t2 order by t2.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary
+1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct
+drop table t1,t2;
+create table t1 (
+c1 varchar(32),
+key (c1)
+);
+alter table t1 disable keys;
+insert into t1 values ('a'), ('b');
+select c1 from t1 order by c1 limit 1;
+c1
+a
+drop table t1;
+create table t1 (a int not null, primary key(a));
+create table t2 (a int not null, b int not null, primary key(a,b));
+insert into t1 values (1),(2),(3),(4),(5),(6);
+insert into t2 values (1,1),(2,1);
+lock tables t1 read local, t2 read local;
+select straight_join * from t1,t2 force index (primary) where t1.a=t2.a;
+a a b
+1 1 1
+2 2 1
+insert into t2 values(2,0);
+select straight_join * from t1,t2 force index (primary) where t1.a=t2.a;
+a a b
+1 1 1
+2 2 1
+drop table t1,t2;
+CREATE TABLE t1 (c1 varchar(250) NOT NULL);
+CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1));
+INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003');
+INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004');
+LOCK TABLES t1 READ LOCAL, t2 READ LOCAL;
+SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2
+WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1;
+t1c1 t2c1
+INSERT INTO t2 VALUES ('test000001'), ('test000005');
+SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2
+WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1;
+t1c1 t2c1
+DROP TABLE t1,t2;
+CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`));
+Got one of the listed errors
+create table t1 (a int, b varchar(200), c text not null) checksum=1;
+create table t2 (a int, b varchar(200), c text not null) checksum=0;
+insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, "");
+insert t2 select * from t1;
+checksum table t1, t2, t3 quick;
+Table Checksum
+test.t1 2948697075
+test.t2 NULL
+test.t3 NULL
+Warnings:
+Error 1146 Table 'test.t3' doesn't exist
+checksum table t1, t2, t3;
+Table Checksum
+test.t1 2948697075
+test.t2 2948697075
+test.t3 NULL
+Warnings:
+Error 1146 Table 'test.t3' doesn't exist
+checksum table t1, t2, t3 extended;
+Table Checksum
+test.t1 2948697075
+test.t2 2948697075
+test.t3 NULL
+Warnings:
+Error 1146 Table 'test.t3' doesn't exist
+drop table t1,t2;
+create table t1 (a int, key (a));
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A NULL NULL NULL YES BTREE
+alter table t1 disable keys;
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A NULL NULL NULL YES BTREE disabled
+create table t2 (a int);
+set @@rand_seed1=31415926,@@rand_seed2=2718281828;
+insert t1 select * from t2;
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A NULL NULL NULL YES BTREE disabled
+alter table t1 enable keys;
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 1000 NULL NULL YES BTREE
+alter table t1 engine=heap;
+alter table t1 disable keys;
+Warnings:
+Note 1031 Table storage engine for 't1' doesn't have this option
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a NULL 500 NULL NULL YES HASH
+drop table t1,t2;
+create table t1 ( a tinytext, b char(1), index idx (a(1),b) );
+insert into t1 values (null,''), (null,'');
+explain select count(*) from t1 where a is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref idx idx 4 const 1 Using where
+select count(*) from t1 where a is null;
+count(*)
+2
+drop table t1;
+create table t1 (c1 int, c2 varchar(4) not null default '',
+key(c2(3))) default charset=utf8;
+insert into t1 values (1,'A'), (2, 'B'), (3, 'A');
+update t1 set c2='A B' where c1=2;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 (c1 int);
+insert into t1 values (1),(2),(3),(4);
+checksum table t1;
+Table Checksum
+test.t1 149057747
+delete from t1 where c1 = 1;
+create table t2 as select * from t1;
+checksum table t1;
+Table Checksum
+test.t1 984116287
+checksum table t2;
+Table Checksum
+test.t2 984116287
+drop table t1, t2;
+show variables like 'maria_stats_method';
+Variable_name Value
+maria_stats_method nulls_unequal
+create table t1 (a int, key(a));
+insert into t1 values (0),(1),(2),(3),(4);
+insert into t1 select NULL from t1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 10 NULL NULL YES BTREE
+insert into t1 values (11);
+delete from t1 where a=11;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 10 NULL NULL YES BTREE
+set maria_stats_method=nulls_equal;
+show variables like 'maria_stats_method';
+Variable_name Value
+maria_stats_method nulls_equal
+insert into t1 values (11);
+delete from t1 where a=11;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 5 NULL NULL YES BTREE
+insert into t1 values (11);
+delete from t1 where a=11;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 5 NULL NULL YES BTREE
+set maria_stats_method=DEFAULT;
+show variables like 'maria_stats_method';
+Variable_name Value
+maria_stats_method nulls_unequal
+insert into t1 values (11);
+delete from t1 where a=11;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 10 NULL NULL YES BTREE
+insert into t1 values (11);
+delete from t1 where a=11;
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 10 NULL NULL YES BTREE
+drop table t1;
+set maria_stats_method=nulls_ignored;
+show variables like 'maria_stats_method';
+Variable_name Value
+maria_stats_method nulls_ignored
+create table t1 (
+a char(3), b char(4), c char(5), d char(6),
+key(a,b,c,d)
+);
+insert into t1 values ('bcd','def1', NULL, 'zz');
+insert into t1 values ('bcd','def2', NULL, 'zz');
+insert into t1 values ('bce','def1', 'yuu', NULL);
+insert into t1 values ('bce','def2', NULL, 'quux');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 2 NULL NULL YES BTREE
+t1 1 a 2 b A 4 NULL NULL YES BTREE
+t1 1 a 3 c A 4 NULL NULL YES BTREE
+t1 1 a 4 d A 4 NULL NULL YES BTREE
+delete from t1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show index from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 0 NULL NULL YES BTREE
+t1 1 a 2 b A 0 NULL NULL YES BTREE
+t1 1 a 3 c A 0 NULL NULL YES BTREE
+t1 1 a 4 d A 0 NULL NULL YES BTREE
+set maria_stats_method=DEFAULT;
+drop table t1;
+create table t1(
+cip INT NOT NULL,
+time TIME NOT NULL,
+score INT NOT NULL DEFAULT 0,
+bob TINYBLOB
+);
+insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03');
+insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'),
+(6, 'c', '00:06');
+select * from t1 where bob is null and cip=1;
+cip time score bob
+1 00:01:00 0 NULL
+create index bug on t1 (bob(22), cip, time);
+select * from t1 where bob is null and cip=1;
+cip time score bob
+1 00:01:00 0 NULL
+drop table t1;
+create table t1 (
+id1 int not null auto_increment,
+id2 int not null default '0',
+t text not null,
+primary key (id1),
+key x (id2, t(32))
+) engine=maria;
+insert into t1 (id2, t) values
+(10, 'abc'), (10, 'abc'), (10, 'abc'),
+(20, 'abc'), (20, 'abc'), (20, 'def'),
+(10, 'abc'), (10, 'abc');
+select count(*) from t1 where id2 = 10;
+count(*)
+5
+select count(id1) from t1 where id2 = 10;
+count(id1)
+5
+drop table t1;
+CREATE TABLE t1(a TINYINT, KEY(a));
+INSERT INTO t1 VALUES(1);
+SELECT MAX(a) FROM t1 IGNORE INDEX(a);
+MAX(a)
+1
+ALTER TABLE t1 DISABLE KEYS;
+SELECT MAX(a) FROM t1;
+MAX(a)
+1
+SELECT MAX(a) FROM t1 IGNORE INDEX(a);
+MAX(a)
+1
+DROP TABLE t1;
+CREATE TABLE t1(a CHAR(9), b VARCHAR(7));
+INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx');
+UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb';
+SELECT * FROM t1;
+a b
+xxxxxxxxx bbbbbb
+xxxxxxxxx bbbbbb
+DROP TABLE t1;
+SET @@maria_repair_threads=2;
+SHOW VARIABLES LIKE 'maria_repair%';
+Variable_name Value
+maria_repair_threads 2
+CREATE TABLE t1 (
+`_id` int(11) NOT NULL default '0',
+`url` text,
+`email` text,
+`description` text,
+`loverlap` int(11) default NULL,
+`roverlap` int(11) default NULL,
+`lneighbor_id` int(11) default NULL,
+`rneighbor_id` int(11) default NULL,
+`length_` int(11) default NULL,
+`sequence` mediumtext,
+`name` text,
+`_obj_class` text NOT NULL,
+PRIMARY KEY (`_id`),
+UNIQUE KEY `sequence_name_index` (`name`(50)),
+KEY (`length_`)
+) ENGINE=maria DEFAULT CHARSET=latin1;
+INSERT INTO t1 VALUES
+(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+SELECT _id FROM t1;
+_id
+1
+2
+3
+4
+5
+6
+7
+8
+9
+DELETE FROM t1 WHERE _id < 8;
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MARIA 10 Dynamic 2 # # # # 140 # # # # # #
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MARIA 10 Dynamic 2 # # # # 0 # # # # # #
+SELECT _id FROM t1;
+_id
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+`_id` int(11) NOT NULL default '0',
+`url` text,
+`email` text,
+`description` text,
+`loverlap` int(11) default NULL,
+`roverlap` int(11) default NULL,
+`lneighbor_id` int(11) default NULL,
+`rneighbor_id` int(11) default NULL,
+`length_` int(11) default NULL,
+`sequence` mediumtext,
+`name` text,
+`_obj_class` text NOT NULL,
+PRIMARY KEY (`_id`),
+UNIQUE KEY `sequence_name_index` (`name`(50)),
+KEY (`length_`)
+) ENGINE=maria DEFAULT CHARSET=latin1;
+INSERT INTO t1 VALUES
+(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+SELECT _id FROM t1;
+_id
+1
+2
+3
+4
+5
+6
+7
+8
+9
+DELETE FROM t1 WHERE _id < 8;
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MARIA 10 Dynamic 2 # # # # 140 # # # # # #
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+REPAIR TABLE t1 QUICK;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+CHECK TABLE t1 EXTENDED;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 MARIA 10 Dynamic 2 # # # # 140 # # # # # #
+SELECT _id FROM t1;
+_id
+8
+9
+DROP TABLE t1;
+SET @@maria_repair_threads=1;
+SHOW VARIABLES LIKE 'maria_repair%';
+Variable_name Value
+maria_repair_threads 1
+drop table if exists t1,t2,t3;
+--- Testing varchar ---
+--- Testing varchar ---
+create table t1 (v varchar(10), c char(10), t text);
+insert into t1 values('+ ', '+ ', '+ ');
+set @a=repeat(' ',20);
+insert into t1 values (concat('+',@a),concat('+',@a),concat('+',@a));
+Warnings:
+Note 1265 Data truncated for column 'v' at row 1
+select concat('*',v,'*',c,'*',t,'*') from t1;
+concat('*',v,'*',c,'*',t,'*')
+*+ *+*+ *
+*+ *+*+ *
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+create table t2 like t1;
+show create table t2;
+Table Create Table
+t2 CREATE TABLE `t2` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+create table t3 select * from t1;
+show create table t3;
+Table Create Table
+t3 CREATE TABLE `t3` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+alter table t1 modify c varchar(10);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` varchar(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+alter table t1 modify v char(10);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` char(10) DEFAULT NULL,
+ `c` varchar(10) DEFAULT NULL,
+ `t` text
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+alter table t1 modify t varchar(10);
+Warnings:
+Note 1265 Data truncated for column 't' at row 2
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` char(10) DEFAULT NULL,
+ `c` varchar(10) DEFAULT NULL,
+ `t` varchar(10) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select concat('*',v,'*',c,'*',t,'*') from t1;
+concat('*',v,'*',c,'*',t,'*')
+*+*+*+ *
+*+*+*+ *
+drop table t1,t2,t3;
+create table t1 (v varchar(10), c char(10), t text, key(v), key(c), key(t(10)));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `v` (`v`),
+ KEY `c` (`c`),
+ KEY `t` (`t`(10))
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select count(*) from t1;
+count(*)
+270
+insert into t1 values(concat('a',char(1)),concat('a',char(1)),concat('a',char(1)));
+select count(*) from t1 where v='a';
+count(*)
+10
+select count(*) from t1 where c='a';
+count(*)
+10
+select count(*) from t1 where t='a';
+count(*)
+10
+select count(*) from t1 where v='a ';
+count(*)
+10
+select count(*) from t1 where c='a ';
+count(*)
+10
+select count(*) from t1 where t='a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+count(*)
+10
+select count(*) from t1 where v like 'a%';
+count(*)
+11
+select count(*) from t1 where c like 'a%';
+count(*)
+11
+select count(*) from t1 where t like 'a%';
+count(*)
+11
+select count(*) from t1 where v like 'a %';
+count(*)
+9
+explain select count(*) from t1 where v='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 13 const # Using where; Using index
+explain select count(*) from t1 where c='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref c c 11 const # Using where; Using index
+explain select count(*) from t1 where t='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range t t 13 NULL # Using where
+explain select count(*) from t1 where v like 'a%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range v v 13 NULL # Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 13 const # Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 13 const # Using where; Using index
+alter table t1 add unique(v);
+ERROR 23000: Duplicate entry '{ ' for key 'v_2'
+alter table t1 add key(v);
+select concat('*',v,'*',c,'*',t,'*') as qq from t1 where v='a';
+qq
+*a*a*a*
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+*a *a*a *
+explain select * from t1 where v='a';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v,v_2 # 13 const # Using where
+select v,count(*) from t1 group by v limit 10;
+v count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(c) from t1 group by v limit 10;
+v count(c)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(c) from t1 group by v limit 10;
+v count(c)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select c,count(*) from t1 group by c limit 10;
+c count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select c,count(t) from t1 group by c limit 10;
+c count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result c,count(t) from t1 group by c limit 10;
+c count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select t,count(*) from t1 group by t limit 10;
+t count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select t,count(t) from t1 group by t limit 10;
+t count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result t,count(t) from t1 group by t limit 10;
+t count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+alter table t1 modify v varchar(300), drop key v, drop key v_2, add key v (v);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(300) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `c` (`c`),
+ KEY `t` (`t`(10)),
+ KEY `v` (`v`)
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select count(*) from t1 where v='a';
+count(*)
+10
+select count(*) from t1 where v='a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+count(*)
+10
+select count(*) from t1 where v like 'a%';
+count(*)
+11
+select count(*) from t1 where v like 'a %';
+count(*)
+9
+explain select count(*) from t1 where v='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 303 const # Using where; Using index
+explain select count(*) from t1 where v like 'a%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range v v 303 NULL # Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 303 const # Using where; Using index
+explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 303 const # Using where; Using index
+explain select * from t1 where v='a';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 303 const # Using where
+select v,count(*) from t1 group by v limit 10;
+v count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+alter table t1 drop key v, add key v (v(30));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(300) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `c` (`c`),
+ KEY `t` (`t`(10)),
+ KEY `v` (`v`(30))
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select count(*) from t1 where v='a';
+count(*)
+10
+select count(*) from t1 where v='a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ';
+count(*)
+10
+select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+count(*)
+10
+select count(*) from t1 where v like 'a%';
+count(*)
+11
+select count(*) from t1 where v like 'a %';
+count(*)
+9
+explain select count(*) from t1 where v='a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 33 const # Using where
+explain select count(*) from t1 where v like 'a%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range v v 33 NULL # Using where
+explain select count(*) from t1 where v between 'a' and 'a ';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 33 const # Using where
+explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 33 const # Using where
+explain select * from t1 where v='a';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref v v 33 const # Using where
+select v,count(*) from t1 group by v limit 10;
+v count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+alter table t1 modify v varchar(600), drop key v, add key v (v);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(600) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `c` (`c`),
+ KEY `t` (`t`(10)),
+ KEY `v` (`v`)
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+select v,count(*) from t1 group by v limit 10;
+v count(*)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+select sql_big_result v,count(t) from t1 group by v limit 10;
+v count(t)
+a 1
+a 10
+b 10
+c 10
+d 10
+e 10
+f 10
+g 10
+h 10
+i 10
+drop table t1;
+create table t1 (a char(10), unique (a));
+insert into t1 values ('a ');
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a' for key 'a'
+alter table t1 modify a varchar(10);
+insert into t1 values ('a '),('a '),('a '),('a ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+insert into t1 values ('a ');
+ERROR 23000: Duplicate entry 'a ' for key 'a'
+update t1 set a='a ' where a like 'a%';
+select concat(a,'.') from t1;
+concat(a,'.')
+a .
+update t1 set a='abc ' where a like 'a ';
+select concat(a,'.') from t1;
+concat(a,'.')
+a .
+update t1 set a='a ' where a like 'a %';
+select concat(a,'.') from t1;
+concat(a,'.')
+a .
+update t1 set a='a ' where a like 'a ';
+select concat(a,'.') from t1;
+concat(a,'.')
+a .
+drop table t1;
+create table t1 (v varchar(10), c char(10), t text, key(v(5)), key(c(5)), key(t(5)));
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL,
+ `t` text,
+ KEY `v` (`v`(5)),
+ KEY `c` (`c`(5)),
+ KEY `t` (`t`(5))
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v char(10) character set utf8);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` char(10) CHARACTER SET utf8 DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v varchar(10), c char(10)) row_format=fixed;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` varchar(10) DEFAULT NULL,
+ `c` char(10) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED
+insert into t1 values('a','a'),('a ','a ');
+select concat('*',v,'*',c,'*') from t1;
+concat('*',v,'*',c,'*')
+*a*a*
+*a *a*
+drop table t1;
+create table t1 (v varchar(65530), key(v(10)));
+insert into t1 values(repeat('a',65530));
+select length(v) from t1 where v=repeat('a',65530);
+length(v)
+65530
+drop table t1;
+create table t1(a int, b varchar(12), key ba(b, a));
+insert into t1 values (1, 'A'), (20, NULL);
+explain select * from t1 where a=20 and b is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref ba ba 20 const,const 1 Using where; Using index
+select * from t1 where a=20 and b is null;
+a b
+20 NULL
+drop table t1;
+create table t1 (v varchar(65530), key(v));
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1000 bytes
+drop table if exists t1;
+create table t1 (v varchar(65536));
+Warnings:
+Note 1246 Converting column 'v' from VARCHAR to TEXT
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` mediumtext
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v varchar(65530) character set utf8);
+Warnings:
+Note 1246 Converting column 'v' from VARCHAR to TEXT
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `v` mediumtext CHARACTER SET utf8
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (v varchar(65535));
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. You have to change some columns to TEXT or BLOBs
+set @save_concurrent_insert=@@concurrent_insert;
+set global concurrent_insert=1;
+create table t1 (a int);
+insert into t1 values (1),(2),(3),(4),(5);
+lock table t1 read local;
+insert into t1 values(6),(7);
+unlock tables;
+delete from t1 where a>=3 and a<=4;
+lock table t1 read local;
+set global concurrent_insert=2;
+insert into t1 values (8),(9);
+unlock tables;
+insert into t1 values (10),(11),(12);
+select * from t1;
+a
+1
+2
+11
+10
+5
+6
+7
+8
+9
+12
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+create table t1 (a int, b varchar(30) default "hello");
+insert into t1 (a) values (1),(2),(3),(4),(5);
+lock table t1 read local;
+insert into t1 (a) values(6),(7);
+unlock tables;
+delete from t1 where a>=3 and a<=4;
+lock table t1 read local;
+set global concurrent_insert=2;
+insert into t1 (a) values (8),(9);
+unlock tables;
+insert into t1 (a) values (10),(11),(12);
+select a from t1;
+a
+1
+2
+11
+10
+5
+6
+7
+8
+9
+12
+check table t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+drop table t1;
+set global concurrent_insert=@save_concurrent_insert;
+create table t1 (a int, key(a));
+insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+alter table t1 disable keys;
+alter table t1 enable keys;
+show keys from t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
+t1 1 a 1 a A 8 NULL NULL YES BTREE
+drop table t1;
+show create table t1;
+show create table t1;
+create table t1 (a int) select 42 a;
+select * from t1;
+a
+9
+select * from t1;
+a
+99
+select * from t1;
+a
+42
+drop table t1;
+End of 4.1 tests
+create table t1 (c1 int) pack_keys=0;
+create table t2 (c1 int) pack_keys=1;
+create table t3 (c1 int) pack_keys=default;
+create table t4 (c1 int) pack_keys=2;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '2' at line 1
+drop table t1, t2, t3;
+End of 5.0 tests
+create table t1 (a int not null, key `a` (a) key_block_size=1024);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=1024
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, key `a` (a) key_block_size=2048);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=2048
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a varchar(2048), key `a` (a));
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1000 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`(1000))
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1024);
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1000 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`(1000)) KEY_BLOCK_SIZE=4096
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1000 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`),
+ KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1024
+alter table t1 key_block_size=2048;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=1024,
+ KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048
+alter table t1 add c int, add key (c);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=1024,
+ KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096,
+ KEY `c` (`c`)
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048
+alter table t1 key_block_size=0;
+alter table t1 add d int, add key (d);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ `d` int(11) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=1024,
+ KEY `b` (`b`(1000)) KEY_BLOCK_SIZE=4096,
+ KEY `c` (`c`) KEY_BLOCK_SIZE=2048,
+ KEY `d` (`d`)
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1000 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`),
+ KEY `b` (`b`(1000))
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192
+drop table t1;
+create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1000 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=1024,
+ KEY `b` (`b`(1000))
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192
+drop table t1;
+create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=1024,
+ KEY `b` (`b`) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=16384
+drop table t1;
+create table t1 (a int not null, key `a` (a) key_block_size=512);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=1024
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000);
+Warnings:
+Warning 1071 Specified key was too long; max key length is 1000 bytes
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` varchar(2048) DEFAULT NULL,
+ KEY `a` (`a`(1000)) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, key `a` (a) key_block_size=1025);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ KEY `a` (`a`) KEY_BLOCK_SIZE=2048
+) ENGINE=MARIA DEFAULT CHARSET=latin1
+drop table t1;
+create table t1 (a int not null, key key_block_size=1024 (a));
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '=1024 (a))' at line 1
+create table t1 (a int not null, key `a` key_block_size=1024 (a));
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'key_block_size=1024 (a))' at line 1
+CREATE TABLE t1 (
+c1 INT,
+c2 VARCHAR(300),
+KEY (c1) KEY_BLOCK_SIZE 1024,
+KEY (c2) KEY_BLOCK_SIZE 8192
+);
+INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))),
+(11, REPEAT('b', CEIL(RAND() * 300))),
+(12, REPEAT('c', CEIL(RAND() * 300))),
+(13, REPEAT('d', CEIL(RAND() * 300))),
+(14, REPEAT('e', CEIL(RAND() * 300))),
+(15, REPEAT('f', CEIL(RAND() * 300))),
+(16, REPEAT('g', CEIL(RAND() * 300))),
+(17, REPEAT('h', CEIL(RAND() * 300))),
+(18, REPEAT('i', CEIL(RAND() * 300))),
+(19, REPEAT('j', CEIL(RAND() * 300))),
+(20, REPEAT('k', CEIL(RAND() * 300))),
+(21, REPEAT('l', CEIL(RAND() * 300))),
+(22, REPEAT('m', CEIL(RAND() * 300))),
+(23, REPEAT('n', CEIL(RAND() * 300))),
+(24, REPEAT('o', CEIL(RAND() * 300))),
+(25, REPEAT('p', CEIL(RAND() * 300))),
+(26, REPEAT('q', CEIL(RAND() * 300))),
+(27, REPEAT('r', CEIL(RAND() * 300))),
+(28, REPEAT('s', CEIL(RAND() * 300))),
+(29, REPEAT('t', CEIL(RAND() * 300))),
+(30, REPEAT('u', CEIL(RAND() * 300))),
+(31, REPEAT('v', CEIL(RAND() * 300))),
+(32, REPEAT('w', CEIL(RAND() * 300))),
+(33, REPEAT('x', CEIL(RAND() * 300))),
+(34, REPEAT('y', CEIL(RAND() * 300))),
+(35, REPEAT('z', CEIL(RAND() * 300)));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+REPAIR TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 repair status OK
+DELETE FROM t1 WHERE c1 >= 10;
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+DROP TABLE t1;
+End of 5.1 tests
+set global storage_engine=MyISAM;
diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result
new file mode 100644
index 00000000000..9268c44eecd
--- /dev/null
+++ b/mysql-test/r/ps_maria.result
@@ -0,0 +1,3137 @@
+use test;
+drop table if exists t1, t9 ;
+create table t1
+(
+a int, b varchar(30),
+primary key(a)
+) engine = 'MARIA' ;
+create table t9
+(
+c1 tinyint, c2 smallint, c3 mediumint, c4 int,
+c5 integer, c6 bigint, c7 float, c8 double,
+c9 double precision, c10 real, c11 decimal(7, 4), c12 numeric(8, 4),
+c13 date, c14 datetime, c15 timestamp, c16 time,
+c17 year, c18 tinyint, c19 bool, c20 char,
+c21 char(10), c22 varchar(30), c23 tinyblob, c24 tinytext,
+c25 blob, c26 text, c27 mediumblob, c28 mediumtext,
+c29 longblob, c30 longtext, c31 enum('one', 'two', 'three'),
+c32 set('monday', 'tuesday', 'wednesday'),
+primary key(c1)
+) engine = 'MARIA' ;
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+test_sequence
+------ simple select tests ------
+prepare stmt1 from ' select * from t9 order by c1 ' ;
+execute stmt1;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def test t9 t9 c1 c1 1 4 1 N 49155 0 63
+def test t9 t9 c2 c2 2 6 1 Y 32768 0 63
+def test t9 t9 c3 c3 9 9 1 Y 32768 0 63
+def test t9 t9 c4 c4 3 11 1 Y 32768 0 63
+def test t9 t9 c5 c5 3 11 1 Y 32768 0 63
+def test t9 t9 c6 c6 8 20 1 Y 32768 0 63
+def test t9 t9 c7 c7 4 12 1 Y 32768 31 63
+def test t9 t9 c8 c8 5 22 1 Y 32768 31 63
+def test t9 t9 c9 c9 5 22 1 Y 32768 31 63
+def test t9 t9 c10 c10 5 22 1 Y 32768 31 63
+def test t9 t9 c11 c11 246 9 6 Y 0 4 63
+def test t9 t9 c12 c12 246 10 6 Y 0 4 63
+def test t9 t9 c13 c13 10 10 10 Y 128 0 63
+def test t9 t9 c14 c14 12 19 19 Y 128 0 63
+def test t9 t9 c15 c15 7 19 19 N 1249 0 63
+def test t9 t9 c16 c16 11 8 8 Y 128 0 63
+def test t9 t9 c17 c17 13 4 4 Y 32864 0 63
+def test t9 t9 c18 c18 1 4 1 Y 32768 0 63
+def test t9 t9 c19 c19 1 1 1 Y 32768 0 63
+def test t9 t9 c20 c20 254 1 1 Y 0 0 8
+def test t9 t9 c21 c21 254 10 10 Y 0 0 8
+def test t9 t9 c22 c22 253 30 30 Y 0 0 8
+def test t9 t9 c23 c23 252 255 8 Y 144 0 63
+def test t9 t9 c24 c24 252 255 8 Y 16 0 8
+def test t9 t9 c25 c25 252 65535 4 Y 144 0 63
+def test t9 t9 c26 c26 252 65535 4 Y 16 0 8
+def test t9 t9 c27 c27 252 16777215 10 Y 144 0 63
+def test t9 t9 c28 c28 252 16777215 10 Y 16 0 8
+def test t9 t9 c29 c29 252 4294967295 8 Y 144 0 63
+def test t9 t9 c30 c30 252 4294967295 8 Y 16 0 8
+def test t9 t9 c31 c31 254 5 3 Y 256 0 8
+def test t9 t9 c32 c32 254 24 7 Y 2048 0 8
+c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday
+set @arg00='SELECT' ;
+@arg00 a from t1 where a=1;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a from t1 where a=1' at line 1
+prepare stmt1 from ' ? a from t1 where a=1 ';
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a from t1 where a=1' at line 1
+set @arg00=1 ;
+select @arg00, b from t1 where a=1 ;
+@arg00 b
+1 one
+prepare stmt1 from ' select ?, b from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+? b
+1 one
+set @arg00='lion' ;
+select @arg00, b from t1 where a=1 ;
+@arg00 b
+lion one
+prepare stmt1 from ' select ?, b from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+? b
+lion one
+set @arg00=NULL ;
+select @arg00, b from t1 where a=1 ;
+@arg00 b
+NULL one
+prepare stmt1 from ' select ?, b from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+? b
+NULL one
+set @arg00=1 ;
+select b, a - @arg00 from t1 where a=1 ;
+b a - @arg00
+one 0
+prepare stmt1 from ' select b, a - ? from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+b a - ?
+one 0
+set @arg00=null ;
+select @arg00 as my_col ;
+my_col
+NULL
+prepare stmt1 from ' select ? as my_col';
+execute stmt1 using @arg00 ;
+my_col
+NULL
+select @arg00 + 1 as my_col ;
+my_col
+NULL
+prepare stmt1 from ' select ? + 1 as my_col';
+execute stmt1 using @arg00 ;
+my_col
+NULL
+select 1 + @arg00 as my_col ;
+my_col
+NULL
+prepare stmt1 from ' select 1 + ? as my_col';
+execute stmt1 using @arg00 ;
+my_col
+NULL
+set @arg00='MySQL' ;
+select substr(@arg00,1,2) from t1 where a=1 ;
+substr(@arg00,1,2)
+My
+prepare stmt1 from ' select substr(?,1,2) from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+substr(?,1,2)
+My
+set @arg00=3 ;
+select substr('MySQL',@arg00,5) from t1 where a=1 ;
+substr('MySQL',@arg00,5)
+SQL
+prepare stmt1 from ' select substr(''MySQL'',?,5) from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+substr('MySQL',?,5)
+SQL
+select substr('MySQL',1,@arg00) from t1 where a=1 ;
+substr('MySQL',1,@arg00)
+MyS
+prepare stmt1 from ' select substr(''MySQL'',1,?) from t1 where a=1 ' ;
+execute stmt1 using @arg00 ;
+substr('MySQL',1,?)
+MyS
+set @arg00='MySQL' ;
+select a , concat(@arg00,b) from t1 order by a;
+a concat(@arg00,b)
+1 MySQLone
+2 MySQLtwo
+3 MySQLthree
+4 MySQLfour
+prepare stmt1 from ' select a , concat(?,b) from t1 order by a ' ;
+execute stmt1 using @arg00;
+a concat(?,b)
+1 MySQLone
+2 MySQLtwo
+3 MySQLthree
+4 MySQLfour
+select a , concat(b,@arg00) from t1 order by a ;
+a concat(b,@arg00)
+1 oneMySQL
+2 twoMySQL
+3 threeMySQL
+4 fourMySQL
+prepare stmt1 from ' select a , concat(b,?) from t1 order by a ' ;
+execute stmt1 using @arg00;
+a concat(b,?)
+1 oneMySQL
+2 twoMySQL
+3 threeMySQL
+4 fourMySQL
+set @arg00='MySQL' ;
+select group_concat(@arg00,b order by a) from t1
+group by 'a' ;
+group_concat(@arg00,b order by a)
+MySQLone,MySQLtwo,MySQLthree,MySQLfour
+prepare stmt1 from ' select group_concat(?,b order by a) from t1
+group by ''a'' ' ;
+execute stmt1 using @arg00;
+group_concat(?,b order by a)
+MySQLone,MySQLtwo,MySQLthree,MySQLfour
+select group_concat(b,@arg00 order by a) from t1
+group by 'a' ;
+group_concat(b,@arg00 order by a)
+oneMySQL,twoMySQL,threeMySQL,fourMySQL
+prepare stmt1 from ' select group_concat(b,? order by a) from t1
+group by ''a'' ' ;
+execute stmt1 using @arg00;
+group_concat(b,? order by a)
+oneMySQL,twoMySQL,threeMySQL,fourMySQL
+set @arg00='first' ;
+set @arg01='second' ;
+set @arg02=NULL;
+select @arg00, @arg01 from t1 where a=1 ;
+@arg00 @arg01
+first second
+prepare stmt1 from ' select ?, ? from t1 where a=1 ' ;
+execute stmt1 using @arg00, @arg01 ;
+? ?
+first second
+execute stmt1 using @arg02, @arg01 ;
+? ?
+NULL second
+execute stmt1 using @arg00, @arg02 ;
+? ?
+first NULL
+execute stmt1 using @arg02, @arg02 ;
+? ?
+NULL NULL
+drop table if exists t5 ;
+create table t5 (id1 int(11) not null default '0',
+value2 varchar(100), value1 varchar(100)) ;
+insert into t5 values (1,'hh','hh'),(2,'hh','hh'),
+(1,'ii','ii'),(2,'ii','ii') ;
+prepare stmt1 from ' select id1,value1 from t5 where id1=? or value1=? order by id1,value1 ' ;
+set @arg00=1 ;
+set @arg01='hh' ;
+execute stmt1 using @arg00, @arg01 ;
+id1 value1
+1 hh
+1 ii
+2 hh
+drop table t5 ;
+drop table if exists t5 ;
+create table t5(session_id char(9) not null) ;
+insert into t5 values ('abc') ;
+prepare stmt1 from ' select * from t5
+where ?=''1111'' and session_id = ''abc'' ' ;
+set @arg00='abc' ;
+execute stmt1 using @arg00 ;
+session_id
+set @arg00='1111' ;
+execute stmt1 using @arg00 ;
+session_id
+abc
+set @arg00='abc' ;
+execute stmt1 using @arg00 ;
+session_id
+drop table t5 ;
+set @arg00='FROM' ;
+select a @arg00 t1 where a=1 ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 t1 where a=1' at line 1
+prepare stmt1 from ' select a ? t1 where a=1 ' ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? t1 where a=1' at line 1
+set @arg00='t1' ;
+select a from @arg00 where a=1 ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 where a=1' at line 1
+prepare stmt1 from ' select a from ? where a=1 ' ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? where a=1' at line 1
+set @arg00='WHERE' ;
+select a from t1 @arg00 a=1 ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a=1' at line 1
+prepare stmt1 from ' select a from t1 ? a=1 ' ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a=1' at line 1
+set @arg00=1 ;
+select a FROM t1 where a=@arg00 ;
+a
+1
+prepare stmt1 from ' select a FROM t1 where a=? ' ;
+execute stmt1 using @arg00 ;
+a
+1
+set @arg00=1000 ;
+execute stmt1 using @arg00 ;
+a
+set @arg00=NULL ;
+select a FROM t1 where a=@arg00 ;
+a
+prepare stmt1 from ' select a FROM t1 where a=? ' ;
+execute stmt1 using @arg00 ;
+a
+set @arg00=4 ;
+select a FROM t1 where a=sqrt(@arg00) ;
+a
+2
+prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ;
+execute stmt1 using @arg00 ;
+a
+2
+set @arg00=NULL ;
+select a FROM t1 where a=sqrt(@arg00) ;
+a
+prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ;
+execute stmt1 using @arg00 ;
+a
+set @arg00=2 ;
+set @arg01=3 ;
+select a FROM t1 where a in (@arg00,@arg01) order by a;
+a
+2
+3
+prepare stmt1 from ' select a FROM t1 where a in (?,?) order by a ';
+execute stmt1 using @arg00, @arg01;
+a
+2
+3
+set @arg00= 'one' ;
+set @arg01= 'two' ;
+set @arg02= 'five' ;
+prepare stmt1 from ' select b FROM t1 where b in (?,?,?) order by b ' ;
+execute stmt1 using @arg00, @arg01, @arg02 ;
+b
+one
+two
+prepare stmt1 from ' select b FROM t1 where b like ? ';
+set @arg00='two' ;
+execute stmt1 using @arg00 ;
+b
+two
+set @arg00='tw%' ;
+execute stmt1 using @arg00 ;
+b
+two
+set @arg00='%wo' ;
+execute stmt1 using @arg00 ;
+b
+two
+set @arg00=null ;
+insert into t9 set c1= 0, c5 = NULL ;
+select c5 from t9 where c5 > NULL ;
+c5
+prepare stmt1 from ' select c5 from t9 where c5 > ? ';
+execute stmt1 using @arg00 ;
+c5
+select c5 from t9 where c5 < NULL ;
+c5
+prepare stmt1 from ' select c5 from t9 where c5 < ? ';
+execute stmt1 using @arg00 ;
+c5
+select c5 from t9 where c5 = NULL ;
+c5
+prepare stmt1 from ' select c5 from t9 where c5 = ? ';
+execute stmt1 using @arg00 ;
+c5
+select c5 from t9 where c5 <=> NULL ;
+c5
+NULL
+prepare stmt1 from ' select c5 from t9 where c5 <=> ? ';
+execute stmt1 using @arg00 ;
+c5
+NULL
+delete from t9 where c1= 0 ;
+set @arg00='>' ;
+select a FROM t1 where a @arg00 1 ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 1' at line 1
+prepare stmt1 from ' select a FROM t1 where a ? 1 ' ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? 1' at line 1
+set @arg00=1 ;
+select a,b FROM t1 where a is not NULL
+AND b is not NULL group by a - @arg00 ;
+a b
+1 one
+2 two
+3 three
+4 four
+prepare stmt1 from ' select a,b FROM t1 where a is not NULL
+AND b is not NULL group by a - ? ' ;
+execute stmt1 using @arg00 ;
+a b
+1 one
+2 two
+3 three
+4 four
+set @arg00='two' ;
+select a,b FROM t1 where a is not NULL
+AND b is not NULL having b <> @arg00 order by a ;
+a b
+1 one
+3 three
+4 four
+prepare stmt1 from ' select a,b FROM t1 where a is not NULL
+AND b is not NULL having b <> ? order by a ' ;
+execute stmt1 using @arg00 ;
+a b
+1 one
+3 three
+4 four
+set @arg00=1 ;
+select a,b FROM t1 where a is not NULL
+AND b is not NULL order by a - @arg00 ;
+a b
+1 one
+2 two
+3 three
+4 four
+prepare stmt1 from ' select a,b FROM t1 where a is not NULL
+AND b is not NULL order by a - ? ' ;
+execute stmt1 using @arg00 ;
+a b
+1 one
+2 two
+3 three
+4 four
+set @arg00=2 ;
+select a,b from t1 order by 2 ;
+a b
+4 four
+1 one
+3 three
+2 two
+prepare stmt1 from ' select a,b from t1
+order by ? ';
+execute stmt1 using @arg00;
+a b
+4 four
+1 one
+3 three
+2 two
+set @arg00=1 ;
+execute stmt1 using @arg00;
+a b
+1 one
+2 two
+3 three
+4 four
+set @arg00=0 ;
+execute stmt1 using @arg00;
+ERROR 42S22: Unknown column '?' in 'order clause'
+set @arg00=1;
+prepare stmt1 from ' select a,b from t1 order by a
+limit 1 ';
+execute stmt1 ;
+a b
+1 one
+prepare stmt1 from ' select a,b from t1 order by a limit ? ';
+execute stmt1 using @arg00;
+a b
+1 one
+set @arg00='b' ;
+set @arg01=0 ;
+set @arg02=2 ;
+set @arg03=2 ;
+select sum(a), @arg00 from t1 where a > @arg01
+and b is not null group by substr(b,@arg02)
+having sum(a) <> @arg03 ;
+sum(a) @arg00
+3 b
+1 b
+4 b
+prepare stmt1 from ' select sum(a), ? from t1 where a > ?
+and b is not null group by substr(b,?)
+having sum(a) <> ? ';
+execute stmt1 using @arg00, @arg01, @arg02, @arg03;
+sum(a) ?
+3 b
+1 b
+4 b
+test_sequence
+------ join tests ------
+select first.a as a1, second.a as a2
+from t1 first, t1 second
+where first.a = second.a order by a1 ;
+a1 a2
+1 1
+2 2
+3 3
+4 4
+prepare stmt1 from ' select first.a as a1, second.a as a2
+ from t1 first, t1 second
+ where first.a = second.a order by a1 ';
+execute stmt1 ;
+a1 a2
+1 1
+2 2
+3 3
+4 4
+set @arg00='ABC';
+set @arg01='two';
+set @arg02='one';
+select first.a, @arg00, second.a FROM t1 first, t1 second
+where @arg01 = first.b or first.a = second.a or second.b = @arg02
+order by second.a, first.a;
+a @arg00 a
+1 ABC 1
+2 ABC 1
+3 ABC 1
+4 ABC 1
+2 ABC 2
+2 ABC 3
+3 ABC 3
+2 ABC 4
+4 ABC 4
+prepare stmt1 from ' select first.a, ?, second.a FROM t1 first, t1 second
+ where ? = first.b or first.a = second.a or second.b = ?
+ order by second.a, first.a';
+execute stmt1 using @arg00, @arg01, @arg02;
+a ? a
+1 ABC 1
+2 ABC 1
+3 ABC 1
+4 ABC 1
+2 ABC 2
+2 ABC 3
+3 ABC 3
+2 ABC 4
+4 ABC 4
+drop table if exists t2 ;
+create table t2 as select * from t1 ;
+set @query1= 'SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a ' ;
+set @query2= 'SELECT * FROM t2 natural join t1 order by t2.a ' ;
+set @query3= 'SELECT * FROM t2 join t1 using(a) order by t2.a ' ;
+set @query4= 'SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a ' ;
+set @query5= 'SELECT * FROM t2 natural left join t1 order by t2.a ' ;
+set @query6= 'SELECT * FROM t2 left join t1 using(a) order by t2.a ' ;
+set @query7= 'SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a ' ;
+set @query8= 'SELECT * FROM t2 natural right join t1 order by t2.a ' ;
+set @query9= 'SELECT * FROM t2 right join t1 using(a) order by t2.a ' ;
+the join statement is:
+SELECT * FROM t2 right join t1 using(a) order by t2.a
+prepare stmt1 from @query9 ;
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+the join statement is:
+SELECT * FROM t2 natural right join t1 order by t2.a
+prepare stmt1 from @query8 ;
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+the join statement is:
+SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a
+prepare stmt1 from @query7 ;
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+the join statement is:
+SELECT * FROM t2 left join t1 using(a) order by t2.a
+prepare stmt1 from @query6 ;
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+the join statement is:
+SELECT * FROM t2 natural left join t1 order by t2.a
+prepare stmt1 from @query5 ;
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+the join statement is:
+SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a
+prepare stmt1 from @query4 ;
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+the join statement is:
+SELECT * FROM t2 join t1 using(a) order by t2.a
+prepare stmt1 from @query3 ;
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+execute stmt1 ;
+a b b
+1 one one
+2 two two
+3 three three
+4 four four
+the join statement is:
+SELECT * FROM t2 natural join t1 order by t2.a
+prepare stmt1 from @query2 ;
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+the join statement is:
+SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a
+prepare stmt1 from @query1 ;
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+execute stmt1 ;
+a b a b
+1 one 1 one
+2 two 2 two
+3 three 3 three
+4 four 4 four
+drop table t2 ;
+test_sequence
+------ subquery tests ------
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = ''two'') ';
+execute stmt1 ;
+a b
+2 two
+set @arg00='two' ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where b = 'two' ) and b=@arg00 ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = ''two'') and b=? ';
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00='two' ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where b = @arg00 ) and b='two' ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = ? ) and b=''two'' ' ;
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00=3 ;
+set @arg01='three' ;
+select a,b FROM t1 where (a,b) in (select 3, 'three');
+a b
+3 three
+select a FROM t1 where (a,b) in (select @arg00,@arg01);
+a
+3
+prepare stmt1 from ' select a FROM t1 where (a,b) in (select ?, ?) ';
+execute stmt1 using @arg00, @arg01;
+a
+3
+set @arg00=1 ;
+set @arg01='two' ;
+set @arg02=2 ;
+set @arg03='two' ;
+select a, @arg00, b FROM t1 outer_table where
+b=@arg01 and a = (select @arg02 from t1 where b = @arg03 ) ;
+a @arg00 b
+2 1 two
+prepare stmt1 from ' select a, ?, b FROM t1 outer_table where
+ b=? and a = (select ? from t1 where b = ? ) ' ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03 ;
+a ? b
+2 1 two
+prepare stmt1 from 'select c4 FROM t9 where
+ c13 = (select MAX(b) from t1 where a = ?) and c22 = ? ' ;
+execute stmt1 using @arg01, @arg02;
+c4
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = outer_table.b ) order by a ';
+execute stmt1 ;
+a b
+1 one
+2 two
+3 three
+4 four
+prepare stmt1 from ' SELECT a as ccc from t1 where a+1=
+ (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) ';
+execute stmt1 ;
+ccc
+1
+deallocate prepare stmt1 ;
+prepare stmt1 from ' SELECT a as ccc from t1 where a+1=
+ (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) ';
+execute stmt1 ;
+ccc
+1
+deallocate prepare stmt1 ;
+prepare stmt1 from ' SELECT a as ccc from t1 where a+1=
+ (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) ';
+execute stmt1 ;
+ccc
+1
+deallocate prepare stmt1 ;
+set @arg00='two' ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where b = outer_table.b ) and b=@arg00 ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where b = outer_table.b) and b=? ';
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00=2 ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where a = @arg00 and b = outer_table.b) and b='two' ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where a = ? and b = outer_table.b) and b=''two'' ' ;
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00=2 ;
+select a, b FROM t1 outer_table where
+a = (select a from t1 where outer_table.a = @arg00 and a=2) and b='two' ;
+a b
+2 two
+prepare stmt1 from ' select a, b FROM t1 outer_table where
+ a = (select a from t1 where outer_table.a = ? and a=2) and b=''two'' ' ;
+execute stmt1 using @arg00;
+a b
+2 two
+set @arg00=1 ;
+set @arg01='two' ;
+set @arg02=2 ;
+set @arg03='two' ;
+select a, @arg00, b FROM t1 outer_table where
+b=@arg01 and a = (select @arg02 from t1 where outer_table.b = @arg03
+and outer_table.a=a ) ;
+a @arg00 b
+2 1 two
+prepare stmt1 from ' select a, ?, b FROM t1 outer_table where
+ b=? and a = (select ? from t1 where outer_table.b = ?
+ and outer_table.a=a ) ' ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03 ;
+a ? b
+2 1 two
+set @arg00=1 ;
+set @arg01=0 ;
+select a, @arg00
+from ( select a - @arg00 as a from t1 where a=@arg00 ) as t2
+where a=@arg01;
+a @arg00
+0 1
+prepare stmt1 from ' select a, ?
+ from ( select a - ? as a from t1 where a=? ) as t2
+ where a=? ';
+execute stmt1 using @arg00, @arg00, @arg00, @arg01 ;
+a ?
+0 1
+drop table if exists t2 ;
+create table t2 as select * from t1;
+prepare stmt1 from ' select a in (select a from t2) from t1 ' ;
+execute stmt1 ;
+a in (select a from t2)
+1
+1
+1
+1
+drop table if exists t5, t6, t7 ;
+create table t5 (a int , b int) ;
+create table t6 like t5 ;
+create table t7 like t5 ;
+insert into t5 values (0, 100), (1, 2), (1, 3), (2, 2), (2, 7),
+(2, -1), (3, 10) ;
+insert into t6 values (0, 0), (1, 1), (2, 1), (3, 1), (4, 1) ;
+insert into t7 values (3, 3), (2, 2), (1, 1) ;
+prepare stmt1 from ' select a, (select count(distinct t5.b) as sum from t5, t6
+ where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b
+ group by t5.a order by sum limit 1) from t7 ' ;
+execute stmt1 ;
+a (select count(distinct t5.b) as sum from t5, t6
+ where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b
+ group by t5.a order by sum limit 1)
+3 1
+2 2
+1 2
+execute stmt1 ;
+a (select count(distinct t5.b) as sum from t5, t6
+ where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b
+ group by t5.a order by sum limit 1)
+3 1
+2 2
+1 2
+execute stmt1 ;
+a (select count(distinct t5.b) as sum from t5, t6
+ where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b
+ group by t5.a order by sum limit 1)
+3 1
+2 2
+1 2
+drop table t5, t6, t7 ;
+drop table if exists t2 ;
+create table t2 as select * from t9;
+set @stmt= ' SELECT
+ (SELECT SUM(c1 + c12 + 0.0) FROM t2
+ where (t9.c2 - 0e-3) = t2.c2
+ GROUP BY t9.c15 LIMIT 1) as scalar_s,
+ exists (select 1.0e+0 from t2
+ where t2.c3 * 9.0000000000 = t9.c4) as exists_s,
+ c5 * 4 in (select c6 + 0.3e+1 from t2) as in_s,
+ (c7 - 4, c8 - 4) in (select c9 + 4.0, c10 + 40e-1 from t2) as in_row_s
+FROM t9,
+(select c25 x, c32 y from t2) tt WHERE x = c25 ' ;
+prepare stmt1 from @stmt ;
+execute stmt1 ;
+execute stmt1 ;
+set @stmt= concat('explain ',@stmt);
+prepare stmt1 from @stmt ;
+execute stmt1 ;
+execute stmt1 ;
+set @stmt= ' SELECT
+ (SELECT SUM(c1+c12+?) FROM t2 where (t9.c2-?)=t2.c2
+ GROUP BY t9.c15 LIMIT 1) as scalar_s,
+ exists (select ? from t2
+ where t2.c3*?=t9.c4) as exists_s,
+ c5*? in (select c6+? from t2) as in_s,
+ (c7-?, c8-?) in (select c9+?, c10+? from t2) as in_row_s
+FROM t9,
+(select c25 x, c32 y from t2) tt WHERE x =c25 ' ;
+set @arg00= 0.0 ;
+set @arg01= 0e-3 ;
+set @arg02= 1.0e+0 ;
+set @arg03= 9.0000000000 ;
+set @arg04= 4 ;
+set @arg05= 0.3e+1 ;
+set @arg06= 4 ;
+set @arg07= 4 ;
+set @arg08= 4.0 ;
+set @arg09= 40e-1 ;
+prepare stmt1 from @stmt ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06,
+@arg07, @arg08, @arg09 ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06,
+@arg07, @arg08, @arg09 ;
+set @stmt= concat('explain ',@stmt);
+prepare stmt1 from @stmt ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06,
+@arg07, @arg08, @arg09 ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06,
+@arg07, @arg08, @arg09 ;
+drop table t2 ;
+select 1 < (select a from t1) ;
+ERROR 21000: Subquery returns more than 1 row
+prepare stmt1 from ' select 1 < (select a from t1) ' ;
+execute stmt1 ;
+ERROR 21000: Subquery returns more than 1 row
+select 1 as my_col ;
+my_col
+1
+test_sequence
+------ union tests ------
+prepare stmt1 from ' select a FROM t1 where a=1
+ union distinct
+ select a FROM t1 where a=1 ';
+execute stmt1 ;
+a
+1
+execute stmt1 ;
+a
+1
+prepare stmt1 from ' select a FROM t1 where a=1
+ union all
+ select a FROM t1 where a=1 ';
+execute stmt1 ;
+a
+1
+1
+prepare stmt1 from ' SELECT 1, 2 union SELECT 1 ' ;
+ERROR 21000: The used SELECT statements have a different number of columns
+prepare stmt1 from ' SELECT 1 union SELECT 1, 2 ' ;
+ERROR 21000: The used SELECT statements have a different number of columns
+prepare stmt1 from ' SELECT * from t1 union SELECT 1 ' ;
+ERROR 21000: The used SELECT statements have a different number of columns
+prepare stmt1 from ' SELECT 1 union SELECT * from t1 ' ;
+ERROR 21000: The used SELECT statements have a different number of columns
+set @arg00=1 ;
+select @arg00 FROM t1 where a=1
+union distinct
+select 1 FROM t1 where a=1;
+@arg00
+1
+prepare stmt1 from ' select ? FROM t1 where a=1
+ union distinct
+ select 1 FROM t1 where a=1 ' ;
+execute stmt1 using @arg00;
+?
+1
+set @arg00=1 ;
+select 1 FROM t1 where a=1
+union distinct
+select @arg00 FROM t1 where a=1;
+1
+1
+prepare stmt1 from ' select 1 FROM t1 where a=1
+ union distinct
+ select ? FROM t1 where a=1 ' ;
+execute stmt1 using @arg00;
+1
+1
+set @arg00='a' ;
+select @arg00 FROM t1 where a=1
+union distinct
+select @arg00 FROM t1 where a=1;
+@arg00
+a
+prepare stmt1 from ' select ? FROM t1 where a=1
+ union distinct
+ select ? FROM t1 where a=1 ';
+execute stmt1 using @arg00, @arg00;
+?
+a
+prepare stmt1 from ' select ?
+ union distinct
+ select ? ';
+execute stmt1 using @arg00, @arg00;
+?
+a
+set @arg00='a' ;
+set @arg01=1 ;
+set @arg02='a' ;
+set @arg03=2 ;
+select @arg00 FROM t1 where a=@arg01
+union distinct
+select @arg02 FROM t1 where a=@arg03;
+@arg00
+a
+prepare stmt1 from ' select ? FROM t1 where a=?
+ union distinct
+ select ? FROM t1 where a=? ' ;
+execute stmt1 using @arg00, @arg01, @arg02, @arg03;
+?
+a
+set @arg00=1 ;
+prepare stmt1 from ' select sum(a) + 200, ? from t1
+union distinct
+select sum(a) + 200, 1 from t1
+group by b ' ;
+execute stmt1 using @arg00;
+sum(a) + 200 ?
+210 1
+204 1
+201 1
+203 1
+202 1
+set @Oporto='Oporto' ;
+set @Lisboa='Lisboa' ;
+set @0=0 ;
+set @1=1 ;
+set @2=2 ;
+set @3=3 ;
+set @4=4 ;
+select @Oporto,@Lisboa,@0,@1,@2,@3,@4 ;
+@Oporto @Lisboa @0 @1 @2 @3 @4
+Oporto Lisboa 0 1 2 3 4
+select sum(a) + 200 as the_sum, @Oporto as the_town from t1
+group by b
+union distinct
+select sum(a) + 200, @Lisboa from t1
+group by b ;
+the_sum the_town
+204 Oporto
+201 Oporto
+203 Oporto
+202 Oporto
+204 Lisboa
+201 Lisboa
+203 Lisboa
+202 Lisboa
+prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1
+ group by b
+ union distinct
+ select sum(a) + 200, ? from t1
+ group by b ' ;
+execute stmt1 using @Oporto, @Lisboa;
+the_sum the_town
+204 Oporto
+201 Oporto
+203 Oporto
+202 Oporto
+204 Lisboa
+201 Lisboa
+203 Lisboa
+202 Lisboa
+select sum(a) + 200 as the_sum, @Oporto as the_town from t1
+where a > @1
+group by b
+union distinct
+select sum(a) + 200, @Lisboa from t1
+where a > @2
+group by b ;
+the_sum the_town
+204 Oporto
+203 Oporto
+202 Oporto
+204 Lisboa
+203 Lisboa
+prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1
+ where a > ?
+ group by b
+ union distinct
+ select sum(a) + 200, ? from t1
+ where a > ?
+ group by b ' ;
+execute stmt1 using @Oporto, @1, @Lisboa, @2;
+the_sum the_town
+204 Oporto
+203 Oporto
+202 Oporto
+204 Lisboa
+203 Lisboa
+select sum(a) + 200 as the_sum, @Oporto as the_town from t1
+where a > @1
+group by b
+having avg(a) > @2
+union distinct
+select sum(a) + 200, @Lisboa from t1
+where a > @2
+group by b
+having avg(a) > @3;
+the_sum the_town
+204 Oporto
+203 Oporto
+204 Lisboa
+prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1
+ where a > ?
+ group by b
+ having avg(a) > ?
+ union distinct
+ select sum(a) + 200, ? from t1
+ where a > ?
+ group by b
+ having avg(a) > ? ';
+execute stmt1 using @Oporto, @1, @2, @Lisboa, @2, @3;
+the_sum the_town
+204 Oporto
+203 Oporto
+204 Lisboa
+test_sequence
+------ explain select tests ------
+prepare stmt1 from ' explain select * from t9 ' ;
+execute stmt1;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def id 8 3 1 N 32929 0 63
+def select_type 253 19 6 N 1 31 8
+def table 253 64 2 Y 0 31 8
+def type 253 10 3 Y 0 31 8
+def possible_keys 253 4096 0 Y 0 31 8
+def key 253 64 0 Y 0 31 8
+def key_len 253 4096 0 Y 128 31 63
+def ref 253 1024 0 Y 0 31 8
+def rows 8 10 1 Y 32928 0 63
+def Extra 253 255 0 N 1 31 8
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t9 ALL NULL NULL NULL NULL 2
+drop table if exists t2 ;
+create table t2 (s varchar(25), fulltext(s))
+ENGINE = 'MARIA' ;
+insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ;
+commit ;
+prepare stmt1 from ' select s from t2 where match (s) against (?) ' ;
+set @arg00='Dogs' ;
+execute stmt1 using @arg00 ;
+s
+Hollow Dogs
+prepare stmt1 from ' SELECT s FROM t2
+where match (s) against (concat(?,''digger'')) ';
+set @arg00='Grave' ;
+execute stmt1 using @arg00 ;
+s
+Gravedigger
+drop table t2 ;
+test_sequence
+------ delete tests ------
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+prepare stmt1 from 'delete from t1 where a=2' ;
+execute stmt1;
+select a,b from t1 where a=2;
+a b
+execute stmt1;
+insert into t1 values(0,NULL);
+set @arg00=NULL;
+prepare stmt1 from 'delete from t1 where b=?' ;
+execute stmt1 using @arg00;
+select a,b from t1 where b is NULL ;
+a b
+0 NULL
+set @arg00='one';
+execute stmt1 using @arg00;
+select a,b from t1 where b=@arg00;
+a b
+prepare stmt1 from 'truncate table t1' ;
+test_sequence
+------ update tests ------
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+prepare stmt1 from 'update t1 set b=''a=two'' where a=2' ;
+execute stmt1;
+select a,b from t1 where a=2;
+a b
+2 a=two
+execute stmt1;
+select a,b from t1 where a=2;
+a b
+2 a=two
+set @arg00=NULL;
+prepare stmt1 from 'update t1 set b=? where a=2' ;
+execute stmt1 using @arg00;
+select a,b from t1 where a=2;
+a b
+2 NULL
+set @arg00='two';
+execute stmt1 using @arg00;
+select a,b from t1 where a=2;
+a b
+2 two
+set @arg00=2;
+prepare stmt1 from 'update t1 set b=NULL where a=?' ;
+execute stmt1 using @arg00;
+select a,b from t1 where a=@arg00;
+a b
+2 NULL
+update t1 set b='two' where a=@arg00;
+set @arg00=2000;
+execute stmt1 using @arg00;
+select a,b from t1 where a=@arg00;
+a b
+set @arg00=2;
+set @arg01=22;
+prepare stmt1 from 'update t1 set a=? where a=?' ;
+execute stmt1 using @arg00, @arg00;
+select a,b from t1 where a=@arg00;
+a b
+2 two
+execute stmt1 using @arg01, @arg00;
+select a,b from t1 where a=@arg01;
+a b
+22 two
+execute stmt1 using @arg00, @arg01;
+select a,b from t1 where a=@arg00;
+a b
+2 two
+set @arg00=NULL;
+set @arg01=2;
+execute stmt1 using @arg00, @arg01;
+Warnings:
+Warning 1048 Column 'a' cannot be null
+select a,b from t1 order by a;
+a b
+0 two
+1 one
+3 three
+4 four
+set @arg00=0;
+execute stmt1 using @arg01, @arg00;
+select a,b from t1 order by a;
+a b
+1 one
+2 two
+3 three
+4 four
+set @arg00=23;
+set @arg01='two';
+set @arg02=2;
+set @arg03='two';
+set @arg04=2;
+drop table if exists t2;
+create table t2 as select a,b from t1 ;
+prepare stmt1 from 'update t1 set a=? where b=?
+ and a in (select ? from t2
+ where b = ? or a = ?)';
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ;
+affected rows: 1
+info: Rows matched: 1 Changed: 1 Warnings: 0
+select a,b from t1 where a = @arg00 ;
+a b
+23 two
+prepare stmt1 from 'update t1 set a=? where b=?
+ and a not in (select ? from t2
+ where b = ? or a = ?)';
+execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ;
+affected rows: 1
+info: Rows matched: 1 Changed: 1 Warnings: 0
+select a,b from t1 order by a ;
+a b
+1 one
+2 two
+3 three
+4 four
+drop table t2 ;
+create table t2
+(
+a int, b varchar(30),
+primary key(a)
+) engine = 'MARIA' ;
+insert into t2(a,b) select a, b from t1 ;
+prepare stmt1 from 'update t1 set a=? where b=?
+ and a in (select ? from t2
+ where b = ? or a = ?)';
+execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ;
+affected rows: 1
+info: Rows matched: 1 Changed: 1 Warnings: 0
+select a,b from t1 where a = @arg00 ;
+a b
+23 two
+prepare stmt1 from 'update t1 set a=? where b=?
+ and a not in (select ? from t2
+ where b = ? or a = ?)';
+execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ;
+affected rows: 1
+info: Rows matched: 1 Changed: 1 Warnings: 0
+select a,b from t1 order by a ;
+a b
+1 one
+2 two
+3 three
+4 four
+drop table t2 ;
+set @arg00=1;
+prepare stmt1 from 'update t1 set b=''bla''
+where a=2
+limit 1';
+execute stmt1 ;
+select a,b from t1 where b = 'bla' ;
+a b
+2 bla
+prepare stmt1 from 'update t1 set b=''bla'' where a=2 limit ?';
+execute stmt1 using @arg00;
+test_sequence
+------ insert tests ------
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+prepare stmt1 from 'insert into t1 values(5, ''five'' )';
+execute stmt1;
+select a,b from t1 where a = 5;
+a b
+5 five
+set @arg00='six' ;
+prepare stmt1 from 'insert into t1 values(6, ? )';
+execute stmt1 using @arg00;
+select a,b from t1 where b = @arg00;
+a b
+6 six
+execute stmt1 using @arg00;
+ERROR 23000: Duplicate entry '6' for key 'PRIMARY'
+set @arg00=NULL ;
+prepare stmt1 from 'insert into t1 values(0, ? )';
+execute stmt1 using @arg00;
+select a,b from t1 where b is NULL;
+a b
+0 NULL
+set @arg00=8 ;
+set @arg01='eight' ;
+prepare stmt1 from 'insert into t1 values(?, ? )';
+execute stmt1 using @arg00, @arg01 ;
+select a,b from t1 where b = @arg01;
+a b
+8 eight
+set @NULL= null ;
+set @arg00= 'abc' ;
+execute stmt1 using @NULL, @NULL ;
+ERROR 23000: Column 'a' cannot be null
+execute stmt1 using @NULL, @NULL ;
+ERROR 23000: Column 'a' cannot be null
+execute stmt1 using @NULL, @arg00 ;
+ERROR 23000: Column 'a' cannot be null
+execute stmt1 using @NULL, @arg00 ;
+ERROR 23000: Column 'a' cannot be null
+set @arg01= 10000 + 2 ;
+execute stmt1 using @arg01, @arg00 ;
+set @arg01= 10000 + 1 ;
+execute stmt1 using @arg01, @arg00 ;
+select * from t1 where a > 10000 order by a ;
+a b
+10001 abc
+10002 abc
+delete from t1 where a > 10000 ;
+set @arg01= 10000 + 2 ;
+execute stmt1 using @arg01, @NULL ;
+set @arg01= 10000 + 1 ;
+execute stmt1 using @arg01, @NULL ;
+select * from t1 where a > 10000 order by a ;
+a b
+10001 NULL
+10002 NULL
+delete from t1 where a > 10000 ;
+set @arg01= 10000 + 10 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 9 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 8 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 7 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 6 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 5 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 4 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 3 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 2 ;
+execute stmt1 using @arg01, @arg01 ;
+set @arg01= 10000 + 1 ;
+execute stmt1 using @arg01, @arg01 ;
+select * from t1 where a > 10000 order by a ;
+a b
+10001 10001
+10002 10002
+10003 10003
+10004 10004
+10005 10005
+10006 10006
+10007 10007
+10008 10008
+10009 10009
+10010 10010
+delete from t1 where a > 10000 ;
+set @arg00=81 ;
+set @arg01='8-1' ;
+set @arg02=82 ;
+set @arg03='8-2' ;
+prepare stmt1 from 'insert into t1 values(?,?),(?,?)';
+execute stmt1 using @arg00, @arg01, @arg02, @arg03 ;
+select a,b from t1 where a in (@arg00,@arg02) ;
+a b
+81 8-1
+82 8-2
+set @arg00=9 ;
+set @arg01='nine' ;
+prepare stmt1 from 'insert into t1 set a=?, b=? ';
+execute stmt1 using @arg00, @arg01 ;
+select a,b from t1 where a = @arg00 ;
+a b
+9 nine
+set @arg00=6 ;
+set @arg01=1 ;
+prepare stmt1 from 'insert into t1 set a=?, b=''sechs''
+ on duplicate key update a=a + ?, b=concat(b,''modified'') ';
+execute stmt1 using @arg00, @arg01;
+select * from t1 order by a;
+a b
+0 NULL
+1 one
+2 two
+3 three
+4 four
+5 five
+7 sixmodified
+8 eight
+9 nine
+81 8-1
+82 8-2
+set @arg00=81 ;
+set @arg01=1 ;
+execute stmt1 using @arg00, @arg01;
+ERROR 23000: Duplicate entry '82' for key 'PRIMARY'
+drop table if exists t2 ;
+create table t2 (id int auto_increment primary key)
+ENGINE= 'MARIA' ;
+prepare stmt1 from ' select last_insert_id() ' ;
+insert into t2 values (NULL) ;
+execute stmt1 ;
+last_insert_id()
+1
+insert into t2 values (NULL) ;
+execute stmt1 ;
+last_insert_id()
+2
+drop table t2 ;
+set @1000=1000 ;
+set @x1000_2="x1000_2" ;
+set @x1000_3="x1000_3" ;
+set @x1000="x1000" ;
+set @1100=1100 ;
+set @x1100="x1100" ;
+set @100=100 ;
+set @updated="updated" ;
+insert into t1 values(1000,'x1000_1') ;
+insert into t1 values(@1000,@x1000_2),(@1000,@x1000_3)
+on duplicate key update a = a + @100, b = concat(b,@updated) ;
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1000 x1000_3
+1100 x1000_1updated
+delete from t1 where a >= 1000 ;
+insert into t1 values(1000,'x1000_1') ;
+prepare stmt1 from ' insert into t1 values(?,?),(?,?)
+ on duplicate key update a = a + ?, b = concat(b,?) ';
+execute stmt1 using @1000, @x1000_2, @1000, @x1000_3, @100, @updated ;
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1000 x1000_3
+1100 x1000_1updated
+delete from t1 where a >= 1000 ;
+insert into t1 values(1000,'x1000_1') ;
+execute stmt1 using @1000, @x1000_2, @1100, @x1000_3, @100, @updated ;
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1200 x1000_1updatedupdated
+delete from t1 where a >= 1000 ;
+prepare stmt1 from ' replace into t1 (a,b) select 100, ''hundred'' ';
+execute stmt1;
+execute stmt1;
+execute stmt1;
+test_sequence
+------ multi table tests ------
+delete from t1 ;
+delete from t9 ;
+insert into t1(a,b) values (1, 'one'), (2, 'two'), (3, 'three') ;
+insert into t9 (c1,c21)
+values (1, 'one'), (2, 'two'), (3, 'three') ;
+prepare stmt_delete from " delete t1, t9
+ from t1, t9 where t1.a=t9.c1 and t1.b='updated' ";
+prepare stmt_update from " update t1, t9
+ set t1.b='updated', t9.c21='updated'
+ where t1.a=t9.c1 and t1.a=? ";
+prepare stmt_select1 from " select a, b from t1 order by a" ;
+prepare stmt_select2 from " select c1, c21 from t9 order by c1" ;
+set @arg00= 1 ;
+execute stmt_update using @arg00 ;
+execute stmt_delete ;
+execute stmt_select1 ;
+a b
+2 two
+3 three
+execute stmt_select2 ;
+c1 c21
+2 two
+3 three
+set @arg00= @arg00 + 1 ;
+execute stmt_update using @arg00 ;
+execute stmt_delete ;
+execute stmt_select1 ;
+a b
+3 three
+execute stmt_select2 ;
+c1 c21
+3 three
+set @arg00= @arg00 + 1 ;
+execute stmt_update using @arg00 ;
+execute stmt_delete ;
+execute stmt_select1 ;
+a b
+execute stmt_select2 ;
+c1 c21
+set @arg00= @arg00 + 1 ;
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+insert into t1 values(0,NULL) ;
+set @duplicate='duplicate ' ;
+set @1000=1000 ;
+set @5=5 ;
+select a,b from t1 where a < 5 order by a ;
+a b
+0 NULL
+1 one
+2 two
+3 three
+4 four
+insert into t1 select a + @1000, concat(@duplicate,b) from t1
+where a < @5 ;
+affected rows: 5
+info: Records: 5 Duplicates: 0 Warnings: 0
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1000 NULL
+1001 duplicate one
+1002 duplicate two
+1003 duplicate three
+1004 duplicate four
+delete from t1 where a >= 1000 ;
+prepare stmt1 from ' insert into t1 select a + ?, concat(?,b) from t1
+where a < ? ' ;
+execute stmt1 using @1000, @duplicate, @5;
+affected rows: 5
+info: Records: 5 Duplicates: 0 Warnings: 0
+select a,b from t1 where a >= 1000 order by a ;
+a b
+1000 NULL
+1001 duplicate one
+1002 duplicate two
+1003 duplicate three
+1004 duplicate four
+delete from t1 where a >= 1000 ;
+set @1=1 ;
+set @2=2 ;
+set @100=100 ;
+set @float=1.00;
+set @five='five' ;
+drop table if exists t2;
+create table t2 like t1 ;
+insert into t2 (b,a)
+select @duplicate, sum(first.a) from t1 first, t1 second
+where first.a <> @5 and second.b = first.b
+and second.b <> @five
+group by second.b
+having sum(second.a) > @2
+union
+select b, a + @100 from t1
+where (a,b) in ( select sqrt(a+@1)+CAST(@float AS signed),b
+from t1);
+affected rows: 3
+info: Records: 3 Duplicates: 0 Warnings: 0
+select a,b from t2 order by a ;
+a b
+3 duplicate
+4 duplicate
+103 three
+delete from t2 ;
+prepare stmt1 from ' insert into t2 (b,a)
+select ?, sum(first.a)
+ from t1 first, t1 second
+ where first.a <> ? and second.b = first.b and second.b <> ?
+ group by second.b
+ having sum(second.a) > ?
+union
+select b, a + ? from t1
+ where (a,b) in ( select sqrt(a+?)+CAST(? AS signed),b
+ from t1 ) ' ;
+execute stmt1 using @duplicate, @5, @five, @2, @100, @1, @float ;
+affected rows: 3
+info: Records: 3 Duplicates: 0 Warnings: 0
+select a,b from t2 order by a ;
+a b
+3 duplicate
+4 duplicate
+103 three
+drop table t2;
+drop table if exists t5 ;
+set @arg01= 8;
+set @arg02= 8.0;
+set @arg03= 80.00000000000e-1;
+set @arg04= 'abc' ;
+set @arg05= CAST('abc' as binary) ;
+set @arg06= '1991-08-05' ;
+set @arg07= CAST('1991-08-05' as date);
+set @arg08= '1991-08-05 01:01:01' ;
+set @arg09= CAST('1991-08-05 01:01:01' as datetime) ;
+set @arg10= unix_timestamp('1991-01-01 01:01:01');
+set @arg11= YEAR('1991-01-01 01:01:01');
+set @arg12= 8 ;
+set @arg12= NULL ;
+set @arg13= 8.0 ;
+set @arg13= NULL ;
+set @arg14= 'abc';
+set @arg14= NULL ;
+set @arg15= CAST('abc' as binary) ;
+set @arg15= NULL ;
+create table t5 as select
+8 as const01, @arg01 as param01,
+8.0 as const02, @arg02 as param02,
+80.00000000000e-1 as const03, @arg03 as param03,
+'abc' as const04, @arg04 as param04,
+CAST('abc' as binary) as const05, @arg05 as param05,
+'1991-08-05' as const06, @arg06 as param06,
+CAST('1991-08-05' as date) as const07, @arg07 as param07,
+'1991-08-05 01:01:01' as const08, @arg08 as param08,
+CAST('1991-08-05 01:01:01' as datetime) as const09, @arg09 as param09,
+unix_timestamp('1991-01-01 01:01:01') as const10, @arg10 as param10,
+YEAR('1991-01-01 01:01:01') as const11, @arg11 as param11,
+NULL as const12, @arg12 as param12,
+@arg13 as param13,
+@arg14 as param14,
+@arg15 as param15;
+show create table t5 ;
+Table Create Table
+t5 CREATE TABLE `t5` (
+ `const01` int(1) NOT NULL DEFAULT '0',
+ `param01` bigint(20) DEFAULT NULL,
+ `const02` decimal(2,1) NOT NULL DEFAULT '0.0',
+ `param02` decimal(65,30) DEFAULT NULL,
+ `const03` double NOT NULL DEFAULT '0',
+ `param03` double DEFAULT NULL,
+ `const04` varchar(3) NOT NULL DEFAULT '',
+ `param04` longtext,
+ `const05` varbinary(3) NOT NULL DEFAULT '',
+ `param05` longblob,
+ `const06` varchar(10) NOT NULL DEFAULT '',
+ `param06` longtext,
+ `const07` date DEFAULT NULL,
+ `param07` longblob,
+ `const08` varchar(19) NOT NULL DEFAULT '',
+ `param08` longtext,
+ `const09` datetime DEFAULT NULL,
+ `param09` longblob,
+ `const10` int(10) NOT NULL DEFAULT '0',
+ `param10` bigint(20) DEFAULT NULL,
+ `const11` int(4) DEFAULT NULL,
+ `param11` bigint(20) DEFAULT NULL,
+ `const12` binary(0) DEFAULT NULL,
+ `param12` bigint(20) DEFAULT NULL,
+ `param13` decimal(65,30) DEFAULT NULL,
+ `param14` longtext,
+ `param15` longblob
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select * from t5 ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def test t5 t5 const01 const01 3 1 1 N 32769 0 63
+def test t5 t5 param01 param01 8 20 1 Y 32768 0 63
+def test t5 t5 const02 const02 246 4 3 N 1 1 63
+def test t5 t5 param02 param02 246 67 32 Y 0 30 63
+def test t5 t5 const03 const03 5 17 1 N 32769 31 63
+def test t5 t5 param03 param03 5 23 1 Y 32768 31 63
+def test t5 t5 const04 const04 253 3 3 N 1 0 8
+def test t5 t5 param04 param04 252 4294967295 3 Y 16 0 8
+def test t5 t5 const05 const05 253 3 3 N 129 0 63
+def test t5 t5 param05 param05 252 4294967295 3 Y 144 0 63
+def test t5 t5 const06 const06 253 10 10 N 1 0 8
+def test t5 t5 param06 param06 252 4294967295 10 Y 16 0 8
+def test t5 t5 const07 const07 10 10 10 Y 128 0 63
+def test t5 t5 param07 param07 252 4294967295 10 Y 144 0 63
+def test t5 t5 const08 const08 253 19 19 N 1 0 8
+def test t5 t5 param08 param08 252 4294967295 19 Y 16 0 8
+def test t5 t5 const09 const09 12 19 19 Y 128 0 63
+def test t5 t5 param09 param09 252 4294967295 19 Y 144 0 63
+def test t5 t5 const10 const10 3 10 9 N 32769 0 63
+def test t5 t5 param10 param10 8 20 9 Y 32768 0 63
+def test t5 t5 const11 const11 3 4 4 Y 32768 0 63
+def test t5 t5 param11 param11 8 20 4 Y 32768 0 63
+def test t5 t5 const12 const12 254 0 0 Y 128 0 63
+def test t5 t5 param12 param12 8 20 0 Y 32768 0 63
+def test t5 t5 param13 param13 246 67 0 Y 0 30 63
+def test t5 t5 param14 param14 252 4294967295 0 Y 16 0 8
+def test t5 t5 param15 param15 252 4294967295 0 Y 144 0 63
+const01 8
+param01 8
+const02 8.0
+param02 8.000000000000000000000000000000
+const03 8
+param03 8
+const04 abc
+param04 abc
+const05 abc
+param05 abc
+const06 1991-08-05
+param06 1991-08-05
+const07 1991-08-05
+param07 1991-08-05
+const08 1991-08-05 01:01:01
+param08 1991-08-05 01:01:01
+const09 1991-08-05 01:01:01
+param09 1991-08-05 01:01:01
+const10 662680861
+param10 662680861
+const11 1991
+param11 1991
+const12 NULL
+param12 NULL
+param13 NULL
+param14 NULL
+param15 NULL
+drop table t5 ;
+test_sequence
+------ data type conversion tests ------
+delete from t1 ;
+insert into t1 values (1,'one');
+insert into t1 values (2,'two');
+insert into t1 values (3,'three');
+insert into t1 values (4,'four');
+commit ;
+delete from t9 ;
+insert into t9
+set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1,
+c10= 1, c11= 1, c12 = 1,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=true, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday';
+insert into t9
+set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9,
+c10= 9, c11= 9, c12 = 9,
+c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11',
+c16= '11:11:11', c17= '2004',
+c18= 1, c19=false, c20= 'a', c21= '123456789a',
+c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext',
+c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext',
+c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday';
+commit ;
+insert into t9 set c1= 0, c15= '1991-01-01 01:01:01' ;
+select * from t9 order by c1 ;
+c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday
+test_sequence
+------ select @parameter:= column ------
+prepare full_info from "select @arg01, @arg02, @arg03, @arg04,
+ @arg05, @arg06, @arg07, @arg08,
+ @arg09, @arg10, @arg11, @arg12,
+ @arg13, @arg14, @arg15, @arg16,
+ @arg17, @arg18, @arg19, @arg20,
+ @arg21, @arg22, @arg23, @arg24,
+ @arg25, @arg26, @arg27, @arg28,
+ @arg29, @arg30, @arg31, @arg32" ;
+select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4,
+@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8,
+@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12,
+@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16,
+@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20,
+@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24,
+@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28,
+@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32
+from t9 where c1= 1 ;
+@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 253 20 1 Y 128 0 63
+def @arg02 253 20 1 Y 128 0 63
+def @arg03 253 20 1 Y 128 0 63
+def @arg04 253 20 1 Y 128 0 63
+def @arg05 253 20 1 Y 128 0 63
+def @arg06 253 20 1 Y 128 0 63
+def @arg07 253 23 1 Y 128 31 63
+def @arg08 253 23 1 Y 128 31 63
+def @arg09 253 23 1 Y 128 31 63
+def @arg10 253 23 1 Y 128 31 63
+def @arg11 253 67 6 Y 128 30 63
+def @arg12 253 67 6 Y 128 30 63
+def @arg13 253 16777216 10 Y 128 31 63
+def @arg14 253 16777216 19 Y 128 31 63
+def @arg15 253 16777216 19 Y 128 31 63
+def @arg16 253 16777216 8 Y 128 31 63
+def @arg17 253 20 4 Y 128 0 63
+def @arg18 253 20 1 Y 128 0 63
+def @arg19 253 20 1 Y 128 0 63
+def @arg20 253 16777216 1 Y 0 31 8
+def @arg21 253 16777216 10 Y 0 31 8
+def @arg22 253 16777216 30 Y 0 31 8
+def @arg23 253 16777216 8 Y 128 31 63
+def @arg24 253 16777216 8 Y 0 31 8
+def @arg25 253 16777216 4 Y 128 31 63
+def @arg26 253 16777216 4 Y 0 31 8
+def @arg27 253 16777216 10 Y 128 31 63
+def @arg28 253 16777216 10 Y 0 31 8
+def @arg29 253 16777216 8 Y 128 31 63
+def @arg30 253 16777216 8 Y 0 31 8
+def @arg31 253 16777216 3 Y 0 31 8
+def @arg32 253 16777216 6 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4,
+@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8,
+@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12,
+@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16,
+@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20,
+@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24,
+@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28,
+@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32
+from t9 where c1= 0 ;
+@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 253 20 1 Y 128 0 63
+def @arg02 253 20 0 Y 128 0 63
+def @arg03 253 20 0 Y 128 0 63
+def @arg04 253 20 0 Y 128 0 63
+def @arg05 253 20 0 Y 128 0 63
+def @arg06 253 20 0 Y 128 0 63
+def @arg07 253 23 0 Y 128 31 63
+def @arg08 253 23 0 Y 128 31 63
+def @arg09 253 23 0 Y 128 31 63
+def @arg10 253 23 0 Y 128 31 63
+def @arg11 253 67 0 Y 128 30 63
+def @arg12 253 67 0 Y 128 30 63
+def @arg13 253 16777216 0 Y 128 31 63
+def @arg14 253 16777216 0 Y 128 31 63
+def @arg15 253 16777216 19 Y 128 31 63
+def @arg16 253 16777216 0 Y 128 31 63
+def @arg17 253 20 0 Y 128 0 63
+def @arg18 253 20 0 Y 128 0 63
+def @arg19 253 20 0 Y 128 0 63
+def @arg20 253 16777216 0 Y 0 31 8
+def @arg21 253 16777216 0 Y 0 31 8
+def @arg22 253 16777216 0 Y 0 31 8
+def @arg23 253 16777216 0 Y 128 31 63
+def @arg24 253 16777216 0 Y 0 31 8
+def @arg25 253 16777216 0 Y 128 31 63
+def @arg26 253 16777216 0 Y 0 31 8
+def @arg27 253 16777216 0 Y 128 31 63
+def @arg28 253 16777216 0 Y 0 31 8
+def @arg29 253 16777216 0 Y 128 31 63
+def @arg30 253 16777216 0 Y 0 31 8
+def @arg31 253 16777216 0 Y 0 31 8
+def @arg32 253 16777216 0 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+prepare stmt1 from "select
+ @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4,
+ @arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8,
+ @arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12,
+ @arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16,
+ @arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20,
+ @arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24,
+ @arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28,
+ @arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32
+from t9 where c1= ?" ;
+set @my_key= 1 ;
+execute stmt1 using @my_key ;
+@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 253 20 1 Y 128 0 63
+def @arg02 253 20 1 Y 128 0 63
+def @arg03 253 20 1 Y 128 0 63
+def @arg04 253 20 1 Y 128 0 63
+def @arg05 253 20 1 Y 128 0 63
+def @arg06 253 20 1 Y 128 0 63
+def @arg07 253 23 1 Y 128 31 63
+def @arg08 253 23 1 Y 128 31 63
+def @arg09 253 23 1 Y 128 31 63
+def @arg10 253 23 1 Y 128 31 63
+def @arg11 253 67 6 Y 128 30 63
+def @arg12 253 67 6 Y 128 30 63
+def @arg13 253 16777216 10 Y 128 31 63
+def @arg14 253 16777216 19 Y 128 31 63
+def @arg15 253 16777216 19 Y 128 31 63
+def @arg16 253 16777216 8 Y 128 31 63
+def @arg17 253 20 4 Y 128 0 63
+def @arg18 253 20 1 Y 128 0 63
+def @arg19 253 20 1 Y 128 0 63
+def @arg20 253 16777216 1 Y 0 31 8
+def @arg21 253 16777216 10 Y 0 31 8
+def @arg22 253 16777216 30 Y 0 31 8
+def @arg23 253 16777216 8 Y 128 31 63
+def @arg24 253 16777216 8 Y 0 31 8
+def @arg25 253 16777216 4 Y 128 31 63
+def @arg26 253 16777216 4 Y 0 31 8
+def @arg27 253 16777216 10 Y 128 31 63
+def @arg28 253 16777216 10 Y 0 31 8
+def @arg29 253 16777216 8 Y 128 31 63
+def @arg30 253 16777216 8 Y 0 31 8
+def @arg31 253 16777216 3 Y 0 31 8
+def @arg32 253 16777216 6 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+set @my_key= 0 ;
+execute stmt1 using @my_key ;
+@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 253 20 1 Y 128 0 63
+def @arg02 253 20 0 Y 128 0 63
+def @arg03 253 20 0 Y 128 0 63
+def @arg04 253 20 0 Y 128 0 63
+def @arg05 253 20 0 Y 128 0 63
+def @arg06 253 20 0 Y 128 0 63
+def @arg07 253 23 0 Y 128 31 63
+def @arg08 253 23 0 Y 128 31 63
+def @arg09 253 23 0 Y 128 31 63
+def @arg10 253 23 0 Y 128 31 63
+def @arg11 253 67 0 Y 128 30 63
+def @arg12 253 67 0 Y 128 30 63
+def @arg13 253 16777216 0 Y 128 31 63
+def @arg14 253 16777216 0 Y 128 31 63
+def @arg15 253 16777216 19 Y 128 31 63
+def @arg16 253 16777216 0 Y 128 31 63
+def @arg17 253 20 0 Y 128 0 63
+def @arg18 253 20 0 Y 128 0 63
+def @arg19 253 20 0 Y 128 0 63
+def @arg20 253 16777216 0 Y 0 31 8
+def @arg21 253 16777216 0 Y 0 31 8
+def @arg22 253 16777216 0 Y 0 31 8
+def @arg23 253 16777216 0 Y 128 31 63
+def @arg24 253 16777216 0 Y 0 31 8
+def @arg25 253 16777216 0 Y 128 31 63
+def @arg26 253 16777216 0 Y 0 31 8
+def @arg27 253 16777216 0 Y 128 31 63
+def @arg28 253 16777216 0 Y 0 31 8
+def @arg29 253 16777216 0 Y 128 31 63
+def @arg30 253 16777216 0 Y 0 31 8
+def @arg31 253 16777216 0 Y 0 31 8
+def @arg32 253 16777216 0 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+prepare stmt1 from "select ? := c1 from t9 where c1= 1" ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ':= c1 from t9 where c1= 1' at line 1
+test_sequence
+------ select column, .. into @parm,.. ------
+select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
+c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24,
+c25, c26, c27, c28, c29, c30, c31, c32
+into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08,
+@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16,
+@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24,
+@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32
+from t9 where c1= 1 ;
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 253 20 1 Y 128 0 63
+def @arg02 253 20 1 Y 128 0 63
+def @arg03 253 20 1 Y 128 0 63
+def @arg04 253 20 1 Y 128 0 63
+def @arg05 253 20 1 Y 128 0 63
+def @arg06 253 20 1 Y 128 0 63
+def @arg07 253 23 1 Y 128 31 63
+def @arg08 253 23 1 Y 128 31 63
+def @arg09 253 23 1 Y 128 31 63
+def @arg10 253 23 1 Y 128 31 63
+def @arg11 253 67 6 Y 128 30 63
+def @arg12 253 67 6 Y 128 30 63
+def @arg13 253 16777216 10 Y 128 31 63
+def @arg14 253 16777216 19 Y 128 31 63
+def @arg15 253 16777216 19 Y 128 31 63
+def @arg16 253 16777216 8 Y 128 31 63
+def @arg17 253 20 4 Y 128 0 63
+def @arg18 253 20 1 Y 128 0 63
+def @arg19 253 20 1 Y 128 0 63
+def @arg20 253 16777216 1 Y 0 31 8
+def @arg21 253 16777216 10 Y 0 31 8
+def @arg22 253 16777216 30 Y 0 31 8
+def @arg23 253 16777216 8 Y 128 31 63
+def @arg24 253 16777216 8 Y 0 31 8
+def @arg25 253 16777216 4 Y 128 31 63
+def @arg26 253 16777216 4 Y 0 31 8
+def @arg27 253 16777216 10 Y 128 31 63
+def @arg28 253 16777216 10 Y 0 31 8
+def @arg29 253 16777216 8 Y 128 31 63
+def @arg30 253 16777216 8 Y 0 31 8
+def @arg31 253 16777216 3 Y 0 31 8
+def @arg32 253 16777216 6 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
+c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24,
+c25, c26, c27, c28, c29, c30, c31, c32
+into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08,
+@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16,
+@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24,
+@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32
+from t9 where c1= 0 ;
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 253 20 1 Y 128 0 63
+def @arg02 253 20 0 Y 128 0 63
+def @arg03 253 20 0 Y 128 0 63
+def @arg04 253 20 0 Y 128 0 63
+def @arg05 253 20 0 Y 128 0 63
+def @arg06 253 20 0 Y 128 0 63
+def @arg07 253 23 0 Y 128 31 63
+def @arg08 253 23 0 Y 128 31 63
+def @arg09 253 23 0 Y 128 31 63
+def @arg10 253 23 0 Y 128 31 63
+def @arg11 253 67 0 Y 128 30 63
+def @arg12 253 67 0 Y 128 30 63
+def @arg13 253 16777216 0 Y 128 31 63
+def @arg14 253 16777216 0 Y 128 31 63
+def @arg15 253 16777216 19 Y 128 31 63
+def @arg16 253 16777216 0 Y 128 31 63
+def @arg17 253 20 0 Y 128 0 63
+def @arg18 253 20 0 Y 128 0 63
+def @arg19 253 20 0 Y 128 0 63
+def @arg20 253 16777216 0 Y 0 31 8
+def @arg21 253 16777216 0 Y 0 31 8
+def @arg22 253 16777216 0 Y 0 31 8
+def @arg23 253 16777216 0 Y 128 31 63
+def @arg24 253 16777216 0 Y 0 31 8
+def @arg25 253 16777216 0 Y 128 31 63
+def @arg26 253 16777216 0 Y 0 31 8
+def @arg27 253 16777216 0 Y 128 31 63
+def @arg28 253 16777216 0 Y 0 31 8
+def @arg29 253 16777216 0 Y 128 31 63
+def @arg30 253 16777216 0 Y 0 31 8
+def @arg31 253 16777216 0 Y 0 31 8
+def @arg32 253 16777216 0 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+prepare stmt1 from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
+ c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24,
+ c25, c26, c27, c28, c29, c30, c31, c32
+into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08,
+ @arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16,
+ @arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24,
+ @arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32
+from t9 where c1= ?" ;
+set @my_key= 1 ;
+execute stmt1 using @my_key ;
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 253 20 1 Y 128 0 63
+def @arg02 253 20 1 Y 128 0 63
+def @arg03 253 20 1 Y 128 0 63
+def @arg04 253 20 1 Y 128 0 63
+def @arg05 253 20 1 Y 128 0 63
+def @arg06 253 20 1 Y 128 0 63
+def @arg07 253 23 1 Y 128 31 63
+def @arg08 253 23 1 Y 128 31 63
+def @arg09 253 23 1 Y 128 31 63
+def @arg10 253 23 1 Y 128 31 63
+def @arg11 253 67 6 Y 128 30 63
+def @arg12 253 67 6 Y 128 30 63
+def @arg13 253 16777216 10 Y 128 31 63
+def @arg14 253 16777216 19 Y 128 31 63
+def @arg15 253 16777216 19 Y 128 31 63
+def @arg16 253 16777216 8 Y 128 31 63
+def @arg17 253 20 4 Y 128 0 63
+def @arg18 253 20 1 Y 128 0 63
+def @arg19 253 20 1 Y 128 0 63
+def @arg20 253 16777216 1 Y 0 31 8
+def @arg21 253 16777216 10 Y 0 31 8
+def @arg22 253 16777216 30 Y 0 31 8
+def @arg23 253 16777216 8 Y 128 31 63
+def @arg24 253 16777216 8 Y 0 31 8
+def @arg25 253 16777216 4 Y 128 31 63
+def @arg26 253 16777216 4 Y 0 31 8
+def @arg27 253 16777216 10 Y 128 31 63
+def @arg28 253 16777216 10 Y 0 31 8
+def @arg29 253 16777216 8 Y 128 31 63
+def @arg30 253 16777216 8 Y 0 31 8
+def @arg31 253 16777216 3 Y 0 31 8
+def @arg32 253 16777216 6 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday
+set @my_key= 0 ;
+execute stmt1 using @my_key ;
+execute full_info ;
+Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
+def @arg01 253 20 1 Y 128 0 63
+def @arg02 253 20 0 Y 128 0 63
+def @arg03 253 20 0 Y 128 0 63
+def @arg04 253 20 0 Y 128 0 63
+def @arg05 253 20 0 Y 128 0 63
+def @arg06 253 20 0 Y 128 0 63
+def @arg07 253 23 0 Y 128 31 63
+def @arg08 253 23 0 Y 128 31 63
+def @arg09 253 23 0 Y 128 31 63
+def @arg10 253 23 0 Y 128 31 63
+def @arg11 253 67 0 Y 128 30 63
+def @arg12 253 67 0 Y 128 30 63
+def @arg13 253 16777216 0 Y 128 31 63
+def @arg14 253 16777216 0 Y 128 31 63
+def @arg15 253 16777216 19 Y 128 31 63
+def @arg16 253 16777216 0 Y 128 31 63
+def @arg17 253 20 0 Y 128 0 63
+def @arg18 253 20 0 Y 128 0 63
+def @arg19 253 20 0 Y 128 0 63
+def @arg20 253 16777216 0 Y 0 31 8
+def @arg21 253 16777216 0 Y 0 31 8
+def @arg22 253 16777216 0 Y 0 31 8
+def @arg23 253 16777216 0 Y 128 31 63
+def @arg24 253 16777216 0 Y 0 31 8
+def @arg25 253 16777216 0 Y 128 31 63
+def @arg26 253 16777216 0 Y 0 31 8
+def @arg27 253 16777216 0 Y 128 31 63
+def @arg28 253 16777216 0 Y 0 31 8
+def @arg29 253 16777216 0 Y 128 31 63
+def @arg30 253 16777216 0 Y 0 31 8
+def @arg31 253 16777216 0 Y 0 31 8
+def @arg32 253 16777216 0 Y 0 31 8
+@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32
+0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+prepare stmt1 from "select c1 into ? from t9 where c1= 1" ;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? from t9 where c1= 1' at line 1
+test_sequence
+-- insert into numeric columns --
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 ) ;
+set @arg00= 21 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22 )" ;
+execute stmt1 ;
+set @arg00= 23;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0,
+30.0, 30.0, 30.0 ) ;
+set @arg00= 31.0 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0,
+ 32.0, 32.0, 32.0 )" ;
+execute stmt1 ;
+set @arg00= 33.0;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( '40', '40', '40', '40', '40', '40', '40', '40',
+'40', '40', '40' ) ;
+set @arg00= '41' ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( '42', '42', '42', '42', '42', '42', '42', '42',
+ '42', '42', '42' )" ;
+execute stmt1 ;
+set @arg00= '43';
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( CAST('50' as binary), CAST('50' as binary),
+CAST('50' as binary), CAST('50' as binary), CAST('50' as binary),
+CAST('50' as binary), CAST('50' as binary), CAST('50' as binary),
+CAST('50' as binary), CAST('50' as binary), CAST('50' as binary) ) ;
+set @arg00= CAST('51' as binary) ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( CAST('52' as binary), CAST('52' as binary),
+ CAST('52' as binary), CAST('52' as binary), CAST('52' as binary),
+ CAST('52' as binary), CAST('52' as binary), CAST('52' as binary),
+ CAST('52' as binary), CAST('52' as binary), CAST('52' as binary) )" ;
+execute stmt1 ;
+set @arg00= CAST('53' as binary) ;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+set @arg00= 2 ;
+set @arg00= NULL ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+NULL, NULL, NULL ) ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 61, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt1 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 62, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL )" ;
+execute stmt1 ;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 63, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+set @arg00= 8.0 ;
+set @arg00= NULL ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 71, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 73, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+set @arg00= 'abc' ;
+set @arg00= NULL ;
+insert into t9
+( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+( 81, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ) ;
+prepare stmt2 from "insert into t9
+ ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 83, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12
+from t9 where c1 >= 20
+order by c1 ;
+c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c12
+20 20 20 20 20 20 20 20 20 20 20.0000
+21 21 21 21 21 21 21 21 21 21 21.0000
+22 22 22 22 22 22 22 22 22 22 22.0000
+23 23 23 23 23 23 23 23 23 23 23.0000
+30 30 30 30 30 30 30 30 30 30 30.0000
+31 31 31 31 31 31 31 31 31 31 31.0000
+32 32 32 32 32 32 32 32 32 32 32.0000
+33 33 33 33 33 33 33 33 33 33 33.0000
+40 40 40 40 40 40 40 40 40 40 40.0000
+41 41 41 41 41 41 41 41 41 41 41.0000
+42 42 42 42 42 42 42 42 42 42 42.0000
+43 43 43 43 43 43 43 43 43 43 43.0000
+50 50 50 50 50 50 50 50 50 50 50.0000
+51 51 51 51 51 51 51 51 51 51 51.0000
+52 52 52 52 52 52 52 52 52 52 52.0000
+53 53 53 53 53 53 53 53 53 53 53.0000
+60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+test_sequence
+-- select .. where numeric column = .. --
+set @arg00= 20;
+select 'true' as found from t9
+where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20
+and c8= 20 and c9= 20 and c10= 20 and c12= 20;
+found
+true
+select 'true' as found from t9
+where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00
+and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00
+and c12= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20
+ and c8= 20 and c9= 20 and c10= 20 and c12= 20 ";
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= ? and c2= ? and c3= ? and c4= ? and c5= ?
+ and c6= ? and c7= ? and c8= ? and c9= ? and c10= ?
+ and c12= ? ";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= 20.0;
+select 'true' as found from t9
+where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0
+and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0;
+found
+true
+select 'true' as found from t9
+where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00
+and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00
+and c12= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0
+ and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0 ";
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= ? and c2= ? and c3= ? and c4= ? and c5= ?
+ and c6= ? and c7= ? and c8= ? and c9= ? and c10= ?
+ and c12= ? ";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+found
+true
+select 'true' as found from t9
+where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20'
+ and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20';
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20'
+ and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20' ";
+execute stmt1 ;
+found
+true
+set @arg00= '20';
+select 'true' as found from t9
+where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00
+and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00
+and c12= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= ? and c2= ? and c3= ? and c4= ? and c5= ?
+ and c6= ? and c7= ? and c8= ? and c9= ? and c10= ?
+ and c12= ? ";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+found
+true
+select 'true' as found from t9
+where c1= CAST('20' as binary) and c2= CAST('20' as binary) and
+c3= CAST('20' as binary) and c4= CAST('20' as binary) and
+c5= CAST('20' as binary) and c6= CAST('20' as binary) and
+c7= CAST('20' as binary) and c8= CAST('20' as binary) and
+c9= CAST('20' as binary) and c10= CAST('20' as binary) and
+c12= CAST('20' as binary);
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= CAST('20' as binary) and c2= CAST('20' as binary) and
+ c3= CAST('20' as binary) and c4= CAST('20' as binary) and
+ c5= CAST('20' as binary) and c6= CAST('20' as binary) and
+ c7= CAST('20' as binary) and c8= CAST('20' as binary) and
+ c9= CAST('20' as binary) and c10= CAST('20' as binary) and
+ c12= CAST('20' as binary) ";
+execute stmt1 ;
+found
+true
+set @arg00= CAST('20' as binary) ;
+select 'true' as found from t9
+where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00
+and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00
+and c12= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= ? and c2= ? and c3= ? and c4= ? and c5= ?
+ and c6= ? and c7= ? and c8= ? and c9= ? and c10= ?
+ and c12= ? ";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00 ;
+found
+true
+delete from t9 ;
+test_sequence
+-- some numeric overflow experiments --
+prepare my_insert from "insert into t9
+ ( c21, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 )
+values
+ ( 'O', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ;
+prepare my_select from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12
+from t9 where c21 = 'O' ";
+prepare my_delete from "delete from t9 where c21 = 'O' ";
+set @arg00= 9223372036854775807 ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 127
+c2 32767
+c3 8388607
+c4 2147483647
+c5 2147483647
+c6 9223372036854775807
+c7 9.22337e+18
+c8 9.22337203685478e+18
+c9 9.22337203685478e+18
+c10 9.22337203685478e+18
+c12 9999.9999
+execute my_delete ;
+set @arg00= '9223372036854775807' ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 127
+c2 32767
+c3 8388607
+c4 2147483647
+c5 2147483647
+c6 9223372036854775807
+c7 9.22337e+18
+c8 9.22337203685478e+18
+c9 9.22337203685478e+18
+c10 9.22337203685478e+18
+c12 9999.9999
+execute my_delete ;
+set @arg00= -9223372036854775808 ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 -128
+c2 -32768
+c3 -8388608
+c4 -2147483648
+c5 -2147483648
+c6 -9223372036854775808
+c7 -9.22337e+18
+c8 -9.22337203685478e+18
+c9 -9.22337203685478e+18
+c10 -9.22337203685478e+18
+c12 -9999.9999
+execute my_delete ;
+set @arg00= '-9223372036854775808' ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 -128
+c2 -32768
+c3 -8388608
+c4 -2147483648
+c5 -2147483648
+c6 -9223372036854775808
+c7 -9.22337e+18
+c8 -9.22337203685478e+18
+c9 -9.22337203685478e+18
+c10 -9.22337203685478e+18
+c12 -9999.9999
+execute my_delete ;
+set @arg00= 1.11111111111111111111e+50 ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c6' at row 1
+Warning 1264 Out of range value for column 'c7' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 127
+c2 32767
+c3 8388607
+c4 2147483647
+c5 2147483647
+c6 9223372036854775807
+c7 3.40282e+38
+c8 1.11111111111111e+50
+c9 1.11111111111111e+50
+c10 1.11111111111111e+50
+c12 9999.9999
+execute my_delete ;
+set @arg00= '1.11111111111111111111e+50' ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c6' at row 1
+Warning 1264 Out of range value for column 'c7' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 127
+c2 32767
+c3 8388607
+c4 2147483647
+c5 2147483647
+c6 9223372036854775807
+c7 3.40282e+38
+c8 1.11111111111111e+50
+c9 1.11111111111111e+50
+c10 1.11111111111111e+50
+c12 9999.9999
+execute my_delete ;
+set @arg00= -1.11111111111111111111e+50 ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c6' at row 1
+Warning 1264 Out of range value for column 'c7' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 -128
+c2 -32768
+c3 -8388608
+c4 -2147483648
+c5 -2147483648
+c6 -9223372036854775808
+c7 -3.40282e+38
+c8 -1.11111111111111e+50
+c9 -1.11111111111111e+50
+c10 -1.11111111111111e+50
+c12 -9999.9999
+execute my_delete ;
+set @arg00= '-1.11111111111111111111e+50' ;
+execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+Warnings:
+Warning 1264 Out of range value for column 'c1' at row 1
+Warning 1264 Out of range value for column 'c2' at row 1
+Warning 1264 Out of range value for column 'c3' at row 1
+Warning 1264 Out of range value for column 'c4' at row 1
+Warning 1264 Out of range value for column 'c5' at row 1
+Warning 1264 Out of range value for column 'c6' at row 1
+Warning 1264 Out of range value for column 'c7' at row 1
+Warning 1264 Out of range value for column 'c12' at row 1
+execute my_select ;
+c1 -128
+c2 -32768
+c3 -8388608
+c4 -2147483648
+c5 -2147483648
+c6 -9223372036854775808
+c7 -3.40282e+38
+c8 -1.11111111111111e+50
+c9 -1.11111111111111e+50
+c10 -1.11111111111111e+50
+c12 -9999.9999
+execute my_delete ;
+test_sequence
+-- insert into string columns --
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c20' at row 1
+select c1, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30
+from t9 where c1 >= 20
+order by c1 ;
+c1 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30
+20 2 20 20 20 20 20 20 20 20 20 20
+21 2 21 21 21 21 21 21 21 21 21 21
+22 2 22 22 22 22 22 22 22 22 22 22
+23 2 23 23 23 23 23 23 23 23 23 23
+30 3 30 30 30 30 30 30 30 30 30 30
+31 3 31 31 31 31 31 31 31 31 31 31
+32 3 32 32 32 32 32 32 32 32 32 32
+33 3 33 33 33 33 33 33 33 33 33 33
+40 4 40 40 40 40 40 40 40 40 40 40
+41 4 41 41 41 41 41 41 41 41 41 41
+42 4 42 42 42 42 42 42 42 42 42 42
+43 4 43 43 43 43 43 43 43 43 43 43
+50 5 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0
+51 5 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0
+52 5 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0
+53 5 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0
+54 5 54 54 54.00 54.00 54.00 54.00 54.00 54.00 54.00 54.00
+55 5 55 55 55 55 55 55 55 55 55 55
+56 6 56 56 56.00 56.00 56.00 56.00 56.00 56.00 56.00 56.00
+57 6 57 57 57.00 57.00 57.00 57.00 57.00 57.00 57.00 57.00
+60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+test_sequence
+-- select .. where string column = .. --
+set @arg00= '20';
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and
+c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and
+c27= '20' and c28= '20' and c29= '20' and c30= '20' ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and
+c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and
+c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and
+ c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and
+ c27= '20' and c28= '20' and c29= '20' and c30= '20'" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and
+ c21= ? and c22= ? and c23= ? and c25= ? and
+ c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= CAST('20' as binary);
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20)))
+= CAST('20' as binary) and c21= CAST('20' as binary)
+and c22= CAST('20' as binary) and c23= CAST('20' as binary) and
+c24= CAST('20' as binary) and c25= CAST('20' as binary) and
+c26= CAST('20' as binary) and c27= CAST('20' as binary) and
+c28= CAST('20' as binary) and c29= CAST('20' as binary) and
+c30= CAST('20' as binary) ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(@arg00,1+length(c20))) = @arg00 and
+c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and
+c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and
+c30= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20)))
+ = CAST('20' as binary) and c21= CAST('20' as binary)
+ and c22= CAST('20' as binary) and c23= CAST('20' as binary) and
+ c24= CAST('20' as binary) and c25= CAST('20' as binary) and
+ c26= CAST('20' as binary) and c27= CAST('20' as binary) and
+ c28= CAST('20' as binary) and c29= CAST('20' as binary) and
+ c30= CAST('20' as binary)" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(?,1+length(c20))) = ? and c21= ? and
+ c22= ? and c23= ? and c25= ? and c26= ? and c27= ? and c28= ? and
+ c29= ? and c30= ?";
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= 20;
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and
+c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and
+c27= 20 and c28= 20 and c29= 20 and c30= 20 ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and
+c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and
+c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and
+ c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and
+ c27= 20 and c28= 20 and c29= 20 and c30= 20" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and
+ c21= ? and c22= ? and c23= ? and c25= ? and
+ c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= 20.0;
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and
+c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and
+c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0 ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and
+c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and
+c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and
+ c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and
+ c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and
+ c21= ? and c22= ? and c23= ? and c25= ? and
+ c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00,
+@arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+delete from t9 ;
+test_sequence
+-- insert into date/time columns --
+Warnings:
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c17' at row 1
+Warnings:
+Warning 1264 Out of range value for column 'c13' at row 1
+Warning 1264 Out of range value for column 'c14' at row 1
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1264 Out of range value for column 'c13' at row 1
+Warning 1264 Out of range value for column 'c14' at row 1
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1264 Out of range value for column 'c13' at row 1
+Warning 1264 Out of range value for column 'c14' at row 1
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1264 Out of range value for column 'c13' at row 1
+Warning 1264 Out of range value for column 'c14' at row 1
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+Warnings:
+Warning 1265 Data truncated for column 'c15' at row 1
+Warning 1264 Out of range value for column 'c16' at row 1
+Warning 1264 Out of range value for column 'c17' at row 1
+select c1, c13, c14, c15, c16, c17 from t9 order by c1 ;
+c1 c13 c14 c15 c16 c17
+20 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+21 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+22 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+23 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+30 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+31 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+32 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+33 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991
+40 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+41 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+42 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+43 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+50 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+51 2010-00-00 2010-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+52 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+53 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000
+60 NULL NULL 1991-01-01 01:01:01 NULL NULL
+61 NULL NULL 1991-01-01 01:01:01 NULL NULL
+62 NULL NULL 1991-01-01 01:01:01 NULL NULL
+63 NULL NULL 1991-01-01 01:01:01 NULL NULL
+71 NULL NULL 1991-01-01 01:01:01 NULL NULL
+73 NULL NULL 1991-01-01 01:01:01 NULL NULL
+81 NULL NULL 1991-01-01 01:01:01 NULL NULL
+83 NULL NULL 1991-01-01 01:01:01 NULL NULL
+test_sequence
+-- select .. where date/time column = .. --
+set @arg00= '1991-01-01 01:01:01' ;
+select 'true' as found from t9
+where c1= 20 and c13= '1991-01-01 01:01:01' and c14= '1991-01-01 01:01:01' and
+c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and
+c17= '1991-01-01 01:01:01' ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and c13= @arg00 and c14= @arg00 and c15= @arg00 and c16= @arg00
+and c17= @arg00 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c13= '1991-01-01 01:01:01' and c14= '1991-01-01 01:01:01' and
+ c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and
+ c17= '1991-01-01 01:01:01'" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c13= ? and c14= ? and c15= ? and c16= ? and c17= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= CAST('1991-01-01 01:01:01' as datetime) ;
+select 'true' as found from t9
+where c1= 20 and c13= CAST('1991-01-01 01:01:01' as datetime) and
+c14= CAST('1991-01-01 01:01:01' as datetime) and
+c15= CAST('1991-01-01 01:01:01' as datetime) and
+c16= CAST('1991-01-01 01:01:01' as datetime) and
+c17= CAST('1991-01-01 01:01:01' as datetime) ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and c13= @arg00 and c14= @arg00 and c15= @arg00 and c16= @arg00
+and c17= @arg00 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c13= CAST('1991-01-01 01:01:01' as datetime) and
+ c14= CAST('1991-01-01 01:01:01' as datetime) and
+ c15= CAST('1991-01-01 01:01:01' as datetime) and
+ c16= CAST('1991-01-01 01:01:01' as datetime) and
+ c17= CAST('1991-01-01 01:01:01' as datetime)" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c13= ? and c14= ? and c15= ? and c16= ? and c17= ?" ;
+execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ;
+found
+true
+set @arg00= 1991 ;
+select 'true' as found from t9
+where c1= 20 and c17= 1991 ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and c17= @arg00 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c17= 1991" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and c17= ?" ;
+execute stmt1 using @arg00 ;
+found
+true
+set @arg00= 1.991e+3 ;
+select 'true' as found from t9
+where c1= 20 and abs(c17 - 1.991e+3) < 0.01 ;
+found
+true
+select 'true' as found from t9
+where c1= 20 and abs(c17 - @arg00) < 0.01 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and abs(c17 - 1.991e+3) < 0.01" ;
+execute stmt1 ;
+found
+true
+prepare stmt1 from "select 'true' as found from t9
+where c1= 20 and abs(c17 - ?) < 0.01" ;
+execute stmt1 using @arg00 ;
+found
+true
+drop table t1, t9;
diff --git a/mysql-test/t/disabled.def b/mysql-test/t/disabled.def
index ab3892cd5ca..ed0e665c9ce 100644
--- a/mysql-test/t/disabled.def
+++ b/mysql-test/t/disabled.def
@@ -35,8 +35,9 @@ synchronization : Bug#24529 Test 'synchronization' fails on Mac pushb
#ndb_binlog_ddl_multi : BUG#18976 2006-04-10 kent CRBR: multiple binlog, second binlog may miss schema log events
#ndb_binlog_discover : bug#21806 2006-08-24
#ndb_autodiscover3 : bug#21806
-
mysql_upgrade : Bug#25074 mysql_upgrade gives inconsistent results
plugin : Bug#25659 memory leak via "plugins" test
rpl_ndb_dd_advance : Bug#25913 rpl_ndb_dd_advance fails randomly
ndb_alter_table : Bug#25774 ndb_alter_table.test fails in DBUG_ASSERT() on Linux x64
+maria : Until maria is fully functional
+ps_maria : Until maria is fully functional
diff --git a/mysql-test/t/events_logs_tests.test b/mysql-test/t/events_logs_tests.test
index e45fea1dfad..ba6049d0810 100644
--- a/mysql-test/t/events_logs_tests.test
+++ b/mysql-test/t/events_logs_tests.test
@@ -94,7 +94,7 @@ CREATE EVENT long_event2 ON SCHEDULE EVERY 1 MINUTE DO INSERT INTO slow_event_te
SELECT * FROM slow_event_test;
--echo "Check slow log. Should see 1 row because 4 is over the threshold of 3 for GLOBAL, though under SESSION which is 10"
--replace_column 1 USER_HOST 2 SLEEPVAL
-SELECT user_host, query_time, db, sql_text FROM mysql.slow_log;
+SELECT user_host, query_time, db, sql_text FROM mysql.slow_log where sql_text <> "DROP EVENT long_event";
DROP EVENT long_event2;
--echo "Make it quite long"
SET SESSION long_query_time=300;
diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test
new file mode 100644
index 00000000000..87a38ff60b0
--- /dev/null
+++ b/mysql-test/t/maria.test
@@ -0,0 +1,1082 @@
+#
+# Test bugs in the MARIA code
+#
+-- source include/have_maria.inc
+
+let $default=`select @@global.storage_engine`;
+set global storage_engine=maria;
+set session storage_engine=maria;
+
+# Initialise
+--disable_warnings
+drop table if exists t1,t2;
+--enable_warnings
+SET SQL_WARNINGS=1;
+
+#
+# Test problem with CHECK TABLE;
+#
+
+CREATE TABLE t1 (
+ STRING_DATA char(255) default NULL,
+ KEY string_data (STRING_DATA)
+);
+
+INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA');
+INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD');
+INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF');
+INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG');
+INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH');
+INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW');
+CHECK TABLE t1;
+drop table t1;
+
+#
+# Test problem with rows that are 65517-65520 bytes long
+#
+
+create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a));
+
+let $1=100;
+disable_query_log;
+--disable_warnings
+SET SQL_WARNINGS=0;
+while ($1)
+{
+ eval insert into t1 (b) values(repeat(char(65+$1),65550-$1));
+ dec $1;
+}
+SET SQL_WARNINGS=1;
+--enable_warnings
+enable_query_log;
+check table t1;
+repair table t1;
+delete from t1 where (a & 1);
+check table t1;
+repair table t1;
+check table t1;
+drop table t1;
+
+#
+# Test bug: Two optimize in a row reset index cardinality
+#
+
+create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b));
+insert into t1 (b) values (1),(2),(2),(2),(2);
+optimize table t1;
+show index from t1;
+optimize table t1;
+show index from t1;
+drop table t1;
+
+#
+# Test of how ORDER BY works when doing it on the whole table
+#
+
+create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b));
+insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4);
+explain select * from t1 order by a;
+explain select * from t1 order by b;
+explain select * from t1 order by c;
+explain select a from t1 order by a;
+explain select b from t1 order by b;
+explain select a,b from t1 order by b;
+explain select a,b from t1;
+explain select a,b,c from t1;
+drop table t1;
+
+#
+# Test of OPTIMIZE of locked and modified tables
+#
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1), (2), (3);
+LOCK TABLES t1 WRITE;
+INSERT INTO t1 VALUES (1), (2), (3);
+OPTIMIZE TABLE t1;
+DROP TABLE t1;
+
+#
+# Test of optimize, when only mi_sort_index (but not mi_repair*) is done
+# in ha_maria::repair, and index size is changed (decreased).
+#
+
+create table t1 ( t1 char(255), key(t1(250)));
+insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169');
+insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203');
+insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767');
+insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907');
+insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011');
+insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101');
+insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564');
+insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002');
+insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834');
+insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016');
+insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899');
+insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482');
+insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139');
+insert t1 values ('485448544854485448544854485448544854485477847784778477847784778477847784778477');
+insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755');
+insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188');
+insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454');
+insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656');
+insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741');
+insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178');
+insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049');
+insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635');
+insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573');
+insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878');
+insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077');
+delete from t1 where t1>'2';
+insert t1 values ('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'),
+('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'),
+('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47');
+optimize table t1;
+check table t1;
+drop table t1;
+
+#
+# test of maria with huge number of packed fields
+#
+
+create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8
+int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17
+int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int,
+i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34
+int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int,
+i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51
+int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int,
+i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68
+int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int,
+i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85
+int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int,
+i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102
+int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110
+int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118
+int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126
+int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134
+int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142
+int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150
+int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158
+int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166
+int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174
+int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182
+int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190
+int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198
+int, i199 int, i200 int, i201 int, i202 int, i203 int, i204 int, i205 int, i206
+int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214
+int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222
+int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230
+int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238
+int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246
+int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254
+int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262
+int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270
+int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278
+int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286
+int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294
+int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302
+int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310
+int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318
+int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326
+int, i327 int, i328 int, i329 int, i330 int, i331 int, i332 int, i333 int, i334
+int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342
+int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350
+int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358
+int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366
+int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374
+int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382
+int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390
+int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398
+int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406
+int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414
+int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422
+int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430
+int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438
+int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446
+int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454
+int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462
+int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470
+int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478
+int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486
+int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494
+int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502
+int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510
+int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518
+int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526
+int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534
+int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542
+int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550
+int, i551 int, i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558
+int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566
+int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574
+int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582
+int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590
+int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598
+int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606
+int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614
+int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622
+int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630
+int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638
+int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646
+int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654
+int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662
+int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670
+int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678
+int, i679 int, i680 int, i681 int, i682 int, i683 int, i684 int, i685 int, i686
+int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694
+int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702
+int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710
+int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718
+int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726
+int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734
+int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742
+int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750
+int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758
+int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766
+int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774
+int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782
+int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790
+int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798
+int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806
+int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814
+int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822
+int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830
+int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838
+int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846
+int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854
+int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862
+int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870
+int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878
+int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886
+int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894
+int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902
+int, i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910
+int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918
+int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926
+int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934
+int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942
+int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950
+int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958
+int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966
+int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974
+int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982
+int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990
+int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998
+int, i999 int, i1000 int, b blob) row_format=dynamic;
+insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei");
+update t1 set b=repeat('a',256);
+update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0;
+check table t1;
+delete from t1 where i8=1;
+select i1,i2 from t1;
+check table t1;
+drop table t1;
+
+#
+# Test of REPAIR that once failed
+#
+CREATE TABLE `t1` (
+ `post_id` mediumint(8) unsigned NOT NULL auto_increment,
+ `topic_id` mediumint(8) unsigned NOT NULL default '0',
+ `post_time` datetime NOT NULL default '0000-00-00 00:00:00',
+ `post_text` text NOT NULL,
+ `icon_url` varchar(10) NOT NULL default '',
+ `sign` tinyint(1) unsigned NOT NULL default '0',
+ `post_edit` varchar(150) NOT NULL default '',
+ `poster_login` varchar(35) NOT NULL default '',
+ `ip` varchar(15) NOT NULL default '',
+ PRIMARY KEY (`post_id`),
+ KEY `post_time` (`post_time`),
+ KEY `ip` (`ip`),
+ KEY `poster_login` (`poster_login`),
+ KEY `topic_id` (`topic_id`),
+ FULLTEXT KEY `post_text` (`post_text`)
+);
+
+INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test');
+
+REPAIR TABLE t1;
+CHECK TABLE t1;
+drop table t1;
+
+#
+# Test of creating table with too long key
+#
+
+--error 1071
+CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e));
+CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255));
+--error 1071
+ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e);
+DROP TABLE t1;
+
+#
+# Test of cardinality of keys with NULL
+#
+
+CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a));
+INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4);
+create table t2 (a int not null, b int, c int, key(b), key(c), key(a));
+INSERT into t2 values (1,1,1), (2,2,2);
+optimize table t1;
+show index from t1;
+explain select * from t1,t2 where t1.a=t2.a;
+explain select * from t1,t2 force index(a) where t1.a=t2.a;
+explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a;
+explain select * from t1,t2 where t1.b=t2.b;
+explain select * from t1,t2 force index(c) where t1.a=t2.a;
+explain select * from t1 where a=0 or a=2;
+explain select * from t1 force index (a) where a=0 or a=2;
+explain select * from t1 where c=1;
+explain select * from t1 use index() where c=1;
+drop table t1,t2;
+
+#
+# Test bug when updating a split dynamic row where keys are not changed
+#
+
+create table t1 (a int not null auto_increment primary key, b varchar(255));
+insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100));
+update t1 set b=repeat(left(b,1),200) where a=1;
+delete from t1 where (a & 1)= 0;
+update t1 set b=repeat('e',200) where a=1;
+flush tables;
+check table t1;
+
+#
+# check updating with keys
+#
+
+disable_query_log;
+let $1 = 100;
+while ($1)
+{
+ eval insert into t1 (b) values (repeat(char(($1 & 32)+65), $1));
+ dec $1;
+}
+enable_query_log;
+update t1 set b=repeat(left(b,1),255) where a between 1 and 5;
+update t1 set b=repeat(left(b,1),10) where a between 32 and 43;
+update t1 set b=repeat(left(b,1),2) where a between 64 and 66;
+update t1 set b=repeat(left(b,1),65) where a between 67 and 70;
+check table t1;
+insert into t1 (b) values (repeat('z',100));
+update t1 set b="test" where left(b,1) > 'n';
+check table t1;
+drop table t1;
+
+#
+# two bugs in maria-space-stripping feature
+#
+create table t1 ( a text not null, key a (a(20)));
+insert into t1 values ('aaa '),('aaa'),('aa');
+check table t1;
+repair table t1;
+select concat(a,'.') from t1 where a='aaa';
+select concat(a,'.') from t1 where binary a='aaa';
+update t1 set a='bbb' where a='aaa';
+select concat(a,'.') from t1;
+drop table t1;
+
+#
+# Third bug in the same code (BUG#2295)
+#
+
+create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10)));
+insert into t1 values('807780', '477', '165');
+insert into t1 values('807780', '477', '162');
+insert into t1 values('807780', '472', '162');
+select * from t1 where a='807780' and b='477' and c='165';
+drop table t1;
+
+#
+# space-stripping in _mi_prefix_search: BUG#5284
+#
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a));
+INSERT t1 VALUES ("can \tcan");
+INSERT t1 VALUES ("can can");
+INSERT t1 VALUES ("can");
+SELECT * FROM t1;
+CHECK TABLE t1;
+DROP TABLE t1;
+
+#
+# Verify blob handling
+#
+create table t1 (a blob);
+insert into t1 values('a '),('a');
+select concat(a,'.') from t1 where a='a';
+select concat(a,'.') from t1 where a='a ';
+alter table t1 add key(a(2));
+select concat(a,'.') from t1 where a='a';
+select concat(a,'.') from t1 where a='a ';
+drop table t1;
+
+#
+# Test text and unique
+#
+create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20)));
+insert into t1 (b) values ('a'),('b'),('c');
+select concat(b,'.') from t1;
+update t1 set b='b ' where a=2;
+--error 1062
+update t1 set b='b ' where a > 1;
+--error 1062
+insert into t1 (b) values ('b');
+select * from t1;
+delete from t1 where b='b';
+select a,concat(b,'.') from t1;
+drop table t1;
+
+#
+# Test keys with 0 segments. (Bug #3203)
+#
+create table t1 (a int not null);
+create table t2 (a int not null, primary key (a));
+insert into t1 values (1);
+insert into t2 values (1),(2);
+select sql_big_result distinct t1.a from t1,t2 order by t2.a;
+select distinct t1.a from t1,t2 order by t2.a;
+select sql_big_result distinct t1.a from t1,t2;
+explain select sql_big_result distinct t1.a from t1,t2 order by t2.a;
+explain select distinct t1.a from t1,t2 order by t2.a;
+drop table t1,t2;
+
+#
+# Bug#14616 - Freshly imported table returns error 124 when using LIMIT
+#
+create table t1 (
+ c1 varchar(32),
+ key (c1)
+);
+alter table t1 disable keys;
+insert into t1 values ('a'), ('b');
+select c1 from t1 order by c1 limit 1;
+drop table t1;
+
+#
+# Bug #14400 Join could miss concurrently inserted row
+#
+# Partial key.
+create table t1 (a int not null, primary key(a));
+create table t2 (a int not null, b int not null, primary key(a,b));
+insert into t1 values (1),(2),(3),(4),(5),(6);
+insert into t2 values (1,1),(2,1);
+lock tables t1 read local, t2 read local;
+select straight_join * from t1,t2 force index (primary) where t1.a=t2.a;
+connect (root,localhost,root,,test,$MASTER_MYPORT,master.sock);
+insert into t2 values(2,0);
+disconnect root;
+connection default;
+select straight_join * from t1,t2 force index (primary) where t1.a=t2.a;
+drop table t1,t2;
+#
+# Full key.
+CREATE TABLE t1 (c1 varchar(250) NOT NULL);
+CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1));
+INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003');
+INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004');
+LOCK TABLES t1 READ LOCAL, t2 READ LOCAL;
+SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2
+ WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1;
+connect (con1,localhost,root,,);
+connection con1;
+INSERT INTO t2 VALUES ('test000001'), ('test000005');
+disconnect con1;
+connection default;
+SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2
+ WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1;
+DROP TABLE t1,t2;
+
+# End of 4.0 tests
+
+#
+# Test RTREE index
+#
+--error 1235, 1289
+CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`));
+# INSERT INTO t1 VALUES (1,1),(1,1);
+# DELETE FROM rt WHERE a<1;
+# DROP TABLE IF EXISTS t1;
+
+create table t1 (a int, b varchar(200), c text not null) checksum=1;
+create table t2 (a int, b varchar(200), c text not null) checksum=0;
+insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, "");
+insert t2 select * from t1;
+checksum table t1, t2, t3 quick;
+checksum table t1, t2, t3;
+checksum table t1, t2, t3 extended;
+#show table status;
+drop table t1,t2;
+
+create table t1 (a int, key (a));
+show keys from t1;
+alter table t1 disable keys;
+show keys from t1;
+create table t2 (a int);
+let $i=1000;
+set @@rand_seed1=31415926,@@rand_seed2=2718281828;
+--disable_query_log
+while ($i)
+{
+ dec $i;
+ insert t2 values (rand()*100000);
+}
+--enable_query_log
+insert t1 select * from t2;
+show keys from t1;
+alter table t1 enable keys;
+show keys from t1;
+alter table t1 engine=heap;
+alter table t1 disable keys;
+show keys from t1;
+drop table t1,t2;
+
+#
+# index search for NULL in blob. Bug #4816
+#
+create table t1 ( a tinytext, b char(1), index idx (a(1),b) );
+insert into t1 values (null,''), (null,'');
+explain select count(*) from t1 where a is null;
+select count(*) from t1 where a is null;
+drop table t1;
+
+#
+# bug9188 - Corruption Can't open file: 'table.MYI' (errno: 145)
+#
+create table t1 (c1 int, c2 varchar(4) not null default '',
+ key(c2(3))) default charset=utf8;
+insert into t1 values (1,'A'), (2, 'B'), (3, 'A');
+update t1 set c2='A B' where c1=2;
+check table t1;
+drop table t1;
+
+
+#
+# Bug#12296 - CHECKSUM TABLE reports 0 for the table
+# This happened if the first record was marked as deleted.
+#
+create table t1 (c1 int);
+insert into t1 values (1),(2),(3),(4);
+checksum table t1;
+delete from t1 where c1 = 1;
+create table t2 as select * from t1;
+# The following returns 0 with the bug in place.
+checksum table t1;
+# The above should give the same number as the following.
+checksum table t2;
+drop table t1, t2;
+
+#
+# BUG#12232: New maria_stats_method variable.
+#
+
+show variables like 'maria_stats_method';
+
+create table t1 (a int, key(a));
+insert into t1 values (0),(1),(2),(3),(4);
+insert into t1 select NULL from t1;
+
+# default: NULLs considered unequal
+analyze table t1;
+show index from t1;
+insert into t1 values (11);
+delete from t1 where a=11;
+check table t1;
+show index from t1;
+
+# Set nulls to be equal:
+set maria_stats_method=nulls_equal;
+show variables like 'maria_stats_method';
+insert into t1 values (11);
+delete from t1 where a=11;
+
+analyze table t1;
+show index from t1;
+
+insert into t1 values (11);
+delete from t1 where a=11;
+
+check table t1;
+show index from t1;
+
+# Set the stats method back to default (nulls unequal)
+set maria_stats_method=DEFAULT;
+show variables like 'maria_stats_method';
+insert into t1 values (11);
+delete from t1 where a=11;
+
+analyze table t1;
+show index from t1;
+
+insert into t1 values (11);
+delete from t1 where a=11;
+
+check table t1;
+show index from t1;
+
+drop table t1;
+
+# WL#2609, CSC#XXXX: MARIA
+set maria_stats_method=nulls_ignored;
+show variables like 'maria_stats_method';
+
+create table t1 (
+ a char(3), b char(4), c char(5), d char(6),
+ key(a,b,c,d)
+);
+insert into t1 values ('bcd','def1', NULL, 'zz');
+insert into t1 values ('bcd','def2', NULL, 'zz');
+insert into t1 values ('bce','def1', 'yuu', NULL);
+insert into t1 values ('bce','def2', NULL, 'quux');
+analyze table t1;
+show index from t1;
+delete from t1;
+analyze table t1;
+show index from t1;
+
+set maria_stats_method=DEFAULT;
+drop table t1;
+
+# BUG#13814 - key value packed incorrectly for TINYBLOBs
+
+create table t1(
+ cip INT NOT NULL,
+ time TIME NOT NULL,
+ score INT NOT NULL DEFAULT 0,
+ bob TINYBLOB
+);
+
+insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03');
+insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'),
+ (6, 'c', '00:06');
+select * from t1 where bob is null and cip=1;
+create index bug on t1 (bob(22), cip, time);
+select * from t1 where bob is null and cip=1;
+drop table t1;
+
+#
+# Bug#14980 - COUNT(*) incorrect on MARIA table with certain INDEX
+#
+create table t1 (
+ id1 int not null auto_increment,
+ id2 int not null default '0',
+ t text not null,
+ primary key (id1),
+ key x (id2, t(32))
+) engine=maria; # engine clause is redundant here, but we test that it parses
+insert into t1 (id2, t) values
+(10, 'abc'), (10, 'abc'), (10, 'abc'),
+(20, 'abc'), (20, 'abc'), (20, 'def'),
+(10, 'abc'), (10, 'abc');
+select count(*) from t1 where id2 = 10;
+select count(id1) from t1 where id2 = 10;
+drop table t1;
+
+#
+# BUG#20357 - Got error 124 from storage engine using MIN and MAX functions
+# in queries
+#
+CREATE TABLE t1(a TINYINT, KEY(a));
+INSERT INTO t1 VALUES(1);
+SELECT MAX(a) FROM t1 IGNORE INDEX(a);
+ALTER TABLE t1 DISABLE KEYS;
+SELECT MAX(a) FROM t1;
+SELECT MAX(a) FROM t1 IGNORE INDEX(a);
+DROP TABLE t1;
+
+#
+# BUG#18036 - update of table joined to self reports table as crashed
+#
+CREATE TABLE t1(a CHAR(9), b VARCHAR(7));
+INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx');
+UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb';
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Bug#8283 - OPTIMIZE TABLE causes data loss
+#
+SET @@maria_repair_threads=2;
+SHOW VARIABLES LIKE 'maria_repair%';
+#
+# Test OPTIMIZE. This creates a new data file.
+CREATE TABLE t1 (
+ `_id` int(11) NOT NULL default '0',
+ `url` text,
+ `email` text,
+ `description` text,
+ `loverlap` int(11) default NULL,
+ `roverlap` int(11) default NULL,
+ `lneighbor_id` int(11) default NULL,
+ `rneighbor_id` int(11) default NULL,
+ `length_` int(11) default NULL,
+ `sequence` mediumtext,
+ `name` text,
+ `_obj_class` text NOT NULL,
+ PRIMARY KEY (`_id`),
+ UNIQUE KEY `sequence_name_index` (`name`(50)),
+ KEY (`length_`)
+) ENGINE=maria DEFAULT CHARSET=latin1;
+#
+INSERT INTO t1 VALUES
+ (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+ (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+ (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+ (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+ (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+ (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+ (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+ (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+ (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+#
+SELECT _id FROM t1;
+DELETE FROM t1 WHERE _id < 8;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+CHECK TABLE t1 EXTENDED;
+OPTIMIZE TABLE t1;
+CHECK TABLE t1 EXTENDED;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+SELECT _id FROM t1;
+DROP TABLE t1;
+#
+# Test REPAIR QUICK. This retains the old data file.
+CREATE TABLE t1 (
+ `_id` int(11) NOT NULL default '0',
+ `url` text,
+ `email` text,
+ `description` text,
+ `loverlap` int(11) default NULL,
+ `roverlap` int(11) default NULL,
+ `lneighbor_id` int(11) default NULL,
+ `rneighbor_id` int(11) default NULL,
+ `length_` int(11) default NULL,
+ `sequence` mediumtext,
+ `name` text,
+ `_obj_class` text NOT NULL,
+ PRIMARY KEY (`_id`),
+ UNIQUE KEY `sequence_name_index` (`name`(50)),
+ KEY (`length_`)
+) ENGINE=maria DEFAULT CHARSET=latin1;
+#
+INSERT INTO t1 VALUES
+ (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''),
+ (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''),
+ (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''),
+ (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''),
+ (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''),
+ (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''),
+ (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''),
+ (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''),
+ (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9','');
+#
+SELECT _id FROM t1;
+DELETE FROM t1 WHERE _id < 8;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+CHECK TABLE t1 EXTENDED;
+REPAIR TABLE t1 QUICK;
+CHECK TABLE t1 EXTENDED;
+--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 #
+SHOW TABLE STATUS LIKE 't1';
+SELECT _id FROM t1;
+DROP TABLE t1;
+#
+SET @@maria_repair_threads=1;
+SHOW VARIABLES LIKE 'maria_repair%';
+#
+# Test varchar
+#
+
+source include/varchar.inc;
+
+#
+# Some errors/warnings on create
+#
+
+create table t1 (v varchar(65530), key(v));
+drop table if exists t1;
+create table t1 (v varchar(65536));
+show create table t1;
+drop table t1;
+create table t1 (v varchar(65530) character set utf8);
+show create table t1;
+drop table t1;
+
+# MARIA specific varchar tests
+--error 1118
+create table t1 (v varchar(65535));
+
+#
+# Test concurrent insert
+# First with static record length
+#
+set @save_concurrent_insert=@@concurrent_insert;
+set global concurrent_insert=1;
+create table t1 (a int);
+insert into t1 values (1),(2),(3),(4),(5);
+lock table t1 read local;
+connect (con1,localhost,root,,);
+connection con1;
+# Insert in table without hole
+insert into t1 values(6),(7);
+connection default;
+unlock tables;
+delete from t1 where a>=3 and a<=4;
+lock table t1 read local;
+connection con1;
+set global concurrent_insert=2;
+# Insert in table with hole -> Should insert at end
+insert into t1 values (8),(9);
+connection default;
+unlock tables;
+# Insert into hole
+insert into t1 values (10),(11),(12);
+select * from t1;
+check table t1;
+drop table t1;
+disconnect con1;
+
+# Same test with dynamic record length
+create table t1 (a int, b varchar(30) default "hello");
+insert into t1 (a) values (1),(2),(3),(4),(5);
+lock table t1 read local;
+connect (con1,localhost,root,,);
+connection con1;
+# Insert in table without hole
+insert into t1 (a) values(6),(7);
+connection default;
+unlock tables;
+delete from t1 where a>=3 and a<=4;
+lock table t1 read local;
+connection con1;
+set global concurrent_insert=2;
+# Insert in table with hole -> Should insert at end
+insert into t1 (a) values (8),(9);
+connection default;
+unlock tables;
+# Insert into hole
+insert into t1 (a) values (10),(11),(12);
+select a from t1;
+check table t1;
+drop table t1;
+disconnect con1;
+set global concurrent_insert=@save_concurrent_insert;
+
+
+# BUG#9622 - ANALYZE TABLE and ALTER TABLE .. ENABLE INDEX produce
+# different statistics on the same table with NULL values.
+create table t1 (a int, key(a));
+
+insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL);
+analyze table t1;
+show keys from t1;
+
+alter table t1 disable keys;
+alter table t1 enable keys;
+show keys from t1;
+
+drop table t1;
+
+#
+# Bug#8706 - temporary table with data directory option fails
+#
+connect (session1,localhost,root,,);
+connect (session2,localhost,root,,);
+
+connection session1;
+disable_query_log;
+eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 9 a;
+enable_query_log;
+disable_result_log;
+show create table t1;
+enable_result_log;
+
+connection session2;
+disable_query_log;
+eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 99 a;
+enable_query_log;
+disable_result_log;
+show create table t1;
+enable_result_log;
+
+connection default;
+create table t1 (a int) select 42 a;
+
+connection session1;
+select * from t1;
+disconnect session1;
+connection session2;
+select * from t1;
+disconnect session2;
+connection default;
+select * from t1;
+drop table t1;
+
+--echo End of 4.1 tests
+
+#
+# Bug#10056 - PACK_KEYS option take values greater than 1 while creating table
+#
+create table t1 (c1 int) pack_keys=0;
+create table t2 (c1 int) pack_keys=1;
+create table t3 (c1 int) pack_keys=default;
+--error 1064
+create table t4 (c1 int) pack_keys=2;
+drop table t1, t2, t3;
+
+--echo End of 5.0 tests
+
+#
+# Test of key_block_size
+#
+
+create table t1 (a int not null, key `a` (a) key_block_size=1024);
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, key `a` (a) key_block_size=2048);
+show create table t1;
+drop table t1;
+
+create table t1 (a varchar(2048), key `a` (a));
+show create table t1;
+drop table t1;
+
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1024);
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024;
+show create table t1;
+alter table t1 key_block_size=2048;
+show create table t1;
+alter table t1 add c int, add key (c);
+show create table t1;
+alter table t1 key_block_size=0;
+alter table t1 add d int, add key (d);
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192;
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192;
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384;
+show create table t1;
+drop table t1;
+
+
+# Test limits and errors of key_block_size
+
+create table t1 (a int not null, key `a` (a) key_block_size=512);
+show create table t1;
+drop table t1;
+
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000);
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, key `a` (a) key_block_size=1025);
+show create table t1;
+drop table t1;
+
+--error 1064
+create table t1 (a int not null, key key_block_size=1024 (a));
+--error 1064
+create table t1 (a int not null, key `a` key_block_size=1024 (a));
+
+
+#
+# Bug#22119 - Changing MI_KEY_BLOCK_LENGTH makes a wrong myisamchk
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 VARCHAR(300),
+ KEY (c1) KEY_BLOCK_SIZE 1024,
+ KEY (c2) KEY_BLOCK_SIZE 8192
+ );
+INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))),
+ (11, REPEAT('b', CEIL(RAND() * 300))),
+ (12, REPEAT('c', CEIL(RAND() * 300))),
+ (13, REPEAT('d', CEIL(RAND() * 300))),
+ (14, REPEAT('e', CEIL(RAND() * 300))),
+ (15, REPEAT('f', CEIL(RAND() * 300))),
+ (16, REPEAT('g', CEIL(RAND() * 300))),
+ (17, REPEAT('h', CEIL(RAND() * 300))),
+ (18, REPEAT('i', CEIL(RAND() * 300))),
+ (19, REPEAT('j', CEIL(RAND() * 300))),
+ (20, REPEAT('k', CEIL(RAND() * 300))),
+ (21, REPEAT('l', CEIL(RAND() * 300))),
+ (22, REPEAT('m', CEIL(RAND() * 300))),
+ (23, REPEAT('n', CEIL(RAND() * 300))),
+ (24, REPEAT('o', CEIL(RAND() * 300))),
+ (25, REPEAT('p', CEIL(RAND() * 300))),
+ (26, REPEAT('q', CEIL(RAND() * 300))),
+ (27, REPEAT('r', CEIL(RAND() * 300))),
+ (28, REPEAT('s', CEIL(RAND() * 300))),
+ (29, REPEAT('t', CEIL(RAND() * 300))),
+ (30, REPEAT('u', CEIL(RAND() * 300))),
+ (31, REPEAT('v', CEIL(RAND() * 300))),
+ (32, REPEAT('w', CEIL(RAND() * 300))),
+ (33, REPEAT('x', CEIL(RAND() * 300))),
+ (34, REPEAT('y', CEIL(RAND() * 300))),
+ (35, REPEAT('z', CEIL(RAND() * 300)));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+CHECK TABLE t1;
+REPAIR TABLE t1;
+DELETE FROM t1 WHERE c1 >= 10;
+CHECK TABLE t1;
+DROP TABLE t1;
+
+--echo End of 5.1 tests
+
+eval set global storage_engine=$default;
+
+# End of 5.2 tests
diff --git a/mysql-test/t/ps_maria.test b/mysql-test/t/ps_maria.test
new file mode 100644
index 00000000000..2be99842132
--- /dev/null
+++ b/mysql-test/t/ps_maria.test
@@ -0,0 +1,46 @@
+###############################################
+# #
+# Prepared Statements test on MARIA tables #
+# #
+###############################################
+
+#
+# NOTE: PLEASE SEE ps_1general.test (bottom)
+# BEFORE ADDING NEW TEST CASES HERE !!!
+
+use test;
+
+-- source include/have_maria.inc
+
+let $type= 'MARIA' ;
+-- source include/ps_create.inc
+-- source include/ps_renew.inc
+
+-- source include/ps_query.inc
+
+# parameter in SELECT ... MATCH/AGAINST
+# case derived from client_test.c: test_bug1500()
+--disable_warnings
+drop table if exists t2 ;
+--enable_warnings
+eval create table t2 (s varchar(25), fulltext(s))
+ENGINE = $type ;
+insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ;
+commit ;
+
+prepare stmt1 from ' select s from t2 where match (s) against (?) ' ;
+set @arg00='Dogs' ;
+execute stmt1 using @arg00 ;
+prepare stmt1 from ' SELECT s FROM t2
+where match (s) against (concat(?,''digger'')) ';
+set @arg00='Grave' ;
+execute stmt1 using @arg00 ;
+drop table t2 ;
+
+-- source include/ps_modify.inc
+-- source include/ps_modify1.inc
+-- source include/ps_conv.inc
+
+drop table t1, t9;
+
+# End of 4.1 tests
diff --git a/mysys/Makefile.am b/mysys/Makefile.am
index fd6a0f76332..b02e0f84ec1 100644
--- a/mysys/Makefile.am
+++ b/mysys/Makefile.am
@@ -30,7 +30,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \
mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \
my_malloc.c my_realloc.c my_once.c mulalloc.c \
my_alloc.c safemalloc.c my_new.cc \
- my_vle.c my_atomic.c \
+ my_vle.c my_atomic.c lf_hash.c \
+ lf_dynarray.c lf_alloc-pin.c \
my_fopen.c my_fstream.c my_getsystime.c \
my_error.c errors.c my_div.c my_messnc.c \
mf_format.c mf_same.c mf_dirname.c mf_fn_ext.c \
@@ -52,7 +53,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \
my_gethostbyname.c rijndael.c my_aes.c sha1.c \
my_handler.c my_netware.c my_largepage.c \
my_memmem.c \
- my_windac.c my_access.c base64.c my_libwrap.c
+ my_windac.c my_access.c base64.c my_libwrap.c \
+ mf_pagecache.c wqueue.c
EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
thr_mutex.c thr_rwlock.c \
CMakeLists.txt mf_soundex.c \
@@ -126,5 +128,6 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES)
$(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS)
$(RM) -f ./test_base64.c
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/mysys/array.c b/mysys/array.c
index 8f4a6087c00..7f909999fe9 100644
--- a/mysys/array.c
+++ b/mysys/array.c
@@ -60,7 +60,8 @@ my_bool init_dynamic_array(DYNAMIC_ARRAY *array, uint element_size,
array->max_element=init_alloc;
array->alloc_increment=alloc_increment;
array->size_of_element=element_size;
- if (!(array->buffer=(char*) my_malloc_ci(element_size*init_alloc,MYF(MY_WME))))
+ if (!(array->buffer=(char*) my_malloc_ci(element_size*init_alloc,
+ MYF(MY_WME))))
{
array->max_element=0;
DBUG_RETURN(TRUE);
@@ -153,7 +154,7 @@ byte *pop_dynamic(DYNAMIC_ARRAY *array)
}
/*
- Replace elemnent in array with given element and index
+ Replace element in array with given element and index
SYNOPSIS
set_dynamic()
@@ -174,19 +175,8 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, gptr element, uint idx)
{
if (idx >= array->elements)
{
- if (idx >= array->max_element)
- {
- uint size;
- char *new_ptr;
- size=(idx+array->alloc_increment)/array->alloc_increment;
- size*= array->alloc_increment;
- if (!(new_ptr=(char*) my_realloc(array->buffer,size*
- array->size_of_element,
- MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
- return TRUE;
- array->buffer=new_ptr;
- array->max_element=size;
- }
+ if (idx >= array->max_element && allocate_dynamic(array, idx))
+ return TRUE;
bzero((gptr) (array->buffer+array->elements*array->size_of_element),
(idx - array->elements)*array->size_of_element);
array->elements=idx+1;
@@ -196,6 +186,42 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, gptr element, uint idx)
return FALSE;
}
+
+/*
+ Ensure that dynamic array has enough elements
+
+ SYNOPSIS
+ allocate_dynamic()
+ array
+ max_elements Number of elements that are needed
+
+ NOTES
+ Any newly allocated elements are NOT initialized
+
+ RETURN VALUE
+ FALSE Ok
+ TRUE Allocation of new memory failed
+*/
+
+my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements)
+{
+ if (max_elements >= array->max_element)
+ {
+ uint size;
+ char *new_ptr;
+ size= (max_elements + array->alloc_increment)/array->alloc_increment;
+ size*= array->alloc_increment;
+ if (!(new_ptr= (char*) my_realloc(array->buffer,size*
+ array->size_of_element,
+ MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
+ return TRUE;
+ array->buffer= new_ptr;
+ array->max_element= size;
+ }
+ return FALSE;
+}
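+
+/*
+  A worked example of the rounding above (illustrative only): with
+  alloc_increment == 16, allocate_dynamic(array, 70) computes
+  size= (70 + 16) / 16 * 16 == 80 (integer division), so max_element
+  becomes 80 and set_dynamic() can then store indexes 0..79 without
+  any further realloc.
+*/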
+
+
/*
Get an element from array by given index
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
new file mode 100644
index 00000000000..e964553a64c
--- /dev/null
+++ b/mysys/lf_alloc-pin.c
@@ -0,0 +1,443 @@
+/* QQ: TODO multi-pinbox */
+/* Copyright (C) 2000 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ wait-free concurrent allocator based on pinning addresses
+
+ It works as follows: every thread (strictly speaking - every CPU, but
+ it's too difficult to do) has a small array of pointers. They're called
+ "pins". Before using an object its address must be stored in this array
+ (pinned). When an object is no longer necessary its address must be
+ removed from this array (unpinned). When a thread wants to free() an
+ object it scans all pins of all threads to see if somebody has this
+ object pinned. If yes - the object is not freed (but stored in a
+ "purgatory"). To reduce the cost of a single free() pins are not scanned
+ on every free() but only added to (thread-local) purgatory. On every
+ LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects
+ are freed.
+
+ Pins are used to solve the ABA problem. To use pins one must obey
+ a pinning protocol:
+ 1. Let's assume that PTR is a shared pointer to an object. Shared means
+ that any thread may modify it anytime to point to a different object
+ and free the old object. Later the freed object may be potentially
+ allocated by another thread. If we're unlucky, that other thread may
+ set PTR to point to this object again. This is the ABA problem.
+ 2. Create a local pointer LOCAL_PTR.
+ 3. Pin the PTR in a loop:
+ do
+ {
+ LOCAL_PTR= PTR;
+ pin(PTR, PIN_NUMBER);
+ } while (LOCAL_PTR != PTR)
+ 4. It is guaranteed that after the loop has ended, LOCAL_PTR
+ points to an object (or NULL, if PTR may be NULL), that
+ will never be freed. It is not guaranteed though
+ that LOCAL_PTR == PTR (as PTR can change any time)
+ 5. When done working with the object, remove the pin:
+ unpin(PIN_NUMBER)
+ 6. When copying pins (as in the list traversing loop:
+ pin(CUR, 1);
+ while ()
+ {
+ do // standard
+ { // pinning
+ NEXT=CUR->next; // loop
+ pin(NEXT, 0); // see #3
+ } while (NEXT != CUR->next); // above
+ ...
+ ...
+ CUR=NEXT;
+ pin(CUR, 1); // copy pin[0] to pin[1]
+ }
+ which keeps the CUR address constantly pinned), note that pins may be
+ copied only upwards (!!!), that is pin[N] to pin[M], M > N.
+ 7. Don't keep the object pinned longer than necessary - the number of
+ pins you have is limited (and small), keeping an object pinned
+ prevents its reuse and causes unnecessary mallocs.
+
+ Implementation details:
+ Pins are given away from a "pinbox". The pinbox is a stack-based allocator.
+ It uses a dynarray for storing pins; new elements are allocated by the
+ dynarray as necessary, old ones are pushed onto the stack for reuse. ABA is
+ solved by versioning a pointer - because we use an array, a pointer to pins
+ is 32 bit, and the upper 32 bits are used for a version.
+*/
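+
+/*
+  A minimal caller-side sketch of steps 2-5 above (illustrative; 'object_t'
+  and 'shared_ptr' are hypothetical, while _lf_pin()/_lf_unpin() and
+  LF_BACKOFF are the calls used throughout this file):
+
+    object_t * volatile shared_ptr;          // may be changed by any thread
+    object_t *local;
+    do                                       // step 3: pin in a loop
+    {
+      local= shared_ptr;
+      _lf_pin(pins, 0, local);
+    } while (local != shared_ptr && LF_BACKOFF);
+    // step 4: 'local' (or NULL) now cannot be freed under our feet
+    ... work with *local ...
+    _lf_unpin(pins, 0);                      // step 5: drop the pin
+*/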
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <lf.h>
+
+#define LF_PINBOX_MAX_PINS 65536
+
+static void _lf_pinbox_real_free(LF_PINS *pins);
+
+/*
+ Initialize a pinbox. Normally called from lf_alloc_init.
+ See the latter for details.
+*/
+void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
+ lf_pinbox_free_func *free_func, void *free_func_arg)
+{
+ DBUG_ASSERT(sizeof(LF_PINS) == 128);
+ DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
+ lf_dynarray_init(&pinbox->pinstack, sizeof(LF_PINS));
+ pinbox->pinstack_top_ver= 0;
+ pinbox->pins_in_stack= 0;
+ pinbox->free_ptr_offset= free_ptr_offset;
+ pinbox->free_func= free_func;
+ pinbox->free_func_arg= free_func_arg;
+}
+
+void lf_pinbox_destroy(LF_PINBOX *pinbox)
+{
+ lf_dynarray_destroy(&pinbox->pinstack);
+}
+
+/*
+ Get pins from a pinbox. Usually called via lf_alloc_get_pins() or
+ lf_hash_get_pins().
+
+ DESCRIPTION
+ get a new LF_PINS structure from a stack of unused pins,
+ or allocate a new one out of dynarray.
+*/
+LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox)
+{
+ uint32 pins, next, top_ver;
+ LF_PINS *el;
+
+ top_ver= pinbox->pinstack_top_ver;
+ do
+ {
+ if (!(pins= top_ver % LF_PINBOX_MAX_PINS))
+ {
+ pins= my_atomic_add32(&pinbox->pins_in_stack, 1)+1;
+ el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinstack, pins);
+ break;
+ }
+ el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinstack, pins);
+ next= el->link;
+ } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver,
+ top_ver-pins+next+LF_PINBOX_MAX_PINS));
+ el->link= pins;
+ el->purgatory_count= 0;
+ el->pinbox= pinbox;
+ return el;
+}
+
+/*
+ Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or
+ lf_hash_put_pins().
+
+ DESCRIPTION
+ empty the purgatory (XXX deadlock warning below!),
+ push the LF_PINS structure back onto the stack
+*/
+void _lf_pinbox_put_pins(LF_PINS *pins)
+{
+ LF_PINBOX *pinbox= pins->pinbox;
+ uint32 top_ver, nr;
+ nr= pins->link;
+#ifdef MY_LF_EXTRA_DEBUG
+ {
+ int i;
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ DBUG_ASSERT(pins->pin[i] == 0);
+ }
+#endif
+ /*
+ XXX this will deadlock if other threads wait for
+ the caller to do something after _lf_pinbox_put_pins(),
+ while they have pinned addresses that the caller wants to free.
+ Thus: only free pins when all work is done and nobody can wait for you!!!
+ */
+ while (pins->purgatory_count)
+ {
+ _lf_pinbox_real_free(pins);
+ if (pins->purgatory_count)
+ {
+ my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock);
+ pthread_yield();
+ my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock);
+ }
+ }
+ top_ver= pinbox->pinstack_top_ver;
+ if (nr == pinbox->pins_in_stack)
+ {
+ int32 tmp= nr;
+ if (my_atomic_cas32(&pinbox->pins_in_stack, &tmp, tmp-1))
+ goto ret;
+ }
+
+ do
+ {
+ pins->link= top_ver % LF_PINBOX_MAX_PINS;
+ } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver,
+ top_ver-pins->link+nr+LF_PINBOX_MAX_PINS));
+ret:
+ return;
+}
+
+static int ptr_cmp(void **a, void **b)
+{
+ return *a < *b ? -1 : *a == *b ? 0 : 1;
+}
+
+#define add_to_purgatory(PINS, ADDR) \
+ do \
+ { \
+ *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \
+ (PINS)->purgatory; \
+ (PINS)->purgatory= (ADDR); \
+ (PINS)->purgatory_count++; \
+ } while (0)
+
+/*
+ Free an object allocated via pinbox allocator
+
+ DESCRIPTION
+ add an object to purgatory. if necessary, call _lf_pinbox_real_free()
+ to actually free something.
+*/
+void _lf_pinbox_free(LF_PINS *pins, void *addr)
+{
+ add_to_purgatory(pins, addr);
+ if (pins->purgatory_count % LF_PURGATORY_SIZE)
+ _lf_pinbox_real_free(pins);
+}
+
+struct st_harvester {
+ void **granary;
+ int npins;
+};
+
+/*
+ callback for _lf_dynarray_iterate:
+ scan all pins of all threads and accumulate all pins
+*/
+static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
+{
+ int i;
+ LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
+ for (; el < el_end; el++)
+ {
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ {
+ void *p= el->pin[i];
+ if (p)
+ *hv->granary++= p;
+ }
+ }
+ hv->npins-= LF_DYNARRAY_LEVEL_LENGTH;
+ return 0;
+}
+
+/*
+ callback for _lf_dynarray_iterate:
+ scan all pins of all threads and see if addr is present there
+*/
+static int match_pins(LF_PINS *el, void *addr)
+{
+ int i;
+ LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH;
+ for (; el < el_end; el++)
+ for (i= 0; i < LF_PINBOX_PINS; i++)
+ if (el->pin[i] == addr)
+ return 1;
+ return 0;
+}
+
+/*
+ Scan the purgatory and free everything that can be freed
+*/
+static void _lf_pinbox_real_free(LF_PINS *pins)
+{
+ int npins;
+ void *list;
+ void **addr;
+ LF_PINBOX *pinbox= pins->pinbox;
+
+ npins= pinbox->pins_in_stack+1;
+
+#ifdef HAVE_ALLOCA
+ /* create a sorted list of pinned addresses, to speed up searches */
+ if (sizeof(void *)*LF_PINBOX_PINS*npins < my_thread_stack_size)
+ {
+ struct st_harvester hv;
+ addr= (void **) alloca(sizeof(void *)*LF_PINBOX_PINS*npins);
+ hv.granary= addr;
+ hv.npins= npins;
+ /* scan the dynarray and accumulate all pinned addresses */
+ _lf_dynarray_iterate(&pinbox->pinstack,
+ (lf_dynarray_func)harvest_pins, &hv);
+
+ npins= hv.granary-addr;
+ /* and sort them */
+ if (npins)
+ qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp);
+ }
+ else
+#endif
+ addr= 0;
+
+ list= pins->purgatory;
+ pins->purgatory= 0;
+ pins->purgatory_count= 0;
+ while (list)
+ {
+ void *cur= list;
+ list= *(void **)((char *)cur+pinbox->free_ptr_offset);
+ if (npins)
+ {
+ if (addr) /* use binary search */
+ {
+ void **a, **b, **c;
+ for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2)
+ if (cur == *c)
+ a= b= c;
+ else if (cur > *c)
+ a= c;
+ else
+ b= c;
+ if (cur == *a || cur == *b)
+ goto found;
+ }
+ else /* no alloca - no cookie. linear search here */
+ {
+ if (_lf_dynarray_iterate(&pinbox->pinstack,
+ (lf_dynarray_func)match_pins, cur))
+ goto found;
+ }
+ }
+ /* not pinned - freeing */
+ pinbox->free_func(cur, pinbox->free_func_arg);
+ continue;
+found:
+ /* pinned - keeping */
+ add_to_purgatory(pins, cur);
+ }
+}
+
+/*
+ callback for _lf_pinbox_real_free to free an unpinned object -
+ add it back to the allocator stack
+*/
+static void alloc_free(struct st_lf_alloc_node *node, LF_ALLOCATOR *allocator)
+{
+ struct st_lf_alloc_node *tmp;
+ tmp= allocator->top;
+ do
+ {
+ node->next= tmp;
+ } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) &&
+ LF_BACKOFF);
+}
+
+/* lock-free memory allocator for fixed-size objects */
+
+LF_REQUIRE_PINS(1);
+
+/*
+ Initialize the lock-free allocator.
+
+ SYNOPSIS
+ allocator -
+ size a size of an object to allocate
+ free_ptr_offset an offset inside the object to a sizeof(void *)
+ memory that is guaranteed to be unused after
+ the object is put in the purgatory. Unused by ANY
+ thread, not only the purgatory owner.
+*/
+void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset)
+{
+ lf_pinbox_init(&allocator->pinbox, free_ptr_offset,
+ (lf_pinbox_free_func *)alloc_free, allocator);
+ allocator->top= 0;
+ allocator->mallocs= 0;
+ allocator->element_size= size;
+ DBUG_ASSERT(size >= (int)sizeof(void *));
+ DBUG_ASSERT(free_ptr_offset < size);
+}
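+
+/*
+  A minimal usage sketch of the free_ptr_offset contract (hypothetical
+  node type; lf_hash.c below does the same thing with LF_SLIST): the
+  'key' pointer is not needed once the node sits in the purgatory, so
+  its offset can double as the purgatory link.
+
+    struct my_node { uint32 hashnr; void *key; char payload[16]; };
+    LF_ALLOCATOR alloc;
+    lf_alloc_init(&alloc, sizeof(struct my_node),
+                  offsetof(struct my_node, key));
+*/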
+
+/*
+ destroy the allocator, free everything that's in it
+*/
+void lf_alloc_destroy(LF_ALLOCATOR *allocator)
+{
+ struct st_lf_alloc_node *node= allocator->top;
+ while (node)
+ {
+ struct st_lf_alloc_node *tmp= node->next;
+ my_free((void *)node, MYF(0));
+ node= tmp;
+ }
+ lf_pinbox_destroy(&allocator->pinbox);
+ allocator->top= 0;
+}
+
+/*
+ Allocate and return a new object.
+
+ DESCRIPTION
+ Pop an unused object from the stack, or malloc one if the stack is empty.
+ pin[0] is used, it's removed on return.
+*/
+void *_lf_alloc_new(LF_PINS *pins)
+{
+ LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg);
+ struct st_lf_alloc_node *node;
+ for (;;)
+ {
+ do
+ {
+ node= allocator->top;
+ _lf_pin(pins, 0, node);
+ } while (node != allocator->top && LF_BACKOFF);
+ if (!node)
+ {
+ if (!(node= (void *)my_malloc(allocator->element_size,
+ MYF(MY_WME|MY_ZEROFILL))))
+ break;
+#ifdef MY_LF_EXTRA_DEBUG
+ my_atomic_add32(&allocator->mallocs, 1);
+#endif
+ break;
+ }
+ if (my_atomic_casptr((void **)&allocator->top,
+ (void *)&node, *(void **)node))
+ break;
+ }
+ _lf_unpin(pins, 0);
+ return node;
+}
+
+/*
+ count the number of objects in a pool.
+
+ NOTE
+ This is NOT thread-safe !!!
+*/
+uint lf_alloc_in_pool(LF_ALLOCATOR *allocator)
+{
+ uint i;
+ struct st_lf_alloc_node *node;
+ for (node= allocator->top, i= 0; node; node= node->next, i++)
+ /* no op */;
+ return i;
+}
+
diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c
new file mode 100644
index 00000000000..c6dd654bf03
--- /dev/null
+++ b/mysys/lf_dynarray.c
@@ -0,0 +1,204 @@
+/* Copyright (C) 2000 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Analog of DYNAMIC_ARRAY that never reallocs
+ (so no pointer into the array may ever become invalid).
+
+ Memory is allocated in non-contiguous chunks.
+ This data structure is not space efficient for sparse arrays.
+
+ The number of elements is limited to 4311810304
+
+ Every element is aligned to sizeof(element) boundary
+ (to avoid false sharing if element is big enough).
+
+ LF_DYNARRAY is a recursive structure. On the zero level
+ LF_DYNARRAY::level[0] it's an array of LF_DYNARRAY_LEVEL_LENGTH elements,
+ on the first level it's an array of LF_DYNARRAY_LEVEL_LENGTH pointers
+ to arrays of elements, on the second level it's an array of pointers
+ to arrays of pointers to arrays of elements. And so on.
+
+ Actually, it's wait-free, not lock-free ;-)
+*/
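+
+/*
+  A worked example of the level structure, assuming
+  LF_DYNARRAY_LEVEL_LENGTH == 256 and LF_DYNARRAY_LEVELS == 4 (these
+  values match the 4311810304 limit above: 256 + 256^2 + 256^3 + 256^4):
+
+    level[0] holds elements        0 .. 255        directly
+    level[1] holds elements      256 .. 65791      via one pointer level
+    level[2] holds elements    65792 .. 16843007   via two pointer levels
+    level[3] holds elements 16843008 .. 4311810303 via three pointer levels
+
+  _lf_dynarray_lvalue()/_lf_dynarray_value() below first pick the level,
+  then peel it off one pointer array at a time by dividing the remaining
+  index by powers of LF_DYNARRAY_LEVEL_LENGTH.
+*/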
+
+#include <my_global.h>
+#include <strings.h>
+#include <my_sys.h>
+#include <lf.h>
+
+void lf_dynarray_init(LF_DYNARRAY *array, uint element_size)
+{
+ bzero(array, sizeof(*array));
+ array->size_of_element= element_size;
+ my_atomic_rwlock_init(&array->lock);
+}
+
+static void recursive_free(void **alloc, int level)
+{
+ if (!alloc)
+ return;
+
+ if (level)
+ {
+ int i;
+ for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++)
+ recursive_free(alloc[i], level-1);
+ my_free((void *)alloc, MYF(0));
+ }
+ else
+ my_free(alloc[-1], MYF(0));
+}
+
+void lf_dynarray_destroy(LF_DYNARRAY *array)
+{
+ int i;
+ for (i= 0; i < LF_DYNARRAY_LEVELS; i++)
+ recursive_free(array->level[i], i);
+ my_atomic_rwlock_destroy(&array->lock);
+}
+
+static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]=
+{
+ 0, /* +1 here to avoid -1's below */
+ LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH +
+ LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH *
+ LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH *
+ LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH
+};
+
+static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]=
+{
+ 0, /* +1 here to avoid -1's below */
+ LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH,
+ LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH *
+ LF_DYNARRAY_LEVEL_LENGTH,
+};
+
+/*
+ Returns a valid lvalue pointer to the element number 'idx'.
+ Allocates memory if necessary.
+*/
+void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
+{
+ void * ptr, * volatile * ptr_ptr= 0;
+ int i;
+
+ for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
+ /* no-op */;
+ ptr_ptr= &array->level[i];
+ idx-= dynarray_idxes_in_prev_levels[i];
+ for (; i > 0; i--)
+ {
+ if (!(ptr= *ptr_ptr))
+ {
+ void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *),
+ MYF(MY_WME|MY_ZEROFILL));
+ if (unlikely(!alloc))
+ return(NULL);
+ if (my_atomic_casptr(ptr_ptr, &ptr, alloc))
+ ptr= alloc;
+ else
+ my_free(alloc, MYF(0));
+ }
+ ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i];
+ idx%= dynarray_idxes_in_prev_level[i];
+ }
+ if (!(ptr= *ptr_ptr))
+ {
+ void *alloc, *data;
+ alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element +
+ max(array->size_of_element, sizeof(void *)),
+ MYF(MY_WME|MY_ZEROFILL));
+ if (unlikely(!alloc))
+ return(NULL);
+ /* reserve the space for free() address */
+ data= alloc + sizeof(void *);
+ { /* alignment */
+ intptr mod= ((intptr)data) % array->size_of_element;
+ if (mod)
+ data+= array->size_of_element - mod;
+ }
+ ((void **)data)[-1]= alloc; /* free() will need the original pointer */
+ if (my_atomic_casptr(ptr_ptr, &ptr, data))
+ ptr= data;
+ else
+ my_free(alloc, MYF(0));
+ }
+ return ptr + array->size_of_element * idx;
+}
+
+/*
+ Returns a pointer to the element number 'idx'
+ or NULL if the element does not exist
+*/
+void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx)
+{
+ void * ptr, * volatile * ptr_ptr= 0;
+ int i;
+
+ for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
+ /* no-op */;
+ ptr_ptr= &array->level[i];
+ idx-= dynarray_idxes_in_prev_levels[i];
+ for (; i > 0; i--)
+ {
+ if (!(ptr= *ptr_ptr))
+ return(NULL);
+ ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i];
+ idx %= dynarray_idxes_in_prev_level[i];
+ }
+ if (!(ptr= *ptr_ptr))
+ return(NULL);
+ return ptr + array->size_of_element * idx;
+}
+
+static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level,
+ lf_dynarray_func func, void *arg)
+{
+ int res, i;
+ if (!ptr)
+ return 0;
+ if (!level)
+ return func(ptr, arg);
+ for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++)
+ if ((res= recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg)))
+ return res;
+ return 0;
+}
+
+/*
+ Calls func(array, arg) on every array of LF_DYNARRAY_LEVEL_LENGTH elements
+ in lf_dynarray.
+
+ DESCRIPTION
+ lf_dynarray consists of a set of arrays, LF_DYNARRAY_LEVEL_LENGTH elements
+ each. _lf_dynarray_iterate() calls user-supplied function on every array
+ from the set. It is the fastest way to scan the array, faster than
+ for (i=0; i < N; i++) { func(_lf_dynarray_value(dynarray, i)); }
+*/
+int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg)
+{
+ int i, res;
+ for (i= 0; i < LF_DYNARRAY_LEVELS; i++)
+ if ((res= recursive_iterate(array, array->level[i], i, func, arg)))
+ return res;
+ return 0;
+}
+
diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c
new file mode 100644
index 00000000000..fb2fb88492f
--- /dev/null
+++ b/mysys/lf_hash.c
@@ -0,0 +1,400 @@
+/* Copyright (C) 2000 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ extensible hash
+
+ TODO
+ try to get rid of dummy nodes ?
+ for non-unique hash, count only _distinct_ values
+ (but how to do it in lf_hash_delete ?)
+*/
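+
+/*
+  A rough usage sketch (hypothetical element type; lf_hash_get_pins(),
+  lf_hash_put_pins() and lf_unpin() are the wrappers referred to in
+  lf_alloc-pin.c and assumed to be declared in lf.h):
+
+    typedef struct { uint32 id; uint32 value; } ELEM;
+    LF_HASH hash;
+    LF_PINS *pins;
+    ELEM el= {1, 42};
+
+    lf_hash_init(&hash, sizeof(ELEM), LF_HASH_UNIQUE,
+                 offsetof(ELEM, id), sizeof(uint32), NULL, &my_charset_bin);
+    pins= lf_hash_get_pins(&hash);
+    lf_hash_insert(&hash, pins, &el);            // stores a copy of el
+    lf_hash_search(&hash, pins, &el.id, sizeof(el.id));
+    lf_unpin(pins, 2);                           // search keeps pin[2] set
+    lf_hash_delete(&hash, pins, &el.id, sizeof(el.id));
+    lf_hash_put_pins(pins);
+    lf_hash_destroy(&hash);
+*/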
+#include <my_global.h>
+#include <m_string.h>
+#include <my_sys.h>
+#include <my_bit.h>
+#include <lf.h>
+
+LF_REQUIRE_PINS(3);
+
+/* An element of the list */
+typedef struct {
+ intptr volatile link; /* a pointer to the next element in a list and a flag */
+ uint32 hashnr; /* reversed hash number, for sorting */
+ const byte *key;
+ uint keylen;
+} LF_SLIST;
+
+/*
+ a structure to pass the context (pointers to the three successive elements
+ in a list) from lfind to linsert/ldelete
+*/
+typedef struct {
+ intptr volatile *prev;
+ LF_SLIST *curr, *next;
+} CURSOR;
+
+/*
+ the last bit in LF_SLIST::link is a "deleted" flag.
+ the helper macros below convert it to a pure pointer or a pure flag
+*/
+#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1))
+#define DELETED(V) ((V) & 1)
+
+/*
+ DESCRIPTION
+ Search for hashnr/key/keylen in the list starting from 'head' and
+ position the cursor. The list is ORDER BY hashnr, key
+
+ RETURN
+ 0 - not found
+ 1 - found
+
+ NOTE
+ cursor is positioned in either case
+ pins[0..2] are used, they are NOT removed on return
+*/
+static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
+ const byte *key, uint keylen, CURSOR *cursor, LF_PINS *pins)
+{
+ uint32 cur_hashnr;
+ const byte *cur_key;
+ uint cur_keylen;
+ intptr link;
+
+retry:
+ cursor->prev= (intptr *)head;
+ do {
+ cursor->curr= PTR(*cursor->prev);
+ _lf_pin(pins, 1, cursor->curr);
+ } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+ for (;;)
+ {
+ if (!cursor->curr)
+ return 0;
+ do {
+ /* QQ: XXX or goto retry ? */
+ link= cursor->curr->link;
+ cursor->next= PTR(link);
+ _lf_pin(pins, 0, cursor->next);
+ } while(link != cursor->curr->link && LF_BACKOFF);
+ cur_hashnr= cursor->curr->hashnr;
+ cur_key= cursor->curr->key;
+ cur_keylen= cursor->curr->keylen;
+ if (*cursor->prev != (intptr)cursor->curr)
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ if (!DELETED(link))
+ {
+ if (cur_hashnr >= hashnr)
+ {
+ int r= 1;
+ if (cur_hashnr > hashnr ||
+ (r= my_strnncoll(cs, (uchar*) cur_key, cur_keylen, (uchar*) key,
+ keylen)) >= 0)
+ return !r;
+ }
+ cursor->prev= &(cursor->curr->link);
+ _lf_pin(pins, 2, cursor->curr);
+ }
+ else
+ {
+ if (my_atomic_casptr((void **)cursor->prev,
+ (void **)&cursor->curr, cursor->next))
+ _lf_alloc_free(pins, cursor->curr);
+ else
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ }
+ cursor->curr= cursor->next;
+ _lf_pin(pins, 1, cursor->curr);
+ }
+}
+
+/*
+ DESCRIPTION
+ insert a 'node' in the list that starts from 'head' in the correct
+ position (as found by lfind)
+
+ RETURN
+ 0 - inserted
+ not 0 - a pointer to a duplicate (not pinned and thus unusable)
+
+ NOTE
+ it uses pins[0..2], on return all pins are removed.
+*/
+static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
+ LF_SLIST *node, LF_PINS *pins, uint flags)
+{
+ CURSOR cursor;
+ int res= -1;
+
+ do
+ {
+ if (lfind(head, cs, node->hashnr, node->key, node->keylen,
+ &cursor, pins) &&
+ (flags & LF_HASH_UNIQUE))
+ res= 0; /* duplicate found */
+ else
+ {
+ node->link= (intptr)cursor.curr;
+ assert(node->link != (intptr)node);
+ assert(cursor.prev != &node->link);
+ if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node))
+ res= 1; /* inserted ok */
+ }
+ } while (res == -1);
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ return res ? 0 : cursor.curr;
+}
+
+/*
+ DESCRIPTION
+ deletes a node identified by hashnr/key/keylen from the list
+ that starts from 'head'
+
+ RETURN
+ 0 - ok
+ 1 - not found
+
+ NOTE
+ it uses pins[0..2], on return all pins are removed.
+*/
+static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
+ const byte *key, uint keylen, LF_PINS *pins)
+{
+ CURSOR cursor;
+ int res= -1;
+
+ do
+ {
+ if (!lfind(head, cs, hashnr, key, keylen, &cursor, pins))
+ res= 1;
+ else
+ if (my_atomic_casptr((void **)&(cursor.curr->link),
+ (void **)&cursor.next, 1+(char *)cursor.next))
+ {
+ if (my_atomic_casptr((void **)cursor.prev,
+ (void **)&cursor.curr, cursor.next))
+ _lf_alloc_free(pins, cursor.curr);
+ else
+ lfind(head, cs, hashnr, key, keylen, &cursor, pins);
+ res= 0;
+ }
+ } while (res == -1);
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ return res;
+}
+
+/*
+ DESCRIPTION
+ searches for a node identified by hashnr/key/keylen in the list
+ that starts from 'head'
+
+ RETURN
+ 0 - not found
+ node - found
+
+ NOTE
+ it uses pins[0..2], on return the pin[2] keeps the node found
+ all other pins are removed.
+*/
+static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs,
+ uint32 hashnr, const byte *key, uint keylen,
+ LF_PINS *pins)
+{
+ CURSOR cursor;
+ int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins);
+ if (res) _lf_pin(pins, 2, cursor.curr);
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ return res ? cursor.curr : 0;
+}
+
+static inline const byte* hash_key(const LF_HASH *hash,
+ const byte *record, uint *length)
+{
+ if (hash->get_key)
+ return (*hash->get_key)(record, length, 0);
+ *length= hash->key_length;
+ return record + hash->key_offset;
+}
+
+static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen)
+{
+ ulong nr1= 1, nr2= 4;
+ hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen,
+ &nr1, &nr2);
+ return nr1 & INT_MAX32;
+}
+
+#define MAX_LOAD 1.0
+static void initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *);
+
+/*
+ Initializes lf_hash, the arguments are compatible with hash_init
+*/
+void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
+ uint key_offset, uint key_length, hash_get_key get_key,
+ CHARSET_INFO *charset)
+{
+ lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
+ offsetof(LF_SLIST, key));
+ lf_dynarray_init(&hash->array, sizeof(LF_SLIST **));
+ hash->size= 1;
+ hash->count= 0;
+ hash->element_size= element_size;
+ hash->flags= flags;
+ hash->charset= charset ? charset : &my_charset_bin;
+ hash->key_offset= key_offset;
+ hash->key_length= key_length;
+ hash->get_key= get_key;
+ DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
+}
+
+void lf_hash_destroy(LF_HASH *hash)
+{
+ LF_SLIST *el= *(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0);
+ while (el)
+ {
+ intptr next= el->link;
+ if (el->hashnr & 1)
+ lf_alloc_real_free(&hash->alloc, el);
+ else
+ my_free((void *)el, MYF(0));
+ el= (LF_SLIST *)next;
+ }
+ lf_alloc_destroy(&hash->alloc);
+ lf_dynarray_destroy(&hash->array);
+}
+
+/*
+ DESCRIPTION
+ inserts a new element into a hash. it will store a _copy_ of the
+ data, not a pointer to it.
+
+ RETURN
+ 0 - inserted
+ 1 - didn't (unique key conflict)
+
+ NOTE
+ see linsert() for pin usage notes
+*/
+int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
+{
+ int csize, bucket, hashnr;
+ LF_SLIST *node, * volatile *el;
+
+ lf_rwlock_by_pins(pins);
+ node= (LF_SLIST *)_lf_alloc_new(pins);
+ memcpy(node+1, data, hash->element_size);
+ node->key= hash_key(hash, (byte *)(node+1), &node->keylen);
+ hashnr= calc_hash(hash, node->key, node->keylen);
+ bucket= hashnr % hash->size;
+ el= _lf_dynarray_lvalue(&hash->array, bucket);
+ if (*el == NULL)
+ initialize_bucket(hash, el, bucket, pins);
+ node->hashnr= my_reverse_bits(hashnr) | 1;
+ if (linsert(el, hash->charset, node, pins, hash->flags))
+ {
+ _lf_alloc_free(pins, node);
+ lf_rwunlock_by_pins(pins);
+ return 1;
+ }
+ csize= hash->size;
+ if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD)
+ my_atomic_cas32(&hash->size, &csize, csize*2);
+ lf_rwunlock_by_pins(pins);
+ return 0;
+}
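As a quick worked example of the resize rule above: with MAX_LOAD 1.0 and hash->size 4, the fifth successful insert makes (count + 1) / size exceed the load factor, so the size is CASed from 4 to 8; the new buckets get their dummy head nodes lazily, via initialize_bucket(), the first time an insert/delete/search addresses them (*el == NULL).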
+
+/*
+ RETURN
+ 0 - deleted
+ 1 - didn't (not found)
+ NOTE
+ see ldelete() for pin usage notes
+*/
+int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
+{
+ LF_SLIST * volatile *el;
+ uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen);
+
+ bucket= hashnr % hash->size;
+ lf_rwlock_by_pins(pins);
+ el= _lf_dynarray_lvalue(&hash->array, bucket);
+ if (*el == NULL)
+ initialize_bucket(hash, el, bucket, pins);
+ if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1,
+ (byte *)key, keylen, pins))
+ {
+ lf_rwunlock_by_pins(pins);
+ return 1;
+ }
+ my_atomic_add32(&hash->count, -1);
+ lf_rwunlock_by_pins(pins);
+ return 0;
+}
+
+/*
+ NOTE
+ see lsearch() for pin usage notes
+*/
+void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
+{
+ LF_SLIST * volatile *el, *found;
+ uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen);
+
+ bucket= hashnr % hash->size;
+ lf_rwlock_by_pins(pins);
+ el= _lf_dynarray_lvalue(&hash->array, bucket);
+ if (*el == NULL)
+ initialize_bucket(hash, el, bucket, pins);
+ found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1,
+ (byte *)key, keylen, pins);
+ lf_rwunlock_by_pins(pins);
+ return found ? found+1 : 0;
+}
+
+static const char *dummy_key= "";
+
+static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node,
+ uint bucket, LF_PINS *pins)
+{
+ uint parent= my_clear_highest_bit(bucket);
+ LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
+ LF_SLIST **tmp= 0, *cur;
+ LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent);
+ if (*el == NULL && bucket)
+ initialize_bucket(hash, el, parent, pins);
+ dummy->hashnr= my_reverse_bits(bucket);
+ dummy->key= (char*) dummy_key;
+ dummy->keylen= 0;
+ if ((cur= linsert(el, hash->charset, dummy, pins, 0)))
+ {
+ my_free((void *)dummy, MYF(0));
+ dummy= cur;
+ }
+ my_atomic_casptr((void **)node, (void **)&tmp, dummy);
+}
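The bucket layout built by linsert() and initialize_bucket() above follows the split-ordered list scheme: real nodes store my_reverse_bits(hashnr) | 1 (always odd), dummy bucket heads store my_reverse_bits(bucket) (always even), so each dummy sorts before all keys belonging to its bucket and doubling hash->size only adds new dummies without relocating existing nodes. A minimal standalone sketch of that ordering, using a hypothetical rev32() as a stand-in for my_reverse_bits(), purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* illustrative stand-in for my_reverse_bits() */
static uint32_t rev32(uint32_t v)
{
  uint32_t r= 0;
  int i;
  for (i= 0; i < 32; i++, v>>= 1)
    r= (r << 1) | (v & 1);
  return r;
}

int main(void)
{
  uint32_t hashnr= 6;                      /* hash of some key          */
  uint32_t node=   rev32(hashnr) | 1;      /* real node key: always odd */
  uint32_t dummy2= rev32(hashnr % 2);      /* dummy of bucket 0, size 2 */
  uint32_t dummy4= rev32(hashnr % 4);      /* dummy of bucket 2, size 4 */
  /* dummy2 < dummy4 < node: growing the hash from 2 to 4 buckets only
     adds the new dummy, the real node never has to be relocated */
  printf("%#x < %#x < %#x\n", (unsigned) dummy2, (unsigned) dummy4,
         (unsigned) node);
  return 0;
}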
diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c
index 86394fec239..95a9f08a07a 100644
--- a/mysys/mf_keycache.c
+++ b/mysys/mf_keycache.c
@@ -42,6 +42,7 @@
#include <keycache.h>
#include "my_static.h"
#include <m_string.h>
+#include <my_bit.h>
#include <errno.h>
#include <stdarg.h>
@@ -1009,12 +1010,12 @@ static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(KEYCACHE_DEBUG)
+ KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
keycache->blocks_available--;
KEYCACHE_DBUG_PRINT("unlink_block",
("unlinked block %u status=%x #requests=%u #available=%u",
BLOCK_NUMBER(block), block->status,
block->requests, keycache->blocks_available));
- KEYCACHE_DBUG_ASSERT(keycache->blocks_available >= 0);
#endif
}
@@ -1643,9 +1644,9 @@ restart:
KEYCACHE_DBUG_ASSERT(page_status != -1);
*page_st=page_status;
KEYCACHE_DBUG_PRINT("find_key_block",
- ("fd: %d pos: %lu block->status: %u page_status: %u",
+ ("fd: %d pos: %lu block->status: %u page_status: %d",
file, (ulong) filepos, block->status,
- (uint) page_status));
+ page_status));
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
DBUG_EXECUTE("check_keycache2",
@@ -1793,8 +1794,6 @@ byte *key_cache_read(KEY_CACHE *keycache,
uint offset= 0;
byte *start= buff;
DBUG_ENTER("key_cache_read");
- DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
- (uint) file, (ulong) filepos, length));
if (keycache->can_be_used)
{
@@ -1804,6 +1803,11 @@ byte *key_cache_read(KEY_CACHE *keycache,
uint status;
int page_st;
+ DBUG_PRINT("enter", ("fd: %u pos: %lu page: %lu length: %u",
+ (uint) file, (ulong) filepos,
+ (ulong) (filepos / keycache->key_cache_block_size),
+ length));
+
offset= (uint) (filepos & (keycache->key_cache_block_size-1));
/* Read data in key_cache_block_size increments */
do
@@ -2055,10 +2059,6 @@ int key_cache_write(KEY_CACHE *keycache,
reg1 BLOCK_LINK *block;
int error=0;
DBUG_ENTER("key_cache_write");
- DBUG_PRINT("enter",
- ("fd: %u pos: %lu length: %u block_length: %u key_block_length: %u",
- (uint) file, (ulong) filepos, length, block_length,
- keycache ? keycache->key_cache_block_size : 0));
if (!dont_write)
{
@@ -2080,6 +2080,12 @@ int key_cache_write(KEY_CACHE *keycache,
int page_st;
uint offset;
+ DBUG_PRINT("enter",
+ ("fd: %u pos: %lu page: %lu length: %u block_length: %u",
+ (uint) file, (ulong) filepos,
+ (ulong) (filepos / keycache->key_cache_block_size),
+ length, block_length));
+
offset= (uint) (filepos & (keycache->key_cache_block_size-1));
do
{
diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c
index 51ad54159e5..3f40f4f4010 100644
--- a/mysys/mf_keycaches.c
+++ b/mysys/mf_keycaches.c
@@ -147,7 +147,8 @@ static void safe_hash_free(SAFE_HASH *hash)
Return the value stored for a key or default value if no key
*/
-static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length)
+static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length,
+ byte *def)
{
byte *result;
DBUG_ENTER("safe_hash_search");
@@ -155,7 +156,7 @@ static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length)
result= hash_search(&hash->hash, key, length);
rw_unlock(&hash->mutex);
if (!result)
- result= hash->default_value;
+ result= def;
else
result= ((SAFE_HASH_ENTRY*) result)->data;
DBUG_PRINT("exit",("data: 0x%lx", (long) result));
@@ -315,6 +316,7 @@ void multi_keycache_free(void)
multi_key_cache_search()
key key to find (usually table path)
uint length Length of key.
+ def Default value if no key cache
NOTES
This function is coded in such a way that we will return the
@@ -325,11 +327,13 @@ void multi_keycache_free(void)
key cache to use
*/
-KEY_CACHE *multi_key_cache_search(byte *key, uint length)
+KEY_CACHE *multi_key_cache_search(byte *key, uint length,
+ KEY_CACHE *def)
{
if (!key_cache_hash.hash.records)
- return dflt_key_cache;
- return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length);
+ return def;
+ return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length,
+ (void*) def);
}
diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c
new file mode 100755
index 00000000000..1b9d48c80e6
--- /dev/null
+++ b/mysys/mf_pagecache.c
@@ -0,0 +1,4102 @@
+/* Copyright (C) 2000-2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ These functions handle page caching for Maria tables.
+
+ One cache can handle many files.
+ It must contain buffers of the same blocksize.
+ init_pagecache() should be used to init cache handler.
+
+ The free list (free_block_list) is a stack like structure.
+ When a block is freed by free_block(), it is pushed onto the stack.
+ When a new block is required, we first try to pop one from the stack.
+ If the stack is empty, we try to take a never-used block from the pool.
+ If the pool is empty too, a block is taken from the LRU ring and flushed
+ to disk first if necessary. This is handled in find_block().
+ With the new free list, the blocks can have three temperatures:
+ hot, warm and cold (which is free). This is remembered in the block header
+ by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
+ temperature is necessary to correctly count the number of warm blocks,
+ which is required to decide when blocks are allowed to become hot. Whenever
+ a block is inserted to another (sub-)chain, we take the old and new
+ temperature into account to decide if we got one more or less warm block.
+ blocks_unused is the sum of never used blocks in the pool and of currently
+ free blocks. blocks_used is the number of blocks fetched from the pool and
+ as such gives the maximum number of in-use blocks at any time.
+*/
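A rough sketch of that allocation order (the DEMO_* names and demo_flush() are hypothetical and the temperature/LRU bookkeeping is omitted; the real logic lives in find_block() further down in this file):

/* Minimal illustrative types; the real structures are PAGECACHE and
   PAGECACHE_BLOCK_LINK, defined below. */
typedef struct st_demo_block
{
  struct st_demo_block *next;     /* free-list link              */
  int dirty;                      /* stands in for BLOCK_CHANGED */
} DEMO_BLOCK;

typedef struct st_demo_cache
{
  DEMO_BLOCK *free_list;          /* stack of freed blocks            */
  DEMO_BLOCK *pool;               /* array of never-used blocks       */
  unsigned used, total;           /* how much of the pool was taken   */
  DEMO_BLOCK *lru_head;           /* first block of the warm LRU part */
} DEMO_CACHE;

static void demo_flush(DEMO_BLOCK *b) { b->dirty= 0; }  /* placeholder */

/* The order described above: free stack, then never-used pool, then
   eviction of the warm LRU head. */
static DEMO_BLOCK *demo_get_block(DEMO_CACHE *c)
{
  DEMO_BLOCK *b;
  if ((b= c->free_list))                 /* 1: pop a freed block        */
    c->free_list= b->next;
  else if (c->used < c->total)           /* 2: take a never-used block  */
    b= &c->pool[c->used++];
  else
  {
    b= c->lru_head;                      /* 3: evict the LRU victim,    */
    if (b->dirty)                        /*    flushing it first if the */
      demo_flush(b);                     /*    page is dirty            */
  }
  return b;
}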
+
+#include "mysys_priv.h"
+#include <m_string.h>
+#include <pagecache.h>
+#include "my_static.h"
+#include <my_bit.h>
+#include <errno.h>
+#include <stdarg.h>
+
+/*
+ Some compilation flags have been added specifically for this module
+ to control the following:
+ - not to let a thread yield control when reading directly
+ from page cache, which might improve performance in many cases;
+ to enable this add:
+ #define SERIALIZED_READ_FROM_CACHE
+ - to set an upper bound for number of threads simultaneously
+ using the page cache; this setting helps to determine an optimal
+ size for the hash table and improve performance when the number of
+ blocks in the page cache is much less than the number of threads
+ accessing it;
+ to set this number equal to <N> add
+ #define MAX_THREADS <N>
+ - to replace calls to pthread_cond_wait with calls to
+ pthread_cond_timedwait (wait with a timeout set up);
+ this setting should be used only when you want to trap a deadlock
+ situation, which theoretically should not happen;
+ to set timeout equal to <T> seconds add
+ #define PAGECACHE_TIMEOUT <T>
+ - to enable the module traps and to send debug information from
+ page cache module to a special debug log add:
+ #define PAGECACHE_DEBUG
+ the name of this debug log file <LOG NAME> can be set through:
+ #define PAGECACHE_DEBUG_LOG <LOG NAME>
+ if the name is not defined, it's set by default;
+ if the PAGECACHE_DEBUG flag is not set up and we are in a debug
+ mode, i.e. when ! defined(DBUG_OFF), the debug information from the
+ module is sent to the regular debug log.
+
+ Example of the settings:
+ #define SERIALIZED_READ_FROM_CACHE
+ #define MAX_THREADS 100
+ #define PAGECACHE_TIMEOUT 1
+ #define PAGECACHE_DEBUG
+ #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log"
+*/
+
+/*
+ In the key cache we have external raw locking; here we use
+ SERIALIZED_READ_FROM_CACHE to avoid the problem of reading
+ inconsistent data from a page.
+ (The keycache functions key_cache_read(), key_cache_insert() and
+ key_cache_write() rely on the external MyISAM lock; we don't.)
+*/
+#define SERIALIZED_READ_FROM_CACHE yes
+
+#define BLOCK_INFO(B) \
+ DBUG_PRINT("info", \
+ ("block 0x%lx file %lu page %lu s %0x hshL 0x%lx req %u/%u " \
+ "wrlock: %c", \
+ (ulong)(B), \
+ (ulong)((B)->hash_link ? \
+ (B)->hash_link->file.file : \
+ 0), \
+ (ulong)((B)->hash_link ? \
+ (B)->hash_link->pageno : \
+ 0), \
+ (B)->status, \
+ (ulong)(B)->hash_link, \
+ (uint) (B)->requests, \
+ (uint)((B)->hash_link ? \
+ (B)->hash_link->requests : \
+ 0), \
+ ((block->status & BLOCK_WRLOCK)?'Y':'N')))
+
+/* TODO: put it to my_static.c */
+my_bool my_disable_flush_pagecache_blocks= 0;
+
+#define STRUCT_PTR(TYPE, MEMBER, a) \
+ (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
+
+/* types of condition variables */
+#define COND_FOR_REQUESTED 0 /* queue of threads waiting for a read operation */
+#define COND_FOR_SAVED 1 /* queue of threads waiting for flush */
+#define COND_FOR_WRLOCK 2 /* queue of threads waiting for a write lock */
+#define COND_SIZE 3 /* number of COND_* queues */
+
+typedef pthread_cond_t KEYCACHE_CONDVAR;
+
+/* descriptor of the page in the page cache block buffer */
+struct st_pagecache_page
+{
+ PAGECACHE_FILE file; /* file to which the page belongs */
+ pgcache_page_no_t pageno; /* number of the page in the file */
+};
+
+/* element in the chain of a hash table bucket */
+struct st_pagecache_hash_link
+{
+ struct st_pagecache_hash_link
+ *next, **prev; /* to connect links in the same bucket */
+ struct st_pagecache_block_link
+ *block; /* reference to the block for the page: */
+ PAGECACHE_FILE file; /* from such a file */
+ pgcache_page_no_t pageno; /* this page */
+ uint requests; /* number of requests for the page */
+};
+
+/* simple states of a block */
+#define BLOCK_ERROR 1 /* an error occurred when performing disk i/o */
+#define BLOCK_READ 2 /* the page is in the block buffer */
+#define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */
+#define BLOCK_REASSIGNED 8 /* block does not accept requests for old page */
+#define BLOCK_IN_FLUSH 16 /* block is in flush operation */
+#define BLOCK_CHANGED 32 /* block buffer contains a dirty page */
+#define BLOCK_WRLOCK 64 /* write locked block */
+
+/* page status, returned by find_block */
+#define PAGE_READ 0
+#define PAGE_TO_BE_READ 1
+#define PAGE_WAIT_TO_BE_READ 2
+
+/* block temperature determines in which (sub-)chain the block currently is */
+enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT };
+
+/* debug info */
+#ifndef DBUG_OFF
+static char *page_cache_page_type_str[]=
+{
+ (char*)"PLAIN",
+ (char*)"LSN"
+};
+static char *page_cache_page_write_mode_str[]=
+{
+ (char*)"DELAY",
+ (char*)"NOW",
+ (char*)"DONE"
+};
+static char *page_cache_page_lock_str[]=
+{
+ (char*)"free -> free ",
+ (char*)"read -> read ",
+ (char*)"write -> write",
+ (char*)"free -> read ",
+ (char*)"free -> write",
+ (char*)"read -> free ",
+ (char*)"write -> free ",
+ (char*)"write -> read "
+};
+static char *page_cache_page_pin_str[]=
+{
+ (char*)"pinned -> pinned ",
+ (char*)"unpinned -> unpinned",
+ (char*)"unpinned -> pinned ",
+ (char*)"pinned -> unpinned"
+};
+#endif
+#ifdef PAGECACHE_DEBUG
+typedef struct st_pagecache_pin_info
+{
+ struct st_pagecache_pin_info *next, **prev;
+ struct st_my_thread_var *thread;
+} PAGECACHE_PIN_INFO;
+/*
+ The next, prev and thread members of st_pagecache_lock_info must stay
+ layout-compatible with st_pagecache_pin_info, so that the same list
+ functions can handle both structures.
+*/
+typedef struct st_pagecache_lock_info
+{
+ struct st_pagecache_lock_info *next, **prev;
+ struct st_my_thread_var *thread;
+ my_bool write_lock;
+} PAGECACHE_LOCK_INFO;
+
+
+/* service functions maintain debugging info about pin & lock */
+
+
+/*
+ Links information about the thread that pinned/locked the block into the list
+
+ SYNOPSIS
+ info_link()
+ list the list to link in
+ node the node which should be linked
+*/
+
+static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node)
+{
+ if ((node->next= *list))
+ node->next->prev= &(node->next);
+ *list= node;
+ node->prev= list;
+}
+
+
+/*
+ Unlinks information about the thread that pinned/locked the block from the list
+
+ SYNOPSIS
+ info_unlink()
+ node the node which should be unlinked
+*/
+
+static void info_unlink(PAGECACHE_PIN_INFO *node)
+{
+ if ((*node->prev= node->next))
+ node->next->prev= node->prev;
+}
+
+
+/*
+ Finds information about the given thread in the list of threads which
+ pinned/locked this block.
+
+ SYNOPSIS
+ info_find()
+ list the list where to find the thread
+ thread thread ID (reference to the st_my_thread_var
+ of the thread)
+
+ RETURN
+ 0 - the thread was not found
+ pointer to the information node of the thread in the list
+*/
+
+static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list,
+ struct st_my_thread_var *thread)
+{
+ register PAGECACHE_PIN_INFO *i= list;
+ for(; i != 0; i= i->next)
+ if (i->thread == thread)
+ return i;
+ return 0;
+}
+#endif
+
+/* page cache block */
+struct st_pagecache_block_link
+{
+ struct st_pagecache_block_link
+ *next_used, **prev_used; /* to connect links in the LRU chain (ring) */
+ struct st_pagecache_block_link
+ *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
+ struct st_pagecache_hash_link
+ *hash_link; /* backward ptr to referring hash_link */
+ WQUEUE
+ wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */
+ uint requests; /* number of requests for the block */
+ byte *buffer; /* buffer for the block page */
+ uint status; /* state of the block */
+ uint pins; /* pin counter */
+#ifdef PAGECACHE_DEBUG
+ PAGECACHE_PIN_INFO *pin_list;
+ PAGECACHE_LOCK_INFO *lock_list;
+#endif
+ enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
+ enum pagecache_page_type type; /* type of the block */
+ uint hits_left; /* number of hits left until promotion */
+ ulonglong last_hit_time; /* timestamp of the last hit */
+ LSN rec_lsn; /* LSN when first became dirty */
+ KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
+};
+
+#ifdef PAGECACHE_DEBUG
+/* debug checks */
+static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_pin mode)
+{
+ struct st_my_thread_var *thread= my_thread_var;
+ PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread);
+ DBUG_ENTER("info_check_pin");
+ if (info)
+ {
+ if (mode == PAGECACHE_PIN_LEFT_UNPINNED)
+ {
+ DBUG_PRINT("info",
+ ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_UNPINNED!!!",
+ (ulong)thread, (ulong)block));
+ DBUG_RETURN(1);
+ }
+ else if (mode == PAGECACHE_PIN)
+ {
+ DBUG_PRINT("info",
+ ("info_check_pin: thread: 0x%lx block 0x%lx: PIN!!!",
+ (ulong)thread, (ulong)block));
+ DBUG_RETURN(1);
+ }
+ }
+ else
+ {
+ if (mode == PAGECACHE_PIN_LEFT_PINNED)
+ {
+ DBUG_PRINT("info",
+ ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_PINNED!!!",
+ (ulong)thread, (ulong)block));
+ DBUG_RETURN(1);
+ }
+ else if (mode == PAGECACHE_UNPIN)
+ {
+ DBUG_PRINT("info",
+ ("info_check_pin: thread: 0x%lx block 0x%lx: UNPIN!!!",
+ (ulong)thread, (ulong)block));
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Debug function which checks current lock/pin state and requested changes
+
+ SYNOPSIS
+ info_check_lock()
+ lock requested lock changes
+ pin requested pin changes
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin)
+{
+ struct st_my_thread_var *thread= my_thread_var;
+ PAGECACHE_LOCK_INFO *info=
+ (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list,
+ thread);
+ DBUG_ENTER("info_check_lock");
+ switch(lock)
+ {
+ case PAGECACHE_LOCK_LEFT_UNLOCKED:
+ if (pin != PAGECACHE_PIN_LEFT_UNPINNED ||
+ info)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_LEFT_READLOCKED:
+ if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ pin != PAGECACHE_PIN_LEFT_PINNED) ||
+ info == 0 || info->write_lock)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_LEFT_WRITELOCKED:
+ if (pin != PAGECACHE_PIN_LEFT_PINNED ||
+ info == 0 || !info->write_lock)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_READ:
+ if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ pin != PAGECACHE_PIN) ||
+ info != 0)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_WRITE:
+ if (pin != PAGECACHE_PIN ||
+ info != 0)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_READ_UNLOCK:
+ if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ pin != PAGECACHE_UNPIN) ||
+ info == 0 || info->write_lock)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_WRITE_UNLOCK:
+ if (pin != PAGECACHE_UNPIN ||
+ info == 0 || !info->write_lock)
+ goto error;
+ break;
+ case PAGECACHE_LOCK_WRITE_TO_READ:
+ if ((pin != PAGECACHE_PIN_LEFT_PINNED &&
+ pin != PAGECACHE_UNPIN) ||
+ info == 0 || !info->write_lock)
+ goto error;
+ break;
+ }
+ DBUG_RETURN(0);
+error:
+ DBUG_PRINT("info",
+ ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d,"
+ "to lock: %s, to pin: %s",
+ (ulong)thread, (ulong)block, test(info),
+ (info ? info->write_lock : 0),
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ DBUG_RETURN(1);
+}
+#endif
+
+#define FLUSH_CACHE 2000 /* sort this many blocks at once */
+
+static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block);
+static void test_key_cache(PAGECACHE *pagecache,
+ const char *where, my_bool lock);
+
+#define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) + \
+ (ulong) (f).file) & (p->hash_entries-1))
+#define FILE_HASH(f) ((uint) (f).file & (PAGECACHE_CHANGED_BLOCKS_HASH - 1))
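As a quick worked example (illustrative numbers): with p->hash_entries = 512, a request for page 7 of a file whose descriptor is 12 maps to hash bucket (7 + 12) & 511 = 19; hash_entries is kept a power of two by init_pagecache() precisely so that this masking acts as a modulo.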
+
+#define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log"
+
+#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG)
+#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG
+#endif
+
+#if defined(PAGECACHE_DEBUG_LOG)
+static FILE *pagecache_debug_log= NULL;
+static void pagecache_debug_print _VARARGS((const char *fmt, ...));
+#define PAGECACHE_DEBUG_OPEN \
+ if (!pagecache_debug_log) \
+ { \
+ pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"); \
+ (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \
+ }
+
+#define PAGECACHE_DEBUG_CLOSE \
+ if (pagecache_debug_log) \
+ { \
+ fclose(pagecache_debug_log); \
+ pagecache_debug_log= 0; \
+ }
+#else
+#define PAGECACHE_DEBUG_OPEN
+#define PAGECACHE_DEBUG_CLOSE
+#endif /* defined(PAGECACHE_DEBUG_LOG) */
+
+#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG)
+#define KEYCACHE_DBUG_PRINT(l, m) \
+ { if (pagecache_debug_log) \
+ fprintf(pagecache_debug_log, "%s: ", l); \
+ pagecache_debug_print m; }
+
+#define KEYCACHE_DBUG_ASSERT(a) \
+ { if (! (a) && pagecache_debug_log) \
+ fclose(pagecache_debug_log); \
+ assert(a); }
+#else
+#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
+#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
+#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */
+
+#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF)
+#ifdef THREAD
+static long pagecache_thread_id;
+#define KEYCACHE_THREAD_TRACE(l) \
+ KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id))
+
+#define KEYCACHE_THREAD_TRACE_BEGIN(l) \
+ { struct st_my_thread_var *thread_var= my_thread_var; \
+ pagecache_thread_id= thread_var->id; \
+ KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) }
+
+#define KEYCACHE_THREAD_TRACE_END(l) \
+ KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id))
+#else /* THREAD */
+#define KEYCACHE_THREAD_TRACE(l) KEYCACHE_DBUG_PRINT(l,(""))
+#define KEYCACHE_THREAD_TRACE_BEGIN(l) KEYCACHE_DBUG_PRINT(l,(""))
+#define KEYCACHE_THREAD_TRACE_END(l) KEYCACHE_DBUG_PRINT(l,(""))
+#endif /* THREAD */
+#else
+#define KEYCACHE_THREAD_TRACE_BEGIN(l)
+#define KEYCACHE_THREAD_TRACE_END(l)
+#define KEYCACHE_THREAD_TRACE(l)
+#endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */
+
+#define BLOCK_NUMBER(p, b) \
+ ((uint) (((char*)(b)-(char *) p->block_root)/sizeof(PAGECACHE_BLOCK_LINK)))
+#define PAGECACHE_HASH_LINK_NUMBER(p, h) \
+ ((uint) (((char*)(h)-(char *) p->hash_link_root)/ \
+ sizeof(PAGECACHE_HASH_LINK)))
+
+#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG)
+static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
+ pthread_mutex_t *mutex);
+#else
+#define pagecache_pthread_cond_wait pthread_cond_wait
+#endif
+
+#if defined(PAGECACHE_DEBUG)
+static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex);
+static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex);
+static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond);
+#define pagecache_pthread_mutex_lock(M) \
+{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \
+ ___pagecache_pthread_mutex_lock(M);}
+#define pagecache_pthread_mutex_unlock(M) \
+{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \
+ ___pagecache_pthread_mutex_unlock(M);}
+#define pagecache_pthread_cond_signal(M) \
+{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \
+ ___pagecache_pthread_cond_signal(M);}
+#else
+#define pagecache_pthread_mutex_lock pthread_mutex_lock
+#define pagecache_pthread_mutex_unlock pthread_mutex_unlock
+#define pagecache_pthread_cond_signal pthread_cond_signal
+#endif /* defined(PAGECACHE_DEBUG) */
+
+extern my_bool translog_flush(LSN lsn);
+
+/*
+ Write page to the disk
+
+ SYNOPSIS
+ pagecache_fwrite()
+ pagecache - page cache pointer
+ filedesc - pagecache file descriptor structure
+ buffer - buffer which we will write
+ type - page type (plain or with LSN)
+ flags - MYF() flags
+
+ RETURN
+ 0 - OK
+ !=0 - Error
+*/
+
+static uint pagecache_fwrite(PAGECACHE *pagecache,
+ PAGECACHE_FILE *filedesc,
+ byte *buffer,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_type type,
+ myf flags)
+{
+ DBUG_ENTER("pagecache_fwrite");
+ if (type == PAGECACHE_LSN_PAGE)
+ {
+ LSN lsn;
+ DBUG_PRINT("info", ("Log handler call"));
+ /* TODO: integrate with page format */
+#define PAGE_LSN_OFFSET 0
+ lsn= lsn_korr(buffer + PAGE_LSN_OFFSET);
+ /*
+ check CONTROL_FILE_IMPOSSIBLE_FILENO &
+ CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET
+ */
+ DBUG_ASSERT(lsn != 0);
+ translog_flush(lsn);
+ }
+ DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size,
+ (pageno)<<(pagecache->shift), flags));
+}
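For example (illustrative values): with a block_size of 8192 the shift is 13, so page 5 is written at byte offset 5 << 13 = 40960 in the data file; pagecache_fread() below uses the same pageno-to-offset conversion.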
+
+
+/*
+ Read page from the disk
+
+ SYNOPSIS
+ pagecache_fread()
+ pagecache - page cache pointer
+ filedesc - pagecache file descriptor structure
+ buffer - buffer in which we will read
+ pageno - page number
+ flags - MYF() flags
+*/
+#define pagecache_fread(pagecache, filedesc, buffer, pageno, flags) \
+ my_pread((filedesc)->file, buffer, pagecache->block_size, \
+ (pageno)<<(pagecache->shift), flags)
+
+
+/*
+ next_power(value) is 2 at the power of (1+floor(log2(value)));
+ e.g. next_power(2)=4, next_power(3)=4.
+*/
+static inline uint next_power(uint value)
+{
+ return (uint) my_round_up_to_next_power((uint32) value) << 1;
+}
+
+
+/*
+ Initialize a page cache
+
+ SYNOPSIS
+ init_pagecache()
+ pagecache pointer to a page cache data structure
+ use_mem total memory to use for the page cache
+ division_limit division limit (may be zero)
+ age_threshold age threshold (may be zero)
+ block_size size of block (should be power of 2)
+
+ RETURN VALUE
+ number of blocks in the key cache, if successful,
+ 0 - otherwise.
+
+ NOTES.
+ if pagecache->inited != 0 we assume that the key cache
+ is already initialized. This is for now used by myisamchk, but shouldn't
+ be something that a program should rely on!
+
+ It's assumed that no two threads call this function simultaneously
+ referring to the same key cache handle.
+
+*/
+
+int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem,
+ uint division_limit, uint age_threshold,
+ uint block_size)
+{
+ uint blocks, hash_links, length;
+ int error;
+ DBUG_ENTER("init_pagecache");
+ DBUG_ASSERT(block_size >= 512);
+
+ PAGECACHE_DEBUG_OPEN;
+ if (pagecache->inited && pagecache->disk_blocks > 0)
+ {
+ DBUG_PRINT("warning",("key cache already in use"));
+ DBUG_RETURN(0);
+ }
+
+ pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0;
+ pagecache->global_cache_read= pagecache->global_cache_write= 0;
+ pagecache->disk_blocks= -1;
+ if (! pagecache->inited)
+ {
+ pagecache->inited= 1;
+ pagecache->in_init= 0;
+ pthread_mutex_init(&pagecache->cache_lock, MY_MUTEX_INIT_FAST);
+ pagecache->resize_queue.last_thread= NULL;
+ }
+
+ pagecache->mem_size= use_mem;
+ pagecache->block_size= block_size;
+ pagecache->shift= my_bit_log2(block_size);
+ DBUG_PRINT("info", ("block_size: %u",
+ block_size));
+ DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size);
+
+ blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) +
+ 2 * sizeof(PAGECACHE_HASH_LINK) +
+ sizeof(PAGECACHE_HASH_LINK*) *
+ 5/4 + block_size));
+ /* It doesn't make sense to have too few blocks (less than 8) */
+ if (blocks >= 8 && pagecache->disk_blocks < 0)
+ {
+ for ( ; ; )
+ {
+ /* Set hash_entries to the next bigger power of 2 */
+ if ((pagecache->hash_entries= next_power(blocks)) <
+ (blocks) * 5/4)
+ pagecache->hash_entries<<= 1;
+ hash_links= 2 * blocks;
+#if defined(MAX_THREADS)
+ if (hash_links < MAX_THREADS + blocks - 1)
+ hash_links= MAX_THREADS + blocks - 1;
+#endif
+ while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) +
+ ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) +
+ ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) *
+ pagecache->hash_entries))) +
+ (((ulong) blocks) << pagecache->shift) > use_mem)
+ blocks--;
+ /* Allocate memory for cache page buffers */
+ if ((pagecache->block_mem=
+ my_large_malloc((ulong) blocks * pagecache->block_size,
+ MYF(MY_WME))))
+ {
+ /*
+ Allocate memory for blocks, hash_links and hash entries;
+ For each block 2 hash links are allocated
+ */
+ if ((pagecache->block_root=
+ (PAGECACHE_BLOCK_LINK*) my_malloc((uint) length,
+ MYF(0))))
+ break;
+ my_large_free(pagecache->block_mem, MYF(0));
+ pagecache->block_mem= 0;
+ }
+ if (blocks < 8)
+ {
+ my_errno= ENOMEM;
+ goto err;
+ }
+ blocks= blocks / 4*3;
+ }
+ pagecache->blocks_unused= (ulong) blocks;
+ pagecache->disk_blocks= (int) blocks;
+ pagecache->hash_links= hash_links;
+ pagecache->hash_root=
+ (PAGECACHE_HASH_LINK**) ((char*) pagecache->block_root +
+ ALIGN_SIZE(blocks*sizeof(PAGECACHE_BLOCK_LINK)));
+ pagecache->hash_link_root=
+ (PAGECACHE_HASH_LINK*) ((char*) pagecache->hash_root +
+ ALIGN_SIZE((sizeof(PAGECACHE_HASH_LINK*) *
+ pagecache->hash_entries)));
+ bzero((byte*) pagecache->block_root,
+ pagecache->disk_blocks * sizeof(PAGECACHE_BLOCK_LINK));
+ bzero((byte*) pagecache->hash_root,
+ pagecache->hash_entries * sizeof(PAGECACHE_HASH_LINK*));
+ bzero((byte*) pagecache->hash_link_root,
+ pagecache->hash_links * sizeof(PAGECACHE_HASH_LINK));
+ pagecache->hash_links_used= 0;
+ pagecache->free_hash_list= NULL;
+ pagecache->blocks_used= pagecache->blocks_changed= 0;
+
+ pagecache->global_blocks_changed= 0;
+ pagecache->blocks_available=0; /* For debugging */
+
+ /* The LRU chain is empty after initialization */
+ pagecache->used_last= NULL;
+ pagecache->used_ins= NULL;
+ pagecache->free_block_list= NULL;
+ pagecache->time= 0;
+ pagecache->warm_blocks= 0;
+ pagecache->min_warm_blocks= (division_limit ?
+ blocks * division_limit / 100 + 1 :
+ blocks);
+ pagecache->age_threshold= (age_threshold ?
+ blocks * age_threshold / 100 :
+ blocks);
+
+ pagecache->cnt_for_resize_op= 0;
+ pagecache->resize_in_flush= 0;
+ pagecache->can_be_used= 1;
+
+ pagecache->waiting_for_hash_link.last_thread= NULL;
+ pagecache->waiting_for_block.last_thread= NULL;
+ DBUG_PRINT("exit",
+ ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\
+ hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx",
+ pagecache->disk_blocks, (long) pagecache->block_root,
+ pagecache->hash_entries, (long) pagecache->hash_root,
+ pagecache->hash_links, (long) pagecache->hash_link_root));
+ bzero((gptr) pagecache->changed_blocks,
+ sizeof(pagecache->changed_blocks[0]) *
+ PAGECACHE_CHANGED_BLOCKS_HASH);
+ bzero((gptr) pagecache->file_blocks,
+ sizeof(pagecache->file_blocks[0]) *
+ PAGECACHE_CHANGED_BLOCKS_HASH);
+ }
+
+ pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0;
+ DBUG_RETURN((uint) pagecache->blocks);
+
+err:
+ error= my_errno;
+ pagecache->disk_blocks= 0;
+ pagecache->blocks= 0;
+ if (pagecache->block_mem)
+ {
+ my_large_free((gptr) pagecache->block_mem, MYF(0));
+ pagecache->block_mem= NULL;
+ }
+ if (pagecache->block_root)
+ {
+ my_free((gptr) pagecache->block_root, MYF(0));
+ pagecache->block_root= NULL;
+ }
+ my_errno= error;
+ pagecache->can_be_used= 0;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Flush all blocks in the key cache to disk
+*/
+
+#ifdef NOT_USED
+static int flush_all_key_blocks(PAGECACHE *pagecache)
+{
+#if defined(PAGECACHE_DEBUG)
+ uint cnt=0;
+#endif
+ while (pagecache->blocks_changed > 0)
+ {
+ PAGECACHE_BLOCK_LINK *block;
+ for (block= pagecache->used_last->next_used ; ; block=block->next_used)
+ {
+ if (block->hash_link)
+ {
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
+#endif
+ if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file,
+ FLUSH_RELEASE))
+ return 1;
+ break;
+ }
+ if (block == pagecache->used_last)
+ break;
+ }
+ }
+ return 0;
+}
+#endif /* NOT_USED */
+
+/*
+ Resize a key cache
+
+ SYNOPSIS
+ resize_pagecache()
+ pagecache pointer to a page cache data structure
+ use_mem total memory to use for the new key cache
+ division_limit new division limit (if not zero)
+ age_threshold new age threshold (if not zero)
+
+ RETURN VALUE
+ number of blocks in the key cache, if successful,
+ 0 - otherwise.
+
+ NOTES.
+ The function first compares the memory size parameter
+ with the key cache value.
+
+ If they differ, the function frees the memory allocated for the
+ old key cache blocks by calling the end_pagecache function and
+ then rebuilds the key cache with new blocks by calling
+ init_pagecache.
+
+ The function starts the operation only when all other threads
+ performing operations with the key cache let it proceed
+ (when cnt_for_resize=0).
+
+ Before being usable, this function needs:
+ - to receive fixes for BUG#17332 "changing key_buffer_size on a running
+ server can crash under load" similar to those done to the key cache
+ - to have us (Sanja) look at the additional constraints placed on
+ resizing, due to the page locking specific to this page cache.
+ So we disable it for now.
+*/
+#if NOT_USED /* keep disabled until code is fixed see above !! */
+int resize_pagecache(PAGECACHE *pagecache,
+ my_size_t use_mem, uint division_limit,
+ uint age_threshold)
+{
+ int blocks;
+#ifdef THREAD
+ struct st_my_thread_var *thread;
+ WQUEUE *wqueue;
+
+#endif
+ DBUG_ENTER("resize_pagecache");
+
+ if (!pagecache->inited)
+ DBUG_RETURN(pagecache->disk_blocks);
+
+ if(use_mem == pagecache->mem_size)
+ {
+ change_pagecache_param(pagecache, division_limit, age_threshold);
+ DBUG_RETURN(pagecache->disk_blocks);
+ }
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+
+#ifdef THREAD
+ wqueue= &pagecache->resize_queue;
+ thread= my_thread_var;
+ wqueue_link_into_queue(wqueue, thread);
+
+ while (wqueue->last_thread->next != thread)
+ {
+ pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ }
+#endif
+
+ pagecache->resize_in_flush= 1;
+ if (flush_all_key_blocks(pagecache))
+ {
+ /* TODO: if this happens, we should write a warning in the log file ! */
+ pagecache->resize_in_flush= 0;
+ blocks= 0;
+ pagecache->can_be_used= 0;
+ goto finish;
+ }
+ pagecache->resize_in_flush= 0;
+ pagecache->can_be_used= 0;
+#ifdef THREAD
+ while (pagecache->cnt_for_resize_op)
+ {
+ KEYCACHE_DBUG_PRINT("resize_pagecache: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ }
+#else
+ KEYCACHE_DBUG_ASSERT(pagecache->cnt_for_resize_op == 0);
+#endif
+
+ end_pagecache(pagecache, 0); /* Don't free mutex */
+ /* The following will work even if use_mem is 0 */
+ blocks= init_pagecache(pagecache, use_mem, division_limit,
+ age_threshold, pagecache->block_size);
+
+finish:
+#ifdef THREAD
+ wqueue_unlink_from_queue(wqueue, thread);
+ /* Signal for the next resize request to proceed if any */
+ if (wqueue->last_thread)
+ {
+ KEYCACHE_DBUG_PRINT("resize_pagecache: signal",
+ ("thread %ld", wqueue->last_thread->next->id));
+ pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend);
+ }
+#endif
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_RETURN(blocks);
+}
+#endif /* NOT_USED */
+
+
+/*
+ Increment counter blocking resize key cache operation
+*/
+static inline void inc_counter_for_resize_op(PAGECACHE *pagecache)
+{
+ pagecache->cnt_for_resize_op++;
+}
+
+
+/*
+ Decrement counter blocking resize key cache operation;
+ Signal the operation to proceed when the counter becomes equal to zero
+*/
+static inline void dec_counter_for_resize_op(PAGECACHE *pagecache)
+{
+#ifdef THREAD
+ struct st_my_thread_var *last_thread;
+ if (!--pagecache->cnt_for_resize_op &&
+ (last_thread= pagecache->resize_queue.last_thread))
+ {
+ KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal",
+ ("thread %ld", last_thread->next->id));
+ pagecache_pthread_cond_signal(&last_thread->next->suspend);
+ }
+#else
+ pagecache->cnt_for_resize_op--;
+#endif
+}
+
+/*
+ Change the page cache parameters
+
+ SYNOPSIS
+ change_pagecache_param()
+ pagecache pointer to a page cache data structure
+ division_limit new division limit (if not zero)
+ age_threshold new age threshold (if not zero)
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ Presently the function resets the key cache parameters
+ concerning midpoint insertion strategy - division_limit and
+ age_threshold.
+*/
+
+void change_pagecache_param(PAGECACHE *pagecache, uint division_limit,
+ uint age_threshold)
+{
+ DBUG_ENTER("change_pagecache_param");
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (division_limit)
+ pagecache->min_warm_blocks= (pagecache->disk_blocks *
+ division_limit / 100 + 1);
+ if (age_threshold)
+ pagecache->age_threshold= (pagecache->disk_blocks *
+ age_threshold / 100);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Removes page cache from memory. Does NOT flush pages to disk.
+
+ SYNOPSIS
+ end_pagecache()
+ pagecache page cache handle
+ cleanup Complete free (Free also mutex for key cache)
+
+ RETURN VALUE
+ none
+*/
+
+void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
+{
+ DBUG_ENTER("end_pagecache");
+ DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache));
+
+ if (!pagecache->inited)
+ DBUG_VOID_RETURN;
+
+ if (pagecache->disk_blocks > 0)
+ {
+ if (pagecache->block_mem)
+ {
+ my_large_free((gptr) pagecache->block_mem, MYF(0));
+ pagecache->block_mem= NULL;
+ my_free((gptr) pagecache->block_root, MYF(0));
+ pagecache->block_root= NULL;
+ }
+ pagecache->disk_blocks= -1;
+ /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
+ pagecache->blocks_changed= 0;
+ }
+
+ DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
+ "writes: %lu r_requests: %lu reads: %lu",
+ pagecache->blocks_used, pagecache->global_blocks_changed,
+ (ulong) pagecache->global_cache_w_requests,
+ (ulong) pagecache->global_cache_write,
+ (ulong) pagecache->global_cache_r_requests,
+ (ulong) pagecache->global_cache_read));
+
+ if (cleanup)
+ {
+ pthread_mutex_destroy(&pagecache->cache_lock);
+ pagecache->inited= pagecache->can_be_used= 0;
+ PAGECACHE_DEBUG_CLOSE;
+ }
+ DBUG_VOID_RETURN;
+} /* end_pagecache */
+
+
+/*
+ Unlink a block from the chain of dirty/clean blocks
+*/
+
+static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block)
+{
+ if (block->next_changed)
+ block->next_changed->prev_changed= block->prev_changed;
+ *block->prev_changed= block->next_changed;
+}
+
+
+/*
+ Link a block into the chain of dirty/clean blocks
+*/
+
+static inline void link_changed(PAGECACHE_BLOCK_LINK *block,
+ PAGECACHE_BLOCK_LINK **phead)
+{
+ block->prev_changed= phead;
+ if ((block->next_changed= *phead))
+ (*phead)->prev_changed= &block->next_changed;
+ *phead= block;
+}
+
+
+/*
+ Unlink a block from the chain of dirty/clean blocks, if it's asked for,
+ and link it to the chain of clean blocks for the specified file
+*/
+
+static void link_to_file_list(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ PAGECACHE_FILE *file, my_bool unlink)
+{
+ if (unlink)
+ unlink_changed(block);
+ link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]);
+ if (block->status & BLOCK_CHANGED)
+ {
+ block->status&= ~BLOCK_CHANGED;
+ block->rec_lsn= 0;
+ pagecache->blocks_changed--;
+ pagecache->global_blocks_changed--;
+ }
+}
+
+
+/*
+ Unlink a block from the chain of clean blocks for the specified
+ file and link it to the chain of dirty blocks for this file
+*/
+
+static inline void link_to_changed_list(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block)
+{
+ unlink_changed(block);
+ link_changed(block,
+ &pagecache->changed_blocks[FILE_HASH(block->hash_link->file)]);
+ block->status|=BLOCK_CHANGED;
+ pagecache->blocks_changed++;
+ pagecache->global_blocks_changed++;
+}
+
+
+/*
+ Link a block to the LRU chain at the beginning or at the end of
+ one of two parts.
+
+ SYNOPSIS
+ link_block()
+ pagecache pointer to a page cache data structure
+ block pointer to the block to link to the LRU chain
+ hot <-> to link the block into the hot subchain
+ at_end <-> to link the block at the end of the subchain
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ The LRU chain is represented by a circular list of block structures.
+ The list is double-linked, of the (**prev, *next) type.
+ The LRU chain is divided into two parts - hot and warm.
+ There are two pointers to access the last blocks of these two
+ parts. The beginning of the warm part follows right after the
+ end of the hot part.
+ Only blocks of the warm part can be used for replacement.
+ The first block from the beginning of this subchain is always
+ taken for eviction (pagecache->used_last->next_used)
+
+ LRU chain: +------+ H O T +------+
+ +----| end |----...<----| beg |----+
+ | +------+last +------+ |
+ v<-link in latest hot (new end) |
+ | link in latest warm (new end)->^
+ | +------+ W A R M +------+ |
+ +----| beg |---->...----| end |----+
+ +------+ +------+ins
+ first for eviction
+*/
+
+static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
+ my_bool hot, my_bool at_end)
+{
+ PAGECACHE_BLOCK_LINK *ins;
+ PAGECACHE_BLOCK_LINK **ptr_ins;
+
+ BLOCK_INFO(block);
+ KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests));
+#ifdef THREAD
+ if (!hot && pagecache->waiting_for_block.last_thread)
+ {
+ /* Signal that in the LRU warm sub-chain an available block has appeared */
+ struct st_my_thread_var *last_thread=
+ pagecache->waiting_for_block.last_thread;
+ struct st_my_thread_var *first_thread= last_thread->next;
+ struct st_my_thread_var *next_thread= first_thread;
+ PAGECACHE_HASH_LINK *hash_link=
+ (PAGECACHE_HASH_LINK *) first_thread->opt_info;
+ struct st_my_thread_var *thread;
+ do
+ {
+ thread= next_thread;
+ next_thread= thread->next;
+ /*
+ We notify about the event all threads that ask
+ for the same page as the first thread in the queue
+ */
+ if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link)
+ {
+ KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id));
+ pagecache_pthread_cond_signal(&thread->suspend);
+ wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread);
+ block->requests++;
+ }
+ }
+ while (thread != last_thread);
+ hash_link->block= block;
+ KEYCACHE_THREAD_TRACE("link_block: after signaling");
+#if defined(PAGECACHE_DEBUG)
+ KEYCACHE_DBUG_PRINT("link_block",
+ ("linked,unlinked block %u status=%x #requests=%u #available=%u",
+ BLOCK_NUMBER(pagecache, block), block->status,
+ block->requests, pagecache->blocks_available));
+#endif
+ return;
+ }
+#else /* THREAD */
+ KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread));
+ /* Condition not transformed using DeMorgan, to keep the text identical */
+#endif /* THREAD */
+ ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last;
+ ins= *ptr_ins;
+ if (ins)
+ {
+ ins->next_used->prev_used= &block->next_used;
+ block->next_used= ins->next_used;
+ block->prev_used= &ins->next_used;
+ ins->next_used= block;
+ if (at_end)
+ *ptr_ins= block;
+ }
+ else
+ {
+ /* The LRU chain is empty */
+ pagecache->used_last= pagecache->used_ins= block->next_used= block;
+ block->prev_used= &block->next_used;
+ }
+ KEYCACHE_THREAD_TRACE("link_block");
+#if defined(PAGECACHE_DEBUG)
+ pagecache->blocks_available++;
+ KEYCACHE_DBUG_PRINT("link_block",
+ ("linked block %u:%1u status=%x #requests=%u #available=%u",
+ BLOCK_NUMBER(pagecache, block), at_end, block->status,
+ block->requests, pagecache->blocks_available));
+ KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <=
+ pagecache->blocks_used);
+#endif
+}
+
+
+/*
+ Unlink a block from the LRU chain
+
+ SYNOPSIS
+ unlink_block()
+ pagecache pointer to a page cache data structure
+ block pointer to the block to unlink from the LRU chain
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ See NOTES for link_block
+*/
+
+static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("unlink_block");
+ DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block));
+ if (block->next_used == block)
+ /* The list contains only one member */
+ pagecache->used_last= pagecache->used_ins= NULL;
+ else
+ {
+ block->next_used->prev_used= block->prev_used;
+ *block->prev_used= block->next_used;
+ if (pagecache->used_last == block)
+ pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
+ next_used, block->prev_used);
+ if (pagecache->used_ins == block)
+ pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
+ next_used, block->prev_used);
+ }
+ block->next_used= NULL;
+
+ KEYCACHE_THREAD_TRACE("unlink_block");
+#if defined(PAGECACHE_DEBUG)
+ KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0);
+ pagecache->blocks_available--;
+ KEYCACHE_DBUG_PRINT("unlink_block",
+ ("unlinked block 0x%lx (%u) status=%x #requests=%u #available=%u",
+ (ulong)block, BLOCK_NUMBER(pagecache, block), block->status,
+ block->requests, pagecache->blocks_available));
+ BLOCK_INFO(block);
+#endif
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Register requests for a block
+
+ SYNOPSIS
+ reg_requests()
+ pagecache this page cache reference
+ block the block we request reference
+ count how many requests we register (it is 1 everywhere)
+
+ NOTE
+ Registration of a request means we are going to use this block, so we
+ exclude it from the LRU if this is the first request
+*/
+static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
+ int count)
+{
+ DBUG_ENTER("reg_requests");
+ DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u",
+ (ulong)block, BLOCK_NUMBER(pagecache, block),
+ block->status, block->requests));
+ BLOCK_INFO(block);
+ if (! block->requests)
+ /* First request for the block unlinks it */
+ unlink_block(pagecache, block);
+ block->requests+= count;
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Unregister request for a block
+ linking it to the LRU chain if it's the last request
+
+ SYNOPSIS
+ unreg_request()
+ pagecache pointer to a page cache data structure
+ block pointer to the block to link to the LRU chain
+ at_end <-> to link the block at the end of the LRU chain
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ Every linking to the LRU chain decrements by one a special block
+ counter (if it's positive). If the at_end parameter is TRUE the block is
+ added either at the end of warm sub-chain or at the end of hot sub-chain.
+ It is added to the hot subchain if its counter is zero and the number of
+ blocks in the warm sub-chain is not less than some low limit (determined by
+ the division_limit parameter). Otherwise the block is added to the warm
+ sub-chain. If the at_end parameter is FALSE the block is always added
+ at beginning of the warm sub-chain.
+ Thus a warm block can be promoted to the hot sub-chain when its counter
+ becomes zero for the first time.
+ At the same time the block at the very beginning of the hot subchain
+ might be moved to the beginning of the warm subchain if it stays untouched
+ for a too long time (this time is determined by parameter age_threshold).
+*/
+
+static void unreg_request(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block, int at_end)
+{
+ DBUG_ENTER("unreg_request");
+ DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u",
+ (ulong)block, BLOCK_NUMBER(pagecache, block),
+ block->status, block->requests));
+ BLOCK_INFO(block);
+ DBUG_ASSERT(block->requests > 0);
+ if (! --block->requests)
+ {
+ my_bool hot;
+ if (block->hits_left)
+ block->hits_left--;
+ hot= !block->hits_left && at_end &&
+ pagecache->warm_blocks > pagecache->min_warm_blocks;
+ if (hot)
+ {
+ if (block->temperature == BLOCK_WARM)
+ pagecache->warm_blocks--;
+ block->temperature= BLOCK_HOT;
+ KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
+ pagecache->warm_blocks));
+ }
+ link_block(pagecache, block, hot, (my_bool)at_end);
+ block->last_hit_time= pagecache->time;
+ pagecache->time++;
+
+ block= pagecache->used_ins;
+ /* Check if we should move an aged hot block to the warm sub-chain */
+ if (block && pagecache->time - block->last_hit_time >
+ pagecache->age_threshold)
+ {
+ unlink_block(pagecache, block);
+ link_block(pagecache, block, 0, 0);
+ if (block->temperature != BLOCK_WARM)
+ {
+ pagecache->warm_blocks++;
+ block->temperature= BLOCK_WARM;
+ }
+ KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
+ pagecache->warm_blocks));
+ }
+ }
+ DBUG_VOID_RETURN;
+}
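A worked example of the promotion rules above (illustrative numbers): with 1000 blocks and division_limit= 20, init_pagecache() sets min_warm_blocks to 1000 * 20 / 100 + 1 = 201, so a block whose hits_left counter has reached zero is promoted to the hot sub-chain (when relinked at the end) only while more than 201 blocks are still warm; and since pagecache->time advances by one on every unreg_request() call, a hot block that has not been hit for more than age_threshold such operations is pushed back to the warm sub-chain.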
+
+/*
+ Remove a reader of the page in block
+*/
+
+static inline void remove_reader(PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("remove_reader");
+ BLOCK_INFO(block);
+ DBUG_ASSERT(block->hash_link->requests > 0);
+#ifdef THREAD
+ if (! --block->hash_link->requests && block->condvar)
+ pagecache_pthread_cond_signal(block->condvar);
+#else
+ --block->hash_link->requests;
+#endif
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Wait until the last reader of the page in block
+ signals on its termination
+*/
+
+static inline void wait_for_readers(PAGECACHE *pagecache
+ __attribute__((unused)),
+ PAGECACHE_BLOCK_LINK *block)
+{
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ while (block->hash_link->requests)
+ {
+ KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
+ ("suspend thread %ld block %u",
+ thread->id, BLOCK_NUMBER(pagecache, block)));
+ block->condvar= &thread->suspend;
+ pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
+ block->condvar= NULL;
+ }
+#else
+ KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0);
+#endif
+}
+
+
+/*
+ Add a hash link to a bucket in the hash_table
+*/
+
+static inline void link_hash(PAGECACHE_HASH_LINK **start,
+ PAGECACHE_HASH_LINK *hash_link)
+{
+ if (*start)
+ (*start)->prev= &hash_link->next;
+ hash_link->next= *start;
+ hash_link->prev= start;
+ *start= hash_link;
+}
+
+
+/*
+ Remove a hash link from the hash table
+*/
+
+static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
+{
+ KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u",
+ (uint) hash_link->file.file, (ulong) hash_link->pageno,
+ hash_link->requests));
+ KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
+ if ((*hash_link->prev= hash_link->next))
+ hash_link->next->prev= hash_link->prev;
+ hash_link->block= NULL;
+#ifdef THREAD
+ if (pagecache->waiting_for_hash_link.last_thread)
+ {
+ /* Signal that a free hash link has appeared */
+ struct st_my_thread_var *last_thread=
+ pagecache->waiting_for_hash_link.last_thread;
+ struct st_my_thread_var *first_thread= last_thread->next;
+ struct st_my_thread_var *next_thread= first_thread;
+ PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->opt_info);
+ struct st_my_thread_var *thread;
+
+ hash_link->file= first_page->file;
+ hash_link->pageno= first_page->pageno;
+ do
+ {
+ PAGECACHE_PAGE *page;
+ thread= next_thread;
+ page= (PAGECACHE_PAGE *) thread->opt_info;
+ next_thread= thread->next;
+ /*
+ We notify about the event all threads that ask
+ for the same page as the first thread in the queue
+ */
+ if (page->file.file == hash_link->file.file &&
+ page->pageno == hash_link->pageno)
+ {
+ KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
+ pagecache_pthread_cond_signal(&thread->suspend);
+ wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
+ }
+ }
+ while (thread != last_thread);
+ link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache,
+ hash_link->file,
+ hash_link->pageno)],
+ hash_link);
+ return;
+ }
+#else /* THREAD */
+ KEYCACHE_DBUG_ASSERT(! (pagecache->waiting_for_hash_link.last_thread));
+#endif /* THREAD */
+ hash_link->next= pagecache->free_hash_list;
+ pagecache->free_hash_list= hash_link;
+}
+
+
+/*
+ Get the hash link for the page if it is in the cache (do not put the
+ page in the cache if it is absent there)
+
+ SYNOPSIS
+ get_present_hash_link()
+ pagecache Pagecache reference
+ file file ID
+ pageno page number in the file
+ start where to put the pointer to the found hash bucket
+ (so it can be referred to directly)
+
+ RETURN
+ found hashlink pointer
+*/
+
+static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ PAGECACHE_HASH_LINK ***start)
+{
+ reg1 PAGECACHE_HASH_LINK *hash_link;
+#if defined(PAGECACHE_DEBUG)
+ int cnt;
+#endif
+ DBUG_ENTER("get_present_hash_link");
+
+ KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
+ (uint) file->file, (ulong) pageno));
+
+ /*
+ Find the bucket in the hash table for the pair (file, pageno);
+ start contains the head of the bucket list,
+ hash_link points to the first member of the list
+ */
+ hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache,
+ *file, pageno)]);
+#if defined(PAGECACHE_DEBUG)
+ cnt= 0;
+#endif
+ /* Look for an element for the pair (file, pageno) in the bucket chain */
+ while (hash_link &&
+ (hash_link->pageno != pageno ||
+ hash_link->file.file != file->file))
+ {
+ hash_link= hash_link->next;
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ if (! (cnt <= pagecache->hash_links_used))
+ {
+ int i;
+ for (i=0, hash_link= **start ;
+ i < cnt ; i++, hash_link= hash_link->next)
+ {
+ KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
+ (uint) hash_link->file.file, (ulong) hash_link->pageno));
+ }
+ }
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used);
+#endif
+ }
+ if (hash_link)
+ {
+ /* Register the request for the page */
+ hash_link->requests++;
+ }
+
+ DBUG_RETURN(hash_link);
+}
+
+
+/*
+ Get the hash link for a page
+*/
+
+static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno)
+{
+ reg1 PAGECACHE_HASH_LINK *hash_link;
+ PAGECACHE_HASH_LINK **start;
+
+ KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
+ (uint) file->file, (ulong) pageno));
+
+restart:
+ /* try to find the page in the cache */
+ hash_link= get_present_hash_link(pagecache, file, pageno,
+ &start);
+ if (!hash_link)
+ {
+ /* There is no hash link in the hash table for the pair (file, pageno) */
+ if (pagecache->free_hash_list)
+ {
+ hash_link= pagecache->free_hash_list;
+ pagecache->free_hash_list= hash_link->next;
+ }
+ else if (pagecache->hash_links_used < pagecache->hash_links)
+ {
+ hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++];
+ }
+ else
+ {
+#ifdef THREAD
+ /* Wait for a free hash link */
+ struct st_my_thread_var *thread= my_thread_var;
+ PAGECACHE_PAGE page;
+ KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
+ page.file= *file;
+ page.pageno= pageno;
+ thread->opt_info= (void *) &page;
+ wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread);
+ KEYCACHE_DBUG_PRINT("get_hash_link: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ thread->opt_info= NULL;
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+#endif
+ DBUG_PRINT("info", ("restarting..."));
+ goto restart;
+ }
+ hash_link->file= *file;
+ hash_link->pageno= pageno;
+ link_hash(start, hash_link);
+ /* Register the request for the page */
+ hash_link->requests++;
+ }
+
+ return hash_link;
+}
+
+
+/*
+ Get a block for the file page requested by a pagecache read/write operation.
+ If the page is not in the cache, return a free block; if there is none,
+ return the LRU block after saving its buffer if the page is dirty.
+
+ SYNOPSIS
+
+ find_block()
+ pagecache pointer to a page cache data structure
+ file handler for the file to read page from
+ pageno number of the page in the file
+ init_hits_left how to initialize the block hit counter for the page
+ wrmode <-> get the block for writing
+ reg_req register a request for the page
+ page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
+
+ RETURN VALUE
+ Pointer to the found block if successful, 0 - otherwise
+
+ NOTES.
+ For the page from file positioned at pageno the function checks whether
+ the page is in the key cache specified by the first parameter.
+ If this is the case it immediately returns the block.
+ If not, the function first chooses a block for this page. If there are
+ no unused blocks in the key cache yet, the function takes the block
+ at the very beginning of the warm sub-chain. It saves the page in that
+ block if it's dirty before returning the pointer to it.
+ The function returns in the page_st parameter the following values:
+ PAGE_READ - if the page is already in the block,
+ PAGE_TO_BE_READ - if it is to be read by the current thread
+ PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
+ If an error occurs the BLOCK_ERROR bit is set in the block status.
+ It might happen that there are no blocks in the LRU chain (in the warm
+ part): all blocks are unlinked for some read/write operations. Then the
+ function waits until the first of these operations links a block back.
+*/
+
+static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ int init_hits_left,
+ my_bool wrmode,
+ my_bool reg_req,
+ int *page_st)
+{
+ PAGECACHE_HASH_LINK *hash_link;
+ PAGECACHE_BLOCK_LINK *block;
+ int error= 0;
+ int page_status;
+
+ DBUG_ENTER("find_block");
+ KEYCACHE_THREAD_TRACE("find_block:begin");
+ DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
+ file->file, (ulong) pageno, wrmode));
+ KEYCACHE_DBUG_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d",
+ file->file, (ulong) pageno,
+ wrmode));
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache, "start of find_block", 0););
+#endif
+
+restart:
+ /* Find the hash link for the requested page (file, pageno) */
+ hash_link= get_hash_link(pagecache, file, pageno);
+
+ page_status= -1;
+ if ((block= hash_link->block) &&
+ block->hash_link == hash_link && (block->status & BLOCK_READ))
+ page_status= PAGE_READ;
+
+ if (wrmode && pagecache->resize_in_flush)
+ {
+ /* This is a write request during the flush phase of a resize operation */
+
+ if (page_status != PAGE_READ)
+ {
+ /* We don't need the page in the cache: we are going to write on disk */
+ DBUG_ASSERT(hash_link->requests > 0);
+ hash_link->requests--;
+ unlink_hash(pagecache, hash_link);
+ return 0;
+ }
+ if (!(block->status & BLOCK_IN_FLUSH))
+ {
+ DBUG_ASSERT(hash_link->requests > 0);
+ hash_link->requests--;
+ /*
+ Remove block to invalidate the page in the block buffer
+ as we are going to write directly on disk.
+ Although we have an exclusive lock for the updated key part
+ the control can be yielded by the current thread as we might
+ have unfinished readers of other key parts in the block
+ buffer. Still we are guaranteed not to have any readers
+ of the key part we are writing into until the block is
+ removed from the cache as we set the BLOCK_REASSIGNED
+ flag (see the code below that handles reading requests).
+ */
+ free_block(pagecache, block);
+ return 0;
+ }
+ /* Wait until the page is flushed on disk */
+ DBUG_ASSERT(hash_link->requests > 0);
+ hash_link->requests--;
+ {
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("find_block: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while(thread->next);
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+ /*
+ Given the use of "resize_in_flush", it seems impossible
+ that this whole branch is ever entered in single-threaded case
+ because "(wrmode && pagecache->resize_in_flush)" cannot be true.
+ TODO: Check this, and then put the whole branch into the
+ "#ifdef THREAD" guard.
+ */
+#endif
+ }
+ /* Invalidate page in the block if it has not been done yet */
+ if (block->status)
+ free_block(pagecache, block);
+ return 0;
+ }
+
+ if (page_status == PAGE_READ &&
+ (block->status & (BLOCK_IN_SWITCH | BLOCK_REASSIGNED)))
+ {
+ /* This is a request for a page to be removed from cache */
+
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("request for old page in block %u "
+ "wrmode: %d block->status: %d",
+ BLOCK_NUMBER(pagecache, block), wrmode,
+ block->status));
+ /*
+ Only reading requests can proceed until the old dirty page is flushed,
+ all others are to be suspended, then resubmitted
+ */
+ if (!wrmode && !(block->status & BLOCK_REASSIGNED))
+ {
+ if (reg_req)
+ reg_requests(pagecache, block, 1);
+ }
+ else
+ {
+ DBUG_ASSERT(hash_link->requests > 0);
+ hash_link->requests--;
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("request waiting for old page to be saved"));
+ {
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ /* Put the request into the queue of those waiting for the old page */
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ /* Wait until the request can be resubmitted */
+ do
+ {
+ KEYCACHE_DBUG_PRINT("find_block: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while(thread->next);
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+ /* No parallel requests in single-threaded case */
+#endif
+ }
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("request for old page resubmitted"));
+ DBUG_PRINT("info", ("restarting..."));
+ /* Resubmit the request */
+ goto restart;
+ }
+ block->status&= ~BLOCK_IN_SWITCH;
+ }
+ else
+ {
+ /* This is a request for a new page or for a page not to be removed */
+ if (! block)
+ {
+ /* No block is assigned for the page yet */
+ if (pagecache->blocks_unused)
+ {
+ if (pagecache->free_block_list)
+ {
+ /* There is a block in the free list. */
+ block= pagecache->free_block_list;
+ pagecache->free_block_list= block->next_used;
+ block->next_used= NULL;
+ }
+ else
+ {
+ /* There are some never-used blocks, take the first of them */
+ block= &pagecache->block_root[pagecache->blocks_used];
+ block->buffer= ADD_TO_PTR(pagecache->block_mem,
+ ((ulong) pagecache->blocks_used*
+ pagecache->block_size),
+ byte*);
+ pagecache->blocks_used++;
+ }
+ pagecache->blocks_unused--;
+ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT(block->pins == 0);
+ block->status= 0;
+#ifndef DBUG_OFF
+ block->type= PAGECACHE_EMPTY_PAGE;
+#endif
+ block->requests= 1;
+ block->temperature= BLOCK_COLD;
+ block->hits_left= init_hits_left;
+ block->last_hit_time= 0;
+ link_to_file_list(pagecache, block, file, 0);
+ block->hash_link= hash_link;
+ hash_link->block= block;
+ page_status= PAGE_TO_BE_READ;
+ DBUG_PRINT("info", ("page to be read set for page 0x%lx",
+ (ulong)block));
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("got free or never used block %u",
+ BLOCK_NUMBER(pagecache, block)));
+ }
+ else
+ {
+ /* There are no never-used blocks, use a block from the LRU chain */
+
+ /*
+ Wait until a new block is added to the LRU chain;
+ several threads might wait here for the same page,
+ all of them must get the same block
+ */
+
+#ifdef THREAD
+ if (! pagecache->used_last)
+ {
+ struct st_my_thread_var *thread= my_thread_var;
+ thread->opt_info= (void *) hash_link;
+ wqueue_link_into_queue(&pagecache->waiting_for_block, thread);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("find_block: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while (thread->next);
+ thread->opt_info= NULL;
+ }
+#else
+ KEYCACHE_DBUG_ASSERT(pagecache->used_last);
+#endif
+ block= hash_link->block;
+ if (! block)
+ {
+ /*
+ Take the first block from the LRU chain
+ unlinking it from the chain
+ */
+ block= pagecache->used_last->next_used;
+ block->hits_left= init_hits_left;
+ block->last_hit_time= 0;
+ if (reg_req)
+ reg_requests(pagecache, block, 1);
+ hash_link->block= block;
+ }
+ BLOCK_INFO(block);
+ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT(block->pins == 0);
+
+ if (block->hash_link != hash_link &&
+ ! (block->status & BLOCK_IN_SWITCH) )
+ {
+ /* this is a primary request for a new page */
+ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT(block->pins == 0);
+ block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK);
+
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("got block %u for new page",
+ BLOCK_NUMBER(pagecache, block)));
+
+ if (block->status & BLOCK_CHANGED)
+ {
+ /* The block contains a dirty page - push it out of the cache */
+
+ KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /*
+ The call is thread safe because only the current
+ thread might change the block->hash_link value
+ */
+ DBUG_ASSERT(block->pins == 0);
+ error= pagecache_fwrite(pagecache,
+ &block->hash_link->file,
+ block->buffer,
+ block->hash_link->pageno,
+ block->type,
+ MYF(MY_NABP | MY_WAIT_IF_FULL));
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ pagecache->global_cache_write++;
+ }
+
+ block->status|= BLOCK_REASSIGNED;
+ if (block->hash_link)
+ {
+ /*
+ Wait until all pending read requests
+ for this page are executed
+ (we could have avoided this waiting, if we had read
+ a page in the cache in a sweep, without yielding control)
+ */
+ wait_for_readers(pagecache, block);
+
+ /* Remove the hash link for this page from the hash table */
+ unlink_hash(pagecache, block->hash_link);
+ /* All pending requests for this page must be resubmitted */
+#ifdef THREAD
+ if (block->wqueue[COND_FOR_SAVED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
+#endif
+ }
+ link_to_file_list(pagecache, block, file,
+ (my_bool)(block->hash_link ? 1 : 0));
+ BLOCK_INFO(block);
+ block->status= error? BLOCK_ERROR : 0;
+#ifndef DBUG_OFF
+ block->type= PAGECACHE_EMPTY_PAGE;
+#endif
+ block->hash_link= hash_link;
+ page_status= PAGE_TO_BE_READ;
+ DBUG_PRINT("info", ("page to be read set for page 0x%lx",
+ (ulong)block));
+
+ KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
+ KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
+ }
+ else
+ {
+ /* This is for secondary requests for a new page only */
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("block->hash_link: %p hash_link: %p "
+ "block->status: %u", block->hash_link,
+ hash_link, block->status ));
+ page_status= (((block->hash_link == hash_link) &&
+ (block->status & BLOCK_READ)) ?
+ PAGE_READ : PAGE_WAIT_TO_BE_READ);
+ }
+ }
+ pagecache->global_cache_read++;
+ }
+ else
+ {
+ if (reg_req)
+ reg_requests(pagecache, block, 1);
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("block->hash_link: %p hash_link: %p "
+ "block->status: %u", block->hash_link,
+ hash_link, block->status ));
+ page_status= (((block->hash_link == hash_link) &&
+ (block->status & BLOCK_READ)) ?
+ PAGE_READ : PAGE_WAIT_TO_BE_READ);
+ }
+ }
+
+ KEYCACHE_DBUG_ASSERT(page_status != -1);
+ *page_st= page_status;
+ DBUG_PRINT("info",
+ ("block: 0x%lx fd: %u pos %lu block->status %u page_status %u",
+ (ulong) block, (uint) file->file,
+ (ulong) pageno, block->status, (uint) page_status));
+ KEYCACHE_DBUG_PRINT("find_block",
+ ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d",
+ (ulong) block,
+ file->file, (ulong) pageno, block->status,
+ page_status));
+
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache, "end of find_block",0););
+#endif
+ KEYCACHE_THREAD_TRACE("find_block:end");
+ DBUG_RETURN(block);
+}
+
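+/*
+  Illustrative sketch (not part of the original change): how a caller acts
+  on the page_st value returned by find_block(). Variable names here are
+  hypothetical; pagecache_valid_read() below is the real call site this is
+  modelled on.
+
+    int page_st;
+    PAGECACHE_BLOCK_LINK *block= find_block(pagecache, file, pageno, level,
+                                            0, 1, &page_st);
+    if (page_st != PAGE_READ)
+      read_block(pagecache, block,
+                 (my_bool)(page_st == PAGE_TO_BE_READ), 0, 0);
+
+  PAGE_READ means the data is already in block->buffer; PAGE_TO_BE_READ
+  means this thread must read it itself; PAGE_WAIT_TO_BE_READ means another
+  thread is reading it and read_block() only waits for it to finish.
+*/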
+
+static void add_pin(PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("add_pin");
+ DBUG_PRINT("enter", ("block 0x%lx pins: %u",
+ (ulong) block,
+ block->pins));
+ BLOCK_INFO(block);
+ block->pins++;
+#ifdef PAGECACHE_DEBUG
+ {
+ PAGECACHE_PIN_INFO *info=
+ (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0));
+ info->thread= my_thread_var;
+ info_link(&block->pin_list, info);
+ }
+#endif
+ DBUG_VOID_RETURN;
+}
+
+static void remove_pin(PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("remove_pin");
+ DBUG_PRINT("enter", ("block 0x%lx pins: %u",
+ (ulong) block,
+ block->pins));
+ BLOCK_INFO(block);
+ DBUG_ASSERT(block->pins > 0);
+ block->pins--;
+#ifdef PAGECACHE_DEBUG
+ {
+ PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var);
+ DBUG_ASSERT(info != 0);
+ info_unlink(info);
+ my_free((gptr) info, MYF(0));
+ }
+#endif
+ DBUG_VOID_RETURN;
+}
+#ifdef PAGECACHE_DEBUG
+static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
+{
+ PAGECACHE_LOCK_INFO *info=
+ (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0));
+ info->thread= my_thread_var;
+ info->write_lock= wl;
+ info_link((PAGECACHE_PIN_INFO **)&block->lock_list,
+ (PAGECACHE_PIN_INFO *)info);
+}
+static void info_remove_lock(PAGECACHE_BLOCK_LINK *block)
+{
+ PAGECACHE_LOCK_INFO *info=
+ (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
+ my_thread_var);
+ DBUG_ASSERT(info != 0);
+ info_unlink((PAGECACHE_PIN_INFO *)info);
+ my_free((gptr)info, MYF(0));
+}
+static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
+{
+ PAGECACHE_LOCK_INFO *info=
+ (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
+ my_thread_var);
+ DBUG_ASSERT(info != 0 && info->write_lock != wl);
+ info->write_lock= wl;
+}
+#else
+#define info_add_lock(B,W)
+#define info_remove_lock(B)
+#define info_change_lock(B,W)
+#endif
+
+/*
+ Put on the block write lock
+
+ SYNOPSIS
+ get_wrlock()
+ pagecache pointer to a page cache data structure
+ block the block to work with
+
+ RETURN
+ 0 - OK
+ 1 - Can't lock this block, need retry
+*/
+
+static my_bool get_wrlock(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block)
+{
+ PAGECACHE_FILE file= block->hash_link->file;
+ pgcache_page_no_t pageno= block->hash_link->pageno;
+ DBUG_ENTER("get_wrlock");
+ DBUG_PRINT("info", ("the block 0x%lx "
+ "files %d(%d) pages %d(%d)",
+ (ulong)block,
+ file.file, block->hash_link->file.file,
+ pageno, block->hash_link->pageno));
+ BLOCK_INFO(block);
+ while (block->status & BLOCK_WRLOCK)
+ {
+ /* Lock failed; we will wait */
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ DBUG_PRINT("info", ("fail to lock, waiting... 0x%lx", (ulong)block));
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread);
+ dec_counter_for_resize_op(pagecache);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("get_wrlock: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while(thread->next);
+#else
+ DBUG_ASSERT(0);
+#endif
+ BLOCK_INFO(block);
+ if ((block->status & (BLOCK_REASSIGNED | BLOCK_IN_SWITCH)) ||
+ file.file != block->hash_link->file.file ||
+ pageno != block->hash_link->pageno)
+ {
+ DBUG_PRINT("info", ("the block 0x%lx changed => need retry"
+ "status %x files %d != %d or pages %d !=%d",
+ (ulong)block, block->status,
+ file.file, block->hash_link->file.file,
+ pageno, block->hash_link->pageno));
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_ASSERT(block->pins == 0);
+ /* we are doing it by global cache mutex protection, so it is OK */
+ block->status|= BLOCK_WRLOCK;
+ DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Remove write lock from the block
+
+ SYNOPSIS
+ release_wrlock()
+ pagecache pointer to a page cache data structure
+ block the block to work with
+
+ RETURN
+ 0 - OK
+*/
+
+static void release_wrlock(PAGECACHE_BLOCK_LINK *block)
+{
+ DBUG_ENTER("release_wrlock");
+ BLOCK_INFO(block);
+ DBUG_ASSERT(block->status & BLOCK_WRLOCK);
+ DBUG_ASSERT(block->pins > 0);
+ block->status&= ~BLOCK_WRLOCK;
+ DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block));
+#ifdef THREAD
+ /* release all threads waiting for write lock */
+ if (block->wqueue[COND_FOR_WRLOCK].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_WRLOCK]);
+#endif
+ BLOCK_INFO(block);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Try to lock/unlock and pin/unpin the block
+
+ SYNOPSIS
+ make_lock_and_pin()
+ pagecache pointer to a page cache data structure
+ block the block to work with
+ lock lock change mode
+ pin pin change mode
+
+ RETURN
+ 0 - OK
+ 1 - Try to lock the block failed
+*/
+
+static my_bool make_lock_and_pin(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin)
+{
+ DBUG_ENTER("make_lock_and_pin");
+ DBUG_PRINT("enter", ("block: 0x%lx (%u), wrlock: %c pins: %u, lock %s, pin: %s",
+ (ulong)block, BLOCK_NUMBER(pagecache, block),
+ ((block->status & BLOCK_WRLOCK)?'Y':'N'),
+ block->pins,
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ BLOCK_INFO(block);
+#ifdef PAGECACHE_DEBUG
+ DBUG_ASSERT(info_check_pin(block, pin) == 0 &&
+ info_check_lock(block, lock, pin) == 0);
+#endif
+ switch (lock)
+ {
+ case PAGECACHE_LOCK_WRITE: /* free -> write */
+ /* Writelock and pin the buffer */
+ if (get_wrlock(pagecache, block))
+ {
+ /* can't lock => need retry */
+ goto retry;
+ }
+
+ /* The cache mutex is held, so this is safe */
+ add_pin(block);
+ info_add_lock(block, 1);
+ break;
+ case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */
+ case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */
+ /*
+ Removes write lock and puts read lock (which is nothing in our
+ implementation)
+ */
+ release_wrlock(block);
+ case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */
+ case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */
+ if (pin == PAGECACHE_UNPIN)
+ {
+ remove_pin(block);
+ }
+ if (lock == PAGECACHE_LOCK_WRITE_TO_READ)
+ {
+ info_change_lock(block, 0);
+ }
+ else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
+ lock == PAGECACHE_LOCK_READ_UNLOCK)
+ {
+ info_remove_lock(block);
+ }
+ break;
+ case PAGECACHE_LOCK_READ: /* free -> read */
+ if (pin == PAGECACHE_PIN)
+ {
+ /* The cache mutex is held, so this is safe */
+ add_pin(block);
+ }
+ info_add_lock(block, 0);
+ break;
+ case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */
+ case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */
+ break; /* do nothing */
+ default:
+ DBUG_ASSERT(0); /* Should never happen */
+ }
+
+ BLOCK_INFO(block);
+ DBUG_RETURN(0);
+retry:
+ DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block));
+ BLOCK_INFO(block);
+ DBUG_ASSERT(block->hash_link->requests > 0);
+ block->hash_link->requests--;
+ DBUG_ASSERT(block->requests > 0);
+ unreg_request(pagecache, block, 1);
+ BLOCK_INFO(block);
+ DBUG_RETURN(1);
+
+}
+
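+/*
+  Illustrative sketch (not part of the original change): the typical pair
+  of make_lock_and_pin() calls a writer makes around a buffer update.
+  Error handling other than the retry is omitted; 'restart' is assumed to
+  be a label that re-acquires the block, as in the callers of find_block().
+
+    if (make_lock_and_pin(pagecache, block,
+                          PAGECACHE_LOCK_WRITE, PAGECACHE_PIN))
+      goto restart;                     (the block changed, get it again)
+    ... modify block->buffer ...
+    make_lock_and_pin(pagecache, block,
+                      PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN);
+*/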
+
+/*
+ Read into a key cache block buffer from disk.
+
+ SYNOPSIS
+
+ read_block()
+ pagecache pointer to a page cache data structure
+ block block to which buffer the data is to be read
+ primary <-> the current thread will read the data
+ validator validator of read from the disk data
+ validator_data pointer to the data needed by the validator
+
+ RETURN VALUE
+ None
+
+ NOTES.
+ The function either reads the page data from the file into the block
+ buffer, or waits until another thread reads it. Which page to read is
+ determined by the block parameter - its hash link references the page.
+ If an error occurs the BLOCK_ERROR bit is set in the block status.
+*/
+
+static void read_block(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ my_bool primary,
+ pagecache_disk_read_validator validator,
+ gptr validator_data)
+{
+ uint got_length;
+
+ /* On entry cache_lock is locked */
+
+ DBUG_ENTER("read_block");
+ if (primary)
+ {
+ /*
+ This code is executed only by threads
+ that submitted primary requests
+ */
+
+ DBUG_PRINT("read_block",
+ ("page to be read by primary request"));
+
+ /* Page is not in buffer yet, is to be read from disk */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /*
+ Here other threads may step in and register as secondary readers.
+ They will register in block->wqueue[COND_FOR_REQUESTED].
+ */
+ got_length= pagecache_fread(pagecache, &block->hash_link->file,
+ block->buffer,
+ block->hash_link->pageno, MYF(0));
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (got_length < pagecache->block_size)
+ block->status|= BLOCK_ERROR;
+ else
+ block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK));
+
+ if (validator != NULL &&
+ (*validator)(block->buffer, validator_data))
+ block->status|= BLOCK_ERROR;
+
+ DBUG_PRINT("read_block",
+ ("primary request: new page in cache"));
+ /* Signal that all pending requests for this page now can be processed */
+#ifdef THREAD
+ if (block->wqueue[COND_FOR_REQUESTED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
+#endif
+ }
+ else
+ {
+ /*
+ This code is executed only by threads
+ that submitted secondary requests
+ */
+ DBUG_PRINT("read_block",
+ ("secondary request waiting for new page to be read"));
+ {
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ /* Put the request into a queue and wait until it can be processed */
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
+ do
+ {
+ DBUG_PRINT("read_block: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while (thread->next);
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+ /* No parallel requests in single-threaded case */
+#endif
+ }
+ DBUG_PRINT("read_block",
+ ("secondary request: new page in cache"));
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Unlock/unpin a page and set the LSN stamp if needed
+
+ SYNOPSIS
+ pagecache_unlock_page()
+ pagecache pointer to a page cache data structure
+ file handler for the file for the block of data to be read
+ pageno number of the block of data in the file
+ lock lock change
+ pin pin page
+ first_REDO_LSN_for_page do not set it if it is zero
+
+ NOTE
+ Pinning uses the request registration mechanism; it works the following way:
+ | beginning | ending |
+ | of func. | of func. |
+ ----------------------------+-------------+---------------+
+ PAGECACHE_PIN_LEFT_PINNED | - | - |
+ PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request |
+ PAGECACHE_PIN | reg request | - |
+ PAGECACHE_UNPIN | - | unreg request |
+
+
+*/
+
+void pagecache_unlock_page(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ LSN first_REDO_LSN_for_page)
+{
+ PAGECACHE_BLOCK_LINK *block;
+ int page_st;
+ DBUG_ENTER("pagecache_unlock_page");
+ DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s",
+ (uint) file->file, (ulong) pageno,
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ /* we do not allow any lock/pin increasing here */
+ DBUG_ASSERT(pin != PAGECACHE_PIN &&
+ lock != PAGECACHE_LOCK_READ &&
+ lock != PAGECACHE_LOCK_WRITE);
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ /*
+ While we hold the cache lock the cache can be used, and we hold the lock
+ because we want to unlock.
+ */
+ DBUG_ASSERT(pagecache->can_be_used);
+
+ inc_counter_for_resize_op(pagecache);
+ /* See NOTE for pagecache_unlock_page about registering requests */
+ block= find_block(pagecache, file, pageno, 0, 0,
+ test(pin == PAGECACHE_PIN_LEFT_UNPINNED), &page_st);
+ BLOCK_INFO(block);
+ DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
+ if (first_REDO_LSN_for_page)
+ {
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK &&
+ pin == PAGECACHE_UNPIN);
+ set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page);
+ }
+
+#ifndef DBUG_OFF
+ if (
+#endif
+ make_lock_and_pin(pagecache, block, lock, pin)
+#ifndef DBUG_OFF
+ )
+ {
+ DBUG_ASSERT(0); /* should not happen */
+ }
+#else
+ ;
+#endif
+
+ remove_reader(block);
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock_page about registering requests.
+ */
+ if (pin != PAGECACHE_PIN_LEFT_PINNED)
+ unreg_request(pagecache, block, 1);
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ DBUG_VOID_RETURN;
+}
+
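+/*
+  Illustrative sketch (not part of the original change): how the pin
+  registration rules from the NOTE above are used by a caller that read a
+  page with lock= PAGECACHE_LOCK_WRITE (which, per lock_to_pin below, also
+  pins the block). 'pagecache', 'file', 'pageno' and 'lsn' are assumed to
+  come from that earlier read/modify step:
+
+    pagecache_unlock_page(pagecache, file, pageno,
+                          PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN,
+                          lsn);
+
+  Passing a zero LSN is allowed when no REDO LSN has to be recorded for
+  the page.
+*/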
+
+/*
+ Unpin page
+
+ SYNOPSIS
+ pagecache_unpin_page()
+ pagecache pointer to a page cache data structure
+ file handler for the file for the block of data to be read
+ pageno number of the block of data in the file
+*/
+
+void pagecache_unpin_page(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno)
+{
+ PAGECACHE_BLOCK_LINK *block;
+ int page_st;
+ DBUG_ENTER("pagecache_unpin_page");
+ DBUG_PRINT("enter", ("fd: %u page: %lu",
+ (uint) file->file, (ulong) pageno));
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ /*
+ While we hold the cache lock the cache can be used, and we hold the lock
+ because we want to unlock.
+ */
+ DBUG_ASSERT(pagecache->can_be_used);
+
+ inc_counter_for_resize_op(pagecache);
+ /* See NOTE for pagecache_unlock_page about registering requests */
+ block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st);
+ DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
+
+#ifndef DBUG_OFF
+ if (
+#endif
+ /*
+ We can only unpin while keeping the read lock because:
+ a) we can't pin without any lock
+ b) we can't unpin while keeping the write lock
+ */
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_LEFT_READLOCKED,
+ PAGECACHE_UNPIN)
+#ifndef DBUG_OFF
+ )
+ {
+ DBUG_ASSERT(0); /* should not happen */
+ }
+#else
+ ;
+#endif
+
+ remove_reader(block);
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock_page about registering requests
+ */
+ unreg_request(pagecache, block, 1);
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Unlock/unpin a page and set the LSN stamp if needed
+ (uses direct block/page pointer)
+
+ SYNOPSIS
+ pagecache_unlock()
+ pagecache pointer to a page cache data structure
+ link direct link to page (returned by read or write)
+ lock lock change
+ pin pin page
+ first_REDO_LSN_for_page do not set it if it is zero
+*/
+
+void pagecache_unlock(PAGECACHE *pagecache,
+ PAGECACHE_PAGE_LINK *link,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ LSN first_REDO_LSN_for_page)
+{
+ PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link;
+ DBUG_ENTER("pagecache_unlock");
+ DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu l%s p%s",
+ (ulong) block,
+ (uint) block->hash_link->file.file,
+ (ulong) block->hash_link->pageno,
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ /*
+ We do not allow any lock/pin increasing here and page can't be
+ unpinned because we use direct link.
+ */
+ DBUG_ASSERT(pin != PAGECACHE_PIN &&
+ pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ lock != PAGECACHE_LOCK_READ &&
+ lock != PAGECACHE_LOCK_WRITE);
+ if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
+ lock == PAGECACHE_LOCK_READ_UNLOCK)
+ {
+#ifndef DBUG_OFF
+ if (
+#endif
+ /* the block is not needed here so we do not provide it */
+ make_lock_and_pin(pagecache, 0, lock, pin)
+#ifndef DBUG_OFF
+ )
+ {
+ DBUG_ASSERT(0); /* should not happen */
+ }
+#else
+ ;
+#endif
+ DBUG_VOID_RETURN;
+ }
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ /*
+ While we hold the cache lock the cache can be used, and we hold the lock
+ because we want to unlock.
+ */
+ DBUG_ASSERT(pagecache->can_be_used);
+
+ inc_counter_for_resize_op(pagecache);
+ if (first_REDO_LSN_for_page)
+ {
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK &&
+ pin == PAGECACHE_UNPIN);
+ set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page);
+ }
+
+#ifndef DBUG_OFF
+ if (
+#endif
+ make_lock_and_pin(pagecache, block, lock, pin)
+#ifndef DBUG_OFF
+ )
+ {
+ DBUG_ASSERT(0); /* should not happen */
+ }
+#else
+ ;
+#endif
+
+ remove_reader(block);
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock_page about registering requests.
+ */
+ if (pin != PAGECACHE_PIN_LEFT_PINNED)
+ unreg_request(pagecache, block, 1);
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Unpin page
+ (uses direct block/page pointer)
+
+ SYNOPSIS
+ pagecache_unpin()
+ pagecache pointer to a page cache data structure
+ link direct link to page (returned by read or write)
+*/
+
+void pagecache_unpin(PAGECACHE *pagecache,
+ PAGECACHE_PAGE_LINK *link)
+{
+ PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link;
+ DBUG_ENTER("pagecache_unpin");
+ DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu",
+ (ulong) block,
+ (uint) block->hash_link->file.file,
+ (ulong) block->hash_link->pageno));
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ /*
+ While we hold the cache lock the cache can be used, and we hold the lock
+ because we want to unlock.
+ */
+ DBUG_ASSERT(pagecache->can_be_used);
+
+ inc_counter_for_resize_op(pagecache);
+
+#ifndef DBUG_OFF
+ if (
+#endif
+ /*
+ We can only unpin while keeping the read lock because:
+ a) we can't pin without any lock
+ b) we can't unpin while keeping the write lock
+ */
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_LEFT_READLOCKED,
+ PAGECACHE_UNPIN)
+#ifndef DBUG_OFF
+ )
+ {
+ DBUG_ASSERT(0); /* should not happen */
+ }
+#else
+ ;
+#endif
+
+ remove_reader(block);
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock_page about registering requests.
+ */
+ unreg_request(pagecache, block, 1);
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Read a block of data from a cached file into a buffer;
+
+ SYNOPSIS
+ pagecache_valid_read()
+ pagecache pointer to a page cache data structure
+ file handler for the file for the block of data to be read
+ pageno number of the block of data in the file
+ level determines the weight of the data
+ buff buffer to where the data must be placed
+ type type of the page
+ lock lock change
+ link link to the page if we pin it
+ validator validator of read from the disk data
+ validator_data pointer to the data need by the validator
+
+ RETURN VALUE
+ Returns the address where the data is placed if successful, 0 otherwise.
+
+ The pin will be chosen according to the lock parameter (see lock_to_pin)
+*/
+static enum pagecache_page_pin lock_to_pin[]=
+{
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
+ PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/,
+ PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/,
+ PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
+ PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
+ PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/
+};
+
+byte *pagecache_valid_read(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ byte *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ PAGECACHE_PAGE_LINK *link,
+ pagecache_disk_read_validator validator,
+ gptr validator_data)
+{
+ int error= 0;
+ enum pagecache_page_pin pin= lock_to_pin[lock];
+ PAGECACHE_PAGE_LINK fake_link;
+ DBUG_ENTER("pagecache_valid_read");
+ DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s",
+ (uint) file->file, (ulong) pageno, level,
+ page_cache_page_type_str[type],
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+
+ if (!link)
+ link= &fake_link;
+ else
+ *link= 0;
+
+restart:
+
+ if (pagecache->can_be_used)
+ {
+ /* Key cache is used */
+ PAGECACHE_BLOCK_LINK *block;
+ uint status;
+ int page_st;
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (!pagecache->can_be_used)
+ {
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ goto no_key_cache;
+ }
+
+ inc_counter_for_resize_op(pagecache);
+ pagecache->global_cache_r_requests++;
+ /* See NOTE for pagecache_unlock_page about registering requests. */
+ block= find_block(pagecache, file, pageno, level,
+ test(lock == PAGECACHE_LOCK_WRITE),
+ test((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
+ (pin == PAGECACHE_PIN)),
+ &page_st);
+ DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
+ block->type == type);
+ block->type= type;
+ if (block->status != BLOCK_ERROR && page_st != PAGE_READ)
+ {
+ DBUG_PRINT("info", ("read block 0x%lx", (ulong)block));
+ /* The requested page is to be read into the block buffer */
+ read_block(pagecache, block,
+ (my_bool)(page_st == PAGE_TO_BE_READ),
+ validator, validator_data);
+ DBUG_PRINT("info", ("read is done"));
+ }
+ if (make_lock_and_pin(pagecache, block, lock, pin))
+ {
+ /*
+ We failed to write lock the block, cache is unlocked,
+ we will try to get the block again.
+ */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("restarting..."));
+ goto restart;
+ }
+
+ if (! ((status= block->status) & BLOCK_ERROR))
+ {
+#if !defined(SERIALIZED_READ_FROM_CACHE)
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+#endif
+
+ DBUG_ASSERT((pagecache->block_size & 511) == 0);
+ /* Copy data from the cache buffer */
+ bmove512(buff, block->buffer, pagecache->block_size);
+
+#if !defined(SERIALIZED_READ_FROM_CACHE)
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+#endif
+ }
+
+ remove_reader(block);
+ /*
+ Link the block into the LRU chain if it's the last submitted request
+ for the block and block will not be pinned.
+ See NOTE for pagecache_unlock_page about registering requests.
+ */
+ if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
+ unreg_request(pagecache, block, 1);
+ else
+ *link= (PAGECACHE_PAGE_LINK)block;
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ if (status & BLOCK_ERROR)
+ DBUG_RETURN((byte *) 0);
+
+ DBUG_RETURN(buff);
+ }
+
+no_key_cache: /* Key cache is not used */
+
+ /* We can't use mutex here as the key cache may not be initialized */
+ pagecache->global_cache_r_requests++;
+ pagecache->global_cache_read++;
+ if (pagecache_fread(pagecache, file, (byte*) buff, pageno, MYF(MY_NABP)))
+ error= 1;
+ DBUG_RETURN(error ? (byte*) 0 : buff);
+}
+
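+/*
+  Illustrative sketch (not part of the original change): reading one page
+  through the cache without keeping any lock or pin afterwards.  'info',
+  'PAGE_SIZE', 'maria_page_check' and 'handle_read_error' are hypothetical
+  caller-side names, not part of this file:
+
+    byte buff[PAGE_SIZE];
+    if (!pagecache_valid_read(pagecache, &info->dfile, pageno, 3, buff,
+                              PAGECACHE_LSN_PAGE,
+                              PAGECACHE_LOCK_LEFT_UNLOCKED, 0,
+                              maria_page_check, (gptr) info))
+      handle_read_error();
+
+  With PAGECACHE_LOCK_LEFT_UNLOCKED the pin is PAGECACHE_PIN_LEFT_UNPINNED
+  (see lock_to_pin above), so the block is released before returning.
+*/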
+
+/*
+ Delete page from the buffer
+
+ SYNOPSIS
+ pagecache_delete_page()
+ pagecache pointer to a page cache data structure
+ file handler for the file for the block of data to be read
+ pageno number of the block of data in the file
+ lock lock change
+ flush flush page if it is dirty
+
+ RETURN VALUE
+ 0 - deleted or was not present at all
+ 1 - error
+
+ NOTES.
+ lock can only be PAGECACHE_LOCK_LEFT_WRITELOCKED (page was write locked
+ before) or PAGECACHE_LOCK_WRITE (delete will write lock the page before deleting)
+*/
+my_bool pagecache_delete_page(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_lock lock,
+ my_bool flush)
+{
+ int error= 0;
+ enum pagecache_page_pin pin= lock_to_pin[lock];
+ DBUG_ENTER("pagecache_delete_page");
+ DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s",
+ (uint) file->file, (ulong) pageno,
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin]));
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE ||
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
+ DBUG_ASSERT(pin == PAGECACHE_PIN ||
+ pin == PAGECACHE_PIN_LEFT_PINNED);
+
+restart:
+
+ if (pagecache->can_be_used)
+ {
+ /* Key cache is used */
+ reg1 PAGECACHE_BLOCK_LINK *block;
+ PAGECACHE_HASH_LINK **unused_start, *link;
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (!pagecache->can_be_used)
+ goto end;
+
+ inc_counter_for_resize_op(pagecache);
+ link= get_present_hash_link(pagecache, file, pageno, &unused_start);
+ if (!link)
+ {
+ DBUG_PRINT("info", ("There is no such page in the cache"));
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_RETURN(0);
+ }
+ block= link->block;
+ /* See NOTE for pagecache_unlock_page about registering requests. */
+ if (pin == PAGECACHE_PIN)
+ reg_requests(pagecache, block, 1);
+ DBUG_ASSERT(block != 0);
+ if (make_lock_and_pin(pagecache, block, lock, pin))
+ {
+ /*
+ We failed to writelock the block, cache is unlocked, and last write
+ lock is released, we will try to get the block again.
+ */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("restarting..."));
+ goto restart;
+ }
+
+ if (block->status & BLOCK_CHANGED)
+ {
+ if (flush)
+ {
+ /* The block contains a dirty page - push it out of the cache */
+
+ KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /*
+ The call is thread safe because only the current
+ thread might change the block->hash_link value
+ */
+ DBUG_ASSERT(block->pins == 1);
+ error= pagecache_fwrite(pagecache,
+ &block->hash_link->file,
+ block->buffer,
+ block->hash_link->pageno,
+ block->type,
+ MYF(MY_NABP | MY_WAIT_IF_FULL));
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ pagecache->global_cache_write++;
+
+ if (error)
+ {
+ block->status|= BLOCK_ERROR;
+ goto err;
+ }
+ }
+ pagecache->blocks_changed--;
+ pagecache->global_blocks_changed--;
+ /*
+ free_block() will change the status and rec_lsn of the block so no
+ need to change them here.
+ */
+ }
+ /* The cache is locked, so we can release the page before freeing it */
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN);
+ DBUG_ASSERT(link->requests > 0);
+ link->requests--;
+ /* See NOTE for pagecache_unlock_page about registering requests. */
+ free_block(pagecache, block);
+
+err:
+ dec_counter_for_resize_op(pagecache);
+end:
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ }
+
+ DBUG_RETURN(error);
+}
+
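+/*
+  Illustrative sketch (not part of the original change): dropping a page
+  from the cache, e.g. after the corresponding part of the file has been
+  freed.  The caller holds no lock on the page, so it asks
+  pagecache_delete_page() to take the write lock itself and to flush the
+  page first if it is dirty.  '&info->dfile' and handle_delete_error()
+  are hypothetical caller-side names:
+
+    if (pagecache_delete_page(pagecache, &info->dfile, pageno,
+                              PAGECACHE_LOCK_WRITE, 1))
+      handle_delete_error();
+*/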
+
+/*
+ Write a buffer into a cached file.
+
+ SYNOPSIS
+
+ pagecache_write()
+ pagecache pointer to a page cache data structure
+ file handler for the file to write data to
+ pageno number of the block of data in the file
+ level determines the weight of the data
+ buff buffer with the data to be written
+ type type of the page
+ lock lock change
+ pin pin page
+ write_mode how to write page
+ link link to the page if we pin it
+
+ RETURN VALUE
+ 0 if successful, 1 otherwise.
+*/
+
+/* description of how to change lock before and after write */
+struct write_lock_change
+{
+ int need_lock_change; /* need changing of lock at the end of write */
+ enum pagecache_page_lock new_lock; /* lock at the beginning */
+ enum pagecache_page_lock unlock_lock; /* lock at the end */
+};
+
+static struct write_lock_change write_lock_change_table[]=
+{
+ {1,
+ PAGECACHE_LOCK_WRITE,
+ PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
+ {0, /*unsupported*/
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
+ {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
+ {1,
+ PAGECACHE_LOCK_WRITE,
+ PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/,
+ {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/,
+ {0, /*unsupported*/
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/,
+ {1,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
+ {1,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ PAGECACHE_LOCK_WRITE_TO_READ}/*PAGECACHE_LOCK_WRITE_TO_READ*/
+};
+
+/* description of how to change pin before and after write */
+struct write_pin_change
+{
+ enum pagecache_page_pin new_pin; /* pin status at the beginning */
+ enum pagecache_page_pin unlock_pin; /* pin status at the end */
+};
+
+static struct write_pin_change write_pin_change_table[]=
+{
+ {PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/,
+ {PAGECACHE_PIN,
+ PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/,
+ {PAGECACHE_PIN,
+ PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/,
+ {PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/
+};
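+
+/*
+  Illustrative worked example (not part of the original change) of how the
+  two tables above are consulted by pagecache_write() below.  For a call
+  with lock= PAGECACHE_LOCK_READ and pin= PAGECACHE_PIN:
+    write_lock_change_table[PAGECACHE_LOCK_READ] gives
+      need_lock_change= 1, new_lock= PAGECACHE_LOCK_WRITE,
+      unlock_lock= PAGECACHE_LOCK_WRITE_TO_READ;
+    write_pin_change_table[PAGECACHE_PIN] gives
+      new_pin= PAGECACHE_PIN, unlock_pin= PAGECACHE_PIN_LEFT_PINNED.
+  So the page is write locked and pinned while the buffer is copied, then
+  downgraded to a read lock and left pinned for the caller (who gets the
+  block through the link argument).
+*/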
+
+my_bool pagecache_write(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ byte *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin,
+ enum pagecache_write_mode write_mode,
+ PAGECACHE_PAGE_LINK *link)
+{
+ reg1 PAGECACHE_BLOCK_LINK *block= NULL;
+ PAGECACHE_PAGE_LINK fake_link;
+ int error= 0;
+ int need_lock_change= write_lock_change_table[lock].need_lock_change;
+ DBUG_ENTER("pagecache_write");
+ DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s m%s",
+ (uint) file->file, (ulong) pageno, level,
+ page_cache_page_type_str[type],
+ page_cache_page_lock_str[lock],
+ page_cache_page_pin_str[pin],
+ page_cache_page_write_mode_str[write_mode]));
+ DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED &&
+ lock != PAGECACHE_LOCK_READ_UNLOCK);
+ if (!link)
+ link= &fake_link;
+ else
+ *link= 0;
+
+ if (write_mode == PAGECACHE_WRITE_NOW)
+ {
+ /* we allow direct write if we do not use long-term locks */
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_LEFT_UNLOCKED);
+ /* Force writing from buff into disk */
+ pagecache->global_cache_write++;
+ if (pagecache_fwrite(pagecache, file, buff, pageno, type,
+ MYF(MY_NABP | MY_WAIT_IF_FULL)))
+ DBUG_RETURN(1);
+ }
+restart:
+
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache, "start of key_cache_write", 1););
+#endif
+
+ if (pagecache->can_be_used)
+ {
+ /* Key cache is used */
+ int page_st;
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ if (!pagecache->can_be_used)
+ {
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ goto no_key_cache;
+ }
+
+ inc_counter_for_resize_op(pagecache);
+ pagecache->global_cache_w_requests++;
+ /* See NOTE for pagecache_unlock_page about registering requests. */
+ block= find_block(pagecache, file, pageno, level,
+ test(write_mode != PAGECACHE_WRITE_DONE &&
+ lock != PAGECACHE_LOCK_LEFT_WRITELOCKED &&
+ lock != PAGECACHE_LOCK_WRITE_UNLOCK &&
+ lock != PAGECACHE_LOCK_WRITE_TO_READ),
+ test((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
+ (pin == PAGECACHE_PIN)),
+ &page_st);
+ if (!block)
+ {
+ DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE);
+ /* It happens only for requests submitted during resize operation */
+ dec_counter_for_resize_op(pagecache);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /* Write to disk: the key cache is being resized at the moment */
+ goto no_key_cache;
+ }
+
+ DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
+ block->type == type);
+ block->type= type;
+
+ if (make_lock_and_pin(pagecache, block,
+ write_lock_change_table[lock].new_lock,
+ (need_lock_change ?
+ write_pin_change_table[pin].new_pin :
+ pin)))
+ {
+ /*
+ We failed to writelock the block, cache is unlocked, and last write
+ lock is released, we will try to get the block again.
+ */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("restarting..."));
+ goto restart;
+ }
+
+
+ if (write_mode == PAGECACHE_WRITE_DONE)
+ {
+ if ((block->status & BLOCK_ERROR) && page_st != PAGE_READ)
+ {
+ /* Copy data from buff */
+ bmove512(block->buffer, buff, pagecache->block_size);
+ block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK));
+ KEYCACHE_DBUG_PRINT("key_cache_insert",
+ ("primary request: new page in cache"));
+#ifdef THREAD
+ /* Signal that all pending requests for this page can now be processed. */
+ if (block->wqueue[COND_FOR_REQUESTED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
+#endif
+ }
+ }
+ else
+ {
+ if (write_mode == PAGECACHE_WRITE_NOW)
+ {
+ /* buff has been written to disk at start */
+ if ((block->status & BLOCK_CHANGED) &&
+ !(block->status & BLOCK_ERROR))
+ link_to_file_list(pagecache, block, &block->hash_link->file, 1);
+ }
+ else if (! (block->status & BLOCK_CHANGED))
+ link_to_changed_list(pagecache, block);
+
+ if (! (block->status & BLOCK_ERROR))
+ {
+ bmove512(block->buffer, buff, pagecache->block_size);
+ block->status|= BLOCK_READ;
+ }
+ }
+
+
+ if (need_lock_change)
+ {
+#ifndef DBUG_OFF
+ int rc=
+#endif
+ /*
+ QQ: We are doing an unlock here, so need to give the page its rec_lsn
+ */
+ make_lock_and_pin(pagecache, block,
+ write_lock_change_table[lock].unlock_lock,
+ write_pin_change_table[pin].unlock_pin);
+#ifndef DBUG_OFF
+ DBUG_ASSERT(rc == 0);
+#endif
+ }
+
+ /* Unregister the request */
+ DBUG_ASSERT(block->hash_link->requests > 0);
+ block->hash_link->requests--;
+ /* See NOTE for pagecache_unlock_page about registering requests. */
+ if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
+ unreg_request(pagecache, block, 1);
+ else
+ *link= (PAGECACHE_PAGE_LINK)block;
+
+
+ if (block->status & BLOCK_ERROR)
+ error= 1;
+
+ dec_counter_for_resize_op(pagecache);
+
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+
+ goto end;
+ }
+
+no_key_cache:
+ /* Key cache is not used */
+ if (write_mode == PAGECACHE_WRITE_DELAY)
+ {
+ pagecache->global_cache_w_requests++;
+ pagecache->global_cache_write++;
+ if (pagecache_fwrite(pagecache, file, (byte*) buff, pageno, type,
+ MYF(MY_NABP | MY_WAIT_IF_FULL)))
+ error=1;
+ }
+
+end:
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("exec",
+ test_key_cache(pagecache, "end of key_cache_write", 1););
+#endif
+ BLOCK_INFO(block);
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Free block: remove reference to it from hash table,
+ remove it from the chain file of dirty/clean blocks
+ and add it to the free list.
+*/
+
+static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
+{
+ KEYCACHE_THREAD_TRACE("free block");
+ KEYCACHE_DBUG_PRINT("free_block",
+ ("block %u to be freed, hash_link %p",
+ BLOCK_NUMBER(pagecache, block), block->hash_link));
+ if (block->hash_link)
+ {
+ /*
+ While waiting for readers to finish, new readers might request the
+ block. But since we set block->status|= BLOCK_REASSIGNED, they
+ will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
+ later.
+ */
+ block->status|= BLOCK_REASSIGNED;
+ wait_for_readers(pagecache, block);
+ unlink_hash(pagecache, block->hash_link);
+ }
+
+ unlink_changed(block);
+ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT(block->pins == 0);
+ block->status= 0;
+#ifndef DBUG_OFF
+ block->type= PAGECACHE_EMPTY_PAGE;
+#endif
+ block->rec_lsn= 0;
+ KEYCACHE_THREAD_TRACE("free block");
+ KEYCACHE_DBUG_PRINT("free_block",
+ ("block is freed"));
+ unreg_request(pagecache, block, 0);
+ block->hash_link= NULL;
+
+ /* Remove the free block from the LRU ring. */
+ unlink_block(pagecache, block);
+ if (block->temperature == BLOCK_WARM)
+ pagecache->warm_blocks--;
+ block->temperature= BLOCK_COLD;
+ /* Insert the free block in the free list. */
+ block->next_used= pagecache->free_block_list;
+ pagecache->free_block_list= block;
+ /* Keep track of the number of currently unused blocks. */
+ pagecache->blocks_unused++;
+
+#ifdef THREAD
+ /* All pending requests for this page must be resubmitted. */
+ if (block->wqueue[COND_FOR_SAVED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
+#endif
+}
+
+
+static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b)
+{
+ return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 :
+ ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 1 : 0);
+}
+
+
+/*
+ Flush a portion of changed blocks to disk,
+ free used blocks if requested
+*/
+
+static int flush_cached_blocks(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ PAGECACHE_BLOCK_LINK **cache,
+ PAGECACHE_BLOCK_LINK **end,
+ enum flush_type type)
+{
+ int error;
+ int last_errno= 0;
+ uint count= (uint) (end-cache);
+ DBUG_ENTER("flush_cached_blocks");
+
+ /* Don't lock the cache during the flush */
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ /*
+ As all blocks referred to in 'cache' are marked with BLOCK_IN_FLUSH
+ we are guaranteed no thread will change them
+ */
+ qsort((byte*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
+
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ for (; cache != end; cache++)
+ {
+ PAGECACHE_BLOCK_LINK *block= *cache;
+
+ if (block->pins)
+ {
+ KEYCACHE_DBUG_PRINT("flush_cached_blocks",
+ ("block %u (0x%lx) pinned",
+ BLOCK_NUMBER(pagecache, block), (ulong)block));
+ DBUG_PRINT("info", ("block %u (0x%lx) pinned",
+ BLOCK_NUMBER(pagecache, block), (ulong)block));
+ BLOCK_INFO(block);
+ last_errno= -1;
+ unreg_request(pagecache, block, 1);
+ continue;
+ }
+ /* if the block is not pinned then it is not write locked */
+ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT(block->pins == 0);
+#ifndef DBUG_OFF
+ {
+ int rc=
+#endif
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE, PAGECACHE_PIN);
+#ifndef DBUG_OFF
+ DBUG_ASSERT(rc == 0);
+ }
+#endif
+
+ KEYCACHE_DBUG_PRINT("flush_cached_blocks",
+ ("block %u (0x%lx) to be flushed",
+ BLOCK_NUMBER(pagecache, block), (ulong)block));
+ DBUG_PRINT("info", ("block %u (0x%lx) to be flushed",
+ BLOCK_NUMBER(pagecache, block), (ulong)block));
+ BLOCK_INFO(block);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("block %u (0x%lx) pins: %u",
+ BLOCK_NUMBER(pagecache, block), (ulong)block,
+ block->pins));
+ DBUG_ASSERT(block->pins == 1);
+ error= pagecache_fwrite(pagecache, file,
+ block->buffer,
+ block->hash_link->pageno,
+ block->type,
+ MYF(MY_NABP | MY_WAIT_IF_FULL));
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN);
+
+ pagecache->global_cache_write++;
+ if (error)
+ {
+ block->status|= BLOCK_ERROR;
+ if (!last_errno)
+ last_errno= errno ? errno : -1;
+ }
+#ifdef THREAD
+ /*
+ Let possible waiting requests to write to the block page proceed.
+ This might happen only during an operation to resize the key cache.
+ */
+ if (block->wqueue[COND_FOR_SAVED].last_thread)
+ wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
+#endif
+ /* type will never be FLUSH_IGNORE_CHANGED here */
+ if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
+ {
+ pagecache->blocks_changed--;
+ pagecache->global_blocks_changed--;
+ free_block(pagecache, block);
+ }
+ else
+ {
+ block->status&= ~BLOCK_IN_FLUSH;
+ link_to_file_list(pagecache, block, file, 1);
+ unreg_request(pagecache, block, 1);
+ }
+ }
+ DBUG_RETURN(last_errno);
+}
+
+
+/*
+ flush all key blocks for a file to disk, but don't do any mutex locks
+
+ flush_pagecache_blocks_int()
+ pagecache pointer to a key cache data structure
+ file handler for the file to flush to
+ flush_type type of the flush
+
+ NOTES
+ This function doesn't do any mutex locks because it needs to be called
+ both from flush_pagecache_blocks and flush_all_key_blocks (the latter one
+ does the mutex lock in the resize_pagecache() function).
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ enum flush_type type)
+{
+ PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
+ int last_errno= 0;
+ DBUG_ENTER("flush_pagecache_blocks_int");
+ DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu",
+ file->file, pagecache->blocks_used, pagecache->blocks_changed));
+
+#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache,
+ "start of flush_pagecache_blocks", 0););
+#endif
+
+ cache= cache_buff;
+ if (pagecache->disk_blocks > 0 &&
+ (!my_disable_flush_pagecache_blocks || type != FLUSH_KEEP))
+ {
+ /* Key cache exists and flush is not disabled */
+ int error= 0;
+ uint count= 0;
+ PAGECACHE_BLOCK_LINK **pos, **end;
+ PAGECACHE_BLOCK_LINK *first_in_switch= NULL;
+ PAGECACHE_BLOCK_LINK *block, *next;
+#if defined(PAGECACHE_DEBUG)
+ uint cnt= 0;
+#endif
+
+ if (type != FLUSH_IGNORE_CHANGED)
+ {
+ /*
+ Count how many key blocks we have to cache to be able
+ to flush all dirty pages with minimum seek moves
+ */
+ for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
+ block;
+ block= block->next_changed)
+ {
+ if (block->hash_link->file.file == file->file)
+ {
+ count++;
+ KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used);
+ }
+ }
+ /* Allocate a new buffer only if it's bigger than the one we have */
+ if (count > FLUSH_CACHE &&
+ !(cache=
+ (PAGECACHE_BLOCK_LINK**)
+ my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0))))
+ {
+ cache= cache_buff;
+ count= FLUSH_CACHE;
+ }
+ }
+
+ /* Retrieve the blocks and write them to a buffer to be flushed */
+restart:
+ end= (pos= cache)+count;
+ for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
+ block;
+ block= next)
+ {
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
+#endif
+ next= block->next_changed;
+ if (block->hash_link->file.file == file->file)
+ {
+ /*
+ Mark the block with BLOCK_IN_FLUSH in order not to let
+ other threads use it for new pages and interfere with
+ our sequence of flushing dirty file pages
+ */
+ block->status|= BLOCK_IN_FLUSH;
+
+ if (! (block->status & BLOCK_IN_SWITCH))
+ {
+ /*
+ We care only for the blocks for which flushing was not
+ initiated by other threads as a result of page swapping
+ */
+ reg_requests(pagecache, block, 1);
+ if (type != FLUSH_IGNORE_CHANGED)
+ {
+ /* It's not a temporary file */
+ if (pos == end)
+ {
+ /*
+ This happens only if there is not enough
+ memory for the big block
+ */
+ if ((error= flush_cached_blocks(pagecache, file, cache,
+ end,type)))
+ last_errno=error;
+ DBUG_PRINT("info", ("restarting..."));
+ /*
+ Restart the scan as some other thread might have changed
+ the changed blocks chain: the blocks that were in switch
+ state before the flush started have to be excluded
+ */
+ goto restart;
+ }
+ *pos++= block;
+ }
+ else
+ {
+ /* It's a temporary file */
+ pagecache->blocks_changed--;
+ pagecache->global_blocks_changed--;
+ free_block(pagecache, block);
+ }
+ }
+ else
+ {
+ /* Link the block into a list of blocks 'in switch' */
+ /* QQ:
+ #warning this unlink_changed() is a serious problem for
+ Maria's Checkpoint: it removes a page from the list of dirty
+ pages, while it's still dirty. A solution is to abandon
+ first_in_switch, just wait for this page to be
+ flushed by somebody else, and loop. TODO: check all places
+ where we remove a page from the list of dirty pages
+ */
+ unlink_changed(block);
+ link_changed(block, &first_in_switch);
+ }
+ }
+ }
+ if (pos != cache)
+ {
+ if ((error= flush_cached_blocks(pagecache, file, cache, pos, type)))
+ last_errno= error;
+ }
+ /* Wait until list of blocks in switch is empty */
+ while (first_in_switch)
+ {
+#if defined(PAGECACHE_DEBUG)
+ cnt= 0;
+#endif
+ block= first_in_switch;
+ {
+#ifdef THREAD
+ struct st_my_thread_var *thread= my_thread_var;
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ do
+ {
+ KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait",
+ ("suspend thread %ld", thread->id));
+ pagecache_pthread_cond_wait(&thread->suspend,
+ &pagecache->cache_lock);
+ }
+ while (thread->next);
+#else
+ KEYCACHE_DBUG_ASSERT(0);
+ /* No parallel requests in single-threaded case */
+#endif
+ }
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
+#endif
+ }
+ /* The following happens very seldom */
+ if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
+ {
+#if defined(PAGECACHE_DEBUG)
+ cnt=0;
+#endif
+ for (block= pagecache->file_blocks[FILE_HASH(*file)] ;
+ block;
+ block= next)
+ {
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
+#endif
+ next= block->next_changed;
+ if (block->hash_link->file.file == file->file &&
+ (! (block->status & BLOCK_CHANGED)
+ || type == FLUSH_IGNORE_CHANGED))
+ {
+ reg_requests(pagecache, block, 1);
+ free_block(pagecache, block);
+ }
+ }
+ }
+ }
+
+#ifndef DBUG_OFF
+ DBUG_EXECUTE("check_pagecache",
+ test_key_cache(pagecache, "end of flush_pagecache_blocks", 0););
+#endif
+ if (cache != cache_buff)
+ my_free((gptr) cache, MYF(0));
+ if (last_errno)
+ errno=last_errno; /* Return first error */
+ DBUG_RETURN(last_errno != 0);
+}
+
+
+/*
+ Flush all blocks for a file to disk
+
+ SYNOPSIS
+
+ flush_pagecache_blocks()
+ pagecache pointer to a page cache data structure
+ file handler for the file to flush to
+ flush_type type of the flush
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+int flush_pagecache_blocks(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file, enum flush_type type)
+{
+ int res;
+ DBUG_ENTER("flush_pagecache_blocks");
+ DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache));
+
+ if (pagecache->disk_blocks <= 0)
+ DBUG_RETURN(0);
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+ inc_counter_for_resize_op(pagecache);
+ res= flush_pagecache_blocks_int(pagecache, file, type);
+ dec_counter_for_resize_op(pagecache);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_RETURN(res);
+}
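For reference, a minimal usage sketch (not part of the patch, and assuming the caller already holds a PAGECACHE *pagecache and an open PAGECACHE_FILE kfile):

    /* flush every cached page of 'kfile' but keep the blocks in the cache */
    if (flush_pagecache_blocks(pagecache, &kfile, FLUSH_KEEP))
      error= errno;        /* the first error encountered is left in errno */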
+
+
+/*
+ Reset the counters of a page cache.
+
+ SYNOPSIS
+ reset_pagecache_counters()
+ name the name of the page cache
+ pagecache pointer to the page cache to be reset
+
+ DESCRIPTION
+ This procedure is used to reset the counters of a page cache; it is
+ called for the default one as well as for named ones.
+
+ RETURN
+ 0 on success (always because it can't fail)
+*/
+
+int reset_pagecache_counters(const char *name, PAGECACHE *pagecache)
+{
+ DBUG_ENTER("reset_pagecache_counters");
+ if (!pagecache->inited)
+ {
+ DBUG_PRINT("info", ("Key cache %s not initialized.", name));
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
+
+ pagecache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
+ pagecache->global_cache_r_requests= 0; /* Key_read_requests */
+ pagecache->global_cache_read= 0; /* Key_reads */
+ pagecache->global_cache_w_requests= 0; /* Key_write_requests */
+ pagecache->global_cache_write= 0; /* Key_writes */
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Allocates a buffer and stores in it some information about all dirty pages
+ of type PAGECACHE_LSN_PAGE.
+
+ SYNOPSIS
+ pagecache_collect_changed_blocks_with_lsn()
+ pagecache pointer to the page cache
+ str (OUT) pointer to a LEX_STRING where the allocated buffer, and
+ its size, will be put
+ max_lsn (OUT) pointer to a LSN where the maximum rec_lsn of all
+ relevant dirty pages will be put
+
+ DESCRIPTION
+ Does the allocation because the caller cannot know the size itself.
+ Memory freeing is to be done by the caller (if the "str" member of the
+ LEX_STRING is not NULL).
+ Ignores all pages of a type other than PAGECACHE_LSN_PAGE, because they
+ are not interesting for a checkpoint record.
+ The caller has the intention of doing checkpoints.
+
+ RETURN
+ 0 on success
+ 1 on error
+*/
+my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
+ LEX_STRING *str,
+ LSN *max_lsn)
+{
+ my_bool error;
+ ulong stored_list_size= 0;
+ uint file_hash;
+ char *ptr;
+ DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN");
+
+ *max_lsn= 0;
+ DBUG_ASSERT(NULL == str->str);
+ /*
+ We lock the entire cache but will be quick, just reading/writing a few MBs
+ of memory at most.
+ When we enter here, we must be sure that no "first_in_switch" situation
+ is happening or will happen (either we have to get rid of
+ first_in_switch in the code, or first_in_switch has to increment a
+ "danger" counter so that this function knows it has to wait). TODO.
+ */
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+
+ /* Count how many dirty pages are interesting */
+ for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
+ {
+ PAGECACHE_BLOCK_LINK *block;
+ for (block= pagecache->changed_blocks[file_hash] ;
+ block;
+ block= block->next_changed)
+ {
+ /*
+ Q: is there something subtle with block->hash_link: can it be NULL?
+ does it have to be == hash_link->block... ?
+ */
+ DBUG_ASSERT(block->hash_link != NULL);
+ DBUG_ASSERT(block->status & BLOCK_CHANGED);
+ if (block->type != PAGECACHE_LSN_PAGE)
+ continue; /* no need to store it */
+ /*
+ In the current pagecache, rec_lsn is not set correctly:
+ 1) it is set on pagecache_unlock(), too late (a page is dirty
+ (BLOCK_CHANGED) since the first pagecache_write()). So in this
+ scenario:
+ thread1: thread2:
+ write_REDO
+ pagecache_write() checkpoint : reclsn not known
+ pagecache_unlock(sets rec_lsn)
+ commit
+ crash,
+ at recovery we will wrongly skip the REDO. It also affects the
+ low-water mark's computation.
+ 2) sometimes the unlocking can be an implicit action of
+ pagecache_write(), without any call to pagecache_unlock(), then
+ rec_lsn is not set.
+ 1) and 2) are critical problems.
+ TODO: fix this when Monty has explained how he writes BLOB pages.
+ */
+ if (block->rec_lsn == 0)
+ {
+ DBUG_ASSERT(0);
+ goto err;
+ }
+ stored_list_size++;
+ }
+ }
+
+ str->length= 8+(4+4+8)*stored_list_size;
+ if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME))))
+ goto err;
+ ptr= str->str;
+ int8store(ptr, stored_list_size);
+ ptr+= 8;
+ if (0 == stored_list_size)
+ {
+ error= 0; /* no dirty pages to report is not an error */
+ goto end;
+ }
+ for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
+ {
+ PAGECACHE_BLOCK_LINK *block;
+ for (block= pagecache->changed_blocks[file_hash] ;
+ block;
+ block= block->next_changed)
+ {
+ if (block->type != PAGECACHE_LSN_PAGE)
+ continue; /* no need to store it in the checkpoint record */
+ DBUG_ASSERT((4 == sizeof(block->hash_link->file.file)) &&
+ (4 == sizeof(block->hash_link->pageno)));
+ int4store(ptr, block->hash_link->file.file);
+ ptr+= 4;
+ int4store(ptr, block->hash_link->pageno);
+ ptr+= 4;
+ int8store(ptr, (ulonglong) block->rec_lsn);
+ ptr+= 8;
+ set_if_bigger(*max_lsn, block->rec_lsn);
+ }
+ }
+ error= 0;
+ goto end;
+err:
+ error= 1;
+end:
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_RETURN(error);
+}
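A sketch (not part of the patch) of how the checkpoint code might walk the buffer built above, following the 8 + (4+4+8)*N layout used for str->length; uint4korr/uint8korr are the usual mysys unpacking macros and the loop body is only a placeholder:

    char *p= str.str;
    ulonglong i, entries= uint8korr(p);
    p+= 8;
    for (i= 0; i < entries; i++)
    {
      uint32 file=    uint4korr(p);           p+= 4;
      uint32 pageno=  uint4korr(p);           p+= 4;
      LSN    rec_lsn= (LSN) uint8korr(p);     p+= 8;
      /* ... copy (file, pageno, rec_lsn) into the checkpoint record ... */
    }
    my_free(str.str, MYF(0));                 /* buffer is owned by the caller */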
+
+
+#ifndef DBUG_OFF
+/*
+ Test if disk-cache is ok
+*/
+static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)),
+ const char *where __attribute__((unused)),
+ my_bool lock __attribute__((unused)))
+{
+ /* TODO */
+}
+#endif
+
+#if defined(PAGECACHE_TIMEOUT)
+
+#define KEYCACHE_DUMP_FILE "pagecache_dump.txt"
+#define MAX_QUEUE_LEN 100
+
+
+static void pagecache_dump(PAGECACHE *pagecache)
+{
+ FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
+ struct st_my_thread_var *last;
+ struct st_my_thread_var *thread;
+ PAGECACHE_BLOCK_LINK *block;
+ PAGECACHE_HASH_LINK *hash_link;
+ PAGECACHE_PAGE *page;
+ uint i;
+
+ fprintf(pagecache_dump_file, "thread:%u\n", thread->id);
+
+ i=0;
+ thread=last=waiting_for_hash_link.last_thread;
+ fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n");
+ if (thread)
+ do
+ {
+ thread= thread->next;
+ page= (PAGECACHE_PAGE *) thread->opt_info;
+ fprintf(pagecache_dump_file,
+ "thread:%u, (file,pageno)=(%u,%lu)\n",
+ thread->id,(uint) page->file.file,(ulong) page->pageno);
+ if (++i == MAX_QUEUE_LEN)
+ break;
+ }
+ while (thread != last);
+
+ i=0;
+ thread=last=waiting_for_block.last_thread;
+ fprintf(pagecache_dump_file, "queue of threads waiting for block\n");
+ if (thread)
+ do
+ {
+ thread=thread->next;
+ hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info;
+ fprintf(pagecache_dump_file,
+ "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n",
+ thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link),
+ (uint) hash_link->file.file,(ulong) hash_link->pageno);
+ if (++i == MAX_QUEUE_LEN)
+ break;
+ }
+ while (thread != last);
+
+ for (i=0 ; i < pagecache->blocks_used ; i++)
+ {
+ int j;
+ block= &pagecache->block_root[i];
+ hash_link= block->hash_link;
+ fprintf(pagecache_dump_file,
+ "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
+ i, (int) (hash_link ?
+ PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) :
+ -1),
+ block->status, block->requests, block->condvar ? 1 : 0);
+ for (j=0 ; j < COND_SIZE; j++)
+ {
+ PAGECACHE_WQUEUE *wqueue=&block->wqueue[j];
+ thread= last= wqueue->last_thread;
+ fprintf(pagecache_dump_file, "queue #%d\n", j);
+ if (thread)
+ {
+ do
+ {
+ thread=thread->next;
+ fprintf(pagecache_dump_file,
+ "thread:%u\n", thread->id);
+ if (++i == MAX_QUEUE_LEN)
+ break;
+ }
+ while (thread != last);
+ }
+ }
+ }
+ fprintf(pagecache_dump_file, "LRU chain:");
+ block= pagecache->used_last;
+ if (block)
+ {
+ do
+ {
+ block= block->next_used;
+ fprintf(pagecache_dump_file,
+ "block:%u, ", BLOCK_NUMBER(pagecache, block));
+ }
+ while (block != pagecache->used_last);
+ }
+ fprintf(pagecache_dump_file, "\n");
+
+ fclose(pagecache_dump_file);
+}
+
+#endif /* defined(PAGECACHE_TIMEOUT) */
+
+#if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)
+
+
+static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
+ pthread_mutex_t *mutex)
+{
+ int rc;
+ struct timeval now; /* time when we started waiting */
+ struct timespec timeout; /* timeout value for the wait function */
+ struct timezone tz;
+#if defined(PAGECACHE_DEBUG)
+ int cnt=0;
+#endif
+
+ /* Get current time */
+ gettimeofday(&now, &tz);
+ /* Prepare timeout value */
+ timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT;
+ /*
+ timeval uses microseconds.
+ timespec uses nanoseconds.
+ 1 microsecond = 1000 nanoseconds
+ */
+ timeout.tv_nsec= now.tv_usec * 1000;
+ KEYCACHE_THREAD_TRACE_END("started waiting");
+#if defined(PAGECACHE_DEBUG)
+ cnt++;
+ if (cnt % 100 == 0)
+ fprintf(pagecache_debug_log, "waiting...\n");
+ fflush(pagecache_debug_log);
+#endif
+ rc= pthread_cond_timedwait(cond, mutex, &timeout);
+ KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
+ if (rc == ETIMEDOUT || rc == ETIME)
+ {
+#if defined(PAGECACHE_DEBUG)
+ fprintf(pagecache_debug_log,"aborted by pagecache timeout\n");
+ fclose(pagecache_debug_log);
+ abort();
+#endif
+ pagecache_dump();
+ }
+
+#if defined(PAGECACHE_DEBUG)
+ KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
+#else
+ assert(rc != ETIMEDOUT);
+#endif
+ return rc;
+}
+#else
+#if defined(PAGECACHE_DEBUG)
+static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
+ pthread_mutex_t *mutex)
+{
+ int rc;
+ KEYCACHE_THREAD_TRACE_END("started waiting");
+ rc= pthread_cond_wait(cond, mutex);
+ KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
+ return rc;
+}
+#endif
+#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */
+
+#if defined(PAGECACHE_DEBUG)
+static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+ int rc;
+ rc= pthread_mutex_lock(mutex);
+ KEYCACHE_THREAD_TRACE_BEGIN("");
+ return rc;
+}
+
+
+static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex)
+{
+ KEYCACHE_THREAD_TRACE_END("");
+ pthread_mutex_unlock(mutex);
+}
+
+
+static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond)
+{
+ int rc;
+ KEYCACHE_THREAD_TRACE("signal");
+ rc= pthread_cond_signal(cond);
+ return rc;
+}
+
+
+#if defined(PAGECACHE_DEBUG_LOG)
+
+
+static void pagecache_debug_print(const char * fmt, ...)
+{
+ va_list args;
+ va_start(args,fmt);
+ if (pagecache_debug_log)
+ {
+ VOID(vfprintf(pagecache_debug_log, fmt, args));
+ VOID(fputc('\n',pagecache_debug_log));
+ }
+ va_end(args);
+}
+#endif /* defined(PAGECACHE_DEBUG_LOG) */
+
+#if defined(PAGECACHE_DEBUG_LOG)
+
+
+void pagecache_debug_log_close(void)
+{
+ if (pagecache_debug_log)
+ fclose(pagecache_debug_log);
+}
+#endif /* defined(PAGECACHE_DEBUG_LOG) */
+
+#endif /* defined(PAGECACHE_DEBUG) */
diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c
index 6a30267eb80..aa04d55f624 100644
--- a/mysys/my_atomic.c
+++ b/mysys/my_atomic.c
@@ -17,11 +17,10 @@
#include <my_pthread.h>
#ifndef HAVE_INLINE
-/*
- the following will cause all inline functions to be instantiated
-*/
+/* the following will cause all inline functions to be instantiated */
#define HAVE_INLINE
-#define static extern
+#undef STATIC_INLINE
+#define STATIC_INLINE extern
#endif
#include <my_atomic.h>
@@ -35,7 +34,7 @@
*/
int my_atomic_initialize()
{
- DBUG_ASSERT(sizeof(intptr) == sizeof(void *));
+ compile_time_assert(sizeof(intptr) == sizeof(void *));
/* currently the only thing worth checking is SMP/UP issue */
#ifdef MY_ATOMIC_MODE_DUMMY
return my_getncpus() == 1 ? MY_ATOMIC_OK : MY_ATOMIC_NOT_1CPU;
diff --git a/mysys/my_bit.c b/mysys/my_bit.c
index 5a9b1187c83..2881eb1ebd2 100644
--- a/mysys/my_bit.c
+++ b/mysys/my_bit.c
@@ -13,23 +13,18 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-/* Some useful bit functions */
+#include <my_global.h>
-#include "mysys_priv.h"
-
-/*
- Find smallest X in 2^X >= value
- This can be used to divide a number with value by doing a shift instead
-*/
+#ifndef HAVE_INLINE
+/* the following will cause all inline functions to be instantiated */
+#define HAVE_INLINE
+#undef STATIC_INLINE
+#define STATIC_INLINE extern
+#endif
-uint my_bit_log2(ulong value)
-{
- uint bit;
- for (bit=0 ; value > 1 ; value>>=1, bit++) ;
- return bit;
-}
+#include <my_bit.h>
-static char nbits[256] = {
+const char _my_bits_nbits[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
@@ -48,60 +43,29 @@ static char nbits[256] = {
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
-uint my_count_bits(ulonglong v)
-{
-#if SIZEOF_LONG_LONG > 4
- /* The following code is a bit faster on 16 bit machines than if we would
- only shift v */
- ulong v2=(ulong) (v >> 32);
- return (uint) (uchar) (nbits[(uchar) v] +
- nbits[(uchar) (v >> 8)] +
- nbits[(uchar) (v >> 16)] +
- nbits[(uchar) (v >> 24)] +
- nbits[(uchar) (v2)] +
- nbits[(uchar) (v2 >> 8)] +
- nbits[(uchar) (v2 >> 16)] +
- nbits[(uchar) (v2 >> 24)]);
-#else
- return (uint) (uchar) (nbits[(uchar) v] +
- nbits[(uchar) (v >> 8)] +
- nbits[(uchar) (v >> 16)] +
- nbits[(uchar) (v >> 24)]);
-#endif
-}
-
-uint my_count_bits_ushort(ushort v)
-{
- return nbits[v];
-}
-
-
/*
- Next highest power of two
-
- SYNOPSIS
- my_round_up_to_next_power()
- v Value to check
-
- RETURN
- Next or equal power of 2
- Note: 0 will return 0
-
- NOTES
- Algorithm by Sean Anderson, according to:
- http://graphics.stanford.edu/~seander/bithacks.html
- (Orignal code public domain)
-
- Comments shows how this works with 01100000000000000000000000001011
+ perl -e 'print map{", 0x".unpack H2,pack B8,unpack b8,chr$_}(0..255)'
*/
+const uchar _my_bits_reverse_table[256]={
+0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30,
+0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98,
+0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64,
+0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC,
+0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02,
+0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2,
+0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A,
+0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E,
+0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81,
+0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71,
+0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9,
+0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15,
+0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD,
+0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43,
+0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B,
+0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97,
+0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F,
+0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};
-uint32 my_round_up_to_next_power(uint32 v)
-{
- v--; /* 01100000000000000000000000001010 */
- v|= v >> 1; /* 01110000000000000000000000001111 */
- v|= v >> 2; /* 01111100000000000000000000001111 */
- v|= v >> 4; /* 01111111110000000000000000001111 */
- v|= v >> 8; /* 01111111111111111100000000001111 */
- v|= v >> 16; /* 01111111111111111111111111111111 */
- return v+1; /* 10000000000000000000000000000000 */
-}
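The table above maps each byte to its bit-reversed value (0x01 becomes 0x80, and so on). A sketch (not part of the patch) of how such a table is typically used, here reversing the bits of a 32-bit word one byte at a time:

    static uint32 reverse_bits32(uint32 v)
    {
      return ((uint32) _my_bits_reverse_table[ v        & 0xff] << 24) |
             ((uint32) _my_bits_reverse_table[(v >>  8) & 0xff] << 16) |
             ((uint32) _my_bits_reverse_table[(v >> 16) & 0xff] <<  8) |
              (uint32) _my_bits_reverse_table[(v >> 24) & 0xff];
    }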
diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c
index 10eff40b9ed..e127b2584ae 100644
--- a/mysys/my_bitmap.c
+++ b/mysys/my_bitmap.c
@@ -38,6 +38,7 @@
#include "mysys_priv.h"
#include <my_bitmap.h>
#include <m_string.h>
+#include <my_bit.h>
void create_last_word_mask(MY_BITMAP *map)
{
diff --git a/mysys/my_create.c b/mysys/my_create.c
index 5639459f5a9..13e4675ee66 100644
--- a/mysys/my_create.c
+++ b/mysys/my_create.c
@@ -52,6 +52,13 @@ File my_create(const char *FileName, int CreateFlags, int access_flags,
fd = open(FileName, access_flags);
#endif
+ if ((MyFlags & MY_SYNC_DIR) && (fd >=0) &&
+ my_sync_dir_by_file(FileName, MyFlags))
+ {
+ my_close(fd, MyFlags);
+ fd= -1;
+ }
+
DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE,
EE_CANTCREATEFILE, MyFlags));
} /* my_create */
diff --git a/mysys/my_delete.c b/mysys/my_delete.c
index bac3e2513e1..14374fd3fa8 100644
--- a/mysys/my_delete.c
+++ b/mysys/my_delete.c
@@ -29,6 +29,9 @@ int my_delete(const char *name, myf MyFlags)
my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)),
name,errno);
}
+ else if ((MyFlags & MY_SYNC_DIR) &&
+ my_sync_dir_by_file(name, MyFlags))
+ err= -1;
DBUG_RETURN(err);
} /* my_delete */
diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c
index 2fd7eed7778..8f7cc5b7029 100644
--- a/mysys/my_getsystime.c
+++ b/mysys/my_getsystime.c
@@ -34,10 +34,6 @@ ulonglong my_getsystime()
LARGE_INTEGER t_cnt;
if (!offset)
{
- /* strictly speaking there should be a mutex to protect
- initialization section. But my_getsystime() is called from
- UUID() code, and UUID() calls are serialized with a mutex anyway
- */
LARGE_INTEGER li;
FILETIME ft;
GetSystemTimeAsFileTime(&ft);
diff --git a/mysys/my_handler.c b/mysys/my_handler.c
index afc44cc2838..757cbe490f8 100644
--- a/mysys/my_handler.c
+++ b/mysys/my_handler.c
@@ -16,9 +16,11 @@
MA 02111-1307, USA */
#include <my_global.h>
-#include "my_handler.h"
+#include <m_ctype.h>
+#include <my_base.h>
+#include <my_handler.h>
-int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
+int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
uchar *b, uint b_length, my_bool part_key,
my_bool skip_end_space)
{
@@ -174,7 +176,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
next_key_length=key_length-b_length-pack_length;
if (piks &&
- (flag=mi_compare_text(keyseg->charset,a,a_length,b,b_length,
+ (flag=ha_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0),
(my_bool)!(nextflag & SEARCH_PREFIX))))
@@ -187,7 +189,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
{
uint length=(uint) (end-a), a_length=length, b_length=length;
if (piks &&
- (flag= mi_compare_text(keyseg->charset, a, a_length, b, b_length,
+ (flag= ha_compare_text(keyseg->charset, a, a_length, b, b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0),
(my_bool)!(nextflag & SEARCH_PREFIX))))
@@ -235,7 +237,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
next_key_length=key_length-b_length-pack_length;
if (piks &&
- (flag= mi_compare_text(keyseg->charset,a,a_length,b,b_length,
+ (flag= ha_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0),
(my_bool) ((nextflag & (SEARCH_FIND |
@@ -482,12 +484,15 @@ end:
DESCRIPTION
Find the first NULL value in index-suffix values tuple.
- TODO Consider optimizing this fuction or its use so we don't search for
- NULL values in completely NOT NULL index suffixes.
+
+ TODO
+ Consider optimizing this function or its use so we don't search for
+ NULL values in completely NOT NULL index suffixes.
RETURN
- First key part that has NULL as value in values tuple, or the last key part
- (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain NULLs.
+ First key part that has NULL as value in values tuple, or the last key
+ part (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain
+ NULLs.
*/
HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a)
diff --git a/mysys/my_init.c b/mysys/my_init.c
index 7784c09d9d6..e8a55fdc1e6 100644
--- a/mysys/my_init.c
+++ b/mysys/my_init.c
@@ -43,6 +43,7 @@ static void netware_init();
my_bool my_init_done= 0;
uint mysys_usage_id= 0; /* Incremented for each my_init() */
+ulong my_thread_stack_size= 65536;
static ulong atoi_octal(const char *str)
{
diff --git a/mysys/my_open.c b/mysys/my_open.c
index 21bdedddc48..6fe7883b99b 100644
--- a/mysys/my_open.c
+++ b/mysys/my_open.c
@@ -161,6 +161,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type
}
pthread_mutex_unlock(&THR_LOCK_open);
(void) my_close(fd, MyFlags);
+ fd= -1;
my_errno=ENOMEM;
}
else
diff --git a/mysys/my_pread.c b/mysys/my_pread.c
index 7a09c21e039..2b9a994299f 100644
--- a/mysys/my_pread.c
+++ b/mysys/my_pread.c
@@ -51,7 +51,7 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset,
if (!error) /* Seek was successful */
{
if ((readbytes = (uint) read(Filedes, Buffer, Count)) == -1L)
- my_errno= errno;
+ my_errno= errno ? errno : -1;
/*
We should seek back, even if read failed. If this fails,
@@ -67,7 +67,7 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset,
#else
if ((error= ((readbytes =
(uint) pread(Filedes, Buffer, Count, offset)) != Count)))
- my_errno= errno;
+ my_errno= errno ? errno : -1;
#endif
if (error || readbytes != Count)
{
@@ -87,8 +87,10 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset,
my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG),
my_filename(Filedes),my_errno);
else if (MyFlags & (MY_NABP | MY_FNABP))
+ {
my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG),
my_filename(Filedes),my_errno);
+ }
}
if ((int) readbytes == -1 || (MyFlags & (MY_FNABP | MY_NABP)))
DBUG_RETURN(MY_FILE_ERROR); /* Return with error */
@@ -158,7 +160,8 @@ uint my_pwrite(int Filedes, const byte *Buffer, uint Count, my_off_t offset,
Count-=writenbytes;
offset+=writenbytes;
}
- DBUG_PRINT("error",("Write only %d bytes",writenbytes));
+ DBUG_PRINT("error",("Write only %d bytes, error: %d",
+ writenbytes, my_errno));
#ifndef NO_BACKGROUND
#ifdef THREAD
if (my_thread_var->abort)
diff --git a/mysys/my_rename.c b/mysys/my_rename.c
index 6a6aa6a5796..64dbac955ea 100644
--- a/mysys/my_rename.c
+++ b/mysys/my_rename.c
@@ -16,8 +16,9 @@
#include "mysys_priv.h"
#include <my_dir.h>
#include "mysys_err.h"
-
+#include "m_string.h"
#undef my_rename
+
/* On unix rename deletes to file if it exists */
int my_rename(const char *from, const char *to, myf MyFlags)
@@ -60,5 +61,18 @@ int my_rename(const char *from, const char *to, myf MyFlags)
if (MyFlags & (MY_FAE+MY_WME))
my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno);
}
+ else if (MyFlags & MY_SYNC_DIR)
+ {
+#ifdef NEED_EXPLICIT_SYNC_DIR
+ /* do only the needed amount of syncs: */
+ char dir_from[FN_REFLEN], dir_to[FN_REFLEN];
+ dirname_part(dir_from, from);
+ dirname_part(dir_to, to);
+ if (my_sync_dir(dir_from, MyFlags) ||
+ (strcmp(dir_from, dir_to) &&
+ my_sync_dir(dir_to, MyFlags)))
+ error= -1;
+#endif
+ }
DBUG_RETURN(error);
} /* my_rename */
diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c
index 810c0c72632..98059ccd508 100644
--- a/mysys/my_symlink.c
+++ b/mysys/my_symlink.c
@@ -84,6 +84,8 @@ int my_symlink(const char *content, const char *linkname, myf MyFlags)
if (MyFlags & MY_WME)
my_error(EE_CANT_SYMLINK, MYF(0), linkname, content, errno);
}
+ else if ((MyFlags & MY_SYNC_DIR) && my_sync_dir_by_file(linkname, MyFlags))
+ result= -1;
DBUG_RETURN(result);
#endif /* HAVE_READLINK */
}
diff --git a/mysys/my_sync.c b/mysys/my_sync.c
index 64fce3aac21..ab3fc89e0d3 100644
--- a/mysys/my_sync.c
+++ b/mysys/my_sync.c
@@ -48,6 +48,16 @@ int my_sync(File fd, myf my_flags)
do
{
+#if defined(F_FULLFSYNC)
+ /*
+ In Mac OS X >= 10.3 this call is safer than fsync() (it forces the
+ disk's write cache to be flushed and guarantees ordered writes).
+ */
+ if (!(res= fcntl(fd, F_FULLFSYNC, 0)))
+ break; /* ok */
+ /* Some file systems don't support F_FULLFSYNC and fail above: */
+ DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back"));
+#endif
#if defined(HAVE_FDATASYNC)
res= fdatasync(fd);
#elif defined(HAVE_FSYNC)
@@ -55,6 +65,7 @@ int my_sync(File fd, myf my_flags)
#elif defined(__WIN__)
res= _commit(fd);
#else
+#error Cannot find a way to sync a file, durability in danger
res= 0; /* No sync (strange OS) */
#endif
} while (res == -1 && errno == EINTR);
@@ -66,10 +77,78 @@ int my_sync(File fd, myf my_flags)
my_errno= -1; /* Unknown error */
if ((my_flags & MY_IGNORE_BADFD) &&
(er == EBADF || er == EINVAL || er == EROFS))
+ {
+ DBUG_PRINT("info", ("ignoring errno %d", er));
res= 0;
+ }
else if (my_flags & MY_WME)
my_error(EE_SYNC, MYF(ME_BELL+ME_WAITTANG), my_filename(fd), my_errno);
}
DBUG_RETURN(res);
} /* my_sync */
+
+static const char cur_dir_name[]= {FN_CURLIB, 0};
+/*
+ Force directory information to disk.
+
+ SYNOPSIS
+ my_sync_dir()
+ dir_name the name of the directory
+ my_flags flags (MY_WME etc)
+
+ RETURN
+ 0 if ok, !=0 if error
+*/
+int my_sync_dir(const char *dir_name, myf my_flags)
+{
+#ifdef NEED_EXPLICIT_SYNC_DIR
+ DBUG_ENTER("my_sync_dir");
+ DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags));
+ File dir_fd;
+ int res= 0;
+ const char *correct_dir_name;
+ /* Sometimes the path does not contain an explicit directory */
+ correct_dir_name= (dir_name[0] == 0) ? cur_dir_name : dir_name;
+ /*
+ Syncing a dir may give EINVAL on tmpfs on Linux, which is ok.
+ EIO on the other hand is very important. Hence MY_IGNORE_BADFD.
+ */
+ if ((dir_fd= my_open(correct_dir_name, O_RDONLY, MYF(my_flags))) >= 0)
+ {
+ if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD)))
+ res= 2;
+ if (my_close(dir_fd, MYF(my_flags)))
+ res= 3;
+ }
+ else
+ res= 1;
+ DBUG_RETURN(res);
+#else
+ return 0;
+#endif
+}
+
+
+/*
+ Force directory information to disk.
+
+ SYNOPSIS
+ my_sync_dir_by_file()
+ file_name the name of a file in the directory
+ my_flags flags (MY_WME etc)
+
+ RETURN
+ 0 if ok, !=0 if error
+*/
+int my_sync_dir_by_file(const char *file_name, myf my_flags)
+{
+#ifdef NEED_EXPLICIT_SYNC_DIR
+ char dir_name[FN_REFLEN];
+ dirname_part(dir_name, file_name);
+ return my_sync_dir(dir_name, my_flags);
+#else
+ return 0;
+#endif
+}
+
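A sketch (not part of the patch) combining the new pieces: with MY_SYNC_DIR, my_create() syncs the directory entry itself, and an explicit my_sync() covers the file contents; the file name and flags below are illustrative only:

    File fd= my_create("control_file", 0, O_RDWR, MYF(MY_WME | MY_SYNC_DIR));
    if (fd >= 0)
    {
      /* ... my_write() the initial contents ... */
      if (my_sync(fd, MYF(MY_WME)) || my_close(fd, MYF(MY_WME)))
        error= 1;
    }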
diff --git a/mysys/wqueue.c b/mysys/wqueue.c
new file mode 100644
index 00000000000..28e044ff606
--- /dev/null
+++ b/mysys/wqueue.c
@@ -0,0 +1,167 @@
+
+#include <wqueue.h>
+
+#define STRUCT_PTR(TYPE, MEMBER, a) \
+ (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
+/*
+ Link a thread into double-linked queue of waiting threads.
+
+ SYNOPSIS
+ wqueue_link_into_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be added to the queue
+
+ RETURN VALUE
+ none
+
+ NOTES
+ The queue is represented by a circular list of thread structures.
+ The list is doubly linked, of the type (**prev, *next), and is
+ accessed via a pointer to the last element.
+*/
+
+void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+ struct st_my_thread_var *last;
+ if (!(last= wqueue->last_thread))
+ {
+ /* Queue is empty */
+ thread->next= thread;
+ thread->prev= &thread->next;
+ }
+ else
+ {
+ thread->prev= last->next->prev;
+ last->next->prev= &thread->next;
+ thread->next= last->next;
+ last->next= thread;
+ }
+ wqueue->last_thread= thread;
+}
+
+
+/*
+ Add a thread to single-linked queue of waiting threads
+
+ SYNOPSIS
+ wqueue_add_to_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be added to the queue
+
+ RETURN VALUE
+ none
+
+ NOTES
+ The queue is represented by a circular list of thread structures.
+ The list is singly linked, of the type (*next), and is accessed via
+ a pointer to the last element.
+*/
+
+void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+ struct st_my_thread_var *last;
+ if (!(last= wqueue->last_thread))
+ thread->next= thread;
+ else
+ {
+ thread->next= last->next;
+ last->next= thread;
+ }
+ wqueue->last_thread= thread;
+}
+
+/*
+ Unlink a thread from double-linked queue of waiting threads
+
+ SYNOPSIS
+ wqueue_unlink_from_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be removed from the queue
+
+ RETURN VALUE
+ none
+
+ NOTES
+ See NOTES for wqueue_link_into_queue().
+*/
+
+void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+ if (thread->next == thread)
+ /* The queue contains only one member */
+ wqueue->last_thread= NULL;
+ else
+ {
+ thread->next->prev= thread->prev;
+ *thread->prev= thread->next;
+ if (wqueue->last_thread == thread)
+ wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
+ thread->prev);
+ }
+ thread->next= NULL;
+}
+
+
+/*
+ Remove all threads from the queue, signaling them to proceed
+
+ SYNOPSIS
+ wqueue_release_queue()
+ wqueue pointer to the queue structure
+
+ RETURN VALUE
+ none
+
+ NOTES
+ See NOTES for wqueue_add_to_queue().
+ When removed from the queue, each thread is signaled via the
+ condition variable thread->suspend.
+*/
+
+void wqueue_release_queue(WQUEUE *wqueue)
+{
+ struct st_my_thread_var *last= wqueue->last_thread;
+ struct st_my_thread_var *next= last->next;
+ struct st_my_thread_var *thread;
+ do
+ {
+ thread= next;
+ pthread_cond_signal(&thread->suspend);
+ next= thread->next;
+ thread->next= NULL;
+ }
+ while (thread != last);
+ wqueue->last_thread= NULL;
+}
+
+
+/*
+ Add thread and wait
+
+ SYNOPSIS
+ wqueue_add_and_wait()
+ wqueue queue to add to
+ thread thread which is waiting
+ lock mutex needed for the operation
+*/
+
+void wqueue_add_and_wait(WQUEUE *wqueue,
+ struct st_my_thread_var *thread, pthread_mutex_t *lock)
+{
+ DBUG_ENTER("wqueue_add_and_wait");
+ DBUG_PRINT("enter", ("thread ox%lxcond 0x%lx, mutex 0x%lx",
+ (ulong) thread, (ulong) &thread->suspend, (ulong) lock));
+ wqueue_add_to_queue(wqueue, thread);
+ do
+ {
+ DBUG_PRINT("info", ("wait... cond 0x%lx, mutex 0x%lx",
+ (ulong) &thread->suspend, (ulong) lock));
+ pthread_cond_wait(&thread->suspend, lock);
+ DBUG_PRINT("info", ("wait done cond 0x%lx, mutex 0x%lx, next 0x%lx",
+ (ulong) &thread->suspend, (ulong) lock,
+ (ulong) thread->next));
+ }
+ while (thread->next);
+ DBUG_VOID_RETURN;
+}
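A sketch (not part of the patch) of the protocol these helpers implement: a waiter parks itself with wqueue_add_and_wait() while holding the shared mutex, and another thread wakes all waiters with wqueue_release_queue() under the same mutex; 'queue', 'lock' and 'work_available' are assumed shared state:

    /* waiting side */
    pthread_mutex_lock(&lock);
    while (!work_available)
      wqueue_add_and_wait(&queue, my_thread_var, &lock);  /* returns when next == 0 */
    pthread_mutex_unlock(&lock);

    /* releasing side */
    pthread_mutex_lock(&lock);
    work_available= 1;
    if (queue.last_thread)
      wqueue_release_queue(&queue);        /* signals each parked thread once */
    pthread_mutex_unlock(&lock);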
diff --git a/plugin/daemon_example/daemon_example.cc b/plugin/daemon_example/daemon_example.cc
index 994c385796e..e73a3d336ea 100644
--- a/plugin/daemon_example/daemon_example.cc
+++ b/plugin/daemon_example/daemon_example.cc
@@ -81,7 +81,7 @@ pthread_handler_t mysql_heartbeat(void *p)
1 failure (cannot happen)
*/
-static int daemon_example_plugin_init(void *p)
+static int daemon_example_plugin_init(void *p __attribute__ ((unused)))
{
DBUG_ENTER("daemon_example_plugin_init");
@@ -147,7 +147,7 @@ static int daemon_example_plugin_init(void *p)
*/
-static int daemon_example_plugin_deinit(void *p)
+static int daemon_example_plugin_deinit(void *p __attribute__ ((unused)))
{
DBUG_ENTER("daemon_example_plugin_deinit");
char buffer[HEART_STRING_BUFFER];
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 2f9a96472ca..4b0170a0db0 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -1064,7 +1064,7 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length)
{
- uchar *reuse_end= reuse->base + reuse->max_keys * key_length;
+ byte *reuse_end= reuse->base + reuse->max_keys * key_length;
for (uint i= 0; i < queue->elements; ++i)
{
BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i);
@@ -1135,7 +1135,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
offset= rec_length-res_length;
maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1));
to_start_filepos= my_b_tell(to_file);
- strpos= (uchar*) sort_buffer;
+ strpos= sort_buffer;
org_max_rows=max_rows= param->max_rows;
/* The following will fire if there is not enough space in sort_buffer */
@@ -1147,10 +1147,10 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
DBUG_RETURN(1); /* purecov: inspected */
for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
{
- buffpek->base= strpos;
+ buffpek->base= (byte*) strpos;
buffpek->max_keys= maxcount;
strpos+= (uint) (error= (int) read_to_buffer(from_file, buffpek,
- rec_length));
+ rec_length));
if (error == -1)
goto err; /* purecov: inspected */
buffpek->max_keys= buffpek->mem_count; // If less data in buffers than expected
@@ -1239,7 +1239,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
}
}
buffpek= (BUFFPEK*) queue_top(&queue);
- buffpek->base= sort_buffer;
+ buffpek->base= (byte*) sort_buffer;
buffpek->max_keys= param->keys;
/*
@@ -1274,7 +1274,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
else
{
register uchar *end;
- strpos= buffpek->key+offset;
+ strpos= (uchar*) buffpek->key+offset;
for (end= strpos+buffpek->mem_count*rec_length ;
strpos != end ;
strpos+= rec_length)
diff --git a/sql/gen_lex_hash.cc b/sql/gen_lex_hash.cc
index 2d78999017a..7a78bcf591e 100644
--- a/sql/gen_lex_hash.cc
+++ b/sql/gen_lex_hash.cc
@@ -481,8 +481,8 @@ int main(int argc,char **argv)
printf("\nstatic unsigned int symbols_max_len=%d;\n\n", max_len2);
printf("\
-static inline SYMBOL *get_hash_symbol(const char *s,\n\
- unsigned int len,bool function)\n\
+static SYMBOL *get_hash_symbol(const char *s,\n\
+ unsigned int len,bool function)\n\
{\n\
register uchar *hash_map;\n\
register const char *cur_str= s;\n\
diff --git a/sql/handler.h b/sql/handler.h
index 82970cc1ac6..b6fd807194e 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -20,6 +20,7 @@
#pragma interface /* gcc class implementation */
#endif
+#include <my_handler.h>
#include <ft_global.h>
#include <keycache.h>
@@ -259,6 +260,7 @@ enum legacy_db_type
DB_TYPE_TABLE_FUNCTION,
DB_TYPE_MEMCACHE,
DB_TYPE_FALCON,
+ DB_TYPE_MARIA,
DB_TYPE_FIRST_DYNAMIC=42,
DB_TYPE_DEFAULT=127 // Must be last
};
diff --git a/sql/item_func.cc b/sql/item_func.cc
index aa344a343de..8f3ffd00e34 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -26,6 +26,7 @@
#include <hash.h>
#include <time.h>
#include <ft_global.h>
+#include <my_bit.h>
#include "sp_head.h"
#include "sp_rcontext.h"
diff --git a/sql/item_func.h b/sql/item_func.h
index 3306b059097..29dd524d849 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -1326,7 +1326,6 @@ public:
/* for fulltext search */
-#include <ft_global.h>
class Item_func_match :public Item_real_func
{
diff --git a/sql/log.cc b/sql/log.cc
index e25f008e90c..8de6e8d7d22 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -2397,6 +2397,11 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
{
+ /*
+ TODO: all operations creating/deleting the index file or a log should
+ call my_sync_dir() or my_sync_dir_by_file() to be durable.
+ TODO: file creation should be done with my_create() not my_open().
+ */
if (index_file_nr >= 0)
my_close(index_file_nr,MYF(0));
return TRUE;
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index bf6bd374ef8..755f7c80fc2 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -1592,7 +1592,7 @@ extern ulong max_connections,max_connect_errors, connect_timeout;
extern ulong slave_net_timeout, slave_trans_retries;
extern uint max_user_connections;
extern ulong what_to_log,flush_time;
-extern ulong query_buff_size, thread_stack;
+extern ulong query_buff_size;
extern ulong max_prepared_stmt_count, prepared_stmt_count;
extern ulong binlog_cache_size, max_binlog_cache_size, open_files_limit;
extern ulong max_binlog_size, max_relay_log_size;
@@ -1694,7 +1694,7 @@ extern SHOW_COMP_OPTION have_innodb;
extern SHOW_COMP_OPTION have_csv_db;
extern SHOW_COMP_OPTION have_ndbcluster;
extern SHOW_COMP_OPTION have_partition_db;
-
+extern SHOW_COMP_OPTION have_maria_db;
extern handlerton *partition_hton;
extern handlerton *myisam_hton;
extern handlerton *heap_hton;
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index a6525c1c78c..4b722479516 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -16,6 +16,7 @@
#include "mysql_priv.h"
#include <m_ctype.h>
#include <my_dir.h>
+#include <my_bit.h>
#include "slave.h"
#include "sql_repl.h"
#include "rpl_filter.h"
@@ -26,6 +27,9 @@
#include "events.h"
#include "../storage/myisam/ha_myisam.h"
+#ifdef WITH_MARIA_STORAGE_ENGINE
+#include "../storage/maria/ha_maria.h"
+#endif
#include "rpl_injector.h"
@@ -469,7 +473,7 @@ uint volatile thread_count, thread_running;
ulonglong thd_startup_options;
ulong back_log, connect_timeout, concurrency, server_id;
ulong table_cache_size, table_def_size;
-ulong thread_stack, what_to_log;
+ulong what_to_log;
ulong query_buff_size, slow_launch_time, slave_open_temp_tables;
ulong open_files_limit, max_binlog_size, max_relay_log_size;
ulong slave_net_timeout, slave_trans_retries;
@@ -531,6 +535,7 @@ char *mysqld_unix_port, *opt_mysql_tmpdir;
const char **errmesg; /* Error messages */
const char *myisam_recover_options_str="OFF";
const char *myisam_stats_method_str="nulls_unequal";
+const char *maria_stats_method_str="nulls_unequal";
/* name of reference on left espression in rewritten IN subquery */
const char *in_left_expr_name= "<left expr>";
@@ -2224,7 +2229,7 @@ the thread stack. Please read http://www.mysql.com/doc/en/Linux.html\n\n",
{
fprintf(stderr,"thd: 0x%lx\n",(long) thd);
print_stacktrace(thd ? (gptr) thd->thread_stack : (gptr) 0,
- thread_stack);
+ my_thread_stack_size);
}
if (thd)
{
@@ -2368,9 +2373,9 @@ static void start_signal_handler(void)
Peculiar things with ia64 platforms - it seems we only have half the
stack size in reality, so we have to double it here
*/
- pthread_attr_setstacksize(&thr_attr,thread_stack*2);
+ pthread_attr_setstacksize(&thr_attr,my_thread_stack_size*2);
#else
- pthread_attr_setstacksize(&thr_attr,thread_stack);
+ pthread_attr_setstacksize(&thr_attr,my_thread_stack_size);
#endif
#endif
@@ -3616,9 +3621,9 @@ int main(int argc, char **argv)
Peculiar things with ia64 platforms - it seems we only have half the
stack size in reality, so we have to double it here
*/
- pthread_attr_setstacksize(&connection_attrib,thread_stack*2);
+ pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size*2);
#else
- pthread_attr_setstacksize(&connection_attrib,thread_stack);
+ pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size);
#endif
#ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE
{
@@ -3629,15 +3634,15 @@ int main(int argc, char **argv)
stack_size/= 2;
#endif
/* We must check if stack_size = 0 as Solaris 2.9 can return 0 here */
- if (stack_size && stack_size < thread_stack)
+ if (stack_size && stack_size < my_thread_stack_size)
{
if (global_system_variables.log_warnings)
sql_print_warning("Asked for %lu thread stack, but got %ld",
- thread_stack, (long) stack_size);
+ my_thread_stack_size, (long) stack_size);
#if defined(__ia64__) || defined(__ia64)
- thread_stack= stack_size*2;
+ my_thread_stack_size= stack_size*2;
#else
- thread_stack= stack_size;
+ my_thread_stack_size= stack_size;
#endif
}
}
@@ -4886,10 +4891,17 @@ enum options_mysqld
OPT_MAX_LENGTH_FOR_SORT_DATA,
OPT_MAX_WRITE_LOCK_COUNT, OPT_BULK_INSERT_BUFFER_SIZE,
OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE,
+
OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE,
OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE,
- OPT_MYISAM_USE_MMAP,
+ OPT_MYISAM_USE_MMAP, OPT_MYISAM_REPAIR_THREADS,
OPT_MYISAM_STATS_METHOD,
+
+ OPT_MARIA_BLOCK_SIZE,
+ OPT_MARIA_MAX_SORT_FILE_SIZE, OPT_MARIA_SORT_BUFFER_SIZE,
+ OPT_MARIA_USE_MMAP, OPT_MARIA_REPAIR_THREADS,
+ OPT_MARIA_STATS_METHOD,
+
OPT_NET_BUFFER_LENGTH, OPT_NET_RETRY_COUNT,
OPT_NET_READ_TIMEOUT, OPT_NET_WRITE_TIMEOUT,
OPT_OPEN_FILES_LIMIT,
@@ -4903,7 +4915,7 @@ enum options_mysqld
OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE,
OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE,
OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK,
- OPT_WAIT_TIMEOUT, OPT_MYISAM_REPAIR_THREADS,
+ OPT_WAIT_TIMEOUT,
OPT_INNODB_MIRRORED_LOG_GROUPS,
OPT_INNODB_LOG_FILES_IN_GROUP,
OPT_INNODB_LOG_FILE_SIZE,
@@ -6042,6 +6054,49 @@ log and this option does nothing anymore.",
0
#endif
, 0, 2, 0, 1, 0},
+#ifdef WITH_MARIA_STORAGE_ENGINE
+ {"maria_block_size", OPT_MARIA_BLOCK_SIZE,
+ "Block size to be used for MARIA index pages.",
+ (gptr*) &maria_block_size,
+ (gptr*) &maria_block_size, 0, GET_ULONG, REQUIRED_ARG,
+ MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
+ MARIA_MAX_KEY_BLOCK_LENGTH,
+ 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0},
+ {"maria_key_buffer_size", OPT_KEY_BUFFER_SIZE,
+ "The size of the buffer used for index blocks for Maria tables. Increase "
+ "this to get better index handling (for all reads and multiple writes) to "
+ "as much as you can afford; 64M on a 256M machine that mainly runs MySQL "
+ "is quite common.",
+ (gptr*) &maria_key_cache_var.param_buff_size, (gptr*) 0,
+ 0, (GET_ULL | GET_ASK_ADDR),
+ REQUIRED_ARG, KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~(ulong) 0, MALLOC_OVERHEAD,
+ IO_SIZE, 0},
+ {"maria_max_sort_file_size", OPT_MARIA_MAX_SORT_FILE_SIZE,
+ "Don't use the fast sort index method to created index if the temporary "
+ "file would get bigger than this.",
+ (gptr*) &global_system_variables.maria_max_sort_file_size,
+ (gptr*) &max_system_variables.maria_max_sort_file_size, 0,
+ GET_ULL, REQUIRED_ARG, (longlong) LONG_MAX, 0, (ulonglong) MAX_FILE_SIZE,
+ 0, 1024*1024, 0},
+ {"maria_repair_threads", OPT_MARIA_REPAIR_THREADS,
+ "Number of threads to use when repairing maria tables. The value of 1 "
+ "disables parallel repair.",
+ (gptr*) &global_system_variables.maria_repair_threads,
+ (gptr*) &max_system_variables.maria_repair_threads, 0,
+ GET_ULONG, REQUIRED_ARG, 1, 1, ~0L, 0, 1, 0},
+ {"maria_sort_buffer_size", OPT_MARIA_SORT_BUFFER_SIZE,
+ "The buffer that is allocated when sorting the index when doing a REPAIR "
+ "or when creating indexes with CREATE INDEX or ALTER TABLE.",
+ (gptr*) &global_system_variables.maria_sort_buff_size,
+ (gptr*) &max_system_variables.maria_sort_buff_size, 0,
+ GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0},
+ {"maria_stats_method", OPT_MARIA_STATS_METHOD,
+ "Specifies how maria index statistics collection code should threat NULLs. "
+ "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
+ "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
+ (gptr*) &maria_stats_method_str, (gptr*) &maria_stats_method_str, 0,
+ GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#endif
{"max_allowed_packet", OPT_MAX_ALLOWED_PACKET,
"Max packetlength to send/receive from to server.",
(gptr*) &global_system_variables.max_allowed_packet,
@@ -6145,12 +6200,6 @@ The minimum value for this variable is 4096.",
(gptr*) &myisam_data_pointer_size,
(gptr*) &myisam_data_pointer_size, 0, GET_ULONG, REQUIRED_ARG,
6, 2, 7, 0, 1, 0},
- {"myisam_max_extra_sort_file_size", OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE,
- "Deprecated option",
- (gptr*) &global_system_variables.myisam_max_extra_sort_file_size,
- (gptr*) &max_system_variables.myisam_max_extra_sort_file_size,
- 0, GET_ULL, REQUIRED_ARG, (ulonglong) MI_MAX_TEMP_LENGTH,
- 0, (ulonglong) MAX_FILE_SIZE, 0, 1, 0},
{"myisam_max_sort_file_size", OPT_MYISAM_MAX_SORT_FILE_SIZE,
"Don't use the fast sort index method to created index if the temporary file would get bigger than this.",
(gptr*) &global_system_variables.myisam_max_sort_file_size,
@@ -6363,8 +6412,8 @@ The minimum value for this variable is 4096.",
REQUIRED_ARG, 20, 1, 16384, 0, 1, 0},
#endif
{"thread_stack", OPT_THREAD_STACK,
- "The stack size for each thread.", (gptr*) &thread_stack,
- (gptr*) &thread_stack, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK,
+ "The stack size for each thread.", (gptr*) &my_thread_stack_size,
+ (gptr*) &my_thread_stack_size, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK,
1024L*128L, ~0L, 0, 1024, 0},
{ "time_format", OPT_TIME_FORMAT,
"The TIME format (for future).",
@@ -7109,15 +7158,24 @@ static void mysql_init_variables(void)
global_query_id= thread_id= 1L;
strmov(server_version, MYSQL_SERVER_VERSION);
myisam_recover_options_str= sql_mode_str= "OFF";
- myisam_stats_method_str= "nulls_unequal";
+ myisam_stats_method_str= maria_stats_method_str= "nulls_unequal";
my_bind_addr = htonl(INADDR_ANY);
threads.empty();
thread_cache.empty();
key_caches.empty();
if (!(dflt_key_cache= get_or_create_key_cache(default_key_cache_base.str,
- default_key_cache_base.length)))
+ default_key_cache_base.length)))
exit(1);
- multi_keycache_init(); /* set key_cache_hash.default_value = dflt_key_cache */
+#ifdef WITH_MARIA_STORAGE_ENGINE
+ if (!(maria_key_cache= get_or_create_key_cache(maria_key_cache_base.str,
+ maria_key_cache_base.length)))
+ exit(1);
+ maria_key_cache->param_buff_size= maria_key_cache_var.param_buff_size;
+ maria_key_cache->param_block_size= maria_block_size;
+#endif
+
+ /* set key_cache_hash.default_value = dflt_key_cache */
+ multi_keycache_init();
/* Set directory paths */
strmake(language, LANGUAGE, sizeof(language)-1);
@@ -7159,6 +7217,7 @@ static void mysql_init_variables(void)
when collecting index statistics for MyISAM tables.
*/
global_system_variables.myisam_stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
+ global_system_variables.maria_stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
/* Variables that depends on compile options */
#ifndef DBUG_OFF
@@ -7176,6 +7235,11 @@ static void mysql_init_variables(void)
#else
have_csv_db= SHOW_OPTION_NO;
#endif
+#ifdef WITH_MARIA_STORAGE_ENGINE
+ have_maria_db= SHOW_OPTION_YES;
+#else
+ have_maria_db= SHOW_OPTION_NO;
+#endif
#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
have_ndbcluster= SHOW_OPTION_DISABLED;
#else
@@ -7773,7 +7837,6 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
int method;
LINT_INIT(method_conv);
- myisam_stats_method_str= argument;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
{
fprintf(stderr, "Invalid value of myisam_stats_method: %s.\n", argument);
@@ -8267,11 +8330,13 @@ void refresh_status(THD *thd)
#undef have_innodb
#undef have_ndbcluster
#undef have_csv_db
+#undef have_maria_db
SHOW_COMP_OPTION have_innodb= SHOW_OPTION_NO;
SHOW_COMP_OPTION have_ndbcluster= SHOW_OPTION_NO;
SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO;
SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO;
+SHOW_COMP_OPTION have_maria_db= SHOW_OPTION_NO;
#ifndef WITH_INNOBASE_STORAGE_ENGINE
uint innobase_flush_log_at_trx_commit;
diff --git a/sql/set_var.cc b/sql/set_var.cc
index ad5559165f3..2d40cb67bab 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -55,6 +55,9 @@
#include <thr_alarm.h>
#include <myisam.h>
#include <my_dir.h>
+#ifdef WITH_MARIA_STORAGE_ENGINE
+#include <maria.h>
+#endif
#include "events.h"
@@ -141,6 +144,7 @@ static void fix_max_join_size(THD *thd, enum_var_type type);
static void fix_query_cache_size(THD *thd, enum_var_type type);
static void fix_query_cache_min_res_unit(THD *thd, enum_var_type type);
static void fix_myisam_max_sort_file_size(THD *thd, enum_var_type type);
+static void fix_maria_max_sort_file_size(THD *thd, enum_var_type type);
static void fix_max_binlog_size(THD *thd, enum_var_type type);
static void fix_max_relay_log_size(THD *thd, enum_var_type type);
static void fix_max_connections(THD *thd, enum_var_type type);
@@ -343,6 +347,14 @@ sys_var_thd_enum sys_myisam_stats_method("myisam_stats_method",
&myisam_stats_method_typelib,
NULL);
+sys_var_thd_ulonglong sys_maria_max_sort_file_size("maria_max_sort_file_size", &SV::maria_max_sort_file_size, fix_maria_max_sort_file_size, 1);
+sys_var_thd_ulong sys_maria_repair_threads("maria_repair_threads", &SV::maria_repair_threads);
+sys_var_thd_ulong sys_maria_sort_buffer_size("maria_sort_buffer_size", &SV::maria_sort_buff_size);
+sys_var_thd_enum sys_maria_stats_method("maria_stats_method",
+ &SV::maria_stats_method,
+ &myisam_stats_method_typelib,
+ NULL);
+
sys_var_thd_ulong sys_net_buffer_length("net_buffer_length",
&SV::net_buffer_length);
sys_var_thd_ulong sys_net_read_timeout("net_read_timeout",
@@ -667,6 +679,7 @@ sys_var_have_variable sys_have_csv_db("have_csv", &have_csv_db);
sys_var_have_variable sys_have_dlopen("have_dynamic_loading", &have_dlopen);
sys_var_have_variable sys_have_geometry("have_geometry", &have_geometry);
sys_var_have_variable sys_have_innodb("have_innodb", &have_innodb);
+sys_var_have_variable sys_have_maria_db("have_maria", &have_maria_db);
sys_var_have_variable sys_have_ndbcluster("have_ndbcluster", &have_ndbcluster);
sys_var_have_variable sys_have_openssl("have_openssl", &have_openssl);
sys_var_have_variable sys_have_partition_db("have_partitioning",
@@ -792,6 +805,7 @@ SHOW_VAR init_vars[]= {
{sys_have_dlopen.name, (char*) &have_dlopen, SHOW_HAVE},
{sys_have_geometry.name, (char*) &have_geometry, SHOW_HAVE},
{sys_have_innodb.name, (char*) &have_innodb, SHOW_HAVE},
+ {sys_have_maria_db.name, (char*) &have_maria_db, SHOW_HAVE},
{sys_have_ndbcluster.name, (char*) &have_ndbcluster, SHOW_HAVE},
{sys_have_openssl.name, (char*) &have_openssl, SHOW_HAVE},
{sys_have_partition_db.name,(char*) &have_partition_db, SHOW_HAVE},
@@ -871,6 +885,15 @@ SHOW_VAR init_vars[]= {
{sys_low_priority_updates.name, (char*) &sys_low_priority_updates, SHOW_SYS},
{"lower_case_file_system", (char*) &lower_case_file_system, SHOW_MY_BOOL},
{"lower_case_table_names", (char*) &lower_case_table_names, SHOW_INT},
+
+ {sys_maria_max_sort_file_size.name, (char*) &sys_maria_max_sort_file_size,
+ SHOW_SYS},
+ {sys_maria_repair_threads.name, (char*) &sys_maria_repair_threads,
+ SHOW_SYS},
+ {sys_maria_sort_buffer_size.name, (char*) &sys_maria_sort_buffer_size,
+ SHOW_SYS},
+ {sys_maria_stats_method.name, (char*) &sys_maria_stats_method, SHOW_SYS},
+
{sys_max_allowed_packet.name,(char*) &sys_max_allowed_packet, SHOW_SYS},
{sys_max_binlog_cache_size.name,(char*) &sys_max_binlog_cache_size, SHOW_SYS},
{sys_max_binlog_size.name, (char*) &sys_max_binlog_size, SHOW_SYS},
@@ -1019,10 +1042,10 @@ SHOW_VAR init_vars[]= {
#if HAVE_POOL_OF_THREADS == 1
{sys_thread_pool_size.name, (char*) &sys_thread_pool_size, SHOW_SYS},
#endif
- {"thread_stack", (char*) &thread_stack, SHOW_LONG},
+ {"thread_stack", (char*) &my_thread_stack_size, SHOW_LONG},
{sys_time_format.name, (char*) &sys_time_format, SHOW_SYS},
{"time_zone", (char*) &sys_time_zone, SHOW_SYS},
- {sys_timed_mutexes.name, (char*) &sys_timed_mutexes, SHOW_SYS},
+ {sys_timed_mutexes.name, (char*) &sys_timed_mutexes, SHOW_SYS},
{sys_tmp_table_size.name, (char*) &sys_tmp_table_size, SHOW_SYS},
{sys_tmpdir.name, (char*) &sys_tmpdir, SHOW_SYS},
{sys_trans_alloc_block_size.name, (char*) &sys_trans_alloc_block_size,
@@ -1165,6 +1188,16 @@ fix_myisam_max_sort_file_size(THD *thd, enum_var_type type)
(my_off_t) global_system_variables.myisam_max_sort_file_size;
}
+static void
+fix_maria_max_sort_file_size(THD *thd, enum_var_type type)
+{
+#ifdef WITH_MARIA_STORAGE_ENGINE
+ maria_max_temp_length=
+ (my_off_t) global_system_variables.maria_max_sort_file_size;
+#endif
+}
+
+
/*
Set the OPTION_BIG_SELECTS flag if max_join_size == HA_POS_ERROR
*/
@@ -2364,6 +2397,7 @@ void sys_var_collation_server::set_default(THD *thd, enum_var_type type)
LEX_STRING default_key_cache_base= {(char *) "default", 7 };
+LEX_STRING maria_key_cache_base= {(char *) "maria", 5 };
static KEY_CACHE zero_key_cache;
@@ -2373,7 +2407,7 @@ KEY_CACHE *get_key_cache(LEX_STRING *cache_name)
if (!cache_name || ! cache_name->length)
cache_name= &default_key_cache_base;
return ((KEY_CACHE*) find_named(&key_caches,
- cache_name->str, cache_name->length, 0));
+ cache_name->str, cache_name->length, 0));
}
diff --git a/sql/set_var.h b/sql/set_var.h
index 8887d91ec69..0540f9964e0 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -1120,6 +1120,7 @@ public:
extern sys_var_thd_bool sys_old_alter_table;
extern sys_var_thd_bool sys_old_passwords;
extern LEX_STRING default_key_cache_base;
+extern LEX_STRING maria_key_cache_base;
/* For sql_yacc */
struct sys_var_with_base
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 1f5f7aedbb4..cbfbc72d0aa 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -180,7 +180,7 @@ class Time_zone;
struct system_variables
{
- ulonglong myisam_max_extra_sort_file_size;
+ ulonglong maria_max_sort_file_size;
ulonglong myisam_max_sort_file_size;
ulonglong max_heap_table_size;
ulonglong tmp_table_size;
@@ -196,6 +196,9 @@ struct system_variables
ulong max_sort_length;
ulong max_tmp_tables;
ulong max_insert_delayed_threads;
+ ulong maria_repair_threads;
+ ulong maria_sort_buff_size;
+ ulong maria_stats_method;
ulong multi_range_count;
ulong myisam_repair_threads;
ulong myisam_sort_buff_size;
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 11eb510d6c8..ec2807750bd 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -4868,10 +4868,10 @@ bool check_stack_overrun(THD *thd, long margin,
long stack_used;
DBUG_ASSERT(thd == current_thd);
if ((stack_used=used_stack(thd->thread_stack,(char*) &stack_used)) >=
- (long) (thread_stack - margin))
+ (long) (my_thread_stack_size - margin))
{
sprintf(errbuff[0],ER(ER_STACK_OVERRUN_NEED_MORE),
- stack_used,thread_stack,margin);
+ stack_used,my_thread_stack_size,margin);
my_message(ER_STACK_OVERRUN_NEED_MORE,errbuff[0],MYF(0));
thd->fatal_error();
return 1;
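For context, a sketch (not part of the patch) of the usual caller pattern around this check; STACK_MIN_SIZE and STACK_BUFF_ALLOC are existing server constants and 'buf' only marks the current stack position:

    char buf[STACK_BUFF_ALLOC];
    if (check_stack_overrun(thd, STACK_MIN_SIZE, buf))
      return TRUE;            /* ER_STACK_OVERRUN_NEED_MORE was already sent */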
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 75b3f5ae981..f58433cd220 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -25,6 +25,7 @@
#include "sql_cursor.h"
#include <m_ctype.h>
+#include <my_bit.h>
#include <hash.h>
#include <ft_global.h>
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index da28ca07e2c..1572f6304e1 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -34,7 +34,9 @@
the callback function 'unpack_addon_fields'.
*/
-typedef struct st_sort_addon_field { /* Sort addon packed field */
+typedef struct st_sort_addon_field
+{
+ /* Sort addon packed field */
Field *field; /* Original field */
uint offset; /* Offset from the last sorted field */
uint null_offset; /* Offset to to null bit from the last sorted field */
@@ -42,13 +44,6 @@ typedef struct st_sort_addon_field { /* Sort addon packed field */
uint8 null_bit; /* Null bit mask for the field */
} SORT_ADDON_FIELD;
-typedef struct st_buffpek { /* Struktur om sorteringsbuffrarna */
- my_off_t file_pos; /* Where we are in the sort file */
- uchar *base,*key; /* key pointers */
- ha_rows count; /* Number of rows in table */
- ulong mem_count; /* numbers of keys in memory */
- ulong max_keys; /* Max keys in buffert */
-} BUFFPEK;
typedef struct st_sort_param {
uint rec_length; /* Length of sorted records */
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index 9e30cf5878c..07bd1390116 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -459,7 +459,7 @@ void mysql_print_status()
VOID(my_getwd(current_dir, sizeof(current_dir),MYF(0)));
printf("Current dir: %s\n", current_dir);
printf("Running threads: %d Stack size: %ld\n", thread_count,
- (long) thread_stack);
+ (long) my_thread_stack_size);
thr_print_locks(); // Write some debug info
#ifndef DBUG_OFF
print_cached_tables();
@@ -534,7 +534,7 @@ Estimated memory (with thread stack): %ld\n",
(int) info.uordblks,
(int) info.fordblks,
(int) info.keepcost,
- (long) (thread_count * thread_stack + info.hblkhd + info.arena));
+ (long) (thread_count * my_thread_stack_size + info.hblkhd + info.arena));
#endif
Events::get_instance()->dump_internal_status();
diff --git a/sql/uniques.cc b/sql/uniques.cc
index 9eb827f62a3..17c9c875659 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -449,7 +449,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
*/
for (top= begin; top != end; ++top)
{
- top->base= merge_buffer + (top - begin) * piece_size;
+ top->base= (byte*) (merge_buffer + (top - begin) * piece_size);
top->max_keys= max_key_count_per_piece;
bytes_read= read_to_buffer(file, top, key_length);
if (bytes_read == (uint) (-1))
diff --git a/sql/unireg.cc b/sql/unireg.cc
index d90420313a6..c55d6db3c85 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -284,8 +284,10 @@ bool mysql_create_frm(THD *thd, const char *file_name,
my_free((gptr) keybuff, MYF(0));
if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) &&
- my_sync(file, MYF(MY_WME)))
- goto err2;
+ (my_sync(file, MYF(MY_WME)) ||
+ my_sync_dir_by_file(file_name, MYF(MY_WME))))
+ goto err2;
+
if (my_close(file,MYF(MY_WME)))
goto err3;
diff --git a/storage/Makefile.am b/storage/Makefile.am
index b978453d29d..eec499aabf4 100644
--- a/storage/Makefile.am
+++ b/storage/Makefile.am
@@ -19,7 +19,12 @@ AUTOMAKE_OPTIONS = foreign
# These are built from source in the Docs directory
EXTRA_DIST =
-SUBDIRS = @mysql_se_dirs@
+# Until we remove fulltext-related references from Maria to MyISAM
+# MyISAM must be built before Maria, which is not the case by default
+# because of alphabetical order
+# So we put myisam first; this is very ugly regarding plugins' logic
+# but it works, and we'll remove it soon.
+SUBDIRS = myisam @mysql_se_dirs@
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt
new file mode 100644
index 00000000000..cfe23054e2f
--- /dev/null
+++ b/storage/maria/CMakeLists.txt
@@ -0,0 +1 @@
+# empty for the moment; will fill it when we build under Windows
diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am
new file mode 100644
index 00000000000..b6c713571d7
--- /dev/null
+++ b/storage/maria/Makefile.am
@@ -0,0 +1,162 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+MYSQLDATAdir = $(localstatedir)
+MYSQLSHAREdir = $(pkgdatadir)
+MYSQLBASEdir= $(prefix)
+MYSQLLIBdir= $(pkglibdir)
+INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \
+ -I$(top_srcdir)/regex \
+ -I$(top_srcdir)/sql \
+ -I$(srcdir)
+WRAPLIBS=
+
+LDADD =
+
+DEFS = @DEFS@
+
+# "." is needed first because tests in unittest need libmaria
+SUBDIRS = . unittest
+
+EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt plug.in
+pkgdata_DATA = ma_test_all ma_test_all.res
+pkglib_LIBRARIES = libmaria.a
+bin_PROGRAMS = maria_chk maria_pack maria_ftdump
+maria_chk_DEPENDENCIES= $(LIBRARIES)
+# Only reason to link with libmyisam.a here is that it's where some fulltext
+# pieces are (but soon we'll remove fulltext dependencies from Maria).
+# For now, it imposes that storage/myisam be built before storage/maria.
+maria_chk_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+maria_pack_DEPENDENCIES=$(LIBRARIES)
+maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test
+noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \
+ ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \
+ ma_ft_eval.h trnman.h lockman.h tablockman.h \
+ ma_control_file.h ha_maria.h ma_blockrec.h
+ma_test1_DEPENDENCIES= $(LIBRARIES)
+ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+ma_test2_DEPENDENCIES= $(LIBRARIES)
+ma_test2_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+ma_test3_DEPENDENCIES= $(LIBRARIES)
+ma_test3_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+#ma_ft_test1_DEPENDENCIES= $(LIBRARIES)
+#ma_ft_eval_DEPENDENCIES= $(LIBRARIES)
+maria_ftdump_DEPENDENCIES= $(LIBRARIES)
+maria_ftdump_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+ma_rt_test_DEPENDENCIES= $(LIBRARIES)
+ma_rt_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+ma_sp_test_DEPENDENCIES= $(LIBRARIES)
+ma_sp_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
+libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \
+ ma_rnext.c ma_rnext_same.c \
+ ma_search.c ma_page.c ma_key.c ma_locking.c \
+ ma_rrnd.c ma_scan.c ma_cache.c \
+ ma_statrec.c ma_packrec.c ma_dynrec.c \
+ ma_blockrec.c ma_bitmap.c \
+ ma_update.c ma_write.c ma_unique.c \
+ ma_delete.c \
+ ma_rprev.c ma_rfirst.c ma_rlast.c ma_rsame.c \
+ ma_rsamepos.c ma_panic.c ma_close.c ma_create.c\
+ ma_range.c ma_dbug.c ma_checksum.c \
+ ma_changed.c ma_static.c ma_delete_all.c \
+ ma_delete_table.c ma_rename.c ma_check.c \
+ ma_keycache.c ma_preload.c ma_ft_parser.c \
+ ma_ft_update.c ma_ft_boolean_search.c \
+ ma_ft_nlq_search.c ft_maria.c ma_sort.c \
+ ha_maria.cc trnman.c lockman.c tablockman.c \
+ ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \
+ ma_sp_key.c ma_control_file.c ma_loghandler.c
+CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA?
+
+SUFFIXES = .sh
+
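+# Generate runnable scripts from .sh templates by substituting the
+# configure-time values (paths, compilers, version) listed below.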
+.sh:
+ @RM@ -f $@ $@-t
+ @SED@ \
+ -e 's!@''bindir''@!$(bindir)!g' \
+ -e 's!@''scriptdir''@!$(bindir)!g' \
+ -e 's!@''prefix''@!$(prefix)!g' \
+ -e 's!@''datadir''@!$(datadir)!g' \
+ -e 's!@''localstatedir''@!$(localstatedir)!g' \
+ -e 's!@''libexecdir''@!$(libexecdir)!g' \
+ -e 's!@''CC''@!@CC@!'\
+ -e 's!@''CXX''@!@CXX@!'\
+ -e 's!@''GXX''@!@GXX@!'\
+ -e 's!@''PERL''@!@PERL@!' \
+ -e 's!@''CFLAGS''@!@SAVE_CFLAGS@!'\
+ -e 's!@''CXXFLAGS''@!@SAVE_CXXFLAGS@!'\
+ -e 's!@''LDFLAGS''@!@SAVE_LDFLAGS@!'\
+ -e 's!@''VERSION''@!@VERSION@!' \
+ -e 's!@''MYSQL_SERVER_SUFFIX''@!@MYSQL_SERVER_SUFFIX@!' \
+ -e 's!@''COMPILATION_COMMENT''@!@COMPILATION_COMMENT@!' \
+ -e 's!@''MACHINE_TYPE''@!@MACHINE_TYPE@!' \
+ -e 's!@''HOSTNAME''@!@HOSTNAME@!' \
+ -e 's!@''SYSTEM_TYPE''@!@SYSTEM_TYPE@!' \
+ -e 's!@''CHECK_PID''@!@CHECK_PID@!' \
+ -e 's!@''FIND_PROC''@!@FIND_PROC@!' \
+ -e 's!@''MYSQLD_DEFAULT_SWITCHES''@!@MYSQLD_DEFAULT_SWITCHES@!' \
+ -e 's!@''MYSQL_UNIX_ADDR''@!@MYSQL_UNIX_ADDR@!' \
+ -e 's!@''TARGET_LINUX''@!@TARGET_LINUX@!' \
+ -e "s!@""CONF_COMMAND""@!@CONF_COMMAND@!" \
+ -e 's!@''MYSQLD_USER''@!@MYSQLD_USER@!' \
+ -e 's!@''sysconfdir''@!@sysconfdir@!' \
+ -e 's!@''SHORT_MYSQL_INTRO''@!@SHORT_MYSQL_INTRO@!' \
+ -e 's!@''SHARED_LIB_VERSION''@!@SHARED_LIB_VERSION@!' \
+ -e 's!@''MYSQL_BASE_VERSION''@!@MYSQL_BASE_VERSION@!' \
+ -e 's!@''MYSQL_NO_DASH_VERSION''@!@MYSQL_NO_DASH_VERSION@!' \
+ -e 's!@''MYSQL_TCP_PORT''@!@MYSQL_TCP_PORT@!' \
+ -e 's!@''PERL_DBI_VERSION''@!@PERL_DBI_VERSION@!' \
+ -e 's!@''PERL_DBD_VERSION''@!@PERL_DBD_VERSION@!' \
+ -e 's!@''PERL_DATA_DUMPER''@!@PERL_DATA_DUMPER@!' \
+ $< > $@-t
+ @CHMOD@ +x $@-t
+ @MV@ $@-t $@
+
+# Don't update the files from bitkeeper
+%::SCCS/s.%
diff --git a/storage/maria/ft_maria.c b/storage/maria/ft_maria.c
new file mode 100644
index 00000000000..7104c6704ba
--- /dev/null
+++ b/storage/maria/ft_maria.c
@@ -0,0 +1,49 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/*
+  This file contains the interface functions between fulltext and Maria
+*/
+
+#include "ma_ftdefs.h"
+
+FT_INFO *maria_ft_init_search(uint flags, void *info, uint keynr,
+ byte *query, uint query_len, CHARSET_INFO *cs,
+ byte *record)
+{
+ FT_INFO *res;
+ if (flags & FT_BOOL)
+ res= maria_ft_init_boolean_search((MARIA_HA *) info, keynr, query,
+ query_len, cs);
+ else
+ res= maria_ft_init_nlq_search((MARIA_HA *) info, keynr, query, query_len,
+ flags, record);
+ return res;
+}
+
+const struct _ft_vft _ma_ft_vft_nlq = {
+ maria_ft_nlq_read_next, maria_ft_nlq_find_relevance,
+ maria_ft_nlq_close_search, maria_ft_nlq_get_relevance,
+ maria_ft_nlq_reinit_search
+};
+const struct _ft_vft _ma_ft_vft_boolean = {
+ maria_ft_boolean_read_next, maria_ft_boolean_find_relevance,
+ maria_ft_boolean_close_search, maria_ft_boolean_get_relevance,
+ maria_ft_boolean_reinit_search
+};
+
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
new file mode 100644
index 00000000000..102f987f01a
--- /dev/null
+++ b/storage/maria/ha_maria.cc
@@ -0,0 +1,1896 @@
+/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+#define MYSQL_SERVER 1
+#include "mysql_priv.h"
+#include <mysql/plugin.h>
+#include <m_ctype.h>
+#include <myisampack.h>
+#include <my_bit.h>
+#include "ha_maria.h"
+
+#include "maria_def.h"
+#include "ma_rt_index.h"
+
+ulong maria_recover_options= HA_RECOVER_NONE;
+
+/* bits in maria_recover_options */
+const char *maria_recover_names[]=
+{
+ "DEFAULT", "BACKUP", "FORCE", "QUICK", NullS
+};
+TYPELIB maria_recover_typelib=
+{
+ array_elements(maria_recover_names) - 1, "",
+ maria_recover_names, NULL
+};
+
+const char *maria_stats_method_names[]=
+{
+ "nulls_unequal", "nulls_equal",
+ "nulls_ignored", NullS
+};
+TYPELIB maria_stats_method_typelib=
+{
+ array_elements(maria_stats_method_names) - 1, "",
+ maria_stats_method_names, NULL
+};
+
+
+/*****************************************************************************
+** MARIA tables
+*****************************************************************************/
+
+static handler *maria_create_handler(handlerton *hton,
+ TABLE_SHARE * table,
+ MEM_ROOT *mem_root)
+{
+ return new (mem_root) ha_maria(hton, table);
+}
+
+
+// collect errors printed by maria_check routines
+
+static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
+ const char *fmt, va_list args)
+{
+ THD *thd= (THD *) param->thd;
+ Protocol *protocol= thd->protocol;
+ uint length, msg_length;
+ char msgbuf[MARIA_MAX_MSG_BUF];
+ char name[NAME_LEN * 2 + 2];
+
+ msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
+ msgbuf[sizeof(msgbuf) - 1]= 0; // healthy paranoia
+
+ DBUG_PRINT(msg_type, ("message: %s", msgbuf));
+
+ if (!thd->vio_ok())
+ {
+ sql_print_error(msgbuf);
+ return;
+ }
+
+ if (param->testflag &
+ (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR))
+ {
+ my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME));
+ return;
+ }
+ length= (uint) (strxmov(name, param->db_name, ".", param->table_name,
+ NullS) - name);
+ protocol->prepare_for_resend();
+ protocol->store(name, length, system_charset_info);
+ protocol->store(param->op_name, system_charset_info);
+ protocol->store(msg_type, system_charset_info);
+ protocol->store(msgbuf, msg_length, system_charset_info);
+ if (protocol->write())
+ sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n",
+ msgbuf);
+ return;
+}
+
+extern "C" {
+
+volatile int *_ma_killed_ptr(HA_CHECK *param)
+{
+  /* In theory this is an unsafe conversion, but it should be ok for now */
+ return (int*) &(((THD *) (param->thd))->killed);
+}
+
+
+void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
+{
+ param->error_printed |= 1;
+ param->out_flag |= O_DATA_LOST;
+ va_list args;
+ va_start(args, fmt);
+ _ma_check_print_msg(param, "error", fmt, args);
+ va_end(args);
+}
+
+
+void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ _ma_check_print_msg(param, "info", fmt, args);
+ va_end(args);
+}
+
+
+void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
+{
+ param->warning_printed= 1;
+ param->out_flag |= O_DATA_LOST;
+ va_list args;
+ va_start(args, fmt);
+ _ma_check_print_msg(param, "warning", fmt, args);
+ va_end(args);
+}
+
+}
+
+
+ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg):
+handler(hton, table_arg), file(0),
+int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
+ HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
+ HA_FILE_BASED | HA_CAN_GEOMETRY | HA_NO_TRANSACTIONS |
+ HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS |
+ HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT),
+can_enable_indexes(1)
+{}
+
+
+handler *ha_maria::clone(MEM_ROOT *mem_root)
+{
+ ha_maria *new_handler= static_cast <ha_maria *>(handler::clone(mem_root));
+ if (new_handler)
+ new_handler->file->state= file->state;
+ return new_handler;
+}
+
+
+static const char *ha_maria_exts[]=
+{
+ MARIA_NAME_IEXT,
+ MARIA_NAME_DEXT,
+ NullS
+};
+
+
+const char **ha_maria::bas_ext() const
+{
+ return ha_maria_exts;
+}
+
+
+const char *ha_maria::index_type(uint key_number)
+{
+ return ((table->key_info[key_number].flags & HA_FULLTEXT) ?
+ "FULLTEXT" :
+ (table->key_info[key_number].flags & HA_SPATIAL) ?
+ "SPATIAL" :
+ (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ?
+ "RTREE" : "BTREE");
+}
+
+
+#ifdef HAVE_REPLICATION
+int ha_maria::net_read_dump(NET * net)
+{
+ int data_fd= file->dfile;
+ int error= 0;
+
+ my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME));
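+  /* Receive the dump packet by packet; an empty packet marks end of stream */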
+ for (;;)
+ {
+ ulong packet_len= my_net_read(net);
+ if (!packet_len)
+ break; // end of file
+ if (packet_len == packet_error)
+ {
+ sql_print_error("ha_maria::net_read_dump - read error ");
+ error= -1;
+ goto err;
+ }
+ if (my_write(data_fd, (byte *) net->read_pos, (uint) packet_len,
+ MYF(MY_WME | MY_FNABP)))
+ {
+ error= errno;
+ goto err;
+ }
+ }
+err:
+ return error;
+}
+
+
+int ha_maria::dump(THD * thd, int fd)
+{
+ MARIA_SHARE *share= file->s;
+ NET *net= &thd->net;
+ uint block_size= share->block_size;
+ my_off_t bytes_to_read= share->state.state.data_file_length;
+ int data_fd= file->dfile;
+ byte *buf= (byte *) my_malloc(block_size, MYF(MY_WME));
+ if (!buf)
+ return ENOMEM;
+
+ int error= 0;
+ my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME));
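+  /* Stream the data file in block_size chunks, to fd or over the network */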
+ for (; bytes_to_read > 0;)
+ {
+ uint bytes= my_read(data_fd, buf, block_size, MYF(MY_WME));
+ if (bytes == MY_FILE_ERROR)
+ {
+ error= errno;
+ goto err;
+ }
+
+ if (fd >= 0)
+ {
+ if (my_write(fd, buf, bytes, MYF(MY_WME | MY_FNABP)))
+ {
+ error= errno ? errno : EPIPE;
+ goto err;
+ }
+ }
+ else
+ {
+ if (my_net_write(net, (char*) buf, bytes))
+ {
+ error= errno ? errno : EPIPE;
+ goto err;
+ }
+ }
+ bytes_to_read -= bytes;
+ }
+
+ if (fd < 0)
+ {
+ if (my_net_write(net, "", 0))
+ error= errno ? errno : EPIPE;
+ net_flush(net);
+ }
+
+err:
+ my_free((gptr) buf, MYF(0));
+ return error;
+}
+#endif /* HAVE_REPLICATION */
+
+
+bool ha_maria::check_if_locking_is_allowed(uint sql_command,
+ ulong type, TABLE *table,
+ uint count,
+ bool called_by_privileged_thread)
+{
+ /*
+    To be able to open and lock system tables like 'mysql.proc' for reading
+    while we already have some tables opened and locked, and to avoid
+    deadlocks, we have to disallow write-locking of these tables with any
+    other tables.
+ */
+ if (table->s->system_table &&
+ table->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE && count != 1)
+ {
+ my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table->s->db.str,
+ table->s->table_name.str);
+ return FALSE;
+ }
+
+ /*
+    Deny locking of the log tables, which is incompatible with concurrent
+    insert, unless we are called from a logger THD (general_log_thd or
+    slow_log_thd) or by a privileged thread.
+ */
+ if (!called_by_privileged_thread)
+ return check_if_log_table_locking_is_allowed(sql_command, type, table);
+
+ return TRUE;
+}
+
+
+ /* Name is here without an extension */
+
+int ha_maria::open(const char *name, int mode, uint test_if_locked)
+{
+ uint i;
+ if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER)))
+ return (my_errno ? my_errno : -1);
+
+ if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
+ VOID(maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0));
+
+#ifdef NOT_USED
+ if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap)
+ VOID(maria_extra(file, HA_EXTRA_MMAP, 0));
+#endif
+
+ info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED))
+ VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0));
+ if (!table->s->db_record_offset)
+ int_table_flags |= HA_REC_NOT_IN_SEQ;
+ if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+ int_table_flags |= HA_HAS_CHECKSUM;
+
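+  /*
+    Attach fulltext parser plugins to the keys that declared one and copy
+    the key block sizes back to the SQL layer.
+  */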
+ for (i= 0; i < table->s->keys; i++)
+ {
+ struct st_plugin_int *parser= table->key_info[i].parser;
+ if (table->key_info[i].flags & HA_USES_PARSER)
+ file->s->keyinfo[i].parser=
+ (struct st_mysql_ftparser *) parser->plugin->info;
+ table->key_info[i].block_size= file->s->keyinfo[i].block_length;
+ }
+ return (0);
+}
+
+
+int ha_maria::close(void)
+{
+ MARIA_HA *tmp= file;
+ file= 0;
+ return maria_close(tmp);
+}
+
+
+int ha_maria::write_row(byte * buf)
+{
+ statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status);
+
+ /* If we have a timestamp column, update it to the current time */
+ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
+ table->timestamp_field->set_time();
+
+ /*
+ If we have an auto_increment column and we are writing a changed row
+ or a new row, then update the auto_increment value in the record.
+ */
+ if (table->next_number_field && buf == table->record[0])
+ {
+ int error;
+ if ((error= update_auto_increment()))
+ return error;
+ }
+ return maria_write(file, buf);
+}
+
+
+int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
+{
+ if (!file)
+ return HA_ADMIN_INTERNAL_ERROR;
+ int error;
+ HA_CHECK param;
+ MARIA_SHARE *share= file->s;
+ const char *old_proc_info= thd->proc_info;
+
+ thd->proc_info= "Checking table";
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "check";
+ param.db_name= table->s->db.str;
+ param.table_name= table->alias;
+ param.testflag= check_opt->flags | T_CHECK | T_SILENT;
+ param.stats_method= (enum_handler_stats_method) thd->variables.
+ maria_stats_method;
+
+ if (!(table->db_stat & HA_READ_ONLY))
+ param.testflag |= T_STATISTICS;
+ param.using_global_keycache= 1;
+
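+  /*
+    If the table is not marked crashed and nothing relevant has changed
+    since the last check, report the check as already done.
+  */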
+ if (!maria_is_crashed(file) &&
+ (((param.testflag & T_CHECK_ONLY_CHANGED) &&
+ !(share->state.changed & (STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR)) &&
+ share->state.open_count == 0) ||
+ ((param.testflag & T_FAST) && (share->state.open_count ==
+ (uint) (share->global_changed ? 1 :
+ 0)))))
+ return HA_ADMIN_ALREADY_DONE;
+
+ error= maria_chk_status(&param, file); // Not fatal
+ error= maria_chk_size(&param, file);
+ if (!error)
+ error |= maria_chk_del(&param, file, param.testflag);
+ if (!error)
+ error= maria_chk_key(&param, file);
+ if (!error)
+ {
+ if ((!(param.testflag & T_QUICK) &&
+ ((share->options &
+ (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ||
+ (param.testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file))
+ {
+ uint old_testflag= param.testflag;
+ param.testflag |= T_MEDIUM;
+ if (!(error= init_io_cache(&param.read_cache, file->dfile,
+ my_default_record_cache_size, READ_CACHE,
+ share->pack.header_length, 1, MYF(MY_WME))))
+ {
+ error= maria_chk_data_link(&param, file, param.testflag & T_EXTEND);
+ end_io_cache(&(param.read_cache));
+ }
+ param.testflag= old_testflag;
+ }
+ }
+ if (!error)
+ {
+ if ((share->state.changed & (STATE_CHANGED |
+ STATE_CRASHED_ON_REPAIR |
+ STATE_CRASHED | STATE_NOT_ANALYZED)) ||
+ (param.testflag & T_STATISTICS) || maria_is_crashed(file))
+ {
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ pthread_mutex_lock(&share->intern_lock);
+ share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR);
+ if (!(table->db_stat & HA_READ_ONLY))
+ error= maria_update_state_info(&param, file, UPDATE_TIME | UPDATE_OPEN_COUNT |
+ UPDATE_STAT);
+ pthread_mutex_unlock(&share->intern_lock);
+ info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
+ HA_STATUS_CONST);
+ }
+ }
+ else if (!maria_is_crashed(file) && !thd->killed)
+ {
+ maria_mark_crashed(file);
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ }
+
+ thd->proc_info= old_proc_info;
+ return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
+}
+
+
+/*
+ Analyze the key distribution in the table
+ As the table may be only locked for read, we have to take into account that
+ two threads may do an analyze at the same time!
+*/
+
+int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
+{
+ int error= 0;
+ HA_CHECK param;
+ MARIA_SHARE *share= file->s;
+
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "analyze";
+ param.db_name= table->s->db.str;
+ param.table_name= table->alias;
+ param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
+ T_DONT_CHECK_CHECKSUM);
+ param.using_global_keycache= 1;
+ param.stats_method= (enum_handler_stats_method) thd->variables.
+ maria_stats_method;
+
+ if (!(share->state.changed & STATE_NOT_ANALYZED))
+ return HA_ADMIN_ALREADY_DONE;
+
+ error= maria_chk_key(&param, file);
+ if (!error)
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ error= maria_update_state_info(&param, file, UPDATE_STAT);
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+ else if (!maria_is_crashed(file) && !thd->killed)
+ maria_mark_crashed(file);
+ return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
+}
+
+
+int ha_maria::restore(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ HA_CHECK_OPT tmp_check_opt;
+ char *backup_dir= thd->lex->backup_dir;
+ char src_path[FN_REFLEN], dst_path[FN_REFLEN];
+ char table_name[FN_REFLEN];
+ int error;
+ const char *errmsg;
+ DBUG_ENTER("restore");
+
+ VOID(tablename_to_filename(table->s->table_name.str, table_name,
+ sizeof(table_name)));
+
+ if (fn_format_relative_to_data_home(src_path, table_name, backup_dir,
+ MARIA_NAME_DEXT))
+ DBUG_RETURN(HA_ADMIN_INVALID);
+
+ strxmov(dst_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
+ if (my_copy(src_path, dst_path, MYF(MY_WME)))
+ {
+ error= HA_ADMIN_FAILED;
+ errmsg= "Failed in my_copy (Error %d)";
+ goto err;
+ }
+
+ tmp_check_opt.init();
+ tmp_check_opt.flags |= T_VERY_SILENT | T_CALC_CHECKSUM | T_QUICK;
+ DBUG_RETURN(repair(thd, &tmp_check_opt));
+
+err:
+ {
+ HA_CHECK param;
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "restore";
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
+ param.testflag= 0;
+ _ma_check_print_error(&param, errmsg, my_errno);
+ DBUG_RETURN(error);
+ }
+}
+
+
+int ha_maria::backup(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ char *backup_dir= thd->lex->backup_dir;
+ char src_path[FN_REFLEN], dst_path[FN_REFLEN];
+ char table_name[FN_REFLEN];
+ int error;
+ const char *errmsg;
+ DBUG_ENTER("ha_maria::backup");
+
+ VOID(tablename_to_filename(table->s->table_name.str, table_name,
+ sizeof(table_name)));
+
+ if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
+ reg_ext))
+ {
+ errmsg= "Failed in fn_format() for .frm file (errno: %d)";
+ error= HA_ADMIN_INVALID;
+ goto err;
+ }
+
+ strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS);
+ if (my_copy(src_path, dst_path,
+ MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
+ {
+ error= HA_ADMIN_FAILED;
+ errmsg= "Failed copying .frm file (errno: %d)";
+ goto err;
+ }
+
+ /* Change extension */
+ if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
+ MARIA_NAME_DEXT))
+ {
+ errmsg= "Failed in fn_format() for .MYD file (errno: %d)";
+ error= HA_ADMIN_INVALID;
+ goto err;
+ }
+
+ strxmov(src_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS);
+ if (my_copy(src_path, dst_path,
+ MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
+ {
+ errmsg= "Failed copying .MYD file (errno: %d)";
+ error= HA_ADMIN_FAILED;
+ goto err;
+ }
+ DBUG_RETURN(HA_ADMIN_OK);
+
+err:
+ {
+ HA_CHECK param;
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "backup";
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
+ param.testflag= 0;
+ _ma_check_print_error(&param, errmsg, my_errno);
+ DBUG_RETURN(error);
+ }
+}
+
+
+int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ int error;
+ HA_CHECK param;
+ ha_rows start_records;
+
+ if (!file)
+ return HA_ADMIN_INTERNAL_ERROR;
+
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "repair";
+ param.testflag= ((check_opt->flags & ~(T_EXTEND)) |
+ T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
+ (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
+ param.sort_buffer_length= check_opt->sort_buffer_size;
+ start_records= file->state->records;
+ while ((error= repair(thd, param, 0)) && param.retry_repair)
+ {
+ param.retry_repair= 0;
+ if (test_all_bits(param.testflag,
+ (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK)))
+ {
+ param.testflag &= ~T_RETRY_WITHOUT_QUICK;
+ sql_print_information("Retrying repair of: '%s' without quick",
+ table->s->path.str);
+ continue;
+ }
+ param.testflag &= ~T_QUICK;
+ if ((param.testflag & T_REP_BY_SORT))
+ {
+ param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP;
+ sql_print_information("Retrying repair of: '%s' with keycache",
+ table->s->path.str);
+ continue;
+ }
+ break;
+ }
+ if (!error && start_records != file->state->records &&
+ !(check_opt->flags & T_VERY_SILENT))
+ {
+ char llbuff[22], llbuff2[22];
+ sql_print_information("Found %s of %s rows when repairing '%s'",
+ llstr(file->state->records, llbuff),
+ llstr(start_records, llbuff2),
+ table->s->path.str);
+ }
+ return error;
+}
+
+int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ int error;
+ if (!file)
+ return HA_ADMIN_INTERNAL_ERROR;
+ HA_CHECK param;
+
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "optimize";
+ param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
+ T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
+ param.sort_buffer_length= check_opt->sort_buffer_size;
+ if ((error= repair(thd, param, 1)) && param.retry_repair)
+ {
+ sql_print_warning("Warning: Optimize table got errno %d, retrying",
+ my_errno);
+ param.testflag &= ~T_REP_BY_SORT;
+ error= repair(thd, param, 1);
+ }
+ return error;
+}
+
+
+int ha_maria::repair(THD *thd, HA_CHECK &param, bool optimize)
+{
+ int error= 0;
+ uint local_testflag= param.testflag;
+ bool optimize_done= !optimize, statistics_done= 0;
+ const char *old_proc_info= thd->proc_info;
+ char fixed_name[FN_REFLEN];
+ MARIA_SHARE *share= file->s;
+ ha_rows rows= file->state->records;
+ DBUG_ENTER("ha_maria::repair");
+
+ param.db_name= table->s->db.str;
+ param.table_name= table->alias;
+ param.tmpfile_createflag= O_RDWR | O_TRUNC;
+ param.using_global_keycache= 1;
+ param.thd= thd;
+ param.tmpdir= &mysql_tmpdir_list;
+ param.out_flag= 0;
+ strmov(fixed_name, file->filename);
+
+ // Don't lock tables if we have used LOCK TABLE
+ if (!thd->locked_tables &&
+ maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK))
+ {
+ _ma_check_print_error(&param, ER(ER_CANT_LOCK), my_errno);
+ DBUG_RETURN(HA_ADMIN_FAILED);
+ }
+
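+  /*
+    Rebuild the data file unless this is an OPTIMIZE with nothing to do
+    (no deleted rows, no fragmentation, or a quick repair with intact keys).
+  */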
+ if (!optimize ||
+ ((file->state->del || share->state.split != file->state->records) &&
+ (!(param.testflag & T_QUICK) ||
+ !(share->state.changed & STATE_NOT_OPTIMIZED_KEYS))))
+ {
+ ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ?
+ maria_get_mask_all_keys_active(share->base.keys) :
+ share->state.key_map);
+ uint testflag= param.testflag;
+ if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) &&
+ (local_testflag & T_REP_BY_SORT))
+ {
+ local_testflag |= T_STATISTICS;
+ param.testflag |= T_STATISTICS; // We get this for free
+ statistics_done= 1;
+ if (thd->variables.maria_repair_threads > 1)
+ {
+ char buf[40];
+ /* TODO: respect maria_repair_threads variable */
+ my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map));
+ thd->proc_info= buf;
+ error= maria_repair_parallel(&param, file, fixed_name,
+ param.testflag & T_QUICK);
+ thd->proc_info= "Repair done"; // to reset proc_info, as
+ // it was pointing to local buffer
+ }
+ else
+ {
+ thd->proc_info= "Repair by sorting";
+ error= maria_repair_by_sort(&param, file, fixed_name,
+ param.testflag & T_QUICK);
+ }
+ }
+ else
+ {
+ thd->proc_info= "Repair with keycache";
+ param.testflag &= ~T_REP_BY_SORT;
+ error= maria_repair(&param, file, fixed_name, param.testflag & T_QUICK);
+ }
+ param.testflag= testflag;
+ optimize_done= 1;
+ }
+ if (!error)
+ {
+ if ((local_testflag & T_SORT_INDEX) &&
+ (share->state.changed & STATE_NOT_SORTED_PAGES))
+ {
+ optimize_done= 1;
+ thd->proc_info= "Sorting index";
+ error= maria_sort_index(&param, file, fixed_name);
+ }
+ if (!statistics_done && (local_testflag & T_STATISTICS))
+ {
+ if (share->state.changed & STATE_NOT_ANALYZED)
+ {
+ optimize_done= 1;
+ thd->proc_info= "Analyzing";
+ error= maria_chk_key(&param, file);
+ }
+ else
+ local_testflag &= ~T_STATISTICS; // Don't update statistics
+ }
+ }
+ thd->proc_info= "Saving state";
+ if (!error)
+ {
+ if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
+ {
+ share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR);
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ }
+ /*
+      the following 'if', though conceptually wrong,
+ is a useful optimization nevertheless.
+ */
+ if (file->state != &file->s->state.state)
+ file->s->state.state= *file->state;
+ if (file->s->base.auto_key)
+ _ma_update_auto_increment_key(&param, file, 1);
+ if (optimize_done)
+ error= maria_update_state_info(&param, file,
+ UPDATE_TIME | UPDATE_OPEN_COUNT |
+ (local_testflag &
+ T_STATISTICS ? UPDATE_STAT : 0));
+ info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
+ HA_STATUS_CONST);
+ if (rows != file->state->records && !(param.testflag & T_VERY_SILENT))
+ {
+ char llbuff[22], llbuff2[22];
+ _ma_check_print_warning(&param, "Number of rows changed from %s to %s",
+ llstr(rows, llbuff),
+ llstr(file->state->records, llbuff2));
+ }
+ }
+ else
+ {
+ maria_mark_crashed_on_repair(file);
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ maria_update_state_info(&param, file, 0);
+ }
+ thd->proc_info= old_proc_info;
+ if (!thd->locked_tables)
+ maria_lock_database(file, F_UNLCK);
+ DBUG_RETURN(error ? HA_ADMIN_FAILED :
+ !optimize_done ? HA_ADMIN_ALREADY_DONE : HA_ADMIN_OK);
+}
+
+
+/*
+ Assign table indexes to a specific key cache.
+*/
+
+int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ KEY_CACHE *new_key_cache= check_opt->key_cache;
+ const char *errmsg= 0;
+ int error= HA_ADMIN_OK;
+ ulonglong map= ~(ulonglong) 0;
+ TABLE_LIST *table_list= table->pos_in_table_list;
+ DBUG_ENTER("ha_maria::assign_to_keycache");
+
+ /* Check validity of the index references */
+ if (table_list->use_index)
+ {
+ /* We only come here when the user did specify an index map */
+ key_map kmap;
+ if (get_key_map_from_key_list(&kmap, table, table_list->use_index))
+ {
+ errmsg= thd->net.last_error;
+ error= HA_ADMIN_FAILED;
+ goto err;
+ }
+ map= kmap.to_ulonglong();
+ }
+
+ if ((error= maria_assign_to_key_cache(file, map, new_key_cache)))
+ {
+ char buf[STRING_BUFFER_USUAL_SIZE];
+ my_snprintf(buf, sizeof(buf),
+ "Failed to flush to index file (errno: %d)", error);
+ errmsg= buf;
+ error= HA_ADMIN_CORRUPT;
+ }
+
+err:
+ if (error != HA_ADMIN_OK)
+ {
+ /* Send error to user */
+ HA_CHECK param;
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "assign_to_keycache";
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
+ param.testflag= 0;
+ _ma_check_print_error(&param, errmsg);
+ }
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Preload pages of the index file for a table into the key cache.
+*/
+
+int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ int error;
+ const char *errmsg;
+ ulonglong map= ~(ulonglong) 0;
+ TABLE_LIST *table_list= table->pos_in_table_list;
+ my_bool ignore_leaves= table_list->ignore_leaves;
+
+ DBUG_ENTER("ha_maria::preload_keys");
+
+ /* Check validity of the index references */
+ if (table_list->use_index)
+ {
+ key_map kmap;
+ get_key_map_from_key_list(&kmap, table, table_list->use_index);
+ if (kmap.is_set_all())
+ {
+ errmsg= thd->net.last_error;
+ error= HA_ADMIN_FAILED;
+ goto err;
+ }
+ if (!kmap.is_clear_all())
+ map= kmap.to_ulonglong();
+ }
+
+ maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
+ (void*) &thd->variables.preload_buff_size);
+
+ if ((error= maria_preload(file, map, ignore_leaves)))
+ {
+ switch (error) {
+ case HA_ERR_NON_UNIQUE_BLOCK_SIZE:
+ errmsg= "Indexes use different block sizes";
+ break;
+ case HA_ERR_OUT_OF_MEM:
+ errmsg= "Failed to allocate buffer";
+ break;
+ default:
+ char buf[ERRMSGSIZE + 20];
+ my_snprintf(buf, ERRMSGSIZE,
+ "Failed to read from index file (errno: %d)", my_errno);
+ errmsg= buf;
+ }
+ error= HA_ADMIN_FAILED;
+ goto err;
+ }
+
+ DBUG_RETURN(HA_ADMIN_OK);
+
+err:
+ {
+ HA_CHECK param;
+ maria_chk_init(&param);
+ param.thd= thd;
+ param.op_name= "preload_keys";
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
+ param.testflag= 0;
+ _ma_check_print_error(&param, errmsg);
+ DBUG_RETURN(error);
+ }
+}
+
+
+/*
+ Disable indexes, making it persistent if requested.
+
+ SYNOPSIS
+ disable_indexes()
+ mode mode of operation:
+ HA_KEY_SWITCH_NONUNIQ disable all non-unique keys
+ HA_KEY_SWITCH_ALL disable all keys
+ HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent
+ HA_KEY_SWITCH_ALL_SAVE dis. all keys and make persistent
+
+ IMPLEMENTATION
+ HA_KEY_SWITCH_NONUNIQ is not implemented.
+ HA_KEY_SWITCH_ALL_SAVE is not implemented.
+
+ RETURN
+ 0 ok
+ HA_ERR_WRONG_COMMAND mode not implemented.
+*/
+
+int ha_maria::disable_indexes(uint mode)
+{
+ int error;
+
+ if (mode == HA_KEY_SWITCH_ALL)
+ {
+ /* call a storage engine function to switch the key map */
+ error= maria_disable_indexes(file);
+ }
+ else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
+ {
+ maria_extra(file, HA_EXTRA_NO_KEYS, 0);
+ info(HA_STATUS_CONST); // Read new key info
+ error= 0;
+ }
+ else
+ {
+ /* mode not implemented */
+ error= HA_ERR_WRONG_COMMAND;
+ }
+ return error;
+}
+
+
+/*
+ Enable indexes, making it persistent if requested.
+
+ SYNOPSIS
+ enable_indexes()
+ mode mode of operation:
+ HA_KEY_SWITCH_NONUNIQ enable all non-unique keys
+ HA_KEY_SWITCH_ALL enable all keys
+ HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent
+ HA_KEY_SWITCH_ALL_SAVE en. all keys and make persistent
+
+ DESCRIPTION
+    Enable indexes, which might have been disabled by disable_indexes() before.
+ The modes without _SAVE work only if both data and indexes are empty,
+ since the MARIA repair would enable them persistently.
+ To be sure in these cases, call handler::delete_all_rows() before.
+
+ IMPLEMENTATION
+ HA_KEY_SWITCH_NONUNIQ is not implemented.
+ HA_KEY_SWITCH_ALL_SAVE is not implemented.
+
+ RETURN
+ 0 ok
+ !=0 Error, among others:
+ HA_ERR_CRASHED data or index is non-empty. Delete all rows and retry.
+ HA_ERR_WRONG_COMMAND mode not implemented.
+*/
+
+int ha_maria::enable_indexes(uint mode)
+{
+ int error;
+
+ if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys))
+ {
+ /* All indexes are enabled already. */
+ return 0;
+ }
+
+ if (mode == HA_KEY_SWITCH_ALL)
+ {
+ error= maria_enable_indexes(file);
+ /*
+ Do not try to repair on error,
+ as this could make the enabled state persistent,
+ but mode==HA_KEY_SWITCH_ALL forbids it.
+ */
+ }
+ else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
+ {
+ THD *thd= current_thd;
+ HA_CHECK param;
+ const char *save_proc_info= thd->proc_info;
+ thd->proc_info= "Creating index";
+ maria_chk_init(&param);
+ param.op_name= "recreating_index";
+ param.testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK |
+ T_CREATE_MISSING_KEYS);
+ param.myf_rw &= ~MY_WAIT_IF_FULL;
+ param.sort_buffer_length= thd->variables.maria_sort_buff_size;
+ param.stats_method= (enum_handler_stats_method) thd->variables.
+ maria_stats_method;
+ param.tmpdir= &mysql_tmpdir_list;
+ if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param.retry_repair)
+ {
+ sql_print_warning("Warning: Enabling keys got errno %d, retrying",
+ my_errno);
+ /* Repairing by sort failed. Now try standard repair method. */
+ param.testflag &= ~(T_REP_BY_SORT | T_QUICK);
+ error= (repair(thd, param, 0) != HA_ADMIN_OK);
+ /*
+ If the standard repair succeeded, clear all error messages which
+ might have been set by the first repair. They can still be seen
+ with SHOW WARNINGS then.
+ */
+ if (!error)
+ thd->clear_error();
+ }
+ info(HA_STATUS_CONST);
+ thd->proc_info= save_proc_info;
+ }
+ else
+ {
+ /* mode not implemented */
+ error= HA_ERR_WRONG_COMMAND;
+ }
+ return error;
+}
+
+
+/*
+ Test if indexes are disabled.
+
+
+ SYNOPSIS
+ indexes_are_disabled()
+ no parameters
+
+
+ RETURN
+ 0 indexes are not disabled
+ 1 all indexes are disabled
+ [2 non-unique indexes are disabled - NOT YET IMPLEMENTED]
+*/
+
+int ha_maria::indexes_are_disabled(void)
+{
+ return maria_indexes_are_disabled(file);
+}
+
+
+/*
+ prepare for a many-rows insert operation
+ e.g. - disable indexes (if they can be recreated fast) or
+ activate special bulk-insert optimizations
+
+ SYNOPSIS
+ start_bulk_insert(rows)
+ rows Rows to be inserted
+ 0 if we don't know
+
+ NOTICE
+ Do not forget to call end_bulk_insert() later!
+*/
+
+void ha_maria::start_bulk_insert(ha_rows rows)
+{
+ DBUG_ENTER("ha_maria::start_bulk_insert");
+ THD *thd= current_thd;
+ ulong size= min(thd->variables.read_buff_size,
+ table->s->avg_row_length * rows);
+ DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu",
+ (ulong) rows, size));
+
+ /* don't enable row cache if too few rows */
+ if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
+ maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size);
+
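+  /*
+    Remember whether all indexes are enabled, so that end_bulk_insert()
+    knows whether it may re-enable them.
+  */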
+ can_enable_indexes= maria_is_all_keys_active(file->s->state.key_map,
+ file->s->base.keys);
+
+ if (!(specialflag & SPECIAL_SAFE_MODE))
+ {
+ /*
+ Only disable old index if the table was empty and we are inserting
+ a lot of rows.
+ We should not do this for only a few rows as this is slower and
+      we don't want to update the key statistics based on only a few rows.
+ */
+ if (file->state->records == 0 && can_enable_indexes &&
+ (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES))
+ maria_disable_non_unique_index(file, rows);
+ else
+ if (!file->bulk_insert &&
+ (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT))
+ {
+ maria_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows);
+ }
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ end special bulk-insert optimizations,
+ which have been activated by start_bulk_insert().
+
+ SYNOPSIS
+ end_bulk_insert()
+ no arguments
+
+ RETURN
+ 0 OK
+ != 0 Error
+*/
+
+int ha_maria::end_bulk_insert()
+{
+ maria_end_bulk_insert(file);
+ int err= maria_extra(file, HA_EXTRA_NO_CACHE, 0);
+ return err ? err : can_enable_indexes ?
+ enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE) : 0;
+}
+
+
+bool ha_maria::check_and_repair(THD *thd)
+{
+ int error= 0;
+ int marked_crashed;
+ char *old_query;
+ uint old_query_length;
+ HA_CHECK_OPT check_opt;
+ DBUG_ENTER("ha_maria::check_and_repair");
+
+ check_opt.init();
+ check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;
+ // Don't use quick if deleted rows
+ if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK))
+ check_opt.flags |= T_QUICK;
+ sql_print_warning("Checking table: '%s'", table->s->path.str);
+
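+  /*
+    Temporarily show the table name as the thread's query so the automatic
+    check/repair is visible in SHOW PROCESSLIST.
+  */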
+ old_query= thd->query;
+ old_query_length= thd->query_length;
+ pthread_mutex_lock(&LOCK_thread_count);
+ thd->query= table->s->table_name.str;
+ thd->query_length= table->s->table_name.length;
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ if ((marked_crashed= maria_is_crashed(file)) || check(thd, &check_opt))
+ {
+ sql_print_warning("Recovering table: '%s'", table->s->path.str);
+ check_opt.flags=
+ ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) |
+ (marked_crashed ? 0 : T_QUICK) |
+ (maria_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) |
+ T_AUTO_REPAIR);
+ if (repair(thd, &check_opt))
+ error= 1;
+ }
+ pthread_mutex_lock(&LOCK_thread_count);
+ thd->query= old_query;
+ thd->query_length= old_query_length;
+ pthread_mutex_unlock(&LOCK_thread_count);
+ DBUG_RETURN(error);
+}
+
+
+bool ha_maria::is_crashed() const
+{
+ return (file->s->state.changed & STATE_CRASHED ||
+ (my_disable_locking && file->s->state.open_count));
+}
+
+
+int ha_maria::update_row(const byte * old_data, byte * new_data)
+{
+ statistic_increment(table->in_use->status_var.ha_update_count, &LOCK_status);
+ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+ table->timestamp_field->set_time();
+ return maria_update(file, old_data, new_data);
+}
+
+
+int ha_maria::delete_row(const byte * buf)
+{
+ statistic_increment(table->in_use->status_var.ha_delete_count, &LOCK_status);
+ return maria_delete(file, buf);
+}
+
+
+int ha_maria::index_read(byte * buf, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag)
+{
+ DBUG_ASSERT(inited == INDEX);
+ statistic_increment(table->in_use->status_var.ha_read_key_count,
+ &LOCK_status);
+ int error= maria_rkey(file, buf, active_index, key, key_len, find_flag);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_read_idx(byte * buf, uint index, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag)
+{
+ statistic_increment(table->in_use->status_var.ha_read_key_count,
+ &LOCK_status);
+ int error= maria_rkey(file, buf, index, key, key_len, find_flag);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_read_last(byte * buf, const byte * key, uint key_len)
+{
+ DBUG_ENTER("ha_maria::index_read_last");
+ DBUG_ASSERT(inited == INDEX);
+ statistic_increment(table->in_use->status_var.ha_read_key_count,
+ &LOCK_status);
+ int error= maria_rkey(file, buf, active_index, key, key_len,
+ HA_READ_PREFIX_LAST);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ DBUG_RETURN(error);
+}
+
+
+int ha_maria::index_next(byte * buf)
+{
+ DBUG_ASSERT(inited == INDEX);
+ statistic_increment(table->in_use->status_var.ha_read_next_count,
+ &LOCK_status);
+ int error= maria_rnext(file, buf, active_index);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_prev(byte * buf)
+{
+ DBUG_ASSERT(inited == INDEX);
+ statistic_increment(table->in_use->status_var.ha_read_prev_count,
+ &LOCK_status);
+ int error= maria_rprev(file, buf, active_index);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_first(byte * buf)
+{
+ DBUG_ASSERT(inited == INDEX);
+ statistic_increment(table->in_use->status_var.ha_read_first_count,
+ &LOCK_status);
+ int error= maria_rfirst(file, buf, active_index);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_last(byte * buf)
+{
+ DBUG_ASSERT(inited == INDEX);
+ statistic_increment(table->in_use->status_var.ha_read_last_count,
+ &LOCK_status);
+ int error= maria_rlast(file, buf, active_index);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::index_next_same(byte * buf,
+ const byte *key __attribute__ ((unused)),
+ uint length __attribute__ ((unused)))
+{
+ DBUG_ASSERT(inited == INDEX);
+ statistic_increment(table->in_use->status_var.ha_read_next_count,
+ &LOCK_status);
+ int error= maria_rnext_same(file, buf);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::rnd_init(bool scan)
+{
+ if (scan)
+ return maria_scan_init(file);
+ return maria_reset(file); // Free buffers
+}
+
+
+int ha_maria::rnd_next(byte *buf)
+{
+ statistic_increment(table->in_use->status_var.ha_read_rnd_next_count,
+ &LOCK_status);
+ int error= maria_scan(file, buf);
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+int ha_maria::restart_rnd_next(byte *buf, byte *pos)
+{
+ return rnd_pos(buf, pos);
+}
+
+
+int ha_maria::rnd_pos(byte * buf, byte *pos)
+{
+ statistic_increment(table->in_use->status_var.ha_read_rnd_count,
+ &LOCK_status);
+ int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length));
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+void ha_maria::position(const byte * record)
+{
+ my_off_t position= maria_position(file);
+ my_store_ptr(ref, ref_length, position);
+}
+
+
+int ha_maria::info(uint flag)
+{
+ MARIA_INFO info;
+ char name_buff[FN_REFLEN];
+
+ (void) maria_status(file, &info, flag);
+ if (flag & HA_STATUS_VARIABLE)
+ {
+ stats.records= info.records;
+ stats.deleted= info.deleted;
+ stats.data_file_length= info.data_file_length;
+ stats.index_file_length= info.index_file_length;
+ stats.delete_length= info.delete_length;
+ stats.check_time= info.check_time;
+ stats.mean_rec_length= info.mean_reclength;
+ }
+ if (flag & HA_STATUS_CONST)
+ {
+ TABLE_SHARE *share= table->s;
+ stats.max_data_file_length= info.max_data_file_length;
+ stats.max_index_file_length= info.max_index_file_length;
+ stats.create_time= info.create_time;
+ ref_length= info.reflength;
+ share->db_options_in_use= info.options;
+ stats.block_size= maria_block_size;
+
+ /* Update share */
+ if (share->tmp_table == NO_TMP_TABLE)
+ pthread_mutex_lock(&share->mutex);
+ share->keys_in_use.set_prefix(share->keys);
+ share->keys_in_use.intersect_extended(info.key_map);
+ share->keys_for_keyread.intersect(share->keys_in_use);
+ share->db_record_offset= info.record_offset;
+ if (share->key_parts)
+ memcpy((char*) table->key_info[0].rec_per_key,
+ (char*) info.rec_per_key,
+ sizeof(table->key_info[0].rec_per_key) * share->key_parts);
+ if (share->tmp_table == NO_TMP_TABLE)
+ pthread_mutex_unlock(&share->mutex);
+
+ /*
+ Set data_file_name and index_file_name to point at the symlink value
+      if the table is symlinked (i.e. the real name differs from the generated name)
+ */
+ data_file_name= index_file_name= 0;
+ fn_format(name_buff, file->filename, "", MARIA_NAME_DEXT, MY_APPEND_EXT);
+ if (strcmp(name_buff, info.data_file_name))
+ data_file_name= info.data_file_name;
+ fn_format(name_buff, file->filename, "", MARIA_NAME_IEXT, MY_APPEND_EXT);
+ if (strcmp(name_buff, info.index_file_name))
+ index_file_name= info.index_file_name;
+ }
+ if (flag & HA_STATUS_ERRKEY)
+ {
+ errkey= info.errkey;
+ my_store_ptr(dup_ref, ref_length, info.dup_key_pos);
+ }
+ /* Faster to always update, than to do it based on flag */
+ stats.update_time= info.update_time;
+ stats.auto_increment_value= info.auto_increment;
+
+ return 0;
+}
+
+
+int ha_maria::extra(enum ha_extra_function operation)
+{
+ if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
+ return 0;
+ return maria_extra(file, operation, 0);
+}
+
+int ha_maria::reset(void)
+{
+ return maria_reset(file);
+}
+
+/* To be used with WRITE_CACHE and EXTRA_CACHE */
+
+int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size)
+{
+ if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE)
+ return 0;
+ return maria_extra(file, operation, (void*) &cache_size);
+}
+
+
+int ha_maria::delete_all_rows()
+{
+ return maria_delete_all_rows(file);
+}
+
+
+int ha_maria::delete_table(const char *name)
+{
+ return maria_delete_table(name);
+}
+
+
+int ha_maria::external_lock(THD *thd, int lock_type)
+{
+ return maria_lock_database(file, !table->s->tmp_table ?
+ lock_type : ((lock_type == F_UNLCK) ?
+ F_UNLCK : F_EXTRA_LCK));
+}
+
+
+THR_LOCK_DATA **ha_maria::store_lock(THD *thd,
+ THR_LOCK_DATA **to,
+ enum thr_lock_type lock_type)
+{
+ if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK)
+ file->lock.type= lock_type;
+ *to++= &file->lock;
+ return to;
+}
+
+
+void ha_maria::update_create_info(HA_CREATE_INFO *create_info)
+{
+ ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST);
+ if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
+ {
+ create_info->auto_increment_value= stats.auto_increment_value;
+ }
+ create_info->data_file_name= data_file_name;
+ create_info->index_file_name= index_file_name;
+}
+
+
+int ha_maria::create(const char *name, register TABLE *table_arg,
+ HA_CREATE_INFO *info)
+{
+ int error;
+ uint i, j, recpos, minpos, fieldpos, temp_length, length, create_flags= 0;
+ bool found_real_auto_increment= 0;
+ enum ha_base_keytype type;
+ char buff[FN_REFLEN];
+ byte *record;
+ KEY *pos;
+ MARIA_KEYDEF *keydef;
+ MARIA_COLUMNDEF *recinfo, *recinfo_pos;
+ HA_KEYSEG *keyseg;
+ TABLE_SHARE *share= table_arg->s;
+ uint options= share->db_options_in_use;
+ enum data_file_type row_type;
+ DBUG_ENTER("ha_maria::create");
+
+ type= HA_KEYTYPE_BINARY; // Keep compiler happy
+ if (!(my_multi_malloc(MYF(MY_WME),
+ &recinfo, (share->fields * 2 + 2) *
+ sizeof(MARIA_COLUMNDEF),
+ &keydef, share->keys * sizeof(MARIA_KEYDEF),
+ &keyseg,
+ ((share->key_parts + share->keys) *
+ sizeof(HA_KEYSEG)), NullS)))
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+
+ pos= table_arg->key_info;
+ for (i= 0; i < share->keys; i++, pos++)
+ {
+ if (pos->flags & HA_USES_PARSER)
+ create_flags |= HA_CREATE_RELIES_ON_SQL_LAYER;
+ keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT | HA_SPATIAL));
+ keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ?
+ (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) :
+ pos->algorithm;
+ keydef[i].block_length= pos->block_size;
+
+ keydef[i].seg= keyseg;
+ keydef[i].keysegs= pos->key_parts;
+ for (j= 0; j < pos->key_parts; j++)
+ {
+ Field *field= pos->key_part[j].field;
+ type= field->key_type();
+ keydef[i].seg[j].flag= pos->key_part[j].key_part_flag;
+
+ if (options & HA_OPTION_PACK_KEYS ||
+ (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY |
+ HA_SPACE_PACK_USED)))
+ {
+ if (pos->key_part[j].length > 8 &&
+ (type == HA_KEYTYPE_TEXT ||
+ type == HA_KEYTYPE_NUM ||
+ (type == HA_KEYTYPE_BINARY && !field->zero_pack())))
+ {
+ /* No blobs here */
+ if (j == 0)
+ keydef[i].flag |= HA_PACK_KEY;
+ if (!(field->flags & ZEROFILL_FLAG) &&
+ (field->type() == MYSQL_TYPE_STRING ||
+ field->type() == MYSQL_TYPE_VAR_STRING ||
+ ((int) (pos->key_part[j].length - field->decimals())) >= 4))
+ keydef[i].seg[j].flag |= HA_SPACE_PACK;
+ }
+ else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16))
+ keydef[i].flag |= HA_BINARY_PACK_KEY;
+ }
+ keydef[i].seg[j].type= (int) type;
+ keydef[i].seg[j].start= pos->key_part[j].offset;
+ keydef[i].seg[j].length= pos->key_part[j].length;
+ keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_end=
+ keydef[i].seg[j].bit_length= 0;
+ keydef[i].seg[j].bit_pos= 0;
+ keydef[i].seg[j].language= field->charset()->number;
+
+ if (field->null_ptr)
+ {
+ keydef[i].seg[j].null_bit= field->null_bit;
+ keydef[i].seg[j].null_pos= (uint) (field->null_ptr -
+ (uchar *) table_arg->record[0]);
+ }
+ else
+ {
+ keydef[i].seg[j].null_bit= 0;
+ keydef[i].seg[j].null_pos= 0;
+ }
+ if (field->type() == FIELD_TYPE_BLOB ||
+ field->type() == FIELD_TYPE_GEOMETRY)
+ {
+ keydef[i].seg[j].flag |= HA_BLOB_PART;
+ /* save number of bytes used to pack length */
+ keydef[i].seg[j].bit_start= (uint) (field->pack_length() -
+ share->blob_ptr_size);
+ }
+ else if (field->type() == FIELD_TYPE_BIT)
+ {
+ keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len;
+ keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs;
+ keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
+ (uchar *) table_arg->record[0]);
+ }
+ }
+ keyseg += pos->key_parts;
+ }
+
+ if (table_arg->found_next_number_field)
+ {
+ keydef[share->next_number_index].flag |= HA_AUTO_KEY;
+ found_real_auto_increment= share->next_number_key_offset == 0;
+ }
+
+ record= table_arg->record[0];
+ recpos= 0;
+ recinfo_pos= recinfo;
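+  /*
+    Build one MARIA_COLUMNDEF per stored field, walking the record in
+    storage order and emitting filler entries for gaps (null bits etc).
+  */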
+ while (recpos < (uint) share->reclength)
+ {
+ Field **field, *found= 0;
+ minpos= share->reclength;
+ length= 0;
+
+ for (field= table_arg->field; *field; field++)
+ {
+ if ((fieldpos=(*field)->offset(record)) >= recpos &&
+ fieldpos <= minpos)
+ {
+ /* skip null fields */
+ if (!(temp_length= (*field)->pack_length_in_rec()))
+ continue; /* Skip null-fields */
+ if (!found || fieldpos < minpos ||
+ (fieldpos == minpos && temp_length < length))
+ {
+ minpos= fieldpos;
+ found= *field;
+ length= temp_length;
+ }
+ }
+ }
+ DBUG_PRINT("loop", ("found: 0x%lx recpos: %d minpos: %d length: %d",
+ (long) found, recpos, minpos, length));
+ if (recpos != minpos)
+ { // Reserved space (Null bits?)
+ bzero((char*) recinfo_pos, sizeof(*recinfo_pos));
+ recinfo_pos->type= FIELD_NORMAL;
+ recinfo_pos++->length= (uint16) (minpos - recpos);
+ }
+ if (!found)
+ break;
+
+ if (found->flags & BLOB_FLAG)
+ recinfo_pos->type= FIELD_BLOB;
+ else if (found->type() == MYSQL_TYPE_VARCHAR)
+ recinfo_pos->type= FIELD_VARCHAR;
+ else if (!(options & HA_OPTION_PACK_RECORD))
+ recinfo_pos->type= FIELD_NORMAL;
+ else if (found->zero_pack())
+ recinfo_pos->type= FIELD_SKIP_ZERO;
+ else
+ recinfo_pos->type= ((length <= 3 ||
+ (found->flags & ZEROFILL_FLAG)) ?
+ FIELD_NORMAL :
+ found->type() == MYSQL_TYPE_STRING ||
+ found->type() == MYSQL_TYPE_VAR_STRING ?
+ FIELD_SKIP_ENDSPACE : FIELD_SKIP_PRESPACE);
+ if (found->null_ptr)
+ {
+ recinfo_pos->null_bit= found->null_bit;
+ recinfo_pos->null_pos= (uint) (found->null_ptr -
+ (uchar *) table_arg->record[0]);
+ }
+ else
+ {
+ recinfo_pos->null_bit= 0;
+ recinfo_pos->null_pos= 0;
+ }
+ (recinfo_pos++)->length= (uint16) length;
+ recpos= minpos + length;
+ DBUG_PRINT("loop", ("length: %d type: %d",
+ recinfo_pos[-1].length, recinfo_pos[-1].type));
+
+ }
+ MARIA_CREATE_INFO create_info;
+ bzero((char*) &create_info, sizeof(create_info));
+ create_info.max_rows= share->max_rows;
+ create_info.reloc_rows= share->min_rows;
+ create_info.with_auto_increment= found_real_auto_increment;
+ create_info.auto_increment= (info->auto_increment_value ?
+ info->auto_increment_value - 1 : (ulonglong) 0);
+ create_info.data_file_length= ((ulonglong) share->max_rows *
+ share->avg_row_length);
+ create_info.data_file_name= info->data_file_name;
+ create_info.index_file_name= info->index_file_name;
+
+ if (info->options & HA_LEX_CREATE_TMP_TABLE)
+ create_flags |= HA_CREATE_TMP_TABLE;
+ if (options & HA_OPTION_PACK_RECORD)
+ create_flags |= HA_PACK_RECORD;
+ if (options & HA_OPTION_CHECKSUM)
+ create_flags |= HA_CREATE_CHECKSUM;
+ if (options & HA_OPTION_DELAY_KEY_WRITE)
+ create_flags |= HA_CREATE_DELAY_KEY_WRITE;
+
+ switch (info->row_type) {
+ case ROW_TYPE_FIXED:
+ row_type= STATIC_RECORD;
+ break;
+ case ROW_TYPE_DYNAMIC:
+ row_type= DYNAMIC_RECORD;
+ break;
+ default:
+ case ROW_TYPE_PAGES:
+ row_type= BLOCK_RECORD;
+ break;
+ }
+
+ /* TODO: Check that the following fn_format is really needed */
+ error=
+ maria_create(fn_format(buff, name, "", "",
+ MY_UNPACK_FILENAME | MY_APPEND_EXT),
+ row_type, share->keys, keydef, (uint) (recinfo_pos - recinfo),
+ recinfo, 0, (MARIA_UNIQUEDEF *) 0, &create_info,
+ create_flags);
+
+ my_free((gptr) recinfo, MYF(0));
+ DBUG_RETURN(error);
+}
+
+
+int ha_maria::rename_table(const char *from, const char *to)
+{
+ return maria_rename(from, to);
+}
+
+
+void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong *first_value,
+ ulonglong *nb_reserved_values)
+{
+ ulonglong nr;
+ int error;
+ byte key[HA_MAX_KEY_LENGTH];
+
+ if (!table->s->next_number_key_offset)
+ { // Autoincrement at key-start
+ ha_maria::info(HA_STATUS_AUTO);
+ *first_value= stats.auto_increment_value;
+ /* Maria has only table-level lock for now, so reserves to +inf */
+ *nb_reserved_values= ULONGLONG_MAX;
+ return;
+ }
+
+ /* it's safe to call the following if bulk_insert isn't on */
+ maria_flush_bulk_insert(file, table->s->next_number_index);
+
+ (void) extra(HA_EXTRA_KEYREAD);
+ key_copy(key, table->record[0],
+ table->key_info + table->s->next_number_index,
+ table->s->next_number_key_offset);
+ error= maria_rkey(file, table->record[1], (int) table->s->next_number_index,
+ key, table->s->next_number_key_offset, HA_READ_PREFIX_LAST);
+ if (error)
+ nr= 1;
+ else
+ {
+ /* Get data from record[1] */
+ nr= ((ulonglong) table->next_number_field->
+ val_int_offset(table->s->rec_buff_length) + 1);
+ }
+ extra(HA_EXTRA_NO_KEYREAD);
+ *first_value= nr;
+ /*
+ MySQL needs to call us for next row: assume we are inserting ("a",null)
+ here, we return 3, and next this statement will want to insert ("b",null):
+ there is no reason why ("b",3+1) would be the good row to insert: maybe it
+ already exists, maybe 3+1 is too large...
+ */
+ *nb_reserved_values= 1;
+}
+
+
+/*
+ Find out how many rows there are in the given range
+
+ SYNOPSIS
+ records_in_range()
+ inx Index to use
+ min_key Start of range. Null pointer if from first key
+ max_key End of range. Null pointer if to last key
+
+ NOTES
+ min_key.flag can have one of the following values:
+ HA_READ_KEY_EXACT Include the key in the range
+ HA_READ_AFTER_KEY Don't include key in range
+
+ max_key.flag can have one of the following values:
+ HA_READ_BEFORE_KEY Don't include key in range
+ HA_READ_AFTER_KEY Include all 'end_key' values in the range
+
+ RETURN
+ HA_POS_ERROR Something is wrong with the index tree.
+ 0 There are no matching keys in the given range
+ number > 0 There are approximately 'number' matching rows in
+ the range.
+*/
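+
+/*
+ For example (illustration only, not from the sources): a range like
+ "a >= 10 AND a < 20" would arrive here as min_key with flag
+ HA_READ_KEY_EXACT (10 included) and max_key with flag HA_READ_BEFORE_KEY
+ (20 excluded).
+*/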
+
+ha_rows ha_maria::records_in_range(uint inx, key_range *min_key,
+ key_range *max_key)
+{
+ return (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key);
+}
+
+
+int ha_maria::ft_read(byte * buf)
+{
+ int error;
+
+ if (!ft_handler)
+ return -1;
+
+ thread_safe_increment(table->in_use->status_var.ha_read_next_count,
+ &LOCK_status); // why ?
+
+ error= ft_handler->please->read_next(ft_handler, (char*) buf);
+
+ table->status= error ? STATUS_NOT_FOUND : 0;
+ return error;
+}
+
+
+uint ha_maria::checksum() const
+{
+ return (uint) file->state->checksum;
+}
+
+
+bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes)
+{
+ uint options= table->s->db_options_in_use;
+
+ if (info->auto_increment_value != stats.auto_increment_value ||
+ info->data_file_name != data_file_name ||
+ info->index_file_name != index_file_name ||
+ table_changes == IS_EQUAL_NO ||
+ table_changes & IS_EQUAL_PACK_LENGTH) // Not implemented yet
+ return COMPATIBLE_DATA_NO;
+
+ if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
+ HA_OPTION_DELAY_KEY_WRITE)) !=
+ (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
+ HA_OPTION_DELAY_KEY_WRITE)))
+ return COMPATIBLE_DATA_NO;
+ return COMPATIBLE_DATA_YES;
+}
+
+extern int maria_panic(enum ha_panic_function flag);
+int maria_panic(handlerton *hton, ha_panic_function flag)
+{
+ return maria_panic(flag);
+}
+
+static int ha_maria_init(void *p)
+{
+ handlerton *maria_hton;
+
+ maria_hton= (handlerton *)p;
+ maria_hton->state= SHOW_OPTION_YES;
+ maria_hton->db_type= DB_TYPE_MARIA;
+ maria_hton->create= maria_create_handler;
+ maria_hton->panic= maria_panic;
+ /* TODO: decide if we support Maria being used for log tables */
+ maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
+ return test(maria_init());
+}
+
+struct st_mysql_storage_engine maria_storage_engine=
+{ MYSQL_HANDLERTON_INTERFACE_VERSION };
+
+mysql_declare_plugin(maria)
+{
+ MYSQL_STORAGE_ENGINE_PLUGIN,
+ &maria_storage_engine,
+ "Maria",
+ "MySQL AB",
+ "Traditional transactional MySQL tables",
+ PLUGIN_LICENSE_GPL,
+ ha_maria_init, /* Plugin Init */
+ NULL, /* Plugin Deinit */
+ 0x0100, /* 1.0 */
+ NULL, /* status variables */
+ NULL, /* system variables */
+ NULL /* config options */
+}
+mysql_declare_plugin_end;
diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h
new file mode 100644
index 00000000000..1f243f9ec59
--- /dev/null
+++ b/storage/maria/ha_maria.h
@@ -0,0 +1,145 @@
+/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface /* gcc class implementation */
+#endif
+
+/* class for the maria handler */
+
+#include <maria.h>
+
+#define HA_RECOVER_NONE 0 /* No automatic recover */
+#define HA_RECOVER_DEFAULT 1 /* Automatic recover active */
+#define HA_RECOVER_BACKUP 2 /* Make a backup file on recover */
+#define HA_RECOVER_FORCE 4 /* Recover even if we lose rows */
+#define HA_RECOVER_QUICK 8 /* Don't check rows in data file */
+
+extern ulong maria_sort_buffer_size;
+extern TYPELIB maria_recover_typelib;
+extern ulong maria_recover_options;
+
+class ha_maria :public handler
+{
+ MARIA_HA *file;
+ ulonglong int_table_flags;
+ char *data_file_name, *index_file_name;
+ bool can_enable_indexes;
+ int repair(THD * thd, HA_CHECK &param, bool optimize);
+
+public:
+ ha_maria(handlerton *hton, TABLE_SHARE * table_arg);
+ ~ha_maria() {}
+ handler *clone(MEM_ROOT *mem_root);
+ const char *table_type() const
+ { return "MARIA"; }
+ const char *index_type(uint key_number);
+ const char **bas_ext() const;
+ ulonglong table_flags() const
+ { return int_table_flags; }
+ ulong index_flags(uint inx, uint part, bool all_parts) const
+ {
+ return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ?
+ 0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
+ HA_READ_ORDER | HA_KEYREAD_ONLY);
+ }
+ uint max_supported_keys() const
+ { return MARIA_MAX_KEY; }
+ uint max_supported_key_length() const
+ { return HA_MAX_KEY_LENGTH; }
+ uint max_supported_key_part_length() const
+ { return HA_MAX_KEY_LENGTH; }
+ uint checksum() const;
+
+ virtual bool check_if_locking_is_allowed(uint sql_command,
+ ulong type, TABLE * table,
+ uint count,
+ bool called_by_logger_thread);
+ int open(const char *name, int mode, uint test_if_locked);
+ int close(void);
+ int write_row(byte * buf);
+ int update_row(const byte * old_data, byte * new_data);
+ int delete_row(const byte * buf);
+ int index_read(byte * buf, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag);
+ int index_read_idx(byte * buf, uint idx, const byte * key,
+ uint key_len, enum ha_rkey_function find_flag);
+ int index_read_last(byte * buf, const byte * key, uint key_len);
+ int index_next(byte * buf);
+ int index_prev(byte * buf);
+ int index_first(byte * buf);
+ int index_last(byte * buf);
+ int index_next_same(byte * buf, const byte * key, uint keylen);
+ int ft_init()
+ {
+ if (!ft_handler)
+ return 1;
+ ft_handler->please->reinit_search(ft_handler);
+ return 0;
+ }
+ FT_INFO *ft_init_ext(uint flags, uint inx, String * key)
+ {
+ return maria_ft_init_search(flags, file, inx,
+ (byte *) key->ptr(), key->length(),
+ key->charset(), table->record[0]);
+ }
+ int ft_read(byte * buf);
+ int rnd_init(bool scan);
+ int rnd_next(byte * buf);
+ int rnd_pos(byte * buf, byte * pos);
+ int restart_rnd_next(byte * buf, byte * pos);
+ void position(const byte * record);
+ int info(uint);
+ int extra(enum ha_extra_function operation);
+ int extra_opt(enum ha_extra_function operation, ulong cache_size);
+ int reset(void);
+ int external_lock(THD * thd, int lock_type);
+ int delete_all_rows(void);
+ int disable_indexes(uint mode);
+ int enable_indexes(uint mode);
+ int indexes_are_disabled(void);
+ void start_bulk_insert(ha_rows rows);
+ int end_bulk_insert();
+ ha_rows records_in_range(uint inx, key_range * min_key, key_range * max_key);
+ void update_create_info(HA_CREATE_INFO * create_info);
+ int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info);
+ THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to,
+ enum thr_lock_type lock_type);
+ virtual void get_auto_increment(ulonglong offset, ulonglong increment,
+ ulonglong nb_desired_values,
+ ulonglong *first_value,
+ ulonglong *nb_reserved_values);
+ int rename_table(const char *from, const char *to);
+ int delete_table(const char *name);
+ int check(THD * thd, HA_CHECK_OPT * check_opt);
+ int analyze(THD * thd, HA_CHECK_OPT * check_opt);
+ int repair(THD * thd, HA_CHECK_OPT * check_opt);
+ bool check_and_repair(THD * thd);
+ bool is_crashed() const;
+ bool auto_repair() const
+ { return maria_recover_options != 0; }
+ int optimize(THD * thd, HA_CHECK_OPT * check_opt);
+ int restore(THD * thd, HA_CHECK_OPT * check_opt);
+ int backup(THD * thd, HA_CHECK_OPT * check_opt);
+ int assign_to_keycache(THD * thd, HA_CHECK_OPT * check_opt);
+ int preload_keys(THD * thd, HA_CHECK_OPT * check_opt);
+ bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes);
+#ifdef HAVE_REPLICATION
+ int dump(THD * thd, int fd);
+ int net_read_dump(NET * net);
+#endif
+};
diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c
new file mode 100644
index 00000000000..fdacda84875
--- /dev/null
+++ b/storage/maria/lockman.c
@@ -0,0 +1,787 @@
+/* QQ: TODO - allocate everything from dynarrays !!! (benchmark) */
+/* QQ: TODO instant duration locks */
+/* QQ: #warning automatically place S instead of LS if possible */
+
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Generic Lock Manager
+
+ Lock manager handles locks on "resources", a resource must be uniquely
+ identified by a 64-bit number. Lock manager itself does not imply
+ anything about the nature of a resource - it can be a row, a table, a
+ database, or just anything.
+
+ Locks belong to "lock owners". A lock owner is uniquely identified by a
+ 16-bit number. The application must provide a function loid2lo that
+ takes such a number as an argument and returns a LOCK_OWNER structure.
+
+ Lock levels are completely defined by three tables. The lock compatibility
+ matrix specifies which locks can be held at the same time on a resource.
+ The lock combining matrix specifies what lock level has the same behaviour
+ as a pair of two locks of given levels. The getlock_result matrix
+ simplifies intention locking and lock escalation for an application:
+ basically it defines which locks are intention locks and which locks are
+ "loose" locks. It is only used to provide better diagnostics for the
+ application; the lock manager itself does not differentiate between
+ normal, intention, and loose locks.
+
+ Internally the lock manager is based on a lock-free hash, see lf_hash.c for
+ details. All locks are stored in a hash, with a resource id as a search
+ key, so all locks for the same resource will be considered collisions and
+ will be put in one (lock-free) linked list. The main lock-handling
+ logic is in the inner loop that searches for a lock in such a linked
+ list - lockfind().
+
+ This works as follows. Locks generally are added to the end of the list
+ (with one exception, see below). When scanning the list it is always
+ possible to determine which locks are granted (active) and which locks are
+ waiting - the first lock is obviously active, the second is active if it's
+ compatible with the first, and so on: a lock is active if it's compatible
+ with all previous locks and all locks before it are also active.
+ To calculate "compatible with all previous locks", all previous locks are
+ accumulated in the prev_lock variable using lock_combining_matrix.
+
+ Lock upgrades: when a thread that has a lock on a given resource
+ requests a new lock on the same resource and the old lock is not enough
+ to satisfy the new lock requirements (which is defined by
+ lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock is
+ placed in the list. Depending on other locks it is immediately active or
+ it will wait for other locks. Here's the exception to the "locks are added
+ to the end" rule - upgraded locks are added after the last active lock
+ but before all waiting locks. The old lock (the one we upgraded from) is
+ not removed from the list; indeed it may be needed if the new lock was
+ in a savepoint that gets rolled back. So the old lock is marked as
+ "ignored" (IGNORE_ME flag). The new lock gets an UPGRADED flag.
+
+ Loose locks add an important exception to the above. Loose locks do not
+ always commute with other locks. In the list IX-LS both locks are active,
+ while in the LS-IX list only the first lock is active. This creates a
+ problem in lock upgrades. If the list was IX-LS and the owner of the
+ first lock wants to place LS lock (which can be immediately granted), the
+ IX lock is upgraded to LSIX and the list becomes IX-LS-LSIX, which,
+ according to the lock compatibility matrix means that the last lock is
+ waiting - of course it all happened because IX and LS were swapped and
+ they don't commute. To work around this there's ACTIVE flag which is set
+ in every lock that never waited (was placed active), and this flag
+ overrides "compatible with all previous locks" rule.
+
+ When a lock is placed to the end of the list it's either compatible with
+ all locks and all locks are active - new lock becomes active at once, or
+ it conflicts with some of the locks, in this case in the 'blocker'
+ variable a conflicting lock is returned and the calling thread waits on a
+ pthread condition in the LOCK_OWNER structure of the owner of the
+ conflicting lock. Or a new lock is compatible with all locks, but some
+ existing locks are not compatible with each other (example: request IS,
+ when the list is S-IX) - that is not all locks are active. In this case a
+ first waiting lock is returned in the 'blocker' variable, lockman_getlock()
+ notices that a "blocker" does not conflict with the requested lock, and
+ "dereferences" it, to find the lock that it's waiting on. The calling
+ thread then begins to wait on the same lock.
+
+ To better support table-row relations where one needs to lock the table
+ with an intention lock before locking the row, extended diagnostics are
+ provided. When an intention lock (presumably on a table) is granted,
+ lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row,
+ perhaps the thread already has a normal lock on this table),
+ GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual),
+ GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check
+ whether it's possible to lock the row, but no need to lock it - perhaps
+ the thread has a loose lock on this table). This is defined by
+ getlock_result[] table.
+*/
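+
+/*
+  Illustrative sketch only (not part of the sources): one way an application
+  could drive this API. The owners array, loid2lo(), my_loid, table_id and
+  row_id below are hypothetical, and LOCK_OWNER setup (pins from the lockman
+  pinbox, mutex, cond) is omitted.
+
+    static LOCK_OWNER owners[MAX_OWNERS];
+    static LOCK_OWNER *loid2lo(uint16 loid) { return owners + loid; }
+
+    LOCKMAN lm;
+    lockman_init(&lm, loid2lo, 1000);
+    LOCK_OWNER *lo= loid2lo(my_loid);
+    switch (lockman_getlock(&lm, lo, table_id, IX)) {
+    case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE:
+      lockman_getlock(&lm, lo, row_id, X);       // lock the row as usual
+      break;
+    case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
+      break;                     // only check that the row could be locked
+    case GOT_THE_LOCK:
+      break;                     // already covered by a table-level lock
+    case DIDNT_GET_THE_LOCK:
+      break;                     // timeout: caller aborts the transaction
+    default:
+      break;
+    }
+    ...
+    lockman_release_locks(&lm, lo);              // at commit/rollback
+*/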
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_bit.h>
+#include <lf.h>
+#include "lockman.h"
+
+/*
+ Lock compatibility matrix.
+
+ It's asymmetric. Read it as "Somebody has the lock <value in the row
+ label>, can I set the lock <value in the column label> ?"
+
+ ') Though you can take LS lock while somebody has S lock, it makes no
+ sense - it's simpler to take S lock too.
+
+ 1 - compatible
+ 0 - incompatible
+ -1 - "impossible", so that we can assert the impossibility.
+*/
+static int lock_compatibility_matrix[10][10]=
+{ /* N S X IS IX SIX LS LX SLX LSIX */
+ { -1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */
+ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */
+ { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */
+ { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */
+ { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */
+ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */
+ { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */
+};
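+
+/*
+  For example, reading the matrix above: lock_compatibility_matrix[IX][IS]
+  is 1, so IS can be granted while another owner holds IX, whereas
+  lock_compatibility_matrix[S][IX] is 0, so a request for IX has to wait as
+  long as somebody else holds S.
+*/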
+
+/*
+ Lock combining matrix.
+
+ It's symmetric. Read it as "what lock level L is identical to the
+ set of two locks A and B"
+
+ One should never get N from it; we assert the impossibility.
+*/
+static enum lock_type lock_combining_matrix[10][10]=
+{/* N S X IS IX SIX LS LX SLX LSIX */
+ { N, S, X, IS, IX, SIX, S, SLX, SLX, SIX}, /* N */
+ { S, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */
+ { X, X, X, X, X, X, X, X, X, X}, /* X */
+ { IS, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */
+ { IX, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */
+ { SIX, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */
+ { LS, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */
+ { LX, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */
+ { SLX, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */
+ { LSIX, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */
+};
+
+#define REPEAT_ONCE_MORE 0
+#define OK_TO_PLACE_THE_LOCK 1
+#define OK_TO_PLACE_THE_REQUEST 2
+#define ALREADY_HAVE_THE_LOCK 4
+#define ALREADY_HAVE_THE_REQUEST 8
+#define PLACE_NEW_DISABLE_OLD 16
+#define REQUEST_NEW_DISABLE_OLD 32
+#define RESOURCE_WAS_UNLOCKED 64
+
+#define NEED_TO_WAIT (OK_TO_PLACE_THE_REQUEST | ALREADY_HAVE_THE_REQUEST |\
+ REQUEST_NEW_DISABLE_OLD)
+#define ALREADY_HAVE (ALREADY_HAVE_THE_LOCK | ALREADY_HAVE_THE_REQUEST)
+#define LOCK_UPGRADE (PLACE_NEW_DISABLE_OLD | REQUEST_NEW_DISABLE_OLD)
+
+
+/*
+ the return codes for lockman_getlock
+
+ It's asymmetric. Read it as "I have the lock <value in the row label>,
+ what value should be returned for <value in the column label> ?"
+
+ 0 means impossible combination (assert!)
+
+ The defines below help to preserve the table structure.
+ I/L/A values are self-explanatory.
+ x means the combination is possible (the assert should not crash),
+ but it cannot happen in row locks, only in table locks (S,X)
+ or lock escalations (LS,LX)
+*/
+#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE
+#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
+#define A GOT_THE_LOCK
+#define x GOT_THE_LOCK
+static enum lockman_getlock_result getlock_result[10][10]=
+{/* N S X IS IX SIX LS LX SLX LSIX */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */
+ { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */
+ { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */
+ { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */
+ { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */
+ { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */
+ { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */
+ { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */
+ { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */
+ { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */
+};
+#undef I
+#undef L
+#undef A
+#undef x
+
+LF_REQUIRE_PINS(4);
+
+typedef struct lockman_lock {
+ uint64 resource;
+ struct lockman_lock *lonext;
+ intptr volatile link;
+ uint32 hashnr;
+ /* QQ: TODO - remove hashnr from LOCK */
+ uint16 loid;
+ uchar lock; /* sizeof(uchar) <= sizeof(enum) */
+ uchar flags;
+} LOCK;
+
+#define IGNORE_ME 1
+#define UPGRADED 2
+#define ACTIVE 4
+
+typedef struct {
+ intptr volatile *prev;
+ LOCK *curr, *next;
+ LOCK *blocker, *upgrade_from;
+} CURSOR;
+
+#define PTR(V) (LOCK *)((V) & (~(intptr)1))
+#define DELETED(V) ((V) & 1)
+
+/*
+ NOTE
+ cursor is positioned in either case
+ pins[0..3] are used, they are NOT removed on return
+*/
+static int lockfind(LOCK * volatile *head, LOCK *node,
+ CURSOR *cursor, LF_PINS *pins)
+{
+ uint32 hashnr, cur_hashnr;
+ uint64 resource, cur_resource;
+ intptr link;
+ my_bool cur_active, compatible, upgrading, prev_active;
+ enum lock_type lock, prev_lock, cur_lock;
+ uint16 loid, cur_loid;
+ int cur_flags, flags;
+
+ hashnr= node->hashnr;
+ resource= node->resource;
+ lock= node->lock;
+ loid= node->loid;
+ flags= node->flags;
+
+retry:
+ cursor->prev= (intptr *)head;
+ prev_lock= N;
+ cur_active= TRUE;
+ compatible= TRUE;
+ upgrading= FALSE;
+ cursor->blocker= cursor->upgrade_from= 0;
+ _lf_unpin(pins, 3);
+ do {
+ cursor->curr= PTR(*cursor->prev);
+ _lf_pin(pins, 1, cursor->curr);
+ } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+ for (;;)
+ {
+ if (!cursor->curr)
+ break;
+ do {
+ link= cursor->curr->link;
+ cursor->next= PTR(link);
+ _lf_pin(pins, 0, cursor->next);
+ } while(link != cursor->curr->link && LF_BACKOFF);
+ cur_hashnr= cursor->curr->hashnr;
+ cur_resource= cursor->curr->resource;
+ cur_lock= cursor->curr->lock;
+ cur_loid= cursor->curr->loid;
+ cur_flags= cursor->curr->flags;
+ if (*cursor->prev != (intptr)cursor->curr)
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ if (!DELETED(link))
+ {
+ if (cur_hashnr > hashnr ||
+ (cur_hashnr == hashnr && cur_resource >= resource))
+ {
+ if (cur_hashnr > hashnr || cur_resource > resource)
+ break;
+ /* ok, we have a lock for this resource */
+ DBUG_ASSERT(lock_compatibility_matrix[prev_lock][cur_lock] >= 0);
+ DBUG_ASSERT(lock_compatibility_matrix[cur_lock][lock] >= 0);
+ if ((cur_flags & IGNORE_ME) && ! (flags & IGNORE_ME))
+ {
+ DBUG_ASSERT(cur_active);
+ if (cur_loid == loid)
+ cursor->upgrade_from= cursor->curr;
+ }
+ else
+ {
+ prev_active= cur_active;
+ if (cur_flags & ACTIVE)
+ DBUG_ASSERT(prev_active == TRUE);
+ else
+ cur_active&= lock_compatibility_matrix[prev_lock][cur_lock];
+ if (upgrading && !cur_active /*&& !(cur_flags & UPGRADED)*/)
+ break;
+ if (prev_active && !cur_active)
+ {
+ cursor->blocker= cursor->curr;
+ _lf_pin(pins, 3, cursor->curr);
+ }
+ if (cur_loid == loid)
+ {
+ /* we already have a lock on this resource */
+ DBUG_ASSERT(lock_combining_matrix[cur_lock][lock] != N);
+ DBUG_ASSERT(!upgrading || (flags & IGNORE_ME));
+ if (lock_combining_matrix[cur_lock][lock] == cur_lock)
+ {
+ /* new lock is compatible */
+ if (cur_active)
+ {
+ cursor->blocker= cursor->curr; /* loose-locks! */
+ _lf_unpin(pins, 3); /* loose-locks! */
+ return ALREADY_HAVE_THE_LOCK;
+ }
+ else
+ return ALREADY_HAVE_THE_REQUEST;
+ }
+ /* not compatible, upgrading */
+ upgrading= TRUE;
+ cursor->upgrade_from= cursor->curr;
+ }
+ else
+ {
+ if (!lock_compatibility_matrix[cur_lock][lock])
+ {
+ compatible= FALSE;
+ cursor->blocker= cursor->curr;
+ _lf_pin(pins, 3, cursor->curr);
+ }
+ }
+ prev_lock= lock_combining_matrix[prev_lock][cur_lock];
+ DBUG_ASSERT(prev_lock != N);
+ }
+ }
+ cursor->prev= &(cursor->curr->link);
+ _lf_pin(pins, 2, cursor->curr);
+ }
+ else
+ {
+ if (my_atomic_casptr((void **)cursor->prev,
+ (void **)&cursor->curr, cursor->next))
+ _lf_alloc_free(pins, cursor->curr);
+ else
+ {
+ (void)LF_BACKOFF;
+ goto retry;
+ }
+ }
+ cursor->curr= cursor->next;
+ _lf_pin(pins, 1, cursor->curr);
+ }
+ /*
+ either the end of lock list - no more locks for this resource,
+ or upgrading and the end of active lock list
+ */
+ if (upgrading)
+ {
+ if (compatible /*&& prev_active*/)
+ return PLACE_NEW_DISABLE_OLD;
+ else
+ return REQUEST_NEW_DISABLE_OLD;
+ }
+ if (cur_active && compatible)
+ {
+ /*
+ either no locks for this resource or all are compatible.
+ ok to place the lock in any case.
+ */
+ return prev_lock == N ? RESOURCE_WAS_UNLOCKED
+ : OK_TO_PLACE_THE_LOCK;
+ }
+ /* we have a lock conflict. ok to place a lock request. And wait */
+ return OK_TO_PLACE_THE_REQUEST;
+}
+
+/*
+ NOTE
+ it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays
+*/
+static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins,
+ LOCK **blocker)
+{
+ CURSOR cursor;
+ int res;
+
+ do
+ {
+ res= lockfind(head, node, &cursor, pins);
+ DBUG_ASSERT(res != ALREADY_HAVE_THE_REQUEST);
+ if (!(res & ALREADY_HAVE))
+ {
+ if (res & LOCK_UPGRADE)
+ {
+ node->flags|= UPGRADED;
+ node->lock= lock_combining_matrix[cursor.upgrade_from->lock][node->lock];
+ }
+ if (!(res & NEED_TO_WAIT))
+ node->flags|= ACTIVE;
+ node->link= (intptr)cursor.curr;
+ DBUG_ASSERT(node->link != (intptr)node);
+ DBUG_ASSERT(cursor.prev != &node->link);
+ if (!my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node))
+ {
+ res= REPEAT_ONCE_MORE;
+ node->flags&= ~ACTIVE;
+ }
+ if (res & LOCK_UPGRADE)
+ cursor.upgrade_from->flags|= IGNORE_ME;
+ /*
+ QQ: is this OK ? if a reader has already read upgrade_from,
+ it may find it conflicting with node :(
+ - see the last test from test_lockman_simple()
+ */
+ }
+
+ } while (res == REPEAT_ONCE_MORE);
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ /*
+ note that blocker is not necessarily pinned here (when it's == curr).
+ this is ok as in such a case it's either a dummy node for
+ initialize_bucket() and dummy nodes don't need pinning,
+ or it's a lock of the same transaction for lockman_getlock,
+ and it cannot be removed by another thread
+ */
+ *blocker= cursor.blocker;
+ return res;
+}
+
+/*
+ NOTE
+ it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays
+*/
+static int lockpeek(LOCK * volatile *head, LOCK *node, LF_PINS *pins,
+ LOCK **blocker)
+{
+ CURSOR cursor;
+ int res;
+
+ res= lockfind(head, node, &cursor, pins);
+
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ if (blocker)
+ *blocker= cursor.blocker;
+ return res;
+}
+
+/*
+ NOTE
+ it uses pins[0..3], on return all pins are removed.
+
+ One _must_ have the lock (or request) to call this
+*/
+static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins)
+{
+ CURSOR cursor;
+ int res;
+
+ do
+ {
+ res= lockfind(head, node, &cursor, pins);
+ DBUG_ASSERT(res & ALREADY_HAVE);
+
+ if (cursor.upgrade_from)
+ cursor.upgrade_from->flags&= ~IGNORE_ME;
+
+ /*
+ XXX this does not work with savepoints, as the old lock is left ignored.
+ It cannot be unignored, as that would basically mean moving the lock back
+ in the lock chain (from upgraded). And the latter is not allowed -
+ because it breaks list scanning. So the old ignored lock must be deleted,
+ a new - identical - lock must be installed right after the lock we're
+ deleting, then we can delete. The good news is that this is only required
+ when rolling back a savepoint.
+ */
+ if (my_atomic_casptr((void **)&(cursor.curr->link),
+ (void **)&cursor.next, 1+(char *)cursor.next))
+ {
+ if (my_atomic_casptr((void **)cursor.prev,
+ (void **)&cursor.curr, cursor.next))
+ _lf_alloc_free(pins, cursor.curr);
+ else
+ lockfind(head, node, &cursor, pins);
+ }
+ else
+ {
+ res= REPEAT_ONCE_MORE;
+ if (cursor.upgrade_from)
+ cursor.upgrade_from->flags|= IGNORE_ME;
+ }
+ } while (res == REPEAT_ONCE_MORE);
+ _lf_unpin(pins, 0);
+ _lf_unpin(pins, 1);
+ _lf_unpin(pins, 2);
+ _lf_unpin(pins, 3);
+ return res;
+}
+
+void lockman_init(LOCKMAN *lm, loid_to_lo_func *func, uint timeout)
+{
+ lf_alloc_init(&lm->alloc, sizeof(LOCK), offsetof(LOCK, lonext));
+ lf_dynarray_init(&lm->array, sizeof(LOCK **));
+ lm->size= 1;
+ lm->count= 0;
+ lm->loid_to_lo= func;
+ lm->lock_timeout= timeout;
+}
+
+void lockman_destroy(LOCKMAN *lm)
+{
+ LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0);
+ while (el)
+ {
+ intptr next= el->link;
+ if (el->hashnr & 1)
+ lf_alloc_real_free(&lm->alloc, el);
+ else
+ my_free((void *)el, MYF(0));
+ el= (LOCK *)next;
+ }
+ lf_alloc_destroy(&lm->alloc);
+ lf_dynarray_destroy(&lm->array);
+}
+
+/* TODO: optimize it */
+#define MAX_LOAD 1
+
+static void initialize_bucket(LOCKMAN *lm, LOCK * volatile *node,
+ uint bucket, LF_PINS *pins)
+{
+ int res;
+ uint parent= my_clear_highest_bit(bucket);
+ LOCK *dummy= (LOCK *)my_malloc(sizeof(LOCK), MYF(MY_WME));
+ LOCK **tmp= 0, *cur;
+ LOCK * volatile *el= _lf_dynarray_lvalue(&lm->array, parent);
+
+ if (*el == NULL && bucket)
+ initialize_bucket(lm, el, parent, pins);
+ dummy->hashnr= my_reverse_bits(bucket);
+ dummy->loid= 0;
+ dummy->lock= X; /* doesn't matter, in fact */
+ dummy->resource= 0;
+ dummy->flags= 0;
+ res= lockinsert(el, dummy, pins, &cur);
+ DBUG_ASSERT(res & (ALREADY_HAVE_THE_LOCK | RESOURCE_WAS_UNLOCKED));
+ if (res & ALREADY_HAVE_THE_LOCK)
+ {
+ my_free((void *)dummy, MYF(0));
+ dummy= cur;
+ }
+ my_atomic_casptr((void **)node, (void **)&tmp, dummy);
+}
+
+static inline uint calc_hash(uint64 resource)
+{
+ const uchar *pos= (uchar *)&resource;
+ ulong nr1= 1, nr2= 4, i;
+ for (i= 0; i < sizeof(resource) ; i++, pos++)
+ {
+ nr1^= (ulong) ((((uint) nr1 & 63)+nr2) * ((uint)*pos)) + (nr1 << 8);
+ nr2+= 3;
+ }
+ return nr1 & INT_MAX32;
+}
+
+/*
+ RETURN
+ see enum lockman_getlock_result
+ NOTE
+ uses pins[0..3], they're removed on return
+*/
+enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo,
+ uint64 resource,
+ enum lock_type lock)
+{
+ int res;
+ uint csize, bucket, hashnr;
+ LOCK *node, * volatile *el, *blocker;
+ LF_PINS *pins= lo->pins;
+ enum lock_type old_lock;
+
+ DBUG_ASSERT(lo->loid);
+ lf_rwlock_by_pins(pins);
+ node= (LOCK *)_lf_alloc_new(pins);
+ node->flags= 0;
+ node->lock= lock;
+ node->loid= lo->loid;
+ node->resource= resource;
+ hashnr= calc_hash(resource);
+ bucket= hashnr % lm->size;
+ el= _lf_dynarray_lvalue(&lm->array, bucket);
+ if (*el == NULL)
+ initialize_bucket(lm, el, bucket, pins);
+ node->hashnr= my_reverse_bits(hashnr) | 1;
+ res= lockinsert(el, node, pins, &blocker);
+ if (res & ALREADY_HAVE)
+ {
+ int r;
+ old_lock= blocker->lock;
+ _lf_alloc_free(pins, node);
+ lf_rwunlock_by_pins(pins);
+ r= getlock_result[old_lock][lock];
+ DBUG_ASSERT(r);
+ return r;
+ }
+ /* a new value was added to the hash */
+ csize= lm->size;
+ if ((my_atomic_add32(&lm->count, 1)+1.0) / csize > MAX_LOAD)
+ my_atomic_cas32(&lm->size, &csize, csize*2);
+ node->lonext= lo->all_locks;
+ lo->all_locks= node;
+ for ( ; res & NEED_TO_WAIT; res= lockpeek(el, node, pins, &blocker))
+ {
+ LOCK_OWNER *wait_for_lo;
+ ulonglong deadline;
+ struct timespec timeout;
+
+ _lf_assert_pin(pins, 3); /* blocker must be pinned here */
+ wait_for_lo= lm->loid_to_lo(blocker->loid);
+
+ /*
+ now, this is tricky. blocker is not necessarily a LOCK
+ we're waiting for. If it's compatible with what we want,
+ then we're waiting for a lock that blocker is waiting for
+ (see the two places where blocker is set in lockfind()).
+ In the latter case, let's "dereference" it.
+ */
+ if (lock_compatibility_matrix[blocker->lock][lock])
+ {
+ blocker= wait_for_lo->all_locks;
+ _lf_pin(pins, 3, blocker);
+ if (blocker != wait_for_lo->all_locks)
+ continue;
+ wait_for_lo= wait_for_lo->waiting_for;
+ }
+
+ /*
+ note that the blocker transaction may have ended by now,
+ its LOCK_OWNER and short id were reused, so 'wait_for_lo' may point
+ to an unrelated - albeit valid - LOCK_OWNER
+ */
+ if (!wait_for_lo)
+ continue;
+
+ lo->waiting_for= wait_for_lo;
+ lf_rwunlock_by_pins(pins);
+
+ /*
+ We lock a mutex - it may belong to a wrong LOCK_OWNER, but it must
+ belong to _some_ LOCK_OWNER. It means we can never free() a LOCK_OWNER
+ while there are other active LOCK_OWNERs.
+ */
+ /* QQ: race condition here */
+ pthread_mutex_lock(wait_for_lo->mutex);
+ if (DELETED(blocker->link))
+ {
+ /*
+ blocker transaction was ended, or a savepoint that owned
+ the lock was rolled back. Either way - the lock was removed
+ */
+ pthread_mutex_unlock(wait_for_lo->mutex);
+ lf_rwlock_by_pins(pins);
+ continue;
+ }
+
+ /* yuck. waiting */
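+ /*
+ my_getsystime() returns time in 100 ns units, so lock_timeout (presumably
+ given in milliseconds) is scaled by 10000 here; tv_sec/tv_nsec are then
+ derived from the same 100 ns deadline
+ */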
+ deadline= my_getsystime() + lm->lock_timeout * 10000;
+ timeout.tv_sec= deadline/10000000;
+ timeout.tv_nsec= (deadline % 10000000) * 100;
+ do
+ {
+ pthread_cond_timedwait(wait_for_lo->cond, wait_for_lo->mutex, &timeout);
+ } while (!DELETED(blocker->link) && my_getsystime() < deadline);
+ pthread_mutex_unlock(wait_for_lo->mutex);
+ lf_rwlock_by_pins(pins);
+ if (!DELETED(blocker->link))
+ {
+ /*
+ timeout.
+ note that we _don't_ release the lock request here.
+ Instead we're relying on the caller to abort the transaction,
+ and release all locks at once - see lockman_release_locks()
+ */
+ _lf_unpin(pins, 3);
+ lf_rwunlock_by_pins(pins);
+ return DIDNT_GET_THE_LOCK;
+ }
+ }
+ lo->waiting_for= 0;
+ _lf_assert_unpin(pins, 3); /* unpin should not be needed */
+ lf_rwunlock_by_pins(pins);
+ return getlock_result[lock][lock];
+}
+
+/*
+ RETURN
+ 0 - deleted
+ 1 - didn't (not found)
+ NOTE
+ see lockdelete() for pin usage notes
+*/
+int lockman_release_locks(LOCKMAN *lm, LOCK_OWNER *lo)
+{
+ LOCK * volatile *el, *node, *next;
+ uint bucket;
+ LF_PINS *pins= lo->pins;
+
+ pthread_mutex_lock(lo->mutex);
+ lf_rwlock_by_pins(pins);
+ for (node= lo->all_locks; node; node= next)
+ {
+ next= node->lonext;
+ bucket= calc_hash(node->resource) % lm->size;
+ el= _lf_dynarray_lvalue(&lm->array, bucket);
+ if (*el == NULL)
+ initialize_bucket(lm, el, bucket, pins);
+ lockdelete(el, node, pins);
+ my_atomic_add32(&lm->count, -1);
+ }
+ lf_rwunlock_by_pins(pins);
+ lo->all_locks= 0;
+ /* now signal all waiters */
+ pthread_cond_broadcast(lo->cond);
+ pthread_mutex_unlock(lo->mutex);
+ return 0;
+}
+
+#ifdef MY_LF_EXTRA_DEBUG
+static const char *lock2str[]=
+{ "N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX" };
+/*
+ NOTE
+ the function below is NOT thread-safe !!!
+*/
+void print_lockhash(LOCKMAN *lm)
+{
+ LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0);
+ printf("hash: size %u count %u\n", lm->size, lm->count);
+ while (el)
+ {
+ intptr next= el->link;
+ if (el->hashnr & 1)
+ {
+ printf("0x%08lx { resource %lu, loid %u, lock %s",
+ (long) el->hashnr, (ulong) el->resource, el->loid,
+ lock2str[el->lock]);
+ if (el->flags & IGNORE_ME) printf(" IGNORE_ME");
+ if (el->flags & UPGRADED) printf(" UPGRADED");
+ if (el->flags & ACTIVE) printf(" ACTIVE");
+ if (DELETED(next)) printf(" ***DELETED***");
+ printf("}\n");
+ }
+ else
+ {
+ /*printf("0x%08x { dummy }\n", el->hashnr);*/
+ DBUG_ASSERT(el->resource == 0 && el->loid == 0 && el->lock == X);
+ }
+ el= PTR(next);
+ }
+}
+#endif
diff --git a/storage/maria/lockman.h b/storage/maria/lockman.h
new file mode 100644
index 00000000000..fd96f8930d5
--- /dev/null
+++ b/storage/maria/lockman.h
@@ -0,0 +1,77 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _lockman_h
+#define _lockman_h
+
+/*
+ Lock levels:
+ ^^^^^^^^^^^
+
+ N - "no lock", not a lock, used sometimes internally to simplify the code
+ S - Shared
+ X - eXclusive
+ IS - Intention Shared
+ IX - Intention eXclusive
+ SIX - Shared + Intention eXclusive
+ LS - Loose Shared
+ LX - Loose eXclusive
+ SLX - Shared + Loose eXclusive
+ LSIX - Loose Shared + Intention eXclusive
+*/
+enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST };
+
+struct lockman_lock;
+
+typedef struct st_lock_owner LOCK_OWNER;
+struct st_lock_owner {
+ LF_PINS *pins; /* must be allocated from lockman's pinbox */
+ struct lockman_lock *all_locks; /* a LIFO */
+ LOCK_OWNER *waiting_for;
+ pthread_cond_t *cond; /* transactions waiting for this, wait on 'cond' */
+ pthread_mutex_t *mutex; /* mutex is required to use 'cond' */
+ uint16 loid;
+};
+
+typedef LOCK_OWNER *loid_to_lo_func(uint16);
+typedef struct {
+ LF_DYNARRAY array; /* hash itself */
+ LF_ALLOCATOR alloc; /* allocator for elements */
+ int32 volatile size; /* size of array */
+ int32 volatile count; /* number of elements in the hash */
+ uint lock_timeout;
+ loid_to_lo_func *loid_to_lo;
+} LOCKMAN;
+#define DIDNT_GET_THE_LOCK 0
+enum lockman_getlock_result {
+ NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT,
+ GOT_THE_LOCK,
+ GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE,
+ GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
+};
+
+void lockman_init(LOCKMAN *, loid_to_lo_func *, uint);
+void lockman_destroy(LOCKMAN *);
+enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo,
+ uint64 resource,
+ enum lock_type lock);
+int lockman_release_locks(LOCKMAN *, LOCK_OWNER *);
+
+#ifdef EXTRA_DEBUG
+void print_lockhash(LOCKMAN *lm);
+#endif
+
+#endif
diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c
new file mode 100644
index 00000000000..5ed5a776658
--- /dev/null
+++ b/storage/maria/ma_bitmap.c
@@ -0,0 +1,1704 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Bitmap handling (for records in block)
+
+ The data file starts with a bitmap page, followed by as many data
+ pages as the bitmap can cover. After this there is a new bitmap page
+ and more data pages etc.
+
+ The bitmap code assumes there is always an active bitmap page and thus
+ that there is at least one bitmap page in the file
+
+ Structure of bitmap page:
+
+ Fixed size records (to be implemented later):
+
+ 2 bits are used to indicate:
+
+ 0 Empty
+ 1 50-75 % full (at least room for 2 records)
+ 2 75-100 % full (at least room for one record)
+ 3 100 % full (no more room for records)
+
+ Assuming 8K pages, this will allow us to map:
+ 8192 (bytes per page) * 4 (pages mapped per byte) * 8192 (page size)= 256M
+
+ (For Maria this will be 7*4 * 8192 = 224K smaller because of LSN)
+
+ Note that for fixed size rows, we can't add more columns without doing
+ a full reorganization of the table. The user can always force a dynamic
+ size row format by specifying ROW_FORMAT=dynamic.
+
+
+ Dynamic size records:
+
+ 3 bits are used to indicate
+
+ 0 Empty page
+ 1 0-30 % full (at least room for 3 records)
+ 2 30-60 % full (at least room for 2 records)
+ 3 60-90 % full (at least room for one record)
+ 4 100 % full (no more room for records)
+ 5 Tail page, 0-40 % full
+ 6 Tail page, 40-80 % full
+ 7 Full tail page or full blob page
+
+ Assuming 8K pages, this will allow us to map:
+ 8192 (bytes per page) * 8 bits/byte / 3 bits/page * 8192 (page size)= 170.7M
+
+ Note that values 1-3 may be adjusted for each individual table based on
+ 'min record length'. Tail pages are for overflow data which can be of
+ any size and thus don't have to be adjusted for different tables.
+ If we add more columns to the table, some of the originally calculated
+ 'cut off' points may not be optimal, but they shouldn't be 'drastically
+ wrong'.
+
+ When allocating data from the bitmap, we try to do it in a
+ 'best fit' manner. Blobs and varchar blocks are given out in large
+ contiguous extents to allow fast access to them. Before allowing a
+ row to 'flow over' to other blocks, we will compact the page and use
+ all space on it. If there are many rows in the page, we will ensure
+ there are *LEFT_TO_GROW_ON_SPLIT* bytes left on the page to allow other
+ rows to grow.
+
+ The bitmap format allows us to extend the row file in big chunks, if needed.
+
+ When calculating the size for a packed row, we will calculate the following
+ things separately:
+ - Row header + null_bits + empty_bits fixed size segments etc.
+ - Size of all char/varchar fields
+ - Size of each blob field
+
+ The bitmap handler will get all the above information and return
+ either one page or a set of pages to put the different parts.
+
+ Bitmaps are read on demand in response to insert/delete/update operations.
+ The following bitmap pointers will be cached and stored on disk on close:
+ - Current insert_bitmap; When inserting new data we will first try to
+ fill this one.
+ - First bitmap which is not completely full. This is updated when we
+ free data with an update or delete.
+
+ While flushing out bitmaps, we will cache the status of the bitmap in memory,
+ to avoid having to read a bitmap that would be of no use for inserting
+ new data:
+ - Total empty space
+ - Largest number of continuous pages
+
+ The bitmap ONLY goes to disk in the following scenarios:
+ - The file is closed (and we flush all changes to disk)
+ - On checkpoint
+ (I.e. when we do a checkpoint, we have to ensure that all bitmaps are
+ put on disk even if they are not in the page cache).
+ - When explicitly requested (for example on backup or after recovery,
+ to simplify things)
+
+*/
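+
+/*
+  Illustrative sketch only (such a helper does not exist in this file; the
+  code below reads whole 6-byte groups with uint6korr() instead): how the
+  3-bit pattern of the n'th data page behind a bitmap page could be read.
+
+    static uint bitmap_pattern(const uchar *map, uint n)
+    {
+      ulonglong group= uint6korr(map + (n / 16) * 6); // 16 patterns per 6 bytes
+      return (uint) ((group >> (3 * (n % 16))) & 7);  // 3 bits each, low bits first
+    }
+
+  With an 8K block size there are 8192/6 = 1365 such groups, so one bitmap
+  page covers 1365 * 16 = 21840 data pages plus the bitmap page itself
+  (compare pages_covered in _ma_bitmap_init() below).
+*/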
+
+#include "maria_def.h"
+#include "ma_blockrec.h"
+
+/* Number of pages to store blob parts */
+#define BLOB_SEGMENT_MIN_SIZE 128
+
+#define FULL_HEAD_PAGE 4
+#define FULL_TAIL_PAGE 7
+
+static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
+ MARIA_FILE_BITMAP *bitmap)
+{
+ return (key_cache_write(share->key_cache,
+ bitmap->file, bitmap->page * bitmap->block_size, 0,
+ (byte*) bitmap->map,
+ bitmap->block_size, bitmap->block_size, 1));
+}
+
+/*
+ Initialize bitmap. This is called the first time a file is opened
+*/
+
+my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
+{
+ uint aligned_bit_blocks;
+ uint max_page_size;
+ MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+ uint size= share->block_size;
+#ifndef DBUG_OFF
+ /* We want to have a copy of the bitmap to be able to print differences */
+ size*= 2;
+#endif
+
+ if (!(bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))))
+ return 1;
+
+ bitmap->file= file;
+ bitmap->changed= 0;
+ bitmap->block_size= share->block_size;
+ /* Size needs to be aligned on 6 */
+ aligned_bit_blocks= share->block_size / 6;
+ bitmap->total_size= aligned_bit_blocks * 6;
+ /*
+ In each 6 bytes, we have 6*8/3 = 16 pages covered
+ The +1 is to add the bitmap page, as this doesn't have to be covered
+ */
+ bitmap->pages_covered= aligned_bit_blocks * 16 + 1;
+
+ /* Update size for bits */
+ /* TODO: Make this dependent on the row size */
+ max_page_size= share->block_size - PAGE_OVERHEAD_SIZE;
+ bitmap->sizes[0]= max_page_size; /* Empty page */
+ bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100;
+ bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100;
+ bitmap->sizes[3]= max_page_size - max_page_size * 90 / 100;
+ bitmap->sizes[4]= 0; /* Full page */
+ bitmap->sizes[5]= max_page_size - max_page_size * 40 / 100;
+ bitmap->sizes[6]= max_page_size - max_page_size * 80 / 100;
+ bitmap->sizes[7]= 0;
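+ /*
+ sizes[pattern] is the free space still guaranteed by that pattern; for
+ example a page marked 1 (0-30% full) is guaranteed to have at least 70%
+ of max_page_size free (compare size_to_head_pattern() / pattern_to_size())
+ */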
+
+ pthread_mutex_init(&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW);
+
+ /*
+ Start by reading first page (assume table scan)
+ Later code is simpler if it can assume we always have an active bitmap.
+ */
+ if (_ma_read_bitmap_page(share, bitmap, (ulonglong) 0))
+ return(1);
+ return 0;
+}
+
+
+/*
+ Free data allocated by _ma_bitmap_init
+*/
+
+my_bool _ma_bitmap_end(MARIA_SHARE *share)
+{
+ my_bool res= 0;
+ _ma_flush_bitmap(share);
+ pthread_mutex_destroy(&share->bitmap.bitmap_lock);
+ my_free((byte*) share->bitmap.map, MYF(MY_ALLOW_ZERO_PTR));
+ return res;
+}
+
+
+/*
+ Flush bitmap to disk
+*/
+
+my_bool _ma_flush_bitmap(MARIA_SHARE *share)
+{
+ my_bool res= 0;
+ if (share->bitmap.changed)
+ {
+ pthread_mutex_lock(&share->bitmap.bitmap_lock);
+ if (share->bitmap.changed)
+ {
+ res= write_changed_bitmap(share, &share->bitmap);
+ share->bitmap.changed= 0;
+ }
+ pthread_mutex_unlock(&share->bitmap.bitmap_lock);
+ }
+ return res;
+}
+
+
+/*
+ Return bitmap pattern for the smallest head block that can hold 'size'
+
+ SYNOPSIS
+ size_to_head_pattern()
+ bitmap Bitmap
+ size Requested size
+
+ RETURN
+ 0-3 For a description of the bitmap sizes, see the header
+*/
+
+static uint size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
+{
+ if (size <= bitmap->sizes[3])
+ return 3;
+ if (size <= bitmap->sizes[2])
+ return 2;
+ if (size <= bitmap->sizes[1])
+ return 1;
+ DBUG_ASSERT(size <= bitmap->sizes[0]);
+ return 0;
+}
+
+
+/*
+ Return bitmap pattern for a block where there are 'size' bytes free
+*/
+
+uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
+{
+ if (size < bitmap->sizes[3])
+ return 4;
+ if (size < bitmap->sizes[2])
+ return 3;
+ if (size < bitmap->sizes[1])
+ return 2;
+ return (size < bitmap->sizes[0]) ? 1 : 0;
+}
+
+
+/*
+ Return bitmap pattern for the smallest tail block that can hold 'size'
+
+ SYNOPSIS
+ size_to_tail_pattern()
+ bitmap Bitmap
+ size Requested size
+
+ RETURN
+ 0, 5 or 6 For a description of the bitmap sizes, see the header
+*/
+
+static uint size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
+{
+ if (size <= bitmap->sizes[6])
+ return 6;
+ if (size <= bitmap->sizes[5])
+ return 5;
+ DBUG_ASSERT(size <= bitmap->sizes[0]);
+ return 0;
+}
+
+
+static uint free_size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
+{
+ if (size >= bitmap->sizes[0])
+ return 0; /* Revert to empty page */
+ if (size < bitmap->sizes[6])
+ return 7;
+ if (size < bitmap->sizes[5])
+ return 6;
+ return 5;
+}
+
+
+/*
+ Return size guaranteed to be available on a page
+
+ SYNOPSIS
+ pattern_to_size()
+ bitmap Bitmap
+ pattern Pattern (0-7)
+
+ RETURN
+ 0 - block_size
+*/
+
+static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern)
+{
+ DBUG_ASSERT(pattern <= 7);
+ return bitmap->sizes[pattern];
+}
+
+
+/*
+ Print bitmap for debugging
+*/
+
+#ifndef DBUG_OFF
+
+const char *bits_to_txt[]=
+{
+ "empty", "00-30% full", "30-60% full", "60-90% full", "full",
+ "tail 00-40 % full", "tail 40-80 % full", "tail/blob full"
+};
+
+static void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap)
+{
+ uchar *pos, *end, *org_pos;
+ ulong page;
+
+ end= bitmap->map+ bitmap->used_size;
+ DBUG_LOCK_FILE;
+ fprintf(DBUG_FILE,"\nBitmap page changes at page %lu\n",
+ (ulong) bitmap->page);
+
+ page= (ulong) bitmap->page+1;
+ for (pos= bitmap->map, org_pos= bitmap->map+bitmap->block_size ; pos < end ;
+ pos+= 6, org_pos+= 6)
+ {
+ ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */
+ ulonglong org_bits= uint6korr(org_pos);
+ uint i;
+ if (bits != org_bits)
+ {
+ for (i= 0; i < 16 ; i++, bits>>= 3, org_bits>>= 3)
+ {
+ if ((bits & 7) != (org_bits & 7))
+ fprintf(DBUG_FILE, "Page: %8lu %s -> %s\n", page+i,
+ bits_to_txt[org_bits & 7], bits_to_txt[bits & 7]);
+ }
+ }
+ page+= 16;
+ }
+ fputc('\n', DBUG_FILE);
+ DBUG_UNLOCK_FILE;
+ memcpy(bitmap->map+ bitmap->block_size, bitmap->map, bitmap->block_size);
+}
+
+#endif /* DBUG_OFF */
+
+
+/***************************************************************************
+ Reading & writing bitmap pages
+***************************************************************************/
+
+/*
+ Read a given bitmap page
+
+ SYNOPSIS
+ _ma_read_bitmap_page()
+ share Maria share
+ bitmap Bitmap handler
+ page Page to read
+
+ TODO
+ Update 'bitmap->used_size' to real size of used bitmap
+
+ RETURN
+ 0 ok
+ 1 error (Error writing old bitmap or reading bitmap page)
+*/
+
+my_bool _ma_read_bitmap_page(MARIA_SHARE *share, MARIA_FILE_BITMAP *bitmap,
+ ulonglong page)
+{
+ my_off_t position= page * bitmap->block_size;
+ my_bool res;
+ DBUG_ENTER("_ma_read_bitmap_page");
+ DBUG_ASSERT(page % bitmap->pages_covered == 0);
+
+ bitmap->page= page;
+ if (position >= share->state.state.data_file_length)
+ {
+ share->state.state.data_file_length= position + bitmap->block_size;
+ bzero(bitmap->map, bitmap->block_size);
+ bitmap->used_size= 0;
+ DBUG_RETURN(0);
+ }
+ bitmap->used_size= bitmap->total_size;
+ res= key_cache_read(share->key_cache,
+ bitmap->file, position, 0,
+ (byte*) bitmap->map,
+ bitmap->block_size, bitmap->block_size, 0) == 0;
+#ifndef DBUG_OFF
+ if (!res)
+ memcpy(bitmap->map+ bitmap->block_size, bitmap->map, bitmap->block_size);
+#endif
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Change to another bitmap page
+
+ SYNOPSIS
+ _ma_change_bitmap_page()
+ info Maria handler
+ bitmap Bitmap handler
+ page Bitmap page to read
+
+ NOTES
+ If the old bitmap was changed, write it out before reading the new one.
+ We return an empty bitmap if the page is outside of the file size.
+
+ RETURN
+ 0 ok
+ 1 error (Error writing old bitmap or reading bitmap page)
+*/
+
+static my_bool _ma_change_bitmap_page(MARIA_HA *info,
+ MARIA_FILE_BITMAP *bitmap,
+ ulonglong page)
+{
+ DBUG_ENTER("_ma_change_bitmap_page");
+ DBUG_ASSERT(page % bitmap->pages_covered == 0);
+
+ if (bitmap->changed)
+ {
+ if (write_changed_bitmap(info->s, bitmap))
+ DBUG_RETURN(1);
+ bitmap->changed= 0;
+ }
+ DBUG_RETURN(_ma_read_bitmap_page(info->s, bitmap, page));
+}
+
+
+/*
+ Read next suitable bitmap
+
+ SYNOPSIS
+ move_to_next_bitmap()
+ bitmap Bitmap handle
+
+ TODO
+ Add cache of bitmaps to not read something that is not usable
+
+ RETURN
+ 0 ok
+ 1 error (either couldn't save the old bitmap or read the new one)
+*/
+
+static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap)
+{
+ ulonglong page= bitmap->page;
+ MARIA_STATE_INFO *state= &info->s->state;
+ DBUG_ENTER("move_to_next_bitmap");
+
+ if (state->first_bitmap_with_space != ~(ulonglong) 0 &&
+ state->first_bitmap_with_space != page)
+ {
+ page= state->first_bitmap_with_space;
+ state->first_bitmap_with_space= ~(ulonglong) 0;
+ }
+ else
+ page+= bitmap->pages_covered;
+ DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page));
+}
+
+
+/****************************************************************************
+ Allocate data in bitmaps
+****************************************************************************/
+
+/*
+ Store data in 'block' and mark the place used in the bitmap
+
+ SYNOPSIS
+ fill_block()
+ bitmap Bitmap handle
+ block Store data about what we found
+ best_data Pointer to best 6 byte aligned area in bitmap->map
+ best_pos Which bit in *best_data the area starts
+ 0 = first bit pattern, 1 second bit pattern etc
+ fill_pattern Bitmap pattern to store in best_data[best_pos]
+*/
+
+static void fill_block(MARIA_FILE_BITMAP *bitmap,
+ MARIA_BITMAP_BLOCK *block,
+ uchar *best_data, uint best_pos, uint best_bits,
+ uint fill_pattern)
+{
+ uint page, offset, tmp;
+ uchar *data;
+
+ /* For each 6 bytes we have 6*8/3= 16 patterns */
+ page= (best_data - bitmap->map) / 6 * 16 + best_pos;
+ block->page= bitmap->page + 1 + page;
+ block->page_count= 1 + TAIL_BIT;
+ block->empty_space= pattern_to_size(bitmap, best_bits);
+ block->sub_blocks= 1;
+ block->org_bitmap_value= best_bits;
+ block->used= BLOCKUSED_TAIL;
+
+ /*
+ Mark place used by reading/writing 2 bytes at a time to handle
+ bitmaps in overlapping bytes
+ */
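+ /*
+ For example, best_pos == 5 gives bit offset 15: data points to byte 1 of
+ the group, offset becomes 7, and the 3-bit pattern straddles bytes 1 and 2 -
+ hence the 2-byte read/write with uint2korr()/int2store()
+ */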
+ best_pos*= 3;
+ data= best_data+ best_pos / 8;
+ offset= best_pos & 7;
+ tmp= uint2korr(data);
+ tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset);
+ int2store(data, tmp);
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap););
+}
+
+
+/*
+ Allocate data for head block
+
+ SYNOPSIS
+ allocate_head()
+ bitmap bitmap
+ size Size of block we need to find
+ block Store found information here
+
+ RETURN
+ 0 ok (block is updated)
+ 1 error (no space in bitmap; block is not touched)
+*/
+
+
+static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size,
+ MARIA_BITMAP_BLOCK *block)
+{
+ uint min_bits= size_to_head_pattern(bitmap, size);
+ uchar *data= bitmap->map, *end= data + bitmap->used_size;
+ uchar *best_data= 0;
+ uint best_bits= (uint) -1, best_pos;
+ DBUG_ENTER("allocate_head");
+
+ LINT_INIT(best_pos);
+ DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size));
+
+ for (; data < end; data += 6)
+ {
+ ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
+ uint i;
+
+ /*
+ Skip common patterns
+ We can skip empty pages (if we already found a match) or
+ anything matching the following pattern as this will be either
+ a full page or a tail page
+ */
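+ /* 04444444444444444 has the high bit of all 16 3-bit patterns set, so
+ equality means every page in this group is pattern 4-7 (full or tail) */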
+ if ((!bits && best_data) ||
+ ((bits & LL(04444444444444444)) == LL(04444444444444444)))
+ continue;
+ for (i= 0; i < 16 ; i++, bits >>= 3)
+ {
+ uint pattern= bits & 7;
+ if (pattern <= min_bits)
+ {
+ if (pattern == min_bits)
+ {
+ /* Found perfect match */
+ best_bits= min_bits;
+ best_data= data;
+ best_pos= i;
+ goto found;
+ }
+ if ((int) pattern > (int) best_bits)
+ {
+ best_bits= pattern;
+ best_data= data;
+ best_pos= i;
+ }
+ }
+ }
+ }
+ if (!best_data)
+ {
+ if (bitmap->used_size == bitmap->total_size)
+ DBUG_RETURN(1);
+ /* Allocate data at end of bitmap */
+ bitmap->used_size+= 6;
+ best_data= data;
+ best_pos= best_bits= 0;
+ }
+
+found:
+ fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Allocate data for tail block
+
+ SYNOPSIS
+ allocate_tail()
+ bitmap bitmap
+ size Size of block we need to find
+ block Store found information here
+
+ RETURN
+ 0 ok (block is updated)
+ 1 error (no space in bitmap; block is not touched)
+*/
+
+
+static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size,
+ MARIA_BITMAP_BLOCK *block)
+{
+ uint min_bits= size_to_tail_pattern(bitmap, size);
+ uchar *data= bitmap->map, *end= data + bitmap->used_size;
+ uchar *best_data= 0;
+ uint best_bits= (uint) -1, best_pos;
+ DBUG_ENTER("allocate_tail");
+ DBUG_PRINT("enter", ("size: %u", size));
+
+ LINT_INIT(best_pos);
+ DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size));
+
+ for (; data < end; data += 6)
+ {
+ ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
+ uint i;
+
+ /*
+ Skip common patterns
+ We can skip empty pages (if we already found a match) or
+ groups where every page is pattern 7 (full tail/blob pages)
+ */
+
+ if ((!bits && best_data) || bits == LL(0xffffffffffff))
+ continue;
+ for (i= 0; i < 16; i++, bits >>= 3)
+ {
+ uint pattern= bits & 7;
+ if (pattern <= min_bits && (!pattern || pattern >= 5))
+ {
+ if (pattern == min_bits)
+ {
+ best_bits= min_bits;
+ best_data= data;
+ best_pos= i;
+ goto found;
+ }
+ if ((int) pattern > (int) best_bits)
+ {
+ best_bits= pattern;
+ best_data= data;
+ best_pos= i;
+ }
+ }
+ }
+ }
+ if (!best_data)
+ {
+ if (bitmap->used_size == bitmap->total_size)
+ DBUG_RETURN(1);
+ /* Allocate data at end of bitmap */
+ bitmap->used_size+= 6;
+ best_data= data;
+ best_pos= best_bits= 0;
+ }
+
+found:
+ fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_TAIL_PAGE);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Allocate data for full blocks
+
+ SYNOPSIS
+ allocate_full_pages()
+ bitmap bitmap
+ pages_needed Total size in pages (bitmap->total_size) we would like to have
+ block Store found information here
+ full_page 1 if we are not allowed to split extent
+
+ IMPLEMENTATION
+ We will return the smallest area >= size. If there is no such
+ block, we will return the biggest area that satisfies
+ area_size >= min(BLOB_SEGMENT_MIN_SIZE*full_page_size, size)
+
+ To speed up searches, we will only consider areas that have at least 16 free
+ pages starting on an even boundary. When finding such an area, we will
+ extend it with all previous and following free pages. This will ensure
+ we don't get holes between areas.
+
+ RETURN
+ # Blocks used
+ 0 error (no space in bitmap; block is not touched)
+*/
+
+static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap,
+ ulong pages_needed,
+ MARIA_BITMAP_BLOCK *block, my_bool full_page)
+{
+ uchar *data= bitmap->map, *data_end= data + bitmap->used_size;
+ uchar *page_end= data + bitmap->total_size;
+ uchar *best_data= 0;
+ uint min_size;
+ uint best_area_size, best_prefix_area_size, best_suffix_area_size;
+ uint page, size;
+ ulonglong best_prefix_bits;
+ DBUG_ENTER("allocate_full_pages");
+ DBUG_PRINT("enter", ("pages_needed: %lu", pages_needed));
+
+ /* Following variables are only used if best_data is set */
+ LINT_INIT(best_prefix_bits);
+ LINT_INIT(best_prefix_area_size);
+ LINT_INIT(best_suffix_area_size);
+
+ min_size= pages_needed;
+ if (!full_page && min_size > BLOB_SEGMENT_MIN_SIZE)
+ min_size= BLOB_SEGMENT_MIN_SIZE;
+ best_area_size= ~(uint) 0;
+
+ for (; data < page_end; data+= 6)
+ {
+ ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
+ uchar *data_start;
+ ulonglong prefix_bits= 0;
+ uint area_size, prefix_area_size, suffix_area_size;
+
+ /* Find area with at least 16 free pages */
+ if (bits)
+ continue;
+ data_start= data;
+ /* Find size of area */
+ for (data+=6 ; data < data_end ; data+= 6)
+ {
+ if ((bits= uint6korr(data)))
+ break;
+ }
+ area_size= (data - data_start) / 6 * 16;
+ if (area_size >= best_area_size)
+ continue;
+ prefix_area_size= suffix_area_size= 0;
+ if (!bits)
+ {
+ /*
+ End of page; all the rest of the bits on the page are part of the area.
+ This is needed because bitmap->used_size only covers the set bits
+ in the bitmap.
+ */
+ area_size+= (page_end - data) / 6 * 16;
+ if (area_size >= best_area_size)
+ break;
+ data= page_end;
+ }
+ else
+ {
+ /* Add bits at end of page */
+ for (; !(bits & 7); bits >>= 3)
+ suffix_area_size++;
+ area_size+= suffix_area_size;
+ }
+ if (data_start != bitmap->map)
+ {
+ /* Add bits before page */
+ bits= prefix_bits= uint6korr(data_start - 6);
+ DBUG_ASSERT(bits != 0);
+ /* 111 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 */
+ if (!(bits & LL(07000000000000000)))
+ {
+ data_start-= 6;
+ do
+ {
+ prefix_area_size++;
+ bits<<= 3;
+ } while (!(bits & LL(07000000000000000)));
+ area_size+= prefix_area_size;
+ /* Calculate offset to page from data_start */
+ prefix_area_size= 16 - prefix_area_size;
+ }
+ }
+ if (area_size >= min_size && area_size <= best_area_size)
+ {
+ best_data= data_start;
+ best_area_size= area_size;
+ best_prefix_bits= prefix_bits;
+ best_prefix_area_size= prefix_area_size;
+ best_suffix_area_size= suffix_area_size;
+
+ /* Prefer to put data in biggest possible area */
+ if (area_size <= pages_needed)
+ min_size= area_size;
+ else
+ min_size= pages_needed;
+ }
+ }
+ if (!best_data)
+ DBUG_RETURN(0); /* No room on page */
+
+ /*
+ Now allocate min(pages_needed, area_size), starting from
+ best_start + best_prefix_area_size
+ */
+ if (best_area_size > pages_needed)
+ best_area_size= pages_needed;
+
+ /* For each 6 bytes we have 6*8/3= 16 patterns */
+ page= ((best_data - bitmap->map) * 8) / 3 + best_prefix_area_size;
+ block->page= bitmap->page + 1 + page;
+ block->page_count= best_area_size;
+ block->empty_space= 0;
+ block->sub_blocks= 1;
+ block->org_bitmap_value= 0;
+ block->used= 0;
+ DBUG_PRINT("info", ("page: %lu page_count: %u",
+ (ulong) block->page, block->page_count));
+
+ if (best_prefix_area_size)
+ {
+ ulonglong tmp;
+ /* Convert offset back to bits */
+ best_prefix_area_size= 16 - best_prefix_area_size;
+ if (best_area_size < best_prefix_area_size)
+ {
+ tmp= (LL(1) << best_area_size*3) - 1;
+ best_area_size= best_prefix_area_size; /* for easy end test */
+ }
+ else
+ tmp= (LL(1) << best_prefix_area_size*3) - 1;
+ tmp<<= (16 - best_prefix_area_size) * 3;
+ DBUG_ASSERT((best_prefix_bits & tmp) == 0);
+ best_prefix_bits|= tmp;
+ int6store(best_data, best_prefix_bits);
+ if (!(best_area_size-= best_prefix_area_size))
+ {
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap););
+ DBUG_RETURN(block->page_count);
+ }
+ best_data+= 6;
+ }
+ best_area_size*= 3; /* Bits to set */
+ size= best_area_size/8; /* Bytes to set */
+ bfill(best_data, size, 255);
+ best_data+= size;
+ if ((best_area_size-= size * 8))
+ {
+ /* fill last byte */
+ *best_data|= (uchar) ((1 << best_area_size) -1);
+ best_data++;
+ }
+ if (data_end < best_data)
+ bitmap->used_size= (uint) (best_data - bitmap->map);
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap););
+ DBUG_RETURN(block->page_count);
+}
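+
+/*
+  Illustrative example for allocate_full_pages() (not part of the original
+  code; the numbers are made up): each 6 bytes of the bitmap hold 16
+  three-bit patterns, one pattern per data page. If best_data - bitmap->map
+  is 12, then (12 * 8) / 3 = 32, so the found area starts at the 33rd page
+  covered by this bitmap, i.e. data page bitmap->page + 1 + 32. If a prefix
+  of 4 free patterns from the previous 6-byte group was merged in, best_data
+  points at that previous group, best_prefix_area_size is 16 - 4 = 12 and
+  the area instead starts at pattern 12 of that group.
+*/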
+
+
+/****************************************************************************
+ Find right bitmaps where to store data
+****************************************************************************/
+
+/*
+ Find right bitmap and position for head block
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool find_head(MARIA_HA *info, uint length, uint position)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ MARIA_BITMAP_BLOCK *block;
+  /* There is always room for head blocks in bitmap_blocks */
+ block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
+
+ while (allocate_head(bitmap, length, block))
+ if (move_to_next_bitmap(info, bitmap))
+ return 1;
+ return 0;
+}
+
+
+/*
+ Find right bitmap and position for tail
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool find_tail(MARIA_HA *info, uint length, uint position)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ MARIA_BITMAP_BLOCK *block;
+ DBUG_ENTER("find_tail");
+
+ /* Needed, as there is no error checking in dynamic_element */
+ if (allocate_dynamic(&info->bitmap_blocks, position))
+ DBUG_RETURN(1);
+ block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
+
+ while (allocate_tail(bitmap, length, block))
+ if (move_to_next_bitmap(info, bitmap))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Find right bitmap and position for full blocks in one extent
+
+ NOTES
+ This is used to allocate the main extent after the 'head' block
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool find_mid(MARIA_HA *info, ulong pages, uint position)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ MARIA_BITMAP_BLOCK *block;
+ block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
+
+ while (allocate_full_pages(bitmap, pages, block, 1))
+ {
+ if (move_to_next_bitmap(info, bitmap))
+ return 1;
+ }
+ return 0;
+}
+
+
+/*
+ Find right bitmap and position for putting a blob
+
+ NOTES
+ The extents are stored last in info->bitmap_blocks
+
+ IMPLEMENTATION
+ Allocate all full pages for the block + optionally one tail
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool find_blob(MARIA_HA *info, ulong length)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ uint full_page_size= FULL_PAGE_SIZE(info->s->block_size);
+ ulong pages;
+ uint rest_length, used;
+ uint first_block_pos;
+ MARIA_BITMAP_BLOCK *first_block= 0;
+ DBUG_ENTER("find_blob");
+ DBUG_PRINT("enter", ("length: %lu", length));
+
+ pages= length / full_page_size;
+ rest_length= (uint) (length - pages * full_page_size);
+ if (rest_length >= MAX_TAIL_SIZE(info->s->block_size))
+ {
+ pages++;
+ rest_length= 0;
+ }
+
+ if (pages)
+ {
+ MARIA_BITMAP_BLOCK *block;
+ if (allocate_dynamic(&info->bitmap_blocks,
+ info->bitmap_blocks.elements +
+ pages / BLOB_SEGMENT_MIN_SIZE + 2))
+ DBUG_RETURN(1);
+ first_block_pos= info->bitmap_blocks.elements;
+ block= dynamic_element(&info->bitmap_blocks, info->bitmap_blocks.elements,
+ MARIA_BITMAP_BLOCK*);
+ first_block= block;
+ do
+ {
+ used= allocate_full_pages(bitmap,
+ (pages >= 65535 ? 65535 : (uint) pages), block,
+ 0);
+ if (!used && move_to_next_bitmap(info, bitmap))
+ DBUG_RETURN(1);
+ info->bitmap_blocks.elements++;
+ block++;
+ } while ((pages-= used) != 0);
+ }
+ if (rest_length && find_tail(info, rest_length,
+ info->bitmap_blocks.elements++))
+ DBUG_RETURN(1);
+ if (first_block)
+ first_block->sub_blocks= info->bitmap_blocks.elements - first_block_pos;
+ DBUG_RETURN(0);
+}
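+
+/*
+  Illustrative example for find_blob() (not part of the original code),
+  assuming FULL_PAGE_SIZE(block_size) is 8160 and MAX_TAIL_SIZE(block_size)
+  is 4000 (made-up values): for a blob of length 100000 we get
+  pages= 100000 / 8160 = 12 and rest_length= 100000 - 12 * 8160 = 2080.
+  As 2080 < 4000, the last 2080 bytes go to a tail page; had rest_length
+  been >= 4000 we would instead have allocated a 13th full page.
+*/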
+
+
+static my_bool allocate_blobs(MARIA_HA *info, MARIA_ROW *row)
+{
+ ulong *length, *end;
+ uint elements;
+ /*
+ Reserve size for:
+ head block
+ one extent
+ tail block
+ */
+ elements= info->bitmap_blocks.elements;
+ for (length= row->blob_lengths, end= length + info->s->base.blobs;
+ length < end; length++)
+ {
+ if (*length && find_blob(info, *length))
+ return 1;
+ }
+ row->extents_count= (info->bitmap_blocks.elements - elements);
+ return 0;
+}
+
+
+static void use_head(MARIA_HA *info, ulonglong page, uint size,
+ uint block_position)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ MARIA_BITMAP_BLOCK *block;
+ uchar *data;
+ uint offset, tmp, offset_page;
+
+ block= dynamic_element(&info->bitmap_blocks, block_position,
+ MARIA_BITMAP_BLOCK*);
+ block->page= page;
+ block->page_count= 1 + TAIL_BIT;
+ block->empty_space= size;
+ block->sub_blocks= 1;
+ block->used= BLOCKUSED_TAIL;
+
+ /*
+ Mark place used by reading/writing 2 bytes at a time to handle
+ bitmaps in overlapping bytes
+ */
+ offset_page= (uint) (page - bitmap->page - 1) * 3;
+ offset= offset_page & 7;
+ data= bitmap->map + offset_page / 8;
+ tmp= uint2korr(data);
+ block->org_bitmap_value= (tmp >> offset) & 7;
+ tmp= (tmp & ~(7 << offset)) | (FULL_HEAD_PAGE << offset);
+ int2store(data, tmp);
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap););
+}
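+
+/*
+  Illustrative example for use_head() (not part of the original code): if
+  the page is the 11th page after the bitmap page, offset_page= 10 * 3 = 30,
+  so the pattern lives at byte 30 / 8 = 3 of the map, starting at bit offset
+  30 & 7 = 6. The three pattern bits then straddle bytes 3 and 4, which is
+  why the code reads and writes two bytes at a time with
+  uint2korr()/int2store().
+*/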
+
+
+/*
+  Find out where to split the row
+*/
+
+static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row,
+ uint extents_length, uint split_size)
+{
+ uint row_length= row->base_length;
+ uint *lengths, *lengths_end;
+
+ DBUG_ASSERT(row_length < split_size);
+ /*
+    First store in null_field_lengths the lengths of the different parts
+    that are written to the row. This needs to be in the same order as in
+    ma_blockrec.c::write_block_record()
+ */
+ row->null_field_lengths[-3]= extents_length;
+ row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length;
+ row->null_field_lengths[-1]= row->field_lengths_length;
+ for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS,
+ lengths_end= (lengths + share->base.pack_fields - share->base.blobs +
+ EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++)
+ {
+ if (row_length + *lengths > split_size)
+ break;
+ row_length+= *lengths;
+ }
+ return row_length;
+}
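+
+/*
+  Illustrative example for find_where_to_split_row() (not part of the
+  original code; the numbers are made up): if row->base_length is 32 and the
+  successive part lengths are 12 (extents), 40 (fixed not-null fields),
+  6 (length array), 100, 200 and 500 with split_size 400, the loop
+  accumulates 32+12+40+6+100+200 = 390 and stops before the 500-byte part,
+  so the head page gets the first 390 bytes of the row.
+*/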
+
+
+static my_bool write_rest_of_head(MARIA_HA *info, uint position,
+ ulong rest_length)
+{
+ MARIA_SHARE *share= info->s;
+ uint full_page_size= FULL_PAGE_SIZE(share->block_size);
+ MARIA_BITMAP_BLOCK *block;
+
+ if (position == 0)
+ {
+ /* Write out full pages */
+ uint pages= rest_length / full_page_size;
+
+ rest_length%= full_page_size;
+ if (rest_length >= MAX_TAIL_SIZE(share->block_size))
+ {
+ /* Put tail on a full page */
+ pages++;
+ rest_length= 0;
+ }
+    if (find_mid(info, pages, 1))
+ return 1;
+ /*
+ Insert empty block after full pages, to allow write_block_record() to
+ split segment into used + free page
+ */
+ block= dynamic_element(&info->bitmap_blocks, 2, MARIA_BITMAP_BLOCK*);
+ block->page_count= 0;
+ block->used= 0;
+ }
+ if (rest_length)
+ {
+ if (find_tail(info, rest_length, ELEMENTS_RESERVED_FOR_MAIN_PART - 1))
+ return 1;
+ }
+ else
+ {
+ /* Empty tail block */
+ block= dynamic_element(&info->bitmap_blocks,
+ ELEMENTS_RESERVED_FOR_MAIN_PART - 1,
+ MARIA_BITMAP_BLOCK *);
+ block->page_count= 0;
+ block->used= 0;
+ }
+ return 0;
+}
+
+
+/*
+ Find where to store one row
+
+  SYNOPSIS
+ _ma_bitmap_find_place()
+ info Maria handler
+ row Information about row to write
+ blocks Store data about allocated places here
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,
+ MARIA_BITMAP_BLOCKS *blocks)
+{
+ MARIA_SHARE *share= info->s;
+ my_bool res= 1;
+ uint full_page_size, position, max_page_size;
+ uint head_length, row_length, rest_length, extents_length;
+ DBUG_ENTER("_ma_bitmap_find_place");
+
+ blocks->count= 0;
+ blocks->tail_page_skipped= blocks->page_skipped= 0;
+ row->extents_count= 0;
+ /*
+    Reserve space for the following blocks:
+ - Head block
+ - Full page block
+ - Marker block to allow write_block_record() to split full page blocks
+ into full and free part
+ - Tail block
+ */
+
+ info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART;
+ max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE);
+
+ pthread_mutex_lock(&share->bitmap.bitmap_lock);
+
+ if (row->total_length <= max_page_size)
+ {
+ /* Row fits in one page */
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
+ if (find_head(info, (uint) row->total_length, position))
+ goto abort;
+ goto end;
+ }
+
+ /*
+ First allocate all blobs (so that we can find out the needed size for
+    the main block).
+ */
+ if (row->blob_length && allocate_blobs(info, row))
+ goto abort;
+
+ extents_length= row->extents_count * ROW_EXTENT_SIZE;
+ if ((head_length= (row->head_length + extents_length)) <= max_page_size)
+ {
+ /* Main row part fits into one page */
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
+ if (find_head(info, head_length, position))
+ goto abort;
+ goto end;
+ }
+
+ /* Allocate enough space */
+ head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
+
+ /* The first segment size is stored in 'row_length' */
+ row_length= find_where_to_split_row(share, row, extents_length,
+ max_page_size);
+
+ full_page_size= FULL_PAGE_SIZE(share->block_size);
+ position= 0;
+ if (head_length - row_length <= full_page_size)
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
+ if (find_head(info, row_length, position))
+ goto abort;
+ rest_length= head_length - row_length;
+ if (write_rest_of_head(info, position, rest_length))
+ goto abort;
+
+end:
+ blocks->block= dynamic_element(&info->bitmap_blocks, position,
+ MARIA_BITMAP_BLOCK*);
+ blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position;
+ /* First block's page_count is for all blocks */
+ blocks->count= info->bitmap_blocks.elements - position;
+ res= 0;
+
+abort:
+ pthread_mutex_unlock(&share->bitmap.bitmap_lock);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Find where to put row on update (when head page is already defined)
+
+  SYNOPSIS
+ _ma_bitmap_find_new_place()
+ info Maria handler
+ row Information about row to write
+ page On which page original row was stored
+ free_size Free size on head page
+ blocks Store data about allocated places here
+
+ NOTES
+ This function is only called when the new row can't fit in the space of
+ the old row in the head page.
+
+    This is essentially the same as _ma_bitmap_find_place() except that
+ we don't call find_head() to search in bitmaps where to put the page.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row,
+ ulonglong page, uint free_size,
+ MARIA_BITMAP_BLOCKS *blocks)
+{
+ MARIA_SHARE *share= info->s;
+ my_bool res= 1;
+ uint full_page_size, position;
+ uint head_length, row_length, rest_length, extents_length;
+ DBUG_ENTER("_ma_bitmap_find_new_place");
+
+ blocks->count= 0;
+ blocks->tail_page_skipped= blocks->page_skipped= 0;
+ row->extents_count= 0;
+ info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART;
+
+ pthread_mutex_lock(&share->bitmap.bitmap_lock);
+ if (share->bitmap.page != page / share->bitmap.pages_covered &&
+ _ma_change_bitmap_page(info, &share->bitmap,
+ page / share->bitmap.pages_covered))
+ goto abort;
+
+ /*
+ First allocate all blobs (so that we can find out the needed size for
+    the main block).
+ */
+ if (row->blob_length && allocate_blobs(info, row))
+ goto abort;
+
+ extents_length= row->extents_count * ROW_EXTENT_SIZE;
+ if ((head_length= (row->head_length + extents_length)) <= free_size)
+ {
+ /* Main row part fits into one page */
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
+ use_head(info, page, head_length, position);
+ goto end;
+ }
+
+ /* Allocate enough space */
+ head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
+
+ /* The first segment size is stored in 'row_length' */
+ row_length= find_where_to_split_row(share, row, extents_length, free_size);
+
+ full_page_size= FULL_PAGE_SIZE(share->block_size);
+ position= 0;
+ if (head_length - row_length <= full_page_size)
+ position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
+ use_head(info, page, row_length, position);
+ rest_length= head_length - row_length;
+
+ if (write_rest_of_head(info, position, rest_length))
+ goto abort;
+
+end:
+ blocks->block= dynamic_element(&info->bitmap_blocks, position,
+ MARIA_BITMAP_BLOCK*);
+ blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position;
+ /* First block's page_count is for all blocks */
+ blocks->count= info->bitmap_blocks.elements - position;
+ res= 0;
+
+abort:
+ pthread_mutex_unlock(&share->bitmap.bitmap_lock);
+ DBUG_RETURN(res);
+}
+
+
+/****************************************************************************
+ Clear and reset bits
+****************************************************************************/
+
+static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
+ ulonglong page, uint fill_pattern)
+{
+ ulonglong bitmap_page;
+ uint offset_page, offset, tmp, org_tmp;
+ uchar *data;
+ DBUG_ENTER("set_page_bits");
+
+ bitmap_page= page / bitmap->pages_covered;
+ if (bitmap_page != bitmap->page &&
+ _ma_change_bitmap_page(info, bitmap, bitmap_page))
+ DBUG_RETURN(1);
+
+ /* Find page number from start of bitmap */
+ offset_page= page - bitmap->page - 1;
+ /*
+ Mark place used by reading/writing 2 bytes at a time to handle
+ bitmaps in overlapping bytes
+ */
+ offset_page*= 3;
+ offset= offset_page & 7;
+ data= bitmap->map + offset_page / 8;
+ org_tmp= tmp= uint2korr(data);
+ tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset);
+ if (tmp == org_tmp)
+ DBUG_RETURN(0); /* No changes */
+ int2store(data, tmp);
+
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap););
+ if (fill_pattern != 3 && fill_pattern != 7 &&
+ page < info->s->state.first_bitmap_with_space)
+ info->s->state.first_bitmap_with_space= page;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Get bitmap pattern for a given page
+
+  SYNOPSIS
+    get_page_bits()
+ info Maria handler
+ bitmap Bitmap handler
+ page Page number
+
+ RETURN
+ 0-7 Bitmap pattern
+ ~0 Error (couldn't read page)
+*/
+
+static uint get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
+ ulonglong page)
+{
+ ulonglong bitmap_page;
+ uint offset_page, offset, tmp;
+ uchar *data;
+ DBUG_ENTER("get_page_bits");
+
+ bitmap_page= page / bitmap->pages_covered;
+ if (bitmap_page != bitmap->page &&
+ _ma_change_bitmap_page(info, bitmap, bitmap_page))
+ DBUG_RETURN(~ (uint) 0);
+
+ /* Find page number from start of bitmap */
+ offset_page= page - bitmap->page - 1;
+ /*
+    Read 2 bytes at a time to handle bitmap patterns that span a
+    byte boundary
+ */
+ offset_page*= 3;
+ offset= offset_page & 7;
+ data= bitmap->map + offset_page / 8;
+ tmp= uint2korr(data);
+ DBUG_RETURN((tmp >> offset) & 7);
+}
+
+
+/*
+ Mark all pages in a region as free
+
+ SYNOPSIS
+ reset_full_page_bits()
+ info Maria handler
+ bitmap Bitmap handler
+ page Start page
+ page_count Number of pages
+
+ NOTES
+    We assume that all pages in the region are covered by the same bitmap page
+ One must have a lock on info->s->bitmap.bitmap_lock
+
+ RETURN
+ 0 ok
+ 1 Error (when reading bitmap)
+*/
+
+my_bool _ma_reset_full_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
+ ulonglong page, uint page_count)
+{
+ ulonglong bitmap_page;
+ uint offset, bit_start, bit_count, tmp;
+ uchar *data;
+ DBUG_ENTER("_ma_reset_full_page_bits");
+ DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count));
+ safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock);
+
+ bitmap_page= page / bitmap->pages_covered;
+ if (bitmap_page != bitmap->page &&
+ _ma_change_bitmap_page(info, bitmap, bitmap_page))
+ DBUG_RETURN(1);
+
+ /* Find page number from start of bitmap */
+ page= page - bitmap->page - 1;
+
+ /* Clear bits from 'page * 3' -> '(page + page_count) * 3' */
+ bit_start= page * 3;
+ bit_count= page_count * 3;
+
+ data= bitmap->map + bit_start / 8;
+ offset= bit_start & 7;
+
+  tmp= (255 << offset);                         /* Bits to clear */
+ if (bit_count + offset < 8)
+ {
+ /* Only clear bits between 'offset' and 'offset+bit_count-1' */
+ tmp^= (255 << (offset + bit_count));
+ }
+ *data&= ~tmp;
+
+ if ((int) (bit_count-= (8 - offset)) > 0)
+ {
+ uint fill;
+ data++;
+ /*
+ -1 is here to avoid one 'if' statement and to let the following code
+ handle the last byte
+ */
+ if ((fill= (bit_count - 1) / 8))
+ {
+ bzero(data, fill);
+ data+= fill;
+ }
+ bit_count-= fill * 8; /* Bits left to clear */
+ tmp= (1 << bit_count) - 1;
+ *data&= ~tmp;
+ }
+ if (bitmap->page < (ulonglong) info->s->state.first_bitmap_with_space)
+ info->s->state.first_bitmap_with_space= bitmap->page;
+ bitmap->changed= 1;
+ DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap););
+ DBUG_RETURN(0);
+}
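+
+/*
+  Illustrative example for _ma_reset_full_page_bits() (not part of the
+  original code): clearing 5 pages starting at relative page 3 gives
+  bit_start= 9 and bit_count= 15, so data= bitmap->map + 1 and offset= 1.
+  The first step clears bits 1..7 of that byte (7 bits), leaving
+  bit_count= 8; fill= (8 - 1) / 8 = 0, so no whole bytes are zeroed, and the
+  final mask (1 << 8) - 1 clears all 8 bits of the next byte, for a total of
+  7 + 8 = 15 cleared bits.
+*/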
+
+
+/*
+ Correct bitmap pages to reflect the true allocation
+
+ SYNOPSIS
+ _ma_bitmap_release_unused()
+    info        Maria handler
+ blocks Bitmap blocks
+
+ IMPLEMENTATION
+    If block->used & BLOCKUSED_TAIL is set:
+       If block->used & BLOCKUSED_USED is set, then the bits for the
+       corresponding page are set according to block->empty_space
+       If block->used & BLOCKUSED_USED is not set, then the bits for
+       the corresponding page are set to org_bitmap_value
+
+    If block->used & BLOCKUSED_TAIL is not set:
+       If block->used is not set, the bits for the corresponding pages are
+       cleared
+
+    For the first block (head block) the logic is the same as for a tail block
+
+ RETURN
+ 0 ok
+ 1 error (Couldn't write or read bitmap page)
+*/
+
+my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
+{
+ MARIA_BITMAP_BLOCK *block= blocks->block, *end= block + blocks->count;
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ uint bits, current_bitmap_value;
+ DBUG_ENTER("_ma_bitmap_release_unused");
+
+ /*
+ We can skip FULL_HEAD_PAGE (4) as the page was marked as 'full'
+ when we allocated space in the page
+ */
+ current_bitmap_value= FULL_HEAD_PAGE;
+
+ pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
+
+ /* First handle head block */
+ if (block->used & BLOCKUSED_USED)
+ {
+ DBUG_PRINT("info", ("head empty_space: %u", block->empty_space));
+ bits= _ma_free_size_to_head_pattern(bitmap, block->empty_space);
+ if (block->used & BLOCKUSED_USE_ORG_BITMAP)
+ current_bitmap_value= block->org_bitmap_value;
+ }
+ else
+ bits= block->org_bitmap_value;
+ if (bits != current_bitmap_value &&
+ set_page_bits(info, bitmap, block->page, bits))
+ goto err;
+
+
+ /* Handle all full pages and tail pages (for head page and blob) */
+ for (block++; block < end; block++)
+ {
+ if (block->used & BLOCKUSED_TAIL)
+ {
+ if (block->used & BLOCKUSED_USED)
+ {
+ DBUG_PRINT("info", ("tail empty_space: %u", block->empty_space));
+ bits= free_size_to_tail_pattern(bitmap, block->empty_space);
+ }
+ else
+ bits= block->org_bitmap_value;
+ if (bits != FULL_TAIL_PAGE &&
+ set_page_bits(info, bitmap, block->page, bits))
+ goto err;
+ }
+ if (!(block->used & BLOCKUSED_USED) &&
+ _ma_reset_full_page_bits(info, bitmap,
+ block->page, block->page_count))
+ goto err;
+ }
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ DBUG_RETURN(0);
+
+err:
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ DBUG_RETURN(1);
+}
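+
+/*
+  Illustrative example for _ma_bitmap_release_unused() (not part of the
+  original code): a tail block with
+  block->used == (BLOCKUSED_USED | BLOCKUSED_TAIL) and empty_space 100 gets
+  its pattern recomputed with free_size_to_tail_pattern(bitmap, 100) and is
+  only rewritten if the result differs from FULL_TAIL_PAGE; a full-page
+  block that was reserved but never used (BLOCKUSED_USED not set) simply has
+  its page_count pages reset to 0 (free) again.
+*/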
+
+
+/*
+ Free full pages from bitmap
+
+ SYNOPSIS
+ _ma_bitmap_free_full_pages()
+      info        Maria handler
+ extents Extents (as stored on disk)
+ count Number of extents
+
+ IMPLEMENTATION
+ Mark all full pages (not tails) from extents as free
+
+ RETURN
+ 0 ok
+ 1 error (Couldn't write or read bitmap page)
+*/
+
+my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const byte *extents,
+ uint count)
+{
+ DBUG_ENTER("_ma_bitmap_free_full_pages");
+
+ pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
+ for (; count--; extents += ROW_EXTENT_SIZE)
+ {
+ ulonglong page= uint5korr(extents);
+ uint page_count= uint2korr(extents + ROW_EXTENT_PAGE_SIZE);
+ if (!(page_count & TAIL_BIT))
+ {
+ if (_ma_reset_full_page_bits(info, &info->s->bitmap, page, page_count))
+ {
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ DBUG_RETURN(0);
+}
+
+
+
+my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong pos, my_bool head,
+ uint empty_space)
+{
+ MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
+ uint bits;
+ my_bool res;
+ DBUG_ENTER("_ma_bitmap_set");
+
+ pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
+ bits= (head ?
+ _ma_free_size_to_head_pattern(bitmap, empty_space) :
+ free_size_to_tail_pattern(bitmap, empty_space));
+ res= set_page_bits(info, bitmap, pos, bits);
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Check that bitmap pattern is correct for a page
+
+ NOTES
+ Used in maria_chk
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_check_bitmap_data(MARIA_HA *info,
+ enum en_page_type page_type, ulonglong page,
+ uint empty_space, uint *bitmap_pattern)
+{
+ uint bits;
+ switch (page_type) {
+ case UNALLOCATED_PAGE:
+ case MAX_PAGE_TYPE:
+ bits= 0;
+ break;
+ case HEAD_PAGE:
+ bits= _ma_free_size_to_head_pattern(&info->s->bitmap, empty_space);
+ break;
+ case TAIL_PAGE:
+ bits= free_size_to_tail_pattern(&info->s->bitmap, empty_space);
+ break;
+ case BLOB_PAGE:
+ bits= FULL_TAIL_PAGE;
+ break;
+ }
+ return (*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) !=
+ bits;
+}
+
+
+/*
+  Check that the bitmap pattern is of the right type for a page
+
+ NOTES
+ Used in maria_chk
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
+ enum en_page_type page_type,
+ ulonglong page,
+ uint *bitmap_pattern)
+{
+ if ((*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) > 7)
+ return 1; /* Couldn't read page */
+ switch (page_type) {
+ case HEAD_PAGE:
+ return *bitmap_pattern < 1 || *bitmap_pattern > 4;
+ case TAIL_PAGE:
+ return *bitmap_pattern < 5;
+ case BLOB_PAGE:
+ return *bitmap_pattern != 7;
+ default:
+ break;
+ }
+ DBUG_ASSERT(0);
+ return 1;
+}
diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c
new file mode 100644
index 00000000000..f1345b2c2f3
--- /dev/null
+++ b/storage/maria/ma_blockrec.c
@@ -0,0 +1,2743 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Storage of records in block
+
+ Maria will have a LSN at start of each page (including the bitmap page)
+ Maria will for each row have the additional information:
+
+ TRANSID Transaction ID that last updated row (6 bytes)
+ VER_PTR Version pointer that points on the UNDO entry that
+ contains last version of the row versions (7 bytes)
+
+ The different page types that are in a data file are:
+
+  Bitmap pages     Map of free pages in the next extent (8192 page size
+ gives us 256M of mapped pages / bitmap)
+ Head page Start of rows are stored on this page.
+ A rowid always points to a head page
+ Blob page This page is totally filled with data from one blob or by
+ a set of long VARCHAR/CHAR fields
+ Tail page This contains the last part from different rows, blobs
+ or varchar fields.
+
+ The data file starts with a bitmap page, followed by as many data
+ pages as the bitmap can cover. After this there is a new bitmap page
+ and more data pages etc.
+
+ For information about the bitmap page, see ma_bitmap.c
+
+ Structure of data and tail page:
+
+ The page has a row directory at end of page to allow us to do deletes
+ without having to reorganize the page. It also allows us to store some
+ extra bytes after each row to allow them to grow without having to move
+ around other rows
+
+ Page header:
+
+ LSN 7 bytes Log position for last page change
+ PAGE_TYPE 1 byte 1 for head / 2 for tail / 3 for blob
+ NO 1 byte Number of row/tail entries on page
+ empty space 2 bytes Empty space on page
+
+  The topmost bit in PAGE_TYPE is set to 1 if the data on the page
+ can be compacted to get more space. (PAGE_CAN_BE_COMPACTED)
+
+ Row data
+
+  Row directory of NO entries, that consist of the following for each row
+  (in reverse order; i.e., the first record is stored last):
+
+ Position 2 bytes Position of row on page
+ Length 2 bytes Length of entry
+
+  For Position and Length, the topmost bit of the position and the
+  topmost bit of the length could be used for some states of the row (in
+ other words, we should try to keep these reserved)
+
+ eof flag 1 byte Reserved for full page read testing
+
+ ----------------
+
+ Structure of blob pages:
+
+ LSN 7 bytes Log position for last page change
+ PAGE_TYPE 1 byte 3
+
+ data
+
+ -----------------
+
+ Row data structure:
+
+ Flag 1 byte Marker of which header field exists
+ TRANSID 6 bytes TRANSID of changing transaction
+ (optional, added on insert and first
+ update/delete)
+ VER_PTR 7 bytes Pointer to older version in log
+ (undo record)
+ (optional, added after first
+ update/delete)
+ DELETE_TRANSID 6 bytes (optional). TRANSID of original row.
+ Added on delete.
+ Nulls_extended 1 byte To allow us to add new DEFAULT NULL
+ fields (optional, added after first
+ change of row after alter table)
+ Number of ROW_EXTENT's 1-3 byte Length encoded, optional
+ This is the number of extents the
+ row is split into
+ First row_extent 7 byte Pointer to first row extent (optional)
+
+ Total length of length array 1-3 byte Only used if we have
+ char/varchar/blob fields.
+ Row checksum 1 byte Only if table created with checksums
+ Null_bits .. One bit for each NULL field
+ Empty_bits .. One bit for each NOT NULL field. This bit is
+ 0 if the value is 0 or empty string.
+
+ field_offsets 2 byte/offset
+                        For each group of 32 fields, there is one offset
+                        that points to where the field information starts
+                        in the block. This is to provide fast access to
+                        later fields in the row when we only need
+ to return a small set of fields.
+
+ Things marked above as 'optional' will only be present if the corresponding
+ bit is set in 'Flag' field.
+
+ Data in the following order:
+ (Field order is precalculated when table is created)
+
+ Critical fixed length, not null, fields. (Note, these can't be dropped)
+ Fixed length, null fields
+
+ Length array, 1-4 byte per field for all CHAR/VARCHAR/BLOB fields.
+ Number of bytes used in length array per entry is depending on max length
+ for field.
+
+ ROW_EXTENT's
+ CHAR data (space stripped)
+ VARCHAR data
+ BLOB data
+
+ Fields marked in null_bits or empty_bits are not stored in data part or
+ length array.
+
+ If row doesn't fit into the given block, then the first EXTENT will be
+ stored last on the row. This is done so that we don't break any field
+ data in the middle.
+
+ We first try to store the full row into one block. If that's not possible
+  we move each big blob out into its own extents. If this is not enough we
+ move out a concatenation of all varchars to their own extent.
+
+ Each blob and the concatenated char/varchar fields are stored the following
+ way:
+ - Store the parts in as many full-contiguous pages as possible.
+ - The last part, that doesn't fill a full page, is stored in tail page.
+
+ When doing an insert of a new row, we don't have to have
+  VER_PTR in the row. This lets rows that are never changed be stored
+  efficiently. On update and delete we would add TRANSID (if it was an old
+  committed row) and VER_PTR to the row. On row page compaction we can
+  easily detect rows where TRANSID was committed before the longest running
+  transaction started and we can then delete TRANSID and VER_PTR from the
+  row to
+ gain more space.
+
+ If a row is deleted in Maria, we change TRANSID to current transid and
+ change VER_PTR to point to the undo record for the delete. The undo
+ record must contain the original TRANSID, so that another transaction
+ can use this to check if they should use the found row or go to the
+ previous row pointed to by the VER_PTR in the undo row.
+
+ Description of the different parts:
+
+ Flag is coded as:
+
+ Description bit
+ TRANS_ID_exists 0
+ VER_PTR_exists 1
+ Row is deleted 2 (Means that DELETE_TRANSID exists)
+ Nulls_extended_exists 3
+ Row is split 7 This means that 'Number_of_row_extents' exists
+
+
+ This would be a way to get more space on a page when doing page
+ compaction as we don't need to store TRANSID that have committed
+ before the smallest running transaction we have in memory.
+
+ Nulls_extended is the number of new DEFAULT NULL fields in the row
+ compared to the number of DEFAULT NULL fields when the first version
+ of the table was created. If Nulls_extended doesn't exist in the row,
+ we know it's 0 as this must be one of the original rows from when the
+ table was created first time. This coding allows us to add 255*8 =
+  2040 new fields without requiring a full alter table.
+
+ Empty_bits is used to allow us to store 0, 0.0, empty string, empty
+ varstring and empty blob efficiently. (This is very good for data
+ warehousing where NULL's are often regarded as evil). Having this
+ bitmap also allows us to drop information of a field during a future
+ delete if field was deleted with ALTER TABLE DROP COLUMN. To be able
+ to handle DROP COLUMN, we must store in the index header the fields
+  that have been dropped. When unpacking a row we will ignore dropped
+ fields. When storing a row, we will mark a dropped field either with a
+ null in the null bit map or in the empty_bits and not store any data
+ for it.
+
+ One ROW_EXTENT is coded as:
+
+ START_PAGE 5 bytes
+ PAGE_COUNT 2 bytes. High bit is used to indicate tail page/
+ end of blob
+ With 8K pages, we can cover 256M in one extent. This coding gives us a
+ maximum file size of 2^40*8192 = 8192 tera
+
+ As an example of ROW_EXTENT handling, assume a row with one integer
+ field (value 5), two big VARCHAR fields (size 250 and 8192*3), and 2
+ big BLOB fields that we have updated.
+
+ The record format for storing this into an empty file would be:
+
+ Page 1:
+
+ 00 00 00 00 00 00 00 LSN
+ 01 Only one row in page
+ xx xx Empty space on page
+
+ 10 Flag: row split, VER_PTR exists
+ 01 00 00 00 00 00 TRANSID 1
+ 00 00 00 00 00 01 00 VER_PTR to first block in LOG file 1
+ 5 Number of row extents
+ 02 00 00 00 00 03 00 VARCHAR's are stored in full pages 2,3,4
+ 0 No null fields
+ 0 No empty fields
+ 05 00 00 00 00 00 80 Tail page for VARCHAR, rowid 0
+ 06 00 00 00 00 80 00 First blob, stored at page 6-133
+ 05 00 00 00 00 01 80 Tail of first blob (896 bytes) at page 5
+ 86 00 00 00 00 80 00 Second blob, stored at page 134-262
+ 05 00 00 00 00 02 80 Tail of second blob (896 bytes) at page 5
+ 05 00 5 integer
+ FA Length of first varchar field (size 250)
+ 00 60 Length of second varchar field (size 8192*3)
+ 00 60 10 First medium BLOB, 1M
+ 01 00 10 00 Second BLOB, 1M
+ xx xx xx xx xx xx Varchars are stored here until end of page
+
+ ..... until end of page
+
+ 09 00 F4 1F 00 (Start position 9, length 8180, end byte)
+*/
+
+#define SANITY_CHECKS
+
+#include "maria_def.h"
+#include "ma_blockrec.h"
+
+typedef struct st_maria_extent_cursor
+{
+ byte *extent;
+ byte *data_start; /* For error checking */
+ MARIA_RECORD_POS *tail_positions;
+ my_off_t page;
+ uint extent_count, page_count;
+ uint tail; /* <> 0 if current extent is a tail page */
+ my_bool first_extent;
+} MARIA_EXTENT_CURSOR;
+
+
+static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails);
+static my_bool delete_head_or_tail(MARIA_HA *info,
+ ulonglong page, uint record_number,
+ my_bool head);
+static void _ma_print_directory(byte *buff, uint block_size);
+
+/****************************************************************************
+ Initialization
+****************************************************************************/
+
+/*
+ Initialize data needed for block structures
+*/
+
+
+/* Size of the different header elements for a row */
+
+static uchar header_sizes[]=
+{
+ TRANSID_SIZE,
+ VERPTR_SIZE,
+ TRANSID_SIZE, /* Delete transid */
+ 1 /* Null extends */
+};
+
+/*
+ Calculate array of all used headers
+
+ Used to speed up:
+
+ size= 1;
+ if (flag & 1)
+ size+= TRANSID_SIZE;
+ if (flag & 2)
+ size+= VERPTR_SIZE;
+ if (flag & 4)
+ size+= TRANSID_SIZE
+ if (flag & 8)
+ size+= 1;
+
+ NOTES
+ This is called only once at startup of Maria
+*/
+
+static uchar total_header_size[1 << array_elements(header_sizes)];
+#define PRECALC_HEADER_BITMASK (array_elements(total_header_size) -1)
+
+void _ma_init_block_record_data(void)
+{
+ uint i;
+ bzero(total_header_size, sizeof(total_header_size));
+ total_header_size[0]= FLAG_SIZE; /* Flag byte */
+ for (i= 1; i < array_elements(total_header_size); i++)
+ {
+ uint size= FLAG_SIZE, j, bit;
+ for (j= 0; (bit= (1 << j)) <= i; j++)
+ {
+ if (i & bit)
+ size+= header_sizes[j];
+ }
+ total_header_size[i]= size;
+ }
+}
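+
+/*
+  Illustrative example (not part of the original code): with the header
+  sizes documented above (Flag 1 byte, TRANSID 6 bytes, VER_PTR 7 bytes),
+  a flag value of 3 (TRANS_ID_exists | VER_PTR_exists) gives
+  total_header_size[3]= 1 + 6 + 7 = 14 bytes, so the row header length can
+  be looked up directly instead of being recomputed for every row.
+*/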
+
+
+my_bool _ma_once_init_block_row(MARIA_SHARE *share, File data_file)
+{
+
+ share->base.max_data_file_length=
+ (((ulonglong) 1 << ((share->base.rec_reflength-1)*8))-1) *
+ share->block_size;
+#if SIZEOF_OFF_T == 4
+  set_if_smaller(share->base.max_data_file_length, INT_MAX32);
+#endif
+ return _ma_bitmap_init(share, data_file);
+}
+
+
+my_bool _ma_once_end_block_row(MARIA_SHARE *share)
+{
+ int res= _ma_bitmap_end(share);
+ if (flush_key_blocks(share->key_cache, share->bitmap.file,
+ share->temporary ? FLUSH_IGNORE_CHANGED :
+ FLUSH_RELEASE))
+ res= 1;
+ if (share->bitmap.file >= 0 && my_close(share->bitmap.file, MYF(MY_WME)))
+ res= 1;
+ return res;
+}
+
+
+/* Init info->cur_row structure */
+
+my_bool _ma_init_block_row(MARIA_HA *info)
+{
+ MARIA_ROW *row= &info->cur_row, *new_row= &info->new_row;
+ DBUG_ENTER("_ma_init_block_row");
+
+ if (!my_multi_malloc(MY_WME,
+ &row->empty_bits_buffer, info->s->base.pack_bytes,
+ &row->field_lengths, info->s->base.max_field_lengths,
+ &row->blob_lengths, sizeof(ulong) * info->s->base.blobs,
+ &row->null_field_lengths, (sizeof(uint) *
+ (info->s->base.fields -
+ info->s->base.blobs)),
+ &row->tail_positions, (sizeof(MARIA_RECORD_POS) *
+ (info->s->base.blobs + 2)),
+ &new_row->empty_bits_buffer, info->s->base.pack_bytes,
+ &new_row->field_lengths,
+ info->s->base.max_field_lengths,
+ &new_row->blob_lengths,
+ sizeof(ulong) * info->s->base.blobs,
+ &new_row->null_field_lengths, (sizeof(uint) *
+ (info->s->base.fields -
+ info->s->base.blobs)),
+ NullS, 0))
+ DBUG_RETURN(1);
+  if (my_init_dynamic_array(&info->bitmap_blocks,
+                            sizeof(MARIA_BITMAP_BLOCK),
+                            ELEMENTS_RESERVED_FOR_MAIN_PART, 16))
+  {
+    /* Free the buffer allocated by my_multi_malloc() above */
+    my_free((gptr) row->empty_bits_buffer, MYF(0));
+    DBUG_RETURN(1);
+  }
+ row->base_length= new_row->base_length= info->s->base_length;
+ DBUG_RETURN(0);
+}
+
+void _ma_end_block_row(MARIA_HA *info)
+{
+ DBUG_ENTER("_ma_end_block_row");
+ my_free((gptr) info->cur_row.empty_bits_buffer, MYF(MY_ALLOW_ZERO_PTR));
+ delete_dynamic(&info->bitmap_blocks);
+ my_free((gptr) info->cur_row.extents, MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+ Helper functions
+****************************************************************************/
+
+static inline uint empty_pos_after_row(byte *dir)
+{
+ byte *prev;
+ /*
+ Find previous used entry. (There is always a previous entry as
+ the directory never starts with a deleted entry)
+ */
+ for (prev= dir - DIR_ENTRY_SIZE ;
+ prev[0] == 0 && prev[1] == 0 ;
+ prev-= DIR_ENTRY_SIZE)
+ {}
+ return (uint) uint2korr(prev);
+}
+
+
+static my_bool check_if_zero(byte *pos, uint length)
+{
+ byte *end;
+ for (end= pos+ length; pos != end ; pos++)
+ if (pos[0] != 0)
+ return 1;
+ return 0;
+}
+
+
+/*
+  Find free position in directory
+
+ SYNOPSIS
+ find_free_position()
+ buff Page
+ block_size Size of page
+ res_rownr Store index to free position here
+ res_length Store length of found segment here
+ empty_space Store length of empty space on disk here. This is
+ all empty space, including the found block.
+
+ NOTES
+    If there is a free directory entry (entry with position == 0),
+    then use it and change it to be the size of the empty block
+    after the previous entry. This guarantees that all row entries
+    are stored on disk in inverse directory order, which makes life easier
+    for 'compact_page()' and makes it easy to know if there is free space
+    after any block.
+
+    If there is no free entry (entry with position == 0), then we create
+ a new one.
+
+ We will update the offset and the length of the found dir entry to
+ match the position and empty space found.
+
+ buff[EMPTY_SPACE_OFFSET] is NOT updated but left up to the caller
+
+ RETURN
+ 0 Error (directory full)
+ # Pointer to directory entry on page
+*/
+
+static byte *find_free_position(byte *buff, uint block_size, uint *res_rownr,
+ uint *res_length, uint *empty_space)
+{
+ uint max_entry= (uint) ((uchar*) buff)[DIR_ENTRY_OFFSET];
+ uint entry, length, first_pos;
+ byte *dir, *end;
+
+ dir= (buff + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE);
+ end= buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE;
+
+ first_pos= PAGE_HEADER_SIZE;
+ *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
+
+ /* Search after first empty position */
+  for (entry= 0 ; dir <= end ; end-= DIR_ENTRY_SIZE, entry++)
+ {
+ if (end[0] == 0 && end[1] == 0) /* Found not used entry */
+ {
+ length= empty_pos_after_row(end) - first_pos;
+ int2store(end, first_pos); /* Update dir entry */
+ int2store(end + 2, length);
+ *res_rownr= entry;
+ *res_length= length;
+ return end;
+ }
+ first_pos= uint2korr(end) + uint2korr(end + 2);
+ }
+ /* No empty places in dir; create a new one */
+ if (max_entry == MAX_ROWS_PER_PAGE)
+ return 0;
+ buff[DIR_ENTRY_OFFSET]= (byte) (uchar) max_entry+1;
+ dir-= DIR_ENTRY_SIZE;
+ length= (uint) (dir - buff - first_pos);
+ DBUG_ASSERT(length <= *empty_space - DIR_ENTRY_SIZE);
+ int2store(dir, first_pos);
+ int2store(dir+2, length); /* Current max length */
+ *res_rownr= max_entry;
+ *res_length= length;
+
+ /* Reduce directory entry size from free space size */
+ (*empty_space)-= DIR_ENTRY_SIZE;
+ return dir;
+
+}
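+
+/*
+  Illustrative example for find_free_position() (not part of the original
+  code): the directory grows from the end of the page towards the data.
+  With 3 entries and a 4-byte DIR_ENTRY_SIZE (2-byte position + 2-byte
+  length, as described in the page layout above), entry 0 is the 4 bytes
+  just before the page suffix, entry 1 the 4 bytes before that, and so on.
+  An entry whose position is 0 is a deleted slot that can be reused by the
+  loop above.
+*/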
+
+/****************************************************************************
+ Updating records
+****************************************************************************/
+
+/*
+ Calculate length of all the different field parts
+*/
+
+static void calc_record_size(MARIA_HA *info, const byte *record,
+ MARIA_ROW *row)
+{
+ MARIA_SHARE *share= info->s;
+ byte *field_length_data;
+ MARIA_COLUMNDEF *rec, *end_field;
+ uint blob_count= 0, *null_field_lengths= row->null_field_lengths;
+
+ row->normal_length= row->char_length= row->varchar_length=
+ row->blob_length= row->extents_count= 0;
+
+ /* Create empty bitmap and calculate length of each varlength/char field */
+ bzero(row->empty_bits_buffer, share->base.pack_bytes);
+ row->empty_bits= row->empty_bits_buffer;
+ field_length_data= row->field_lengths;
+ for (rec= share->rec + share->base.fixed_not_null_fields,
+ end_field= share->rec + share->base.fields;
+ rec < end_field; rec++, null_field_lengths++)
+ {
+ if ((record[rec->null_pos] & rec->null_bit))
+ {
+ if (rec->type != FIELD_BLOB)
+ *null_field_lengths= 0;
+ continue;
+ }
+ switch ((enum en_fieldtype) rec->type) {
+ case FIELD_CHECK:
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_ZERO:
+ DBUG_ASSERT(rec->empty_bit == 0);
+ /* fall through */
+ case FIELD_SKIP_PRESPACE: /* Not packed */
+ row->normal_length+= rec->length;
+ *null_field_lengths= rec->length;
+ break;
+ case FIELD_SKIP_ZERO: /* Fixed length field */
+      if (memcmp(record + rec->offset, maria_zero_string,
+ rec->length) == 0)
+ {
+ row->empty_bits[rec->empty_pos] |= rec->empty_bit;
+ *null_field_lengths= 0;
+ }
+ else
+ {
+ row->normal_length+= rec->length;
+ *null_field_lengths= rec->length;
+ }
+ break;
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ {
+ const char *pos, *end;
+ for (pos= record + rec->offset, end= pos + rec->length;
+ end > pos && end[-1] == ' '; end--)
+ ;
+ if (pos == end) /* If empty string */
+ {
+ row->empty_bits[rec->empty_pos]|= rec->empty_bit;
+ *null_field_lengths= 0;
+ }
+ else
+ {
+ uint length= (end - pos);
+ if (rec->length <= 255)
+ *field_length_data++= (byte) (uchar) length;
+ else
+ {
+ int2store(field_length_data, length);
+ field_length_data+= 2;
+ }
+ row->char_length+= length;
+ *null_field_lengths= length;
+ }
+ break;
+ }
+ case FIELD_VARCHAR:
+ {
+ uint length;
+ const byte *field_pos= record + rec->offset;
+ /* 256 is correct as this includes the length byte */
+ if (rec->length <= 256)
+ {
+ if (!(length= (uint) (uchar) *field_pos))
+ {
+ row->empty_bits[rec->empty_pos]|= rec->empty_bit;
+ *null_field_lengths= 0;
+ break;
+ }
+ *field_length_data++= *field_pos;
+ }
+ else
+ {
+ if (!(length= uint2korr(field_pos)))
+ {
+ row->empty_bits[rec->empty_pos]|= rec->empty_bit;
+ break;
+ }
+ field_length_data[0]= field_pos[0];
+ field_length_data[1]= field_pos[1];
+ field_length_data+= 2;
+ }
+ row->varchar_length+= length;
+ *null_field_lengths= length;
+ break;
+ }
+ case FIELD_BLOB:
+ {
+ const byte *field_pos= record + rec->offset;
+ uint size_length= rec->length - maria_portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(size_length, field_pos);
+ if (!blob_length)
+ {
+ row->empty_bits[rec->empty_pos]|= rec->empty_bit;
+ row->blob_lengths[blob_count++]= 0;
+ break;
+ }
+ row->blob_length+= blob_length;
+ row->blob_lengths[blob_count++]= blob_length;
+ memcpy(field_length_data, field_pos, size_length);
+ field_length_data+= size_length;
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ }
+ }
+ row->field_lengths_length= (uint) (field_length_data - row->field_lengths);
+ row->head_length= (row->base_length +
+ share->base.fixed_not_null_fields_length +
+ row->field_lengths_length +
+ size_to_store_key_length(row->field_lengths_length) +
+ row->normal_length +
+ row->char_length + row->varchar_length);
+ row->total_length= (row->head_length + row->blob_length);
+ if (row->total_length < share->base.min_row_length)
+ row->total_length= share->base.min_row_length;
+}
+
+
+/*
+ Compact page by removing all space between rows
+
+ IMPLEMENTATION
+ Move up all rows to start of page.
+ Move blocks that are directly after each other with one memmove.
+
+ TODO LATER
+ Remove TRANSID from rows that are visible to all transactions
+
+ SYNOPSIS
+ compact_page()
+ buff Page to compact
+ block_size Size of page
+      rownr           Put empty data after this row
+*/
+
+
+void compact_page(byte *buff, uint block_size, uint rownr)
+{
+ uint max_entry= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET];
+ uint page_pos, next_free_pos, start_of_found_block, diff, end_of_found_block;
+ byte *dir, *end;
+ DBUG_ENTER("compact_page");
+ DBUG_PRINT("enter", ("rownr: %u", rownr));
+ DBUG_ASSERT(max_entry > 0 &&
+ max_entry < (block_size - PAGE_HEADER_SIZE -
+ PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE);
+
+ /* Move all entries before and including rownr up to start of page */
+ dir= buff + block_size - DIR_ENTRY_SIZE * (rownr+1) - PAGE_SUFFIX_SIZE;
+ end= buff + block_size - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE;
+ page_pos= next_free_pos= start_of_found_block= PAGE_HEADER_SIZE;
+ diff= 0;
+ for (; dir <= end ; end-= DIR_ENTRY_SIZE)
+ {
+ uint offset= uint2korr(end);
+
+ if (offset)
+ {
+ uint row_length= uint2korr(end + 2);
+ DBUG_ASSERT(offset >= page_pos);
+ DBUG_ASSERT(buff + offset + row_length <= dir);
+
+ if (offset != next_free_pos)
+ {
+ uint length= (next_free_pos - start_of_found_block);
+ /*
+          There was empty space before this and the previous block.
+          Check if we have to move the previous block up to the page start.
+ */
+ if (page_pos != start_of_found_block)
+ {
+ /* move up previous block */
+ memmove(buff + page_pos, buff + start_of_found_block, length);
+ }
+ page_pos+= length;
+        /* next continuous block starts here */
+ start_of_found_block= offset;
+ diff= offset - page_pos;
+ }
+ int2store(end, offset - diff); /* correct current pos */
+ next_free_pos= offset + row_length;
+ }
+ }
+ if (page_pos != start_of_found_block)
+ {
+ uint length= (next_free_pos - start_of_found_block);
+ memmove(buff + page_pos, buff + start_of_found_block, length);
+ }
+ start_of_found_block= uint2korr(dir);
+
+ if (rownr != max_entry - 1)
+ {
+ /* Move all entries after rownr to end of page */
+ uint rownr_length;
+ next_free_pos= end_of_found_block= page_pos=
+ block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE;
+ diff= 0;
+ /* End points to entry before 'rownr' */
+ for (dir= buff + end_of_found_block ; dir <= end ; dir+= DIR_ENTRY_SIZE)
+ {
+ uint offset= uint2korr(dir);
+ uint row_length= uint2korr(dir + 2);
+ uint row_end= offset + row_length;
+ if (!offset)
+ continue;
+ DBUG_ASSERT(offset >= start_of_found_block && row_end <= next_free_pos);
+
+ if (row_end != next_free_pos)
+ {
+ uint length= (end_of_found_block - next_free_pos);
+ if (page_pos != end_of_found_block)
+ {
+ /* move next block down */
+ memmove(buff + page_pos - length, buff + next_free_pos, length);
+ }
+ page_pos-= length;
+        /* next continuous block starts here */
+ end_of_found_block= row_end;
+ diff= page_pos - row_end;
+ }
+ int2store(dir, offset + diff); /* correct current pos */
+ next_free_pos= offset;
+ }
+ if (page_pos != end_of_found_block)
+ {
+ uint length= (end_of_found_block - next_free_pos);
+ memmove(buff + page_pos - length, buff + next_free_pos, length);
+ next_free_pos= page_pos- length;
+ }
+ /* Extend rownr block to cover hole */
+ rownr_length= next_free_pos - start_of_found_block;
+ int2store(dir+2, rownr_length);
+ }
+ else
+ {
+    /* Extend last block to cover the whole page */
+ uint length= (uint) (dir - buff) - start_of_found_block;
+ int2store(dir+2, length);
+
+ buff[PAGE_TYPE_OFFSET]&= ~(byte) PAGE_CAN_BE_COMPACTED;
+ }
+ DBUG_EXECUTE("directory", _ma_print_directory(buff, block_size););
+ DBUG_VOID_RETURN;
+}
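+
+/*
+  Illustrative example for compact_page() (not part of the original code;
+  the offsets are made up): suppose a page holds rows at offsets 20, 200 and
+  500 with holes between them and we compact around rownr 1 (the row at
+  offset 200). Rows 0 and 1 are moved up against PAGE_HEADER_SIZE, row 2 is
+  moved down against the directory, and the single remaining hole in the
+  middle is added to rownr 1's directory length so that the caller can grow
+  that row in place.
+*/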
+
+
+
+/*
+ Read or initialize new head or tail page
+
+ SYNOPSIS
+ get_head_or_tail_page()
+ info Maria handler
+ block Block to read
+ buff Suggest this buffer to key cache
+ length Minimum space needed
+ page_type HEAD_PAGE || TAIL_PAGE
+ res Store result position here
+
+ NOTES
+    We don't decrement buff[EMPTY_SPACE_OFFSET] by the allocated data
+    as we don't know how much data the caller will actually use.
+
+ RETURN
+ 0 ok All slots in 'res' are updated
+ 1 error my_errno is set
+*/
+
+struct st_row_pos_info
+{
+ byte *buff; /* page buffer */
+ byte *data; /* Place for data */
+ byte *dir; /* Directory */
+ uint length; /* Length for data */
+ uint offset; /* Offset to directory */
+ uint empty_space; /* Space left on page */
+};
+
+static my_bool get_head_or_tail_page(MARIA_HA *info,
+ MARIA_BITMAP_BLOCK *block,
+ byte *buff, uint length, uint page_type,
+ struct st_row_pos_info *res)
+{
+ uint block_size;
+ DBUG_ENTER("get_head_or_tail_page");
+
+ block_size= info->s->block_size;
+ if (block->org_bitmap_value == 0) /* Empty block */
+ {
+ /* New page */
+ bzero(buff, PAGE_HEADER_SIZE);
+
+ /*
+ We zero the rest of the block to avoid getting old memory information
+ to disk and to allow the file to be compressed better if archived.
+ The rest of the code does not assume the block is zeroed above
+ PAGE_OVERHEAD_SIZE
+ */
+ bzero(buff+ PAGE_HEADER_SIZE + length,
+ block_size - length - PAGE_HEADER_SIZE - DIR_ENTRY_SIZE -
+ PAGE_SUFFIX_SIZE);
+ buff[PAGE_TYPE_OFFSET]= (byte) page_type;
+ buff[DIR_ENTRY_OFFSET]= 1;
+ res->buff= buff;
+ res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE);
+ res->data= (buff + PAGE_HEADER_SIZE);
+ res->dir= res->data + res->length;
+    /* Store position of the first row */
+ int2store(res->dir, PAGE_HEADER_SIZE);
+ res->offset= 0;
+ DBUG_ASSERT(length <= res->length);
+ }
+ else
+ {
+ byte *dir;
+ /* Read old page */
+ if (!(res->buff= key_cache_read(info->s->key_cache,
+ info->dfile,
+ (my_off_t) block->page * block_size, 0,
+ buff, block_size, block_size, 0)))
+ DBUG_RETURN(1);
+ DBUG_ASSERT((res->buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type);
+    if (!(dir= find_free_position(buff, block_size, &res->offset,
+                                  &res->length, &res->empty_space)))
+      goto crashed;                             /* Directory is full */
+    if (res->length < length)
+    {
+      if (res->empty_space >= length)
+      {
+        compact_page(res->buff, block_size, res->offset);
+        /* All empty space is now after the current position */
+        res->length= res->empty_space= uint2korr(dir + 2);
+      }
+      if (res->length < length)
+        goto crashed;                           /* Wrong bitmap information */
+    }
+ res->dir= dir;
+ res->data= res->buff + uint2korr(dir);
+ }
+ DBUG_RETURN(0);
+
+crashed:
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Write tail of non-blob-data or blob
+
+ SYNOPSIS
+ write_tail()
+ info Maria handler
+ block Block to tail page
+ row_part Data to write to page
+ length Length of data
+
+ NOTES
+ block->page_count is updated to the directory offset for the tail
+ so that we can store the position in the row extent information
+
+ RETURN
+ 0 ok
+ block->page_count is set to point (dir entry + TAIL_BIT)
+
+ 1 error; In this case my_errno is set to the error
+*/
+
+static my_bool write_tail(MARIA_HA *info,
+ MARIA_BITMAP_BLOCK *block,
+ byte *row_part, uint length)
+{
+  MARIA_SHARE *share= info->s;
+ uint block_size= share->block_size, empty_space;
+ struct st_row_pos_info row_pos;
+ my_off_t position;
+ DBUG_ENTER("write_tail");
+ DBUG_PRINT("enter", ("page: %lu length: %u",
+ (ulong) block->page, length));
+
+ info->keybuff_used= 1;
+ if (get_head_or_tail_page(info, block, info->keyread_buff, length,
+ TAIL_PAGE, &row_pos))
+ DBUG_RETURN(1);
+
+ memcpy(row_pos.data, row_part, length);
+ int2store(row_pos.dir + 2, length);
+ empty_space= row_pos.empty_space - length;
+ int2store(row_pos.buff + EMPTY_SPACE_OFFSET, empty_space);
+ block->page_count= row_pos.offset + TAIL_BIT;
+ /*
+    If there are fewer free directory entries than the number of possible
+    tails we can write for a row, we mark the page full to ensure that
+    _ma_bitmap_find_place() doesn't allocate more entries on the tail page
+    than it can hold
+ */
+ block->empty_space= ((uint) ((uchar*) row_pos.buff)[DIR_ENTRY_OFFSET] <=
+ MAX_ROWS_PER_PAGE - 1 - info->s->base.blobs ?
+ empty_space : 0);
+ block->used= BLOCKUSED_USED | BLOCKUSED_TAIL;
+
+ /* Increase data file size, if extended */
+ position= (my_off_t) block->page * block_size;
+ if (info->state->data_file_length <= position)
+ info->state->data_file_length= position + block_size;
+ DBUG_RETURN(key_cache_write(share->key_cache,
+ info->dfile, position, 0,
+ row_pos.buff, block_size, block_size, 1));
+}
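+
+/*
+  Illustrative example for write_tail() (not part of the original code): if
+  the tail ends up in directory slot 3 of its page, block->page_count is set
+  to 3 | TAIL_BIT. TAIL_BIT is the high bit of the 2-byte page count (see
+  the ROW_EXTENT description at the top of this file), so when the extent is
+  later stored the reader can tell a tail reference (page + directory slot)
+  apart from a run of full pages (page + count).
+*/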
+
+
+/*
+ Write full pages
+
+ SYNOPSIS
+ write_full_pages()
+ info Maria handler
+ block Where to write data
+ data Data to write
+ length Length of data
+
+*/
+
+static my_bool write_full_pages(MARIA_HA *info,
+ MARIA_BITMAP_BLOCK *block,
+ byte *data, ulong length)
+{
+ my_off_t page;
+  MARIA_SHARE *share= info->s;
+ uint block_size= share->block_size;
+ uint data_size= FULL_PAGE_SIZE(block_size);
+ byte *buff= info->keyread_buff;
+ uint page_count;
+ my_off_t position;
+ DBUG_ENTER("write_full_pages");
+ DBUG_PRINT("enter", ("length: %lu page: %lu page_count: %lu",
+ (ulong) length, (ulong) block->page,
+ (ulong) block->page_count));
+
+ info->keybuff_used= 1;
+ page= block->page;
+ page_count= block->page_count;
+
+ position= (my_off_t) (page + page_count) * block_size;
+ if (info->state->data_file_length < position)
+ info->state->data_file_length= position;
+
+ /* Increase data file size, if extended */
+
+ for (; length; data+= data_size)
+ {
+ uint copy_length;
+ if (!page_count--)
+ {
+ block++;
+ page= block->page;
+ page_count= block->page_count - 1;
+ DBUG_PRINT("info", ("page: %lu page_count: %lu",
+ (ulong) block->page, (ulong) block->page_count));
+
+      position= (my_off_t) (page + page_count + 1) * block_size;
+ if (info->state->data_file_length < position)
+ info->state->data_file_length= position;
+ }
+ bzero(buff, LSN_SIZE);
+ buff[PAGE_TYPE_OFFSET]= (byte) BLOB_PAGE;
+ copy_length= min(data_size, length);
+ memcpy(buff + LSN_SIZE + PAGE_TYPE_SIZE, data, copy_length);
+ length-= copy_length;
+
+ if (key_cache_write(share->key_cache,
+ info->dfile, (my_off_t) page * block_size, 0,
+ buff, block_size, block_size, 1))
+ DBUG_RETURN(1);
+ page++;
+ block->used= BLOCKUSED_USED;
+ }
+ DBUG_RETURN(0);
+}
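+
+/*
+  Illustrative example for write_full_pages() (not part of the original
+  code), assuming FULL_PAGE_SIZE(8192) is
+  8192 - LSN_SIZE - PAGE_TYPE_SIZE = 8184: each blob page stores only the
+  LSN and PAGE_TYPE before the data, so a 20000 byte blob written to a
+  3-page extent is split as 8184 + 8184 + 3632 bytes.
+*/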
+
+
+
+
+/*
+ Store packed extent data
+
+ SYNOPSIS
+ store_extent_info()
+ to Store first packed data here
+ row_extents_second_part Store rest here
+ first_block First block to store
+ count Number of blocks
+
+ NOTES
+ We don't have to store the position for the head block
+*/
+
+static void store_extent_info(byte *to,
+ byte *row_extents_second_part,
+ MARIA_BITMAP_BLOCK *first_block,
+ uint count)
+{
+ MARIA_BITMAP_BLOCK *block, *end_block;
+ uint copy_length;
+ my_bool first_found= 0;
+
+ for (block= first_block, end_block= first_block+count ;
+ block < end_block; block++)
+ {
+ /* The following is only false for marker blocks */
+ if (likely(block->used))
+ {
+ int5store(to, block->page);
+ int2store(to + 5, block->page_count);
+ to+= ROW_EXTENT_SIZE;
+ if (!first_found)
+ {
+ first_found= 1;
+ to= row_extents_second_part;
+ }
+ }
+ }
+ copy_length= (count -1) * ROW_EXTENT_SIZE;
+ /*
+    In some unlikely cases we have allocated too many blocks. Clear this
+ data.
+ */
+ bzero(to, (my_size_t) (row_extents_second_part + copy_length - to));
+}
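+
+/*
+  Illustrative example for store_extent_info() (not part of the original
+  code; the numbers are made up): with three used blocks
+  { (page 6, 128 pages), (page 5, slot 1 | TAIL_BIT), (page 134, 129 pages) },
+  the first 7-byte extent (5-byte page + 2-byte count) is written at 'to'
+  inside the row header and the remaining two at row_extents_second_part;
+  marker blocks with block->used == 0 are skipped entirely.
+*/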
+
+/*
+ Write a record to a (set of) pages
+*/
+
+static my_bool write_block_record(MARIA_HA *info, const byte *record,
+ MARIA_ROW *row,
+ MARIA_BITMAP_BLOCKS *bitmap_blocks,
+ struct st_row_pos_info *row_pos)
+{
+ byte *data, *end_of_data, *tmp_data_used, *tmp_data;
+ byte *row_extents_first_part, *row_extents_second_part;
+ byte *field_length_data;
+ byte *page_buff;
+ MARIA_BITMAP_BLOCK *block, *head_block;
+ MARIA_SHARE *share;
+ MARIA_COLUMNDEF *rec, *end_field;
+ uint block_size, flag;
+ ulong *blob_lengths;
+ my_off_t position;
+ my_bool row_extents_in_use;
+ DBUG_ENTER("write_block_record");
+
+ LINT_INIT(row_extents_first_part);
+ LINT_INIT(row_extents_second_part);
+
+ share= info->s;
+ head_block= bitmap_blocks->block;
+ block_size= share->block_size;
+
+ info->cur_row.lastpos= ma_recordpos(head_block->page, row_pos->offset);
+ page_buff= row_pos->buff;
+ data= row_pos->data;
+ end_of_data= data + row_pos->length;
+
+ /* Write header */
+ flag= share->base.default_row_flag;
+ row_extents_in_use= 0;
+ if (unlikely(row->total_length > row_pos->length))
+ {
+ /* Need extent */
+ if (bitmap_blocks->count <= 1)
+ goto crashed; /* Wrong in bitmap */
+ flag|= ROW_FLAG_EXTENTS;
+ row_extents_in_use= 1;
+ }
+ /* For now we have only a minimum header */
+ *data++= (uchar) flag;
+ if (unlikely(flag & ROW_FLAG_NULLS_EXTENDED))
+ *data++= (uchar) (share->base.null_bytes -
+ share->base.original_null_bytes);
+ if (row_extents_in_use)
+ {
+ /* Store first extent in header */
+ store_key_length_inc(data, bitmap_blocks->count - 1);
+ row_extents_first_part= data;
+ data+= ROW_EXTENT_SIZE;
+ }
+ if (share->base.pack_fields)
+ store_key_length_inc(data, row->field_lengths_length);
+ if (share->calc_checksum)
+ *(data++)= (byte) info->cur_row.checksum;
+ memcpy(data, record, share->base.null_bytes);
+ data+= share->base.null_bytes;
+ memcpy(data, row->empty_bits, share->base.pack_bytes);
+ data+= share->base.pack_bytes;
+
+ /*
+ Allocate a buffer of rest of data (except blobs)
+
+ To avoid double copying of data, we copy as many columns as fit into
+ the page. The rest goes into info->rec_buff.
+
+ Using an extra buffer, instead of doing continuous writes to different
+ pages, uses less code and we don't have to do a complex call
+ for every data segment we want to store.
+ */
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ row->head_length))
+ DBUG_RETURN(1);
+
+ tmp_data_used= 0; /* Either 0 or last used byte in 'data' */
+ tmp_data= data;
+
+ if (row_extents_in_use)
+ {
+ uint copy_length= (bitmap_blocks->count - 2) * ROW_EXTENT_SIZE;
+ if (!tmp_data_used && tmp_data + copy_length > end_of_data)
+ {
+ tmp_data_used= tmp_data;
+ tmp_data= info->rec_buff;
+ }
+ row_extents_second_part= tmp_data;
+ /*
+ We will copy the extents here when we have figured out the tail
+ positions.
+ */
+ tmp_data+= copy_length;
+ }
+
+ /* Copy fields that have fixed lengths (primary key etc.) */
+ for (rec= share->rec, end_field= rec + share->base.fixed_not_null_fields;
+ rec < end_field; rec++)
+ {
+ if (!tmp_data_used && tmp_data + rec->length > end_of_data)
+ {
+ tmp_data_used= tmp_data;
+ tmp_data= info->rec_buff;
+ }
+ memcpy(tmp_data, record + rec->offset, rec->length);
+ tmp_data+= rec->length;
+ }
+
+ /* Copy length of data for variable length fields */
+ if (!tmp_data_used && tmp_data + row->field_lengths_length > end_of_data)
+ {
+ tmp_data_used= tmp_data;
+ tmp_data= info->rec_buff;
+ }
+ field_length_data= row->field_lengths;
+ memcpy(tmp_data, field_length_data, row->field_lengths_length);
+ tmp_data+= row->field_lengths_length;
+
+ /* Copy variable length fields and fields with null/zero */
+ for (end_field= share->rec + share->base.fields - share->base.blobs;
+ rec < end_field ;
+ rec++)
+ {
+ const byte *field_pos;
+ ulong length;
+ if ((record[rec->null_pos] & rec->null_bit) ||
+ (row->empty_bits[rec->empty_pos] & rec->empty_bit))
+ continue;
+
+ field_pos= record + rec->offset;
+ switch ((enum en_fieldtype) rec->type) {
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_SKIP_PRESPACE:
+ case FIELD_SKIP_ZERO: /* Fixed length field */
+ length= rec->length;
+ break;
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ /* Char that is space filled */
+ if (rec->length <= 255)
+ length= (uint) (uchar) *field_length_data++;
+ else
+ {
+ length= uint2korr(field_length_data);
+ field_length_data+= 2;
+ }
+ break;
+ case FIELD_VARCHAR:
+ if (rec->length <= 256)
+ {
+ length= (uint) (uchar) *field_length_data++;
+ field_pos++; /* Skip length byte */
+ }
+ else
+ {
+ length= uint2korr(field_length_data);
+ field_length_data+= 2;
+ field_pos+= 2;
+ }
+ break;
+ default: /* Wrong data */
+ DBUG_ASSERT(0);
+ break;
+ }
+ if (!tmp_data_used && tmp_data + length > end_of_data)
+ {
+ /* Data didn't fit in page; Change to use tmp buffer */
+ tmp_data_used= tmp_data;
+ tmp_data= info->rec_buff;
+ }
+ memcpy((char*) tmp_data, (char*) field_pos, length);
+ tmp_data+= length;
+ }
+
+ block= head_block + head_block->sub_blocks; /* Point to first blob data */
+
+ end_field= rec + share->base.blobs;
+ blob_lengths= row->blob_lengths;
+ if (!tmp_data_used)
+ {
+ /* Still room on page; Copy as many blobs as we can into this page */
+ data= tmp_data;
+ for (; rec < end_field && *blob_lengths < (ulong) (end_of_data - data);
+ rec++, blob_lengths++)
+ {
+ byte *tmp_pos;
+ uint length;
+ if (!*blob_lengths) /* Null or "" */
+ continue;
+ length= rec->length - maria_portable_sizeof_char_ptr;
+ memcpy_fixed((byte*) &tmp_pos, record + rec->offset + length,
+ sizeof(char*));
+ memcpy(data, tmp_pos, *blob_lengths);
+ data+= *blob_lengths;
+ /* Skip over tail page that was to be used to store blob */
+ block++;
+ bitmap_blocks->tail_page_skipped= 1;
+ }
+ if (head_block->sub_blocks > 1)
+ {
+ /* We have allocated pages that were not used */
+ bitmap_blocks->page_skipped= 1;
+ }
+ }
+ else
+ data= tmp_data_used; /* Get last used on page */
+
+ {
+ /* Update page directory */
+ uint length= (uint) (data - row_pos->data);
+ DBUG_PRINT("info", ("head length: %u", length));
+ if (length < info->s->base.min_row_length)
+ length= info->s->base.min_row_length;
+
+ int2store(row_pos->dir + 2, length);
+ /* update empty space at start of block */
+ row_pos->empty_space-= length;
+ int2store(page_buff + EMPTY_SPACE_OFFSET, row_pos->empty_space);
+ /* Mark in bitmaps how the current page was actually used */
+ head_block->empty_space= row_pos->empty_space;
+ if (page_buff[DIR_ENTRY_OFFSET] == (char) MAX_ROWS_PER_PAGE)
+ head_block->empty_space= 0; /* Page is full */
+ head_block->used= BLOCKUSED_USED;
+ }
+
+ /*
+ Now we have to write tail pages, as we need to store the position
+ to them in the row extent header.
+
+ We first write out all blob tails, to be able to store them in
+ the current page or 'tmp_data'.
+
+ Then we write the tail of the non-blob fields (The position to the
+ tail page is stored either in row header, the extents in the head
+ page or in the first full page of the non-blob data. It's never in
+ the tail page of the non-blob data)
+ */
+
+ if (row_extents_in_use)
+ {
+ if (rec != end_field) /* If blob fields */
+ {
+ MARIA_COLUMNDEF *save_rec= rec;
+ MARIA_BITMAP_BLOCK *save_block= block;
+ MARIA_BITMAP_BLOCK *end_block;
+ ulong *save_blob_lengths= blob_lengths;
+
+ for (; rec < end_field; rec++, blob_lengths++)
+ {
+ byte *blob_pos;
+ if (!*blob_lengths) /* Null or "" */
+ continue;
+ if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL)
+ {
+ uint length;
+ length= rec->length - maria_portable_sizeof_char_ptr;
+ memcpy_fixed((byte *) &blob_pos, record + rec->offset + length,
+ sizeof(char*));
+ length= *blob_lengths % FULL_PAGE_SIZE(block_size); /* tail size */
+ if (write_tail(info, block + block->sub_blocks-1,
+ blob_pos + *blob_lengths - length,
+ length))
+ goto disk_err;
+ }
+ for (end_block= block + block->sub_blocks; block < end_block; block++)
+ {
+ /*
+ Set only a bit, to not cause the bitmap code to believe a block is full
+ when there are still a lot of entries in it
+ */
+ block->used|= BLOCKUSED_USED;
+ }
+ }
+ rec= save_rec;
+ block= save_block;
+ blob_lengths= save_blob_lengths;
+ }
+
+ if (tmp_data_used) /* non blob data overflows */
+ {
+ MARIA_BITMAP_BLOCK *cur_block, *end_block;
+ MARIA_BITMAP_BLOCK *head_tail_block= 0;
+ ulong length;
+ ulong data_length= (tmp_data - info->rec_buff);
+
+#ifdef SANITY_CHECK
+ if (head_block->sub_blocks == 1)
+ goto crashed; /* no reserved full or tails */
+#endif
+
+ /*
+ Find out where to write tail for non-blob fields.
+
+ Problem here is that the bitmap code may have allocated more
+ space than we need. We have to handle the following cases:
+
+ - Bitmap code allocated a tail page we don't need.
+ - The last full page allocated needs to be changed to a tail page
+ (Because we put more data than we thought on the head page)
+
+ The reserved pages in bitmap_blocks for the main page have one of
+ the following allocations:
+ - Full pages, with following blocks:
+ # * full pages
+ empty page ; To be used if we change last full to tail page. This
+ has 'count' = 0.
+ tail page (optional, if the last full page was only partly full)
+ - One tail page
+ */
+
+ cur_block= head_block + 1;
+ end_block= head_block + head_block->sub_blocks;
+ while (data_length >= (length= (cur_block->page_count *
+ FULL_PAGE_SIZE(block_size))))
+ {
+#ifdef SANITY_CHECK
+ if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_BIT))
+ goto crashed;
+#endif
+ data_length-= length;
+ (cur_block++)->used= BLOCKUSED_USED;
+ }
+ if (data_length)
+ {
+#ifdef SANITY_CHECK
+ if ((cur_block == end_block))
+ goto crashed;
+#endif
+ if (cur_block->used & BLOCKUSED_TAIL)
+ {
+ DBUG_ASSERT(data_length < MAX_TAIL_SIZE(block_size));
+ /* tail written to full tail page */
+ cur_block->used= BLOCKUSED_USED;
+ head_tail_block= cur_block;
+ }
+ else if (data_length > length - MAX_TAIL_SIZE(block_size))
+ {
+ /* tail written to full page */
+ cur_block->used= BLOCKUSED_USED;
+ if ((cur_block != end_block - 1) &&
+ (end_block[-1].used & BLOCKUSED_TAIL))
+ bitmap_blocks->tail_page_skipped= 1;
+ }
+ else
+ {
+ /*
+ cur_block is a full block, followed by an empty and optional
+ tail block. Change cur_block to a tail block or split it
+ into full blocks and tail blocks.
+ */
+ DBUG_ASSERT(cur_block[1].page_count == 0);
+ if (cur_block->page_count == 1)
+ {
+ /* convert full block to tail block */
+ cur_block->used= BLOCKUSED_USED | BLOCKUSED_TAIL;
+ head_tail_block= cur_block;
+ }
+ else
+ {
+ DBUG_ASSERT(data_length < length - FULL_PAGE_SIZE(block_size));
+ DBUG_PRINT("info", ("Splitting blocks into full and tail"));
+ cur_block[1].page= (cur_block->page + cur_block->page_count - 1);
+ cur_block[1].page_count= 1;
+ cur_block[1].used= 1;
+ cur_block->page_count--;
+ cur_block->used= BLOCKUSED_USED | BLOCKUSED_TAIL;
+ head_tail_block= cur_block + 1;
+ }
+ if (end_block[-1].used & BLOCKUSED_TAIL)
+ bitmap_blocks->tail_page_skipped= 1;
+ }
+ }
+ else
+ {
+ /* Must be an empty or tail page */
+ DBUG_ASSERT(cur_block->page_count == 0 ||
+ cur_block->used & BLOCKUSED_TAIL);
+ if (end_block[-1].used & BLOCKUSED_TAIL)
+ bitmap_blocks->tail_page_skipped= 1;
+ }
+
+ /*
+ Write all extents into page or tmp_buff
+
+ Note that we still don't have a correct position for the tail
+ of the non-blob fields.
+ */
+ store_extent_info(row_extents_first_part,
+ row_extents_second_part,
+ head_block+1, bitmap_blocks->count - 1);
+ if (head_tail_block)
+ {
+ ulong data_length= (tmp_data - info->rec_buff);
+ uint length;
+ byte *extent_data;
+
+ length= (uint) (data_length % FULL_PAGE_SIZE(block_size));
+ if (write_tail(info, head_tail_block, data + data_length - length,
+ length))
+ goto disk_err;
+ tmp_data-= length; /* Remove the tail */
+
+ /* Store the tail position for the non-blob fields */
+ if (head_tail_block == head_block + 1)
+ extent_data= row_extents_first_part;
+ else
+ extent_data= row_extents_second_part +
+ ((head_tail_block - head_block) - 2) * ROW_EXTENT_SIZE;
+ int5store(extent_data, head_tail_block->page);
+ int2store(extent_data + 5, head_tail_block->page_count);
+ }
+ }
+ else
+ store_extent_info(row_extents_first_part,
+ row_extents_second_part,
+ head_block+1, bitmap_blocks->count - 1);
+ }
+ /* Increase data file size, if extended */
+ position= (my_off_t) head_block->page * block_size;
+ if (info->state->data_file_length <= position)
+ info->state->data_file_length= position + block_size;
+ if (key_cache_write(share->key_cache,
+ info->dfile, position, 0,
+ page_buff, share->block_size, share->block_size, 1))
+ goto disk_err;
+
+ if (tmp_data_used)
+ {
+ /* Write data stored in info->rec_buff to pages */
+ DBUG_ASSERT(bitmap_blocks->count != 0);
+ if (write_full_pages(info, bitmap_blocks->block + 1, info->rec_buff,
+ (ulong) (tmp_data - info->rec_buff)))
+ goto disk_err;
+ }
+
+ /* Write rest of blobs (data, but no tails as they are already written) */
+ for (; rec < end_field; rec++, blob_lengths++)
+ {
+ byte *blob_pos;
+ uint length;
+ ulong blob_length;
+ if (!*blob_lengths) /* Null or "" */
+ continue;
+ length= rec->length - maria_portable_sizeof_char_ptr;
+ memcpy_fixed((byte*) &blob_pos, record + rec->offset + length,
+ sizeof(char*));
+ /* remove tail part */
+ blob_length= *blob_lengths;
+ if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL)
+ blob_length-= (blob_length % FULL_PAGE_SIZE(block_size));
+
+ if (write_full_pages(info, block, blob_pos, blob_length))
+ goto disk_err;
+ block+= block->sub_blocks;
+ }
+ /* Release unused space in used pages */
+ if (_ma_bitmap_release_unused(info, bitmap_blocks))
+ goto disk_err;
+ DBUG_RETURN(0);
+
+crashed:
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+disk_err:
+ /* Something was wrong with data on record */
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Write a record (to get the row id for it)
+
+ SYNOPSIS
+ _ma_write_init_block_record()
+ info Maria handler
+ record Record to write
+
+ NOTES
+ This is done BEFORE we write the keys to the row!
+
+ RETURN
+ HA_OFFSET_ERROR Something went wrong
+ # Rowid for row
+*/
+
+MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info,
+ const byte *record)
+{
+ MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks;
+ struct st_row_pos_info row_pos;
+ DBUG_ENTER("_ma_write_init_block_record");
+
+ calc_record_size(info, record, &info->cur_row);
+ if (_ma_bitmap_find_place(info, &info->cur_row, blocks))
+ DBUG_RETURN(HA_OFFSET_ERROR); /* Error reading bitmap */
+ if (get_head_or_tail_page(info, blocks->block, info->buff,
+ info->s->base.min_row_length, HEAD_PAGE, &row_pos))
+ DBUG_RETURN(HA_OFFSET_ERROR);
+ info->cur_row.lastpos= ma_recordpos(blocks->block->page, row_pos.offset);
+ if (info->s->calc_checksum)
+ info->cur_row.checksum= (info->s->calc_checksum)(info,record);
+ if (write_block_record(info, record, &info->cur_row, blocks, &row_pos))
+ DBUG_RETURN(HA_OFFSET_ERROR); /* Error writing block record */
+ DBUG_PRINT("exit", ("Rowid: %lu", (ulong) info->cur_row.lastpos));
+ DBUG_RETURN(info->cur_row.lastpos);
+}
+
+
+/*
+ Dummy function for (*info->s->write_record)()
+
+ Nothing to do here, as we already wrote the record in
+ _ma_write_init_block_record()
+*/
+
+my_bool _ma_write_block_record(MARIA_HA *info __attribute__ ((unused)),
+ const byte *record __attribute__ ((unused)))
+{
+ return 0; /* Row already written */
+}
+
+
+/*
+ Remove row written by _ma_write_block_record
+
+ SYNOPSIS
+ _ma_write_abort_block_record()
+ info Maria handler
+
+ INFORMATION
+ This is called in case we got a duplicate unique key while
+ writing keys.
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+my_bool _ma_write_abort_block_record(MARIA_HA *info)
+{
+ my_bool res= 0;
+ MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks;
+ MARIA_BITMAP_BLOCK *block, *end;
+ DBUG_ENTER("_ma_abort_write_block_record");
+
+ if (delete_head_or_tail(info,
+ ma_recordpos_to_page(info->cur_row.lastpos),
+ ma_recordpos_to_offset(info->cur_row.lastpos), 1))
+ res= 1;
+ for (block= blocks->block + 1, end= block + blocks->count - 1; block < end;
+ block++)
+ {
+ if (block->used & BLOCKUSED_TAIL)
+ {
+ /*
+ block->page_count is set to the tail directory entry number in
+ write_block_record()
+ */
+ if (delete_head_or_tail(info, block->page, block->page_count & ~TAIL_BIT,
+ 0))
+ res= 1;
+ }
+ else
+ {
+ pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
+ if (_ma_reset_full_page_bits(info, &info->s->bitmap, block->page,
+ block->page_count))
+ res= 1;
+ pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ }
+ }
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Update a record
+
+ NOTES
+ For the moment, we assume that info->cur_row.extents is always updated
+ when a row is read. In the future we may decide to read this on demand
+ for rows split into many extents.
+*/
+
+my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS record_pos,
+ const byte *record)
+{
+ MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks;
+ byte *buff;
+ MARIA_ROW *cur_row= &info->cur_row, *new_row= &info->new_row;
+ uint rownr, org_empty_size, head_length;
+ uint block_size= info->s->block_size;
+ byte *dir;
+ ulonglong page;
+ struct st_row_pos_info row_pos;
+ DBUG_ENTER("_ma_update_block_record");
+ DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos));
+
+ calc_record_size(info, record, new_row);
+ page= ma_recordpos_to_page(record_pos);
+
+ if (!(buff= key_cache_read(info->s->key_cache,
+ info->dfile, (my_off_t) page * block_size, 0,
+ info->buff, block_size, block_size, 0)))
+ DBUG_RETURN(1);
+ org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET);
+ rownr= ma_recordpos_to_offset(record_pos);
+ dir= (buff + block_size - DIR_ENTRY_SIZE * rownr -
+ DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE);
+
+ if ((org_empty_size + cur_row->head_length) >= new_row->total_length)
+ {
+ uint empty, offset, length;
+ MARIA_BITMAP_BLOCK block;
+
+ /*
+ We can fit the new row in the same page as the original head part
+ of the row
+ */
+ block.org_bitmap_value= _ma_free_size_to_head_pattern(&info->s->bitmap,
+ org_empty_size);
+ offset= uint2korr(dir);
+ length= uint2korr(dir + 2);
+ empty= 0;
+ if (new_row->total_length > length)
+ {
+ /* See if there is empty space after */
+ if (rownr != (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET] - 1)
+ empty= empty_pos_after_row(dir) - (offset + length);
+ if (new_row->total_length > length + empty)
+ {
+ compact_page(buff, info->s->block_size, rownr);
+ org_empty_size= 0;
+ length= uint2korr(dir + 2);
+ }
+ }
+ row_pos.buff= buff;
+ row_pos.offset= rownr;
+ row_pos.empty_space= org_empty_size + length;
+ row_pos.dir= dir;
+ row_pos.data= buff + uint2korr(dir);
+ row_pos.length= length + empty;
+ blocks->block= &block;
+ blocks->count= 1;
+ block.page= page;
+ block.sub_blocks= 1;
+ block.used= BLOCKUSED_USED | BLOCKUSED_USE_ORG_BITMAP;
+ block.empty_space= row_pos.empty_space;
+ /* Update cur_row in case update is called again right away */
+ cur_row->head_length= new_row->total_length;
+ if (_ma_bitmap_free_full_pages(info, cur_row->extents,
+ cur_row->extents_count))
+ DBUG_RETURN(1);
+ DBUG_RETURN(write_block_record(info, record, new_row, blocks, &row_pos));
+ }
+ /*
+ Allocate all size in block for record
+ QQ: Need to improve this to do compact if we can fit one more blob into
+ the head page
+ */
+ head_length= uint2korr(dir + 2);
+ if (buff[PAGE_TYPE_OFFSET] & PAGE_CAN_BE_COMPACTED && org_empty_size &&
+ (head_length < new_row->head_length ||
+ (new_row->total_length <= head_length &&
+ org_empty_size + head_length >= new_row->total_length)))
+ {
+ compact_page(buff, info->s->block_size, rownr);
+ org_empty_size= 0;
+ head_length= uint2korr(dir + 2);
+ }
+
+ /* Delete old row */
+ if (delete_tails(info, cur_row->tail_positions))
+ DBUG_RETURN(1);
+ if (_ma_bitmap_free_full_pages(info, cur_row->extents,
+ cur_row->extents_count))
+ DBUG_RETURN(1);
+ if (_ma_bitmap_find_new_place(info, new_row, page, head_length, blocks))
+ DBUG_RETURN(1);
+
+ row_pos.buff= buff;
+ row_pos.offset= rownr;
+ row_pos.empty_space= org_empty_size + head_length;
+ row_pos.dir= dir;
+ row_pos.data= buff + uint2korr(dir);
+ row_pos.length= head_length;
+ DBUG_RETURN(write_block_record(info, record, new_row, blocks, &row_pos));
+}
+
+
+/*
+ Delete a head or tail part
+
+ SYNOPSIS
+ delete_head_or_tail()
+ info Maria handler
+ page Page (not file offset!) on which the row is
+ record_number Record number (directory entry) on the page
+ head 1 if this is a head page
+
+ NOTES
+ Uses info->keyread_buff
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool delete_head_or_tail(MARIA_HA *info,
+ ulonglong page, uint record_number,
+ my_bool head)
+{
+ MARIA_SHARE *share= info->s;
+ uint number_of_records, empty_space, length;
+ uint block_size= share->block_size;
+ byte *buff, *dir;
+ my_off_t position;
+ DBUG_ENTER("delete_head_or_tail");
+
+ info->keybuff_used= 1;
+ if (!(buff= key_cache_read(share->key_cache,
+ info->dfile, page * block_size, 0,
+ info->keyread_buff,
+ block_size, block_size, 0)))
+ DBUG_RETURN(1);
+
+ number_of_records= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET];
+#ifdef SANITY_CHECKS
+ if (record_number >= number_of_records ||
+ record_number > MAX_ROWS_PER_PAGE ||
+ record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 -
+ PAGE_SUFFIX_SIZE) / (DIR_ENTRY_SIZE + MIN_TAIL_SIZE)))
+ {
+ DBUG_PRINT("error", ("record_number: %u number_of_records: %u",
+ record_number, number_of_records));
+ DBUG_RETURN(1);
+ }
+#endif
+
+ dir= (buff + block_size - DIR_ENTRY_SIZE * record_number -
+ DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE);
+ dir[0]= dir[1]= 0; /* Delete entry */
+ length= uint2korr(dir + 2);
+ empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
+
+ if (record_number == number_of_records - 1)
+ {
+ /* Delete this entry and all following empty directory entries */
+ byte *end= buff + block_size - PAGE_SUFFIX_SIZE;
+ do
+ {
+ number_of_records--;
+ dir+= DIR_ENTRY_SIZE;
+ empty_space+= DIR_ENTRY_SIZE;
+ } while (dir < end && dir[0] == 0 && dir[1] == 0);
+ buff[DIR_ENTRY_OFFSET]= (byte) (uchar) number_of_records;
+ }
+ empty_space+= length;
+ if (number_of_records != 0)
+ {
+ int2store(buff + EMPTY_SPACE_OFFSET, empty_space);
+ buff[PAGE_TYPE_OFFSET]|= (byte) PAGE_CAN_BE_COMPACTED;
+ position= (my_off_t) page * block_size;
+ if (key_cache_write(share->key_cache,
+ info->dfile, position, 0,
+ buff, block_size, block_size, 1))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ DBUG_ASSERT(empty_space >= info->s->bitmap.sizes[0]);
+ }
+ DBUG_PRINT("info", ("empty_space: %u", empty_space));
+ DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space));
+}
+
+
+/*
+ delete all tails
+
+ SYNOPSIS
+ delete_tails()
+ info Handler
+ tails Pointer to vector of tail positions, ending with 0
+
+ NOTES
+ Uses info->keyread_buff
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails)
+{
+ my_bool res= 0;
+ DBUG_ENTER("delete_tails");
+ for (; *tails; tails++)
+ {
+ if (delete_head_or_tail(info,
+ ma_recordpos_to_page(*tails),
+ ma_recordpos_to_offset(*tails), 0))
+ res= 1;
+ }
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Delete a record
+
+ NOTES
+ For the moment, we assume that info->cur_row.extents is always updated
+ when a row is read. In the future we may decide to read this on demand
+ for rows with many splits.
+*/
+
+my_bool _ma_delete_block_record(MARIA_HA *info)
+{
+ DBUG_ENTER("_ma_delete_block_record");
+ if (delete_head_or_tail(info,
+ ma_recordpos_to_page(info->cur_row.lastpos),
+ ma_recordpos_to_offset(info->cur_row.lastpos),
+ 1) ||
+ delete_tails(info, info->cur_row.tail_positions))
+ DBUG_RETURN(1);
+ DBUG_RETURN(_ma_bitmap_free_full_pages(info, info->cur_row.extents,
+ info->cur_row.extents_count));
+}
+
+
+/****************************************************************************
+ Reading of records
+****************************************************************************/
+
+/*
+ Read position to record from record directory at end of page
+
+ SYNOPSIS
+ get_record_position()
+ buff page buffer
+ block_size block size for page
+ record_number Record number in page directory
+ end_of_data pointer to end of data for record
+
+ RETURN
+ 0 Error in data
+ # Pointer to start of record.
+ In this case *end_of_data is set.
+*/
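+
+/*
+ Directory layout sketch (example with a hypothetical 8192 byte block and
+ the current PAGE_SUFFIX_SIZE of 0; each entry is DIR_ENTRY_SIZE = 4 bytes,
+ a 2 byte offset followed by a 2 byte length):
+
+ entry 0: bytes 8188..8191
+ entry 1: bytes 8184..8187
+ entry n: starts at block_size - (n + 1) * DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE
+
+ The directory grows backwards from the end of the page while row data
+ grows forwards from PAGE_HEADER_SIZE.
+*/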
+
+static byte *get_record_position(byte *buff, uint block_size,
+ uint record_number, byte **end_of_data)
+{
+ uint number_of_records= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET];
+ byte *dir;
+ byte *data;
+ uint offset, length;
+
+#ifdef SANITY_CHECKS
+ if (record_number >= number_of_records ||
+ record_number > MAX_ROWS_PER_PAGE ||
+ record_number > ((block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE) /
+ (DIR_ENTRY_SIZE + MIN_TAIL_SIZE)))
+ {
+ DBUG_PRINT("error",
+ ("Wrong row number: record_number: %u number_of_records: %u",
+ record_number, number_of_records));
+ return 0;
+ }
+#endif
+
+ dir= (buff + block_size - DIR_ENTRY_SIZE * record_number -
+ DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE);
+ offset= uint2korr(dir);
+ length= uint2korr(dir + 2);
+#ifdef SANITY_CHECKS
+ if (offset < PAGE_HEADER_SIZE ||
+ offset + length > (block_size -
+ number_of_records * DIR_ENTRY_SIZE -
+ PAGE_SUFFIX_SIZE))
+ {
+ DBUG_PRINT("error",
+ ("Wrong row position: record_number: %u offset: %u "
+ "length: %u number_of_records: %u",
+ record_number, offset, length, number_of_records));
+ return 0;
+ }
+#endif
+ data= buff + offset;
+ *end_of_data= data + length;
+ return data;
+}
+
+
+/*
+ Init extent
+
+ NOTES
+ 'extent' is used as a cursor over the pages to read
+*/
+
+static void init_extent(MARIA_EXTENT_CURSOR *extent, byte *extent_info,
+ uint extents, MARIA_RECORD_POS *tail_positions)
+{
+ uint page_count;
+ extent->extent= extent_info;
+ extent->extent_count= extents;
+ extent->page= uint5korr(extent_info); /* First extent */
+ page_count= uint2korr(extent_info+5);
+ extent->page_count= page_count & ~TAIL_BIT;
+ extent->tail= page_count & TAIL_BIT;
+ extent->tail_positions= tail_positions;
+}
+
+
+/*
+ Read next extent
+
+ SYNOPSIS
+ read_next_extent()
+ info Maria handler
+ extent Pointer to current extent (this is updated to point
+ to next)
+ end_of_data Pointer to end of data in read block (out)
+
+ NOTES
+ New block is read into info->buff
+
+ RETURN
+ 0 Error; my_errno is set
+ # Pointer to start of data in read block
+ In this case end_of_data is updated to point to end of data.
+*/
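+
+/*
+ Illustrative walk-through (page numbers are made up): for a row with
+ extents { page=100, page_count=3 } and { page=200, page_count=TAIL_BIT|5 }
+ the cursor first returns full pages 100, 101 and 102, one
+ key_cache_read() at a time, and then reads tail page 200 and positions
+ on directory entry 5 inside it, after which page_count is 0.
+*/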
+
+static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent,
+ byte **end_of_data)
+{
+ MARIA_SHARE *share= info->s;
+ byte *buff, *data;
+ DBUG_ENTER("read_next_extent");
+
+ if (!extent->page_count)
+ {
+ uint page_count;
+ if (!--extent->extent_count)
+ goto crashed;
+ extent->extent+= ROW_EXTENT_SIZE;
+ extent->page= uint5korr(extent->extent);
+ page_count= uint2korr(extent->extent+ROW_EXTENT_PAGE_SIZE);
+ extent->tail= page_count & TAIL_BIT;
+ extent->page_count= (page_count & ~TAIL_BIT);
+ extent->first_extent= 0;
+ DBUG_PRINT("info",("New extent. Page: %lu page_count: %u tail_flag: %d",
+ (ulong) extent->page, extent->page_count,
+ extent->tail != 0));
+ }
+
+ if (info->cur_row.empty_bits != info->cur_row.empty_bits_buffer)
+ {
+ /*
+ First read of extents: Move data from info->buff to
+ internal buffers.
+ */
+ memcpy(info->cur_row.empty_bits_buffer, info->cur_row.empty_bits,
+ share->base.pack_bytes);
+ info->cur_row.empty_bits= info->cur_row.empty_bits_buffer;
+ }
+
+ if (!(buff= key_cache_read(share->key_cache,
+ info->dfile, extent->page * share->block_size, 0,
+ info->buff,
+ share->block_size, share->block_size, 0)))
+ {
+ /* check if we tried to read over end of file (ie: bad data in record) */
+ if ((extent->page + 1) * share->block_size > info->state->data_file_length)
+ goto crashed;
+ DBUG_RETURN(0);
+ }
+ if (!extent->tail)
+ {
+ /* Full data page */
+ DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == BLOB_PAGE);
+ extent->page++; /* point to next page */
+ extent->page_count--;
+ *end_of_data= buff + share->block_size;
+ info->cur_row.full_page_count++; /* For maria_chk */
+ DBUG_RETURN(extent->data_start= buff + LSN_SIZE + PAGE_TYPE_SIZE);
+ }
+ /* Found tail. page_count is in this case the position in the tail page */
+
+ DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == TAIL_PAGE);
+ *(extent->tail_positions++)= ma_recordpos(extent->page,
+ extent->page_count);
+ info->cur_row.tail_count++; /* For maria_chk */
+
+ if (!(data= get_record_position(buff, share->block_size,
+ extent->page_count,
+ end_of_data)))
+ goto crashed;
+ extent->data_start= data;
+ extent->page_count= 0; /* No more data in extent */
+ DBUG_RETURN(data);
+
+
+crashed:
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_PRINT("error", ("wrong extent information"));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Read data that may be split over many blocks
+
+ SYNOPSIS
+ read_long_data()
+ info Maria handler
+ to Store result string here (this is allocated)
+ extent Pointer to current extent position
+ data Current position in buffer
+ end_of_data End of data in buffer
+
+ NOTES
+ When we have to read a new buffer, it's read into info->buff
+
+ This loop is implemented with goto's instead of a for() loop as
+ the code is notably smaller and faster this way (and it's not nice
+ to jump into a for() loop or into a 'then' clause)
+
+ RETURN
+ 0 ok
+ 1 error
+*/
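+
+/*
+ Sketch (lengths are made up): to read 10000 bytes when only 3000 remain
+ before *end_of_data, the first 3000 bytes are copied, read_next_extent()
+ maps in the next page and the loop repeats until the remaining 7000
+ bytes have been copied or no further extent is available (error).
+*/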
+
+static my_bool read_long_data(MARIA_HA *info, byte *to, ulong length,
+ MARIA_EXTENT_CURSOR *extent,
+ byte **data, byte **end_of_data)
+{
+ DBUG_ENTER("read_long_data");
+ DBUG_PRINT("enter", ("length: %lu", length));
+ DBUG_ASSERT(*data <= *end_of_data);
+
+ for(;;)
+ {
+ uint left_length;
+ left_length= (uint) (*end_of_data - *data);
+ if (likely(left_length >= length))
+ {
+ memcpy(to, *data, length);
+ (*data)+= length;
+ DBUG_RETURN(0);
+ }
+ memcpy(to, *data, left_length);
+ to+= left_length;
+ length-= left_length;
+ if (!(*data= read_next_extent(info, extent, end_of_data)))
+ break;
+ }
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Read a record from page (helper function for _ma_read_block_record())
+
+ SYNOPSIS
+ _ma_read_block_record2()
+ info Maria handler
+ record Store record here
+ data Start of head data for row
+ end_of_data End of data for row
+
+ NOTES
+ The head page is already read by caller
+ The following data is updated in info->cur_row:
+
+ cur_row.head_length is set to size of entry in head block
+ cur_row.tail_positions is set to point to all tail blocks
+ cur_row.extents points to extents data
+ cur_row.extents_count contains the number of extents
+ cur_row.empty_bits points to empty bits part in read record
+ cur_row.field_lengths contains packed length of all fields
+
+ RETURN
+ 0 ok
+ # Error code
+*/
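+
+/*
+ Sketch of the row header this function parses, as written by
+ write_block_record() above (all parts after the flag byte are optional
+ and depend on the flag and the table definition):
+
+ flag 1 byte
+ extra null bytes 1 byte (ROW_FLAG_NULLS_EXTENDED)
+ extent count + first extent (ROW_EXTENT_SIZE bytes) (ROW_FLAG_EXTENTS)
+ length of field length data (if base.pack_fields)
+ checksum 1 byte (if calc_checksum)
+ null bits base.null_bytes
+ empty bits base.pack_bytes
+*/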
+
+int _ma_read_block_record2(MARIA_HA *info, byte *record,
+ byte *data, byte *end_of_data)
+{
+ MARIA_SHARE *share= info->s;
+ byte *field_length_data, *blob_buffer, *start_of_data;
+ uint flag, null_bytes, cur_null_bytes, row_extents, field_lengths;
+ my_bool found_blob= 0;
+ MARIA_EXTENT_CURSOR extent;
+ MARIA_COLUMNDEF *rec, *end_field;
+ DBUG_ENTER("_ma_read_block_record2");
+
+ LINT_INIT(field_lengths);
+ LINT_INIT(field_length_data);
+ LINT_INIT(blob_buffer);
+
+ start_of_data= data;
+ flag= (uint) (uchar) data[0];
+ cur_null_bytes= share->base.original_null_bytes;
+ null_bytes= share->base.null_bytes;
+ info->cur_row.head_length= (uint) (end_of_data - data);
+ info->cur_row.full_page_count= info->cur_row.tail_count= 0;
+
+ /* Skip trans header (for now, until we have MVCC support) */
+ data+= total_header_size[(flag & PRECALC_HEADER_BITMASK)];
+ if (flag & ROW_FLAG_NULLS_EXTENDED)
+ cur_null_bytes+= data[-1];
+
+ row_extents= 0;
+ if (flag & ROW_FLAG_EXTENTS)
+ {
+ uint row_extent_size;
+ /*
+ Record is split over many data pages.
+ Get number of extents and first extent
+ */
+ get_key_length(row_extents, data);
+ info->cur_row.extents_count= row_extents;
+ row_extent_size= row_extents * ROW_EXTENT_SIZE;
+ if (info->cur_row.extents_buffer_length < row_extent_size &&
+ _ma_alloc_buffer(&info->cur_row.extents,
+ &info->cur_row.extents_buffer_length,
+ row_extent_size))
+ DBUG_RETURN(my_errno);
+ memcpy(info->cur_row.extents, data, ROW_EXTENT_SIZE);
+ data+= ROW_EXTENT_SIZE;
+ init_extent(&extent, info->cur_row.extents, row_extents,
+ info->cur_row.tail_positions);
+ }
+ else
+ {
+ info->cur_row.extents_count= 0;
+ (*info->cur_row.tail_positions)= 0;
+ extent.page_count= 0;
+ extent.extent_count= 1;
+ }
+ extent.first_extent= 1;
+
+ if (share->base.max_field_lengths)
+ {
+ get_key_length(field_lengths, data);
+#ifdef SANITY_CHECKS
+ if (field_lengths > share->base.max_field_lengths)
+ goto err;
+#endif
+ }
+
+ if (share->calc_checksum)
+ info->cur_row.checksum= (uint) (uchar) *data++;
+ /* data now points on null bits */
+ memcpy(record, data, cur_null_bytes);
+ if (unlikely(cur_null_bytes != null_bytes))
+ {
+ /*
+ This only happens if we have added more NULL columns with
+ ALTER TABLE and are fetching an old, not yet modified row
+ */
+ bzero(record + cur_null_bytes, (uint) (null_bytes - cur_null_bytes));
+ }
+ data+= null_bytes;
+ info->cur_row.empty_bits= (byte*) data; /* Pointer to empty bitmask */
+ data+= share->base.pack_bytes;
+
+ /* TODO: Use field offsets, instead of just skipping them */
+ data+= share->base.field_offsets * FIELD_OFFSET_SIZE;
+
+ /*
+ Read row extents (note that first extent was already read into
+ info->cur_row.extents above)
+ */
+ if (row_extents)
+ {
+ if (read_long_data(info, info->cur_row.extents + ROW_EXTENT_SIZE,
+ (row_extents - 1) * ROW_EXTENT_SIZE,
+ &extent, &data, &end_of_data))
+ DBUG_RETURN(my_errno);
+ }
+
+ /*
+ Data now points to start of fixed length field data that can't be null
+ or 'empty'. Note that these fields can't be split over blocks
+ */
+ for (rec= share->rec, end_field= rec + share->base.fixed_not_null_fields;
+ rec < end_field; rec++)
+ {
+ uint rec_length= rec->length;
+ if (data >= end_of_data &&
+ !(data= read_next_extent(info, &extent, &end_of_data)))
+ goto err;
+ memcpy(record + rec->offset, data, rec_length);
+ data+= rec_length;
+ }
+
+ /* Read array of field lengths. This may be stored in several extents */
+ if (share->base.max_field_lengths)
+ {
+ field_length_data= info->cur_row.field_lengths;
+ if (read_long_data(info, field_length_data, field_lengths, &extent,
+ &data, &end_of_data))
+ DBUG_RETURN(my_errno);
+ }
+
+ /* Read variable length data. Each of these may be split over many extents */
+ for (end_field= share->rec + share->base.fields; rec < end_field; rec++)
+ {
+ enum en_fieldtype type= (enum en_fieldtype) rec->type;
+ byte *field_pos= record + rec->offset;
+ /* First check if field is present in record */
+ if (record[rec->null_pos] & rec->null_bit)
+ continue;
+ else if (info->cur_row.empty_bits[rec->empty_pos] & rec->empty_bit)
+ {
+ if (type == FIELD_SKIP_ENDSPACE)
+ bfill(record + rec->offset, rec->length, ' ');
+ else
+ bzero(record + rec->offset, rec->fill_length);
+ continue;
+ }
+ switch (type) {
+ case FIELD_NORMAL: /* Fixed length field */
+ case FIELD_SKIP_PRESPACE:
+ case FIELD_SKIP_ZERO: /* Fixed length field */
+ if (data >= end_of_data &&
+ !(data= read_next_extent(info, &extent, &end_of_data)))
+ goto err;
+ memcpy(field_pos, data, rec->length);
+ data+= rec->length;
+ break;
+ case FIELD_SKIP_ENDSPACE: /* CHAR */
+ {
+ /* Char that is space filled */
+ uint length;
+ if (rec->length <= 255)
+ length= (uint) (uchar) *field_length_data++;
+ else
+ {
+ length= uint2korr(field_length_data);
+ field_length_data+= 2;
+ }
+#ifdef SANITY_CHECKS
+ if (length > rec->length)
+ goto err;
+#endif
+ if (read_long_data(info, field_pos, length, &extent, &data,
+ &end_of_data))
+ DBUG_RETURN(my_errno);
+ bfill(field_pos + length, rec->length - length, ' ');
+ break;
+ }
+ case FIELD_VARCHAR:
+ {
+ ulong length;
+ if (rec->length <= 256)
+ {
+ length= (uint) (uchar) (*field_pos++= *field_length_data++);
+ }
+ else
+ {
+ length= uint2korr(field_length_data);
+ field_pos[0]= field_length_data[0];
+ field_pos[1]= field_length_data[1];
+ field_pos+= 2;
+ field_length_data+= 2;
+ }
+ if (read_long_data(info, field_pos, length, &extent, &data,
+ &end_of_data))
+ DBUG_RETURN(my_errno);
+ break;
+ }
+ case FIELD_BLOB:
+ {
+ uint size_length= rec->length - maria_portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(size_length, field_length_data);
+
+ if (!found_blob)
+ {
+ /* Calculate total length for all blobs */
+ ulong blob_lengths= 0;
+ byte *length_data= field_length_data;
+ MARIA_COLUMNDEF *blob_field= rec;
+
+ found_blob= 1;
+ for (; blob_field < end_field; blob_field++)
+ {
+ uint size_length;
+ if ((record[blob_field->null_pos] & blob_field->null_bit) ||
+ (info->cur_row.empty_bits[blob_field->empty_pos] &
+ blob_field->empty_bit))
+ continue;
+ size_length= blob_field->length - maria_portable_sizeof_char_ptr;
+ blob_lengths+= _ma_calc_blob_length(size_length, length_data);
+ length_data+= size_length;
+ }
+ DBUG_PRINT("info", ("Total blob length: %lu", blob_lengths));
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ blob_lengths))
+ DBUG_RETURN(my_errno);
+ blob_buffer= info->rec_buff;
+ }
+
+ memcpy(field_pos, field_length_data, size_length);
+ memcpy_fixed(field_pos + size_length, (byte *) & blob_buffer,
+ sizeof(char*));
+ field_length_data+= size_length;
+
+ /*
+ After we have read one extent, each blob is in its own extent
+ */
+ if (extent.first_extent && (ulong) (end_of_data - data) < blob_length)
+ end_of_data= data; /* Force read of next extent */
+
+ if (read_long_data(info, blob_buffer, blob_length, &extent, &data,
+ &end_of_data))
+ DBUG_RETURN(my_errno);
+ blob_buffer+= blob_length;
+ break;
+ }
+ default:
+#ifdef EXTRA_DEBUG
+ DBUG_ASSERT(0); /* purecov: deadcode */
+#endif
+ goto err;
+ }
+ continue;
+ }
+
+ if (row_extents)
+ {
+ DBUG_PRINT("info", ("Row read: page_count: %u extent_count: %u",
+ extent.page_count, extent.extent_count));
+ *extent.tail_positions= 0; /* End marker */
+ if (extent.page_count)
+ goto err;
+ if (extent.extent_count > 1)
+ if (check_if_zero(extent.extent,
+ (extent.extent_count-1) * ROW_EXTENT_SIZE))
+ goto err;
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Row read"));
+ if (data != end_of_data && (uint) (end_of_data - start_of_data) >=
+ info->s->base.min_row_length)
+ goto err;
+ }
+
+ info->update|= HA_STATE_AKTIV; /* We have an active record */
+ DBUG_RETURN(0);
+
+err:
+ /* Something was wrong with data on record */
+ DBUG_PRINT("error", ("Found record with wrong data"));
+ DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD));
+}
+
+
+/*
+ Read a record based on record position
+
+ SYNOPSIS
+ _ma_read_block_record()
+ info Maria handler
+ record Store record here
+ record_pos Record position
+*/
+
+int _ma_read_block_record(MARIA_HA *info, byte *record,
+ MARIA_RECORD_POS record_pos)
+{
+ byte *data, *end_of_data, *buff;
+ my_off_t page;
+ uint offset;
+ uint block_size= info->s->block_size;
+ DBUG_ENTER("_ma_read_block_record");
+ DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos));
+
+ page= ma_recordpos_to_page(record_pos) * block_size;
+ offset= ma_recordpos_to_offset(record_pos);
+
+ if (!(buff= key_cache_read(info->s->key_cache,
+ info->dfile, page, 0, info->buff,
+ block_size, block_size, 1)))
+ DBUG_RETURN(1);
+ DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == HEAD_PAGE);
+ if (!(data= get_record_position(buff, block_size, offset, &end_of_data)))
+ {
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */
+ DBUG_PRINT("error", ("Wrong directory entry in data block"));
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
+}
+
+
+/* compare unique constraint between stored rows */
+
+my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const byte *record, MARIA_RECORD_POS pos)
+{
+ byte *org_rec_buff, *old_record;
+ my_size_t org_rec_buff_size;
+ int error;
+ DBUG_ENTER("_ma_cmp_block_unique");
+
+ if (!(old_record= my_alloca(info->s->base.reclength)))
+ DBUG_RETURN(1);
+
+ /* Don't let the compare destroy blobs that may be in use */
+ org_rec_buff= info->rec_buff;
+ org_rec_buff_size= info->rec_buff_size;
+ if (info->s->base.blobs)
+ {
+ /* Force realloc of record buffer*/
+ info->rec_buff= 0;
+ info->rec_buff_size= 0;
+ }
+ error= _ma_read_block_record(info, old_record, pos);
+ if (!error)
+ error= _ma_unique_comp(def, record, old_record, def->null_are_equal);
+ if (info->s->base.blobs)
+ {
+ my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ info->rec_buff= org_rec_buff;
+ info->rec_buff_size= org_rec_buff_size;
+ }
+ DBUG_PRINT("exit", ("result: %d", error));
+ my_afree(old_record);
+ DBUG_RETURN(error != 0);
+}
+
+
+/****************************************************************************
+ Table scan
+****************************************************************************/
+
+/*
+ Allocate buffers for table scan
+
+ SYNOPSIS
+ _ma_scan_init_block_record(MARIA_HA *info)
+
+ IMPLEMENTATION
+ We allocate one buffer for the current bitmap and one buffer for the
+ current page
+*/
+
+my_bool _ma_scan_init_block_record(MARIA_HA *info)
+{
+ byte *ptr;
+ if (!(ptr= (byte *) my_malloc(info->s->block_size * 2, MYF(MY_WME))))
+ return (1);
+ info->scan.bitmap_buff= ptr;
+ info->scan.page_buff= ptr + info->s->block_size;
+ info->scan.bitmap_end= info->scan.bitmap_buff + info->s->bitmap.total_size;
+
+ /* Set scan variables to get _ma_scan_block_record() to start with reading bitmap */
+ info->scan.number_of_rows= 0;
+ info->scan.bitmap_pos= info->scan.bitmap_end;
+ info->scan.bitmap_page= (ulong) - (long) info->s->bitmap.pages_covered;
+ /*
+ We have to flush bitmap as we will read the bitmap from the page cache
+ while scanning rows
+ */
+ return _ma_flush_bitmap(info->s);
+}
+
+
+/* Free buffers allocated by _ma_scan_init_block_record() */
+
+void _ma_scan_end_block_record(MARIA_HA *info)
+{
+ my_free(info->scan.bitmap_buff, MYF(0));
+ info->scan.bitmap_buff= 0;
+}
+
+
+/*
+ Read next record while scanning table
+
+ SYNOPSIS
+ _ma_scan_block_record()
+ info Maria handler
+ record Store found here
+ record_pos Value stored in info->cur_row.nextpos after last call
+ skip_deleted
+
+ NOTES
+ - One must have called mi_scan() before this
+ - In this version, we don't actually need record_pos; we could just as
+ easily use a variable in info->scan
+
+ IMPLEMENTATION
+ Current code uses a lot of goto's to separate the different kind of
+ states we may be in. This gives us a minimum of executed if's for
+ the normal cases. I tried several different ways to code this, but
+ the current one was in the end the most readable and fastest.
+
+ RETURN
+ 0 ok
+ # Error code
+*/
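+
+/*
+ Bitmap decoding sketch (the numbers below mirror the code that follows;
+ the example values are made up):
+
+ Each 6 byte group of the bitmap (read with uint6korr) holds 16 entries
+ of 3 bits, one per data page. A pattern in the range 1..4 marks a head
+ page that must be scanned. The page number for entry 'bit_pos' in the
+ group starting at 'data' is
+
+ page= bitmap_page + 1 + (data - bitmap_buff) / 6 * 16 + (bit_pos - 1)
+
+ e.g. bitmap_page 0, second group (data - bitmap_buff == 6) and bit_pos 3
+ give page 1 + 16 + 2 = 19.
+*/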
+
+int _ma_scan_block_record(MARIA_HA *info, byte *record,
+ MARIA_RECORD_POS record_pos,
+ my_bool skip_deleted __attribute__ ((unused)))
+{
+ uint block_size;
+ my_off_t filepos;
+ DBUG_ENTER("_ma_scan_block_record");
+
+restart_record_read:
+ /* Find next row in current page */
+ if (likely(record_pos < info->scan.number_of_rows))
+ {
+ uint length, offset;
+ byte *data, *end_of_data;
+
+ while (!(offset= uint2korr(info->scan.dir)))
+ {
+ info->scan.dir-= DIR_ENTRY_SIZE;
+ record_pos++;
+#ifdef SANITY_CHECKS
+ if (info->scan.dir < info->scan.dir_end)
+ goto err;
+#endif
+ }
+ /* found row */
+ info->cur_row.lastpos= info->scan.row_base_page + record_pos;
+ info->cur_row.nextpos= record_pos + 1;
+ data= info->scan.page_buff + offset;
+ length= uint2korr(info->scan.dir + 2);
+ end_of_data= data + length;
+ info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */
+#ifdef SANITY_CHECKS
+ if (end_of_data > info->scan.dir_end ||
+ offset < PAGE_HEADER_SIZE || length < info->s->base.min_block_length)
+ goto err;
+#endif
+ DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
+ DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
+ }
+
+ /* Find next head page in current bitmap */
+restart_bitmap_scan:
+ block_size= info->s->block_size;
+ if (likely(info->scan.bitmap_pos < info->scan.bitmap_end))
+ {
+ byte *data= info->scan.bitmap_pos;
+ longlong bits= info->scan.bits;
+ uint bit_pos= info->scan.bit_pos;
+
+ do
+ {
+ while (likely(bits))
+ {
+ uint pattern= bits & 7;
+ bits >>= 3;
+ bit_pos++;
+ if (pattern > 0 && pattern <= 4)
+ {
+ /* Found head page; Read it */
+ ulong page;
+ info->scan.bitmap_pos= data;
+ info->scan.bits= bits;
+ info->scan.bit_pos= bit_pos;
+ page= (info->scan.bitmap_page + 1 +
+ (data - info->scan.bitmap_buff) / 6 * 16 + bit_pos - 1);
+ info->scan.row_base_page= ma_recordpos(page, 0);
+ if (!(key_cache_read(info->s->key_cache,
+ info->dfile,
+ (my_off_t) page * block_size,
+ 0, info->scan.page_buff,
+ block_size, block_size, 0)))
+ DBUG_RETURN(my_errno);
+ if (((info->scan.page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) !=
+ HEAD_PAGE) ||
+ (info->scan.number_of_rows=
+ (uint) (uchar) info->scan.page_buff[DIR_ENTRY_OFFSET]) == 0)
+ {
+ DBUG_PRINT("error", ("Wrong page header"));
+ DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD));
+ }
+ info->scan.dir= (info->scan.page_buff + block_size -
+ PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
+ info->scan.dir_end= (info->scan.dir -
+ (info->scan.number_of_rows - 1) *
+ DIR_ENTRY_SIZE);
+ record_pos= 0;
+ goto restart_record_read;
+ }
+ }
+ for (data+= 6; data < info->scan.bitmap_end; data+= 6)
+ {
+ bits= uint6korr(data);
+ if (bits && ((bits & LL(04444444444444444)) != LL(04444444444444444)))
+ break;
+ }
+ bit_pos= 0;
+ } while (data < info->scan.bitmap_end);
+ }
+
+ /* Read next bitmap */
+ info->scan.bitmap_page+= info->s->bitmap.pages_covered;
+ filepos= (my_off_t) info->scan.bitmap_page * block_size;
+ if (unlikely(filepos >= info->state->data_file_length))
+ {
+ DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE));
+ }
+ if (!(key_cache_read(info->s->key_cache, info->dfile, filepos,
+ 0, info->scan.bitmap_buff, block_size, block_size, 0)))
+ DBUG_RETURN(my_errno);
+ /* Skip scanning 'bits' in bitmap scan code */
+ info->scan.bitmap_pos= info->scan.bitmap_buff - 6;
+ info->scan.bits= 0;
+ goto restart_bitmap_scan;
+
+err:
+ DBUG_PRINT("error", ("Wrong data on page"));
+ DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD));
+}
+
+
+/*
+ Compare a row against a stored one
+
+ NOTES
+ Not implemented, as block record is not supposed to be used in a shared
+ global environment
+*/
+
+my_bool _ma_compare_block_record(MARIA_HA *info __attribute__ ((unused)),
+ const byte *record __attribute__ ((unused)))
+{
+ return 0;
+}
+
+
+#ifndef DBUG_OFF
+
+static void _ma_print_directory(byte *buff, uint block_size)
+{
+ uint max_entry= (uint) ((uchar *) buff)[DIR_ENTRY_OFFSET], row= 0;
+ uint end_of_prev_row= PAGE_HEADER_SIZE;
+ byte *dir, *end;
+
+ dir= buff + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE;
+ end= buff + block_size - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE;
+
+ DBUG_LOCK_FILE;
+ fprintf(DBUG_FILE,"Directory dump (pos:length):\n");
+
+ for (row= 1; dir <= end ; end-= DIR_ENTRY_SIZE, row++)
+ {
+ uint offset= uint2korr(end);
+ uint length= uint2korr(end+2);
+ fprintf(DBUG_FILE, " %4u:%4u", offset, offset ? length : 0);
+ if (!(row % (80/12)))
+ fputc('\n', DBUG_FILE);
+ if (offset)
+ {
+ DBUG_ASSERT(offset >= end_of_prev_row);
+ end_of_prev_row= offset + length;
+ }
+ }
+ fputc('\n', DBUG_FILE);
+ fflush(DBUG_FILE);
+ DBUG_UNLOCK_FILE;
+}
+#endif /* DBUG_OFF */
+
diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h
new file mode 100644
index 00000000000..ec99dbfcae2
--- /dev/null
+++ b/storage/maria/ma_blockrec.h
@@ -0,0 +1,160 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Storage of records in block
+*/
+
+#define LSN_SIZE 7
+#define DIRCOUNT_SIZE 1 /* Stores number of rows on page */
+#define EMPTY_SPACE_SIZE 2 /* Stores empty space on page */
+#define PAGE_TYPE_SIZE 1
+#define PAGE_SUFFIX_SIZE 0 /* Bytes for page suffix */
+#define PAGE_HEADER_SIZE (LSN_SIZE + DIRCOUNT_SIZE + EMPTY_SPACE_SIZE +\
+ PAGE_TYPE_SIZE)
+#define PAGE_OVERHEAD_SIZE (PAGE_HEADER_SIZE + DIR_ENTRY_SIZE + \
+ PAGE_SUFFIX_SIZE)
+#define BLOCK_RECORD_POINTER_SIZE 6
+
+#define FULL_PAGE_SIZE(block_size) ((block_size) - LSN_SIZE - PAGE_TYPE_SIZE)
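+
+/*
+ With the sizes above and a hypothetical 8192 byte block this gives:
+ PAGE_HEADER_SIZE = 7 + 1 + 2 + 1 = 11 bytes (head/tail pages)
+ PAGE_OVERHEAD_SIZE = 11 + 4 + 0 = 15 bytes (header + one dir entry)
+ FULL_PAGE_SIZE = 8192 - 7 - 1 = 8184 bytes of row data
+ Blob/full pages only carry the LSN and the page type byte, hence the
+ smaller overhead in FULL_PAGE_SIZE.
+*/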
+
+#define ROW_EXTENT_PAGE_SIZE 5
+#define ROW_EXTENT_COUNT_SIZE 2
+#define ROW_EXTENT_SIZE (ROW_EXTENT_PAGE_SIZE + ROW_EXTENT_COUNT_SIZE)
+#define TAIL_BIT 0x8000 /* Bit in page_count to signify tail */
+#define ELEMENTS_RESERVED_FOR_MAIN_PART 4
+#define EXTRA_LENGTH_FIELDS 3
+
+#define FLAG_SIZE 1
+#define TRANSID_SIZE 6
+#define VERPTR_SIZE 7
+#define DIR_ENTRY_SIZE 4
+#define FIELD_OFFSET_SIZE 2
+
+/* Minimum header size needed for a new row */
+#define BASE_ROW_HEADER_SIZE FLAG_SIZE
+#define TRANS_ROW_EXTRA_HEADER_SIZE TRANSID_SIZE
+
+#define PAGE_TYPE_MASK 127
+enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_TYPE };
+
+#define PAGE_TYPE_OFFSET LSN_SIZE
+#define DIR_ENTRY_OFFSET (LSN_SIZE + PAGE_TYPE_SIZE)
+#define EMPTY_SPACE_OFFSET (DIR_ENTRY_OFFSET + DIRCOUNT_SIZE)
+
+#define PAGE_CAN_BE_COMPACTED 128 /* Bit in PAGE_TYPE */
+
+/* Bits used for flag byte (one byte, first in record) */
+#define ROW_FLAG_TRANSID 1
+#define ROW_FLAG_VER_PTR 2
+#define ROW_FLAG_DELETE_TRANSID 4
+#define ROW_FLAG_NULLS_EXTENDED 8
+#define ROW_FLAG_EXTENTS 128
+#define ROW_FLAG_ALL (1+2+4+8+128)
+
+/* Variables that affects how data pages are utilized */
+#define MIN_TAIL_SIZE 32
+
+/* Fixed part of Max possible header size; See table in ma_blockrec.c */
+#define MAX_FIXED_HEADER_SIZE (FLAG_SIZE + 3 + ROW_EXTENT_SIZE + 3)
+#define TRANS_MAX_FIXED_HEADER_SIZE (MAX_FIXED_HEADER_SIZE + \
+ FLAG_SIZE + TRANSID_SIZE + VERPTR_SIZE + \
+ TRANSID_SIZE)
+
+/* We use 1 byte in record header to store number of directory entries */
+#define MAX_ROWS_PER_PAGE 255
+
+/* Bits for MARIA_BITMAP_BLOCKS->used */
+#define BLOCKUSED_USED 1
+#define BLOCKUSED_USE_ORG_BITMAP 2
+#define BLOCKUSED_TAIL 4
+
+/* defines that affects allocation (density) of data */
+
+/* If we fill up a block to 75 %, don't create a new tail page for it */
+#define MAX_TAIL_SIZE(block_size) ((block_size) * 3 / 4)
+
+/* Functions to convert MARIA_RECORD_POS to/from page:offset */
+
+static inline MARIA_RECORD_POS ma_recordpos(ulonglong page, uint offset)
+{
+ return (MARIA_RECORD_POS) ((page << 8) | offset);
+}
+
+static inline my_off_t ma_recordpos_to_page(MARIA_RECORD_POS record_pos)
+{
+ return record_pos >> 8;
+}
+
+static inline my_off_t ma_recordpos_to_offset(MARIA_RECORD_POS record_pos)
+{
+ return record_pos & 255;
+}
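+
+/*
+ Example (illustrative values): a row stored as directory entry 3 on data
+ page 1000 gets MARIA_RECORD_POS (1000 << 8) | 3 = 256003; the functions
+ above give back page 1000 and offset 3. The offset always fits in the
+ low byte because of MAX_ROWS_PER_PAGE (255).
+*/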
+
+/* ma_blockrec.c */
+void _ma_init_block_record_data(void);
+my_bool _ma_once_init_block_row(MARIA_SHARE *share, File dfile);
+my_bool _ma_once_end_block_row(MARIA_SHARE *share);
+my_bool _ma_init_block_row(MARIA_HA *info);
+void _ma_end_block_row(MARIA_HA *info);
+
+my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const byte *record);
+my_bool _ma_delete_block_record(MARIA_HA *info);
+int _ma_read_block_record(MARIA_HA *info, byte *record,
+ MARIA_RECORD_POS record_pos);
+int _ma_read_block_record2(MARIA_HA *info, byte *record,
+ byte *data, byte *end_of_data);
+int _ma_scan_block_record(MARIA_HA *info, byte *record,
+ MARIA_RECORD_POS, my_bool);
+my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const byte *record, MARIA_RECORD_POS pos);
+my_bool _ma_scan_init_block_record(MARIA_HA *info);
+void _ma_scan_end_block_record(MARIA_HA *info);
+
+MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info,
+ const byte *record);
+my_bool _ma_write_block_record(MARIA_HA *info, const byte *record);
+my_bool _ma_write_abort_block_record(MARIA_HA *info);
+my_bool _ma_compare_block_record(register MARIA_HA *info,
+ register const byte *record);
+
+/* ma_bitmap.c */
+my_bool _ma_bitmap_init(MARIA_SHARE *share, File file);
+my_bool _ma_bitmap_end(MARIA_SHARE *share);
+my_bool _ma_flush_bitmap(MARIA_SHARE *share);
+my_bool _ma_read_bitmap_page(MARIA_SHARE *share, MARIA_FILE_BITMAP *bitmap,
+ ulonglong page);
+my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,
+ MARIA_BITMAP_BLOCKS *result_blocks);
+my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks);
+my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const byte *extents,
+ uint count);
+my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong pos, my_bool head,
+ uint empty_space);
+my_bool _ma_reset_full_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
+ ulonglong page, uint page_count);
+uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size);
+my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *new_row,
+ ulonglong page, uint free_size,
+ MARIA_BITMAP_BLOCKS *result_blocks);
+my_bool _ma_check_bitmap_data(MARIA_HA *info,
+ enum en_page_type page_type, ulonglong page,
+ uint empty_space, uint *bitmap_pattern);
+my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
+ enum en_page_type page_type,
+ ulonglong page,
+ uint *bitmap_pattern);
diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c
new file mode 100644
index 00000000000..d6061c647ec
--- /dev/null
+++ b/storage/maria/ma_cache.c
@@ -0,0 +1,108 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Functions for read record caching with maria
+ Used for reading dynamic/compressed records from datafile.
+
+ Can fetch data directly from file (outside cache),
+ if reading a small chunk straight before the cached part (with possible
+ overlap).
+
+ Can be explicitly asked not to use cache (by not setting READING_NEXT in
+ flag) - useful for occasional out-of-cache reads, when the next read is
+ expected to hit the cache again.
+
+ Allows "partial read" errors in the record header (when READING_HEADER flag
+ is set) - unread part is bzero'ed
+
+ Note: out-of-cache reads are enabled for shared IO_CACHE's too,
+ as these reads will be cached by OS cache (and my_pread is always atomic)
+*/
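+
+/*
+ Example of the "small chunk straight before the cached part" case
+ (positions are made up): if the cache currently holds file positions
+ [1000, 3000) and the caller asks for 300 bytes at position 900, the
+ first 100 bytes are read with my_pread() directly from the file and the
+ remaining 200 bytes are copied from the cache buffer, without moving the
+ cache read position.
+*/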
+
+
+#include "maria_def.h"
+
+int _ma_read_cache(IO_CACHE *info, byte *buff, my_off_t pos, uint length,
+ int flag)
+{
+ uint read_length,in_buff_length;
+ my_off_t offset;
+ char *in_buff_pos;
+ DBUG_ENTER("_ma_read_cache");
+
+ if (pos < info->pos_in_file)
+ {
+ read_length=length;
+ if ((my_off_t) read_length > (my_off_t) (info->pos_in_file-pos))
+ read_length=(uint) (info->pos_in_file-pos);
+ info->seek_not_done=1;
+ if (my_pread(info->file,buff,read_length,pos,MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ if (!(length-=read_length))
+ DBUG_RETURN(0);
+ pos+=read_length;
+ buff+=read_length;
+ }
+ if (pos >= info->pos_in_file &&
+ (offset= (my_off_t) (pos - info->pos_in_file)) <
+ (my_off_t) (info->read_end - info->request_pos))
+ {
+ in_buff_pos=info->request_pos+(uint) offset;
+ in_buff_length= min(length,(uint) (info->read_end-in_buff_pos));
+ memcpy(buff,info->request_pos+(uint) offset,(size_t) in_buff_length);
+ if (!(length-=in_buff_length))
+ DBUG_RETURN(0);
+ pos+=in_buff_length;
+ buff+=in_buff_length;
+ }
+ else
+ in_buff_length=0;
+ if (flag & READING_NEXT)
+ {
+ if (pos != (info->pos_in_file +
+ (uint) (info->read_end - info->request_pos)))
+ {
+ info->pos_in_file=pos; /* Force start here */
+ info->read_pos=info->read_end=info->request_pos; /* Everything used */
+ info->seek_not_done=1;
+ }
+ else
+ info->read_pos=info->read_end; /* All block used */
+ if (!(*info->read_function)(info,buff,length))
+ DBUG_RETURN(0);
+ read_length=info->error;
+ }
+ else
+ {
+ info->seek_not_done=1;
+ if ((read_length=my_pread(info->file,buff,length,pos,MYF(0))) == length)
+ DBUG_RETURN(0);
+ }
+ if (!(flag & READING_HEADER) || (int) read_length == -1 ||
+ read_length+in_buff_length < 3)
+ {
+ DBUG_PRINT("error",
+ ("Error %d reading next-multi-part block (Got %d bytes)",
+ my_errno, (int) read_length));
+ if (!my_errno || my_errno == -1)
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(1);
+ }
+ bzero(buff+read_length,MARIA_BLOCK_INFO_HEADER_LENGTH - in_buff_length -
+ read_length);
+ DBUG_RETURN(0);
+} /* _ma_read_cache */
diff --git a/storage/maria/ma_changed.c b/storage/maria/ma_changed.c
new file mode 100644
index 00000000000..9e86212baa6
--- /dev/null
+++ b/storage/maria/ma_changed.c
@@ -0,0 +1,34 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Check if somebody has changed table since last check. */
+
+#include "maria_def.h"
+
+ /* Return 0 if table isn't changed */
+
+int maria_is_changed(MARIA_HA *info)
+{
+ int result;
+ DBUG_ENTER("maria_is_changed");
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(-1);
+ VOID(_ma_writeinfo(info,0));
+ result=(int) info->data_changed;
+ info->data_changed=0;
+ DBUG_PRINT("exit",("result: %d",result));
+ DBUG_RETURN(result);
+}
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
new file mode 100644
index 00000000000..ccce19de994
--- /dev/null
+++ b/storage/maria/ma_check.c
@@ -0,0 +1,5096 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Describe, check and repair of MARIA tables */
+
+/*
+ About checksum calculation.
+
+ There are two types of checksums. Table checksum and row checksum.
+
+  The row checksum is an additional byte at the end of dynamic length
+  records. It must be calculated if the table is configured for row
+  checksums and must not be used otherwise. The variable
+  MARIA_SHARE::calc_checksum determines if row checksums are used.
+  The per-row checksum (MARIA_HA::cur_row.checksum) is used as temporary
+  storage during row handling. For parallel repair we must ensure that
+  only one thread at a time uses this variable. There is no problem on
+  the write side as this is done by one thread only. Checking a record
+  after a read could go wrong, but since all threads read through a
+  common read buffer, it is sufficient if only one thread checks it.
+
+  The table checksum is an eight byte value in the header of the index
+  file. It can be calculated even if row checksums are not used. The
+  variable HA_CHECK::glob_crc is calculated over all records.
+  MARIA_SORT_PARAM::calc_checksum determines if this should be done. This
+  variable is not part of HA_CHECK because it must be set per thread for
+  parallel repair. The global glob_crc must be changed by one thread
+  only, and it is sufficient to calculate the checksum once only.
+*/
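+
+/*
+  In this file the table checksum described above is accumulated in
+  HA_CHECK::glob_crc by the check_*_record() functions below (via
+  _ma_static_checksum(), _ma_checksum() or the share's calc_checksum
+  hook), and maria_repair() enables MARIA_SORT_PARAM::calc_checksum
+  when the table uses checksums (T_CALC_CHECKSUM).
+*/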
+
+#include "ma_ftdefs.h"
+#include <myisamchk.h>
+#include <m_ctype.h>
+#include <stdarg.h>
+#include <my_getopt.h>
+#ifdef HAVE_SYS_VADVISE_H
+#include <sys/vadvise.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#include "ma_rt_index.h"
+#include "ma_blockrec.h"
+
+/* Functions defined in this file */
+
+static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link);
+static int chk_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo,
+ my_off_t page, byte *buff, ha_rows *keys,
+ ha_checksum *key_checksum, uint level);
+static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo);
+static ha_checksum calc_checksum(ha_rows count);
+static int writekeys(MARIA_SORT_PARAM *sort_param);
+static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t pagepos, File new_file);
+static int sort_key_read(MARIA_SORT_PARAM *sort_param, byte *key);
+static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key);
+static int sort_get_next_record(MARIA_SORT_PARAM *sort_param);
+static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
+ const void *b);
+static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
+ const byte *a);
+static int sort_key_write(MARIA_SORT_PARAM *sort_param, const byte *a);
+static my_off_t get_record_for_key(MARIA_HA *info,MARIA_KEYDEF *keyinfo,
+ const byte *key);
+static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
+ reg1 SORT_KEY_BLOCKS *key_block,
+ const byte *key, my_off_t prev_block);
+static int sort_delete_record(MARIA_SORT_PARAM *sort_param);
+/*static int _ma_flush_pending_blocks(HA_CHECK *param);*/
+static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
+ uint buffer_length);
+static ha_checksum maria_byte_checksum(const byte *buf, uint length);
+static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share);
+static void restore_data_file_type(MARIA_SHARE *share);
+
+void maria_chk_init(HA_CHECK *param)
+{
+ bzero((gptr) param,sizeof(*param));
+ param->opt_follow_links=1;
+ param->keys_in_use= ~(ulonglong) 0;
+ param->search_after_block=HA_OFFSET_ERROR;
+ param->auto_increment_value= 0;
+ param->use_buffers=USE_BUFFER_INIT;
+ param->read_buffer_length=READ_BUFFER_INIT;
+ param->write_buffer_length=READ_BUFFER_INIT;
+ param->sort_buffer_length=SORT_BUFFER_INIT;
+ param->sort_key_blocks=BUFFERS_WHEN_SORTING;
+ param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
+ param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL);
+ param->start_check_pos=0;
+ param->max_record_length= LONGLONG_MAX;
+ param->key_cache_block_size= KEY_CACHE_BLOCK_SIZE;
+ param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
+}
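+
+/*
+  Typical call sequence (sketch only; the real callers add option
+  handling and table locking around these checks; 'info' is an open
+  MARIA_HA handler):
+
+    HA_CHECK param;
+    maria_chk_init(&param);
+    maria_chk_status(&param, info);               -- status flags
+    maria_chk_size(&param, info);                 -- file sizes
+    maria_chk_del(&param, info, param.testflag);  -- record delete chain
+    maria_chk_key(&param, info);                  -- index trees
+    maria_chk_data_link(&param, info, 0);         -- rows and key references
+*/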
+
+ /* Check the status flags for the table */
+
+int maria_chk_status(HA_CHECK *param, register MARIA_HA *info)
+{
+ MARIA_SHARE *share=info->s;
+
+ if (maria_is_crashed_on_repair(info))
+ _ma_check_print_warning(param,
+ "Table is marked as crashed and last repair failed");
+ else if (maria_is_crashed(info))
+ _ma_check_print_warning(param,
+ "Table is marked as crashed");
+ if (share->state.open_count != (uint) (info->s->global_changed ? 1 : 0))
+ {
+ /* Don't count this as a real warning, as check can correct this ! */
+ uint save=param->warning_printed;
+ _ma_check_print_warning(param,
+ share->state.open_count==1 ?
+ "%d client is using or hasn't closed the table properly" :
+ "%d clients are using or haven't closed the table properly",
+ share->state.open_count);
+ /* If this will be fixed by the check, forget the warning */
+ if (param->testflag & T_UPDATE_STATE)
+ param->warning_printed=save;
+ }
+ return 0;
+}
+
+ /* Check delete links */
+
+int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag)
+{
+ reg2 ha_rows i;
+ uint delete_link_length;
+ my_off_t empty,next_link,old_link;
+ char buff[22],buff2[22];
+ DBUG_ENTER("maria_chk_del");
+
+ LINT_INIT(old_link);
+ param->record_checksum=0;
+ delete_link_length=((info->s->options & HA_OPTION_PACK_RECORD) ? 20 :
+ info->s->rec_reflength+1);
+
+ if (!(test_flag & T_SILENT))
+ puts("- check record delete-chain");
+
+ next_link=info->s->state.dellink;
+ if (info->state->del == 0)
+ {
+ if (test_flag & T_VERBOSE)
+ {
+ puts("No recordlinks");
+ }
+ }
+ else
+ {
+ if (test_flag & T_VERBOSE)
+ printf("Recordlinks: ");
+ empty=0;
+ for (i= info->state->del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--)
+ {
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(1);
+ if (test_flag & T_VERBOSE)
+ printf(" %9s",llstr(next_link,buff));
+ if (next_link >= info->state->data_file_length)
+ goto wrong;
+ if (my_pread(info->dfile,(char*) buff,delete_link_length,
+ next_link,MYF(MY_NABP)))
+ {
+ if (test_flag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"Can't read delete-link at filepos: %s",
+ llstr(next_link,buff));
+ DBUG_RETURN(1);
+ }
+ if (*buff != '\0')
+ {
+ if (test_flag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"Record at pos: %s is not remove-marked",
+ llstr(next_link,buff));
+ goto wrong;
+ }
+ if (info->s->options & HA_OPTION_PACK_RECORD)
+ {
+ my_off_t prev_link=mi_sizekorr(buff+12);
+ if (empty && prev_link != old_link)
+ {
+ if (test_flag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"Deleted block at %s doesn't point back at previous delete link",llstr(next_link,buff2));
+ goto wrong;
+ }
+ old_link=next_link;
+ next_link=mi_sizekorr(buff+4);
+ empty+=mi_uint3korr(buff+1);
+ }
+ else
+ {
+ param->record_checksum+=(ha_checksum) next_link;
+ next_link= _ma_rec_pos(info->s, buff+1);
+ empty+=info->s->base.pack_reclength;
+ }
+ }
+ if (test_flag & T_VERBOSE)
+ puts("\n");
+ if (empty != info->state->empty)
+ {
+ _ma_check_print_warning(param,
+ "Found %s deleted space in delete link chain. Should be %s",
+ llstr(empty,buff2),
+ llstr(info->state->empty,buff));
+ }
+ if (next_link != HA_OFFSET_ERROR)
+ {
+ _ma_check_print_error(param,
+ "Found more than the expected %s deleted rows in delete link chain",
+ llstr(info->state->del, buff));
+ goto wrong;
+ }
+ if (i != 0)
+ {
+ _ma_check_print_error(param,
+ "Found %s deleted rows in delete link chain. Should be %s",
+ llstr(info->state->del - i, buff2),
+ llstr(info->state->del, buff));
+ goto wrong;
+ }
+ }
+ DBUG_RETURN(0);
+
+wrong:
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ if (test_flag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"record delete-link-chain corrupted");
+ DBUG_RETURN(1);
+} /* maria_chk_del */
+
+
+ /* Check delete links in index file */
+
+static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
+ my_off_t next_link)
+{
+ uint block_size= info->s->block_size;
+ ha_rows records;
+ char llbuff[21], llbuff2[21], *buff;
+ DBUG_ENTER("check_k_link");
+
+ records= (ha_rows) (info->state->key_file_length / block_size);
+ while (next_link != HA_OFFSET_ERROR && records > 0)
+ {
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(1);
+ if (param->testflag & T_VERBOSE)
+ printf("%16s",llstr(next_link,llbuff));
+
+    /* Key blocks must lie within the key file length entirely. */
+ if (next_link + block_size > info->state->key_file_length)
+ {
+ /* purecov: begin tested */
+ _ma_check_print_error(param, "Invalid key block position: %s "
+ "key block size: %u file_length: %s",
+ llstr(next_link, llbuff), block_size,
+ llstr(info->state->key_file_length, llbuff2));
+ DBUG_RETURN(1);
+ /* purecov: end */
+ }
+
+ /* Key blocks must be aligned at block_size */
+ if (next_link & (block_size -1))
+ {
+ /* purecov: begin tested */
+ _ma_check_print_error(param, "Mis-aligned key block: %s "
+ "minimum key block length: %u",
+ llstr(next_link, llbuff),
+ block_size);
+ DBUG_RETURN(1);
+ /* purecov: end */
+ }
+
+ if (!(buff=key_cache_read(info->s->key_cache,
+ info->s->kfile, next_link, DFLT_INIT_HITS,
+ (byte*) info->buff,
+ block_size, block_size, 1)))
+ {
+ /* purecov: begin tested */
+ _ma_check_print_error(param, "key cache read error for block: %s",
+ llstr(next_link,llbuff));
+ DBUG_RETURN(1);
+ /* purecov: end */
+ }
+ next_link=mi_sizekorr(buff);
+ records--;
+ param->key_file_blocks+=block_size;
+ }
+ if (param->testflag & T_VERBOSE)
+ {
+ if (next_link != HA_OFFSET_ERROR)
+ printf("%16s\n",llstr(next_link,llbuff));
+ else
+ puts("");
+ }
+ DBUG_RETURN (next_link != HA_OFFSET_ERROR);
+} /* check_k_link */
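+
+/*
+  The key-file delete chain walked above: each free key block stores, in
+  its first bytes, the file position of the next free block (read with
+  mi_sizekorr()), and the chain is terminated by HA_OFFSET_ERROR.  The
+  loop is bounded by key_file_length / block_size, so a cycle in a
+  corrupted chain cannot make the check loop forever.
+*/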
+
+
+ /* Check sizes of files */
+
+int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
+{
+ int error=0;
+ register my_off_t skr,size;
+ char buff[22],buff2[22];
+ DBUG_ENTER("maria_chk_size");
+
+ if (!(param->testflag & T_SILENT))
+ puts("- check file-size");
+
+ /* The following is needed if called externally (not from maria_chk) */
+ flush_key_blocks(info->s->key_cache,
+ info->s->kfile, FLUSH_FORCE_WRITE);
+
+ size=my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0));
+ if ((skr=(my_off_t) info->state->key_file_length) != size)
+ {
+ /* Don't give error if file generated by mariapack */
+ if (skr > size && maria_is_any_key_active(info->s->state.key_map))
+ {
+ error=1;
+ _ma_check_print_error(param,
+ "Size of indexfile is: %-8s Should be: %s",
+ llstr(size,buff), llstr(skr,buff2));
+ }
+ else if (!(param->testflag & T_VERY_SILENT))
+ _ma_check_print_warning(param,
+ "Size of indexfile is: %-8s Should be: %s",
+ llstr(size,buff), llstr(skr,buff2));
+ }
+ if (!(param->testflag & T_VERY_SILENT) &&
+ ! (info->s->options & HA_OPTION_COMPRESS_RECORD) &&
+ ulonglong2double(info->state->key_file_length) >
+ ulonglong2double(info->s->base.margin_key_file_length)*0.9)
+ _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
+ llstr(info->state->key_file_length,buff),
+                            llstr(info->s->base.max_key_file_length-1,buff2));
+
+ size=my_seek(info->dfile,0L,MY_SEEK_END,MYF(0));
+ skr=(my_off_t) info->state->data_file_length;
+ if (info->s->options & HA_OPTION_COMPRESS_RECORD)
+ skr+= MEMMAP_EXTRA_MARGIN;
+#ifdef USE_RELOC
+ if (info->data_file_type == STATIC_RECORD &&
+ skr < (my_off_t) info->s->base.reloc*info->s->base.min_pack_length)
+ skr=(my_off_t) info->s->base.reloc*info->s->base.min_pack_length;
+#endif
+ if (skr != size)
+ {
+ info->state->data_file_length=size; /* Skip other errors */
+ if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
+ {
+ error=1;
+ _ma_check_print_error(param,"Size of datafile is: %-9s Should be: %s",
+ llstr(size,buff), llstr(skr,buff2));
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ }
+ else
+ {
+ _ma_check_print_warning(param,
+ "Size of datafile is: %-9s Should be: %s",
+ llstr(size,buff), llstr(skr,buff2));
+ }
+ }
+ if (!(param->testflag & T_VERY_SILENT) &&
+ !(info->s->options & HA_OPTION_COMPRESS_RECORD) &&
+ ulonglong2double(info->state->data_file_length) >
+ (ulonglong2double(info->s->base.max_data_file_length)*0.9))
+ _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
+ llstr(info->state->data_file_length,buff),
+ llstr(info->s->base.max_data_file_length-1,buff2));
+ DBUG_RETURN(error);
+} /* maria_chk_size */
+
+
+/* Check keys */
+
+int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
+{
+ uint key,found_keys=0,full_text_keys=0,result=0;
+ ha_rows keys;
+ ha_checksum old_record_checksum,init_checksum;
+ my_off_t all_keydata,all_totaldata,key_totlength,length;
+ ulong *rec_per_key_part;
+ MARIA_SHARE *share=info->s;
+ MARIA_KEYDEF *keyinfo;
+ char buff[22],buff2[22];
+ DBUG_ENTER("maria_chk_key");
+
+ if (!(param->testflag & T_SILENT))
+ puts("- check key delete-chain");
+
+ param->key_file_blocks=info->s->base.keystart;
+ if (check_k_link(param, info, info->s->state.key_del))
+ {
+ if (param->testflag & T_VERBOSE) puts("");
+ _ma_check_print_error(param,"key delete-link-chain corrupted");
+ DBUG_RETURN(-1);
+ }
+
+ if (!(param->testflag & T_SILENT)) puts("- check index reference");
+
+ all_keydata=all_totaldata=key_totlength=0;
+ old_record_checksum=0;
+ init_checksum=param->record_checksum;
+ if (share->data_file_type == STATIC_RECORD)
+ old_record_checksum= (calc_checksum(info->state->records +
+ info->state->del-1) *
+ share->base.pack_reclength);
+ rec_per_key_part= param->rec_per_key_part;
+ for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
+ rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
+ {
+ param->key_crc[key]=0;
+ if (! maria_is_key_active(share->state.key_map, key))
+ {
+ /* Remember old statistics for key */
+ memcpy((char*) rec_per_key_part,
+ (char*) (share->state.rec_per_key_part +
+ (uint) (rec_per_key_part - param->rec_per_key_part)),
+ keyinfo->keysegs*sizeof(*rec_per_key_part));
+ continue;
+ }
+ found_keys++;
+
+ param->record_checksum=init_checksum;
+
+ bzero((char*) &param->unique_count,sizeof(param->unique_count));
+ bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
+
+ if ((!(param->testflag & T_SILENT)))
+ printf ("- check data record references index: %d\n",key+1);
+ if (keyinfo->flag & HA_FULLTEXT)
+ full_text_keys++;
+ if (share->state.key_root[key] == HA_OFFSET_ERROR &&
+ (info->state->records == 0 || keyinfo->flag & HA_FULLTEXT))
+ goto do_stat;
+ if (!_ma_fetch_keypage(info,keyinfo,share->state.key_root[key],
+ DFLT_INIT_HITS,info->buff,0))
+ {
+ _ma_check_print_error(param,"Can't read indexpage from filepos: %s",
+ llstr(share->state.key_root[key],buff));
+ if (!(param->testflag & T_INFO))
+ DBUG_RETURN(-1);
+ result= -1;
+ continue;
+ }
+ param->key_file_blocks+=keyinfo->block_length;
+ keys=0;
+ param->keydata=param->totaldata=0;
+ param->key_blocks=0;
+ param->max_level=0;
+ if (chk_index(param,info,keyinfo,share->state.key_root[key],info->buff,
+ &keys, param->key_crc+key,1))
+ DBUG_RETURN(-1);
+ if(!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL)))
+ {
+ if (keys != info->state->records)
+ {
+ _ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff),
+ llstr(info->state->records,buff2));
+ if (!(param->testflag & T_INFO))
+ DBUG_RETURN(-1);
+ result= -1;
+ continue;
+ }
+ if ((found_keys - full_text_keys == 1 &&
+ !(share->data_file_type == STATIC_RECORD)) ||
+ (param->testflag & T_DONT_CHECK_CHECKSUM))
+ old_record_checksum= param->record_checksum;
+ else if (old_record_checksum != param->record_checksum)
+ {
+ if (key)
+ _ma_check_print_error(param,"Key %u doesn't point at same records that key 1",
+ key+1);
+ else
+ _ma_check_print_error(param,"Key 1 doesn't point at all records");
+ if (!(param->testflag & T_INFO))
+ DBUG_RETURN(-1);
+ result= -1;
+ continue;
+ }
+ }
+ if ((uint) share->base.auto_key -1 == key)
+ {
+ /* Check that auto_increment key is bigger than max key value */
+ ulonglong auto_increment;
+ info->lastinx=key;
+ _ma_read_key_record(info, info->rec_buff, 0);
+ auto_increment= ma_retrieve_auto_increment(info, info->rec_buff);
+ if (auto_increment > info->s->state.auto_increment)
+ {
+ _ma_check_print_warning(param, "Auto-increment value: %s is smaller "
+ "than max used value: %s",
+ llstr(info->s->state.auto_increment,buff2),
+ llstr(auto_increment, buff));
+ }
+ if (param->testflag & T_AUTO_INC)
+ {
+ set_if_bigger(info->s->state.auto_increment,
+ auto_increment);
+ set_if_bigger(info->s->state.auto_increment,
+ param->auto_increment_value);
+ }
+
+ /* Check that there isn't a row with auto_increment = 0 in the table */
+ maria_extra(info,HA_EXTRA_KEYREAD,0);
+ bzero(info->lastkey,keyinfo->seg->length);
+ if (!maria_rkey(info, info->rec_buff, key, (const byte*) info->lastkey,
+ keyinfo->seg->length, HA_READ_KEY_EXACT))
+ {
+ /* Don't count this as a real warning, as maria_chk can't correct it */
+ uint save=param->warning_printed;
+ _ma_check_print_warning(param, "Found row where the auto_increment "
+ "column has the value 0");
+ param->warning_printed=save;
+ }
+ maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
+ }
+
+ length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2;
+ if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L)
+ printf("Key: %2d: Keyblocks used: %3d%% Packed: %4d%% Max levels: %2d\n",
+ key+1,
+ (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)),
+ (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/
+ my_off_t2double(length)),
+ param->max_level);
+ all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length;
+
+do_stat:
+ if (param->testflag & T_STATISTICS)
+ maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ param->notnull_count: NULL,
+ (ulonglong)info->state->records);
+ }
+ if (param->testflag & T_INFO)
+ {
+ if (all_totaldata != 0L && found_keys > 0)
+ printf("Total: Keyblocks used: %3d%% Packed: %4d%%\n\n",
+ (int) (my_off_t2double(all_keydata)*100.0/
+ my_off_t2double(all_totaldata)),
+ (int) ((my_off_t2double(key_totlength) -
+ my_off_t2double(all_keydata))*100.0/
+ my_off_t2double(key_totlength)));
+ else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map))
+ puts("");
+ }
+ if (param->key_file_blocks != info->state->key_file_length &&
+ param->keys_in_use != ~(ulonglong) 0)
+ _ma_check_print_warning(param, "Some data are unreferenced in keyfile");
+ if (found_keys != full_text_keys)
+ param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */
+ else
+ param->record_checksum=0;
+ DBUG_RETURN(result);
+} /* maria_chk_key */
+
+
+static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t page, byte *buff, ha_rows *keys,
+ ha_checksum *key_checksum, uint level)
+{
+ char llbuff[22],llbuff2[22];
+ DBUG_ENTER("chk_index_down");
+
+  /* Key blocks must lie within the key file length entirely. */
+ if (page + keyinfo->block_length > info->state->key_file_length)
+ {
+ /* purecov: begin tested */
+ /* Give it a chance to fit in the real file size. */
+ my_off_t max_length= my_seek(info->s->kfile, 0L, MY_SEEK_END, MYF(0));
+ _ma_check_print_error(param, "Invalid key block position: %s "
+ "key block size: %u file_length: %s",
+ llstr(page, llbuff), keyinfo->block_length,
+ llstr(info->state->key_file_length, llbuff2));
+ if (page + keyinfo->block_length > max_length)
+ goto err;
+ /* Fix the remembered key file length. */
+ info->state->key_file_length= (max_length &
+ ~ (my_off_t) (keyinfo->block_length - 1));
+ /* purecov: end */
+ }
+
+ /* Key blocks must be aligned at block length */
+ if (page & (info->s->block_size -1))
+ {
+ /* purecov: begin tested */
+ _ma_check_print_error(param, "Mis-aligned key block: %s "
+ "minimum key block length: %u",
+ llstr(page, llbuff), info->s->block_size);
+ goto err;
+ /* purecov: end */
+ }
+
+ if (!_ma_fetch_keypage(info,keyinfo,page, DFLT_INIT_HITS,buff,0))
+ {
+ _ma_check_print_error(param,"Can't read key from filepos: %s",
+ llstr(page,llbuff));
+ goto err;
+ }
+ param->key_file_blocks+=keyinfo->block_length;
+ if (chk_index(param,info,keyinfo,page,buff,keys,key_checksum,level))
+ goto err;
+
+ DBUG_RETURN(0);
+
+ /* purecov: begin tested */
+err:
+ DBUG_RETURN(1);
+ /* purecov: end */
+}
+
+
+/*
+ "Ignore NULLs" statistics collection method: process first index tuple.
+
+ SYNOPSIS
+ maria_collect_stats_nonulls_first()
+ keyseg IN Array of key part descriptions
+ notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
+ tuples that don't contain NULLs)
+ key IN Key values tuple
+
+ DESCRIPTION
+ Process the first index tuple - find out which prefix tuples don't
+ contain NULLs, and update the array of notnull counters accordingly.
+*/
+
+static
+void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
+ const byte *key)
+{
+ uint first_null, kp;
+ first_null= ha_find_null(keyseg, (uchar*) key) - keyseg;
+ /*
+ All prefix tuples that don't include keypart_{first_null} are not-null
+ tuples (and all others aren't), increment counters for them.
+ */
+ for (kp= 0; kp < first_null; kp++)
+ notnull[kp]++;
+}
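+
+/*
+  Example: for a three-part key whose first tuple is (1, NULL, 3),
+  ha_find_null() stops at the second keypart, so first_null == 1 and only
+  notnull[0] is incremented: the prefix (keypart1) contains no NULLs,
+  while (keypart1,keypart2) and the full tuple do.
+*/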
+
+
+/*
+ "Ignore NULLs" statistics collection method: process next index tuple.
+
+ SYNOPSIS
+ maria_collect_stats_nonulls_next()
+ keyseg IN Array of key part descriptions
+ notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
+ tuples that don't contain NULLs)
+ prev_key IN Previous key values tuple
+ last_key IN Next key values tuple
+
+ DESCRIPTION
+ Process the next index tuple:
+ 1. Find out which prefix tuples of last_key don't contain NULLs, and
+ update the array of notnull counters accordingly.
+ 2. Find the first keypart number where the prev_key and last_key tuples
+ are different(A), or last_key has NULL value(B), and return it, so the
+ caller can count number of unique tuples for each key prefix. We don't
+ need (B) to be counted, and that is compensated back in
+ maria_update_key_parts().
+
+ RETURN
+ 1 + number of first keypart where values differ or last_key tuple has NULL
+*/
+
+static
+int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
+ const byte *prev_key,
+ const byte *last_key)
+{
+ uint diffs[2];
+ uint first_null_seg, kp;
+ HA_KEYSEG *seg;
+
+ /*
+ Find the first keypart where values are different or either of them is
+ NULL. We get results in diffs array:
+ diffs[0]= 1 + number of first different keypart
+ diffs[1]=offset: (last_key + diffs[1]) points to first value in
+ last_key that is NULL or different from corresponding
+ value in prev_key.
+ */
+ ha_key_cmp(keyseg, (uchar*) prev_key, (uchar*) last_key, USE_WHOLE_KEY,
+ SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs);
+ seg= keyseg + diffs[0] - 1;
+
+ /* Find first NULL in last_key */
+ first_null_seg= ha_find_null(seg, (uchar*) last_key + diffs[1]) - keyseg;
+ for (kp= 0; kp < first_null_seg; kp++)
+ notnull[kp]++;
+
+ /*
+    Return 1 + the number of the first key part where values differ. We
+    don't care whether the difference was caused by a NULL value; that is
+    compensated for in maria_update_key_parts().
+ */
+ return diffs[0];
+}
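+
+/*
+  Example: prev_key = (1, 2, 3), last_key = (1, NULL, 3).  The first
+  difference is at keypart 2, so diffs[0] == 2 and 2 is returned; the NULL
+  in keypart 2 also limits the not-null prefixes of last_key, so only
+  notnull[0] is incremented.
+*/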
+
+
+ /* Check if index is ok */
+
+static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t page, byte *buff, ha_rows *keys,
+ ha_checksum *key_checksum, uint level)
+{
+ int flag;
+ uint used_length,comp_flag,nod_flag,key_length=0;
+ byte key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos;
+ my_off_t next_page,record;
+ char llbuff[22];
+ uint diff_pos[2];
+ DBUG_ENTER("chk_index");
+ DBUG_DUMP("buff",(byte*) buff,maria_getint(buff));
+
+ /* TODO: implement appropriate check for RTree keys */
+ if (keyinfo->flag & HA_SPATIAL)
+ DBUG_RETURN(0);
+
+ if (!(temp_buff=(byte*) my_alloca((uint) keyinfo->block_length)))
+ {
+ _ma_check_print_error(param,"Not enough memory for keyblock");
+ DBUG_RETURN(-1);
+ }
+
+ if (keyinfo->flag & HA_NOSAME)
+ comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* Not real duplicates */
+ else
+    comp_flag=SEARCH_SAME;                      /* Keys in position order */
+ nod_flag=_ma_test_if_nod(buff);
+ used_length=maria_getint(buff);
+ keypos=buff+2+nod_flag;
+ endpos=buff+used_length;
+
+ param->keydata+=used_length; param->totaldata+=keyinfo->block_length; /* INFO */
+ param->key_blocks++;
+ if (level > param->max_level)
+ param->max_level=level;
+
+ if (used_length > keyinfo->block_length)
+ {
+ _ma_check_print_error(param,"Wrong pageinfo at page: %s",
+ llstr(page,llbuff));
+ goto err;
+ }
+ for ( ;; )
+ {
+ if (*_ma_killed_ptr(param))
+ goto err;
+ memcpy(info->lastkey, key, key_length);
+ info->lastkey_length= key_length;
+ if (nod_flag)
+ {
+ next_page= _ma_kpos(nod_flag,keypos);
+ if (chk_index_down(param,info,keyinfo,next_page,
+ temp_buff,keys,key_checksum,level+1))
+ goto err;
+ }
+ old_keypos=keypos;
+ if (keypos >= endpos ||
+ (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0)
+ break;
+ if (keypos > endpos)
+ {
+ _ma_check_print_error(param,"Wrong key block length at page: %s",
+ llstr(page,llbuff));
+ goto err;
+ }
+ if ((*keys)++ &&
+ (flag=ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key,
+ key_length, comp_flag, diff_pos)) >=0)
+ {
+ DBUG_DUMP("old", info->lastkey, info->lastkey_length);
+ DBUG_DUMP("new", key, key_length);
+ DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
+
+ if (comp_flag & SEARCH_FIND && flag == 0)
+ _ma_check_print_error(param,"Found duplicated key at page %s",
+ llstr(page,llbuff));
+ else
+ _ma_check_print_error(param,"Key in wrong position at page %s",
+ llstr(page,llbuff));
+ goto err;
+ }
+ if (param->testflag & T_STATISTICS)
+ {
+ if (*keys != 1L) /* not first_key */
+ {
+ if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
+ ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key,
+ USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
+ diff_pos);
+ else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ {
+ diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg,
+ param->notnull_count,
+ info->lastkey, key);
+ }
+ param->unique_count[diff_pos[0]-1]++;
+ }
+ else
+ {
+ if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
+ key);
+ }
+ }
+ (*key_checksum)+= maria_byte_checksum((byte*) key,
+ key_length- info->s->rec_reflength);
+ record= _ma_dpos(info,0,key+key_length);
+ if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */
+ {
+ uint off;
+ int subkeys;
+ get_key_full_length_rdonly(off, key);
+ subkeys=ft_sintXkorr(key+off);
+ if (subkeys < 0)
+ {
+ ha_rows tmp_keys=0;
+ if (chk_index_down(param,info,&info->s->ft2_keyinfo,record,
+ temp_buff,&tmp_keys,key_checksum,1))
+ goto err;
+ if (tmp_keys + subkeys)
+ {
+ _ma_check_print_error(param,
+ "Number of words in the 2nd level tree "
+ "does not match the number in the header. "
+ "Parent word in on the page %s, offset %u",
+ llstr(page,llbuff), (uint) (old_keypos-buff));
+ goto err;
+ }
+ (*keys)+=tmp_keys-1;
+ continue;
+ }
+ /* fall through */
+ }
+ if (record >= info->state->data_file_length)
+ {
+#ifndef DBUG_OFF
+ char llbuff2[22], llbuff3[22];
+#endif
+ _ma_check_print_error(param,"Found key at page %s that points to record outside datafile",llstr(page,llbuff));
+ DBUG_PRINT("test",("page: %s record: %s filelength: %s",
+ llstr(page,llbuff),llstr(record,llbuff2),
+ llstr(info->state->data_file_length,llbuff3)));
+ DBUG_DUMP("key",(byte*) key,key_length);
+ DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos));
+ goto err;
+ }
+ param->record_checksum+= (ha_checksum) record;
+ }
+ if (keypos != endpos)
+ {
+ _ma_check_print_error(param,"Keyblock size at page %s is not correct. Block length: %d key length: %d",
+ llstr(page,llbuff), used_length, (keypos - buff));
+ goto err;
+ }
+ my_afree((byte*) temp_buff);
+ DBUG_RETURN(0);
+ err:
+ my_afree((byte*) temp_buff);
+ DBUG_RETURN(1);
+} /* chk_index */
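+
+/*
+  Key block layout as read by chk_index() above: maria_getint(buff) gives
+  the used length of the block, _ma_test_if_nod(buff) tells whether the
+  block has child-page pointers (nod_flag), and keys start at
+  buff + 2 + nod_flag and are unpacked with keyinfo->get_key() until
+  'endpos'.  For node blocks each position also yields a child page
+  (_ma_kpos()) that is checked recursively through chk_index_down().
+*/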
+
+
+ /* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */
+
+static ha_checksum calc_checksum(ha_rows count)
+{
+ ulonglong sum,a,b;
+ DBUG_ENTER("calc_checksum");
+
+ sum=0;
+ a=count; b=count+1;
+ if (a & 1)
+ b>>=1;
+ else
+ a>>=1;
+ while (b)
+ {
+ if (b & 1)
+ sum+=a;
+ a<<=1; b>>=1;
+ }
+ DBUG_PRINT("exit",("sum: %lx",(ulong) sum));
+ DBUG_RETURN((ha_checksum) sum);
+} /* calc_checksum */
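+
+/*
+  Example: calc_checksum(7) halves the even factor first (a=7, b=8 -> b=4)
+  and the shift-and-add loop then yields sum = 7*4 = 28 = 1+2+...+7.
+  Halving before multiplying is what avoids forming N*(N+1) in full, which
+  could overflow for large row counts.
+*/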
+
+
+ /* Calc length of key in normal isam */
+
+static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo)
+{
+ uint length;
+ HA_KEYSEG *keyseg;
+ DBUG_ENTER("isam_key_length");
+
+ length= info->s->rec_reflength;
+ for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
+ length+= keyseg->length;
+
+ DBUG_PRINT("exit",("length: %d",length));
+ DBUG_RETURN(length);
+} /* key_length */
+
+
+
+static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos,
+ char *buff)
+{
+ if (info->s->data_file_type != BLOCK_RECORD)
+ llstr(recpos, buff);
+ else
+ {
+ my_off_t page= ma_recordpos_to_page(recpos);
+ uint row= ma_recordpos_to_offset(recpos);
+ char *end= longlong10_to_str(page, buff, 10);
+ *(end++)= ':';
+ longlong10_to_str(row, end, 10);
+ }
+}
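+
+/*
+  For BLOCK_RECORD tables a record position encodes a page number and a
+  directory slot, so it is printed as "page:row" (for example "12:3");
+  for the other record formats the plain byte position is printed.
+*/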
+
+
+/*
+ Check that keys in records exist in index tree
+
+ SYNOPSIS
+ check_keys_in_record()
+    param          Check parameter
+    info           Maria handler
+    extend         Type of check (extended or normal)
+    start_recpos   Position of row
+    record         Record buffer
+
+ NOTES
+ This function also calculates record checksum & number of rows
+*/
+
+static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ my_off_t start_recpos, byte *record)
+{
+ MARIA_KEYDEF *keyinfo;
+ char llbuff[22+4];
+ uint key;
+
+ param->tmp_record_checksum+= (ha_checksum) start_recpos;
+ param->records++;
+ if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0)
+ {
+ printf("%s\r", llstr(param->records, llbuff));
+ VOID(fflush(stdout));
+ }
+
+ /* Check if keys match the record */
+ for (key=0, keyinfo= info->s->keyinfo; key < info->s->base.keys;
+ key++,keyinfo++)
+ {
+ if (maria_is_key_active(info->s->state.key_map, key))
+ {
+ if(!(keyinfo->flag & HA_FULLTEXT))
+ {
+ uint key_length= _ma_make_key(info,key,info->lastkey,record,
+ start_recpos);
+ if (extend)
+ {
+ /* We don't need to lock the key tree here as we don't allow
+ concurrent threads when running maria_chk
+ */
+ int search_result=
+#ifdef HAVE_RTREE_KEYS
+ (keyinfo->flag & HA_SPATIAL) ?
+ maria_rtree_find_first(info, key, info->lastkey, key_length,
+ MBR_EQUAL | MBR_DATA) :
+#endif
+ _ma_search(info,keyinfo,info->lastkey,key_length,
+ SEARCH_SAME, info->s->state.key_root[key]);
+ if (search_result)
+ {
+ record_pos_to_txt(info, start_recpos, llbuff);
+ _ma_check_print_error(param,
+ "Record at: %14s "
+ "Can't find key for index: %2d",
+ llbuff, key+1);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ return -1;
+ }
+ }
+ else
+ param->tmp_key_crc[key]+=
+ maria_byte_checksum((byte*) info->lastkey, key_length);
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+ Functions to loop through all rows and check if they are ok
+
+ NOTES
+ One function for each record format
+
+ RESULT
+ 0 ok
+ -1 Interrupted by user
+ 1 Error
+*/
+
+static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ byte *record)
+{
+ my_off_t start_recpos, pos;
+ char llbuff[22];
+
+ pos= 0;
+ while (pos < info->state->data_file_length)
+ {
+ if (*_ma_killed_ptr(param))
+ return -1;
+ if (my_b_read(&param->read_cache,(byte*) record,
+ info->s->base.pack_reclength))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at position: %s",
+ my_errno, llstr(pos, llbuff));
+ return 1;
+ }
+ start_recpos= pos;
+ pos+= info->s->base.pack_reclength;
+ param->splits++;
+ if (*record == '\0')
+ {
+ param->del_blocks++;
+ param->del_length+= info->s->base.pack_reclength;
+ continue; /* Record removed */
+ }
+ param->glob_crc+= _ma_static_checksum(info,record);
+ param->used+= info->s->base.pack_reclength;
+ if (check_keys_in_record(param, info, extend, start_recpos, record))
+ return 1;
+ }
+ return 0;
+}
+
+
+static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ byte *record)
+{
+ MARIA_BLOCK_INFO block_info;
+ my_off_t start_recpos, start_block, pos;
+ byte *to;
+ ulong left_length;
+ uint b_type;
+ char llbuff[22],llbuff2[22],llbuff3[22];
+ DBUG_ENTER("check_dynamic_record");
+
+ LINT_INIT(left_length);
+ LINT_INIT(start_recpos);
+ LINT_INIT(to);
+
+ pos= 0;
+ while (pos < info->state->data_file_length)
+ {
+ my_bool got_error= 0;
+ int flag;
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(-1);
+
+ flag= block_info.second_read=0;
+ block_info.next_filepos=pos;
+ do
+ {
+ if (_ma_read_cache(&param->read_cache,(byte*) block_info.header,
+ (start_block=block_info.next_filepos),
+ sizeof(block_info.header),
+ (flag ? 0 : READING_NEXT) | READING_HEADER))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at "
+ "position: %s",
+ my_errno, llstr(start_block, llbuff));
+ DBUG_RETURN(1);
+ }
+
+ if (start_block & (MARIA_DYN_ALIGN_SIZE-1))
+ {
+ _ma_check_print_error(param,"Wrong aligned block at %s",
+ llstr(start_block,llbuff));
+ DBUG_RETURN(1);
+ }
+ b_type= _ma_get_block_info(&block_info,-1,start_block);
+ if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if (b_type & BLOCK_SYNC_ERROR)
+ {
+ if (flag)
+ {
+ _ma_check_print_error(param,"Unexpected byte: %d at link: %s",
+ (int) block_info.header[0],
+ llstr(start_block,llbuff));
+ DBUG_RETURN(1);
+ }
+ pos=block_info.filepos+block_info.block_len;
+ goto next;
+ }
+ if (b_type & BLOCK_DELETED)
+ {
+ if (block_info.block_len < info->s->base.min_block_length)
+ {
+ _ma_check_print_error(param,
+ "Deleted block with impossible length %lu at %s",
+ block_info.block_len,llstr(pos,llbuff));
+ DBUG_RETURN(1);
+ }
+ if ((block_info.next_filepos != HA_OFFSET_ERROR &&
+ block_info.next_filepos >= info->state->data_file_length) ||
+ (block_info.prev_filepos != HA_OFFSET_ERROR &&
+ block_info.prev_filepos >= info->state->data_file_length))
+ {
+ _ma_check_print_error(param,"Delete link points outside datafile at %s",
+ llstr(pos,llbuff));
+ DBUG_RETURN(1);
+ }
+ param->del_blocks++;
+ param->del_length+= block_info.block_len;
+ param->splits++;
+ pos= block_info.filepos+block_info.block_len;
+ goto next;
+ }
+ _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s",
+ block_info.header[0],block_info.header[1],
+ block_info.header[2],
+ llstr(start_block,llbuff));
+ DBUG_RETURN(1);
+ }
+ if (info->state->data_file_length < block_info.filepos+
+ block_info.block_len)
+ {
+ _ma_check_print_error(param,
+ "Recordlink that points outside datafile at %s",
+ llstr(pos,llbuff));
+ got_error=1;
+ break;
+ }
+ param->splits++;
+ if (!flag++) /* First block */
+ {
+ start_recpos=pos;
+ pos=block_info.filepos+block_info.block_len;
+ if (block_info.rec_len > (uint) info->s->base.max_pack_length)
+ {
+ _ma_check_print_error(param,"Found too long record (%lu) at %s",
+ (ulong) block_info.rec_len,
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ break;
+ }
+ if (info->s->base.blobs)
+ {
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ block_info.rec_len +
+ info->s->base.extra_rec_buff_size))
+
+ {
+ _ma_check_print_error(param,
+ "Not enough memory (%lu) for blob at %s",
+ (ulong) block_info.rec_len,
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ break;
+ }
+ }
+ to= info->rec_buff;
+ left_length= block_info.rec_len;
+ }
+ if (left_length < block_info.data_len)
+ {
+ _ma_check_print_error(param,"Found too long record (%lu) at %s",
+ (ulong) block_info.data_len,
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ break;
+ }
+ if (_ma_read_cache(&param->read_cache,(byte*) to,block_info.filepos,
+ (uint) block_info.data_len,
+ flag == 1 ? READING_NEXT : 0))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at position: %s", my_errno, llstr(block_info.filepos, llbuff));
+
+ DBUG_RETURN(1);
+ }
+ to+=block_info.data_len;
+ param->link_used+= block_info.filepos-start_block;
+ param->used+= block_info.filepos - start_block + block_info.data_len;
+ param->empty+= block_info.block_len-block_info.data_len;
+ left_length-= block_info.data_len;
+ if (left_length)
+ {
+ if (b_type & BLOCK_LAST)
+ {
+ _ma_check_print_error(param,
+ "Wrong record length %s of %s at %s",
+ llstr(block_info.rec_len-left_length,llbuff),
+ llstr(block_info.rec_len, llbuff2),
+ llstr(start_recpos,llbuff3));
+ got_error=1;
+ break;
+ }
+ if (info->state->data_file_length < block_info.next_filepos)
+ {
+ _ma_check_print_error(param,
+ "Found next-recordlink that points outside datafile at %s",
+ llstr(block_info.filepos,llbuff));
+ got_error=1;
+ break;
+ }
+ }
+ } while (left_length);
+
+ if (! got_error)
+ {
+ if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) ==
+ MY_FILE_ERROR)
+ {
+ _ma_check_print_error(param,"Found wrong record at %s",
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ }
+ else
+ {
+ info->cur_row.checksum= _ma_checksum(info,record);
+ if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
+ {
+ if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len,
+ test(info->s->calc_checksum)))
+ {
+ _ma_check_print_error(param,"Found wrong packed record at %s",
+ llstr(start_recpos,llbuff));
+ got_error= 1;
+ }
+ }
+ param->glob_crc+= info->cur_row.checksum;
+ }
+
+ if (! got_error)
+ {
+ if (check_keys_in_record(param, info, extend, start_recpos, record))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ }
+ }
+ else if (!flag)
+ pos= block_info.filepos+block_info.block_len;
+next:;
+ }
+ DBUG_RETURN(0);
+}
+
+
+static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ byte *record)
+{
+ my_off_t start_recpos, pos;
+ char llbuff[22];
+  my_bool got_error= 0;
+ MARIA_BLOCK_INFO block_info;
+ DBUG_ENTER("check_compressed_record");
+
+ pos= info->s->pack.header_length; /* Skip header */
+ while (pos < info->state->data_file_length)
+ {
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(-1);
+
+ if (_ma_read_cache(&param->read_cache,(byte*) block_info.header, pos,
+ info->s->pack.ref_length, READING_NEXT))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at position: %s",
+ my_errno, llstr(pos, llbuff));
+ DBUG_RETURN(1);
+ }
+
+ start_recpos= pos;
+ param->splits++;
+ VOID(_ma_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, &info->rec_buff_size, -1,
+ start_recpos));
+ pos=block_info.filepos+block_info.rec_len;
+ if (block_info.rec_len < (uint) info->s->min_pack_length ||
+ block_info.rec_len > (uint) info->s->max_pack_length)
+ {
+ _ma_check_print_error(param,
+ "Found block with wrong recordlength: %d at %s",
+ block_info.rec_len, llstr(start_recpos,llbuff));
+ got_error=1;
+ goto end;
+ }
+ if (_ma_read_cache(&param->read_cache,(byte*) info->rec_buff,
+ block_info.filepos, block_info.rec_len, READING_NEXT))
+ {
+ _ma_check_print_error(param,
+ "got error: %d when reading datafile at position: %s",
+ my_errno, llstr(block_info.filepos, llbuff));
+ DBUG_RETURN(1);
+ }
+ if (_ma_pack_rec_unpack(info, &info->bit_buff, record,
+ info->rec_buff, block_info.rec_len))
+ {
+ _ma_check_print_error(param,"Found wrong record at %s",
+ llstr(start_recpos,llbuff));
+ got_error=1;
+ goto end;
+ }
+ param->glob_crc+= (*info->s->calc_checksum)(info,record);
+ param->link_used+= (block_info.filepos - start_recpos);
+ param->used+= (pos-start_recpos);
+
+end:
+ if (! got_error)
+ {
+ if (check_keys_in_record(param, info, extend, start_recpos, record))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ got_error= 0; /* Reset for next loop */
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Check if layout on a page is ok
+*/
+
+static int check_page_layout(HA_CHECK *param, MARIA_HA *info,
+ my_off_t page_pos, byte *page,
+ uint row_count, uint head_empty,
+ uint *real_rows_found)
+{
+ uint empty, last_row_end, row, first_dir_entry;
+ byte *dir_entry;
+ char llbuff[22];
+ DBUG_ENTER("check_page_layout");
+
+ empty= 0;
+ last_row_end= PAGE_HEADER_SIZE;
+ *real_rows_found= 0;
+
+ dir_entry= page+ info->s->block_size - PAGE_SUFFIX_SIZE;
+ first_dir_entry= info->s->block_size - row_count* DIR_ENTRY_SIZE;
+ for (row= 0 ; row < row_count ; row++)
+ {
+ uint pos, length;
+ dir_entry-= DIR_ENTRY_SIZE;
+ pos= uint2korr(dir_entry);
+ if (!pos)
+ {
+ if (row == row_count -1)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: First entry in directory is 0",
+ llstr(page_pos, llbuff));
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ }
+ continue; /* Deleted row */
+ }
+ (*real_rows_found)++;
+ length= uint2korr(dir_entry+2);
+ param->used+= length;
+ if (pos < last_row_end)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row %3u overlapps with previous row",
+ llstr(page_pos, llbuff), row);
+ DBUG_RETURN(1);
+ }
+ empty+= (pos - last_row_end);
+ last_row_end= pos + length;
+ if (last_row_end > first_dir_entry)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row %3u overlapps with directory",
+ llstr(page_pos, llbuff), row);
+ DBUG_RETURN(1);
+ }
+ }
+ empty+= (first_dir_entry - last_row_end);
+
+ if (empty != head_empty)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Wrong empty size. Stored: %5u Actual: %5u",
+ llstr(page_pos, llbuff), head_empty, empty);
+ DBUG_RETURN(param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE));
+ }
+ DBUG_RETURN(0);
+}
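+
+/*
+  Page layout checked above (sketch; the exact constants are defined in
+  ma_blockrec.h):
+
+    +-------------+-----------------------------+-----------+-------------+
+    | page header | row data; gaps between rows | directory | page suffix |
+    |             | are counted as 'empty'      | entries   |             |
+    +-------------+-----------------------------+-----------+-------------+
+
+  The directory grows downwards from the end of the page.  Each entry is
+  DIR_ENTRY_SIZE bytes holding a row's position and length; a position of
+  0 marks a deleted row, and rows must neither overlap each other nor run
+  into the directory.
+*/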
+
+
+/*
+ Check all rows on head page
+
+ NOTES
+    Before this, we have already called check_page_layout(), so
+    we know the block is logically correct (even if the rows themselves
+    may not be)
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+
+static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, byte *record,
+ int extend, my_off_t page_pos, byte *page_buff,
+ uint row_count)
+{
+ byte *dir_entry;
+ uint row;
+ char llbuff[22], llbuff2[22];
+ DBUG_ENTER("check_head_page");
+
+ dir_entry= page_buff+ info->s->block_size - PAGE_SUFFIX_SIZE;
+ for (row= 0 ; row < row_count ; row++)
+ {
+ uint pos, length, flag;
+ dir_entry-= DIR_ENTRY_SIZE;
+ pos= uint2korr(dir_entry);
+ if (!pos)
+ continue;
+ length= uint2korr(dir_entry+2);
+ if (length < info->s->base.min_block_length)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row %3u is too short (%d bytes)",
+ llstr(page_pos, llbuff), row, length);
+ DBUG_RETURN(1);
+ }
+ flag= (uint) (uchar) page_buff[pos];
+ if (flag & ~(ROW_FLAG_ALL))
+ _ma_check_print_error(param,
+ "Page %9s: Row %3u has wrong flag: %d",
+ llstr(page_pos, llbuff), row, flag);
+
+ DBUG_PRINT("info", ("rowid: %s page: %lu row: %u",
+ llstr(ma_recordpos(page_pos/info->s->block_size, row),
+ llbuff),
+ (ulong) (page_pos / info->s->block_size), row));
+ if (_ma_read_block_record2(info, record, page_buff+pos,
+ page_buff+pos+length))
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row %3d is crashed",
+ llstr(page_pos, llbuff), row);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ continue;
+ }
+ if (info->s->calc_checksum)
+ {
+ info->cur_row.checksum= _ma_checksum(info, record);
+ param->glob_crc+= info->cur_row.checksum;
+ }
+ if (info->cur_row.extents_count)
+ {
+ byte *extents= info->cur_row.extents;
+ uint i;
+ /* Check that bitmap has the right marker for the found extents */
+ for (i= 0 ; i < info->cur_row.extents_count ; i++)
+ {
+ uint page, page_count, page_type;
+ page= uint5korr(extents);
+ page_count= uint2korr(extents+5);
+ extents+= ROW_EXTENT_SIZE;
+ page_type= BLOB_PAGE;
+ if (page_count & TAIL_BIT)
+ {
+ page_count= 1;
+ page_type= TAIL_PAGE;
+ }
+ for ( ; page_count--; page++)
+ {
+ uint bitmap_pattern;
+ if (_ma_check_if_right_bitmap_type(info, page_type, page,
+ &bitmap_pattern))
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Row: %3d has an extent with wrong information in bitmap: Page %9s Page_type: %d Bitmap: %d",
+ llstr(page_pos, llbuff), row,
+ llstr(page * info->s->bitmap.block_size,
+ llbuff2),
+ page_type,
+ bitmap_pattern);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ }
+ param->full_page_count+= info->cur_row.full_page_count;
+ param->tail_count+= info->cur_row.tail_count;
+ if (check_keys_in_record(param, info, extend,
+ ma_recordpos(page_pos/info->s->block_size, row),
+ record))
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
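+
+/*
+  Extent format used in check_head_page() above: each extent in
+  info->cur_row.extents is ROW_EXTENT_SIZE bytes, a 5-byte start page
+  followed by a 2-byte page count.  If TAIL_BIT is set in the count the
+  extent refers to a single tail page, otherwise it covers 'page_count'
+  full blob pages, and every referenced page must have the matching page
+  type in the bitmap.
+*/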
+
+
+
+static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
+ byte *record)
+{
+ my_off_t pos;
+ byte *page_buff, *bitmap_buff, *data;
+ char llbuff[22], llbuff2[22];
+ uint block_size= info->s->block_size;
+ ha_rows full_page_count, tail_count;
+ my_bool full_dir;
+ uint offset_page, offset;
+
+ if (_ma_scan_init_block_record(info))
+ {
+ _ma_check_print_error(param, "got error %d when initializing scan",
+ my_errno);
+ return 1;
+ }
+ bitmap_buff= info->scan.bitmap_buff;
+ page_buff= info->scan.page_buff;
+ full_page_count= tail_count= 0;
+ param->full_page_count= param->tail_count= 0;
+ param->used= param->link_used= 0;
+
+ for (pos= 0;
+ pos < info->state->data_file_length;
+ pos+= block_size)
+ {
+ uint row_count, real_row_count, empty_space, page_type, bitmap_pattern;
+ LINT_INIT(row_count);
+ LINT_INIT(empty_space);
+
+ if (*_ma_killed_ptr(param))
+ {
+ _ma_scan_end_block_record(info);
+ return -1;
+ }
+ if (((pos / block_size) % info->s->bitmap.pages_covered) == 0)
+ {
+ /* Bitmap page */
+ if (_ma_read_cache(&param->read_cache, bitmap_buff, pos,
+ block_size, READING_NEXT))
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Got error: %d when reading datafile",
+                              llstr(pos, llbuff), my_errno);
+ goto err;
+ }
+ param->used+= block_size;
+ param->link_used+= block_size;
+ continue;
+ }
+ /* Skip pages marked as empty in bitmap */
+ offset_page= (((pos / block_size) % info->s->bitmap.pages_covered) -1) * 3;
+ offset= offset_page & 7;
+ data= bitmap_buff + offset_page / 8;
+ bitmap_pattern= uint2korr(data);
+ param->splits++;
+ if (!((bitmap_pattern >> offset) & 7))
+ {
+ param->empty+= block_size;
+ param->del_blocks++;
+ continue;
+ }
+
+ if (_ma_read_cache(&param->read_cache, page_buff, pos,
+ block_size, READING_NEXT))
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Got error: %d when reading datafile",
+                              llstr(pos, llbuff), my_errno);
+ goto err;
+ }
+ page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
+ if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Found wrong page type %d\n",
+ llstr(pos, llbuff), page_type);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ goto err;
+ }
+ switch ((enum en_page_type) page_type) {
+ case UNALLOCATED_PAGE:
+ case MAX_PAGE_TYPE:
+ DBUG_PRINT("warning",
+ ("Found page with wrong page type: %d", page_type));
+ DBUG_ASSERT(0);
+ break;
+ case HEAD_PAGE:
+ row_count= ((uchar*) page_buff)[DIR_ENTRY_OFFSET];
+ empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
+ param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
+ row_count * DIR_ENTRY_SIZE);
+ param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
+ row_count * DIR_ENTRY_SIZE);
+ full_dir= row_count == MAX_ROWS_PER_PAGE;
+ break;
+ case TAIL_PAGE:
+ row_count= ((uchar*) page_buff)[DIR_ENTRY_OFFSET];
+ empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
+ param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
+ row_count * DIR_ENTRY_SIZE);
+ param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE +
+ row_count * DIR_ENTRY_SIZE);
+ full_dir= row_count == MAX_ROWS_PER_PAGE;
+ break;
+ case BLOB_PAGE:
+ full_page_count++;
+ full_dir= 0;
+ empty_space= block_size; /* for error reporting */
+ param->link_used+= (LSN_SIZE + PAGE_TYPE_SIZE);
+ param->used+= block_size;
+ break;
+ }
+ if (_ma_check_bitmap_data(info, page_type, pos / block_size,
+ full_dir ? 0 : empty_space,
+ &bitmap_pattern))
+ {
+ _ma_check_print_error(param,
+ "Page %9s: Wrong data in bitmap. Page_type: %d empty_space: %u Bitmap: %d",
+ llstr(pos, llbuff), page_type, empty_space,
+ bitmap_pattern);
+ if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
+ goto err;
+ }
+ if ((enum en_page_type) page_type == BLOB_PAGE)
+ continue;
+ param->empty+= empty_space;
+ if (check_page_layout(param, info, pos, page_buff, row_count,
+ empty_space, &real_row_count))
+ goto err;
+ if ((enum en_page_type) page_type == TAIL_PAGE)
+ {
+ tail_count+= real_row_count;
+ continue;
+ }
+ if (check_head_page(param, info, record, extend, pos, page_buff,
+ row_count))
+ goto err;
+ }
+
+ _ma_scan_end_block_record(info);
+
+ if (full_page_count != param->full_page_count)
+ _ma_check_print_error(param, "Full page count read through records was %s but we found %s pages while scanning table",
+ llstr(param->full_page_count, llbuff),
+ llstr(full_page_count, llbuff2));
+ if (tail_count != param->tail_count)
+ _ma_check_print_error(param, "Tail count read through records was %s but we found %s tails while scanning table",
+ llstr(param->tail_count, llbuff),
+ llstr(tail_count, llbuff2));
+
+ /* Update splits to avoid warning */
+ info->s->state.split= param->splits;
+ info->state->del= param->del_blocks;
+ return param->error_printed != 0;
+
+err:
+ _ma_scan_end_block_record(info);
+ return 1;
+}
+
+
+ /* Check that record-link is ok */
+
+int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend)
+{
+ int error;
+ byte *record;
+ char llbuff[22],llbuff2[22],llbuff3[22];
+ DBUG_ENTER("maria_chk_data_link");
+
+ if (!(param->testflag & T_SILENT))
+ {
+ if (extend)
+ puts("- check records and index references");
+ else
+ puts("- check record links");
+ }
+
+ if (!(record= (byte*) my_malloc(info->s->base.pack_reclength,MYF(0))))
+ {
+ _ma_check_print_error(param,"Not enough memory for record");
+ DBUG_RETURN(-1);
+ }
+ param->records= param->del_blocks= 0;
+ param->used= param->link_used= param->splits= param->del_length= 0;
+ param->tmp_record_checksum= param->glob_crc= 0;
+ param->err_count= 0;
+
+ error= 0;
+ param->empty= info->s->pack.header_length;
+
+ bzero((char*) param->tmp_key_crc,
+ info->s->base.keys * sizeof(param->tmp_key_crc[0]));
+
+ switch (info->s->data_file_type) {
+ case BLOCK_RECORD:
+ error= check_block_record(param, info, extend, record);
+ break;
+ case STATIC_RECORD:
+ error= check_static_record(param, info, extend, record);
+ break;
+ case DYNAMIC_RECORD:
+ error= check_dynamic_record(param, info, extend, record);
+ break;
+ case COMPRESSED_RECORD:
+ error= check_compressed_record(param, info, extend, record);
+ break;
+ } /* switch */
+
+ if (error)
+ goto err;
+
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ VOID(fputs(" \r",stdout)); VOID(fflush(stdout));
+ }
+ if (param->records != info->state->records)
+ {
+ _ma_check_print_error(param,
+ "Record-count is not ok; found %-10s Should be: %s",
+ llstr(param->records,llbuff),
+ llstr(info->state->records,llbuff2));
+ error=1;
+ }
+ else if (param->record_checksum &&
+ param->record_checksum != param->tmp_record_checksum)
+ {
+ _ma_check_print_error(param,
+ "Key pointers and record positions doesn't match");
+ error=1;
+ }
+ else if (param->glob_crc != info->state->checksum &&
+ (info->s->options &
+ (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
+ {
+ _ma_check_print_warning(param,
+ "Record checksum is not the same as checksum stored in the index file\n");
+ error=1;
+ }
+ else if (!extend)
+ {
+ uint key;
+ for (key=0 ; key < info->s->base.keys; key++)
+ {
+ if (param->tmp_key_crc[key] != param->key_crc[key] &&
+ !(info->s->keyinfo[key].flag & (HA_FULLTEXT | HA_SPATIAL)))
+ {
+ _ma_check_print_error(param,"Checksum for key: %2d doesn't match checksum for records",
+ key+1);
+ error=1;
+ }
+ }
+ }
+
+ if (param->del_length != info->state->empty)
+ {
+ _ma_check_print_warning(param,
+ "Found %s deleted space. Should be %s",
+ llstr(param->del_length,llbuff2),
+ llstr(info->state->empty,llbuff));
+ }
+ if (param->used + param->empty + param->del_length !=
+ info->state->data_file_length)
+ {
+ _ma_check_print_warning(param,
+ "Found %s record data and %s unused data and %s deleted data",
+ llstr(param->used, llbuff),
+ llstr(param->empty,llbuff2),
+ llstr(param->del_length,llbuff3));
+ _ma_check_print_warning(param,
+ "Total %s Should be: %s",
+ llstr((param->used+param->empty+param->del_length),
+ llbuff),
+ llstr(info->state->data_file_length,llbuff2));
+ }
+ if (param->del_blocks != info->state->del)
+ {
+ _ma_check_print_warning(param,
+ "Found %10s deleted blocks Should be: %s",
+ llstr(param->del_blocks,llbuff),
+ llstr(info->state->del,llbuff2));
+ }
+ if (param->splits != info->s->state.split)
+ {
+ _ma_check_print_warning(param,
+ "Found %10s parts Should be: %s parts",
+ llstr(param->splits, llbuff),
+ llstr(info->s->state.split,llbuff2));
+ }
+ if (param->testflag & T_INFO)
+ {
+ if (param->warning_printed || param->error_printed)
+ puts("");
+ if (param->used != 0 && ! param->error_printed)
+ {
+ if (param->records)
+ {
+ printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n",
+ llstr(param->records,llbuff),
+ (long)((param->used - param->link_used)/param->records),
+ (info->s->base.blobs ? 0.0 :
+ (ulonglong2double((ulonglong) info->s->base.reclength *
+ param->records)-
+ my_off_t2double(param->used))/
+ ulonglong2double((ulonglong) info->s->base.reclength *
+ param->records)*100.0));
+ printf("Recordspace used:%9.0f%% Empty space:%12d%% Blocks/Record: %6.2f\n",
+ (ulonglong2double(param->used - param->link_used)/
+ ulonglong2double(param->used-param->link_used+param->empty)*100.0),
+ (!param->records ? 100 :
+ (int) (ulonglong2double(param->del_length+param->empty)/
+ my_off_t2double(param->used)*100.0)),
+ ulonglong2double(param->splits - param->del_blocks) /
+ param->records);
+ }
+ else
+ printf("Records:%18s\n", "0");
+ }
+ printf("Record blocks:%12s Delete blocks:%10s\n",
+ llstr(param->splits - param->del_blocks, llbuff),
+ llstr(param->del_blocks, llbuff2));
+ printf("Record data: %12s Deleted data: %10s\n",
+ llstr(param->used - param->link_used,llbuff),
+ llstr(param->del_length, llbuff2));
+ printf("Lost space: %12s Linkdata: %10s\n",
+ llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
+ }
+ my_free((gptr) record,MYF(0));
+ DBUG_RETURN (error);
+
+ err:
+ my_free((gptr) record,MYF(0));
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ DBUG_RETURN(1);
+} /* maria_chk_data_link */
+
+
+ /* Recover old table by reading each record and writing all keys */
+ /* Save new datafile-name in temp_filename */
+
+int maria_repair(HA_CHECK *param, register MARIA_HA *info,
+ my_string name, int rep_quick)
+{
+ int error,got_error;
+ uint i;
+ ha_rows start_records,new_header_length;
+ my_off_t del;
+ File new_file;
+ MARIA_SHARE *share=info->s;
+ char llbuff[22],llbuff2[22];
+ MARIA_SORT_INFO sort_info;
+ MARIA_SORT_PARAM sort_param;
+ DBUG_ENTER("maria_repair");
+
+ bzero((char *)&sort_info, sizeof(sort_info));
+ bzero((char *)&sort_param, sizeof(sort_param));
+ start_records=info->state->records;
+ new_header_length=(param->testflag & T_UNPACK) ? 0L :
+ share->pack.header_length;
+ got_error=1;
+ new_file= -1;
+ sort_param.sort_info=&sort_info;
+
+ if (!(param->testflag & T_SILENT))
+ {
+ printf("- recovering (with keycache) MARIA-table '%s'\n",name);
+ printf("Data records: %s\n", llstr(info->state->records,llbuff));
+ }
+ param->testflag|=T_REP; /* for easy checking */
+
+ if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+ param->testflag|=T_CALC_CHECKSUM;
+
+ if (!param->using_global_keycache)
+ VOID(init_key_cache(maria_key_cache, param->key_cache_block_size,
+ param->use_buffers, 0, 0));
+
+ if (init_io_cache(&param->read_cache,info->dfile,
+ (uint) param->read_buffer_length,
+ READ_CACHE,share->pack.header_length,1,MYF(MY_WME)))
+ {
+ bzero(&info->rec_cache,sizeof(info->rec_cache));
+ goto err;
+ }
+ if (!rep_quick)
+ if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length,
+ WRITE_CACHE, new_header_length, 1,
+ MYF(MY_WME | MY_WAIT_IF_FULL)))
+ goto err;
+ info->opt_flag|=WRITE_CACHE_USED;
+ if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength,
+ MYF(0))) ||
+ _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
+ info->s->base.default_rec_buff_size))
+ {
+ _ma_check_print_error(param, "Not enough memory for extra record");
+ goto err;
+ }
+
+ if (!rep_quick)
+ {
+ /* Get real path for data file */
+ if ((new_file= my_create(fn_format(param->temp_filename,
+ share->data_file_name, "",
+ DATA_TMP_EXT, 2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0))) < 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ goto err;
+ }
+ if (new_header_length &&
+ maria_filecopy(param,new_file,info->dfile,0L,new_header_length,
+ "datafile-header"))
+ goto err;
+ info->s->state.dellink= HA_OFFSET_ERROR;
+ info->rec_cache.file=new_file;
+ if (param->testflag & T_UNPACK)
+ restore_data_file_type(share);
+ }
+ sort_info.info=info;
+ sort_info.param = param;
+ sort_param.read_cache=param->read_cache;
+ sort_param.pos=sort_param.max_pos=share->pack.header_length;
+ sort_param.filepos=new_header_length;
+ param->read_cache.end_of_file=sort_info.filelength=
+ my_seek(info->dfile,0L,MY_SEEK_END,MYF(0));
+ sort_info.dupp=0;
+ sort_param.fix_datafile= (my_bool) (! rep_quick);
+ sort_param.master=1;
+ sort_info.max_records= ~(ha_rows) 0;
+
+ set_data_file_type(&sort_info, share);
+ del=info->state->del;
+ info->state->records=info->state->del=share->state.split=0;
+ info->state->empty=0;
+ param->glob_crc=0;
+ if (param->testflag & T_CALC_CHECKSUM)
+ sort_param.calc_checksum= 1;
+
+ info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ /*
+ Clear all keys. Note that all key blocks allocated until now remain
+ "dead" parts of the key file. (Bug #4692)
+ */
+ for (i=0 ; i < info->s->base.keys ; i++)
+ share->state.key_root[i]= HA_OFFSET_ERROR;
+
+ /* Drop the delete chain. */
+ share->state.key_del= HA_OFFSET_ERROR;
+
+ /*
+ If requested, activate (enable) all keys in key_map. In this case,
+ all indexes will be (re-)built.
+ */
+ if (param->testflag & T_CREATE_MISSING_KEYS)
+ maria_set_all_keys_active(share->state.key_map, share->base.keys);
+
+ info->state->key_file_length=share->base.keystart;
+
+ maria_lock_memory(param); /* Everything is alloced */
+
+ /* Re-create all keys, which are set in key_map. */
+ while (!(error=sort_get_next_record(&sort_param)))
+ {
+ if (writekeys(&sort_param))
+ {
+ if (my_errno != HA_ERR_FOUND_DUPP_KEY)
+ goto err;
+ DBUG_DUMP("record",(byte*) sort_param.record,share->base.pack_reclength);
+ _ma_check_print_info(param,"Duplicate key %2d for record at %10s against new record at %10s",
+ info->errkey+1,
+ llstr(sort_param.start_recpos,llbuff),
+ llstr(info->dup_key_pos,llbuff2));
+ if (param->testflag & T_VERBOSE)
+ {
+ VOID(_ma_make_key(info,(uint) info->errkey,info->lastkey,
+ sort_param.record,0L));
+ _ma_print_key(stdout,share->keyinfo[info->errkey].seg,info->lastkey,
+ USE_WHOLE_KEY);
+ }
+ sort_info.dupp++;
+ if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
+ {
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ param->error_printed=1;
+ goto err;
+ }
+ continue;
+ }
+ if (_ma_sort_write_record(&sort_param))
+ goto err;
+ }
+ if (error > 0 || maria_write_data_suffix(&sort_info, (my_bool)!rep_quick) ||
+ flush_io_cache(&info->rec_cache) || param->read_cache.error < 0)
+ goto err;
+
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ VOID(fputs(" \r",stdout)); VOID(fflush(stdout));
+ }
+ if (my_chsize(share->kfile,info->state->key_file_length,0,MYF(0)))
+ {
+ _ma_check_print_warning(param,
+ "Can't change size of indexfile, error: %d",
+ my_errno);
+ goto err;
+ }
+
+ if (rep_quick && del+sort_info.dupp != info->state->del)
+ {
+ _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
+ _ma_check_print_error(param,"Run recovery again without -q");
+ got_error=1;
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ goto err;
+ }
+ if (param->testflag & T_SAFE_REPAIR)
+ {
+ /* Don't repair if we lost more than one row */
+ if (info->state->records+1 < start_records)
+ {
+ info->state->records=start_records;
+ got_error=1;
+ goto err;
+ }
+ }
+
+ if (!rep_quick)
+ {
+ my_close(info->dfile,MYF(0));
+ info->dfile=new_file;
+ info->state->data_file_length=sort_param.filepos;
+ share->state.version=(ulong) time((time_t*) 0); /* Force reopen */
+ }
+ else
+ {
+ info->state->data_file_length=sort_param.max_pos;
+ }
+ if (param->testflag & T_CALC_CHECKSUM)
+ info->state->checksum=param->glob_crc;
+
+ if (!(param->testflag & T_SILENT))
+ {
+ if (start_records != info->state->records)
+ printf("Data records: %s\n", llstr(info->state->records,llbuff));
+ if (sort_info.dupp)
+ _ma_check_print_warning(param,
+ "%s records have been removed",
+ llstr(sort_info.dupp,llbuff));
+ }
+
+ got_error=0;
+ /* If invoked by external program that uses thr_lock */
+ if (&share->state.state != info->state)
+ memcpy( &share->state.state, info->state, sizeof(*info->state));
+
+err:
+ if (!got_error)
+ {
+ /* Replace the actual file with the temporary file */
+ if (new_file >= 0)
+ {
+ my_close(new_file,MYF(0));
+ info->dfile=new_file= -1;
+ if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT,
+ DATA_TMP_EXT, (param->testflag & T_BACKUP_DATA ?
+ MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) ||
+ _ma_open_datafile(info,share,-1))
+ got_error=1;
+ }
+ }
+ if (got_error)
+ {
+ if (! param->error_printed)
+ _ma_check_print_error(param,"%d for record at pos %s",my_errno,
+ llstr(sort_param.start_recpos,llbuff));
+ if (new_file >= 0)
+ {
+ VOID(my_close(new_file,MYF(0)));
+ VOID(my_delete(param->temp_filename, MYF(MY_WME)));
+ info->rec_cache.file=-1; /* don't flush data to new_file, it's closed */
+ }
+ maria_mark_crashed_on_repair(info);
+ }
+ my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
+ VOID(end_io_cache(&param->read_cache));
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ VOID(end_io_cache(&info->rec_cache));
+ got_error|=_ma_flush_blocks(param, share->key_cache, share->kfile);
+ if (!got_error && (param->testflag & T_UNPACK))
+ restore_data_file_type(share);
+ share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES |
+ STATE_NOT_ANALYZED);
+ DBUG_RETURN(got_error);
+}
+
+
+/* Update key file when doing repair */
+
+static int writekeys(MARIA_SORT_PARAM *sort_param)
+{
+ register uint i;
+ byte *key;
+ MARIA_HA *info= sort_param->sort_info->info;
+ byte *buff= sort_param->record;
+ my_off_t filepos= sort_param->filepos;
+ DBUG_ENTER("writekeys");
+
+ key= info->lastkey+info->s->base.max_key_length;
+ for (i=0 ; i < info->s->base.keys ; i++)
+ {
+ if (maria_is_key_active(info->s->state.key_map, i))
+ {
+ if (info->s->keyinfo[i].flag & HA_FULLTEXT )
+ {
+ if (_ma_ft_add(info,i,(char*) key,buff,filepos))
+ goto err;
+ }
+#ifdef HAVE_SPATIAL
+ else if (info->s->keyinfo[i].flag & HA_SPATIAL)
+ {
+ uint key_length= _ma_make_key(info,i,key,buff,filepos);
+ if (maria_rtree_insert(info, i, key, key_length))
+ goto err;
+ }
+#endif /*HAVE_SPATIAL*/
+ else
+ {
+ uint key_length= _ma_make_key(info,i,key,buff,filepos);
+ if (_ma_ck_write(info,i,key,key_length))
+ goto err;
+ }
+ }
+ }
+ DBUG_RETURN(0);
+
+ err:
+ if (my_errno == HA_ERR_FOUND_DUPP_KEY)
+ {
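+ /*
+ Undo: walk back over the indexes in reverse order and remove the keys
+ that were already inserted for this record, so the key file stays
+ consistent after the duplicate-key failure.
+ */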
+ info->errkey=(int) i; /* This key was found */
+ while ( i-- > 0 )
+ {
+ if (maria_is_key_active(info->s->state.key_map, i))
+ {
+ if (info->s->keyinfo[i].flag & HA_FULLTEXT)
+ {
+ if (_ma_ft_del(info,i,(char*) key,buff,filepos))
+ break;
+ }
+ else
+ {
+ uint key_length= _ma_make_key(info,i,key,buff,filepos);
+ if (_ma_ck_delete(info,i,key,key_length))
+ break;
+ }
+ }
+ }
+ }
+ /* Remove checksum that was added to glob_crc in sort_get_next_record */
+ if (sort_param->calc_checksum)
+ sort_param->sort_info->param->glob_crc-= info->cur_row.checksum;
+ DBUG_PRINT("error",("errno: %d",my_errno));
+ DBUG_RETURN(-1);
+} /* writekeys */
+
+
+ /* Change all key pointers that point to a record */
+
+int maria_movepoint(register MARIA_HA *info, byte *record,
+ MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos,
+ uint prot_key)
+{
+ register uint i;
+ byte *key;
+ uint key_length;
+ DBUG_ENTER("maria_movepoint");
+
+ key= info->lastkey+info->s->base.max_key_length;
+ for (i=0 ; i < info->s->base.keys; i++)
+ {
+ if (i != prot_key && maria_is_key_active(info->s->state.key_map, i))
+ {
+ key_length= _ma_make_key(info,i,key,record,oldpos);
+ if (info->s->keyinfo[i].flag & HA_NOSAME)
+ { /* Change pointer direct */
+ uint nod_flag;
+ MARIA_KEYDEF *keyinfo;
+ keyinfo=info->s->keyinfo+i;
+ if (_ma_search(info,keyinfo,key,USE_WHOLE_KEY,
+ (uint) (SEARCH_SAME | SEARCH_SAVE_BUFF),
+ info->s->state.key_root[i]))
+ DBUG_RETURN(-1);
+ nod_flag=_ma_test_if_nod(info->buff);
+ _ma_dpointer(info,info->int_keypos-nod_flag-
+ info->s->rec_reflength,newpos);
+ if (_ma_write_keypage(info,keyinfo,info->last_keypage,
+ DFLT_INIT_HITS,info->buff))
+ DBUG_RETURN(-1);
+ }
+ else
+ { /* Change old key to new */
+ if (_ma_ck_delete(info,i,key,key_length))
+ DBUG_RETURN(-1);
+ key_length= _ma_make_key(info,i,key,record,newpos);
+ if (_ma_ck_write(info,i,key,key_length))
+ DBUG_RETURN(-1);
+ }
+ }
+ }
+ DBUG_RETURN(0);
+} /* maria_movepoint */
+
+
+ /* Tell system that we want all memory for our cache */
+
+void maria_lock_memory(HA_CHECK *param __attribute__((unused)))
+{
+#ifdef SUN_OS /* Key caching thrashes on SunOS 4.1 */
+ if (param->opt_maria_lock_memory)
+ {
+ int success = mlockall(MCL_CURRENT); /* or plock(DATLOCK); */
+ if (geteuid() == 0 && success != 0)
+ _ma_check_print_warning(param,
+ "Failed to lock memory. errno %d",my_errno);
+ }
+#endif
+} /* maria_lock_memory */
+
+
+ /* Flush all changed blocks to disk */
+
+int _ma_flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file)
+{
+ if (flush_key_blocks(key_cache, file, FLUSH_RELEASE))
+ {
+ _ma_check_print_error(param,"%d when trying to write buffers",my_errno);
+ return(1);
+ }
+ if (!param->using_global_keycache)
+ end_key_cache(key_cache,1);
+ return 0;
+} /* _ma_flush_blocks */
+
+
+ /* Sort index for more efficient reads */
+
+int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name)
+{
+ reg2 uint key;
+ reg1 MARIA_KEYDEF *keyinfo;
+ File new_file;
+ my_off_t index_pos[HA_MAX_POSSIBLE_KEY];
+ uint r_locks,w_locks;
+ int old_lock;
+ MARIA_SHARE *share=info->s;
+ MARIA_STATE_INFO old_state;
+ DBUG_ENTER("maria_sort_index");
+
+ if (!(param->testflag & T_SILENT))
+ printf("- Sorting index for MARIA-table '%s'\n",name);
+
+ /* Get real path for index file */
+ fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32);
+ if ((new_file=my_create(fn_format(param->temp_filename,param->temp_filename,
+ "", INDEX_TMP_EXT,2+4),
+ 0,param->tmpfile_createflag,MYF(0))) <= 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ DBUG_RETURN(-1);
+ }
+ if (maria_filecopy(param, new_file,share->kfile,0L,
+ (ulong) share->base.keystart, "headerblock"))
+ goto err;
+
+ param->new_file_pos=share->base.keystart;
+ for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
+ key++,keyinfo++)
+ {
+ if (! maria_is_key_active(info->s->state.key_map, key))
+ continue;
+
+ if (share->state.key_root[key] != HA_OFFSET_ERROR)
+ {
+ index_pos[key]=param->new_file_pos; /* Write first block here */
+ if (sort_one_index(param,info,keyinfo,share->state.key_root[key],
+ new_file))
+ goto err;
+ }
+ else
+ index_pos[key]= HA_OFFSET_ERROR; /* No blocks */
+ }
+
+ /* Flush key cache for this file if we are calling this outside maria_chk */
+ flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED);
+
+ share->state.version=(ulong) time((time_t*) 0);
+ old_state= share->state; /* save state if not stored */
+ r_locks= share->r_locks;
+ w_locks= share->w_locks;
+ old_lock= info->lock_type;
+
+ /* Put same locks as old file */
+ share->r_locks= share->w_locks= share->tot_locks= 0;
+ (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
+ VOID(my_close(share->kfile,MYF(MY_WME)));
+ share->kfile = -1;
+ VOID(my_close(new_file,MYF(MY_WME)));
+ if (maria_change_to_newfile(share->index_file_name, MARIA_NAME_IEXT,
+ INDEX_TMP_EXT, MYF(0)) ||
+ _ma_open_keyfile(share))
+ goto err2;
+ info->lock_type= F_UNLCK; /* Force maria_readinfo to lock */
+ _ma_readinfo(info,F_WRLCK,0); /* Will lock the table */
+ info->lock_type= old_lock;
+ share->r_locks= r_locks;
+ share->w_locks= w_locks;
+ share->tot_locks= r_locks+w_locks;
+ share->state= old_state; /* Restore old state */
+
+ info->state->key_file_length=param->new_file_pos;
+ info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ for (key=0 ; key < info->s->base.keys ; key++)
+ info->s->state.key_root[key]=index_pos[key];
+ info->s->state.key_del= HA_OFFSET_ERROR;
+
+ info->s->state.changed&= ~STATE_NOT_SORTED_PAGES;
+ DBUG_RETURN(0);
+
+err:
+ VOID(my_close(new_file,MYF(MY_WME)));
+err2:
+ VOID(my_delete(param->temp_filename,MYF(MY_WME)));
+ DBUG_RETURN(-1);
+} /* maria_sort_index */
+
+
+ /* Recursively rewrite the blocks of one index in sorted order */
+
+static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t pagepos, File new_file)
+{
+ uint length,nod_flag,used_length, key_length;
+ byte *buff,*keypos,*endpos;
+ byte key[HA_MAX_POSSIBLE_KEY_BUFF];
+ my_off_t new_page_pos,next_page;
+ char llbuff[22];
+ DBUG_ENTER("sort_one_index");
+
+ new_page_pos=param->new_file_pos;
+ param->new_file_pos+=keyinfo->block_length;
+
+ if (!(buff= (byte*) my_alloca((uint) keyinfo->block_length)))
+ {
+ _ma_check_print_error(param,"Not enough memory for key block");
+ DBUG_RETURN(-1);
+ }
+ if (!_ma_fetch_keypage(info,keyinfo,pagepos,DFLT_INIT_HITS,buff,0))
+ {
+ _ma_check_print_error(param,"Can't read key block from filepos: %s",
+ llstr(pagepos,llbuff));
+ goto err;
+ }
+ if ((nod_flag=_ma_test_if_nod(buff)) || keyinfo->flag & HA_FULLTEXT)
+ {
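+ /*
+ Node pages point to child key blocks and fulltext keys may point to
+ second-level (ft2) trees; both are rewritten recursively below so that
+ the stored child positions refer to the new index file.
+ */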
+ used_length=maria_getint(buff);
+ keypos=buff+2+nod_flag;
+ endpos=buff+used_length;
+ for ( ;; )
+ {
+ if (nod_flag)
+ {
+ next_page= _ma_kpos(nod_flag,keypos);
+ /* Save new pos */
+ _ma_kpointer(info,keypos-nod_flag,param->new_file_pos);
+ if (sort_one_index(param,info,keyinfo,next_page,new_file))
+ {
+ DBUG_PRINT("error",
+ ("From page: %ld, keyoffset: %lu used_length: %d",
+ (ulong) pagepos, (ulong) (keypos - buff),
+ (int) used_length));
+ DBUG_DUMP("buff",(byte*) buff,used_length);
+ goto err;
+ }
+ }
+ if (keypos >= endpos ||
+ (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0)
+ break;
+ DBUG_ASSERT(keypos <= endpos);
+ if (keyinfo->flag & HA_FULLTEXT)
+ {
+ uint off;
+ int subkeys;
+ get_key_full_length_rdonly(off, key);
+ subkeys=ft_sintXkorr(key+off);
+ if (subkeys < 0)
+ {
+ next_page= _ma_dpos(info,0,key+key_length);
+ _ma_dpointer(info,keypos-nod_flag-info->s->rec_reflength,
+ param->new_file_pos); /* Save new pos */
+ if (sort_one_index(param,info,&info->s->ft2_keyinfo,
+ next_page,new_file))
+ goto err;
+ }
+ }
+ }
+ }
+
+ /* Fill block with zero and write it to the new index file */
+ length=maria_getint(buff);
+ bzero((byte*) buff+length,keyinfo->block_length-length);
+ if (my_pwrite(new_file,(byte*) buff,(uint) keyinfo->block_length,
+ new_page_pos,MYF(MY_NABP | MY_WAIT_IF_FULL)))
+ {
+ _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno);
+ goto err;
+ }
+ my_afree((gptr) buff);
+ DBUG_RETURN(0);
+err:
+ my_afree((gptr) buff);
+ DBUG_RETURN(1);
+} /* sort_one_index */
+
+
+ /*
+ Let temporary file replace old file.
+ This assumes that the new file was created in the same
+ directory as given by realpath(filename).
+ This will ensure that any symlinks that are used will still work.
+ Copies stats from the old file to the new file, deletes the original
+ and renames the new file to the old file name.
+ */
+
+int maria_change_to_newfile(const char * filename, const char * old_ext,
+ const char * new_ext, myf MyFlags)
+{
+ char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
+#ifdef USE_RAID
+ if (raid_chunks)
+ return my_raid_redel(fn_format(old_filename,filename,"",old_ext,2+4),
+ fn_format(new_filename,filename,"",new_ext,2+4),
+ raid_chunks,
+ MYF(MY_WME | MY_LINK_WARNING | MyFlags));
+#endif
+ /* Get real path to filename */
+ (void) fn_format(old_filename,filename,"",old_ext,2+4+32);
+ return my_redel(old_filename,
+ fn_format(new_filename,old_filename,"",new_ext,2+4),
+ MYF(MY_WME | MY_LINK_WARNING | MyFlags));
+} /* maria_change_to_newfile */
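+
+/*
+  Illustrative usage (as done elsewhere in this file): after a successful
+  repair the temporary data file replaces the real one, e.g.
+    maria_change_to_newfile(share->data_file_name, MARIA_NAME_DEXT,
+                            DATA_TMP_EXT, MYF(0));
+  optionally with MY_REDEL_MAKE_BACKUP to keep the old file as a backup.
+*/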
+
+
+/* Copy a block between two files */
+
+int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
+ my_off_t length, const char *type)
+{
+ char tmp_buff[IO_SIZE],*buff;
+ ulong buff_length;
+ DBUG_ENTER("maria_filecopy");
+
+ buff_length=(ulong) min(param->write_buffer_length,length);
+ if (!(buff=my_malloc(buff_length,MYF(0))))
+ {
+ buff=tmp_buff; buff_length=IO_SIZE;
+ }
+
+ VOID(my_seek(from,start,MY_SEEK_SET,MYF(0)));
+ while (length > buff_length)
+ {
+ if (my_read(from,(byte*) buff,buff_length,MYF(MY_NABP)) ||
+ my_write(to,(byte*) buff,buff_length,param->myf_rw))
+ goto err;
+ length-= buff_length;
+ }
+ if (my_read(from,(byte*) buff,(uint) length,MYF(MY_NABP)) ||
+ my_write(to,(byte*) buff,(uint) length,param->myf_rw))
+ goto err;
+ if (buff != tmp_buff)
+ my_free(buff,MYF(0));
+ DBUG_RETURN(0);
+err:
+ if (buff != tmp_buff)
+ my_free(buff,MYF(0));
+ _ma_check_print_error(param,"Can't copy %s to tempfile, error %d",
+ type,my_errno);
+ DBUG_RETURN(1);
+}
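+
+/*
+  Illustrative usage (matching the repair functions above): copy the packed
+  data-file header from the old data file into the freshly created tempfile:
+    maria_filecopy(param, new_file, info->dfile, 0L, new_header_length,
+                   "datafile-header");
+*/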
+
+
+/*
+ Repair table or given index using sorting
+
+ SYNOPSIS
+ maria_repair_by_sort()
+ param Repair parameters
+ info MARIA handler to repair
+ name Name of table (for warnings)
+ rep_quick set to <> 0 if we should not change data file
+
+ RESULT
+ 0 ok
+ <>0 Error
+*/
+
+int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
+ const char * name, int rep_quick)
+{
+ int got_error;
+ uint i;
+ ulong length;
+ ha_rows start_records;
+ my_off_t new_header_length,del;
+ File new_file;
+ MARIA_SORT_PARAM sort_param;
+ MARIA_SHARE *share=info->s;
+ HA_KEYSEG *keyseg;
+ ulong *rec_per_key_part;
+ char llbuff[22];
+ MARIA_SORT_INFO sort_info;
+ ulonglong key_map=share->state.key_map;
+ DBUG_ENTER("maria_repair_by_sort");
+
+ start_records=info->state->records;
+ got_error=1;
+ new_file= -1;
+ new_header_length=(param->testflag & T_UNPACK) ? 0 :
+ share->pack.header_length;
+ if (!(param->testflag & T_SILENT))
+ {
+ printf("- recovering (with sort) MARIA-table '%s'\n",name);
+ printf("Data records: %s\n", llstr(start_records,llbuff));
+ }
+ param->testflag|=T_REP; /* for easy checking */
+
+ if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+ param->testflag|=T_CALC_CHECKSUM;
+
+ bzero((char*)&sort_info,sizeof(sort_info));
+ bzero((char *)&sort_param, sizeof(sort_param));
+ if (!(sort_info.key_block=
+ alloc_key_blocks(param,
+ (uint) param->sort_key_blocks,
+ share->base.max_key_block_length))
+ || init_io_cache(&param->read_cache,info->dfile,
+ (uint) param->read_buffer_length,
+ READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) ||
+ (! rep_quick &&
+ init_io_cache(&info->rec_cache,info->dfile,
+ (uint) param->write_buffer_length,
+ WRITE_CACHE,new_header_length,1,
+ MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw)))
+ goto err;
+ sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
+ info->opt_flag|=WRITE_CACHE_USED;
+ info->rec_cache.file=info->dfile; /* for sort_delete_record */
+
+ if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength,
+ MYF(0))) ||
+ _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
+ info->s->base.default_rec_buff_size))
+ {
+ _ma_check_print_error(param, "Not enough memory for extra record");
+ goto err;
+ }
+ if (!rep_quick)
+ {
+ /* Get real path for data file */
+ if ((new_file=my_create(fn_format(param->temp_filename,
+ share->data_file_name, "",
+ DATA_TMP_EXT, 2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0))) < 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ goto err;
+ }
+ if (new_header_length &&
+ maria_filecopy(param, new_file,info->dfile,0L,new_header_length,
+ "datafile-header"))
+ goto err;
+ if (param->testflag & T_UNPACK)
+ restore_data_file_type(share);
+ share->state.dellink= HA_OFFSET_ERROR;
+ info->rec_cache.file=new_file;
+ }
+
+ info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ if (!(param->testflag & T_CREATE_MISSING_KEYS))
+ {
+ /*
+ Flush key cache for this file if we are calling this outside
+ maria_chk
+ */
+ flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED);
+ /* Clear the pointers to the given rows */
+ for (i=0 ; i < share->base.keys ; i++)
+ share->state.key_root[i]= HA_OFFSET_ERROR;
+ share->state.key_del= HA_OFFSET_ERROR;
+ info->state->key_file_length=share->base.keystart;
+ }
+ else
+ {
+ if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_FORCE_WRITE))
+ goto err;
+ key_map= ~key_map; /* Create the missing keys */
+ }
+
+ sort_info.info=info;
+ sort_info.param = param;
+
+ set_data_file_type(&sort_info, share);
+ sort_param.filepos=new_header_length;
+ sort_info.dupp=0;
+ sort_info.buff=0;
+ param->read_cache.end_of_file=sort_info.filelength=
+ my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0));
+
+ sort_param.wordlist=NULL;
+ init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
+
+ if (share->data_file_type == DYNAMIC_RECORD)
+ length=max(share->base.min_pack_length+1,share->base.min_block_length);
+ else if (share->data_file_type == COMPRESSED_RECORD)
+ length=share->base.min_block_length;
+ else
+ length=share->base.pack_reclength;
+ sort_info.max_records=
+ ((param->testflag & T_CREATE_MISSING_KEYS) ? info->state->records :
+ (ha_rows) (sort_info.filelength/length+1));
+ sort_param.key_cmp=sort_key_cmp;
+ sort_param.lock_in_memory=maria_lock_memory;
+ sort_param.tmpdir=param->tmpdir;
+ sort_param.sort_info=&sort_info;
+ sort_param.fix_datafile= (my_bool) (! rep_quick);
+ sort_param.master =1;
+
+ del=info->state->del;
+ param->glob_crc=0;
+ if (param->testflag & T_CALC_CHECKSUM)
+ sort_param.calc_checksum= 1;
+
+ rec_per_key_part= param->rec_per_key_part;
+ for (sort_param.key=0 ; sort_param.key < share->base.keys ;
+ rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
+ {
+ sort_param.read_cache=param->read_cache;
+ sort_param.keyinfo=share->keyinfo+sort_param.key;
+ sort_param.seg=sort_param.keyinfo->seg;
+ if (! maria_is_key_active(key_map, sort_param.key))
+ {
+ /* Remember old statistics for key */
+ memcpy((char*) rec_per_key_part,
+ (char*) (share->state.rec_per_key_part +
+ (uint) (rec_per_key_part - param->rec_per_key_part)),
+ sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part));
+ continue;
+ }
+
+ if ((!(param->testflag & T_SILENT)))
+ printf ("- Fixing index %d\n",sort_param.key+1);
+ sort_param.max_pos=sort_param.pos=share->pack.header_length;
+ keyseg=sort_param.seg;
+ bzero((char*) sort_param.unique,sizeof(sort_param.unique));
+ sort_param.key_length=share->rec_reflength;
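+ /*
+ The loop below sizes the sort key buffer: each key segment contributes
+ its length, space-packed segments add a pack-length prefix, BLOB/VARCHAR
+ parts add 2-3 length bytes and nullable parts add one NULL-indicator
+ byte, on top of the record reference added above.
+ */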
+ for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
+ {
+ sort_param.key_length+=keyseg[i].length;
+ if (keyseg[i].flag & HA_SPACE_PACK)
+ sort_param.key_length+=get_pack_length(keyseg[i].length);
+ if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ sort_param.key_length+=2 + test(keyseg[i].length >= 127);
+ if (keyseg[i].flag & HA_NULL_PART)
+ sort_param.key_length++;
+ }
+ info->state->records=info->state->del=share->state.split=0;
+ info->state->empty=0;
+
+ if (sort_param.keyinfo->flag & HA_FULLTEXT)
+ {
+ uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
+ sort_param.keyinfo->seg->charset->mbmaxlen;
+ sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
+ /*
+ fulltext indexes may have many more entries than the
+ number of rows in the table. We estimate the number here.
+
+ Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
+ */
+ if (sort_param.keyinfo->ftparser_nr == 0)
+ {
+ /*
+ for built-in parser the number of generated index entries
+ cannot be larger than the size of the data file divided
+ by the minimal word's length
+ */
+ sort_info.max_records=
+ (ha_rows) (sort_info.filelength/ft_min_word_len+1);
+ }
+ else
+ {
+ /*
+ for external plugin parser we cannot tell anything at all :(
+ so, we'll use all the sort memory and start from ~10 buffpeks.
+ (see _create_index_by_sort)
+ */
+ sort_info.max_records=
+ 10*param->sort_buffer_length/sort_param.key_length;
+ }
+
+ sort_param.key_read= sort_maria_ft_key_read;
+ sort_param.key_write= sort_maria_ft_key_write;
+ }
+ else
+ {
+ sort_param.key_read= sort_key_read;
+ sort_param.key_write= sort_key_write;
+ }
+
+ if (_ma_create_index_by_sort(&sort_param,
+ (my_bool) (!(param->testflag & T_VERBOSE)),
+ (uint) param->sort_buffer_length))
+ {
+ param->retry_repair=1;
+ goto err;
+ }
+ /* No need to calculate checksum again. */
+ sort_param.calc_checksum= 0;
+ free_root(&sort_param.wordroot, MYF(0));
+
+ /* Set for next loop */
+ sort_info.max_records= (ha_rows) info->state->records;
+
+ if (param->testflag & T_STATISTICS)
+ maria_update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ sort_param.notnull: NULL,(ulonglong) info->state->records);
+ maria_set_key_active(share->state.key_map, sort_param.key);
+
+ if (sort_param.fix_datafile)
+ {
+ param->read_cache.end_of_file=sort_param.filepos;
+ if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache))
+ goto err;
+ if (param->testflag & T_SAFE_REPAIR)
+ {
+ /* Don't repair if we lost more than one row */
+ if (info->state->records+1 < start_records)
+ {
+ info->state->records=start_records;
+ goto err;
+ }
+ }
+ share->state.state.data_file_length = info->state->data_file_length=
+ sort_param.filepos;
+ /* Only whole records */
+ share->state.version=(ulong) time((time_t*) 0);
+ my_close(info->dfile,MYF(0));
+ info->dfile=new_file;
+ share->data_file_type=sort_info.new_data_file_type;
+ share->pack.header_length=(ulong) new_header_length;
+ sort_param.fix_datafile=0;
+ }
+ else
+ info->state->data_file_length=sort_param.max_pos;
+
+ param->read_cache.file=info->dfile; /* re-init read cache */
+ reinit_io_cache(&param->read_cache,READ_CACHE,share->pack.header_length,
+ 1,1);
+ }
+
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ VOID(fputs(" \r",stdout)); VOID(fflush(stdout));
+ }
+
+ if (rep_quick && del+sort_info.dupp != info->state->del)
+ {
+ _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
+ _ma_check_print_error(param,"Run recovery again without -q");
+ got_error=1;
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ goto err;
+ }
+
+ if (rep_quick & T_FORCE_UNIQUENESS)
+ {
+ my_off_t skr=info->state->data_file_length+
+ (share->options & HA_OPTION_COMPRESS_RECORD ?
+ MEMMAP_EXTRA_MARGIN : 0);
+#ifdef USE_RELOC
+ if (share->data_file_type == STATIC_RECORD &&
+ skr < share->base.reloc*share->base.min_pack_length)
+ skr=share->base.reloc*share->base.min_pack_length;
+#endif
+ if (skr != sort_info.filelength)
+ if (my_chsize(info->dfile,skr,0,MYF(0)))
+ _ma_check_print_warning(param,
+ "Can't change size of datafile, error: %d",
+ my_errno);
+ }
+ if (param->testflag & T_CALC_CHECKSUM)
+ info->state->checksum=param->glob_crc;
+
+ if (my_chsize(share->kfile,info->state->key_file_length,0,MYF(0)))
+ _ma_check_print_warning(param,
+ "Can't change size of indexfile, error: %d",
+ my_errno);
+
+ if (!(param->testflag & T_SILENT))
+ {
+ if (start_records != info->state->records)
+ printf("Data records: %s\n", llstr(info->state->records,llbuff));
+ if (sort_info.dupp)
+ _ma_check_print_warning(param,
+ "%s records have been removed",
+ llstr(sort_info.dupp,llbuff));
+ }
+ got_error=0;
+
+ if (&share->state.state != info->state)
+ memcpy( &share->state.state, info->state, sizeof(*info->state));
+
+err:
+ got_error|= _ma_flush_blocks(param, share->key_cache, share->kfile);
+ VOID(end_io_cache(&info->rec_cache));
+ if (!got_error)
+ {
+ /* Replace the actual file with the temporary file */
+ if (new_file >= 0)
+ {
+ my_close(new_file,MYF(0));
+ info->dfile=new_file= -1;
+ if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT,
+ DATA_TMP_EXT,
+ (param->testflag & T_BACKUP_DATA ?
+ MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) ||
+ _ma_open_datafile(info,share,-1))
+ got_error=1;
+ }
+ }
+ if (got_error)
+ {
+ if (! param->error_printed)
+ _ma_check_print_error(param,"%d when fixing table",my_errno);
+ if (new_file >= 0)
+ {
+ VOID(my_close(new_file,MYF(0)));
+ VOID(my_delete(param->temp_filename, MYF(MY_WME)));
+ if (info->dfile == new_file)
+ info->dfile= -1;
+ }
+ maria_mark_crashed_on_repair(info);
+ }
+ else if (key_map == share->state.key_map)
+ share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
+ share->state.changed|=STATE_NOT_SORTED_PAGES;
+
+ my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
+ my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR));
+ my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
+ VOID(end_io_cache(&param->read_cache));
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ if (!got_error && (param->testflag & T_UNPACK))
+ restore_data_file_type(share);
+ DBUG_RETURN(got_error);
+}
+
+/*
+ Threaded repair of table using sorting
+
+ SYNOPSIS
+ maria_repair_parallel()
+ param Repair parameters
+ info MARIA handler to repair
+ name Name of table (for warnings)
+ rep_quick set to <> 0 if we should not change data file
+
+ DESCRIPTION
+ Same as maria_repair_by_sort, but multithreaded.
+ Each key is handled by a separate thread.
+ TODO: make the number of threads a parameter
+
+ In parallel repair we use one thread per index. There are two modes:
+
+ Quick
+
+ Only the indexes are rebuilt. All threads share a read buffer.
+ Every thread that needs fresh data in the buffer enters the shared
+ cache lock. The last thread joining the lock reads the buffer from
+ the data file and wakes all other threads.
+
+ Non-quick
+
+ The data file is rebuilt and all indexes are rebuilt to point to
+ the new record positions. One thread is the master thread. It
+ reads from the old data file and writes to the new data file. It
+ also creates one of the indexes. The other threads read from a
+ buffer which is filled by the master. If they need fresh data,
+ they enter the shared cache lock. If the masters write buffer is
+ full, it flushes it to the new data file and enters the shared
+ cache lock too. When all threads joined in the lock, the master
+ copies its write buffer to the read buffer for the other threads
+ and wakes them.
+
+ RESULT
+ 0 ok
+ <>0 Error
+*/
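+
+/*
+  Rough sketch of the control flow described above (illustrative only):
+
+    for each index to rebuild:
+      set up sort_param[i] (read cache, key read/write functions, buffers);
+      pthread_create(..., _ma_thr_find_all_keys, &sort_param[i]);
+    wait until sort_info.threads_running drops to zero;
+    _ma_thr_write_keys(sort_param);   merge and write the collected keys
+*/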
+
+int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
+ const char * name, int rep_quick)
+{
+#ifndef THREAD
+ return maria_repair_by_sort(param, info, name, rep_quick);
+#else
+ int got_error;
+ uint i,key, total_key_length, istep;
+ ulong rec_length;
+ ha_rows start_records;
+ my_off_t new_header_length,del;
+ File new_file;
+ MARIA_SORT_PARAM *sort_param=0;
+ MARIA_SHARE *share=info->s;
+ ulong *rec_per_key_part;
+ HA_KEYSEG *keyseg;
+ char llbuff[22];
+ IO_CACHE new_data_cache; /* For non-quick repair. */
+ IO_CACHE_SHARE io_share;
+ MARIA_SORT_INFO sort_info;
+ ulonglong key_map=share->state.key_map;
+ pthread_attr_t thr_attr;
+ DBUG_ENTER("maria_repair_parallel");
+
+ start_records=info->state->records;
+ got_error=1;
+ new_file= -1;
+ new_header_length=(param->testflag & T_UNPACK) ? 0 :
+ share->pack.header_length;
+ if (!(param->testflag & T_SILENT))
+ {
+ printf("- parallel recovering (with sort) MARIA-table '%s'\n",name);
+ printf("Data records: %s\n", llstr(start_records,llbuff));
+ }
+ param->testflag|=T_REP; /* for easy checking */
+
+ if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+ param->testflag|=T_CALC_CHECKSUM;
+
+ /*
+ Quick repair (not touching data file, rebuilding indexes):
+ {
+ Read cache is (HA_CHECK *param)->read_cache using info->dfile.
+ }
+
+ Non-quick repair (rebuilding data file and indexes):
+ {
+ Master thread:
+
+ Read cache is (HA_CHECK *param)->read_cache using info->dfile.
+ Write cache is (MARIA_HA *info)->rec_cache using new_file.
+
+ Slave threads:
+
+ Read cache is new_data_cache synced to master rec_cache.
+
+ The final assignment of the file descriptor for rec_cache is done
+ after the cache creation.
+
+ Don't check file size on new_data_cache, as the resulting file size
+ is not known yet.
+
+ As rec_cache and new_data_cache are synced, write_buffer_length is
+ used for the read cache 'new_data_cache'. Both start at the same
+ position 'new_header_length'.
+ }
+ */
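+ /*
+ In short (illustrative summary of the above):
+   quick:      every thread reads via param->read_cache from info->dfile
+   non-quick:  the master reads via param->read_cache from info->dfile and
+               writes via info->rec_cache to new_file; the slaves read via
+               new_data_cache, which is synced to rec_cache
+ */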
+ DBUG_PRINT("info", ("is quick repair: %d", rep_quick));
+ bzero((char*)&sort_info,sizeof(sort_info));
+ /* Initialize pthread structures before goto err. */
+ pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&sort_info.cond, 0);
+
+ if (!(sort_info.key_block=
+ alloc_key_blocks(param, (uint) param->sort_key_blocks,
+ share->base.max_key_block_length)) ||
+ init_io_cache(&param->read_cache, info->dfile,
+ (uint) param->read_buffer_length,
+ READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) ||
+ (!rep_quick &&
+ (init_io_cache(&info->rec_cache, info->dfile,
+ (uint) param->write_buffer_length,
+ WRITE_CACHE, new_header_length, 1,
+ MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) ||
+ init_io_cache(&new_data_cache, -1,
+ (uint) param->write_buffer_length,
+ READ_CACHE, new_header_length, 1,
+ MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))))
+ goto err;
+ sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
+ info->opt_flag|=WRITE_CACHE_USED;
+ info->rec_cache.file=info->dfile; /* for sort_delete_record */
+
+ if (!rep_quick)
+ {
+ /* Get real path for data file */
+ if ((new_file= my_create(fn_format(param->temp_filename,
+ share->data_file_name, "",
+ DATA_TMP_EXT,
+ 2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0))) < 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ goto err;
+ }
+ if (new_header_length &&
+ maria_filecopy(param, new_file,info->dfile,0L,new_header_length,
+ "datafile-header"))
+ goto err;
+ if (param->testflag & T_UNPACK)
+ restore_data_file_type(share);
+ share->state.dellink= HA_OFFSET_ERROR;
+ info->rec_cache.file=new_file;
+ }
+
+ info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ if (!(param->testflag & T_CREATE_MISSING_KEYS))
+ {
+ /*
+ Flush key cache for this file if we are calling this outside
+ maria_chk
+ */
+ flush_key_blocks(share->key_cache,share->kfile, FLUSH_IGNORE_CHANGED);
+ /* Clear the pointers to the given rows */
+ for (i=0 ; i < share->base.keys ; i++)
+ share->state.key_root[i]= HA_OFFSET_ERROR;
+ share->state.key_del= HA_OFFSET_ERROR;
+ info->state->key_file_length=share->base.keystart;
+ }
+ else
+ {
+ if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_FORCE_WRITE))
+ goto err;
+ key_map= ~key_map; /* Create the missing keys */
+ }
+
+ sort_info.info=info;
+ sort_info.param = param;
+
+ set_data_file_type(&sort_info, share);
+ sort_info.dupp=0;
+ sort_info.buff=0;
+ param->read_cache.end_of_file=sort_info.filelength=
+ my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0));
+
+ if (share->data_file_type == DYNAMIC_RECORD)
+ rec_length=max(share->base.min_pack_length+1,share->base.min_block_length);
+ else if (share->data_file_type == COMPRESSED_RECORD)
+ rec_length=share->base.min_block_length;
+ else
+ rec_length=share->base.pack_reclength;
+ /*
+ The +1 below is a required hack for parallel repair mode.
+ The info->state->records value, which is compared later
+ to sort_info.max_records and cannot exceed it, is
+ increased in sort_key_write. In maria_repair_by_sort, sort_key_write
+ is called after sort_key_read, where the comparison is performed,
+ but in parallel mode the master thread can call sort_key_write
+ before some other repair thread calls sort_key_read.
+ Furthermore, I'm not even sure +1 would be enough.
+ Maybe sort_info.max_records should always be set to the maximum
+ value in parallel mode.
+ */
+ sort_info.max_records=
+ ((param->testflag & T_CREATE_MISSING_KEYS) ? info->state->records + 1:
+ (ha_rows) (sort_info.filelength/rec_length+1));
+
+ del=info->state->del;
+ param->glob_crc=0;
+
+ if (!(sort_param=(MARIA_SORT_PARAM *)
+ my_malloc((uint) share->base.keys *
+ (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
+ MYF(MY_ZEROFILL))))
+ {
+ _ma_check_print_error(param,"Not enough memory for key!");
+ goto err;
+ }
+ total_key_length=0;
+ rec_per_key_part= param->rec_per_key_part;
+ info->state->records=info->state->del=share->state.split=0;
+ info->state->empty=0;
+
+ for (i=key=0, istep=1 ; key < share->base.keys ;
+ rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
+ {
+ sort_param[i].key=key;
+ sort_param[i].keyinfo=share->keyinfo+key;
+ sort_param[i].seg=sort_param[i].keyinfo->seg;
+ if (! maria_is_key_active(key_map, key))
+ {
+ /* Remember old statistics for key */
+ memcpy((char*) rec_per_key_part,
+ (char*) (share->state.rec_per_key_part+
+ (uint) (rec_per_key_part - param->rec_per_key_part)),
+ sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part));
+ istep=0;
+ continue;
+ }
+ istep=1;
+ if ((!(param->testflag & T_SILENT)))
+ printf ("- Fixing index %d\n",key+1);
+ if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
+ {
+ sort_param[i].key_read=sort_maria_ft_key_read;
+ sort_param[i].key_write=sort_maria_ft_key_write;
+ }
+ else
+ {
+ sort_param[i].key_read=sort_key_read;
+ sort_param[i].key_write=sort_key_write;
+ }
+ sort_param[i].key_cmp=sort_key_cmp;
+ sort_param[i].lock_in_memory=maria_lock_memory;
+ sort_param[i].tmpdir=param->tmpdir;
+ sort_param[i].sort_info=&sort_info;
+ sort_param[i].master=0;
+ sort_param[i].fix_datafile=0;
+ sort_param[i].calc_checksum= 0;
+
+ sort_param[i].filepos=new_header_length;
+ sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;
+
+ sort_param[i].record= (((char *)(sort_param+share->base.keys))+
+ (share->base.pack_reclength * i));
+ if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size,
+ share->base.default_rec_buff_size))
+ {
+ _ma_check_print_error(param,"Not enough memory!");
+ goto err;
+ }
+ sort_param[i].key_length=share->rec_reflength;
+ for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END;
+ keyseg++)
+ {
+ sort_param[i].key_length+=keyseg->length;
+ if (keyseg->flag & HA_SPACE_PACK)
+ sort_param[i].key_length+=get_pack_length(keyseg->length);
+ if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ sort_param[i].key_length+=2 + test(keyseg->length >= 127);
+ if (keyseg->flag & HA_NULL_PART)
+ sort_param[i].key_length++;
+ }
+ total_key_length+=sort_param[i].key_length;
+
+ if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
+ {
+ uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
+ sort_param[i].keyinfo->seg->charset->mbmaxlen;
+ sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
+ init_alloc_root(&sort_param[i].wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
+ }
+ }
+ sort_info.total_keys=i;
+ sort_param[0].master= 1;
+ sort_param[0].fix_datafile= (my_bool)(! rep_quick);
+ sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM);
+
+ sort_info.got_error=0;
+ pthread_mutex_lock(&sort_info.mutex);
+
+ /*
+ Initialize the I/O cache share for use with the read caches and, in
+ case of non-quick repair, the write cache. When all threads join on
+ the cache lock, the writer copies the write cache contents to the
+ read caches.
+ */
+ if (i > 1)
+ {
+ if (rep_quick)
+ init_io_cache_share(&param->read_cache, &io_share, NULL, i);
+ else
+ init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
+ }
+ else
+ io_share.total_threads= 0; /* share not used */
+
+ (void) pthread_attr_init(&thr_attr);
+ (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
+
+ for (i=0 ; i < sort_info.total_keys ; i++)
+ {
+ /*
+ Copy the properly initialized IO_CACHE structure so that every
+ thread has its own copy. In quick mode param->read_cache is shared
+ for use by all threads. In non-quick mode all threads but the
+ first copy the shared new_data_cache, which is synchronized to the
+ write cache of the first thread. The first thread copies
+ param->read_cache, which is not shared.
+ */
+ sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
+ new_data_cache);
+ DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx",
+ i, (long) &sort_param[i].read_cache));
+
+ /*
+ two approaches: the same amount of memory for each thread
+ or the memory for the same number of keys for each thread...
+ In the second one all the threads will fill their sort_buffers
+ (and call write_keys) at the same time, putting more stress on i/o.
+ */
+ sort_param[i].sortbuff_size=
+#ifndef USING_SECOND_APPROACH
+ param->sort_buffer_length/sort_info.total_keys;
+#else
+ param->sort_buffer_length*sort_param[i].key_length/total_key_length;
+#endif
+ if (pthread_create(&sort_param[i].thr, &thr_attr,
+ _ma_thr_find_all_keys,
+ (void *) (sort_param+i)))
+ {
+ _ma_check_print_error(param,"Cannot start a repair thread");
+ /* Cleanup: detach from the share so the other threads are not blocked. */
+ if (io_share.total_threads)
+ remove_io_thread(&sort_param[i].read_cache);
+ DBUG_PRINT("error", ("Cannot start a repair thread"));
+ sort_info.got_error=1;
+ }
+ else
+ sort_info.threads_running++;
+ }
+ (void) pthread_attr_destroy(&thr_attr);
+
+ /* waiting for all threads to finish */
+ while (sort_info.threads_running)
+ pthread_cond_wait(&sort_info.cond, &sort_info.mutex);
+ pthread_mutex_unlock(&sort_info.mutex);
+
+ if ((got_error= _ma_thr_write_keys(sort_param)))
+ {
+ param->retry_repair=1;
+ goto err;
+ }
+ got_error=1; /* Assume the following may go wrong */
+
+ if (sort_param[0].fix_datafile)
+ {
+ /*
+ Append some NULs to the end of a memory-mapped file. Destroy the
+ write cache. The master thread has already detached from the share
+ by remove_io_thread() in sort.c:thr_find_all_keys().
+ */
+ if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache))
+ goto err;
+ if (param->testflag & T_SAFE_REPAIR)
+ {
+ /* Don't repair if we lost more than one row */
+ if (info->state->records+1 < start_records)
+ {
+ info->state->records=start_records;
+ goto err;
+ }
+ }
+ share->state.state.data_file_length= info->state->data_file_length=
+ sort_param->filepos;
+ /* Only whole records */
+ share->state.version=(ulong) time((time_t*) 0);
+ /*
+ Exchange the data file descriptor of the table, so that we use the
+ new file from now on.
+ */
+ my_close(info->dfile,MYF(0));
+ info->dfile=new_file;
+
+ share->data_file_type=sort_info.new_data_file_type;
+ share->pack.header_length=(ulong) new_header_length;
+ }
+ else
+ info->state->data_file_length=sort_param->max_pos;
+
+ if (rep_quick && del+sort_info.dupp != info->state->del)
+ {
+ _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records");
+ _ma_check_print_error(param,"Run recovery again without -q");
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ goto err;
+ }
+
+ if (rep_quick & T_FORCE_UNIQUENESS)
+ {
+ my_off_t skr=info->state->data_file_length+
+ (share->options & HA_OPTION_COMPRESS_RECORD ?
+ MEMMAP_EXTRA_MARGIN : 0);
+#ifdef USE_RELOC
+ if (share->data_file_type == STATIC_RECORD &&
+ skr < share->base.reloc*share->base.min_pack_length)
+ skr=share->base.reloc*share->base.min_pack_length;
+#endif
+ if (skr != sort_info.filelength)
+ if (my_chsize(info->dfile,skr,0,MYF(0)))
+ _ma_check_print_warning(param,
+ "Can't change size of datafile, error: %d",
+ my_errno);
+ }
+ if (param->testflag & T_CALC_CHECKSUM)
+ info->state->checksum=param->glob_crc;
+
+ if (my_chsize(share->kfile,info->state->key_file_length,0,MYF(0)))
+ _ma_check_print_warning(param,
+ "Can't change size of indexfile, error: %d", my_errno);
+
+ if (!(param->testflag & T_SILENT))
+ {
+ if (start_records != info->state->records)
+ printf("Data records: %s\n", llstr(info->state->records,llbuff));
+ if (sort_info.dupp)
+ _ma_check_print_warning(param,
+ "%s records have been removed",
+ llstr(sort_info.dupp,llbuff));
+ }
+ got_error=0;
+
+ if (&share->state.state != info->state)
+ memcpy(&share->state.state, info->state, sizeof(*info->state));
+
+err:
+ got_error|= _ma_flush_blocks(param, share->key_cache, share->kfile);
+ /*
+ Destroy the write cache. The master thread has already detached from
+ the share by remove_io_thread(), or it was not yet started (if the
+ error happened before creating the thread).
+ */
+ VOID(end_io_cache(&info->rec_cache));
+ /*
+ Destroy the new data cache in case of non-quick repair. All slave
+ threads have either already detached from the share by
+ remove_io_thread() or were not yet started (if the error happened
+ before creating the threads).
+ */
+ if (!rep_quick)
+ VOID(end_io_cache(&new_data_cache));
+ if (!got_error)
+ {
+ /* Replace the actual file with the temporary file */
+ if (new_file >= 0)
+ {
+ my_close(new_file,MYF(0));
+ info->dfile=new_file= -1;
+ if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT,
+ DATA_TMP_EXT,
+ (param->testflag & T_BACKUP_DATA ?
+ MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) ||
+ _ma_open_datafile(info,share,-1))
+ got_error=1;
+ }
+ }
+ if (got_error)
+ {
+ if (! param->error_printed)
+ _ma_check_print_error(param,"%d when fixing table",my_errno);
+ if (new_file >= 0)
+ {
+ VOID(my_close(new_file,MYF(0)));
+ VOID(my_delete(param->temp_filename, MYF(MY_WME)));
+ if (info->dfile == new_file)
+ info->dfile= -1;
+ }
+ maria_mark_crashed_on_repair(info);
+ }
+ else if (key_map == share->state.key_map)
+ share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
+ share->state.changed|=STATE_NOT_SORTED_PAGES;
+
+ pthread_cond_destroy (&sort_info.cond);
+ pthread_mutex_destroy(&sort_info.mutex);
+
+ my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR));
+ my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR));
+ my_free((gptr) sort_param,MYF(MY_ALLOW_ZERO_PTR));
+ my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
+ VOID(end_io_cache(&param->read_cache));
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ if (!got_error && (param->testflag & T_UNPACK))
+ restore_data_file_type(share);
+ DBUG_RETURN(got_error);
+#endif /* THREAD */
+}
+
+ /* Read next record and return next key */
+
+static int sort_key_read(MARIA_SORT_PARAM *sort_param, byte *key)
+{
+ int error;
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ MARIA_HA *info= sort_info->info;
+ DBUG_ENTER("sort_key_read");
+
+ if ((error=sort_get_next_record(sort_param)))
+ DBUG_RETURN(error);
+ if (info->state->records == sort_info->max_records)
+ {
+ _ma_check_print_error(sort_info->param,
+ "Key %d - Found too many records; Can't continue",
+ sort_param->key+1);
+ DBUG_RETURN(1);
+ }
+ sort_param->real_key_length=
+ (info->s->rec_reflength+
+ _ma_make_key(info, sort_param->key, key,
+ sort_param->record, sort_param->filepos));
+#ifdef HAVE_purify
+ bzero(key+sort_param->real_key_length,
+ (sort_param->key_length-sort_param->real_key_length));
+#endif
+ DBUG_RETURN(_ma_sort_write_record(sort_param));
+} /* sort_key_read */
+
+
+static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key)
+{
+ int error;
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ MARIA_HA *info=sort_info->info;
+ FT_WORD *wptr=0;
+ DBUG_ENTER("sort_maria_ft_key_read");
+
+ if (!sort_param->wordlist)
+ {
+ for (;;)
+ {
+ free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
+ if ((error=sort_get_next_record(sort_param)))
+ DBUG_RETURN(error);
+ if (!(wptr= _ma_ft_parserecord(info,sort_param->key,sort_param->record,
+ &sort_param->wordroot)))
+
+ DBUG_RETURN(1);
+ if (wptr->pos)
+ break;
+ error=_ma_sort_write_record(sort_param);
+ }
+ sort_param->wordptr=sort_param->wordlist=wptr;
+ }
+ else
+ {
+ error=0;
+ wptr=(FT_WORD*)(sort_param->wordptr);
+ }
+
+ sort_param->real_key_length=(info->s->rec_reflength+
+ _ma_ft_make_key(info, sort_param->key,
+ key, wptr++,
+ sort_param->filepos));
+#ifdef HAVE_purify
+ if (sort_param->key_length > sort_param->real_key_length)
+ bzero(key+sort_param->real_key_length,
+ (sort_param->key_length-sort_param->real_key_length));
+#endif
+ if (!wptr->pos)
+ {
+ free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
+ sort_param->wordlist=0;
+ error=_ma_sort_write_record(sort_param);
+ }
+ else
+ sort_param->wordptr=(void*)wptr;
+
+ DBUG_RETURN(error);
+} /* sort_maria_ft_key_read */
+
+
+/*
+ Read next record from file using parameters in sort_info.
+
+ SYNOPSIS
+ sort_get_next_record()
+ sort_param Information about and for the sort process
+
+ NOTE
+
+ Dynamic Records With Non-Quick Parallel Repair
+
+ For non-quick parallel repair we use a synchronized read/write
+ cache. This means that one thread is the master who fixes the data
+ file by reading each record from the old data file and writing it
+ to the new data file. By doing this the records in the new data
+ file are written contiguously. Whenever the write buffer is full,
+ it is copied to the read buffer. The slaves read from the read
+ buffer, which is not associated with a file. Thus read_cache.file
+ is -1. When using _ma_read_cache(), the slaves must always set
+ flag to READING_NEXT so that the function never tries to read from
+ file. This is safe because the records are contiguous. There is no
+ need to read outside the cache. This condition is evaluated in the
+ variable 'parallel_flag' for quick reference. read_cache.file must
+ be >= 0 in every other case.
+
+ RETURN
+ -1 end of file
+ 0 ok
+ > 0 error
+*/
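+
+/*
+  Concretely, the slave case corresponds to the parallel_flag computation in
+  the DYNAMIC_RECORD branch below:
+
+    parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
+
+  so slaves (file == -1) always read with READING_NEXT from the synchronized
+  cache and never attempt to read from the file itself.
+*/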
+
+static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
+{
+ int searching;
+ int parallel_flag;
+ uint found_record,b_type,left_length;
+ my_off_t pos;
+ byte *to;
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ MARIA_HA *info=sort_info->info;
+ MARIA_SHARE *share=info->s;
+ char llbuff[22],llbuff2[22];
+ DBUG_ENTER("sort_get_next_record");
+
+ if (*_ma_killed_ptr(param))
+ DBUG_RETURN(1);
+
+ switch (share->data_file_type) {
+ case BLOCK_RECORD:
+ DBUG_ASSERT(0);
+ break;
+ case STATIC_RECORD:
+ for (;;)
+ {
+ if (my_b_read(&sort_param->read_cache,sort_param->record,
+ share->base.pack_reclength))
+ {
+ if (sort_param->read_cache.error)
+ param->out_flag |= O_DATA_LOST;
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ DBUG_RETURN(-1);
+ }
+ sort_param->start_recpos=sort_param->pos;
+ if (!sort_param->fix_datafile)
+ {
+ sort_param->filepos=sort_param->pos;
+ if (sort_param->master)
+ share->state.split++;
+ }
+ sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
+ if (*sort_param->record)
+ {
+ if (sort_param->calc_checksum)
+ param->glob_crc+= (info->cur_row.checksum=
+ _ma_static_checksum(info,sort_param->record));
+ DBUG_RETURN(0);
+ }
+ if (!sort_param->fix_datafile && sort_param->master)
+ {
+ info->state->del++;
+ info->state->empty+=share->base.pack_reclength;
+ }
+ }
+ case DYNAMIC_RECORD:
+ LINT_INIT(to);
+ pos=sort_param->pos;
+ searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
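+ /*
+ In 'searching' mode we no longer trust the chain of block links: we scan
+ forward MARIA_DYN_ALIGN_SIZE bytes at a time for something that looks
+ like a valid block header and restart the record from there.
+ */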
+ parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
+ for (;;)
+ {
+ found_record=block_info.second_read= 0;
+ left_length=1;
+ if (searching)
+ {
+ pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE);
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ sort_param->start_recpos=pos;
+ }
+ do
+ {
+ if (pos > sort_param->max_pos)
+ sort_param->max_pos=pos;
+ if (pos & (MARIA_DYN_ALIGN_SIZE-1))
+ {
+ if ((param->testflag & T_VERBOSE) || searching == 0)
+ _ma_check_print_info(param,"Wrong aligned block at %s",
+ llstr(pos,llbuff));
+ if (searching)
+ goto try_next;
+ }
+ if (found_record && pos == param->search_after_block)
+ _ma_check_print_info(param,"Block: %s used by record at %s",
+ llstr(param->search_after_block,llbuff),
+ llstr(sort_param->start_recpos,llbuff2));
+ if (_ma_read_cache(&sort_param->read_cache,
+ (byte*) block_info.header,pos,
+ MARIA_BLOCK_INFO_HEADER_LENGTH,
+ (! found_record ? READING_NEXT : 0) |
+ parallel_flag | READING_HEADER))
+ {
+ if (found_record)
+ {
+ _ma_check_print_info(param,
+ "Can't read whole record at %s (errno: %d)",
+ llstr(sort_param->start_recpos,llbuff),errno);
+ goto try_next;
+ }
+ DBUG_RETURN(-1);
+ }
+ if (searching && ! sort_param->fix_datafile)
+ {
+ param->error_printed=1;
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ DBUG_RETURN(1); /* Something wrong with data */
+ }
+ b_type= _ma_get_block_info(&block_info,-1,pos);
+ if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
+ ((b_type & BLOCK_FIRST) &&
+ (block_info.rec_len < (uint) share->base.min_pack_length ||
+ block_info.rec_len > (uint) share->base.max_pack_length)))
+ {
+ uint i;
+ if (param->testflag & T_VERBOSE || searching == 0)
+ _ma_check_print_info(param,
+ "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped",
+ block_info.header[0],block_info.header[1],
+ block_info.header[2],llstr(pos,llbuff));
+ if (found_record)
+ goto try_next;
+ block_info.second_read=0;
+ searching=1;
+ /* Search after block in read header string */
+ for (i=MARIA_DYN_ALIGN_SIZE ;
+ i < MARIA_BLOCK_INFO_HEADER_LENGTH ;
+ i+= MARIA_DYN_ALIGN_SIZE)
+ if (block_info.header[i] >= 1 &&
+ block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE)
+ break;
+ pos+=(ulong) i;
+ sort_param->start_recpos=pos;
+ continue;
+ }
+ if (b_type & BLOCK_DELETED)
+ {
+ bool error=0;
+ if (block_info.block_len+ (uint) (block_info.filepos-pos) <
+ share->base.min_block_length)
+ {
+ if (!searching)
+ _ma_check_print_info(param,
+ "Deleted block with impossible length %u at %s",
+ block_info.block_len,llstr(pos,llbuff));
+ error=1;
+ }
+ else
+ {
+ if ((block_info.next_filepos != HA_OFFSET_ERROR &&
+ block_info.next_filepos >=
+ info->state->data_file_length) ||
+ (block_info.prev_filepos != HA_OFFSET_ERROR &&
+ block_info.prev_filepos >= info->state->data_file_length))
+ {
+ if (!searching)
+ _ma_check_print_info(param,
+ "Delete link points outside datafile at %s",
+ llstr(pos,llbuff));
+ error=1;
+ }
+ }
+ if (error)
+ {
+ if (found_record)
+ goto try_next;
+ searching=1;
+ pos+= MARIA_DYN_ALIGN_SIZE;
+ sort_param->start_recpos=pos;
+ block_info.second_read=0;
+ continue;
+ }
+ }
+ else
+ {
+ if (block_info.block_len+ (uint) (block_info.filepos-pos) <
+ share->base.min_block_length ||
+ block_info.block_len > (uint) share->base.max_pack_length+
+ MARIA_SPLIT_LENGTH)
+ {
+ if (!searching)
+ _ma_check_print_info(param,
+ "Found block with impossible length %u at %s; Skipped",
+ block_info.block_len+ (uint) (block_info.filepos-pos),
+ llstr(pos,llbuff));
+ if (found_record)
+ goto try_next;
+ searching=1;
+ pos+= MARIA_DYN_ALIGN_SIZE;
+ sort_param->start_recpos=pos;
+ block_info.second_read=0;
+ continue;
+ }
+ }
+ if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ {
+ if (!sort_param->fix_datafile && sort_param->master &&
+ (b_type & BLOCK_DELETED))
+ {
+ info->state->empty+=block_info.block_len;
+ info->state->del++;
+ share->state.split++;
+ }
+ if (found_record)
+ goto try_next;
+ if (searching)
+ {
+ pos+=MARIA_DYN_ALIGN_SIZE;
+ sort_param->start_recpos=pos;
+ }
+ else
+ pos=block_info.filepos+block_info.block_len;
+ block_info.second_read=0;
+ continue;
+ }
+
+ if (!sort_param->fix_datafile && sort_param->master)
+ share->state.split++;
+ if (! found_record++)
+ {
+ sort_param->find_length=left_length=block_info.rec_len;
+ sort_param->start_recpos=pos;
+ if (!sort_param->fix_datafile)
+ sort_param->filepos=sort_param->start_recpos;
+ if (sort_param->fix_datafile && (param->testflag & T_EXTEND))
+ sort_param->pos=block_info.filepos+1;
+ else
+ sort_param->pos=block_info.filepos+block_info.block_len;
+ if (share->base.blobs)
+ {
+ if (_ma_alloc_buffer(&sort_param->rec_buff,
+ &sort_param->rec_buff_size,
+ block_info.rec_len +
+ info->s->base.extra_rec_buff_size))
+
+ {
+ if (param->max_record_length >= block_info.rec_len)
+ {
+ _ma_check_print_error(param,"Not enough memory for blob at %s (need %lu)",
+ llstr(sort_param->start_recpos,llbuff),
+ (ulong) block_info.rec_len);
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ _ma_check_print_info(param,"Not enough memory for blob at %s (need %lu); Row skipped",
+ llstr(sort_param->start_recpos,llbuff),
+ (ulong) block_info.rec_len);
+ goto try_next;
+ }
+ }
+ }
+ to= sort_param->rec_buff;
+ }
+ if (left_length < block_info.data_len || ! block_info.data_len)
+ {
+ _ma_check_print_info(param,
+ "Found block with too small length at %s; "
+ "Skipped",
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ if (block_info.filepos + block_info.data_len >
+ sort_param->read_cache.end_of_file)
+ {
+ _ma_check_print_info(param,
+ "Found block that points outside data file "
+ "at %s",
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ /*
+ Copy information that is already read. Avoid accessing data
+ below the cache start. This could happen if the header
+ stretched over the end of the previous buffer contents.
+ */
+ {
+ uint header_len= (uint) (block_info.filepos - pos);
+ uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len);
+
+ if (prefetch_len > block_info.data_len)
+ prefetch_len= block_info.data_len;
+ if (prefetch_len)
+ {
+ memcpy(to, block_info.header + header_len, prefetch_len);
+ block_info.filepos+= prefetch_len;
+ block_info.data_len-= prefetch_len;
+ left_length-= prefetch_len;
+ to+= prefetch_len;
+ }
+ }
+ if (block_info.data_len &&
+ _ma_read_cache(&sort_param->read_cache,to,block_info.filepos,
+ block_info.data_len,
+ (found_record == 1 ? READING_NEXT : 0) |
+ parallel_flag))
+ {
+ _ma_check_print_info(param,
+ "Read error for block at: %s (error: %d); Skipped",
+ llstr(block_info.filepos,llbuff),my_errno);
+ goto try_next;
+ }
+ left_length-=block_info.data_len;
+ to+=block_info.data_len;
+ pos=block_info.next_filepos;
+ if (pos == HA_OFFSET_ERROR && left_length)
+ {
+ _ma_check_print_info(param,"Wrong block with wrong total length starting at %s",
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH > sort_param->read_cache.end_of_file)
+ {
+ _ma_check_print_info(param,"Found link that points at %s (outside data file) at %s",
+ llstr(pos,llbuff2),
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ } while (left_length);
+
+ if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff,
+ sort_param->find_length) != MY_FILE_ERROR)
+ {
+ if (sort_param->read_cache.error < 0)
+ DBUG_RETURN(1);
+ if (sort_param->calc_checksum)
+ info->cur_row.checksum= _ma_checksum(info, sort_param->record);
+ if ((param->testflag & (T_EXTEND | T_REP)) || searching)
+ {
+ if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff,
+ sort_param->find_length,
+ (param->testflag & T_QUICK) &&
+ sort_param->calc_checksum &&
+ test(info->s->calc_checksum)))
+ {
+ _ma_check_print_info(param,"Found wrong packed record at %s",
+ llstr(sort_param->start_recpos,llbuff));
+ goto try_next;
+ }
+ }
+ if (sort_param->calc_checksum)
+ param->glob_crc+= info->cur_row.checksum;
+ DBUG_RETURN(0);
+ }
+ if (!searching)
+ _ma_check_print_info(param,"Key %d - Found wrong stored record at %s",
+ sort_param->key+1,
+ llstr(sort_param->start_recpos,llbuff));
+ try_next:
+ pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE);
+ searching=1;
+ }
+ case COMPRESSED_RECORD:
+ for (searching=0 ;; searching=1, sort_param->pos++)
+ {
+ if (_ma_read_cache(&sort_param->read_cache,(byte*) block_info.header,
+ sort_param->pos,
+ share->pack.ref_length,READING_NEXT))
+ DBUG_RETURN(-1);
+ if (searching && ! sort_param->fix_datafile)
+ {
+ param->error_printed=1;
+ param->retry_repair=1;
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ DBUG_RETURN(1); /* Something wrong with data */
+ }
+ sort_param->start_recpos=sort_param->pos;
+ if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
+ &sort_param->rec_buff,
+ &sort_param->rec_buff_size, -1,
+ sort_param->pos))
+ DBUG_RETURN(-1);
+ if (!block_info.rec_len &&
+ sort_param->pos + MEMMAP_EXTRA_MARGIN ==
+ sort_param->read_cache.end_of_file)
+ DBUG_RETURN(-1);
+ if (block_info.rec_len < (uint) share->min_pack_length ||
+ block_info.rec_len > (uint) share->max_pack_length)
+ {
+ if (! searching)
+ _ma_check_print_info(param,"Found block with wrong recordlength: %d at %s\n",
+ block_info.rec_len,
+ llstr(sort_param->pos,llbuff));
+ continue;
+ }
+ if (_ma_read_cache(&sort_param->read_cache,(byte*) sort_param->rec_buff,
+ block_info.filepos, block_info.rec_len,
+ READING_NEXT))
+ {
+ if (! searching)
+ _ma_check_print_info(param,"Couldn't read whole record from %s",
+ llstr(sort_param->pos,llbuff));
+ continue;
+ }
+ if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
+ sort_param->rec_buff, block_info.rec_len))
+ {
+ if (! searching)
+ _ma_check_print_info(param,"Found wrong record at %s",
+ llstr(sort_param->pos,llbuff));
+ continue;
+ }
+ if (!sort_param->fix_datafile)
+ {
+ sort_param->filepos=sort_param->pos;
+ if (sort_param->master)
+ share->state.split++;
+ }
+ sort_param->max_pos=(sort_param->pos=block_info.filepos+
+ block_info.rec_len);
+ info->packed_length=block_info.rec_len;
+
+ if (sort_param->calc_checksum)
+ {
+ info->cur_row.checksum= (*info->s->calc_checksum)(info,
+ sort_param->record);
+ param->glob_crc+= info->cur_row.checksum;
+ }
+ DBUG_RETURN(0);
+ }
+ }
+ DBUG_RETURN(1); /* Impossible */
+}
+
+
+/*
+ Write record to new file.
+
+ SYNOPSIS
+ _ma_sort_write_record()
+ sort_param Sort parameters.
+
+ NOTE
+ This is only called by a master thread if parallel repair is used.
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
+{
+ int flag;
+ uint length;
+ ulong block_length,reclength;
+ byte *from;
+ byte block_buff[8];
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ MARIA_HA *info=sort_info->info;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("_ma_sort_write_record");
+
+ if (sort_param->fix_datafile)
+ {
+ switch (sort_info->new_data_file_type) {
+ case BLOCK_RECORD:
+ DBUG_ASSERT(0);
+ break;
+ case STATIC_RECORD:
+ if (my_b_write(&info->rec_cache,sort_param->record,
+ share->base.pack_reclength))
+ {
+ _ma_check_print_error(param,"%d when writing to datafile",my_errno);
+ DBUG_RETURN(1);
+ }
+ sort_param->filepos+=share->base.pack_reclength;
+ info->s->state.split++;
+ break;
+ case DYNAMIC_RECORD:
+ if (! info->blobs)
+ from=sort_param->rec_buff;
+ else
+ {
+ /* must be sure that local buffer is big enough */
+ reclength=info->s->base.pack_reclength+
+ _ma_calc_total_blob_length(info,sort_param->record)+
+ ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
+ MARIA_DYN_DELETE_BLOCK_HEADER;
+ if (sort_info->buff_length < reclength)
+ {
+ if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength,
+ MYF(MY_FREE_ON_ERROR |
+ MY_ALLOW_ZERO_PTR))))
+ DBUG_RETURN(1);
+ sort_info->buff_length=reclength;
+ }
+ from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
+ }
+ /* We can use info->checksum here as only one thread calls this */
+ info->cur_row.checksum= _ma_checksum(info,sort_param->record);
+ reclength= _ma_rec_pack(info,from,sort_param->record);
+ flag=0;
+
+ do
+ {
+ block_length=reclength+ 3 + test(reclength >= (65520-3));
+ if (block_length < share->base.min_block_length)
+ block_length=share->base.min_block_length;
+ info->update|=HA_STATE_WRITE_AT_END;
+ block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE);
+ if (block_length > MARIA_MAX_BLOCK_LENGTH)
+ block_length=MARIA_MAX_BLOCK_LENGTH;
+ if (_ma_write_part_record(info,0L,block_length,
+ sort_param->filepos+block_length,
+ &from,&reclength,&flag))
+ {
+ _ma_check_print_error(param,"%d when writing to datafile",my_errno);
+ DBUG_RETURN(1);
+ }
+ sort_param->filepos+=block_length;
+ info->s->state.split++;
+ } while (reclength);
+ break;
+ case COMPRESSED_RECORD:
+ reclength=info->packed_length;
+ length= _ma_save_pack_length((uint) share->pack.version, block_buff,
+ reclength);
+ if (info->s->base.blobs)
+ length+= _ma_save_pack_length((uint) share->pack.version,
+ block_buff + length, info->blob_length);
+ if (my_b_write(&info->rec_cache,block_buff,length) ||
+ my_b_write(&info->rec_cache,(byte*) sort_param->rec_buff,reclength))
+ {
+ _ma_check_print_error(param,"%d when writing to datafile",my_errno);
+ DBUG_RETURN(1);
+ }
+ sort_param->filepos+=reclength+length;
+ info->s->state.split++;
+ break;
+ }
+ }
+ if (sort_param->master)
+ {
+ info->state->records++;
+ if ((param->testflag & T_WRITE_LOOP) &&
+ (info->state->records % WRITE_COUNT) == 0)
+ {
+ char llbuff[22];
+ printf("%s\r", llstr(info->state->records,llbuff));
+ VOID(fflush(stdout));
+ }
+ }
+ DBUG_RETURN(0);
+} /* _ma_sort_write_record */
+
+
+ /* Compare two keys from _ma_create_index_by_sort */
+
+static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
+ const void *b)
+{
+ uint not_used[2];
+ return (ha_key_cmp(sort_param->seg, *((uchar**) a), *((uchar**) b),
+ USE_WHOLE_KEY, SEARCH_SAME, not_used));
+} /* sort_key_cmp */
+
+
+static int sort_key_write(MARIA_SORT_PARAM *sort_param, const byte *a)
+{
+ uint diff_pos[2];
+ char llbuff[22],llbuff2[22];
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param= sort_info->param;
+ int cmp;
+
+ if (sort_info->key_block->inited)
+ {
+ cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
+ (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE,
+ diff_pos);
+ if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
+ ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
+ (uchar*) a, USE_WHOLE_KEY,
+ SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
+ else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ {
+ diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg,
+ sort_param->notnull,
+ sort_info->key_block->lastkey,
+ a);
+ }
+ sort_param->unique[diff_pos[0]-1]++;
+ }
+ else
+ {
+ cmp= -1;
+ if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
+ maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
+ a);
+ }
+ if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
+ {
+ sort_info->dupp++;
+ sort_info->info->cur_row.lastpos= get_record_for_key(sort_info->info,
+ sort_param->keyinfo,
+ a);
+ _ma_check_print_warning(param,
+ "Duplicate key for record at %10s against record at %10s",
+ llstr(sort_info->info->cur_row.lastpos, llbuff),
+ llstr(get_record_for_key(sort_info->info,
+ sort_param->keyinfo,
+ sort_info->key_block->
+ lastkey),
+ llbuff2));
+ param->testflag|=T_RETRY_WITHOUT_QUICK;
+ if (sort_info->param->testflag & T_VERBOSE)
+ _ma_print_key(stdout,sort_param->seg, a, USE_WHOLE_KEY);
+ return (sort_delete_record(sort_param));
+ }
+#ifndef DBUG_OFF
+ if (cmp > 0)
+ {
+ _ma_check_print_error(param,
+ "Internal error: Keys are not in order from sort");
+ return(1);
+ }
+#endif
+ return (sort_insert_key(sort_param, sort_info->key_block,
+ a, HA_OFFSET_ERROR));
+} /* sort_key_write */
+
+
+int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param)
+{
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ SORT_KEY_BLOCKS *key_block=sort_info->key_block;
+ MARIA_SHARE *share=sort_info->info->s;
+ uint val_off, val_len;
+ int error;
+ SORT_FT_BUF *maria_ft_buf=sort_info->ft_buf;
+ byte *from, *to;
+
+ val_len=share->ft2_keyinfo.keylength;
+ get_key_full_length_rdonly(val_off, maria_ft_buf->lastkey);
+ to= maria_ft_buf->lastkey+val_off;
+
+ if (maria_ft_buf->buf)
+ {
+ /* flushing first-level tree */
+ error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey,
+ HA_OFFSET_ERROR);
+ for (from=to+val_len;
+ !error && from < maria_ft_buf->buf;
+ from+= val_len)
+ {
+ memcpy(to, from, val_len);
+ error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey,
+ HA_OFFSET_ERROR);
+ }
+ return error;
+ }
+ /* flushing second-level tree keyblocks */
+ error=_ma_flush_pending_blocks(sort_param);
+ /* updating lastkey with second-level tree info */
+ ft_intXstore(maria_ft_buf->lastkey+val_off, -maria_ft_buf->count);
+ _ma_dpointer(sort_info->info, maria_ft_buf->lastkey+val_off+HA_FT_WLEN,
+ share->state.key_root[sort_param->key]);
+ /* restoring first level tree data in sort_info/sort_param */
+ sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks;
+ sort_param->keyinfo=share->keyinfo+sort_param->key;
+ share->state.key_root[sort_param->key]=HA_OFFSET_ERROR;
+ /* writing lastkey in first-level tree */
+ return error ? error :
+ sort_insert_key(sort_param,sort_info->key_block,
+ maria_ft_buf->lastkey,HA_OFFSET_ERROR);
+}
+
+
+static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
+ const byte *a)
+{
+ uint a_len, val_off, val_len, error;
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ SORT_FT_BUF *ft_buf= sort_info->ft_buf;
+ SORT_KEY_BLOCKS *key_block= sort_info->key_block;
+
+ val_len=HA_FT_WLEN+sort_info->info->s->base.rec_reflength;
+ get_key_full_length_rdonly(a_len, (uchar *)a);
+
+ if (!ft_buf)
+ {
+ /*
+ use two-level tree only if key_reflength fits in rec_reflength place
+ and row format is NOT static - for _ma_dpointer not to garble offsets
+ */
+ if ((sort_info->info->s->base.key_reflength <=
+ sort_info->info->s->base.rec_reflength) &&
+ (sort_info->info->s->options &
+ (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
+ ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length +
+ sizeof(SORT_FT_BUF), MYF(MY_WME));
+
+ if (!ft_buf)
+ {
+ sort_param->key_write=sort_key_write;
+ return sort_key_write(sort_param, a);
+ }
+ sort_info->ft_buf= ft_buf;
+ goto word_init_ft_buf; /* no need to duplicate the code */
+ }
+ get_key_full_length_rdonly(val_off, ft_buf->lastkey);
+
+ if (ha_compare_text(sort_param->seg->charset,
+ ((uchar *)a)+1,a_len-1,
+ (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0)
+ {
+ byte *p;
+ if (!ft_buf->buf) /* store in second-level tree */
+ {
+ ft_buf->count++;
+ return sort_insert_key(sort_param,key_block,
+ a + a_len, HA_OFFSET_ERROR);
+ }
+
+ /* storing the key in the buffer. */
+ memcpy (ft_buf->buf, (char *)a+a_len, val_len);
+ ft_buf->buf+=val_len;
+ if (ft_buf->buf < ft_buf->end)
+ return 0;
+
+ /* converting to two-level tree */
+ p=ft_buf->lastkey+val_off;
+
+ while (key_block->inited)
+ key_block++;
+ sort_info->key_block=key_block;
+ sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo;
+ ft_buf->count=(ft_buf->buf - p)/val_len;
+
+ /* flushing buffer to second-level tree */
+ for (error=0; !error && p < ft_buf->buf; p+= val_len)
+ error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR);
+ ft_buf->buf=0;
+ return error;
+ }
+
+ /* flushing buffer */
+ if ((error=_ma_sort_ft_buf_flush(sort_param)))
+ return error;
+
+word_init_ft_buf:
+ a_len+=val_len;
+ memcpy(ft_buf->lastkey, a, a_len);
+ ft_buf->buf=ft_buf->lastkey+a_len;
+ /*
+ 32 is just a safety margin here
+ (at least max(val_len, sizeof(nod_flag)) should be there).
+ Maybe better performance could be achieved if we'd put
+ (sort_info->keyinfo->block_length-32)/XXX
+ instead.
+ TODO: benchmark the best value for XXX.
+ */
+ ft_buf->end= ft_buf->lastkey+ (sort_param->keyinfo->block_length-32);
+ return 0;
+} /* sort_maria_ft_key_write */
+
+
+ /* get pointer to record from a key */
+
+static my_off_t get_record_for_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ const byte *key)
+{
+ return _ma_dpos(info,0, key + _ma_keylength(keyinfo, key));
+} /* get_record_for_key */
+
+
+ /* Insert a key in sort-key-blocks */
+
+static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
+ register SORT_KEY_BLOCKS *key_block,
+ const byte *key,
+ my_off_t prev_block)
+{
+ uint a_length,t_length,nod_flag;
+ my_off_t filepos,key_file_length;
+ byte *anc_buff,*lastkey;
+ MARIA_KEY_PARAM s_temp;
+ MARIA_HA *info;
+ MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ DBUG_ENTER("sort_insert_key");
+
+ anc_buff= key_block->buff;
+ info=sort_info->info;
+ lastkey=key_block->lastkey;
+ nod_flag= (key_block == sort_info->key_block ? 0 :
+ info->s->base.key_reflength);
+
+ if (!key_block->inited)
+ {
+ key_block->inited=1;
+ if (key_block == sort_info->key_block_end)
+ {
+ _ma_check_print_error(param,"To many key-block-levels; Try increasing sort_key_blocks");
+ DBUG_RETURN(1);
+ }
+ a_length=2+nod_flag;
+ key_block->end_pos=anc_buff+2;
+ lastkey=0; /* No previous key in block */
+ }
+ else
+ a_length=maria_getint(anc_buff);
+
+ /* Save pointer to previous block */
+ if (nod_flag)
+ _ma_kpointer(info,key_block->end_pos,prev_block);
+
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,
+ (byte*) 0,lastkey,lastkey,key,
+ &s_temp);
+ (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
+ a_length+=t_length;
+ maria_putint(anc_buff,a_length,nod_flag);
+ key_block->end_pos+=t_length;
+ if (a_length <= keyinfo->block_length)
+ {
+ VOID(_ma_move_key(keyinfo, key_block->lastkey, key));
+ key_block->last_length=a_length-t_length;
+ DBUG_RETURN(0);
+ }
+
+ /* Fill block with end-zero and write filled block */
+ maria_putint(anc_buff,key_block->last_length,nod_flag);
+ bzero(anc_buff+key_block->last_length,
+ keyinfo->block_length- key_block->last_length);
+ key_file_length=info->state->key_file_length;
+ if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ DBUG_RETURN(1);
+
+ /* If we read the page from the key cache, we have to write it back to it */
+ if (key_file_length == info->state->key_file_length)
+ {
+ if (_ma_write_keypage(info, keyinfo, filepos, DFLT_INIT_HITS, anc_buff))
+ DBUG_RETURN(1);
+ }
+ else if (my_pwrite(info->s->kfile,anc_buff,
+ (uint) keyinfo->block_length,filepos, param->myf_rw))
+ DBUG_RETURN(1);
+ DBUG_DUMP("buff",anc_buff,maria_getint(anc_buff));
+
+ /* Write separator-key to block in next level */
+ if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
+ DBUG_RETURN(1);
+
+ /* clear old block and write new key in it */
+ key_block->inited=0;
+ DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block));
+} /* sort_insert_key */
+
+
+ /* Delete record when we found a duplicated key */
+
+static int sort_delete_record(MARIA_SORT_PARAM *sort_param)
+{
+ uint i;
+ int old_file,error;
+ byte *key;
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ MARIA_HA *info=sort_info->info;
+ DBUG_ENTER("sort_delete_record");
+
+ if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
+ {
+ _ma_check_print_error(param,
+ "Quick-recover aborted; Run recovery without switch -q or with switch -qq");
+ DBUG_RETURN(1);
+ }
+ if (info->s->options & HA_OPTION_COMPRESS_RECORD)
+ {
+ _ma_check_print_error(param,
+ "Recover aborted; Can't run standard recovery on compressed tables with errors in data-file. Use switch 'maria_chk --safe-recover' to fix it\n",stderr);;
+ DBUG_RETURN(1);
+ }
+
+ old_file=info->dfile;
+ info->dfile=info->rec_cache.file;
+ if (sort_info->current_key)
+ {
+ key= info->lastkey+info->s->base.max_key_length;
+ if ((error=(*info->s->read_record)(info,sort_param->record,
+ info->cur_row.lastpos)) &&
+ error != HA_ERR_RECORD_DELETED)
+ {
+ _ma_check_print_error(param,"Can't read record to be removed");
+ info->dfile=old_file;
+ DBUG_RETURN(1);
+ }
+
+ for (i=0 ; i < sort_info->current_key ; i++)
+ {
+ uint key_length= _ma_make_key(info, i, key, sort_param->record,
+ info->cur_row.lastpos);
+ if (_ma_ck_delete(info, i, key, key_length))
+ {
+ _ma_check_print_error(param,
+ "Can't delete key %d from record to be removed",
+ i+1);
+ info->dfile=old_file;
+ DBUG_RETURN(1);
+ }
+ }
+ if (sort_param->calc_checksum)
+ param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record);
+ }
+ error=flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info);
+ info->dfile=old_file; /* restore actual value */
+ info->state->records--;
+ DBUG_RETURN(error);
+} /* sort_delete_record */
+
+ /* Fix all pending blocks and flush everything to disk */
+
+int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
+{
+ uint nod_flag,length;
+ my_off_t filepos,key_file_length;
+ SORT_KEY_BLOCKS *key_block;
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ myf myf_rw=sort_info->param->myf_rw;
+ MARIA_HA *info=sort_info->info;
+ MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
+ DBUG_ENTER("_ma_flush_pending_blocks");
+
+ filepos= HA_OFFSET_ERROR; /* if empty file */
+ nod_flag=0;
+ for (key_block=sort_info->key_block ; key_block->inited ; key_block++)
+ {
+ key_block->inited=0;
+ length=maria_getint(key_block->buff);
+ if (nod_flag)
+ _ma_kpointer(info,key_block->end_pos,filepos);
+ key_file_length=info->state->key_file_length;
+ bzero(key_block->buff+length, keyinfo->block_length-length);
+ if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ DBUG_RETURN(1);
+
+ /* If we read the page from the key cache, we have to write it back */
+ if (key_file_length == info->state->key_file_length)
+ {
+ if (_ma_write_keypage(info, keyinfo, filepos,
+ DFLT_INIT_HITS, key_block->buff))
+ DBUG_RETURN(1);
+ }
+ else if (my_pwrite(info->s->kfile,key_block->buff,
+ (uint) keyinfo->block_length,filepos, myf_rw))
+ DBUG_RETURN(1);
+ DBUG_DUMP("buff",key_block->buff,length);
+ nod_flag=1;
+ }
+ info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */
+ DBUG_RETURN(0);
+} /* _ma_flush_pending_blocks */
+
+ /* alloc space and pointers for key_blocks */
+
+static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
+ uint buffer_length)
+{
+ reg1 uint i;
+ SORT_KEY_BLOCKS *block;
+ DBUG_ENTER("alloc_key_blocks");
+
+ if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+
+ buffer_length+IO_SIZE)*blocks,
+ MYF(0))))
+ {
+ _ma_check_print_error(param,"Not enough memory for sort-key-blocks");
+ return(0);
+ }
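+ /*
+ The single allocation above holds the SORT_KEY_BLOCKS array followed by
+ all block buffers; buffer i starts right after the struct array, at
+ offset (buffer_length+IO_SIZE)*i.
+ */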
+ for (i=0 ; i < blocks ; i++)
+ {
+ block[i].inited=0;
+ block[i].buff= (byte*) (block+blocks)+(buffer_length+IO_SIZE)*i;
+ }
+ DBUG_RETURN(block);
+} /* alloc_key_blocks */
+
+
+ /* Check if file is almost full */
+
+int maria_test_if_almost_full(MARIA_HA *info)
+{
+ if (info->s->options & HA_OPTION_COMPRESS_RECORD)
+ return 0;
+ return (my_seek(info->s->kfile,0L,MY_SEEK_END,MYF(0))/10*9 >
+ (my_off_t) (info->s->base.max_key_file_length) ||
+ my_seek(info->dfile,0L,MY_SEEK_END,MYF(0))/10*9 >
+ (my_off_t) info->s->base.max_data_file_length);
+}
+
+ /* Recreate table with bigger, more allocated record data */
+
+int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
+{
+ int error;
+ MARIA_HA info;
+ MARIA_SHARE share;
+ MARIA_KEYDEF *keyinfo,*key,*key_end;
+ HA_KEYSEG *keysegs,*keyseg;
+ MARIA_COLUMNDEF *recdef,*rec,*end;
+ MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end;
+ MARIA_STATUS_INFO status_info;
+ uint unpack,key_parts;
+ ha_rows max_records;
+ ulonglong file_length,tmp_length;
+ MARIA_CREATE_INFO create_info;
+ DBUG_ENTER("maria_recreate_table");
+
+ error=1; /* Default error */
+ info= **org_info;
+ status_info= (*org_info)->state[0];
+ info.state= &status_info;
+ share= *(*org_info)->s;
+ unpack= (share.options & HA_OPTION_COMPRESS_RECORD) &&
+ (param->testflag & T_UNPACK);
+ if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) *
+ share.base.keys)))
+ DBUG_RETURN(0);
+ memcpy((byte*) keyinfo,(byte*) share.keyinfo,
+ (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys));
+
+ key_parts= share.base.all_key_parts;
+ if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)*
+ (key_parts+share.base.keys))))
+ {
+ my_afree((gptr) keyinfo);
+ DBUG_RETURN(1);
+ }
+ if (!(recdef=(MARIA_COLUMNDEF*)
+ my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))))
+ {
+ my_afree((gptr) keyinfo);
+ my_afree((gptr) keysegs);
+ DBUG_RETURN(1);
+ }
+ if (!(uniquedef=(MARIA_UNIQUEDEF*)
+ my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1))))
+ {
+ my_afree((gptr) recdef);
+ my_afree((gptr) keyinfo);
+ my_afree((gptr) keysegs);
+ DBUG_RETURN(1);
+ }
+
+ /* Copy the column definitions */
+ memcpy((byte*) recdef,(byte*) share.rec,
+ (size_t) (sizeof(MARIA_COLUMNDEF)*(share.base.fields+1)));
+ for (rec=recdef,end=recdef+share.base.fields; rec != end ; rec++)
+ {
+ if (unpack && !(share.options & HA_OPTION_PACK_RECORD) &&
+ rec->type != FIELD_BLOB &&
+ rec->type != FIELD_VARCHAR &&
+ rec->type != FIELD_CHECK)
+ rec->type=(int) FIELD_NORMAL;
+ }
+
+ /* Change the new key to point at the saved key segments */
+ memcpy((byte*) keysegs,(byte*) share.keyparts,
+ (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+
+ share.state.header.uniques)));
+ keyseg=keysegs;
+ for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++)
+ {
+ key->seg=keyseg;
+ for (; keyseg->type ; keyseg++)
+ {
+ if (param->language)
+ keyseg->language=param->language; /* change language */
+ }
+ keyseg++; /* Skip end pointer */
+ }
+
+ /*
+ Copy the unique definitions and change them to point at the new key
+ segments
+ */
+ memcpy((byte*) uniquedef,(byte*) share.uniqueinfo,
+ (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques)));
+ for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques;
+ u_ptr != u_end ; u_ptr++)
+ {
+ u_ptr->seg=keyseg;
+ keyseg+=u_ptr->keysegs+1;
+ }
+ if (share.options & HA_OPTION_COMPRESS_RECORD)
+ share.base.records=max_records=info.state->records;
+ else if (share.base.min_pack_length)
+ max_records=(ha_rows) (my_seek(info.dfile,0L,MY_SEEK_END,MYF(0)) /
+ (ulong) share.base.min_pack_length);
+ else
+ max_records=0;
+ unpack= (share.data_file_type == COMPRESSED_RECORD) &&
+ (param->testflag & T_UNPACK);
+ share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD;
+
+ file_length=(ulonglong) my_seek(info.dfile,0L,MY_SEEK_END,MYF(0));
+ tmp_length= file_length+file_length/10;
+ set_if_bigger(file_length,param->max_data_file_length);
+ set_if_bigger(file_length,tmp_length);
+ set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length);
+
+ VOID(maria_close(*org_info));
+ bzero((char*) &create_info,sizeof(create_info));
+ create_info.max_rows=max(max_records,share.base.records);
+ create_info.reloc_rows=share.base.reloc;
+ create_info.old_options=(share.options |
+ (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));
+
+ create_info.data_file_length=file_length;
+ create_info.auto_increment=share.state.auto_increment;
+ create_info.language = (param->language ? param->language :
+ share.state.header.language);
+ create_info.key_file_length= status_info.key_file_length;
+ create_info.org_data_file_type= ((enum data_file_type)
+ share.state.header.org_data_file_type);
+
+ /*
+ Allow for creating an auto_increment key. This has an effect only if
+ an auto_increment key exists in the original table.
+ */
+ create_info.with_auto_increment= TRUE;
+ create_info.null_bytes= share.base.null_bytes;
+ /*
+ We don't have to handle symlinks here because we are using
+ HA_DONT_TOUCH_DATA
+ */
+ if (maria_create(filename, share.data_file_type,
+ share.base.keys - share.state.header.uniques,
+ keyinfo, share.base.fields, recdef,
+ share.state.header.uniques, uniquedef,
+ &create_info,
+ HA_DONT_TOUCH_DATA))
+ {
+ _ma_check_print_error(param,
+ "Got error %d when trying to recreate indexfile",
+ my_errno);
+ goto end;
+ }
+ *org_info=maria_open(filename,O_RDWR,
+ (param->testflag & T_WAIT_FOREVER) ? HA_OPEN_WAIT_IF_LOCKED :
+ (param->testflag & T_DESCRIPT) ? HA_OPEN_IGNORE_IF_LOCKED :
+ HA_OPEN_ABORT_IF_LOCKED);
+ if (!*org_info)
+ {
+ _ma_check_print_error(param,
+ "Got error %d when trying to open re-created indexfile",
+ my_errno);
+ goto end;
+ }
+ /* We are modifying */
+ (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA;
+ VOID(_ma_readinfo(*org_info,F_WRLCK,0));
+ (*org_info)->state->records=info.state->records;
+ if (share.state.create_time)
+ (*org_info)->s->state.create_time=share.state.create_time;
+ (*org_info)->s->state.unique=(*org_info)->this_unique=
+ share.state.unique;
+ (*org_info)->state->checksum=info.state->checksum;
+ (*org_info)->state->del=info.state->del;
+ (*org_info)->s->state.dellink=share.state.dellink;
+ (*org_info)->state->empty=info.state->empty;
+ (*org_info)->state->data_file_length=info.state->data_file_length;
+ if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT |
+ UPDATE_OPEN_COUNT))
+ goto end;
+ error=0;
+end:
+ my_afree((gptr) uniquedef);
+ my_afree((gptr) keyinfo);
+ my_afree((gptr) recdef);
+ my_afree((gptr) keysegs);
+ DBUG_RETURN(error);
+}
+
+
+ /* write suffix to data file if needed */
+
+int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile)
+{
+ MARIA_HA *info=sort_info->info;
+
+ if (info->s->options & HA_OPTION_COMPRESS_RECORD && fix_datafile)
+ {
+ char buff[MEMMAP_EXTRA_MARGIN];
+ bzero(buff,sizeof(buff));
+ if (my_b_write(&info->rec_cache,buff,sizeof(buff)))
+ {
+ _ma_check_print_error(sort_info->param,
+ "%d when writing to datafile",my_errno);
+ return 1;
+ }
+ sort_info->param->read_cache.end_of_file+=sizeof(buff);
+ }
+ return 0;
+}
+
+
+/* Update state and maria_chk time of indexfile */
+
+int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
+{
+ MARIA_SHARE *share=info->s;
+
+ if (update & UPDATE_OPEN_COUNT)
+ {
+ share->state.open_count=0;
+ share->global_changed=0;
+ }
+ if (update & UPDATE_STAT)
+ {
+ uint i, key_parts= mi_uint2korr(share->state.header.key_parts);
+ share->state.rec_per_key_rows=info->state->records;
+ share->state.changed&= ~STATE_NOT_ANALYZED;
+ if (info->state->records)
+ {
+ for (i=0; i<key_parts; i++)
+ {
+ if (!(share->state.rec_per_key_part[i]=param->rec_per_key_part[i]))
+ share->state.changed|= STATE_NOT_ANALYZED;
+ }
+ }
+ }
+ if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
+ {
+ if (update & UPDATE_TIME)
+ {
+ share->state.check_time= (long) time((time_t*) 0);
+ if (!share->state.create_time)
+ share->state.create_time=share->state.check_time;
+ }
+ /*
+ When tables are locked we haven't synched the share state and the
+ real state for a while so we better do it here before synching
+ the share state to disk. Only when table is write locked is it
+ necessary to perform this synch.
+ */
+ if (info->lock_type == F_WRLCK)
+ share->state.state= *info->state;
+ if (_ma_state_info_write(share->kfile,&share->state,1+2))
+ goto err;
+ share->changed=0;
+ }
+ { /* Force update of status */
+ int error;
+ uint r_locks=share->r_locks,w_locks=share->w_locks;
+ share->r_locks= share->w_locks= share->tot_locks= 0;
+ error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK);
+ share->r_locks=r_locks;
+ share->w_locks=w_locks;
+ share->tot_locks=r_locks+w_locks;
+ if (!error)
+ return 0;
+ }
+err:
+ _ma_check_print_error(param,"%d when updating keyfile",my_errno);
+ return 1;
+}
+
+ /*
+ Update auto increment value for a table
+ When setting the 'repair_only' flag we only want to change the
+ old auto_increment value if it is wrong (smaller than some given key).
+ The reason is that we shouldn't change the auto_increment value
+ for a table without good reason when only doing a repair; if the
+ user has inserted and deleted rows, the auto_increment value
+ may be bigger than the biggest current row and this is ok.
+
+ If repair_only is not set, we update the auto_increment value to
+ param->auto_increment_value if that is bigger than the biggest key.
+ */
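+
+/*
+  Illustration (made-up numbers): if the biggest auto_increment key stored in
+  the index is 17 and the saved counter is 25, a repair_only run keeps 25
+  (rows may have been deleted on purpose); when repair_only is not set, the
+  counter is additionally raised to param->auto_increment_value if that value
+  is larger.
+*/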
+
+void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
+ my_bool repair_only)
+{
+ byte *record;
+ DBUG_ENTER("update_auto_increment_key");
+
+ if (!info->s->base.auto_key ||
+ ! maria_is_key_active(info->s->state.key_map, info->s->base.auto_key - 1))
+ {
+ if (!(param->testflag & T_VERY_SILENT))
+ _ma_check_print_info(param,
+ "Table: %s doesn't have an auto increment key\n",
+ param->isam_file_name);
+ DBUG_VOID_RETURN;
+ }
+ if (!(param->testflag & T_SILENT) &&
+ !(param->testflag & T_REP))
+ printf("Updating MARIA file: %s\n", param->isam_file_name);
+ /*
+ We have to use an allocated buffer instead of info->rec_buff as
+ _ma_put_key_in_record() may use info->rec_buff
+ */
+ if (!(record= (byte*) my_malloc((uint) info->s->base.pack_reclength,
+ MYF(0))))
+ {
+ _ma_check_print_error(param,"Not enough memory for extra record");
+ DBUG_VOID_RETURN;
+ }
+
+ maria_extra(info,HA_EXTRA_KEYREAD,0);
+ if (maria_rlast(info, record, info->s->base.auto_key-1))
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
+ my_free((char*) record, MYF(0));
+ _ma_check_print_error(param,"%d when reading last record",my_errno);
+ DBUG_VOID_RETURN;
+ }
+ if (!repair_only)
+ info->s->state.auto_increment=param->auto_increment_value;
+ }
+ else
+ {
+ ulonglong auto_increment= ma_retrieve_auto_increment(info, record);
+ set_if_bigger(info->s->state.auto_increment,auto_increment);
+ if (!repair_only)
+ set_if_bigger(info->s->state.auto_increment, param->auto_increment_value);
+ }
+ maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
+ my_free((char*) record, MYF(0));
+ maria_update_state_info(param, info, UPDATE_AUTO_INC);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Update statistics for each part of an index
+
+ SYNOPSIS
+ maria_update_key_parts()
+ keyinfo IN Index information (only key->keysegs used)
+ rec_per_key_part OUT Store statistics here
+ unique IN Array of (#distinct tuples)
+ notnull_tuples IN Array of (#tuples), or NULL
+ records Number of records in the table
+
+ DESCRIPTION
+ This function is called to produce index statistics values from unique and
+ notnull_tuples arrays after these arrays were produced with sequential
+ index scan (the scan is done in two places: chk_index() and
+ sort_key_write()).
+
+ This function handles all 3 index statistics collection methods.
+
+ Unique is an array:
+ unique[0]= (#different values of {keypart1}) - 1
+ unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
+ ...
+
+ For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
+ notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
+ notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
+ keypart{i} are not NULL)
+ ...
+ For all other statistics collection methods notnull_tuples==NULL.
+
+ Output is an array:
+ rec_per_key_part[k] =
+ = E(#records in the table such that keypart_1=c_1 AND ... AND
+ keypart_k=c_k for arbitrary constants c_1 ... c_k)
+
+ = {assuming that values have uniform distribution and index contains all
+ tuples from the domain (or that the {c_1, ..., c_k} tuple is chosen from
+ index tuples)}
+
+ = #tuples-in-the-index / #distinct-tuples-in-the-index.
+
+ The #tuples-in-the-index and #distinct-tuples-in-the-index have different
+ meaning depending on which statistics collection method is used:
+
+ MI_STATS_METHOD_* how are nulls compared? which tuples are counted?
+ NULLS_EQUAL NULL == NULL all tuples in table
+ NULLS_NOT_EQUAL NULL != NULL all tuples in table
+ IGNORE_NULLS n/a tuples that don't have NULLs
+*/
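+
+/*
+  A worked example (illustrative numbers only): for an index on (a,b) with
+  records=1000, suppose unique[0]=99, unique[1]=400 and notnull==NULL.
+  Keypart 1: count=99, unique_tuples=100, rec_per_key_part[0]=
+  (1000+50)/100= 10. Keyparts 1..2: count=499, unique_tuples=500,
+  rec_per_key_part[1]= (1000+250)/500= 2.
+*/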
+
+void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part,
+ ulonglong *unique, ulonglong *notnull,
+ ulonglong records)
+{
+ ulonglong count=0,tmp, unique_tuples;
+ ulonglong tuples= records;
+ uint parts;
+ for (parts=0 ; parts < keyinfo->keysegs ; parts++)
+ {
+ count+=unique[parts];
+ unique_tuples= count + 1;
+ if (notnull)
+ {
+ tuples= notnull[parts];
+ /*
+ #(unique_tuples not counting tuples with NULLs) =
+ #(unique_tuples counting tuples with NULLs as different) -
+ #(tuples with NULLs)
+ */
+ unique_tuples -= (records - notnull[parts]);
+ }
+
+ if (unique_tuples == 0)
+ tmp= 1;
+ else if (count == 0)
+ tmp= tuples; /* 1 unique tuple */
+ else
+ tmp= (tuples + unique_tuples/2) / unique_tuples;
+
+ /*
+ for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
+ let's ensure it is not
+ */
+ set_if_bigger(tmp,1);
+ if (tmp >= (ulonglong) ~(ulong) 0)
+ tmp=(ulonglong) ~(ulong) 0;
+
+ *rec_per_key_part=(ulong) tmp;
+ rec_per_key_part++;
+ }
+}
+
+
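+/*
+  Simple byte checksum used below: effectively a rotate-left-by-1 of crc
+  (the test() feeds the top bit back into bit 0) plus the next byte.
+*/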
+static ha_checksum maria_byte_checksum(const byte *buf, uint length)
+{
+ ha_checksum crc;
+ const byte *end=buf+length;
+ for (crc=0; buf != end; buf++)
+ crc=((crc << 1) + *((uchar*) buf)) +
+ test(crc & (((ha_checksum) 1) << (8*sizeof(ha_checksum)-1)));
+ return crc;
+}
+
+static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows)
+{
+ uint key_maxlength=key->maxlength;
+ if (key->flag & HA_FULLTEXT)
+ {
+ uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
+ key->seg->charset->mbmaxlen;
+ key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
+ }
+ return (key->flag & HA_SPATIAL) ||
+ (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) &&
+ ((ulonglong) rows * key_maxlength >
+ (ulonglong) maria_max_temp_length));
+}
+
+/*
+ Deactivate all non-unique indexes that can be recreated fast
+ These include packed keys on which sorting will use more temporary
+ space than the max allowed file length or for which the unpacked keys
+ will take much more space than packed keys.
+ Note that 'rows' may be zero for the case when we don't know how many
+ rows we will put into the file.
+ */
+
+void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows)
+{
+ MARIA_SHARE *share=info->s;
+ MARIA_KEYDEF *key=share->keyinfo;
+ uint i;
+
+ DBUG_ASSERT(info->state->records == 0 &&
+ (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES));
+ for (i=0 ; i < share->base.keys ; i++,key++)
+ {
+ if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) &&
+ ! maria_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1)
+ {
+ maria_clear_key_active(share->state.key_map, i);
+ info->update|= HA_STATE_CHANGED;
+ }
+ }
+}
+
+
+/*
+ Return TRUE if we can use repair by sorting
+ One can set the force argument to force the use of sorting
+ even if the temporary file would be quite big!
+*/
+
+my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows,
+ ulonglong key_map, my_bool force)
+{
+ MARIA_SHARE *share=info->s;
+ MARIA_KEYDEF *key=share->keyinfo;
+ uint i;
+
+ /*
+ maria_repair_by_sort only works if we have at least one key. If we don't
+ have any keys, we should use the normal repair.
+ */
+ if (! maria_is_any_key_active(key_map))
+ return FALSE; /* Can't use sort */
+ for (i=0 ; i < share->base.keys ; i++,key++)
+ {
+ if (!force && maria_too_big_key_for_sort(key,rows))
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
+static void
+set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
+{
+ if ((sort_info->new_data_file_type=share->data_file_type) ==
+ COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK)
+ {
+ MARIA_SHARE tmp;
+ sort_info->new_data_file_type= share->state.header.org_data_file_type;
+ /* Set delete_function for sort_delete_record() */
+ memcpy((char*) &tmp, share, sizeof(*share));
+ tmp.options= ~HA_OPTION_COMPRESS_RECORD;
+ _ma_setup_functions(&tmp);
+ share->delete_record=tmp.delete_record;
+ }
+}
+
+static void restore_data_file_type(MARIA_SHARE *share)
+{
+ share->options&= ~HA_OPTION_COMPRESS_RECORD;
+ mi_int2store(share->state.header.options,share->options);
+ share->state.header.data_file_type=
+ share->state.header.org_data_file_type;
+ share->data_file_type= share->state.header.data_file_type;
+ share->pack.header_length= 0;
+}
diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c
new file mode 100644
index 00000000000..02d887f758a
--- /dev/null
+++ b/storage/maria/ma_checkpoint.c
@@ -0,0 +1,429 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3071 Maria checkpoint
+ First version written by Guilhem Bichot on 2006-04-27.
+ Does not compile yet.
+*/
+
+/* Here is the implementation of this module */
+
+/*
+ Summary:
+ - there are asynchronous checkpoints (a writer to the log notices that it's
+ been a long time since we last checkpoint-ed, so it posts a request for a
+ background thread to do a checkpoint; it does not care about the success of the
+ checkpoint). Then the checkpoint is done by the checkpoint thread, at an
+ unspecified moment ("later") (==soon, of course).
+ - there are synchronous checkpoints: a thread requests a checkpoint to
+ happen now and wants to know when it finishes and if it succeeded; then the
+ checkpoint is done by that same thread.
+*/
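+
+/*
+  Rough usage sketch (illustrative only; the real entry points are defined
+  below in this file, and the level passed here is just an example):
+
+    - a client thread that needs the result, e.g. "ALTER MARIA ENGINE DO
+      CHECKPOINT", calls:
+        error= execute_synchronous_checkpoint(FULL);
+    - the log writer only posts a request and continues:
+        request_asynchronous_checkpoint(INDIRECT);
+      the background thread later picks it up via
+        execute_asynchronous_checkpoint_if_any();
+*/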
+
+#include "page_cache.h"
+#include "least_recently_dirtied.h"
+#include "transaction.h"
+#include "share.h"
+#include "log.h"
+
+#define LSN_IMPOSSIBLE ((LSN)0) /* could also be called LSN_ERROR */
+#define LSN_MAX ((LSN)ULONGLONG_MAX)
+
+/*
+ this transaction is used for any system work (purge, checkpoint writing
+ etc), that is, background threads. It will not be declared/initialized here
+ in the final version.
+*/
+st_transaction system_trans= {0 /* long trans id */, 0 /* short trans id */,0,...};
+
+/* those three are protected by the log's mutex */
+/*
+ The maximum rec_lsn in the LRD when last checkpoint was run, serves for the
+ MEDIUM checkpoint.
+*/
+LSN max_rec_lsn_at_last_checkpoint= 0;
+/* last submitted checkpoint request; cleared when starts */
+CHECKPOINT_LEVEL next_asynchronous_checkpoint_to_do= NONE;
+CHECKPOINT_LEVEL checkpoint_in_progress= NONE;
+
+static inline ulonglong read_non_atomic(ulonglong volatile *x);
+
+/*
+ Used by MySQL client threads requesting a checkpoint (like "ALTER MARIA
+ ENGINE DO CHECKPOINT"), and probably by maria_panic(), and at the end of the
+ UNDO recovery phase.
+*/
+my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level)
+{
+ my_bool result;
+ DBUG_ENTER("execute_synchronous_checkpoint");
+ DBUG_ASSERT(level > NONE);
+
+ lock(log_mutex);
+ while (checkpoint_in_progress != NONE)
+ wait_on_checkpoint_done_cond();
+
+ result= execute_checkpoint(level);
+ DBUG_RETURN(result);
+}
+
+/*
+ If no checkpoint is running, and there is a pending asynchronous checkpoint
+ request, executes it.
+ It is safe if multiple threads call it, though in the first version only one
+ will. It is intended to be used by a thread which calls this function
+ regularly; this is why, if there is a request, it does not wait in a loop for
+ synchronous checkpoints to finish, but just exits (the thread may want to do
+ something useful meanwhile, for example flushing dirty pages, instead of
+ waiting).
+*/
+my_bool execute_asynchronous_checkpoint_if_any()
+{
+ my_bool result;
+ CHECKPOINT_LEVEL level;
+ DBUG_ENTER("execute_asynchronous_checkpoint");
+
+ /* first check without mutex, ok to see old data */
+ if (likely((next_asynchronous_checkpoint_to_do == NONE) ||
+ (checkpoint_in_progress != NONE)))
+ DBUG_RETURN(FALSE);
+
+ lock(log_mutex);
+ if (likely((next_asynchronous_checkpoint_to_do == NONE) ||
+ (checkpoint_in_progress != NONE)))
+ {
+ unlock(log_mutex);
+ DBUG_RETURN(FALSE);
+ }
+
+ result= execute_checkpoint(next_asynchronous_checkpoint_to_do);
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Does the actual checkpointing. Called by
+ execute_synchronous_checkpoint() and
+ execute_asynchronous_checkpoint_if_any().
+*/
+my_bool execute_checkpoint(CHECKPOINT_LEVEL level)
+{
+ my_bool result;
+ DBUG_ENTER("execute_checkpoint");
+
+ safemutex_assert_owner(log_mutex);
+ if (next_asynchronous_checkpoint_to_do <= level)
+ next_asynchronous_checkpoint_to_do= NONE;
+ checkpoint_in_progress= level;
+
+ if (unlikely(level > INDIRECT))
+ {
+ LSN copy_of_max_rec_lsn_at_last_checkpoint=
+ max_rec_lsn_at_last_checkpoint;
+ /* much I/O work to do, release log mutex */
+ unlock(log_mutex);
+
+ switch (level)
+ {
+ case FULL:
+ /* flush all pages up to the current end of the LRD */
+ flush_all_LRD_to_lsn(LSN_MAX);
+ /* this will go full speed (normal scheduling, no sleep) */
+ break;
+ case MEDIUM:
+ /*
+ flush all pages which were already dirty at last checkpoint:
+ ensures that recovery will never start from before the next-to-last
+ checkpoint (two-checkpoint rule).
+ */
+ flush_all_LRD_to_lsn(copy_of_max_rec_lsn_at_last_checkpoint);
+ /* this will go full speed (normal scheduling, no sleep) */
+ break;
+ }
+ lock(log_mutex);
+ }
+
+ result= execute_checkpoint_indirect();
+ checkpoint_in_progress= NONE;
+ unlock(log_mutex);
+ broadcast(checkpoint_done_cond);
+ DBUG_RETURN(result);
+}
+
+
+/*
+ Does an indirect checkpoint (collects data from data structures, writes into
+ a checkpoint log record).
+ Starts and ends while holding the log's mutex (it is released in the middle).
+*/
+my_bool execute_checkpoint_indirect()
+{
+ int error= 0, i;
+ /* checkpoint record data: */
+ LSN checkpoint_start_lsn;
+ char checkpoint_start_lsn_char[8];
+ LEX_STRING strings[6]=
+ {{checkpoint_start_lsn_char, 8}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0} };
+ char *ptr;
+ LSN checkpoint_lsn;
+ LSN candidate_max_rec_lsn_at_last_checkpoint;
+ DBUG_ENTER("execute_checkpoint_indirect");
+
+ DBUG_ASSERT(sizeof(byte *) <= 8);
+ DBUG_ASSERT(sizeof(LSN) <= 8);
+
+ safemutex_assert_owner(log_mutex);
+
+ /* STEP 1: record current end-of-log LSN */
+ checkpoint_start_lsn= log_read_end_lsn();
+ if (LSN_IMPOSSIBLE == checkpoint_start_lsn) /* error */
+ DBUG_RETURN(TRUE);
+ unlock(log_mutex);
+
+ DBUG_PRINT("info",("checkpoint_start_lsn %lu", checkpoint_start_lsn));
+ int8store(strings[0].str, checkpoint_start_lsn);
+
+ /* STEP 2: fetch information about dirty pages */
+
+ if (pagecache_collect_changed_blocks_with_LSN(pagecache, &strings[1],
+ &candidate_max_rec_lsn_at_last_checkpoint))
+ goto err;
+
+ /* STEP 3: fetch information about transactions */
+ if (trnman_collect_transactions(&strings[2], &strings[3]))
+ goto err;
+
+ /* STEP 4: fetch information about table files */
+
+ {
+ /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */
+ lock(global_share_list_mutex);
+ strings[4].length= 8+(8+8)*share_list->count;
+ if (NULL == (strings[4].str= my_malloc(strings[4].length)))
+ goto err;
+ ptr= strings[4].str;
+ /*
+ Note that maria_open_list is a list of MARIA_HA*, while we would prefer
+ a list of MARIA_SHARE* here (we are interested in the short id,
+ unique file name, members of MARIA_SHARE*, and in file descriptors,
+ which will in the end be in MARIA_SHARE*).
+ */
+ for (iterate on the maria_open_list)
+ {
+ /* latch each MARIA_SHARE, one by one, like this: */
+ pthread_mutex_lock(&share->intern_lock);
+ /*
+ TODO:
+ we need to prevent the share from going away while we later flush and
+ force it without holding THR_LOCK_maria. For example if the share is
+ free()d by maria_close() we'll have a problem. Or if the share's file
+ descriptor is closed by maria_close() we will not be able to my_sync()
+ it.
+ */
+ pthread_mutex_unlock(&share->intern_lock);
+ store the share pointer into a private array;
+ }
+ unlock(global_share_list_mutex);
+
+ /* work on copy */
+ int8store(ptr, elements_in_array);
+ ptr+= 8;
+ for (el in array)
+ {
+ int8store(ptr, array[...].short_id);
+ ptr+= 8;
+ memcpy(ptr, array[...].unique_file_name[_length], ...);
+ ptr+= ...;
+ /* maybe we need to lock share->intern_lock here */
+ /*
+ these two are long ops (involving disk I/O) that's why we copied the
+ list, to not keep the list locked for long:
+ */
+ flush_bitmap_pages(el);
+ /* TODO: and also autoinc counter, logical file end, free page list */
+
+ /*
+ fsyncs the fd, that's the loooong operation (e.g. max 150 fsync per
+ second, so if you have touched 1000 files it's 7 seconds).
+ */
+ force_file(el);
+ }
+ }
+
+ /* LAST STEP: now write the checkpoint log record */
+
+ checkpoint_lsn= log_write_record(LOGREC_CHECKPOINT,
+ &system_trans, strings);
+
+ /*
+ Do nothing between the log write and the control file write, for the
+ "repair control file" tool to be possible one day.
+ */
+
+ if (LSN_IMPOSSIBLE == checkpoint_lsn)
+ goto err;
+
+ if (0 != control_file_write_and_force(checkpoint_lsn, NULL))
+ goto err;
+
+ /*
+ Note that we should not alter memory structures until we have successfully
+ written the checkpoint record and control file.
+ Btw, a log write failure is serious:
+ - if we know how many bytes we managed to write, we should try to write
+ more, keeping the log's mutex (MY_FULL_IO)
+ - if we don't know, this log record is corrupted and we have no way to
+ "de-corrupt" it, so it will stay corrupted, and as the log is sequential,
+ any log record written after it will not be reachable (for example if we
+ would write UNDOs and crash, we would not be able to read the log and so
+ not be able to rollback), so we should stop the engine now (holding the
+ log's mutex) and do a recovery.
+ */
+ goto end;
+
+err:
+ print_error_to_error_log(the_error_message);
+ candidate_max_rec_lsn_at_last_checkpoint= LSN_IMPOSSIBLE;
+
+end:
+
+ for (i= 1; i<6; i++)
+ my_free(strings[i].str, MYF(MY_ALLOW_ZERO_PTR));
+
+ /*
+ this portion cannot be done as a hook in write_log_record() for the
+ LOGREC_CHECKPOINT type because:
+ - at that moment we still have not written to the control file so cannot
+ mark the request as done; this could be solved by writing to the control
+ file in the hook but that would be an I/O under the log's mutex, bad.
+ - it would not be nice organisation of code (I tried it :).
+ */
+ if (candidate_max_rec_lsn_at_last_checkpoint != LSN_IMPOSSIBLE)
+ {
+ /* checkpoint succeeded */
+ /*
+ TODO: compute log's low water mark (how to do that with our fuzzy
+ ARIES-like reads of data structures? TODO think about it :).
+ */
+ lock(log_mutex);
+ /* That LSN is used for the "two-checkpoint rule" (MEDIUM checkpoints) */
+ max_rec_lsn_at_last_checkpoint= candidate_max_rec_lsn_at_last_checkpoint;
+ DBUG_RETURN(FALSE);
+ }
+ lock(log_mutex);
+ DBUG_RETURN(TRUE);
+ /*
+ keep mutex locked upon exit because callers will want to clear
+ mutex-protected status variables
+ */
+}
+
+
+
+/*
+ Here's what should be put in log_write_record() in the log handler:
+*/
+log_write_record(...)
+{
+ ...;
+ lock(log_mutex);
+ ...;
+ write_to_log(length);
+ written_since_last_checkpoint+= length;
+ if (written_since_last_checkpoint >
+ MAX_LOG_BYTES_WRITTEN_BETWEEN_CHECKPOINTS)
+ {
+ /*
+ ask one system thread (the "LRD background flusher and checkpointer
+ thread" WL#3261) to do a checkpoint
+ */
+ request_asynchronous_checkpoint(INDIRECT);
+ /* prevent similar redundant requests */
+ written_since_last_checkpoint= (my_off_t)0;
+ }
+ ...;
+ unlock(log_mutex);
+ ...;
+}
+
+/*
+ Requests a checkpoint from the background thread, *asynchronously*
+ (requestor does not wait for completion, and does not even later check the
+ result).
+ In real life it will be called by log_write_record().
+*/
+void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level)
+{
+ safemutex_assert_owner(log_mutex);
+
+ DBUG_ASSERT(level > NONE);
+ if ((next_asynchronous_checkpoint_to_do < level) &&
+ (checkpoint_in_progress < level))
+ {
+ /* no equal or stronger running or to run, we post request */
+ /*
+ We just don't broadcast a cond; the checkpoint thread
+ (see ma_least_recently_dirtied.c) will notice our request in max a few
+ seconds.
+ */
+ next_asynchronous_checkpoint_to_do= level; /* post request */
+ }
+
+ /*
+ If there was an error, only an error
+ message to the error log will say so; this is normal: for a checkpoint triggered
+ by a log write, we probably don't want the client's log write to throw an
+ error, as the log write succeeded and a checkpoint failure is not
+ critical: the failure in this case is more for the DBA to know than for
+ the end user.
+ */
+}
+
+
+/*
+ If a 64-bit variable transitions from both halves being zero to both halves
+ being non-zero, and never changes after that (like the transaction's
+ first_undo_lsn), this function can be used to do a read of it (without
+ mutex, without atomic load) which always produces a correct (though maybe
+ slightly old) value (even on 32-bit CPUs).
+ The prototype will change with Sanja's new LSN type.
+*/
+static inline ulonglong read_non_atomic(ulonglong volatile *x)
+{
+#if ( SIZEOF_CHARP >= 8 )
+ /* 64-bit CPU (right?), 64-bit reads are atomic */
+ return *x;
+#else
+ /*
+ 32-bit CPU, where 64-bit reads may give a mix of the old half and the new
+ half (old low bits and new high bits, or the contrary).
+ As the variable we read transitions from both halves being zero to both
+ halves being non-zero, and never changes after that, we can detect atomicity
+ problems:
+ */
+ ulonglong y;
+ for (;;) /* loop until no atomicity problems */
+ {
+ y= *x;
+ if (likely((0 == y) ||
+ ((0 != (y >> 32)) && (0 != (y << 32)))))
+ return y;
+ /* Worth seeing it! */
+ DBUG_PRINT("info",("atomicity problem"));
+ }
+#endif
+}
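+
+/*
+  Sketch of the torn read this loop catches (assuming each 32-bit half is
+  itself read atomically): if *x goes from 0 to 0x00000001FFFFFFFF and a
+  reader sees the old high half with the new low half, it reads
+  0x00000000FFFFFFFF; the high half is zero while the low half is not, so
+  the condition above fails and the read is retried instead of returning a
+  torn value.
+*/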
diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h
new file mode 100644
index 00000000000..1b8064fa755
--- /dev/null
+++ b/storage/maria/ma_checkpoint.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3071 Maria checkpoint
+ First version written by Guilhem Bichot on 2006-04-27.
+ Does not compile yet.
+*/
+
+/* This is the interface of this module. */
+
+typedef enum enum_checkpoint_level {
+ NONE=-1,
+ INDIRECT, /* just write dirty_pages, transactions table and sync files */
+ MEDIUM, /* also flush all dirty pages which were already dirty at prev checkpoint*/
+ FULL /* also flush all dirty pages */
+} CHECKPOINT_LEVEL;
+
+void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level);
+my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level);
+my_bool execute_asynchronous_checkpoint_if_any();
+/* that's all that's needed in the interface */
diff --git a/storage/maria/ma_checksum.c b/storage/maria/ma_checksum.c
new file mode 100644
index 00000000000..1b0f683fe63
--- /dev/null
+++ b/storage/maria/ma_checksum.c
@@ -0,0 +1,68 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Calculate a checksum for a row */
+
+#include "maria_def.h"
+
+ha_checksum _ma_checksum(MARIA_HA *info, const byte *record)
+{
+ uint i;
+ ha_checksum crc=0;
+ MARIA_COLUMNDEF *rec=info->s->rec;
+
+ if (info->s->base.null_bytes)
+ crc= my_checksum(crc, record, info->s->base.null_bytes);
+
+ for (i=info->s->base.fields ; i-- ; )
+ {
+ const byte *pos= record + rec->offset;
+ ulong length;
+
+ switch (rec->type) {
+ case FIELD_BLOB:
+ {
+ length= _ma_calc_blob_length(rec->length-
+ maria_portable_sizeof_char_ptr,
+ pos);
+ memcpy((char*) &pos, pos+rec->length- maria_portable_sizeof_char_ptr,
+ sizeof(char*));
+ break;
+ }
+ case FIELD_VARCHAR:
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length-1);
+ if (pack_length == 1)
+ length= (ulong) *(uchar*) pos;
+ else
+ length= uint2korr(pos);
+ pos+= pack_length;
+ break;
+ }
+ default:
+ length= rec->length;
+ break;
+ }
+ crc= my_checksum(crc, pos ? pos : "", length);
+ }
+ return crc;
+}
+
+
+ha_checksum _ma_static_checksum(MARIA_HA *info, const byte *pos)
+{
+ return my_checksum(0, pos, info->s->base.reclength);
+}
diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c
new file mode 100644
index 00000000000..2a874079961
--- /dev/null
+++ b/storage/maria/ma_close.c
@@ -0,0 +1,133 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* close an isam-database */
+/*
+ TODO:
+ We need to have a separate mutex on the closed file to allow other threads
+ to open other files during the time we flush the cache and close this file
+*/
+
+#include "maria_def.h"
+
+int maria_close(register MARIA_HA *info)
+{
+ int error=0,flag;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("maria_close");
+ DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u",
+ (long) info, (uint) share->reopen,
+ (uint) share->tot_locks));
+
+ pthread_mutex_lock(&THR_LOCK_maria);
+ if (info->lock_type == F_EXTRA_LCK)
+ info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */
+
+ if (share->reopen == 1 && share->kfile >= 0)
+ _ma_decrement_open_count(info);
+
+ if (info->lock_type != F_UNLCK)
+ {
+ if (maria_lock_database(info,F_UNLCK))
+ error=my_errno;
+ }
+ pthread_mutex_lock(&share->intern_lock);
+
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ share->r_locks--;
+ share->tot_locks--;
+ }
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ if (end_io_cache(&info->rec_cache))
+ error=my_errno;
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ }
+ flag= !--share->reopen;
+ /*
+ RECOVERYTODO:
+ If "flag" is TRUE, in the line below we are going to make the table
+ unknown to future checkpoints, so it needs to have fsync'ed itself
+ entirely (bitmap, pages, etc) at this point.
+ The flushing is currently done a few lines further down (which is ok, as we
+ still hold THR_LOCK_maria), but syncing is missing.
+ */
+ maria_open_list=list_delete(maria_open_list,&info->open_list);
+ pthread_mutex_unlock(&share->intern_lock);
+
+ my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ (share->end)(info);
+
+ if (info->s->data_file_type == BLOCK_RECORD)
+ info->dfile= -1; /* Closed in ma_end_once_block_row */
+ if (flag)
+ {
+ if (share->kfile >= 0)
+ {
+ if ((*share->once_end)(share))
+ error= my_errno;
+ if (flush_key_blocks(share->key_cache, share->kfile,
+ share->temporary ? FLUSH_IGNORE_CHANGED :
+ FLUSH_RELEASE))
+ error= my_errno;
+
+ /*
+ If we are crashed, we can safely flush the current state as it will
+ not change the crashed state.
+ We can NOT write the state in other cases as other threads
+ may be using the file at this point
+ */
+ if (share->mode != O_RDONLY && maria_is_crashed(info))
+ _ma_state_info_write(share->kfile, &share->state, 1);
+ if (my_close(share->kfile, MYF(0)))
+ error= my_errno;
+ }
+#ifdef HAVE_MMAP
+ if (share->file_map)
+ _ma_unmap_file(info);
+#endif
+#ifdef THREAD
+ thr_lock_delete(&share->lock);
+ VOID(pthread_mutex_destroy(&share->intern_lock));
+ {
+ int i,keys;
+ keys = share->state.header.keys;
+ VOID(rwlock_destroy(&share->mmap_lock));
+ for(i=0; i<keys; i++) {
+ VOID(rwlock_destroy(&share->key_root_lock[i]));
+ }
+ }
+#endif
+ my_free((gptr) info->s,MYF(0));
+ }
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ if (info->ftparser_param)
+ {
+ my_free((gptr)info->ftparser_param, MYF(0));
+ info->ftparser_param= 0;
+ }
+ if (info->dfile >= 0 && my_close(info->dfile,MYF(0)))
+ error = my_errno;
+
+ my_free((gptr) info,MYF(0));
+
+ if (error)
+ {
+ DBUG_RETURN(my_errno=error);
+ }
+ DBUG_RETURN(0);
+} /* maria_close */
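Editorial note: maria_close() relies on a "last closer cleans up" pattern: share->reopen counts open handles, `flag= !--share->reopen` is evaluated while THR_LOCK_maria is held, and only when the count drops to zero are the shared structures (key file, locks, mutexes) torn down. The sketch below is a simplified illustration of that pattern with plain pthreads; toy_share and toy_close are illustrative names, not Maria APIs.

/* Simplified "last closer cleans up" reference counting, as in maria_close(). */
#include <pthread.h>
#include <stdlib.h>

typedef struct toy_share
{
  unsigned reopen;                       /* number of open handles */
  pthread_mutex_t intern_lock;
} toy_share;

static pthread_mutex_t open_list_lock= PTHREAD_MUTEX_INITIALIZER;

static void toy_close(toy_share *share)
{
  int last;
  pthread_mutex_lock(&open_list_lock);
  last= !--share->reopen;                /* TRUE only for the last handle */
  pthread_mutex_unlock(&open_list_lock);
  if (last)
  {
    /* Only the last closer may destroy the shared structures. */
    pthread_mutex_destroy(&share->intern_lock);
    free(share);
  }
}

int main(void)
{
  toy_share *share= malloc(sizeof(*share));
  share->reopen= 2;                      /* two handles opened the table */
  pthread_mutex_init(&share->intern_lock, NULL);
  toy_close(share);                      /* first close: share survives */
  toy_close(share);                      /* second close: share is freed */
  return 0;
}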
diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c
new file mode 100644
index 00000000000..07eddb956a2
--- /dev/null
+++ b/storage/maria/ma_control_file.c
@@ -0,0 +1,320 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3234 Maria control file
+ First version written by Guilhem Bichot on 2006-04-27.
+ Does not compile yet.
+*/
+
+#include "maria_def.h"
+
+/* Here is the implementation of this module */
+
+/*
+ a control file contains 3 objects: magic string, LSN of last checkpoint,
+ number of last log.
+*/
+
+/* total size should be < sector size for atomic write operation */
+#define CONTROL_FILE_MAGIC_STRING "\xfe\xfe\xc\1MACF"
+#define CONTROL_FILE_MAGIC_STRING_OFFSET 0
+#define CONTROL_FILE_MAGIC_STRING_SIZE (sizeof(CONTROL_FILE_MAGIC_STRING)-1)
+#define CONTROL_FILE_CHECKSUM_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE)
+#define CONTROL_FILE_CHECKSUM_SIZE 1
+#define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_CHECKSUM_OFFSET + CONTROL_FILE_CHECKSUM_SIZE)
+#define CONTROL_FILE_LSN_SIZE LSN_STORE_SIZE
+#define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE)
+#define CONTROL_FILE_FILENO_SIZE 4
+#define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE)
+
+/*
+ This module owns these two vars.
+ uint32 is always updated atomically, but as LSN is 8 bytes we will need
+ provisions to ensure that it's updated atomically in
+ ma_control_file_write_and_force(). Probably the log mutex could be
+ used. TODO.
+*/
+LSN last_checkpoint_lsn;
+uint32 last_logno;
+
+
+/*
+ The control file is less than 512 bytes (a disk sector),
+ so that writing it is as atomic as possible
+*/
+static int control_file_fd= -1;
+
+static char simple_checksum(char *buffer, uint size)
+{
+ /* TODO: improve this sum if we want */
+ char s= 0;
+ uint i;
+ for (i= 0; i<size; i++)
+ s+= buffer[i];
+ return s;
+}
+
+/*
+ Initialize control file subsystem
+
+ SYNOPSIS
+ ma_control_file_create_or_open()
+
+ Looks for the control file. If absent, it's a fresh start and the file is
+ created. If present, it is read to find out the last checkpoint's LSN and the
+ last log, and the last_checkpoint_lsn and last_logno global variables are
+ updated.
+ Called at engine's start.
+
+ The format of the control file is:
+ 8 bytes: magic string
+ 1 byte: checksum of the following bytes
+ 7 bytes: LSN of the last checkpoint (log number + offset in that log)
+ 4 bytes: number of the last log
+
+ RETURN
+ CONTROL_FILE_OK (0) - OK
+ other CONTROL_FILE_* value - Error (in which case the file is left closed)
+*/
+CONTROL_FILE_ERROR ma_control_file_create_or_open()
+{
+ char buffer[CONTROL_FILE_SIZE];
+ char name[FN_REFLEN];
+ MY_STAT stat_buff;
+ my_bool create_file;
+ int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR;
+ int error= CONTROL_FILE_UNKNOWN_ERROR;
+ DBUG_ENTER("ma_control_file_create_or_open");
+
+ /*
+ If you change sizes in the #defines, you at least have to change the
+ "*store" and "*korr" calls in this file, and can even create backward
+ compatibility problems. Beware!
+ */
+ DBUG_ASSERT(CONTROL_FILE_LSN_SIZE == (3+4));
+ DBUG_ASSERT(CONTROL_FILE_FILENO_SIZE == 4);
+
+ if (control_file_fd >= 0) /* already open */
+ DBUG_RETURN(0);
+
+ if (fn_format(name, CONTROL_FILE_BASE_NAME,
+ maria_data_root, "", MYF(MY_WME)) == NullS)
+ DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
+
+ create_file= test(my_access(name,F_OK));
+
+ if (create_file)
+ {
+ if ((control_file_fd= my_create(name, 0,
+ open_flags, MYF(MY_SYNC_DIR))) < 0)
+ DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
+
+ /*
+ To be safer we should make sure that there are no logs or data/index
+ files around (indeed it could be that the control file alone was deleted
+ or not restored, and we should not go on with life at this point).
+
+ TODO: For now we trust (this is an alpha version), but for beta it would
+ be great to verify.
+
+ We could have a tool which can rebuild the control file, by reading the
+ directory of logs, finding the newest log, reading it to find last
+ checkpoint... Slow but can save your db. For this to be possible, we
+ must always write to the control file right after writing the checkpoint
+ log record, and do nothing in between (i.e. the checkpoint must be
+ usable as soon as it has been written to the log).
+ */
+
+ /* init the file with these "undefined" values */
+ DBUG_RETURN(ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN,
+ CONTROL_FILE_IMPOSSIBLE_FILENO,
+ CONTROL_FILE_UPDATE_ALL));
+ }
+
+ /* Otherwise, file exists */
+
+ if ((control_file_fd= my_open(name, open_flags, MYF(MY_WME))) < 0)
+ goto err;
+
+ if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL)
+ goto err;
+
+ if ((uint)stat_buff.st_size < CONTROL_FILE_SIZE)
+ {
+ /*
+ Given that normally we write only a sector and it's atomic, the only
+ possibility for a file to be of too short size is if we crashed at the
+ very first startup, between file creation and file write. Quite unlikely
+ (and can be made even more unlikely by doing this: create a temp file,
+ write it, and then rename it to be the control file).
+ What's more likely is if someone forgot to restore the control file,
+ just did a "touch control" to try to get Maria to start, or if the
+ disk/filesystem has a problem.
+ So let's be rigid.
+ */
+ /*
+ TODO: store a message "too small file" somewhere, so that it goes to
+ MySQL's error log at startup.
+ */
+ error= CONTROL_FILE_TOO_SMALL;
+ goto err;
+ }
+
+ if ((uint)stat_buff.st_size > CONTROL_FILE_SIZE)
+ {
+ /* TODO: store "too big file" message */
+ error= CONTROL_FILE_TOO_BIG;
+ goto err;
+ }
+
+ if (my_read(control_file_fd, buffer, CONTROL_FILE_SIZE,
+ MYF(MY_FNABP | MY_WME)))
+ goto err;
+ if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET,
+ CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE))
+ {
+ /* TODO: store message "bad magic string" somewhere */
+ error= CONTROL_FILE_BAD_MAGIC_STRING;
+ goto err;
+ }
+ if (simple_checksum(buffer + CONTROL_FILE_LSN_OFFSET,
+ CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET) !=
+ buffer[CONTROL_FILE_CHECKSUM_OFFSET])
+ {
+ /* TODO: store message "checksum mismatch" somewhere */
+ error= CONTROL_FILE_BAD_CHECKSUM;
+ goto err;
+ }
+ last_checkpoint_lsn= lsn_korr(buffer + CONTROL_FILE_LSN_OFFSET);
+ last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET);
+
+ DBUG_RETURN(0);
+err:
+ ma_control_file_end();
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Write information durably to the control file; stores this information into
+ the last_checkpoint_lsn and last_logno global variables.
+ Called when we have created a new log (after syncing this log's creation)
+ and when we have written a checkpoint (after syncing this log record).
+
+ SYNOPSIS
+ ma_control_file_write_and_force()
+ checkpoint_lsn LSN of last checkpoint
+ logno last log file number
+ objs_to_write which of the arguments should be used as new values
+ (for example, CONTROL_FILE_UPDATE_ONLY_LSN will not
+ write the logno argument to the control file and will
+ not update the last_logno global variable); can be:
+ CONTROL_FILE_UPDATE_ALL
+ CONTROL_FILE_UPDATE_ONLY_LSN
+ CONTROL_FILE_UPDATE_ONLY_LOGNO.
+
+ NOTE
+ We always want to do one single my_pwrite() here to be as atomic as
+ possible.
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno,
+ uint objs_to_write)
+{
+ char buffer[CONTROL_FILE_SIZE];
+ my_bool update_checkpoint_lsn= FALSE, update_logno= FALSE;
+ DBUG_ENTER("ma_control_file_write_and_force");
+
+ DBUG_ASSERT(control_file_fd >= 0); /* must be open */
+
+ memcpy(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET,
+ CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE);
+
+ /* TODO: you need some protection to be able to read last_* global vars */
+
+ if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LSN)
+ update_checkpoint_lsn= TRUE;
+ else if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LOGNO)
+ update_logno= TRUE;
+ else if (objs_to_write == CONTROL_FILE_UPDATE_ALL)
+ update_checkpoint_lsn= update_logno= TRUE;
+ else /* incorrect value of objs_to_write */
+ DBUG_ASSERT(0);
+
+ if (update_checkpoint_lsn)
+ lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn);
+ else /* store old value == change nothing */
+ lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, last_checkpoint_lsn);
+
+ if (update_logno)
+ int4store(buffer + CONTROL_FILE_FILENO_OFFSET, logno);
+ else
+ int4store(buffer + CONTROL_FILE_FILENO_OFFSET, last_logno);
+
+ buffer[CONTROL_FILE_CHECKSUM_OFFSET]=
+ simple_checksum(buffer + CONTROL_FILE_LSN_OFFSET,
+ CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET);
+
+ if (my_pwrite(control_file_fd, buffer, sizeof(buffer),
+ 0, MYF(MY_FNABP | MY_WME)) ||
+ my_sync(control_file_fd, MYF(MY_WME)))
+ DBUG_RETURN(1);
+
+ /* TODO: you need some protection to be able to write last_* global vars */
+ if (update_checkpoint_lsn)
+ last_checkpoint_lsn= checkpoint_lsn;
+ if (update_logno)
+ last_logno= logno;
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Free resources taken by control file subsystem
+
+ SYNOPSIS
+ ma_control_file_end()
+*/
+
+int ma_control_file_end()
+{
+ int close_error;
+ DBUG_ENTER("ma_control_file_end");
+
+ if (control_file_fd < 0) /* already closed */
+ DBUG_RETURN(0);
+
+ close_error= my_close(control_file_fd, MYF(MY_WME));
+ /*
+ As my_close() frees structures even if close() fails, we do the same,
+ i.e. we mark the file as closed in all cases.
+ */
+ control_file_fd= -1;
+ /*
+ As this module owns these variables, closing the module forbids access to
+ them (just a safety):
+ */
+ last_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN;
+ last_logno= CONTROL_FILE_IMPOSSIBLE_FILENO;
+
+ DBUG_RETURN(close_error);
+}
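Editorial note: to make the layout above concrete, here is a hedged, self-contained sketch that builds a control-file record in memory using the same offsets as the CONTROL_FILE_* defines and the same byte-wise sum as simple_checksum(). The LSN is treated as an opaque 7-byte field (the real lsn_store() encoding is internal to Maria), the log number is stored little-endian as int4store() does, and no I/O or fsync is performed; the sketch only illustrates the record layout and checksum, not the module's API.

/* Sketch of the maria_control record layout: magic, checksum, LSN, logno. */
#include <stdio.h>
#include <string.h>

#define MAGIC          "\xfe\xfe\xc\1MACF"
#define MAGIC_SIZE     (sizeof(MAGIC) - 1)
#define CKSUM_OFFSET   MAGIC_SIZE
#define LSN_OFFSET     (CKSUM_OFFSET + 1)
#define LSN_SIZE       7
#define FILENO_OFFSET  (LSN_OFFSET + LSN_SIZE)
#define FILENO_SIZE    4
#define FILE_SIZE      (FILENO_OFFSET + FILENO_SIZE)

/* Same byte-wise sum as simple_checksum() above. */
static char sum_bytes(const char *p, unsigned n)
{
  char s= 0;
  while (n--)
    s+= *p++;
  return s;
}

int main(void)
{
  char buf[FILE_SIZE];
  unsigned logno= 1;

  memcpy(buf, MAGIC, MAGIC_SIZE);
  memset(buf + LSN_OFFSET, 0, LSN_SIZE);       /* "impossible" (absent) LSN */
  /* store the log number little-endian, as int4store() does */
  buf[FILENO_OFFSET + 0]= (char) (logno);
  buf[FILENO_OFFSET + 1]= (char) (logno >> 8);
  buf[FILENO_OFFSET + 2]= (char) (logno >> 16);
  buf[FILENO_OFFSET + 3]= (char) (logno >> 24);
  /* checksum covers everything after the checksum byte itself */
  buf[CKSUM_OFFSET]= sum_bytes(buf + LSN_OFFSET, FILE_SIZE - LSN_OFFSET);

  printf("control file record is %u bytes, checksum %d\n",
         (unsigned) FILE_SIZE, (int) buf[CKSUM_OFFSET]);
  return 0;
}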
diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h
new file mode 100644
index 00000000000..616babc3bb2
--- /dev/null
+++ b/storage/maria/ma_control_file.h
@@ -0,0 +1,77 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3234 Maria control file
+ First version written by Guilhem Bichot on 2006-04-27.
+*/
+
+#define CONTROL_FILE_BASE_NAME "maria_control"
+/*
+ indicate absence of the log file number; first log is always number 1, 0 is
+ impossible.
+*/
+#define CONTROL_FILE_IMPOSSIBLE_FILENO 0
+/* logs always have a header */
+#define CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET 0
+/* indicate absence of LSN. */
+#define CONTROL_FILE_IMPOSSIBLE_LSN ((LSN)0)
+
+/* Here is the interface of this module */
+
+/*
+ LSN of the last checkpoint
+ (if last_checkpoint_lsn == CONTROL_FILE_IMPOSSIBLE_LSN
+ then there was never a checkpoint)
+*/
+extern LSN last_checkpoint_lsn;
+/*
+ Last log number (if last_logno ==
+ CONTROL_FILE_IMPOSSIBLE_FILENO then there is no log file yet)
+*/
+extern uint32 last_logno;
+
+typedef enum enum_control_file_error {
+ CONTROL_FILE_OK= 0,
+ CONTROL_FILE_TOO_SMALL,
+ CONTROL_FILE_TOO_BIG,
+ CONTROL_FILE_BAD_MAGIC_STRING,
+ CONTROL_FILE_BAD_CHECKSUM,
+ CONTROL_FILE_UNKNOWN_ERROR /* any other error */
+} CONTROL_FILE_ERROR;
+
+#define CONTROL_FILE_UPDATE_ALL 0
+#define CONTROL_FILE_UPDATE_ONLY_LSN 1
+#define CONTROL_FILE_UPDATE_ONLY_LOGNO 2
+
+
+/*
+ Looks for the control file. If absent, it's a fresh start and the file is
+ created. If present, it is read to find out the last checkpoint's LSN and the
+ last log.
+ Called at engine's start.
+*/
+CONTROL_FILE_ERROR ma_control_file_create_or_open();
+/*
+ Write information durably to the control file.
+ Called when we have created a new log (after syncing this log's creation)
+ and when we have written a checkpoint (after syncing this log record).
+*/
+int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno,
+ uint objs_to_write);
+
+
+/* Free resources taken by control file subsystem */
+int ma_control_file_end();
diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c
new file mode 100644
index 00000000000..59f5f7d1a4d
--- /dev/null
+++ b/storage/maria/ma_create.c
@@ -0,0 +1,1043 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Create a MARIA table */
+
+#include "ma_ftdefs.h"
+#include "ma_sp_defs.h"
+#include <my_bit.h>
+#include "ma_blockrec.h"
+
+#if defined(MSDOS) || defined(__WIN__)
+#ifdef __WIN__
+#include <fcntl.h>
+#else
+#include <process.h> /* Prototype for getpid */
+#endif
+#endif
+#include <m_ctype.h>
+
+static int compare_columns(MARIA_COLUMNDEF **a, MARIA_COLUMNDEF **b);
+
+/*
+ Old options are used when recreating a database from maria_chk
+*/
+
+int maria_create(const char *name, enum data_file_type record_type,
+ uint keys,MARIA_KEYDEF *keydefs,
+ uint columns, MARIA_COLUMNDEF *recinfo,
+ uint uniques, MARIA_UNIQUEDEF *uniquedefs,
+ MARIA_CREATE_INFO *ci,uint flags)
+{
+ register uint i,j;
+ File dfile,file;
+ int errpos,save_errno, create_mode= O_RDWR | O_TRUNC;
+ myf create_flag;
+ uint length,max_key_length,packed,pack_bytes,pointer,real_length_diff,
+ key_length,info_length,key_segs,options,min_key_length_skip,
+ base_pos,long_varchar_count,varchar_length,
+ unique_key_parts,fulltext_keys,offset;
+ uint max_field_lengths, extra_header_size;
+ ulong reclength, real_reclength,min_pack_length;
+ char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr;
+ ulong pack_reclength;
+ ulonglong tot_length,max_rows, tmp;
+ enum en_fieldtype type;
+ enum data_file_type org_record_type= record_type;
+ MARIA_SHARE share;
+ MARIA_KEYDEF *keydef,tmp_keydef;
+ MARIA_UNIQUEDEF *uniquedef;
+ HA_KEYSEG *keyseg,tmp_keyseg;
+ MARIA_COLUMNDEF *rec, *rec_end;
+ ulong *rec_per_key_part;
+ my_off_t key_root[HA_MAX_POSSIBLE_KEY];
+ MARIA_CREATE_INFO tmp_create_info;
+ my_bool tmp_table= FALSE; /* cache for presence of HA_OPTION_TMP_TABLE */
+ myf sync_dir= MY_SYNC_DIR;
+ DBUG_ENTER("maria_create");
+ DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u",
+ keys, columns, uniques, flags));
+
+ LINT_INIT(dfile);
+ LINT_INIT(file);
+
+ if (!ci)
+ {
+ bzero((char*) &tmp_create_info,sizeof(tmp_create_info));
+ ci=&tmp_create_info;
+ }
+
+ if (keys + uniques > MARIA_MAX_KEY || columns == 0)
+ {
+ DBUG_RETURN(my_errno=HA_WRONG_CREATE_OPTION);
+ }
+ errpos=0;
+ options=0;
+ bzero((byte*) &share,sizeof(share));
+
+ if (flags & HA_DONT_TOUCH_DATA)
+ {
+ org_record_type= ci->org_data_file_type;
+ if (!(ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD))
+ options=ci->old_options &
+ (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD |
+ HA_OPTION_READ_ONLY_DATA | HA_OPTION_CHECKSUM |
+ HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE);
+ else
+ {
+ /* Uncompressing rows */
+ options=ci->old_options &
+ (HA_OPTION_CHECKSUM | HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE);
+ }
+ }
+
+ if (ci->reloc_rows > ci->max_rows)
+ ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */
+
+ if (!(rec_per_key_part=
+ (ulong*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(long),
+ MYF(MY_WME | MY_ZEROFILL))))
+ DBUG_RETURN(my_errno);
+
+ /* Start by checking fields and field-types used */
+
+ varchar_length=long_varchar_count=packed=
+ pack_reclength= max_field_lengths= 0;
+ reclength= min_pack_length= ci->null_bytes;
+
+ for (rec= recinfo, rec_end= rec + columns ; rec != rec_end ; rec++)
+ {
+ /* Fill in not used struct parts */
+ rec->offset= reclength;
+ rec->empty_pos= 0;
+ rec->empty_bit= 0;
+ rec->fill_length= rec->length;
+
+ reclength+= rec->length;
+ type= rec->type;
+ if (type == FIELD_SKIP_PRESPACE && record_type == BLOCK_RECORD)
+ type= FIELD_NORMAL; /* SKIP_PRESPACE not supported */
+
+ if (type != FIELD_NORMAL && type != FIELD_CHECK)
+ {
+ rec->empty_pos= packed/8;
+ rec->empty_bit= (1 << (packed & 7));
+ if (type == FIELD_BLOB)
+ {
+ packed++;
+ share.base.blobs++;
+ if (pack_reclength != INT_MAX32)
+ {
+ if (rec->length == 4+maria_portable_sizeof_char_ptr)
+ pack_reclength= INT_MAX32;
+ else
+ {
+ /* Add max possible blob length */
+ pack_reclength+= (1 << ((rec->length-
+ maria_portable_sizeof_char_ptr)*8));
+ }
+ }
+ max_field_lengths+= (rec->length - maria_portable_sizeof_char_ptr);
+ }
+ else if (type == FIELD_SKIP_PRESPACE ||
+ type == FIELD_SKIP_ENDSPACE)
+ {
+ max_field_lengths+= rec->length > 255 ? 2 : 1;
+ min_pack_length++;
+ packed++;
+ }
+ else if (type == FIELD_VARCHAR)
+ {
+ varchar_length+= rec->length-1; /* Used for min_pack_length */
+ pack_reclength++;
+ min_pack_length++;
+ max_field_lengths++;
+ packed++;
+ /* We must test for 257 as length includes pack-length */
+ if (test(rec->length >= 257))
+ {
+ long_varchar_count++;
+ max_field_lengths++;
+ }
+ }
+ else if (type != FIELD_SKIP_ZERO)
+ {
+ min_pack_length+=rec->length;
+ rec->empty_pos= 0;
+ rec->empty_bit= 0;
+ }
+ else
+ packed++;
+ }
+ else /* FIELD_NORMAL */
+ {
+ min_pack_length+=rec->length;
+ if (!rec->null_bit)
+ {
+ share.base.fixed_not_null_fields++;
+ share.base.fixed_not_null_fields_length+= rec->length;
+ }
+ }
+ }
+ if ((packed & 7) == 1)
+ {
+ /*
+ Not optimal packing; try to remove a 1-byte zero-field, as
+ this will give the same record length but smaller pack overhead
+ */
+ while (rec != recinfo)
+ {
+ rec--;
+ if (rec->type == (int) FIELD_SKIP_ZERO && rec->length == 1)
+ {
+ rec->type=(int) FIELD_NORMAL;
+ packed--;
+ min_pack_length++;
+ break;
+ }
+ }
+ }
+ share.base.null_bytes= ci->null_bytes;
+ share.base.original_null_bytes= ci->null_bytes;
+ share.base.transactional= ci->transactional;
+ share.base.max_field_lengths= max_field_lengths;
+ share.base.field_offsets= 0; /* for future */
+
+ if (pack_reclength != INT_MAX32)
+ pack_reclength+= max_field_lengths + long_varchar_count;
+
+ if (packed && record_type == STATIC_RECORD)
+ record_type= BLOCK_RECORD;
+ if (record_type == DYNAMIC_RECORD)
+ options|= HA_OPTION_PACK_RECORD; /* Must use packed records */
+
+ if (record_type == STATIC_RECORD)
+ {
+ /* We can't use checksum with static length rows */
+ flags&= ~HA_CREATE_CHECKSUM;
+ options&= ~HA_OPTION_CHECKSUM;
+ min_pack_length+= varchar_length;
+ }
+ if (flags & HA_CREATE_TMP_TABLE)
+ {
+ options|= HA_OPTION_TMP_TABLE;
+ create_mode|= O_EXCL | O_NOFOLLOW;
+ }
+ if (flags & HA_CREATE_CHECKSUM || (options & HA_OPTION_CHECKSUM))
+ {
+ options|= HA_OPTION_CHECKSUM;
+ min_pack_length++;
+ pack_reclength++;
+ }
+ if (flags & HA_CREATE_DELAY_KEY_WRITE)
+ options|= HA_OPTION_DELAY_KEY_WRITE;
+ if (flags & HA_CREATE_RELIES_ON_SQL_LAYER)
+ options|= HA_OPTION_RELIES_ON_SQL_LAYER;
+
+ pack_bytes= (packed + 7) / 8;
+ if (pack_reclength != INT_MAX32)
+ pack_reclength+= reclength+pack_bytes +
+ test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD));
+ min_pack_length+= pack_bytes;
+ /* Calculate min possible row length for rows-in-block */
+ extra_header_size= MAX_FIXED_HEADER_SIZE;
+ if (ci->transactional)
+ extra_header_size= TRANS_MAX_FIXED_HEADER_SIZE;
+ share.base.min_row_length= (extra_header_size + share.base.null_bytes +
+ pack_bytes);
+ if (!ci->data_file_length && ci->max_rows)
+ {
+ if (pack_reclength == INT_MAX32 ||
+ (~(ulonglong) 0)/ci->max_rows < (ulonglong) pack_reclength)
+ ci->data_file_length= ~(ulonglong) 0;
+ else
+ ci->data_file_length=(ulonglong) ci->max_rows*pack_reclength;
+ }
+ else if (!ci->max_rows)
+ {
+ if (record_type == BLOCK_RECORD)
+ {
+ uint rows_per_page= ((maria_block_size - PAGE_OVERHEAD_SIZE) /
+ (min_pack_length + extra_header_size +
+ DIR_ENTRY_SIZE));
+ ulonglong data_file_length= ci->data_file_length;
+ if (data_file_length)
+ data_file_length= ((((ulonglong) 1 << ((BLOCK_RECORD_POINTER_SIZE-1) *
+ 8)) -1));
+ if (rows_per_page > 0)
+ {
+ set_if_smaller(rows_per_page, MAX_ROWS_PER_PAGE);
+ ci->max_rows= ci->data_file_length / maria_block_size * rows_per_page;
+ }
+ else
+ ci->max_rows= ci->data_file_length / (min_pack_length +
+ extra_header_size +
+ DIR_ENTRY_SIZE);
+ }
+ else
+ ci->max_rows=(ha_rows) (ci->data_file_length/(min_pack_length +
+ ((options &
+ HA_OPTION_PACK_RECORD) ?
+ 3 : 0)));
+ }
+ max_rows= (ulonglong) ci->max_rows;
+ if (record_type == BLOCK_RECORD)
+ {
+ /* The + 1 is for the record position within the page */
+ pointer= maria_get_pointer_length((ci->data_file_length /
+ maria_block_size), 3) + 1;
+ set_if_smaller(pointer, BLOCK_RECORD_POINTER_SIZE);
+
+ if (!max_rows)
+ max_rows= (((((ulonglong) 1 << ((pointer-1)*8)) -1) * maria_block_size) /
+ min_pack_length);
+ }
+ else
+ {
+ if (record_type != STATIC_RECORD)
+ pointer= maria_get_pointer_length(ci->data_file_length,
+ maria_data_pointer_size);
+ else
+ pointer= maria_get_pointer_length(ci->max_rows, maria_data_pointer_size);
+ if (!max_rows)
+ max_rows= ((((ulonglong) 1 << (pointer*8)) -1) / min_pack_length);
+ }
+
+ real_reclength=reclength;
+ if (record_type == STATIC_RECORD)
+ {
+ if (reclength <= pointer)
+ reclength=pointer+1; /* reserve place for delete link */
+ }
+ else
+ reclength+= long_varchar_count; /* We need space for varchar! */
+
+ max_key_length=0; tot_length=0 ; key_segs=0;
+ fulltext_keys=0;
+ share.state.rec_per_key_part=rec_per_key_part;
+ share.state.key_root=key_root;
+ share.state.key_del= HA_OFFSET_ERROR;
+ if (uniques)
+ max_key_length= MARIA_UNIQUE_HASH_LENGTH + pointer;
+
+ for (i=0, keydef=keydefs ; i < keys ; i++ , keydef++)
+ {
+ share.state.key_root[i]= HA_OFFSET_ERROR;
+ min_key_length_skip=length=real_length_diff=0;
+ key_length=pointer;
+ if (keydef->flag & HA_SPATIAL)
+ {
+#ifdef HAVE_SPATIAL
+ /* BAR TODO to support 3D and more dimensions in the future */
+ uint sp_segs=SPDIMS*2;
+ keydef->flag=HA_SPATIAL;
+
+ if (flags & HA_DONT_TOUCH_DATA)
+ {
+ /*
+ Called by maria_chk - i.e. table structure was taken from
+ MYI file and SPATIAL key *does have* additional sp_segs keysegs.
+ keydef->seg here points right at the GEOMETRY segment,
+ so we only need to decrease keydef->keysegs.
+ (see maria_recreate_table() in _ma_check.c)
+ */
+ keydef->keysegs-=sp_segs-1;
+ }
+
+ for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ;
+ j++, keyseg++)
+ {
+ if (keyseg->type != HA_KEYTYPE_BINARY &&
+ keyseg->type != HA_KEYTYPE_VARBINARY1 &&
+ keyseg->type != HA_KEYTYPE_VARBINARY2)
+ {
+ my_errno=HA_WRONG_CREATE_OPTION;
+ goto err;
+ }
+ }
+ keydef->keysegs+=sp_segs;
+ key_length+=SPLEN*sp_segs;
+ length++; /* At least one length byte */
+ min_key_length_skip+=SPLEN*2*SPDIMS;
+#else
+ my_errno= HA_ERR_UNSUPPORTED;
+ goto err;
+#endif /*HAVE_SPATIAL*/
+ }
+ else if (keydef->flag & HA_FULLTEXT)
+ {
+ keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY;
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+
+ for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ;
+ j++, keyseg++)
+ {
+ if (keyseg->type != HA_KEYTYPE_TEXT &&
+ keyseg->type != HA_KEYTYPE_VARTEXT1 &&
+ keyseg->type != HA_KEYTYPE_VARTEXT2)
+ {
+ my_errno=HA_WRONG_CREATE_OPTION;
+ goto err;
+ }
+ if (!(keyseg->flag & HA_BLOB_PART) &&
+ (keyseg->type == HA_KEYTYPE_VARTEXT1 ||
+ keyseg->type == HA_KEYTYPE_VARTEXT2))
+ {
+ /* Make a flag that this is a VARCHAR */
+ keyseg->flag|= HA_VAR_LENGTH_PART;
+ /* Store in bit_start number of bytes used to pack the length */
+ keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1)?
+ 1 : 2);
+ }
+ }
+
+ fulltext_keys++;
+ key_length+= HA_FT_MAXBYTELEN+HA_FT_WLEN;
+ length++; /* At least one length byte */
+ min_key_length_skip+=HA_FT_MAXBYTELEN;
+ real_length_diff=HA_FT_MAXBYTELEN-FT_MAX_WORD_LEN_FOR_SORT;
+ }
+ else
+ {
+ /* Test if prefix compression */
+ if (keydef->flag & HA_PACK_KEY)
+ {
+ /* Can't use space_compression on number keys */
+ if ((keydef->seg[0].flag & HA_SPACE_PACK) &&
+ keydef->seg[0].type == (int) HA_KEYTYPE_NUM)
+ keydef->seg[0].flag&= ~HA_SPACE_PACK;
+
+ /* Only use HA_PACK_KEY when first segment is a variable length key */
+ if (!(keydef->seg[0].flag & (HA_SPACE_PACK | HA_BLOB_PART |
+ HA_VAR_LENGTH_PART)))
+ {
+ /* pack relative to previous key */
+ keydef->flag&= ~HA_PACK_KEY;
+ keydef->flag|= HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY;
+ }
+ else
+ {
+ keydef->seg[0].flag|=HA_PACK_KEY; /* for easier internal tests */
+ keydef->flag|=HA_VAR_LENGTH_KEY;
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+ }
+ }
+ if (keydef->flag & HA_BINARY_PACK_KEY)
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+
+ if (keydef->flag & HA_AUTO_KEY && ci->with_auto_increment)
+ share.base.auto_key=i+1;
+ for (j=0, keyseg=keydef->seg ; j < keydef->keysegs ; j++, keyseg++)
+ {
+ /* numbers are stored with the high byte first to make compression easier */
+ switch (keyseg->type) {
+ case HA_KEYTYPE_SHORT_INT:
+ case HA_KEYTYPE_LONG_INT:
+ case HA_KEYTYPE_FLOAT:
+ case HA_KEYTYPE_DOUBLE:
+ case HA_KEYTYPE_USHORT_INT:
+ case HA_KEYTYPE_ULONG_INT:
+ case HA_KEYTYPE_LONGLONG:
+ case HA_KEYTYPE_ULONGLONG:
+ case HA_KEYTYPE_INT24:
+ case HA_KEYTYPE_UINT24:
+ case HA_KEYTYPE_INT8:
+ keyseg->flag|= HA_SWAP_KEY;
+ break;
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY1:
+ case HA_KEYTYPE_VARBINARY2:
+ if (!(keyseg->flag & HA_BLOB_PART))
+ {
+ /* Make a flag that this is a VARCHAR */
+ keyseg->flag|= HA_VAR_LENGTH_PART;
+ /* Store in bit_start number of bytes used to pack the length */
+ keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 ||
+ keyseg->type == HA_KEYTYPE_VARBINARY1) ?
+ 1 : 2);
+ }
+ break;
+ default:
+ break;
+ }
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ DBUG_ASSERT(!(keyseg->flag & HA_VAR_LENGTH_PART));
+ keydef->flag |= HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY;
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+ length++; /* At least one length byte */
+ min_key_length_skip+=keyseg->length;
+ if (keyseg->length >= 255)
+ { /* prefix may be 3 bytes */
+ min_key_length_skip+=2;
+ length+=2;
+ }
+ }
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
+ {
+ DBUG_ASSERT(!test_all_bits(keyseg->flag,
+ (HA_VAR_LENGTH_PART | HA_BLOB_PART)));
+ keydef->flag|=HA_VAR_LENGTH_KEY;
+ length++; /* At least one length byte */
+ options|=HA_OPTION_PACK_KEYS; /* Using packed keys */
+ min_key_length_skip+=keyseg->length;
+ if (keyseg->length >= 255)
+ { /* prefix may be 3 bytes */
+ min_key_length_skip+=2;
+ length+=2;
+ }
+ }
+ key_length+= keyseg->length;
+ if (keyseg->null_bit)
+ {
+ key_length++;
+ options|=HA_OPTION_PACK_KEYS;
+ keyseg->flag|=HA_NULL_PART;
+ keydef->flag|=HA_VAR_LENGTH_KEY | HA_NULL_PART_KEY;
+ }
+ }
+ } /* if HA_FULLTEXT */
+ key_segs+=keydef->keysegs;
+ if (keydef->keysegs > HA_MAX_KEY_SEG)
+ {
+ my_errno=HA_WRONG_CREATE_OPTION;
+ goto err;
+ }
+ /*
+ key_segs may be 0 in the case when we only want to be able to
+ add one row into the table. This can happen with some DISTINCT queries
+ in MySQL
+ */
+ if ((keydef->flag & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME &&
+ key_segs)
+ share.state.rec_per_key_part[key_segs-1]=1L;
+ length+=key_length;
+ if (length >= min(HA_MAX_KEY_BUFF, MARIA_MAX_KEY_LENGTH))
+ {
+ my_errno=HA_WRONG_CREATE_OPTION;
+ goto err;
+ }
+ keydef->block_length= maria_block_size;
+ keydef->keylength= (uint16) key_length;
+ keydef->minlength= (uint16) (length-min_key_length_skip);
+ keydef->maxlength= (uint16) length;
+
+ if (length > max_key_length)
+ max_key_length= length;
+ tot_length+= ((max_rows/(ulong) (((uint) maria_block_size-5)/
+ (length*2))) *
+ maria_block_size);
+ }
+
+ unique_key_parts=0;
+ offset=reclength-uniques*MARIA_UNIQUE_HASH_LENGTH;
+ for (i=0, uniquedef=uniquedefs ; i < uniques ; i++ , uniquedef++)
+ {
+ uniquedef->key=keys+i;
+ unique_key_parts+=uniquedef->keysegs;
+ share.state.key_root[keys+i]= HA_OFFSET_ERROR;
+ tot_length+= (max_rows/(ulong) (((uint) maria_block_size-5)/
+ ((MARIA_UNIQUE_HASH_LENGTH + pointer)*2)))*
+ (ulong) maria_block_size;
+ }
+ keys+=uniques; /* Each unique has 1 key */
+ key_segs+=uniques; /* Each unique has 1 key seg */
+
+ base_pos=(MARIA_STATE_INFO_SIZE + keys * MARIA_STATE_KEY_SIZE +
+ key_segs * MARIA_STATE_KEYSEG_SIZE);
+ info_length= base_pos+(uint) (MARIA_BASE_INFO_SIZE+
+ keys * MARIA_KEYDEF_SIZE+
+ uniques * MARIA_UNIQUEDEF_SIZE +
+ (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+
+ columns*MARIA_COLUMNDEF_SIZE);
+
+ DBUG_PRINT("info", ("info_length: %u", info_length));
+ /* There are only 16 bits for the total header length. */
+ if (info_length > 65535)
+ {
+ my_printf_error(0, "Maria table '%s' has too many columns and/or "
+ "indexes and/or unique constraints.",
+ MYF(0), name + dirname_length(name));
+ my_errno= HA_WRONG_CREATE_OPTION;
+ goto err;
+ }
+
+ bmove(share.state.header.file_version,(byte*) maria_file_magic,4);
+ ci->old_options=options| (ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD ?
+ HA_OPTION_COMPRESS_RECORD |
+ HA_OPTION_TEMP_COMPRESS_RECORD: 0);
+ mi_int2store(share.state.header.options,ci->old_options);
+ mi_int2store(share.state.header.header_length,info_length);
+ mi_int2store(share.state.header.state_info_length,MARIA_STATE_INFO_SIZE);
+ mi_int2store(share.state.header.base_info_length,MARIA_BASE_INFO_SIZE);
+ mi_int2store(share.state.header.base_pos,base_pos);
+ share.state.header.data_file_type= record_type;
+ share.state.header.org_data_file_type= org_record_type;
+ share.state.header.language= (ci->language ?
+ ci->language : default_charset_info->number);
+
+ share.state.dellink = HA_OFFSET_ERROR;
+ share.state.first_bitmap_with_space= 0;
+ share.state.process= (ulong) getpid();
+ share.state.unique= (ulong) 0;
+ share.state.update_count=(ulong) 0;
+ share.state.version= (ulong) time((time_t*) 0);
+ share.state.sortkey= (ushort) ~0;
+ share.state.auto_increment=ci->auto_increment;
+ share.options=options;
+ share.base.rec_reflength=pointer;
+ share.base.block_size= maria_block_size;
+
+ /* Get estimate for index file length (this may be wrong for FT keys) */
+ tmp= (tot_length + maria_block_size * keys *
+ MARIA_INDEX_BLOCK_MARGIN) / maria_block_size;
+ /*
+ use maximum of key_file_length we calculated and key_file_length value we
+ got from MYI file header (see also mariapack.c:save_state)
+ */
+ share.base.key_reflength=
+ maria_get_pointer_length(max(ci->key_file_length,tmp),3);
+ share.base.keys= share.state.header.keys= keys;
+ share.state.header.uniques= uniques;
+ share.state.header.fulltext_keys= fulltext_keys;
+ mi_int2store(share.state.header.key_parts,key_segs);
+ mi_int2store(share.state.header.unique_key_parts,unique_key_parts);
+
+ maria_set_all_keys_active(share.state.key_map, keys);
+
+ share.base.keystart = share.state.state.key_file_length=
+ MY_ALIGN(info_length, maria_block_size);
+ share.base.max_key_block_length= maria_block_size;
+ share.base.max_key_length=ALIGN_SIZE(max_key_length+4);
+ share.base.records=ci->max_rows;
+ share.base.reloc= ci->reloc_rows;
+ share.base.reclength=real_reclength;
+ share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM);
+ share.base.max_pack_length=pack_reclength;
+ share.base.min_pack_length=min_pack_length;
+ share.base.pack_bytes= pack_bytes;
+ share.base.fields= columns;
+ share.base.pack_fields= packed;
+#ifdef USE_RAID
+ share.base.raid_type=ci->raid_type;
+ share.base.raid_chunks=ci->raid_chunks;
+ share.base.raid_chunksize=ci->raid_chunksize;
+#endif
+
+ /* max_data_file_length and max_key_file_length are recalculated on open */
+ if (options & HA_OPTION_TMP_TABLE)
+ {
+ tmp_table= TRUE;
+ sync_dir= 0;
+ share.base.max_data_file_length= (my_off_t) ci->data_file_length;
+ }
+
+ share.base.min_block_length=
+ (share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH &&
+ ! share.base.blobs) ?
+ max(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) :
+ MARIA_EXTEND_BLOCK_LENGTH;
+ if (! (flags & HA_DONT_TOUCH_DATA))
+ share.state.create_time= (long) time((time_t*) 0);
+
+ pthread_mutex_lock(&THR_LOCK_maria);
+
+ if (ci->index_file_name)
+ {
+ char *iext= strrchr(ci->index_file_name, '.');
+ int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT);
+ if (tmp_table)
+ {
+ char *path;
+ /* chop off the table name; temporary tables use a generated name */
+ if ((path= strrchr(ci->index_file_name, FN_LIBCHAR)))
+ *path= '\0';
+ fn_format(filename, name, ci->index_file_name, MARIA_NAME_IEXT,
+ MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT);
+ }
+ else
+ {
+ fn_format(filename, ci->index_file_name, "", MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME | (have_iext ? MY_REPLACE_EXT :
+ MY_APPEND_EXT));
+ }
+ fn_format(linkname, name, "", MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ linkname_ptr=linkname;
+ /*
+ Don't create the table if the link or file exists to ensure that one
+ doesn't accidentally destroy another table.
+ Don't sync dir now if the data file has the same path.
+ */
+ create_flag=
+ (ci->data_file_name &&
+ !strcmp(ci->index_file_name, ci->data_file_name)) ? 0 : sync_dir;
+ }
+ else
+ {
+ fn_format(filename, name, "", MARIA_NAME_IEXT,
+ (MY_UNPACK_FILENAME |
+ (flags & HA_DONT_TOUCH_DATA) ? MY_RETURN_REAL_PATH : 0) |
+ MY_APPEND_EXT);
+ linkname_ptr=0;
+ /*
+ Replace the current file.
+ Don't sync dir now if the data file has the same path.
+ */
+ create_flag= MY_DELETE_OLD | (!ci->data_file_name ? 0 : sync_dir);
+ }
+
+ /*
+ If a MRG_MARIA table is in use, the mapped MARIA tables are open,
+ but no entry is made in the table cache for them.
+ A TRUNCATE command checks for the table in the cache only and could
+ be fooled to believe, the table is not open.
+ Pull the emergency brake in this situation. (Bug #8306)
+ */
+ if (_ma_test_if_reopen(filename))
+ {
+ my_printf_error(0, "MARIA table '%s' is in use "
+ "(most likely by a MERGE table). Try FLUSH TABLES.",
+ MYF(0), name + dirname_length(name));
+ goto err;
+ }
+
+ if ((file= my_create_with_symlink(linkname_ptr, filename, 0, create_mode,
+ MYF(MY_WME|create_flag))) < 0)
+ goto err;
+ errpos=1;
+
+ if (!(flags & HA_DONT_TOUCH_DATA))
+ {
+ if (ci->data_file_name)
+ {
+ char *dext= strrchr(ci->data_file_name, '.');
+ int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT);
+
+ if (tmp_table)
+ {
+ char *path;
+ /* chop off the table name; temporary tables use a generated name */
+ if ((path= strrchr(ci->data_file_name, FN_LIBCHAR)))
+ *path= '\0';
+ fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT,
+ MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT);
+ }
+ else
+ {
+ fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT,
+ MY_UNPACK_FILENAME |
+ (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT));
+ }
+ fn_format(linkname, name, "",MARIA_NAME_DEXT,
+ MY_UNPACK_FILENAME | MY_APPEND_EXT);
+ linkname_ptr=linkname;
+ create_flag=0;
+ }
+ else
+ {
+ fn_format(filename,name,"", MARIA_NAME_DEXT,
+ MY_UNPACK_FILENAME | MY_APPEND_EXT);
+ linkname_ptr=0;
+ create_flag=MY_DELETE_OLD;
+ }
+ if ((dfile=
+ my_create_with_symlink(linkname_ptr, filename, 0, create_mode,
+ MYF(MY_WME | create_flag | sync_dir))) < 0)
+ goto err;
+ errpos=3;
+
+ if (record_type == BLOCK_RECORD)
+ {
+ /* Write one bitmap page */
+ char buff[IO_SIZE];
+ uint i;
+ bzero(buff, sizeof(buff));
+ for (i= 0 ; i < maria_block_size ; i+= IO_SIZE)
+ if (my_write(dfile, (byte*) buff, sizeof(buff), MYF(MY_NABP)))
+ goto err;
+ share.state.state.data_file_length= maria_block_size;
+ }
+ }
+ DBUG_PRINT("info", ("write state info and base info"));
+ if (_ma_state_info_write(file, &share.state, 2) ||
+ _ma_base_info_write(file, &share.base))
+ goto err;
+ DBUG_PRINT("info", ("base_pos: %d base_info_size: %d",
+ base_pos, MARIA_BASE_INFO_SIZE));
+ DBUG_ASSERT(my_tell(file,MYF(0)) == base_pos+ MARIA_BASE_INFO_SIZE);
+
+ /* Write key and keyseg definitions */
+ DBUG_PRINT("info", ("write key and keyseg definitions"));
+ for (i=0 ; i < share.base.keys - uniques; i++)
+ {
+ uint sp_segs=(keydefs[i].flag & HA_SPATIAL) ? 2*SPDIMS : 0;
+
+ if (_ma_keydef_write(file, &keydefs[i]))
+ goto err;
+ for (j=0 ; j < keydefs[i].keysegs-sp_segs ; j++)
+ if (_ma_keyseg_write(file, &keydefs[i].seg[j]))
+ goto err;
+#ifdef HAVE_SPATIAL
+ for (j=0 ; j < sp_segs ; j++)
+ {
+ HA_KEYSEG sseg;
+ sseg.type=SPTYPE;
+ sseg.language= 7; /* Binary */
+ sseg.null_bit=0;
+ sseg.bit_start=0;
+ sseg.bit_end=0;
+ sseg.bit_length= 0;
+ sseg.bit_pos= 0;
+ sseg.length=SPLEN;
+ sseg.null_pos=0;
+ sseg.start=j*SPLEN;
+ sseg.flag= HA_SWAP_KEY;
+ if (_ma_keyseg_write(file, &sseg))
+ goto err;
+ }
+#endif
+ }
+ /* Create extra keys for unique definitions */
+ offset=reclength-uniques*MARIA_UNIQUE_HASH_LENGTH;
+ bzero((char*) &tmp_keydef,sizeof(tmp_keydef));
+ bzero((char*) &tmp_keyseg,sizeof(tmp_keyseg));
+ for (i=0; i < uniques ; i++)
+ {
+ tmp_keydef.keysegs=1;
+ tmp_keydef.flag= HA_UNIQUE_CHECK;
+ tmp_keydef.block_length= (uint16) maria_block_size;
+ tmp_keydef.keylength= MARIA_UNIQUE_HASH_LENGTH + pointer;
+ tmp_keydef.minlength=tmp_keydef.maxlength=tmp_keydef.keylength;
+ tmp_keyseg.type= MARIA_UNIQUE_HASH_TYPE;
+ tmp_keyseg.length= MARIA_UNIQUE_HASH_LENGTH;
+ tmp_keyseg.start= offset;
+ offset+= MARIA_UNIQUE_HASH_LENGTH;
+ if (_ma_keydef_write(file,&tmp_keydef) ||
+ _ma_keyseg_write(file,(&tmp_keyseg)))
+ goto err;
+ }
+
+ /* Save unique definition */
+ DBUG_PRINT("info", ("write unique definitions"));
+ for (i=0 ; i < share.state.header.uniques ; i++)
+ {
+ HA_KEYSEG *keyseg_end;
+ keyseg= uniquedefs[i].seg;
+ if (_ma_uniquedef_write(file, &uniquedefs[i]))
+ goto err;
+ for (keyseg= uniquedefs[i].seg, keyseg_end= keyseg+ uniquedefs[i].keysegs;
+ keyseg < keyseg_end;
+ keyseg++)
+ {
+ switch (keyseg->type) {
+ case HA_KEYTYPE_VARTEXT1:
+ case HA_KEYTYPE_VARTEXT2:
+ case HA_KEYTYPE_VARBINARY1:
+ case HA_KEYTYPE_VARBINARY2:
+ if (!(keyseg->flag & HA_BLOB_PART))
+ {
+ keyseg->flag|= HA_VAR_LENGTH_PART;
+ keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 ||
+ keyseg->type == HA_KEYTYPE_VARBINARY1) ?
+ 1 : 2);
+ }
+ break;
+ default:
+ DBUG_ASSERT((keyseg->flag & HA_VAR_LENGTH_PART) == 0);
+ break;
+ }
+ if (_ma_keyseg_write(file, keyseg))
+ goto err;
+ }
+ }
+ DBUG_PRINT("info", ("write field definitions"));
+ if (record_type == BLOCK_RECORD)
+ {
+ /* Store columns in a more efficient order */
+ MARIA_COLUMNDEF **tmp, **pos;
+ if (!(tmp= (MARIA_COLUMNDEF**) my_malloc(share.base.fields *
+ sizeof(MARIA_COLUMNDEF*),
+ MYF(MY_WME))))
+ goto err;
+ for (rec= recinfo, pos= tmp ; rec != rec_end ; rec++, pos++)
+ *pos= rec;
+ qsort(tmp, share.base.fields, sizeof(*tmp), (qsort_cmp) compare_columns);
+ for (i=0 ; i < share.base.fields ; i++)
+ if (_ma_recinfo_write(file, tmp[i]))
+ goto err;
+ }
+ else
+ {
+ for (i=0 ; i < share.base.fields ; i++)
+ if (_ma_recinfo_write(file, &recinfo[i]))
+ goto err;
+ }
+
+#ifndef DBUG_OFF
+ if ((uint) my_tell(file,MYF(0)) != info_length)
+ {
+ uint pos= (uint) my_tell(file,MYF(0));
+ DBUG_PRINT("warning",("info_length: %d != used_length: %d",
+ info_length, pos));
+ }
+#endif
+
+ /* Enlarge files */
+ DBUG_PRINT("info", ("enlarge to keystart: %lu",
+ (ulong) share.base.keystart));
+ if (my_chsize(file,(ulong) share.base.keystart,0,MYF(0)))
+ goto err;
+
+ if (! (flags & HA_DONT_TOUCH_DATA))
+ {
+#ifdef USE_RELOC
+ if (my_chsize(dfile,share.base.min_pack_length*ci->reloc_rows,0,MYF(0)))
+ goto err;
+ if (!tmp_table && my_sync(file, MYF(0)))
+ goto err;
+#endif
+ /* if !USE_RELOC, there was no write to the file, no need to sync it */
+ errpos=2;
+ if (my_close(dfile,MYF(0)))
+ goto err;
+ }
+ errpos=0;
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ if (my_close(file,MYF(0)))
+ goto err;
+ /*
+ RECOVERYTODO
+ Write a log record describing the CREATE operation (just the file
+ names, link names, and the full header's content).
+ For this record to be of any use for Recovery, we need the upper
+ MySQL layer to be crash-safe, which it is not now (that would require work
+ using the ddl_log of sql/sql_table.cc); when it is, we should reconsider
+ the moment of writing this log record (before or after op, under
+ THR_LOCK_maria or not...), how to use it in Recovery, and force the log.
+ For now this record is just informative.
+ If operation failed earlier, we clean up in "err:" and the MySQL layer
+ will clean up the frm, so we needn't write anything to the log.
+ */
+ my_free((char*) rec_per_key_part,MYF(0));
+ DBUG_RETURN(0);
+
+err:
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ save_errno=my_errno;
+ switch (errpos) {
+ case 3:
+ VOID(my_close(dfile,MYF(0)));
+ /* fall through */
+ case 2:
+ if (! (flags & HA_DONT_TOUCH_DATA))
+ my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_DEXT,
+ MY_UNPACK_FILENAME | MY_APPEND_EXT),
+ sync_dir);
+ /* fall through */
+ case 1:
+ VOID(my_close(file,MYF(0)));
+ if (! (flags & HA_DONT_TOUCH_DATA))
+ my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME | MY_APPEND_EXT),
+ sync_dir);
+ }
+ my_free((char*) rec_per_key_part, MYF(0));
+ DBUG_RETURN(my_errno=save_errno); /* return the fatal errno */
+}
+
+
+uint maria_get_pointer_length(ulonglong file_length, uint def)
+{
+ DBUG_ASSERT(def >= 2 && def <= 7);
+ if (file_length) /* If not default */
+ {
+#ifdef NOT_YET_READY_FOR_8_BYTE_POINTERS
+ if (file_length >= (longlong) 1 << 56)
+ def=8;
+#endif
+ if (file_length >= (longlong) 1 << 48)
+ def=7;
+ else if (file_length >= (longlong) 1 << 40)
+ def=6;
+ else if (file_length >= (longlong) 1 << 32)
+ def=5;
+ else if (file_length >= (1L << 24))
+ def=4;
+ else if (file_length >= (1L << 16))
+ def=3;
+ else
+ def=2;
+ }
+ return def;
+}
+
+
+/*
+ Sort columns for records-in-block
+
+ IMPLEMENTATION
+ Sort columns in following order:
+
+ Fixed size, not null columns
+ Fixed length, null fields
+ Variable length fields (CHAR, VARCHAR)
+ Blobs
+
+ For fields of the same kind, keep them in their original order
+*/
+
+static inline int sign(longlong a)
+{
+ return a < 0 ? -1 : (a > 0 ? 1 : 0);
+}
+
+
+int compare_columns(MARIA_COLUMNDEF **a_ptr, MARIA_COLUMNDEF **b_ptr)
+{
+ MARIA_COLUMNDEF *a= *a_ptr, *b= *b_ptr;
+ enum en_fieldtype a_type, b_type;
+
+ a_type= (a->type == FIELD_NORMAL || a->type == FIELD_CHECK ?
+ FIELD_NORMAL : a->type);
+ b_type= (b->type == FIELD_NORMAL || b->type == FIELD_CHECK ?
+ FIELD_NORMAL : b->type);
+
+ if (a_type == FIELD_NORMAL && !a->null_bit)
+ {
+ if (b_type != FIELD_NORMAL || b->null_bit)
+ return -1;
+ return sign((long) (a->offset - b->offset));
+ }
+ if (b_type == FIELD_NORMAL && !b->null_bit)
+ return 1;
+ if (a_type == b_type)
+ return sign((long) (a->offset - b->offset));
+ if (a_type == FIELD_NORMAL)
+ return -1;
+ if (b_type == FIELD_NORMAL)
+ return 1;
+ if (a_type == FIELD_BLOB)
+ return 1;
+ if (b_type == FIELD_BLOB)
+ return -1;
+ return sign((long) (a->offset - b->offset));
+}
+
+
+
+
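Editorial note: for BLOCK_RECORD tables, maria_create() rewrites the on-disk column order through qsort(..., compare_columns): fixed-size NOT NULL columns first, then fixed-size nullable ones, then variable-length columns, then blobs, with the original offset order preserved within each class. The standalone sketch below mirrors that ordering over a toy column struct; toy_column and its class ranking are illustrative and not the MARIA_COLUMNDEF layout.

/* Toy version of the BLOCK_RECORD column ordering done by compare_columns(). */
#include <stdio.h>
#include <stdlib.h>

enum toy_class { FIXED_NOT_NULL= 0, FIXED_NULL= 1, VARIABLE= 2, BLOB= 3 };

typedef struct toy_column
{
  const char *name;
  enum toy_class cls;
  unsigned offset;                       /* position in the original row */
} toy_column;

static int cmp_columns(const void *a_ptr, const void *b_ptr)
{
  const toy_column *a= a_ptr, *b= b_ptr;
  if (a->cls != b->cls)
    return (int) a->cls - (int) b->cls;  /* class order: see list above */
  return (int) a->offset - (int) b->offset; /* keep original order in a class */
}

int main(void)
{
  toy_column cols[]=
  {
    { "comment", BLOB,            12 },
    { "id",      FIXED_NOT_NULL,   0 },
    { "name",    VARIABLE,         8 },
    { "price",   FIXED_NULL,       4 },
  };
  unsigned i, n= sizeof(cols) / sizeof(cols[0]);

  qsort(cols, n, sizeof(cols[0]), cmp_columns);
  for (i= 0; i < n; i++)
    printf("%s\n", cols[i].name);        /* prints: id, price, name, comment */
  return 0;
}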
diff --git a/storage/maria/ma_dbug.c b/storage/maria/ma_dbug.c
new file mode 100644
index 00000000000..596bf18bfe5
--- /dev/null
+++ b/storage/maria/ma_dbug.c
@@ -0,0 +1,193 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Support routines used with dbug */
+
+#include "maria_def.h"
+
+ /* Print a key in user understandable format */
+
+void _ma_print_key(FILE *stream, register HA_KEYSEG *keyseg,
+ const byte *key, uint length)
+{
+ int flag;
+ short int s_1;
+ long int l_1;
+ float f_1;
+ double d_1;
+ const byte *end;
+ const byte *key_end= key + length;
+
+ VOID(fputs("Key: \"",stream));
+ flag=0;
+ for (; keyseg->type && key < key_end ;keyseg++)
+ {
+ if (flag++)
+ VOID(putc('-',stream));
+ end= key+ keyseg->length;
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ /* A NULL value is encoded by a 1-byte flag. Zero means NULL. */
+ if (! *(key++))
+ {
+ fprintf(stream,"NULL");
+ continue;
+ }
+ }
+
+ switch (keyseg->type) {
+ case HA_KEYTYPE_BINARY:
+ if (!(keyseg->flag & HA_SPACE_PACK) && keyseg->length == 1)
+ { /* packed binary digit */
+ VOID(fprintf(stream,"%d",(uint) *key++));
+ break;
+ }
+ /* fall through */
+ case HA_KEYTYPE_TEXT:
+ case HA_KEYTYPE_NUM:
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ VOID(fprintf(stream,"%.*s",(int) *key,key+1));
+ key+= (int) *key+1;
+ }
+ else
+ {
+ VOID(fprintf(stream,"%.*s",(int) keyseg->length,key));
+ key=end;
+ }
+ break;
+ case HA_KEYTYPE_INT8:
+ VOID(fprintf(stream,"%d",(int) *((signed char*) key)));
+ key=end;
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ s_1= mi_sint2korr(key);
+ VOID(fprintf(stream,"%d",(int) s_1));
+ key=end;
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ {
+ ushort u_1;
+ u_1= mi_uint2korr(key);
+ VOID(fprintf(stream,"%u",(uint) u_1));
+ key=end;
+ break;
+ }
+ case HA_KEYTYPE_LONG_INT:
+ l_1=mi_sint4korr(key);
+ VOID(fprintf(stream,"%ld",l_1));
+ key=end;
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ l_1=mi_sint4korr(key);
+ VOID(fprintf(stream,"%lu",(ulong) l_1));
+ key=end;
+ break;
+ case HA_KEYTYPE_INT24:
+ VOID(fprintf(stream,"%ld",(long) mi_sint3korr(key)));
+ key=end;
+ break;
+ case HA_KEYTYPE_UINT24:
+ VOID(fprintf(stream,"%lu",(ulong) mi_uint3korr(key)));
+ key=end;
+ break;
+ case HA_KEYTYPE_FLOAT:
+ mi_float4get(f_1,key);
+ VOID(fprintf(stream,"%g",(double) f_1));
+ key=end;
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ mi_float8get(d_1,key);
+ VOID(fprintf(stream,"%g",d_1));
+ key=end;
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ {
+ char buff[21];
+ longlong2str(mi_sint8korr(key),buff,-10);
+ VOID(fprintf(stream,"%s",buff));
+ key=end;
+ break;
+ }
+ case HA_KEYTYPE_ULONGLONG:
+ {
+ char buff[21];
+ longlong2str(mi_sint8korr(key),buff,10);
+ VOID(fprintf(stream,"%s",buff));
+ key=end;
+ break;
+ }
+ case HA_KEYTYPE_BIT:
+ {
+ uint i;
+ fputs("0x",stream);
+ for (i=0 ; i < keyseg->length ; i++)
+ fprintf(stream, "%02x", (uint) *key++);
+ key= end;
+ break;
+ }
+
+#endif
+ case HA_KEYTYPE_VARTEXT1: /* VARCHAR and TEXT */
+ case HA_KEYTYPE_VARTEXT2: /* VARCHAR and TEXT */
+ case HA_KEYTYPE_VARBINARY1: /* VARBINARY and BLOB */
+ case HA_KEYTYPE_VARBINARY2: /* VARBINARY and BLOB */
+ {
+ uint tmp_length;
+ get_key_length(tmp_length,key);
+ /*
+ The following command sometimes gives a warning from valgrind.
+ Not yet sure if the bug is in valgrind, glibc or mysqld
+ */
+ VOID(fprintf(stream,"%.*s",(int) tmp_length,key));
+ key+=tmp_length;
+ break;
+ }
+ default: break; /* This never happens */
+ }
+ }
+ VOID(fputs("\"\n",stream));
+ return;
+} /* _ma_print_key */
+
+
+#ifdef EXTRA_DEBUG
+
+my_bool _ma_check_table_is_closed(const char *name, const char *where)
+{
+ char filename[FN_REFLEN];
+ LIST *pos;
+ DBUG_ENTER("_ma_check_table_is_closed");
+
+ (void) fn_format(filename,name,"",MARIA_NAME_IEXT,4+16+32);
+ for (pos=maria_open_list ; pos ; pos=pos->next)
+ {
+ MARIA_HA *info=(MARIA_HA*) pos->data;
+ MARIA_SHARE *share=info->s;
+ if (!strcmp(share->unique_file_name,filename))
+ {
+ if (share->last_version)
+ {
+ fprintf(stderr,"Warning: Table: %s is open on %s\n", name,where);
+ DBUG_PRINT("warning",("Table: %s is open on %s", name,where));
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ DBUG_RETURN(0);
+}
+#endif /* EXTRA_DEBUG */
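Editorial note: _ma_print_key() above shows two conventions of the key format: a nullable key segment is preceded by a one-byte flag where zero means SQL NULL, and integer segments are kept with the high byte first (the HA_SWAP_KEY arrangement made in maria_create()). The sketch below decodes one such segment; print_nullable_int_segment() and its exact framing are illustrative, not part of the Maria code.

/* Toy decoder for a nullable SHORT_INT key segment. */
#include <stdio.h>

typedef unsigned char uchar;

/* Prints one segment and returns the number of key bytes consumed. */
static unsigned print_nullable_int_segment(const uchar *key)
{
  if (key[0] == 0)                       /* flag byte 0 == SQL NULL */
  {
    printf("NULL\n");
    return 1;                            /* only the flag byte in this sketch */
  }
  /* value bytes follow, high byte first (key order for integer segments) */
  {
    short value= (short) ((key[1] << 8) | key[2]);
    printf("%d\n", (int) value);
    return 3;                            /* flag byte + 2 value bytes */
  }
}

int main(void)
{
  uchar null_key[1]=  {0};
  uchar value_key[3]= {1, 0x05, 0x39};   /* non-NULL flag + 1337, high byte first */
  print_nullable_int_segment(null_key);
  print_nullable_int_segment(value_key);
  return 0;
}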
diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c
new file mode 100644
index 00000000000..9198989dcb7
--- /dev/null
+++ b/storage/maria/ma_delete.c
@@ -0,0 +1,891 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Remove a row from a MARIA table */
+
+#include "ma_fulltext.h"
+#include "ma_rt_index.h"
+
+static int d_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uint comp_flag,
+ byte *key,uint key_length,my_off_t page,byte *anc_buff);
+static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *key,byte *anc_buff,
+ my_off_t leaf_page,byte *leaf_buff,byte *keypos,
+ my_off_t next_block,byte *ret_key);
+static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *anc_buff,
+ my_off_t leaf_page,byte *leaf_buff,byte *keypos);
+static uint remove_key(MARIA_KEYDEF *keyinfo,uint nod_flag,byte *keypos,
+ byte *lastkey,byte *page_end,
+ my_off_t *next_block);
+static int _ma_ck_real_delete(register MARIA_HA *info,MARIA_KEYDEF *keyinfo,
+ byte *key, uint key_length, my_off_t *root);
+
+
+int maria_delete(MARIA_HA *info,const byte *record)
+{
+ uint i;
+ byte *old_key;
+ int save_errno;
+ char lastpos[8];
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("maria_delete");
+
+ /* Test if record is in datafile */
+
+ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage",
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno= HA_ERR_CRASHED););
+ DBUG_EXECUTE_IF("my_error_test_undefined_error",
+ maria_print_error(info->s, INT_MAX);
+ DBUG_RETURN(my_errno= INT_MAX););
+ if (!(info->update & HA_STATE_AKTIV))
+ {
+ DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No database read */
+ }
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ DBUG_RETURN(my_errno=EACCES);
+ }
+ if (_ma_readinfo(info,F_WRLCK,1))
+ DBUG_RETURN(my_errno);
+ if ((*share->compare_record)(info,record))
+ goto err; /* Error on read-check */
+
+ if (_ma_mark_file_changed(info))
+ goto err;
+
+ /* Remove all keys from the index file */
+
+ old_key= info->lastkey2;
+ for (i=0 ; i < share->base.keys ; i++ )
+ {
+ if (maria_is_key_active(info->s->state.key_map, i))
+ {
+ info->s->keyinfo[i].version++;
+ if (info->s->keyinfo[i].flag & HA_FULLTEXT )
+ {
+ if (_ma_ft_del(info,i,(char*) old_key,record,info->cur_row.lastpos))
+ goto err;
+ }
+ else
+ {
+ if (info->s->keyinfo[i].ck_delete(info,i,old_key,
+ _ma_make_key(info,i,old_key,record,info->cur_row.lastpos)))
+ goto err;
+ }
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+ }
+ }
+
+ if ((*share->delete_record)(info))
+ goto err; /* Remove record from database */
+
+ /*
+ We can't use the row based checksum as this doesn't have enough
+ precision.
+ */
+ if (info->s->calc_checksum)
+ {
+ info->cur_row.checksum= (*info->s->calc_checksum)(info,record);
+ info->state->checksum-= info->cur_row.checksum;
+ }
+
+ info->update= HA_STATE_CHANGED+HA_STATE_DELETED+HA_STATE_ROW_CHANGED;
+ info->state->records--;
+
+ mi_sizestore(lastpos, info->cur_row.lastpos);
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ if (info->invalidator != 0)
+ {
+ DBUG_PRINT("info", ("invalidator... '%s' (delete)", info->filename));
+ (*info->invalidator)(info->filename);
+ info->invalidator=0;
+ }
+ DBUG_RETURN(0);
+
+err:
+ save_errno=my_errno;
+ mi_sizestore(lastpos, info->cur_row.lastpos);
+ if (save_errno != HA_ERR_RECORD_CHANGED)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info); /* mark table crashed */
+ }
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ info->update|=HA_STATE_WRITTEN; /* Buffer changed */
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ my_errno=save_errno;
+ if (save_errno == HA_ERR_KEY_NOT_FOUND)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ }
+
+ DBUG_RETURN(my_errno);
+} /* maria_delete */
+
+
+ /* Remove a key from the btree index */
+
+int _ma_ck_delete(register MARIA_HA *info, uint keynr, byte *key,
+ uint key_length)
+{
+ return _ma_ck_real_delete(info, info->s->keyinfo+keynr, key, key_length,
+ &info->s->state.key_root[keynr]);
+} /* _ma_ck_delete */
+
+
+static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key, uint key_length, my_off_t *root)
+{
+ int error;
+ uint nod_flag;
+ my_off_t old_root;
+ byte *root_buff;
+ DBUG_ENTER("_ma_ck_real_delete");
+
+ if ((old_root=*root) == HA_OFFSET_ERROR)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno=HA_ERR_CRASHED);
+ }
+ if (!(root_buff= (byte*) my_alloca((uint) keyinfo->block_length+
+ HA_MAX_KEY_BUFF*2)))
+ {
+ DBUG_PRINT("error",("Couldn't allocate memory"));
+ DBUG_RETURN(my_errno=ENOMEM);
+ }
+ DBUG_PRINT("info",("root_page: %ld", (long) old_root));
+ if (!_ma_fetch_keypage(info,keyinfo,old_root,DFLT_INIT_HITS,root_buff,0))
+ {
+ error= -1;
+ goto err;
+ }
+ if ((error=d_search(info,keyinfo,
+ (keyinfo->flag & HA_FULLTEXT ? SEARCH_FIND | SEARCH_UPDATE
+ : SEARCH_SAME),
+ key,key_length,old_root,root_buff)) >0)
+ {
+ if (error == 2)
+ {
+ DBUG_PRINT("test",("Enlarging of root when deleting"));
+ error= _ma_enlarge_root(info,keyinfo,key,root);
+ }
+ else /* error == 1 */
+ {
+ if (maria_getint(root_buff) <= (nod_flag=_ma_test_if_nod(root_buff))+3)
+ {
+ error=0;
+ if (nod_flag)
+ *root= _ma_kpos(nod_flag,root_buff+2+nod_flag);
+ else
+ *root=HA_OFFSET_ERROR;
+ if (_ma_dispose(info,keyinfo,old_root,DFLT_INIT_HITS))
+ error= -1;
+ }
+ else
+ error= _ma_write_keypage(info,keyinfo,old_root,
+ DFLT_INIT_HITS,root_buff);
+ }
+ }
+err:
+ my_afree((gptr) root_buff);
+ DBUG_PRINT("exit",("Return: %d",error));
+ DBUG_RETURN(error);
+} /* _ma_ck_real_delete */
+
+
+ /*
+ ** Remove key below key root
+ ** Return values:
+ ** 1 if there are fewer buffers (the page underflowed); in this case
+ **   anc_buff is not saved
+ ** 2 if there are more buffers (the page was split)
+ ** -1 on error
+ */
+
+static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uint comp_flag, byte *key, uint key_length,
+ my_off_t page, byte *anc_buff)
+{
+ int flag,ret_value,save_flag;
+ uint length,nod_flag,search_key_length;
+ my_bool last_key;
+ byte *leaf_buff,*keypos;
+ my_off_t leaf_page,next_block;
+ byte lastkey[HA_MAX_KEY_BUFF];
+ DBUG_ENTER("d_search");
+ DBUG_DUMP("page",anc_buff,maria_getint(anc_buff));
+
+ search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY;
+ flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key, search_key_length,
+ comp_flag, &keypos, lastkey, &last_key);
+ if (flag == MARIA_FOUND_WRONG_KEY)
+ {
+ DBUG_PRINT("error",("Found wrong key"));
+ DBUG_RETURN(-1);
+ }
+ nod_flag=_ma_test_if_nod(anc_buff);
+
+ if (!flag && keyinfo->flag & HA_FULLTEXT)
+ {
+ uint off;
+ int subkeys;
+
+ get_key_full_length_rdonly(off, lastkey);
+ subkeys=ft_sintXkorr(lastkey+off);
+ DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0);
+ comp_flag=SEARCH_SAME;
+ if (subkeys >= 0)
+ {
+ /* normal word, one-level tree structure */
+ if (info->ft1_to_ft2)
+ {
+ /* we're in ft1->ft2 conversion mode. Saving key data */
+ insert_dynamic(info->ft1_to_ft2, (char*) (lastkey+off));
+ }
+ else
+ {
+ /* we need exact match only if not in ft1->ft2 conversion mode */
+ flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY,
+ comp_flag, &keypos, lastkey, &last_key);
+ }
+ /* fall through to normal delete */
+ }
+ else
+ {
+ /* popular word. two-level tree. going down */
+ uint tmp_key_length;
+ my_off_t root;
+ byte *kpos=keypos;
+
+ if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey)))
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ DBUG_RETURN(-1);
+ }
+ root= _ma_dpos(info,nod_flag,kpos);
+ if (subkeys == -1)
+ {
+ /* the last entry in sub-tree */
+ if (_ma_dispose(info, keyinfo, root,DFLT_INIT_HITS))
+ DBUG_RETURN(-1);
+ /* fall through to normal delete */
+ }
+ else
+ {
+ keyinfo=&info->s->ft2_keyinfo;
+ kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */
+ get_key_full_length_rdonly(off, key);
+ key+=off;
+ ret_value= _ma_ck_real_delete(info, &info->s->ft2_keyinfo,
+ key, HA_FT_WLEN, &root);
+ _ma_dpointer(info, kpos+HA_FT_WLEN, root);
+ subkeys++;
+ ft_intXstore(kpos, subkeys);
+ if (!ret_value)
+ ret_value= _ma_write_keypage(info,keyinfo,page,
+ DFLT_INIT_HITS,anc_buff);
+ DBUG_PRINT("exit",("Return: %d",ret_value));
+ DBUG_RETURN(ret_value);
+ }
+ }
+ }
+ leaf_buff=0;
+ LINT_INIT(leaf_page);
+ if (nod_flag)
+ {
+ leaf_page= _ma_kpos(nod_flag,keypos);
+ if (!(leaf_buff= (byte*) my_alloca((uint) keyinfo->block_length+
+ HA_MAX_KEY_BUFF*2)))
+ {
+ DBUG_PRINT("error",("Couldn't allocate memory"));
+ my_errno=ENOMEM;
+ DBUG_PRINT("exit",("Return: %d",-1));
+ DBUG_RETURN(-1);
+ }
+ if (!_ma_fetch_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff,0))
+ goto err;
+ }
+
+ if (flag != 0)
+ {
+ if (!nod_flag)
+ {
+ DBUG_PRINT("error",("Didn't find key"));
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED; /* This should never happen */
+ goto err;
+ }
+ save_flag=0;
+ ret_value=d_search(info,keyinfo,comp_flag,key,key_length,
+ leaf_page,leaf_buff);
+ }
+ else
+ { /* Found key */
+ uint tmp;
+ length=maria_getint(anc_buff);
+ if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length,
+ &next_block)))
+ goto err;
+
+ length-= tmp;
+
+ maria_putint(anc_buff,length,nod_flag);
+ if (!nod_flag)
+ { /* On leaf page */
+ if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff))
+ {
+ DBUG_PRINT("exit",("Return: %d",-1));
+ DBUG_RETURN(-1);
+ }
+ /* Page will be updated later if we return 1 */
+ DBUG_RETURN(test(length <= (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH :
+ (uint) keyinfo->underflow_block_length)));
+ }
+ save_flag=1;
+ ret_value=del(info,keyinfo,key,anc_buff,leaf_page,leaf_buff,keypos,
+ next_block,lastkey);
+ }
+ if (ret_value >0)
+ {
+ save_flag=1;
+ if (ret_value == 1)
+ ret_value= underflow(info,keyinfo,anc_buff,leaf_page,leaf_buff,keypos);
+ else
+ { /* This happens only with packed keys */
+ DBUG_PRINT("test",("Enlarging of key when deleting"));
+ if (!_ma_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length))
+ goto err;
+ ret_value= _ma_insert(info,keyinfo,key,anc_buff,keypos,lastkey,
+ (byte*) 0,(byte*) 0,(my_off_t) 0,(my_bool) 0);
+ }
+ }
+ if (ret_value == 0 && maria_getint(anc_buff) > keyinfo->block_length)
+ {
+ save_flag=1;
+ ret_value= _ma_split_page(info,keyinfo,key,anc_buff,lastkey,0) | 2;
+ }
+ if (save_flag && ret_value != 1)
+ ret_value|= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff);
+ else
+ {
+ DBUG_DUMP("page",anc_buff,maria_getint(anc_buff));
+ }
+ my_afree(leaf_buff);
+ DBUG_PRINT("exit",("Return: %d",ret_value));
+ DBUG_RETURN(ret_value);
+
+err:
+ my_afree(leaf_buff);
+ DBUG_PRINT("exit",("Error: %d",my_errno));
+ DBUG_RETURN (-1);
+} /* d_search */
+
+
+ /* Remove a key that has a page-reference */
+
+static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ byte *key, byte *anc_buff, my_off_t leaf_page,
+ byte *leaf_buff,
+ byte *keypos, /* Pos to where deleted key was */
+ my_off_t next_block,
+ byte *ret_key) /* key before keypos in anc_buff */
+{
+ int ret_value,length;
+ uint a_length,nod_flag,tmp;
+ my_off_t next_page;
+ byte keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key;
+ MARIA_SHARE *share=info->s;
+ MARIA_KEY_PARAM s_temp;
+ DBUG_ENTER("del");
+ DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page,
+ (ulong) keypos));
+ DBUG_DUMP("leaf_buff",leaf_buff,maria_getint(leaf_buff));
+
+ endpos= leaf_buff+ maria_getint(leaf_buff);
+ if (!(key_start= _ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos,
+ &tmp)))
+ DBUG_RETURN(-1);
+
+ if ((nod_flag=_ma_test_if_nod(leaf_buff)))
+ {
+ next_page= _ma_kpos(nod_flag,endpos);
+ if (!(next_buff= (byte*) my_alloca((uint) keyinfo->block_length+
+ HA_MAX_KEY_BUFF*2)))
+ DBUG_RETURN(-1);
+ if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0))
+ ret_value= -1;
+ else
+ {
+ DBUG_DUMP("next_page",next_buff,maria_getint(next_buff));
+ if ((ret_value=del(info,keyinfo,key,anc_buff,next_page,next_buff,
+ keypos,next_block,ret_key)) >0)
+ {
+ endpos=leaf_buff+maria_getint(leaf_buff);
+ if (ret_value == 1)
+ {
+ ret_value=underflow(info,keyinfo,leaf_buff,next_page,
+ next_buff,endpos);
+ if (ret_value == 0 && maria_getint(leaf_buff) > keyinfo->block_length)
+ {
+ ret_value= _ma_split_page(info,keyinfo,key,leaf_buff,ret_key,0) | 2;
+ }
+ }
+ else
+ {
+ DBUG_PRINT("test",("Inserting of key when deleting"));
+ if (!_ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos,
+ &tmp))
+ goto err;
+ ret_value= _ma_insert(info,keyinfo,key,leaf_buff,endpos,keybuff,
+ (byte*) 0,(byte*) 0,(my_off_t) 0,0);
+ }
+ }
+ if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff))
+ goto err;
+ }
+ my_afree(next_buff);
+ DBUG_RETURN(ret_value);
+ }
+
+ /* Remove last key from leaf page */
+
+ maria_putint(leaf_buff,key_start-leaf_buff,nod_flag);
+ if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff))
+ goto err;
+
+ /* Place last key in ancestor page on deleted key position */
+
+ a_length=maria_getint(anc_buff);
+ endpos=anc_buff+a_length;
+ if (keypos != anc_buff+2+share->base.key_reflength &&
+ !_ma_get_last_key(info,keyinfo,anc_buff,ret_key,keypos,&tmp))
+ goto err;
+ prev_key=(keypos == anc_buff+2+share->base.key_reflength ?
+ 0 : ret_key);
+ length=(*keyinfo->pack_key)(keyinfo,share->base.key_reflength,
+ keypos == endpos ? (byte*) 0 : keypos,
+ prev_key, prev_key,
+ keybuff,&s_temp);
+ if (length > 0)
+ bmove_upp(endpos+length,endpos,(uint) (endpos-keypos));
+ else
+ bmove(keypos,keypos-length, (int) (endpos-keypos)+length);
+ (*keyinfo->store_key)(keyinfo,keypos,&s_temp);
+ /* Save pointer to next leaf */
+ if (!(*keyinfo->get_key)(keyinfo,share->base.key_reflength,&keypos,ret_key))
+ goto err;
+ _ma_kpointer(info,keypos - share->base.key_reflength,next_block);
+ maria_putint(anc_buff,a_length+length,share->base.key_reflength);
+
+ DBUG_RETURN( maria_getint(leaf_buff) <=
+ (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH :
+ (uint) keyinfo->underflow_block_length));
+err:
+ DBUG_RETURN(-1);
+} /* del */
+
+
+ /* Balances adjacent pages if underflow occurs */
+
+static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ byte *anc_buff,
+ my_off_t leaf_page,/* Ancestor page and underflow page */
+ byte *leaf_buff,
+ byte *keypos) /* Position to pos after key */
+{
+ int t_length;
+ uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag,
+ key_reflength,key_length;
+ my_off_t next_page;
+ byte anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF];
+ byte *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key;
+ byte *after_key;
+ MARIA_KEY_PARAM s_temp;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("underflow");
+ DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page,
+ (ulong) keypos));
+ DBUG_DUMP("anc_buff",anc_buff,maria_getint(anc_buff));
+ DBUG_DUMP("leaf_buff",leaf_buff,maria_getint(leaf_buff));
+
+ buff=info->buff;
+ info->keybuff_used=1;
+ next_keypos=keypos;
+ nod_flag=_ma_test_if_nod(leaf_buff);
+ p_length=nod_flag+2;
+ anc_length=maria_getint(anc_buff);
+ leaf_length=maria_getint(leaf_buff);
+ key_reflength=share->base.key_reflength;
+ if (info->s->keyinfo+info->lastinx == keyinfo)
+ info->page_changed=1;
+
+ if ((keypos < anc_buff+anc_length && (info->state->records & 1)) ||
+ keypos == anc_buff+2+key_reflength)
+ { /* Use page right of anc-page */
+ DBUG_PRINT("test",("use right page"));
+
+ if (keyinfo->flag & HA_BINARY_PACK_KEY)
+ {
+ if (!(next_keypos= _ma_get_key(info, keyinfo,
+ anc_buff, buff, keypos, &length)))
+ goto err;
+ }
+ else
+ {
+ /* Got to end of found key */
+ buff[0]=buff[1]=0; /* Avoid length error check if packed key */
+ if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos,
+ buff))
+ goto err;
+ }
+ next_page= _ma_kpos(key_reflength,next_keypos);
+ if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0))
+ goto err;
+ buff_length=maria_getint(buff);
+ DBUG_DUMP("next",buff,buff_length);
+
+ /* find keys to make a big key-page */
+ bmove(next_keypos-key_reflength, buff+2, key_reflength);
+ if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_key,next_keypos,&length)
+ || !_ma_get_last_key(info,keyinfo,leaf_buff,leaf_key,
+ leaf_buff+leaf_length,&length))
+ goto err;
+
+ /* merge pages and put parting key from anc_buff between */
+ prev_key=(leaf_length == p_length ? (byte*) 0 : leaf_key);
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,buff+p_length,
+ prev_key, prev_key,
+ anc_key, &s_temp);
+ length=buff_length-p_length;
+ endpos=buff+length+leaf_length+t_length;
+ /* buff will always be larger than before !*/
+ bmove_upp(endpos, buff+buff_length,length);
+ memcpy(buff, leaf_buff,(size_t) leaf_length);
+ (*keyinfo->store_key)(keyinfo,buff+leaf_length,&s_temp);
+ buff_length=(uint) (endpos-buff);
+ maria_putint(buff,buff_length,nod_flag);
+
+ /* remove key from anc_buff */
+
+ if (!(s_length=remove_key(keyinfo,key_reflength,keypos,anc_key,
+ anc_buff+anc_length,(my_off_t *) 0)))
+ goto err;
+
+ anc_length-=s_length;
+ maria_putint(anc_buff,anc_length,key_reflength);
+
+ if (buff_length <= keyinfo->block_length)
+ { /* Keys in one page */
+ memcpy(leaf_buff,buff,(size_t) buff_length);
+ if (_ma_dispose(info,keyinfo,next_page,DFLT_INIT_HITS))
+ goto err;
+ }
+ else
+ { /* Page is full */
+ endpos=anc_buff+anc_length;
+ DBUG_PRINT("test",("anc_buff: 0x%lx endpos: 0x%lx",
+ (long) anc_buff, (long) endpos));
+ if (keypos != anc_buff+2+key_reflength &&
+ !_ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length))
+ goto err;
+ if (!(half_pos= _ma_find_half_pos(nod_flag, keyinfo, buff, leaf_key,
+ &key_length, &after_key)))
+ goto err;
+ length=(uint) (half_pos-buff);
+ memcpy(leaf_buff,buff,(size_t) length);
+ maria_putint(leaf_buff,length,nod_flag);
+
+ /* Correct new keypointer to leaf_page */
+ half_pos=after_key;
+ _ma_kpointer(info,leaf_key+key_length,next_page);
+ /* Save key in anc_buff */
+ prev_key=(keypos == anc_buff+2+key_reflength ? (byte*) 0 : anc_key),
+ t_length=(*keyinfo->pack_key)(keyinfo,key_reflength,
+ (keypos == endpos ? (byte*) 0 :
+ keypos),
+ prev_key, prev_key,
+ leaf_key, &s_temp);
+ if (t_length >= 0)
+ bmove_upp(endpos+t_length, endpos, (uint) (endpos-keypos));
+ else
+ bmove(keypos,keypos-t_length,(uint) (endpos-keypos)+t_length);
+ (*keyinfo->store_key)(keyinfo,keypos,&s_temp);
+ maria_putint(anc_buff,(anc_length+=t_length),key_reflength);
+
+ /* Store key first in new page */
+ if (nod_flag)
+ bmove(buff+2,half_pos-nod_flag,(size_t) nod_flag);
+ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key))
+ goto err;
+ t_length=(int) (*keyinfo->pack_key)(keyinfo, nod_flag, (byte*) 0,
+ (byte*) 0, (byte*) 0,
+ leaf_key, &s_temp);
+ /* t_length will always be > 0 for a new page !*/
+ length=(uint) ((buff+maria_getint(buff))-half_pos);
+ bmove(buff+p_length+t_length, half_pos, (size_t) length);
+ (*keyinfo->store_key)(keyinfo,buff+p_length,&s_temp);
+ maria_putint(buff,length+t_length+p_length,nod_flag);
+
+ if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff))
+ goto err;
+ }
+ if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff))
+ goto err;
+ DBUG_RETURN(anc_length <= ((info->quick_mode ? MARIA_MIN_BLOCK_LENGTH :
+ (uint) keyinfo->underflow_block_length)));
+ }
+
+ DBUG_PRINT("test",("use left page"));
+
+ keypos= _ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length);
+ if (!keypos)
+ goto err;
+ next_page= _ma_kpos(key_reflength,keypos);
+ if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0))
+ goto err;
+ buff_length=maria_getint(buff);
+ endpos=buff+buff_length;
+ DBUG_DUMP("prev",buff,buff_length);
+
+ /* find keys to make a big key-page */
+ bmove(next_keypos - key_reflength, leaf_buff+2, key_reflength);
+ next_keypos=keypos;
+ if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos,
+ anc_key))
+ goto err;
+ if (!_ma_get_last_key(info,keyinfo,buff,leaf_key,endpos,&length))
+ goto err;
+
+ /* merge pages and put parting key from anc_buff between */
+ prev_key=(leaf_length == p_length ? (byte*) 0 : leaf_key);
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,
+ (leaf_length == p_length ?
+ (byte*) 0 : leaf_buff+p_length),
+ prev_key, prev_key,
+ anc_key, &s_temp);
+ if (t_length >= 0)
+ bmove(endpos+t_length, leaf_buff+p_length,
+ (size_t) (leaf_length-p_length));
+ else /* We gained space */
+ bmove(endpos,leaf_buff+((int) p_length-t_length),
+ (size_t) (leaf_length-p_length+t_length));
+
+ (*keyinfo->store_key)(keyinfo,endpos,&s_temp);
+ buff_length=buff_length+leaf_length-p_length+t_length;
+ maria_putint(buff,buff_length,nod_flag);
+
+ /* remove key from anc_buff */
+ if (!(s_length= remove_key(keyinfo,key_reflength,keypos,anc_key,
+ anc_buff+anc_length,(my_off_t *) 0)))
+ goto err;
+
+ anc_length-=s_length;
+ maria_putint(anc_buff,anc_length,key_reflength);
+
+ if (buff_length <= keyinfo->block_length)
+ { /* Keys in one page */
+ if (_ma_dispose(info,keyinfo,leaf_page,DFLT_INIT_HITS))
+ goto err;
+ }
+ else
+ { /* Page is full */
+ if (keypos == anc_buff+2+key_reflength)
+ anc_pos=0; /* First key */
+ else if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_pos=anc_key,keypos,
+ &length))
+ goto err;
+ endpos= _ma_find_half_pos(nod_flag,keyinfo,buff,leaf_key,
+ &key_length, &half_pos);
+ if (!endpos)
+ goto err;
+ _ma_kpointer(info,leaf_key+key_length,leaf_page);
+ /* Save key in anc_buff */
+ DBUG_DUMP("anc_buff",anc_buff,anc_length);
+ DBUG_DUMP("key_to_anc",leaf_key,key_length);
+
+ temp_pos=anc_buff+anc_length;
+ t_length=(*keyinfo->pack_key)(keyinfo,key_reflength,
+ keypos == temp_pos ? (byte*) 0
+ : keypos,
+ anc_pos, anc_pos,
+ leaf_key,&s_temp);
+ if (t_length > 0)
+ bmove_upp(temp_pos+t_length, temp_pos, (uint) (temp_pos-keypos));
+ else
+ bmove(keypos,keypos-t_length,(uint) (temp_pos-keypos)+t_length);
+ (*keyinfo->store_key)(keyinfo,keypos,&s_temp);
+ maria_putint(anc_buff,(anc_length+=t_length),key_reflength);
+
+ /* Store first key on new page */
+ if (nod_flag)
+ bmove(leaf_buff+2,half_pos-nod_flag,(size_t) nod_flag);
+ if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key)))
+ goto err;
+ DBUG_DUMP("key_to_leaf",leaf_key,length);
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (byte*) 0,
+ (byte*) 0, (byte*) 0, leaf_key, &s_temp);
+ length=(uint) ((buff+buff_length)-half_pos);
+ DBUG_PRINT("info",("t_length: %d length: %d",t_length,(int) length));
+ bmove(leaf_buff+p_length+t_length,half_pos,
+ (size_t) length);
+ (*keyinfo->store_key)(keyinfo,leaf_buff+p_length,&s_temp);
+ maria_putint(leaf_buff,length+t_length+p_length,nod_flag);
+ if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff))
+ goto err;
+ maria_putint(buff,endpos-buff,nod_flag);
+ }
+ if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff))
+ goto err;
+ DBUG_RETURN(anc_length <= (uint) keyinfo->block_length/2);
+
+err:
+ DBUG_RETURN(-1);
+} /* underflow */
+
+
+ /*
+ Remove a key from a packed buffer.
+ The current code doesn't handle the case that the next key may be
+ packed better against the previous key if there is a case difference.
+ Returns how many bytes were removed, or 0 on error.
+ */
+
+static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ byte *keypos, /* Where key starts */
+ byte *lastkey, /* key to be removed */
+ byte *page_end, /* End of page */
+ my_off_t *next_block) /* ptr to next block */
+{
+ int s_length;
+ byte *start;
+ DBUG_ENTER("remove_key");
+ DBUG_PRINT("enter",("keypos: 0x%lx page_end: 0x%lx",(long) keypos, (long) page_end));
+
+ start=keypos;
+ if (!(keyinfo->flag &
+ (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY |
+ HA_BINARY_PACK_KEY)))
+ {
+ s_length=(int) (keyinfo->keylength+nod_flag);
+ if (next_block && nod_flag)
+ *next_block= _ma_kpos(nod_flag,keypos+s_length);
+ }
+ else
+ { /* Let keypos point at next key */
+ /* Calculate length of key */
+ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey))
+ DBUG_RETURN(0); /* Error */
+
+ if (next_block && nod_flag)
+ *next_block= _ma_kpos(nod_flag,keypos);
+ s_length=(int) (keypos-start);
+ if (keypos != page_end)
+ {
+ if (keyinfo->flag & HA_BINARY_PACK_KEY)
+ {
+ byte *old_key=start;
+ uint next_length,prev_length,prev_pack_length;
+ get_key_length(next_length,keypos);
+ get_key_pack_length(prev_length,prev_pack_length,old_key);
+ if (next_length > prev_length)
+ {
+ /* We have to copy data from the current key to the next key */
+ bmove_upp((char*) keypos,(char*) (lastkey+next_length),
+ (next_length-prev_length));
+ keypos-=(next_length-prev_length)+prev_pack_length;
+ store_key_length(keypos,prev_length);
+ s_length=(int) (keypos-start);
+ }
+ }
+ else
+ {
+ /* Check if a variable length first key part */
+ if ((keyinfo->seg->flag & HA_PACK_KEY) && *keypos & 128)
+ {
+ /* Next key is packed against the current one */
+ uint next_length,prev_length,prev_pack_length,lastkey_length,
+ rest_length;
+ if (keyinfo->seg[0].length >= 127)
+ {
+ if (!(prev_length=mi_uint2korr(start) & 32767))
+ goto end;
+ next_length=mi_uint2korr(keypos) & 32767;
+ keypos+=2;
+ prev_pack_length=2;
+ }
+ else
+ {
+ if (!(prev_length= *start & 127))
+ goto end; /* Same key as previous*/
+ next_length= *keypos & 127;
+ keypos++;
+ prev_pack_length=1;
+ }
+ if (!(*start & 128))
+ prev_length=0; /* prev key not packed */
+ if (keyinfo->seg[0].flag & HA_NULL_PART)
+ lastkey++; /* Skip null marker */
+ get_key_length(lastkey_length,lastkey);
+ if (!next_length) /* Same key after */
+ {
+ next_length=lastkey_length;
+ rest_length=0;
+ }
+ else
+ get_key_length(rest_length,keypos);
+
+ if (next_length >= prev_length)
+ { /* Key after is based on deleted key */
+ uint pack_length,tmp;
+ bmove_upp((char*) keypos,(char*) (lastkey+next_length),
+ tmp=(next_length-prev_length));
+ rest_length+=tmp;
+ pack_length= prev_length ? get_pack_length(rest_length): 0;
+ keypos-=tmp+pack_length+prev_pack_length;
+ s_length=(int) (keypos-start);
+ if (prev_length) /* Pack against prev key */
+ {
+ *keypos++= start[0];
+ if (prev_pack_length == 2)
+ *keypos++= start[1];
+ store_key_length(keypos,rest_length);
+ }
+ else
+ {
+ /* Next key is not packed anymore */
+ if (keyinfo->seg[0].flag & HA_NULL_PART)
+ {
+ rest_length++; /* Mark not null */
+ }
+ if (prev_pack_length == 2)
+ {
+ mi_int2store(keypos,rest_length);
+ }
+ else
+ *keypos= rest_length;
+ }
+ }
+ }
+ }
+ }
+ }
+ end:
+ bmove(start, start+s_length, (uint) (page_end-start-s_length));
+ DBUG_RETURN((uint) s_length);
+} /* remove_key */
diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c
new file mode 100644
index 00000000000..616147c1067
--- /dev/null
+++ b/storage/maria/ma_delete_all.c
@@ -0,0 +1,102 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Remove all rows from a MARIA table */
+/* This clears the status information and truncates files */
+
+#include "maria_def.h"
+
+int maria_delete_all_rows(MARIA_HA *info)
+{
+ uint i;
+ MARIA_SHARE *share=info->s;
+ MARIA_STATE_INFO *state=&share->state;
+ DBUG_ENTER("maria_delete_all_rows");
+
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ DBUG_RETURN(my_errno=EACCES);
+ }
+ /* LOCKTODO take X-lock on table here */
+ if (_ma_readinfo(info,F_WRLCK,1))
+ DBUG_RETURN(my_errno);
+ if (_ma_mark_file_changed(info))
+ goto err;
+
+ info->state->records=info->state->del=state->split=0;
+ state->dellink = HA_OFFSET_ERROR;
+ state->sortkey= (ushort) ~0;
+ info->state->key_file_length=share->base.keystart;
+ info->state->data_file_length=0;
+ info->state->empty=info->state->key_empty=0;
+ info->state->checksum=0;
+
+ state->key_del= HA_OFFSET_ERROR;
+ for (i=0 ; i < share->base.keys ; i++)
+ state->key_root[i]= HA_OFFSET_ERROR;
+
+ /*
+ If we are using delayed keys or if the user has done changes to the table
+ since it was locked, then there may be key blocks in the key cache
+ */
+ flush_key_blocks(share->key_cache, share->kfile, FLUSH_IGNORE_CHANGED);
+ /*
+ RECOVERYTODO Log the two chsize and header modifications and force the
+ log. So that if crash between the two chsize, we finish the work at
+ Recovery. For this scenario:
+ "TRUNCATE TABLE t1; DROP TABLE t1; RENAME TABLE t2 to t1; crash;"
+ Recovery mustn't truncate the new t1, so the log records of TRUNCATE
+ should be applied only if t1 exists and its ZeroDirtyPagesLSN is smaller
+ than the records'. See more comments below.
+ */
+ if (my_chsize(info->dfile, 0, 0, MYF(MY_WME)) ||
+ my_chsize(share->kfile, share->base.keystart, 0, MYF(MY_WME)) )
+ goto err;
+ /*
+ RECOVERYTODO Consider updating ZeroDirtyPagesLSN here. It is
+ not a necessity (it is one only in RENAME commands) but an optional
+ optimization which will allow some REDO skipping at Recovery.
+ */
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+#ifdef HAVE_MMAP
+ /* Resize mmaped area */
+ rw_wrlock(&info->s->mmap_lock);
+ _ma_remap_file(info, (my_off_t)0);
+ rw_unlock(&info->s->mmap_lock);
+#endif
+ /*
+ RECOVERYTODO Until we have the TRUNCATE log record and take it into
+ account for log-low-water-mark calculation and use it in Recovery, we need
+ to sync.
+ */
+ if (_ma_sync_table_files(info))
+ goto err;
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ DBUG_RETURN(0);
+
+err:
+ {
+ int save_errno=my_errno;
+ /* RECOVERYTODO log the header modifications */
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ info->update|=HA_STATE_WRITTEN; /* Buffer changed */
+ /* RECOVERYTODO until we log above we have to sync */
+ if (_ma_sync_table_files(info) && !save_errno)
+ save_errno= my_errno;
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ DBUG_RETURN(my_errno=save_errno);
+ }
+} /* maria_delete_all_rows */
diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c
new file mode 100644
index 00000000000..5c7b4337b20
--- /dev/null
+++ b/storage/maria/ma_delete_table.c
@@ -0,0 +1,81 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ deletes a table
+*/
+
+#include "ma_fulltext.h"
+
+int maria_delete_table(const char *name)
+{
+ char from[FN_REFLEN];
+#ifdef USE_RAID
+ uint raid_type=0,raid_chunks=0;
+#endif
+ DBUG_ENTER("maria_delete_table");
+
+#ifdef EXTRA_DEBUG
+ _ma_check_table_is_closed(name,"delete");
+#endif
+ /* LOCKTODO take X-lock on table here */
+#ifdef USE_RAID
+ {
+ MARIA_HA *info;
+ /*
+ When built with RAID support, we need to determine if this table
+ makes use of the raid feature. If yes, we need to remove all raid
+ chunks. This is done with my_raid_delete(). Unfortunately it is
+ necessary to open the table just to check this. We use
+ 'open_for_repair' to be able to open even a crashed table. If even
+ this open fails, we assume no raid configuration for this table
+ and try to remove the normal data file only. This may however
+ leave the raid chunks behind.
+ */
+ if (!(info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR)))
+ raid_type= 0;
+ else
+ {
+ raid_type= info->s->base.raid_type;
+ raid_chunks= info->s->base.raid_chunks;
+ maria_close(info);
+ }
+ }
+#ifdef EXTRA_DEBUG
+ _ma_check_table_is_closed(name,"delete");
+#endif
+#endif /* USE_RAID */
+
+ fn_format(from,name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ /*
+ RECOVERYTODO log the two deletes below.
+ Then do the file deletions.
+ For this log record to be of any use for Recovery, we need the upper MySQL
+ layer to be crash-safe in DDLs; when it is we should reconsider the moment
+ of writing this log record, how to use it in Recovery, and force the log.
+ For now this record is only informative.
+ */
+ if (my_delete_with_symlink(from, MYF(MY_WME | MY_SYNC_DIR)))
+ DBUG_RETURN(my_errno);
+ fn_format(from,name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+#ifdef USE_RAID
+ if (raid_type)
+ DBUG_RETURN(my_raid_delete(from, raid_chunks, MYF(MY_WME | MY_SYNC_DIR)) ?
+ my_errno : 0);
+#endif
+ DBUG_RETURN(my_delete_with_symlink(from, MYF(MY_WME | MY_SYNC_DIR)) ?
+ my_errno : 0);
+}
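+
+/*
+  Illustrative sketch only (not part of the engine sources): how the two bulk
+  delete entry points relate. maria_delete_all_rows() empties an open table
+  (TRUNCATE-like), while maria_delete_table() removes the files of a closed
+  table by name. The guard macro and helper name are hypothetical; only
+  maria_open/maria_close/maria_delete_all_rows/maria_delete_table, as used or
+  defined in these sources, are assumed.
+*/
+#ifdef MARIA_DROP_USAGE_SKETCH /* never compiled; example only */
+static int truncate_then_drop(const char *name)
+{
+  MARIA_HA *handler;
+  if (!(handler= maria_open(name, O_RDWR, 0)))
+    return my_errno;
+  if (maria_delete_all_rows(handler)) /* empty the table */
+  {
+    int error= my_errno;
+    maria_close(handler);
+    return error;
+  }
+  maria_close(handler); /* the table must be closed ... */
+  return maria_delete_table(name); /* ... before its files are removed */
+}
+#endif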
diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c
new file mode 100644
index 00000000000..c3d0bf1fb0a
--- /dev/null
+++ b/storage/maria/ma_dynrec.c
@@ -0,0 +1,1954 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Functions to handle space-packed-records and blobs
+
+ A row may be stored in one or more linked blocks.
+ The block size is between MARIA_MIN_BLOCK_LENGTH and MARIA_MAX_BLOCK_LENGTH.
+ Each block is aligned on MARIA_DYN_ALIGN_SIZE.
+ The reason for the max block size is to not have too many different types
+ of blocks. For the different block types, look at _ma_get_block_info()
+*/
+
+#include "maria_def.h"
+
+static my_bool write_dynamic_record(MARIA_HA *info,const byte *record,
+ ulong reclength);
+static int _ma_find_writepos(MARIA_HA *info,ulong reclength,my_off_t *filepos,
+ ulong *length);
+static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
+ byte *record, ulong reclength);
+static my_bool delete_dynamic_record(MARIA_HA *info,MARIA_RECORD_POS filepos,
+ uint second_read);
+static my_bool _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos,
+ uint length);
+
+#ifdef THREAD
+/* Play it safe; We have a small stack when using threads */
+#undef my_alloca
+#undef my_afree
+#define my_alloca(A) my_malloc((A),MYF(0))
+#define my_afree(A) my_free((A),MYF(0))
+#endif
+
+ /* Interface function from MARIA_HA */
+
+#ifdef HAVE_MMAP
+
+/*
+ Create mmaped area for MARIA handler
+
+ SYNOPSIS
+ _ma_dynmap_file()
+ info MARIA handler
+
+ RETURN
+ 0 ok
+ 1 error.
+*/
+
+my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size)
+{
+ DBUG_ENTER("_ma_dynmap_file");
+ if (size > (my_off_t) (~((size_t) 0)) - MEMMAP_EXTRA_MARGIN)
+ {
+ DBUG_PRINT("warning", ("File is too large for mmap"));
+ DBUG_RETURN(1);
+ }
+ info->s->file_map= (byte*)
+ my_mmap(0, (size_t)(size + MEMMAP_EXTRA_MARGIN),
+ info->s->mode==O_RDONLY ? PROT_READ :
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_NORESERVE,
+ info->dfile, 0L);
+ if (info->s->file_map == (byte*) MAP_FAILED)
+ {
+ info->s->file_map= NULL;
+ DBUG_RETURN(1);
+ }
+#if defined(HAVE_MADVISE)
+ madvise(info->s->file_map, size, MADV_RANDOM);
+#endif
+ info->s->mmaped_length= size;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Resize mmaped area for MARIA handler
+
+ SYNOPSIS
+ _ma_remap_file()
+ info MARIA handler
+
+ RETURN
+*/
+
+void _ma_remap_file(MARIA_HA *info, my_off_t size)
+{
+ if (info->s->file_map)
+ {
+ VOID(my_munmap(info->s->file_map,
+ (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN));
+ _ma_dynmap_file(info, size);
+ }
+}
+#endif
+
+
+/*
+ Read bytes from MARIA handler, using mmap or pread
+
+ SYNOPSIS
+ _ma_mmap_pread()
+ info MARIA handler
+ Buffer Input buffer
+ Count Count of bytes for read
+ offset Start position
+ MyFlags
+
+ RETURN
+ 0 ok
+*/
+
+uint _ma_mmap_pread(MARIA_HA *info, byte *Buffer,
+ uint Count, my_off_t offset, myf MyFlags)
+{
+ DBUG_PRINT("info", ("maria_read with mmap %d\n", info->dfile));
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->mmap_lock);
+
+ /*
+ The following test may fail in the following cases:
+ - We failed to remap a memory area (fragmented memory?)
+ - This thread has done some writes, but not yet extended the
+ memory mapped area.
+ */
+
+ if (info->s->mmaped_length >= offset + Count)
+ {
+ memcpy(Buffer, info->s->file_map + offset, Count);
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ return 0;
+ }
+ else
+ {
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ return my_pread(info->dfile, Buffer, Count, offset, MyFlags);
+ }
+}
+
+
+ /* Wrapper for my_pread in case mmap isn't used */
+
+uint _ma_nommap_pread(MARIA_HA *info, byte *Buffer,
+ uint Count, my_off_t offset, myf MyFlags)
+{
+ return my_pread(info->dfile, Buffer, Count, offset, MyFlags);
+}
+
+
+/*
+ Write bytes to MARIA handler, using mmap or pwrite
+
+ SYNOPSIS
+ _ma_mmap_pwrite()
+ info MARIA handler
+ Buffer Output buffer
+ Count Count of bytes for write
+ offset Start position
+ MyFlags
+
+ RETURN
+ 0 ok
+ !=0 error. In this case return error from pwrite
+*/
+
+uint _ma_mmap_pwrite(MARIA_HA *info, byte *Buffer,
+ uint Count, my_off_t offset, myf MyFlags)
+{
+ DBUG_PRINT("info", ("maria_write with mmap %d\n", info->dfile));
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->mmap_lock);
+
+ /*
+ The following test may fail in the following cases:
+ - We failed to remap a memory area (fragmented memory?)
+ - This thread has done some writes, but not yet extended the
+ memory mapped area.
+ */
+
+ if (info->s->mmaped_length >= offset + Count)
+ {
+ memcpy(info->s->file_map + offset, Buffer, Count);
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ return 0;
+ }
+ else
+ {
+ info->s->nonmmaped_inserts++;
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ return my_pwrite(info->dfile, Buffer, Count, offset, MyFlags);
+ }
+
+}
+
+
+ /* Wrapper for my_pwrite in case mmap isn't used */
+
+uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer,
+ uint Count, my_off_t offset, myf MyFlags)
+{
+ return my_pwrite(info->dfile, Buffer, Count, offset, MyFlags);
+}
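+
+/*
+  Illustrative note (not from the sources): the two pread wrappers and the two
+  pwrite wrappers above deliberately share one signature, so a handler can be
+  bound to either the mmap path or the plain pread/pwrite path once and the
+  callers stay unaware of which one is active. Below is a hypothetical sketch
+  of such a dispatch; the typedef and names are assumptions, not the engine's
+  actual members.
+*/
+#ifdef MARIA_IO_DISPATCH_SKETCH /* never compiled; example only */
+typedef uint (*ma_pread_func)(MARIA_HA *info, byte *Buffer,
+                              uint Count, my_off_t offset, myf MyFlags);
+
+static ma_pread_func choose_pread(my_bool use_mmap)
+{
+  /* Pick the mmap-aware reader when the data file is memory mapped */
+  return use_mmap ? _ma_mmap_pread : _ma_nommap_pread;
+}
+#endif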
+
+
+my_bool _ma_write_dynamic_record(MARIA_HA *info, const byte *record)
+{
+ ulong reclength= _ma_rec_pack(info,info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ record);
+ return (write_dynamic_record(info,info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ reclength));
+}
+
+my_bool _ma_update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const byte *record)
+{
+ uint length= _ma_rec_pack(info, info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ record);
+ return (update_dynamic_record(info, pos,
+ info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ length));
+}
+
+
+my_bool _ma_write_blob_record(MARIA_HA *info, const byte *record)
+{
+ byte *rec_buff;
+ int error;
+ ulong reclength,reclength2,extra;
+
+ extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
+ MARIA_DYN_DELETE_BLOCK_HEADER+1);
+ reclength= (info->s->base.pack_reclength +
+ _ma_calc_total_blob_length(info,record)+ extra);
+ if (!(rec_buff=(byte*) my_alloca(reclength)))
+ {
+ my_errno=ENOMEM;
+ return(1);
+ }
+ reclength2= _ma_rec_pack(info,
+ rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ record);
+ DBUG_PRINT("info",("reclength: %lu reclength2: %lu",
+ reclength, reclength2));
+ DBUG_ASSERT(reclength2 <= reclength);
+ error= write_dynamic_record(info,
+ rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ reclength2);
+ my_afree(rec_buff);
+ return(error != 0);
+}
+
+
+my_bool _ma_update_blob_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const byte *record)
+{
+ byte *rec_buff;
+ int error;
+ ulong reclength,extra;
+
+ extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
+ MARIA_DYN_DELETE_BLOCK_HEADER);
+ reclength= (info->s->base.pack_reclength+
+ _ma_calc_total_blob_length(info,record)+ extra);
+#ifdef NOT_USED /* We now support big rows */
+ if (reclength > MARIA_DYN_MAX_ROW_LENGTH)
+ {
+ my_errno=HA_ERR_TO_BIG_ROW;
+ return 1;
+ }
+#endif
+ if (!(rec_buff=(byte*) my_alloca(reclength)))
+ {
+ my_errno=ENOMEM;
+ return(1);
+ }
+ reclength= _ma_rec_pack(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ record);
+ error=update_dynamic_record(info,pos,
+ rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ reclength);
+ my_afree(rec_buff);
+ return(error != 0);
+}
+
+
+my_bool _ma_delete_dynamic_record(MARIA_HA *info)
+{
+ return delete_dynamic_record(info, info->cur_row.lastpos, 0);
+}
+
+
+ /* Write record to data-file */
+
+static my_bool write_dynamic_record(MARIA_HA *info, const byte *record,
+ ulong reclength)
+{
+ int flag;
+ ulong length;
+ my_off_t filepos;
+ DBUG_ENTER("write_dynamic_record");
+
+ flag=0;
+ do
+ {
+ if (_ma_find_writepos(info,reclength,&filepos,&length))
+ goto err;
+ if (_ma_write_part_record(info,filepos,length,
+ (info->append_insert_at_end ?
+ HA_OFFSET_ERROR : info->s->state.dellink),
+ (byte**) &record,&reclength,&flag))
+ goto err;
+ } while (reclength);
+
+ DBUG_RETURN(0);
+err:
+ DBUG_RETURN(1);
+}
+
+
+ /* Get a block for data ; The given data-area must be used !! */
+
+static int _ma_find_writepos(MARIA_HA *info,
+ ulong reclength, /* record length */
+ my_off_t *filepos, /* Return file pos */
+ ulong *length) /* length of block at filepos */
+{
+ MARIA_BLOCK_INFO block_info;
+ ulong tmp;
+ DBUG_ENTER("_ma_find_writepos");
+
+ if (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end)
+ {
+ /* Deleted blocks exist; get last used block */
+ *filepos=info->s->state.dellink;
+ block_info.second_read=0;
+ info->rec_cache.seek_not_done=1;
+ if (!(_ma_get_block_info(&block_info,info->dfile,info->s->state.dellink) &
+ BLOCK_DELETED))
+ {
+ DBUG_PRINT("error",("Delete link crashed"));
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(-1);
+ }
+ info->s->state.dellink=block_info.next_filepos;
+ info->state->del--;
+ info->state->empty-= block_info.block_len;
+ *length= block_info.block_len;
+ }
+ else
+ {
+ /* No deleted blocks; Allocate a new block */
+ *filepos=info->state->data_file_length;
+ if ((tmp=reclength+3 + test(reclength >= (65520-3))) <
+ info->s->base.min_block_length)
+ tmp= info->s->base.min_block_length;
+ else
+ tmp= ((tmp+MARIA_DYN_ALIGN_SIZE-1) &
+ (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1)));
+ if (info->state->data_file_length >
+ (info->s->base.max_data_file_length - tmp))
+ {
+ my_errno=HA_ERR_RECORD_FILE_FULL;
+ DBUG_RETURN(-1);
+ }
+ if (tmp > MARIA_MAX_BLOCK_LENGTH)
+ tmp=MARIA_MAX_BLOCK_LENGTH;
+ *length= tmp;
+ info->state->data_file_length+= tmp;
+ info->s->state.split++;
+ info->update|=HA_STATE_WRITE_AT_END;
+ }
+ DBUG_RETURN(0);
+} /* _ma_find_writepos */
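+
+/*
+  Illustrative sketch (not part of the engine): the block-length rounding done
+  by _ma_find_writepos() above when no deleted block can be reused. A new
+  block is the packed record length plus a 3 byte header (plus 1 if the record
+  needs a long header), rounded up to a multiple of MARIA_DYN_ALIGN_SIZE and
+  never below min_block_length. The cap at MARIA_MAX_BLOCK_LENGTH is applied
+  separately by the caller. The guard macro and function name are
+  hypothetical.
+*/
+#ifdef MARIA_BLOCK_ALIGN_SKETCH /* never compiled; example only */
+static ulong example_block_length(ulong reclength, ulong min_block_length)
+{
+  ulong tmp= reclength + 3 + test(reclength >= (65520 - 3));
+  if (tmp < min_block_length)
+    return min_block_length;
+  /* Round up to the next multiple of MARIA_DYN_ALIGN_SIZE */
+  return (tmp + MARIA_DYN_ALIGN_SIZE - 1) &
+         ~((ulong) (MARIA_DYN_ALIGN_SIZE - 1));
+}
+#endif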
+
+
+
+/*
+ Unlink a deleted block from the deleted list.
+ This block will be combined with the preceding or next block to form
+ a big block.
+*/
+
+static bool unlink_deleted_block(MARIA_HA *info, MARIA_BLOCK_INFO *block_info)
+{
+ DBUG_ENTER("unlink_deleted_block");
+ if (block_info->filepos == info->s->state.dellink)
+ {
+ /* First deleted block; We can just use this ! */
+ info->s->state.dellink=block_info->next_filepos;
+ }
+ else
+ {
+ MARIA_BLOCK_INFO tmp;
+ tmp.second_read=0;
+ /* Unlink block from the previous block */
+ if (!(_ma_get_block_info(&tmp,info->dfile,block_info->prev_filepos)
+ & BLOCK_DELETED))
+ DBUG_RETURN(1); /* Something is wrong */
+ mi_sizestore(tmp.header+4,block_info->next_filepos);
+ if (info->s->file_write(info,(char*) tmp.header+4,8,
+ block_info->prev_filepos+4, MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ /* Unlink block from next block */
+ if (block_info->next_filepos != HA_OFFSET_ERROR)
+ {
+ if (!(_ma_get_block_info(&tmp,info->dfile,block_info->next_filepos)
+ & BLOCK_DELETED))
+ DBUG_RETURN(1); /* Something is wrong */
+ mi_sizestore(tmp.header+12,block_info->prev_filepos);
+ if (info->s->file_write(info,(char*) tmp.header+12,8,
+ block_info->next_filepos+12,
+ MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ }
+ }
+ /* We now have one less deleted block */
+ info->state->del--;
+ info->state->empty-= block_info->block_len;
+ info->s->state.split--;
+
+ /*
+ If this was a block that we were accessing through a table scan
+ (maria_rrnd() or maria_scan()), then ensure that we skip over this block
+ when doing the next maria_rrnd() or maria_scan().
+ */
+ if (info->cur_row.nextpos == block_info->filepos)
+ info->cur_row.nextpos+= block_info->block_len;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Add a backward link to delete block
+
+ SYNOPSIS
+ update_backward_delete_link()
+ info MARIA handler
+ delete_block Position to delete block to update.
+ If this is 'HA_OFFSET_ERROR', nothing will be done
+ filepos Position to block that 'delete_block' should point to
+
+ RETURN
+ 0 ok
+ 1 error. In this case my_errno is set.
+*/
+
+static my_bool update_backward_delete_link(MARIA_HA *info,
+ my_off_t delete_block,
+ MARIA_RECORD_POS filepos)
+{
+ MARIA_BLOCK_INFO block_info;
+ DBUG_ENTER("update_backward_delete_link");
+
+ if (delete_block != HA_OFFSET_ERROR)
+ {
+ block_info.second_read=0;
+ if (_ma_get_block_info(&block_info,info->dfile,delete_block)
+ & BLOCK_DELETED)
+ {
+ char buff[8];
+ mi_sizestore(buff,filepos);
+ if (info->s->file_write(info,buff, 8, delete_block+12, MYF(MY_NABP)))
+ DBUG_RETURN(1); /* Error on write */
+ }
+ else
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(1); /* Wrong delete link */
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+/* Delete datarecord from database */
+/* info->rec_cache.seek_not_done is updated in cmp_record */
+
+static my_bool delete_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
+ uint second_read)
+{
+ uint length,b_type;
+ MARIA_BLOCK_INFO block_info,del_block;
+ int error;
+ my_bool remove_next_block;
+ DBUG_ENTER("delete_dynamic_record");
+
+ /* First add a link from the last block to the new one */
+ error= update_backward_delete_link(info, info->s->state.dellink, filepos);
+
+ block_info.second_read=second_read;
+ do
+ {
+ /* Remove block at 'filepos' */
+ if ((b_type= _ma_get_block_info(&block_info,info->dfile,filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR) ||
+ (length=(uint) (block_info.filepos-filepos) +block_info.block_len) <
+ MARIA_MIN_BLOCK_LENGTH)
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(1);
+ }
+ /* Check if next block is a delete block */
+ del_block.second_read=0;
+ remove_next_block=0;
+ if (_ma_get_block_info(&del_block,info->dfile,filepos+length) &
+ BLOCK_DELETED && del_block.block_len+length <
+ MARIA_DYN_MAX_BLOCK_LENGTH)
+ {
+ /* We can't remove this yet as this block may be the head block */
+ remove_next_block=1;
+ length+=del_block.block_len;
+ }
+
+ block_info.header[0]=0;
+ mi_int3store(block_info.header+1,length);
+ mi_sizestore(block_info.header+4,info->s->state.dellink);
+ if (b_type & BLOCK_LAST)
+ bfill(block_info.header+12,8,255);
+ else
+ mi_sizestore(block_info.header+12,block_info.next_filepos);
+ if (info->s->file_write(info,(byte*) block_info.header,20,filepos,
+ MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ info->s->state.dellink = filepos;
+ info->state->del++;
+ info->state->empty+=length;
+ filepos=block_info.next_filepos;
+
+ /* Now it's safe to unlink the deleted block directly after this one */
+ if (remove_next_block && unlink_deleted_block(info,&del_block))
+ error=1;
+ } while (!(b_type & BLOCK_LAST));
+
+ DBUG_RETURN(error);
+}
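+
+/*
+  Illustrative sketch (not part of the engine): the 20-byte deleted-block
+  header that delete_dynamic_record() writes above. Byte 0 is the deleted
+  marker, bytes 1-3 hold the block length, bytes 4-11 the forward link in the
+  delete chain and bytes 12-19 the backward link (all 0xff meaning "end of
+  chain"). The guard macro and function name are hypothetical.
+*/
+#ifdef MARIA_DELETED_HEADER_SKETCH /* never compiled; example only */
+static void example_deleted_block_header(uchar *header, uint block_length,
+                                         my_off_t next_link,
+                                         my_off_t prev_link)
+{
+  header[0]= 0; /* deleted block marker */
+  mi_int3store(header + 1, block_length); /* total length of this block */
+  mi_sizestore(header + 4, next_link); /* next block in delete chain */
+  if (prev_link == HA_OFFSET_ERROR)
+    bfill(header + 12, 8, 255); /* no neighbour: end link */
+  else
+    mi_sizestore(header + 12, prev_link); /* backward link in chain */
+}
+#endif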
+
+
+ /* Write a block to datafile */
+
+int _ma_write_part_record(MARIA_HA *info,
+ my_off_t filepos, /* points at empty block */
+ ulong length, /* length of block */
+ my_off_t next_filepos,/* Next empty block */
+ byte **record, /* pointer to record ptr */
+ ulong *reclength, /* length of *record */
+ int *flag) /* *flag == 0 if header */
+{
+ ulong head_length,res_length,extra_length,long_block,del_length;
+ byte *pos,*record_end;
+ my_off_t next_delete_block;
+ uchar temp[MARIA_SPLIT_LENGTH+MARIA_DYN_DELETE_BLOCK_HEADER];
+ DBUG_ENTER("_ma_write_part_record");
+
+ next_delete_block=HA_OFFSET_ERROR;
+
+ res_length=extra_length=0;
+ if (length > *reclength + MARIA_SPLIT_LENGTH)
+ { /* Split big block */
+ res_length=MY_ALIGN(length- *reclength - MARIA_EXTEND_BLOCK_LENGTH,
+ MARIA_DYN_ALIGN_SIZE);
+ length-= res_length; /* Use this for first part */
+ }
+ long_block= (length < 65520L && *reclength < 65520L) ? 0 : 1;
+ if (length == *reclength+ 3 + long_block)
+ {
+ /* Block is exactly of the right length */
+ temp[0]=(uchar) (1+ *flag)+(uchar) long_block; /* Flag is 0 or 6 */
+ if (long_block)
+ {
+ mi_int3store(temp+1,*reclength);
+ head_length=4;
+ }
+ else
+ {
+ mi_int2store(temp+1,*reclength);
+ head_length=3;
+ }
+ }
+ else if (length-long_block < *reclength+4)
+ { /* Too short block */
+ if (next_filepos == HA_OFFSET_ERROR)
+ next_filepos= (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end ?
+ info->s->state.dellink : info->state->data_file_length);
+ if (*flag == 0) /* First block */
+ {
+ if (*reclength > MARIA_MAX_BLOCK_LENGTH)
+ {
+ head_length= 16;
+ temp[0]=13;
+ mi_int4store(temp+1,*reclength);
+ mi_int3store(temp+5,length-head_length);
+ mi_sizestore((byte*) temp+8,next_filepos);
+ }
+ else
+ {
+ head_length=5+8+long_block*2;
+ temp[0]=5+(uchar) long_block;
+ if (long_block)
+ {
+ mi_int3store(temp+1,*reclength);
+ mi_int3store(temp+4,length-head_length);
+ mi_sizestore((byte*) temp+7,next_filepos);
+ }
+ else
+ {
+ mi_int2store(temp+1,*reclength);
+ mi_int2store(temp+3,length-head_length);
+ mi_sizestore((byte*) temp+5,next_filepos);
+ }
+ }
+ }
+ else
+ {
+ head_length=3+8+long_block;
+ temp[0]=11+(uchar) long_block;
+ if (long_block)
+ {
+ mi_int3store(temp+1,length-head_length);
+ mi_sizestore((byte*) temp+4,next_filepos);
+ }
+ else
+ {
+ mi_int2store(temp+1,length-head_length);
+ mi_sizestore((byte*) temp+3,next_filepos);
+ }
+ }
+ }
+ else
+ { /* Block with empty info last */
+ head_length=4+long_block;
+ extra_length= length- *reclength-head_length;
+ temp[0]= (uchar) (3+ *flag)+(uchar) long_block; /* 3,4 or 9,10 */
+ if (long_block)
+ {
+ mi_int3store(temp+1,*reclength);
+ temp[4]= (uchar) (extra_length);
+ }
+ else
+ {
+ mi_int2store(temp+1,*reclength);
+ temp[3]= (uchar) (extra_length);
+ }
+ length= *reclength+head_length; /* Write only what is needed */
+ }
+ DBUG_DUMP("header",(byte*) temp,head_length);
+
+ /* Make a long block for one write */
+ record_end= *record+length-head_length;
+ del_length=(res_length ? MARIA_DYN_DELETE_BLOCK_HEADER : 0);
+ bmove((byte*) (*record-head_length),(byte*) temp,head_length);
+ memcpy(temp,record_end,(size_t) (extra_length+del_length));
+ bzero((byte*) record_end,extra_length);
+
+ if (res_length)
+ {
+ /* Check first if we can join this block with the next one */
+ MARIA_BLOCK_INFO del_block;
+ my_off_t next_block=filepos+length+extra_length+res_length;
+
+ del_block.second_read=0;
+ if (next_block < info->state->data_file_length &&
+ info->s->state.dellink != HA_OFFSET_ERROR)
+ {
+ if ((_ma_get_block_info(&del_block,info->dfile,next_block)
+ & BLOCK_DELETED) &&
+ res_length + del_block.block_len < MARIA_DYN_MAX_BLOCK_LENGTH)
+ {
+ if (unlink_deleted_block(info,&del_block))
+ goto err;
+ res_length+=del_block.block_len;
+ }
+ }
+
+ /* Create a delete link of the last part of the block */
+ pos=record_end+extra_length;
+ pos[0]= '\0';
+ mi_int3store(pos+1,res_length);
+ mi_sizestore(pos+4,info->s->state.dellink);
+ bfill(pos+12,8,255); /* End link */
+ next_delete_block=info->s->state.dellink;
+ info->s->state.dellink= filepos+length+extra_length;
+ info->state->del++;
+ info->state->empty+=res_length;
+ info->s->state.split++;
+ }
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->update & HA_STATE_WRITE_AT_END)
+ {
+ if (info->update & HA_STATE_EXTEND_BLOCK)
+ {
+ info->update&= ~HA_STATE_EXTEND_BLOCK;
+ if (my_block_write(&info->rec_cache,(byte*) *record-head_length,
+ length+extra_length+del_length,filepos))
+ goto err;
+ }
+ else if (my_b_write(&info->rec_cache,(byte*) *record-head_length,
+ length+extra_length+del_length))
+ goto err;
+ }
+ else
+ {
+ info->rec_cache.seek_not_done=1;
+ if (info->s->file_write(info,(byte*) *record-head_length,
+ length+extra_length+
+ del_length,filepos,info->s->write_flag))
+ goto err;
+ }
+ memcpy(record_end,temp,(size_t) (extra_length+del_length));
+ *record=record_end;
+ *reclength-=(length-head_length);
+ *flag=6;
+
+ if (del_length)
+ {
+ /* link the next delete block to this */
+ if (update_backward_delete_link(info, next_delete_block,
+ info->s->state.dellink))
+ goto err;
+ }
+
+ DBUG_RETURN(0);
+err:
+ DBUG_PRINT("exit",("errno: %d",my_errno));
+ DBUG_RETURN(1);
+} /* _ma_write_part_record */
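+
+/*
+  Illustrative sketch (not part of the engine): the smallest block header that
+  _ma_write_part_record() builds above, for the "block is exactly of the right
+  length" case. For a short (< 65520 byte) record the header is 3 bytes: a
+  type byte (1 for a head block, 7 for a continuation block since *flag is
+  then 6) followed by the 2-byte record length. The guard macro and function
+  name are hypothetical.
+*/
+#ifdef MARIA_BLOCK_HEADER_SKETCH /* never compiled; example only */
+static void example_exact_fit_header(uchar *header, uint reclength,
+                                     int continuation)
+{
+  header[0]= (uchar) (continuation ? 7 : 1); /* 1 + *flag; *flag is 0 or 6 */
+  mi_int2store(header + 1, reclength); /* 2-byte record length, as above */
+}
+#endif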
+
+
+ /* Update record in datafile */
+
+static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
+ byte *record, ulong reclength)
+{
+ int flag;
+ uint error;
+ ulong length;
+ MARIA_BLOCK_INFO block_info;
+ DBUG_ENTER("update_dynamic_record");
+
+ flag=block_info.second_read=0;
+ while (reclength > 0)
+ {
+ if (filepos != info->s->state.dellink)
+ {
+ block_info.next_filepos= HA_OFFSET_ERROR;
+ if ((error= _ma_get_block_info(&block_info,info->dfile,filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ DBUG_PRINT("error",("Got wrong block info"));
+ if (!(error & BLOCK_FATAL_ERROR))
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err;
+ }
+ length=(ulong) (block_info.filepos-filepos) + block_info.block_len;
+ if (length < reclength)
+ {
+ uint tmp=MY_ALIGN(reclength - length + 3 +
+ test(reclength >= 65520L),MARIA_DYN_ALIGN_SIZE);
+ /* Don't create a block bigger than MARIA_MAX_BLOCK_LENGTH */
+ tmp= min(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length;
+ /* Check if we can extend this block */
+ if (block_info.filepos + block_info.block_len ==
+ info->state->data_file_length &&
+ info->state->data_file_length <
+ info->s->base.max_data_file_length-tmp)
+ {
+ /* extend file */
+ DBUG_PRINT("info",("Extending file with %d bytes",tmp));
+ if (info->cur_row.nextpos == info->state->data_file_length)
+ info->cur_row.nextpos+= tmp;
+ info->state->data_file_length+= tmp;
+ info->update|= HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK;
+ length+=tmp;
+ }
+ else if (length < MARIA_MAX_BLOCK_LENGTH - MARIA_MIN_BLOCK_LENGTH)
+ {
+ /*
+ Check if next block is a deleted block.
+ Above we subtract MARIA_MIN_BLOCK_LENGTH to avoid the problem where
+ the next block is so small that it can't be split, which could
+ cause problems.
+ */
+
+ MARIA_BLOCK_INFO del_block;
+ del_block.second_read=0;
+ if (_ma_get_block_info(&del_block,info->dfile,
+ block_info.filepos + block_info.block_len) &
+ BLOCK_DELETED)
+ {
+ /* Use it; unlink it and extend the current block */
+ DBUG_PRINT("info",("Extending current block"));
+ if (unlink_deleted_block(info,&del_block))
+ goto err;
+ if ((length+=del_block.block_len) > MARIA_MAX_BLOCK_LENGTH)
+ {
+ /*
+ New block was too big, link overflow part back to
+ delete list
+ */
+ my_off_t next_pos;
+ ulong rest_length= length-MARIA_MAX_BLOCK_LENGTH;
+ set_if_bigger(rest_length, MARIA_MIN_BLOCK_LENGTH);
+ next_pos= del_block.filepos+ del_block.block_len - rest_length;
+
+ if (update_backward_delete_link(info, info->s->state.dellink,
+ next_pos))
+ DBUG_RETURN(1);
+
+ /* create delete link for data that didn't fit into the page */
+ del_block.header[0]=0;
+ mi_int3store(del_block.header+1, rest_length);
+ mi_sizestore(del_block.header+4,info->s->state.dellink);
+ bfill(del_block.header+12,8,255);
+ if (info->s->file_write(info,(byte*) del_block.header, 20,
+ next_pos, MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ info->s->state.dellink= next_pos;
+ info->s->state.split++;
+ info->state->del++;
+ info->state->empty+= rest_length;
+ length-= rest_length;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ if (_ma_find_writepos(info,reclength,&filepos,&length))
+ goto err;
+ }
+ if (_ma_write_part_record(info,filepos,length,block_info.next_filepos,
+ &record,&reclength,&flag))
+ goto err;
+ if ((filepos=block_info.next_filepos) == HA_OFFSET_ERROR)
+ {
+ /* Start writing data on deleted blocks */
+ filepos=info->s->state.dellink;
+ }
+ }
+
+ if (block_info.next_filepos != HA_OFFSET_ERROR)
+ if (delete_dynamic_record(info,block_info.next_filepos,1))
+ goto err;
+ DBUG_RETURN(0);
+err:
+ DBUG_RETURN(1);
+}
+
+
+ /* Pack a record. Return new reclength */
+
+uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from)
+{
+ uint length,new_length,flag,bit,i;
+ char *pos,*end,*startpos,*packpos;
+ enum en_fieldtype type;
+ reg3 MARIA_COLUMNDEF *rec;
+ MARIA_BLOB *blob;
+ DBUG_ENTER("_ma_rec_pack");
+
+ flag= 0;
+ bit= 1;
+ startpos= packpos=to;
+ to+= info->s->base.pack_bytes;
+ blob= info->blobs;
+ rec= info->s->rec;
+ if (info->s->base.null_bytes)
+ {
+ memcpy(to, from, info->s->base.null_bytes);
+ from+= info->s->base.null_bytes;
+ to+= info->s->base.null_bytes;
+ }
+
+ for (i=info->s->base.fields ; i-- > 0; from+= length,rec++)
+ {
+ length=(uint) rec->length;
+ if ((type = (enum en_fieldtype) rec->type) != FIELD_NORMAL)
+ {
+ if (type == FIELD_BLOB)
+ {
+ if (!blob->length)
+ flag|=bit;
+ else
+ {
+ char *temp_pos;
+ size_t tmp_length=length-maria_portable_sizeof_char_ptr;
+ memcpy((byte*) to,from,tmp_length);
+ memcpy_fixed(&temp_pos,from+tmp_length,sizeof(char*));
+ memcpy(to+tmp_length,temp_pos,(size_t) blob->length);
+ to+=tmp_length+blob->length;
+ }
+ blob++;
+ }
+ else if (type == FIELD_SKIP_ZERO)
+ {
+ if (memcmp((byte*) from, maria_zero_string, length) == 0)
+ flag|=bit;
+ else
+ {
+ memcpy((byte*) to,from,(size_t) length); to+=length;
+ }
+ }
+ else if (type == FIELD_SKIP_ENDSPACE ||
+ type == FIELD_SKIP_PRESPACE)
+ {
+ pos= (byte*) from; end= (byte*) from + length;
+ if (type == FIELD_SKIP_ENDSPACE)
+ { /* Pack trailing spaces */
+ while (end > from && *(end-1) == ' ')
+ end--;
+ }
+ else
+ { /* Pack prefix spaces */
+ while (pos < end && *pos == ' ')
+ pos++;
+ }
+ new_length=(uint) (end-pos);
+ if (new_length +1 + test(rec->length > 255 && new_length > 127)
+ < length)
+ {
+ if (rec->length > 255 && new_length > 127)
+ {
+ to[0]=(char) ((new_length & 127)+128);
+ to[1]=(char) (new_length >> 7);
+ to+=2;
+ }
+ else
+ *to++= (char) new_length;
+ memcpy((byte*) to,pos,(size_t) new_length); to+=new_length;
+ flag|=bit;
+ }
+ else
+ {
+ memcpy(to,from,(size_t) length); to+=length;
+ }
+ }
+ else if (type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1);
+ uint tmp_length;
+ if (pack_length == 1)
+ {
+ tmp_length= (uint) *(uchar*) from;
+ *to++= *from;
+ }
+ else
+ {
+ tmp_length= uint2korr(from);
+ store_key_length_inc(to,tmp_length);
+ }
+ memcpy(to, from+pack_length,tmp_length);
+ to+= tmp_length;
+ continue;
+ }
+ else
+ {
+ memcpy(to,from,(size_t) length); to+=length;
+ continue; /* Normal field */
+ }
+ if ((bit= bit << 1) >= 256)
+ {
+ *packpos++ = (char) (uchar) flag;
+ bit=1; flag=0;
+ }
+ }
+ else
+ {
+ memcpy(to,from,(size_t) length); to+=length;
+ }
+ }
+ if (bit != 1)
+ *packpos= (char) (uchar) flag;
+ if (info->s->calc_checksum)
+ *to++= (byte) info->cur_row.checksum;
+ DBUG_PRINT("exit",("packed length: %d",(int) (to-startpos)));
+ DBUG_RETURN((uint) (to-startpos));
+} /* _ma_rec_pack */
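+
+/*
+  Illustrative sketch (not part of the engine): the pack-flag bookkeeping used
+  by _ma_rec_pack() above. Each packable column owns one bit in the leading
+  pack_bytes of the packed row; the bit is set when the column was packed away
+  (empty blob, all-zero field, shortened space-packed field). After eight
+  packable columns the current flag byte is flushed and a new one is started.
+  The guard macro and function name are hypothetical.
+*/
+#ifdef MARIA_PACK_FLAG_SKETCH /* never compiled; example only */
+static void example_set_pack_bit(byte *pack_bytes, uint packable_field_no)
+{
+  /* Field 0..7 lives in pack_bytes[0], field 8..15 in pack_bytes[1], ... */
+  pack_bytes[packable_field_no / 8]|= (byte) (1 << (packable_field_no & 7));
+}
+#endif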
+
+
+
+/*
+ Check if a record was correctly packed. Used only by maria_chk.
+ Returns 0 if record is ok.
+*/
+
+my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff,
+ ulong packed_length, my_bool with_checksum)
+{
+ uint length,new_length,flag,bit,i;
+ char *pos,*end,*packpos,*to;
+ enum en_fieldtype type;
+ reg3 MARIA_COLUMNDEF *rec;
+ DBUG_ENTER("_ma_rec_check");
+
+ packpos=rec_buff; to= rec_buff+info->s->base.pack_bytes;
+ rec=info->s->rec;
+ flag= *packpos; bit=1;
+ record+= info->s->base.null_bytes;
+ to+= info->s->base.null_bytes;
+
+ for (i=info->s->base.fields ; i-- > 0; record+= length, rec++)
+ {
+ length=(uint) rec->length;
+ if ((type = (enum en_fieldtype) rec->type) != FIELD_NORMAL)
+ {
+ if (type == FIELD_BLOB)
+ {
+ uint blob_length=
+ _ma_calc_blob_length(length-maria_portable_sizeof_char_ptr,record);
+ if (!blob_length && !(flag & bit))
+ goto err;
+ if (blob_length)
+ to+=length - maria_portable_sizeof_char_ptr+ blob_length;
+ }
+ else if (type == FIELD_SKIP_ZERO)
+ {
+ if (memcmp((byte*) record, maria_zero_string, length) == 0)
+ {
+ if (!(flag & bit))
+ goto err;
+ }
+ else
+ to+=length;
+ }
+ else if (type == FIELD_SKIP_ENDSPACE ||
+ type == FIELD_SKIP_PRESPACE)
+ {
+ pos= (byte*) record; end= (byte*) record + length;
+ if (type == FIELD_SKIP_ENDSPACE)
+ { /* Pack trailing spaces */
+ while (end > record && *(end-1) == ' ')
+ end--;
+ }
+ else
+ { /* Pack pre-spaces */
+ while (pos < end && *pos == ' ')
+ pos++;
+ }
+ new_length=(uint) (end-pos);
+ if (new_length +1 + test(rec->length > 255 && new_length > 127)
+ < length)
+ {
+ if (!(flag & bit))
+ goto err;
+ if (rec->length > 255 && new_length > 127)
+ {
+ if (to[0] != (char) ((new_length & 127)+128) ||
+ to[1] != (char) (new_length >> 7))
+ goto err;
+ to+=2;
+ }
+ else if (*to++ != (char) new_length)
+ goto err;
+ to+=new_length;
+ }
+ else
+ to+=length;
+ }
+ else if (type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length -1);
+ uint tmp_length;
+ if (pack_length == 1)
+ {
+ tmp_length= (uint) *(uchar*) record;
+ to+= 1+ tmp_length;
+ continue;
+ }
+ else
+ {
+ tmp_length= uint2korr(record);
+ to+= get_pack_length(tmp_length)+tmp_length;
+ }
+ continue;
+ }
+ else
+ {
+ to+=length;
+ continue; /* Normal field */
+ }
+ if ((bit= bit << 1) >= 256)
+ {
+ flag= *++packpos;
+ bit=1;
+ }
+ }
+ else
+ to+= length;
+ }
+ if (packed_length != (uint) (to - rec_buff) + test(info->s->calc_checksum) ||
+ (bit != 1 && (flag & ~(bit - 1))))
+ goto err;
+ if (with_checksum && ((uchar) info->cur_row.checksum != (uchar) *to))
+ {
+ DBUG_PRINT("error",("wrong checksum for row"));
+ goto err;
+ }
+ DBUG_RETURN(0);
+
+err:
+ DBUG_RETURN(1);
+}
+
+
+
+ /* Unpacks a record */
+ /* Returns MY_FILE_ERROR and my_errno= HA_ERR_WRONG_IN_RECORD if reclength */
+ /* isn't right. Returns reclength (>0) if ok */
+
+ulong _ma_rec_unpack(register MARIA_HA *info, register byte *to, byte *from,
+ ulong found_length)
+{
+ uint flag,bit,length,rec_length,min_pack_length;
+ enum en_fieldtype type;
+ byte *from_end,*to_end,*packpos;
+ reg3 MARIA_COLUMNDEF *rec,*end_field;
+ DBUG_ENTER("_ma_rec_unpack");
+
+ to_end=to + info->s->base.reclength;
+ from_end=from+found_length;
+ flag= (uchar) *from; bit=1; packpos=from;
+ if (found_length < info->s->base.min_pack_length)
+ goto err;
+ from+= info->s->base.pack_bytes;
+ min_pack_length= info->s->base.min_pack_length - info->s->base.pack_bytes;
+
+ if ((length= info->s->base.null_bytes))
+ {
+ memcpy(to, from, length);
+ from+= length;
+ to+= length;
+ min_pack_length-= length;
+ }
+
+ for (rec=info->s->rec , end_field=rec+info->s->base.fields ;
+ rec < end_field ; to+= rec_length, rec++)
+ {
+ rec_length=rec->length;
+ if ((type = (enum en_fieldtype) rec->type) != FIELD_NORMAL &&
+ (type != FIELD_CHECK))
+ {
+ if (type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(rec_length-1);
+ if (pack_length == 1)
+ {
+ length= (uint) *(uchar*) from;
+ if (length > rec_length-1)
+ goto err;
+ *to= *from++;
+ }
+ else
+ {
+ get_key_length(length, from);
+ if (length > rec_length-2)
+ goto err;
+ int2store(to,length);
+ }
+ if (from+length > from_end)
+ goto err;
+ memcpy(to+pack_length, from, length);
+ from+= length;
+ min_pack_length--;
+ continue;
+ }
+ if (flag & bit)
+ {
+ if (type == FIELD_BLOB || type == FIELD_SKIP_ZERO)
+ bzero((byte*) to,rec_length);
+ else if (type == FIELD_SKIP_ENDSPACE ||
+ type == FIELD_SKIP_PRESPACE)
+ {
+ if (rec->length > 255 && *from & 128)
+ {
+ if (from + 1 >= from_end)
+ goto err;
+ length= (*from & 127)+ ((uint) (uchar) *(from+1) << 7); from+=2;
+ }
+ else
+ {
+ if (from == from_end)
+ goto err;
+ length= (uchar) *from++;
+ }
+ min_pack_length--;
+ if (length >= rec_length ||
+ min_pack_length + length > (uint) (from_end - from))
+ goto err;
+ if (type == FIELD_SKIP_ENDSPACE)
+ {
+ memcpy(to,(byte*) from,(size_t) length);
+ bfill((byte*) to+length,rec_length-length,' ');
+ }
+ else
+ {
+ bfill((byte*) to,rec_length-length,' ');
+ memcpy(to+rec_length-length,(byte*) from,(size_t) length);
+ }
+ from+=length;
+ }
+ }
+ else if (type == FIELD_BLOB)
+ {
+ uint size_length=rec_length- maria_portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(size_length,from);
+ if ((ulong) (from_end-from) - size_length < blob_length ||
+ min_pack_length > (uint) (from_end -(from+size_length+blob_length)))
+ goto err;
+ memcpy((byte*) to,(byte*) from,(size_t) size_length);
+ from+=size_length;
+ memcpy_fixed((byte*) to+size_length,(byte*) &from,sizeof(char*));
+ from+=blob_length;
+ }
+ else
+ {
+ if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE)
+ min_pack_length--;
+ if (min_pack_length + rec_length > (uint) (from_end - from))
+ goto err;
+ memcpy(to,(byte*) from,(size_t) rec_length); from+=rec_length;
+ }
+ if ((bit= bit << 1) >= 256)
+ {
+ flag= (uchar) *++packpos; bit=1;
+ }
+ }
+ else
+ {
+ if (min_pack_length > (uint) (from_end - from))
+ goto err;
+ min_pack_length-=rec_length;
+ memcpy(to, (byte*) from, (size_t) rec_length);
+ from+=rec_length;
+ }
+ }
+ if (info->s->calc_checksum)
+ info->cur_row.checksum= (uint) (uchar) *from++;
+ if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit-1))))
+ DBUG_RETURN(found_length);
+
+err:
+ my_errno= HA_ERR_WRONG_IN_RECORD;
+ DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx",
+ (long) to, (long) to_end, (long) from, (long) from_end));
+ DBUG_DUMP("from",(byte*) info->rec_buff,info->s->base.min_pack_length);
+ DBUG_RETURN(MY_FILE_ERROR);
+} /* _ma_rec_unpack */
+
+
+ /* Calc total length of all blobs. Updates blob->length for each blob */
+
+ulong _ma_calc_total_blob_length(MARIA_HA *info, const byte *record)
+{
+ ulong length;
+ MARIA_BLOB *blob,*end;
+
+ for (length=0, blob= info->blobs, end=blob+info->s->base.blobs ;
+ blob != end;
+ blob++)
+ {
+ blob->length= _ma_calc_blob_length(blob->pack_length,record + blob->offset);
+ length+=blob->length;
+ }
+ return length;
+}
+
+
+ulong _ma_calc_blob_length(uint length, const byte *pos)
+{
+ switch (length) {
+ case 1:
+ return (uint) (uchar) *pos;
+ case 2:
+ return (uint) uint2korr(pos);
+ case 3:
+ return uint3korr(pos);
+ case 4:
+ return uint4korr(pos);
+ default:
+ break;
+ }
+ return 0; /* Impossible */
+}
+
+
+void _ma_store_blob_length(byte *pos,uint pack_length,uint length)
+{
+ switch (pack_length) {
+ case 1:
+ *pos= (uchar) length;
+ break;
+ case 2:
+ int2store(pos,length);
+ break;
+ case 3:
+ int3store(pos,length);
+ break;
+ case 4:
+ int4store(pos,length);
+ default:
+ break;
+ }
+ return;
+}
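+
+/*
+  Usage sketch for the two helpers above (the function name here is
+  hypothetical, for illustration only): a blob length is stored
+  low-byte-first in 'pack_length' bytes (1..4) and read back with
+  _ma_calc_blob_length().
+*/
+
+static ulong sketch_blob_length_roundtrip(uint pack_length, uint blob_length)
+{
+  byte buff[4];
+  _ma_store_blob_length(buff, pack_length, blob_length);
+  /* Equals blob_length as long as it fits in pack_length bytes */
+  return _ma_calc_blob_length(pack_length, buff);
+}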
+
+
+/*
+ Read record from datafile.
+
+ SYNOPSIS
+ _ma_read_dynamic_record()
+ info MARIA_HA pointer to table.
+ filepos From where to read the record.
+ buf Destination for record.
+
+ NOTE
+ If a write buffer is active, it needs to be flushed if its contents
+ intersect with the record to read. We always check if the position
+ of the first byte of the write buffer is lower than the position
+ past the last byte to read. In theory this is also true if the write
+ buffer is completely below the read segment, that is, if there is no
+ intersection. But this case is unusual, so we flush anyway. Only if the
+ first byte in the write buffer is above the last byte to read do we
+ not flush.
+
+ A dynamic record may need several reads. So this check must be done
+ before every read. Reading a dynamic record starts with reading the
+ block header. If the record does not fit into the free space of the
+ header, the block may be longer than the header. In this case a
+ second read is necessary. These one or two reads repeat for every
+ part of the record.
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+int _ma_read_dynamic_record(MARIA_HA *info, byte *buf,
+ MARIA_RECORD_POS filepos)
+{
+ int block_of_record;
+ uint b_type,left_length;
+ byte *to;
+ MARIA_BLOCK_INFO block_info;
+ File file;
+ DBUG_ENTER("_ma_read_dynamic_record");
+
+ if (filepos != HA_OFFSET_ERROR)
+ {
+ LINT_INIT(to);
+ LINT_INIT(left_length);
+ file=info->dfile;
+ block_of_record= 0; /* First block of record is numbered as zero. */
+ block_info.second_read= 0;
+ do
+ {
+ /* A corrupted table can have wrong pointers. (Bug# 19835) */
+ if (filepos == HA_OFFSET_ERROR)
+ goto panic;
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file < filepos + MARIA_BLOCK_INFO_HEADER_LENGTH &&
+ flush_io_cache(&info->rec_cache))
+ goto err;
+ info->rec_cache.seek_not_done=1;
+ if ((b_type= _ma_get_block_info(&block_info, file, filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
+ my_errno=HA_ERR_RECORD_DELETED;
+ goto err;
+ }
+ if (block_of_record++ == 0) /* First block */
+ {
+ if (block_info.rec_len > (uint) info->s->base.max_pack_length)
+ goto panic;
+ if (info->s->base.blobs)
+ {
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ block_info.rec_len +
+ info->s->base.extra_rec_buff_size))
+ goto err;
+ }
+ to= info->rec_buff;
+ left_length=block_info.rec_len;
+ }
+ if (left_length < block_info.data_len || ! block_info.data_len)
+ goto panic; /* Wrong linked record */
+ /* copy information that is already read */
+ {
+ uint offset= (uint) (block_info.filepos - filepos);
+ uint prefetch_len= (sizeof(block_info.header) - offset);
+ filepos+= sizeof(block_info.header);
+
+ if (prefetch_len > block_info.data_len)
+ prefetch_len= block_info.data_len;
+ if (prefetch_len)
+ {
+ memcpy((byte*) to, block_info.header + offset, prefetch_len);
+ block_info.data_len-= prefetch_len;
+ left_length-= prefetch_len;
+ to+= prefetch_len;
+ }
+ }
+ /* read rest of record from file */
+ if (block_info.data_len)
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file < filepos + block_info.data_len &&
+ flush_io_cache(&info->rec_cache))
+ goto err;
+ /*
+ What a pity that this method is not called 'file_pread' and that
+ there is no equivalent without seeking. We are at the right
+ position already. :(
+ */
+ if (info->s->file_read(info, (byte*) to, block_info.data_len,
+ filepos, MYF(MY_NABP)))
+ goto panic;
+ left_length-=block_info.data_len;
+ to+=block_info.data_len;
+ }
+ filepos= block_info.next_filepos;
+ } while (left_length);
+
+ info->update|= HA_STATE_AKTIV; /* We have an active record */
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) !=
+ MY_FILE_ERROR ? 0 : 1);
+ }
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(1); /* Wrong data to read */
+
+panic:
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+err:
+ VOID(_ma_writeinfo(info,0));
+ DBUG_RETURN(1);
+}
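+
+/*
+  A minimal sketch (hypothetical helper, for illustration only) of the
+  flush test used above and described in the NOTE of
+  _ma_read_dynamic_record(): the write cache must be flushed whenever its
+  first byte lies below the position past the last byte we are about to
+  read, i.e. whenever the cached bytes may overlap the read.
+*/
+
+static my_bool sketch_write_cache_needs_flush(my_off_t cache_pos_in_file,
+                                              my_off_t read_pos,
+                                              uint read_length)
+{
+  return (my_bool) (cache_pos_in_file < read_pos + read_length);
+}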
+
+ /* Compare unique constraint between stored rows */
+
+my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const byte *record, MARIA_RECORD_POS pos)
+{
+ byte *old_rec_buff,*old_record;
+ my_off_t old_rec_buff_size;
+ my_bool error;
+ DBUG_ENTER("_ma_cmp_dynamic_unique");
+
+ if (!(old_record=my_alloca(info->s->base.reclength)))
+ DBUG_RETURN(1);
+
+ /* Don't let the compare destroy blobs that may be in use */
+ old_rec_buff= info->rec_buff;
+ old_rec_buff_size= info->rec_buff_size;
+
+ if (info->s->base.blobs)
+ info->rec_buff= 0;
+ error= _ma_read_dynamic_record(info, old_record, pos) != 0;
+ if (!error)
+ error=_ma_unique_comp(def, record, old_record, def->null_are_equal) != 0;
+ if (info->s->base.blobs)
+ {
+ my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ info->rec_buff= old_rec_buff;
+ info->rec_buff_size= old_rec_buff_size;
+ }
+ my_afree(old_record);
+ DBUG_RETURN(error);
+}
+
+
+ /* Compare record on disk with packed record in memory */
+
+my_bool _ma_cmp_dynamic_record(register MARIA_HA *info,
+ register const byte *record)
+{
+ uint flag, reclength, b_type,cmp_length;
+ my_off_t filepos;
+ byte *buffer;
+ MARIA_BLOCK_INFO block_info;
+ my_bool error= 1;
+ DBUG_ENTER("_ma_cmp_dynamic_record");
+
+ /* We are going to do changes; don't let anybody disturb */
+ dont_break(); /* Don't allow SIGHUP or SIGINT */
+
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ info->update&= ~(HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK);
+ if (flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(1);
+ }
+ info->rec_cache.seek_not_done=1;
+
+ /* If nobody has touched the database we don't have to test rec */
+
+ buffer=info->rec_buff;
+ if ((info->opt_flag & READ_CHECK_USED))
+ { /* If check isn't disabled */
+ if (info->s->base.blobs)
+ {
+ if (!(buffer=(byte*) my_alloca(info->s->base.pack_reclength+
+ _ma_calc_total_blob_length(info,record))))
+ DBUG_RETURN(1);
+ }
+ reclength= _ma_rec_pack(info,buffer,record);
+ record= buffer;
+
+ filepos= info->cur_row.lastpos;
+ flag=block_info.second_read=0;
+ block_info.next_filepos=filepos;
+ while (reclength > 0)
+ {
+ if ((b_type= _ma_get_block_info(&block_info,info->dfile,
+ block_info.next_filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
+ my_errno=HA_ERR_RECORD_CHANGED;
+ goto err;
+ }
+ if (flag == 0) /* First block */
+ {
+ flag=1;
+ if (reclength != block_info.rec_len)
+ {
+ my_errno=HA_ERR_RECORD_CHANGED;
+ goto err;
+ }
+ } else if (reclength < block_info.data_len)
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err;
+ }
+ reclength-= block_info.data_len;
+ cmp_length= block_info.data_len;
+ if (!reclength && info->s->calc_checksum)
+ cmp_length--; /* 'record' may not contain checksum */
+
+ if (_ma_cmp_buffer(info->dfile,record,block_info.filepos,
+ cmp_length))
+ {
+ my_errno=HA_ERR_RECORD_CHANGED;
+ goto err;
+ }
+ flag=1;
+ record+=block_info.data_len;
+ }
+ }
+ my_errno=0;
+ error= 0;
+err:
+ if (buffer != info->rec_buff)
+ my_afree((gptr) buffer);
+ DBUG_PRINT("exit", ("result: %d", error));
+ DBUG_RETURN(error);
+}
+
+
+ /* Compare file to buffer */
+
+static my_bool _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos,
+ uint length)
+{
+ uint next_length;
+ char temp_buff[IO_SIZE*2];
+ DBUG_ENTER("_ma_cmp_buffer");
+
+ next_length= IO_SIZE*2 - (uint) (filepos & (IO_SIZE-1));
+
+ while (length > IO_SIZE*2)
+ {
+ if (my_pread(file,temp_buff,next_length,filepos, MYF(MY_NABP)) ||
+ memcmp((byte*) buff,temp_buff,next_length))
+ goto err;
+ filepos+=next_length;
+ buff+=next_length;
+ length-= next_length;
+ next_length=IO_SIZE*2;
+ }
+ if (my_pread(file,temp_buff,length,filepos,MYF(MY_NABP)))
+ goto err;
+ DBUG_RETURN(memcmp((byte*) buff,temp_buff,length) != 0);
+err:
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Read record from datafile.
+
+ SYNOPSIS
+ _ma_read_rnd_dynamic_record()
+ info MARIA_HA pointer to table.
+ buf Destination for record.
+ filepos From where to read the record.
+ skip_deleted_blocks Whether to repeat reading until a non-deleted
+ record is found.
+
+ NOTE
+
+ If a write buffer is active, it needs to be flushed if its contents
+ intersect with the record to read. We always check if the position
+ of the first byte of the write buffer is lower than the position
+ past the last byte to read. In theory this is also true if the write
+ buffer is completely below the read segment, that is, if there is no
+ intersection. But this case is unusual, so we flush anyway. Only if the
+ first byte in the write buffer is above the last byte to read do we
+ not flush.
+
+ A dynamic record may need several reads. So this check must be done
+ before every read. Reading a dynamic record starts with reading the
+ block header. If the record does not fit into the free space of the
+ header, the block may be longer than the header. In this case a
+ second read is necessary. These one or two reads repeat for every
+ part of the record.
+
+ RETURN
+ 0 OK
+ != 0 Error
+*/
+
+int _ma_read_rnd_dynamic_record(MARIA_HA *info,
+ byte *buf,
+ MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks)
+{
+ int block_of_record, info_read, save_errno;
+ uint left_len,b_type;
+ byte *to;
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("_ma_read_rnd_dynamic_record");
+
+ info_read=0;
+ LINT_INIT(to);
+
+ if (info->lock_type == F_UNLCK)
+ {
+#ifndef UNSAFE_LOCKING
+#else
+ info->tmp_lock_type=F_RDLCK;
+#endif
+ }
+ else
+ info_read=1; /* memory-keyinfoblock is ok */
+
+ block_of_record= 0; /* First block of record is numbered as zero. */
+ block_info.second_read= 0;
+ left_len=1;
+ do
+ {
+ if (filepos >= info->state->data_file_length)
+ {
+ if (!info_read)
+ { /* Check if changed */
+ info_read=1;
+ info->rec_cache.seek_not_done=1;
+ if (_ma_state_info_read_dsk(share->kfile,&share->state,1))
+ goto panic;
+ }
+ if (filepos >= info->state->data_file_length)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ goto err;
+ }
+ }
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(&info->rec_cache,(byte*) block_info.header,filepos,
+ sizeof(block_info.header),
+ (!block_of_record && skip_deleted_blocks ?
+ READING_NEXT : 0) | READING_HEADER))
+ goto panic;
+ b_type= _ma_get_block_info(&block_info,-1,filepos);
+ }
+ else
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file < filepos + MARIA_BLOCK_INFO_HEADER_LENGTH &&
+ flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(my_errno);
+ info->rec_cache.seek_not_done=1;
+ b_type= _ma_get_block_info(&block_info,info->dfile,filepos);
+ }
+
+ if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ && skip_deleted_blocks)
+ {
+ filepos=block_info.filepos+block_info.block_len;
+ block_info.second_read=0;
+ continue; /* Search after next_record */
+ }
+ if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ {
+ my_errno= HA_ERR_RECORD_DELETED;
+ info->cur_row.lastpos= block_info.filepos;
+ info->cur_row.nextpos= block_info.filepos+block_info.block_len;
+ }
+ goto err;
+ }
+ if (block_of_record == 0) /* First block */
+ {
+ if (block_info.rec_len > (uint) share->base.max_pack_length)
+ goto panic;
+ info->cur_row.lastpos= filepos;
+ if (share->base.blobs)
+ {
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ block_info.rec_len +
+ info->s->base.extra_rec_buff_size))
+ goto err;
+ }
+ to= info->rec_buff;
+ left_len=block_info.rec_len;
+ }
+ if (left_len < block_info.data_len)
+ goto panic; /* Wrong linked record */
+
+ /* copy information that is already read */
+ {
+ uint offset=(uint) (block_info.filepos - filepos);
+ uint tmp_length= (sizeof(block_info.header) - offset);
+ filepos=block_info.filepos;
+
+ if (tmp_length > block_info.data_len)
+ tmp_length= block_info.data_len;
+ if (tmp_length)
+ {
+ memcpy((byte*) to, block_info.header+offset,tmp_length);
+ block_info.data_len-=tmp_length;
+ left_len-=tmp_length;
+ to+=tmp_length;
+ filepos+=tmp_length;
+ }
+ }
+ /* read rest of record from file */
+ if (block_info.data_len)
+ {
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(&info->rec_cache,(byte*) to,filepos,
+ block_info.data_len,
+ (!block_of_record && skip_deleted_blocks) ?
+ READING_NEXT : 0))
+ goto panic;
+ }
+ else
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file <
+ block_info.filepos + block_info.data_len &&
+ flush_io_cache(&info->rec_cache))
+ goto err;
+ /* VOID(my_seek(info->dfile,filepos,MY_SEEK_SET,MYF(0))); */
+ if (my_read(info->dfile,(byte*) to,block_info.data_len,MYF(MY_NABP)))
+ {
+ if (my_errno == -1)
+ my_errno= HA_ERR_WRONG_IN_RECORD; /* Unexpected end of file */
+ goto err;
+ }
+ }
+ }
+ /*
+ Increment block-of-record counter. If it was the first block,
+ remember the position behind the block for the next call.
+ */
+ if (block_of_record++ == 0)
+ {
+ info->cur_row.nextpos= block_info.filepos+block_info.block_len;
+ skip_deleted_blocks=0;
+ }
+ left_len-=block_info.data_len;
+ to+=block_info.data_len;
+ filepos=block_info.next_filepos;
+ } while (left_len);
+
+ info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
+ fast_ma_writeinfo(info);
+ if (_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) !=
+ MY_FILE_ERROR)
+ DBUG_RETURN(0);
+ DBUG_RETURN(my_errno); /* Wrong record */
+
+panic:
+ my_errno=HA_ERR_WRONG_IN_RECORD; /* Something is fatally wrong */
+err:
+ save_errno=my_errno;
+ VOID(_ma_writeinfo(info,0));
+ DBUG_RETURN(my_errno=save_errno);
+}
+
+
+ /* Read and process header from a dynamic-record-file */
+
+uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos)
+{
+ uint return_val=0;
+ uchar *header=info->header;
+
+ if (file >= 0)
+ {
+ /*
+ We do not use my_pread() here because we want to have the file
+ pointer set to the end of the header after this function.
+ my_pread() may leave the file pointer untouched.
+ */
+ VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
+ if (my_read(file,(char*) header,sizeof(info->header),MYF(0)) !=
+ sizeof(info->header))
+ goto err;
+ }
+ DBUG_DUMP("header",(byte*) header,MARIA_BLOCK_INFO_HEADER_LENGTH);
+ if (info->second_read)
+ {
+ if (info->header[0] <= 6 || info->header[0] == 13)
+ return_val=BLOCK_SYNC_ERROR;
+ }
+ else
+ {
+ if (info->header[0] > 6 && info->header[0] != 13)
+ return_val=BLOCK_SYNC_ERROR;
+ }
+ info->next_filepos= HA_OFFSET_ERROR; /* Dummy if no next block */
+
+ switch (info->header[0]) {
+ case 0:
+ if ((info->block_len=(uint) mi_uint3korr(header+1)) <
+ MARIA_MIN_BLOCK_LENGTH ||
+ (info->block_len & (MARIA_DYN_ALIGN_SIZE -1)))
+ goto err;
+ info->filepos=filepos;
+ info->next_filepos=mi_sizekorr(header+4);
+ info->prev_filepos=mi_sizekorr(header+12);
+#if SIZEOF_OFF_T == 4
+ if ((mi_uint4korr(header+4) != 0 &&
+ (mi_uint4korr(header+4) != (ulong) ~0 ||
+ info->next_filepos != (ulong) ~0)) ||
+ (mi_uint4korr(header+12) != 0 &&
+ (mi_uint4korr(header+12) != (ulong) ~0 ||
+ info->prev_filepos != (ulong) ~0)))
+ goto err;
+#endif
+ return return_val | BLOCK_DELETED; /* Deleted block */
+
+ case 1:
+ info->rec_len=info->data_len=info->block_len=mi_uint2korr(header+1);
+ info->filepos=filepos+3;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+ case 2:
+ info->rec_len=info->data_len=info->block_len=mi_uint3korr(header+1);
+ info->filepos=filepos+4;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+
+ case 13:
+ info->rec_len=mi_uint4korr(header+1);
+ info->block_len=info->data_len=mi_uint3korr(header+5);
+ info->next_filepos=mi_sizekorr(header+8);
+ info->second_read=1;
+ info->filepos=filepos+16;
+ return return_val | BLOCK_FIRST;
+
+ case 3:
+ info->rec_len=info->data_len=mi_uint2korr(header+1);
+ info->block_len=info->rec_len+ (uint) header[3];
+ info->filepos=filepos+4;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+ case 4:
+ info->rec_len=info->data_len=mi_uint3korr(header+1);
+ info->block_len=info->rec_len+ (uint) header[4];
+ info->filepos=filepos+5;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+
+ case 5:
+ info->rec_len=mi_uint2korr(header+1);
+ info->block_len=info->data_len=mi_uint2korr(header+3);
+ info->next_filepos=mi_sizekorr(header+5);
+ info->second_read=1;
+ info->filepos=filepos+13;
+ return return_val | BLOCK_FIRST;
+ case 6:
+ info->rec_len=mi_uint3korr(header+1);
+ info->block_len=info->data_len=mi_uint3korr(header+4);
+ info->next_filepos=mi_sizekorr(header+7);
+ info->second_read=1;
+ info->filepos=filepos+15;
+ return return_val | BLOCK_FIRST;
+
+ /* The following blocks are identical to 1-6 without rec_len */
+ case 7:
+ info->data_len=info->block_len=mi_uint2korr(header+1);
+ info->filepos=filepos+3;
+ return return_val | BLOCK_LAST;
+ case 8:
+ info->data_len=info->block_len=mi_uint3korr(header+1);
+ info->filepos=filepos+4;
+ return return_val | BLOCK_LAST;
+
+ case 9:
+ info->data_len=mi_uint2korr(header+1);
+ info->block_len=info->data_len+ (uint) header[3];
+ info->filepos=filepos+4;
+ return return_val | BLOCK_LAST;
+ case 10:
+ info->data_len=mi_uint3korr(header+1);
+ info->block_len=info->data_len+ (uint) header[4];
+ info->filepos=filepos+5;
+ return return_val | BLOCK_LAST;
+
+ case 11:
+ info->data_len=info->block_len=mi_uint2korr(header+1);
+ info->next_filepos=mi_sizekorr(header+3);
+ info->second_read=1;
+ info->filepos=filepos+11;
+ return return_val;
+ case 12:
+ info->data_len=info->block_len=mi_uint3korr(header+1);
+ info->next_filepos=mi_sizekorr(header+4);
+ info->second_read=1;
+ info->filepos=filepos+12;
+ return return_val;
+ }
+
+err:
+ my_errno=HA_ERR_WRONG_IN_RECORD; /* Garbage */
+ return BLOCK_ERROR;
+}
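+
+/*
+  A small sketch (hypothetical helper, for illustration only) of how the
+  simplest header type decoded above is interpreted: header[0] == 1 means
+  the whole record fits in one block, the 2-byte record length follows the
+  type byte and the data starts 3 bytes after the header position.
+*/
+
+static uint sketch_decode_type1_header(const uchar *header, my_off_t filepos,
+                                       MARIA_BLOCK_INFO *info)
+{
+  if (header[0] != 1)
+    return BLOCK_ERROR;                          /* only type 1 sketched */
+  info->rec_len= info->data_len= info->block_len= mi_uint2korr(header + 1);
+  info->filepos= filepos + 3;
+  return BLOCK_FIRST | BLOCK_LAST;               /* first and last block */
+}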
diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c
new file mode 100644
index 00000000000..90e79362442
--- /dev/null
+++ b/storage/maria/ma_extra.c
@@ -0,0 +1,458 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+static void maria_extra_keyflag(MARIA_HA *info,
+ enum ha_extra_function function);
+
+
+/*
+ Set options and buffers to optimize table handling
+
+ SYNOPSIS
+ maria_extra()
+ info open table
+ function operation
+ extra_arg Pointer to extra argument (normally pointer to ulong)
+ Used when function is one of:
+ HA_EXTRA_WRITE_CACHE
+ HA_EXTRA_CACHE
+ RETURN VALUES
+ 0 ok
+ # error
+*/
+
+int maria_extra(MARIA_HA *info, enum ha_extra_function function,
+ void *extra_arg)
+{
+ int error=0;
+ ulong cache_size;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("maria_extra");
+ DBUG_PRINT("enter",("function: %d",(int) function));
+
+ switch (function) {
+ case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */
+ info->lastinx= 0; /* Use first index as def */
+ info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->page_changed=1;
+ /* Next/prev gives first/last */
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ reinit_io_cache(&info->rec_cache,READ_CACHE,0,
+ (pbool) (info->lock_type != F_UNLCK),
+ (pbool) test(info->update & HA_STATE_ROW_CHANGED)
+ );
+ }
+ info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND |
+ HA_STATE_PREV_FOUND);
+ break;
+ case HA_EXTRA_CACHE:
+ if (info->lock_type == F_UNLCK &&
+ (share->options & HA_OPTION_PACK_RECORD))
+ {
+ error=1; /* Not possible if not locked */
+ my_errno=EACCES;
+ break;
+ }
+ if (info->s->file_map) /* Don't use cache if mmap */
+ break;
+#if defined(HAVE_MMAP) && defined(HAVE_MADVISE)
+ if ((share->options & HA_OPTION_COMPRESS_RECORD))
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ if (_ma_memmap_file(info))
+ {
+ /* We don't need MADV_SEQUENTIAL if the file is small */
+ madvise(share->file_map,share->state.state.data_file_length,
+ share->state.state.data_file_length <= RECORD_CACHE_SIZE*16 ?
+ MADV_RANDOM : MADV_SEQUENTIAL);
+ pthread_mutex_unlock(&share->intern_lock);
+ break;
+ }
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+#endif
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ info->opt_flag&= ~WRITE_CACHE_USED;
+ if ((error=end_io_cache(&info->rec_cache)))
+ break;
+ }
+ if (!(info->opt_flag &
+ (READ_CACHE_USED | WRITE_CACHE_USED | MEMMAP_USED)))
+ {
+ cache_size= (extra_arg ? *(ulong*) extra_arg :
+ my_default_record_cache_size);
+ if (!(init_io_cache(&info->rec_cache,info->dfile,
+ (uint) min(info->state->data_file_length+1,
+ cache_size),
+ READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK),
+ MYF(share->write_flag & MY_WAIT_IF_FULL))))
+ {
+ info->opt_flag|=READ_CACHE_USED;
+ info->update&= ~HA_STATE_ROW_CHANGED;
+ }
+ if (share->concurrent_insert)
+ info->rec_cache.end_of_file=info->state->data_file_length;
+ }
+ break;
+ case HA_EXTRA_REINIT_CACHE:
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ reinit_io_cache(&info->rec_cache, READ_CACHE, info->cur_row.nextpos,
+ (pbool) (info->lock_type != F_UNLCK),
+ (pbool) test(info->update & HA_STATE_ROW_CHANGED));
+ info->update&= ~HA_STATE_ROW_CHANGED;
+ if (share->concurrent_insert)
+ info->rec_cache.end_of_file=info->state->data_file_length;
+ }
+ break;
+ case HA_EXTRA_WRITE_CACHE:
+ if (info->lock_type == F_UNLCK)
+ {
+ error=1; /* Not possible if not locked */
+ break;
+ }
+
+ cache_size= (extra_arg ? *(ulong*) extra_arg :
+ my_default_record_cache_size);
+ if (!(info->opt_flag &
+ (READ_CACHE_USED | WRITE_CACHE_USED | OPT_NO_ROWS)) &&
+ !share->state.header.uniques)
+ if (!(init_io_cache(&info->rec_cache,info->dfile, cache_size,
+ WRITE_CACHE,info->state->data_file_length,
+ (pbool) (info->lock_type != F_UNLCK),
+ MYF(share->write_flag & MY_WAIT_IF_FULL))))
+ {
+ info->opt_flag|=WRITE_CACHE_USED;
+ info->update&= ~(HA_STATE_ROW_CHANGED |
+ HA_STATE_WRITE_AT_END |
+ HA_STATE_EXTEND_BLOCK);
+ }
+ break;
+ case HA_EXTRA_PREPARE_FOR_UPDATE:
+ if (info->s->data_file_type != DYNAMIC_RECORD)
+ break;
+ /* Remove read/write cache if dynamic rows */
+ case HA_EXTRA_NO_CACHE:
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ error=end_io_cache(&info->rec_cache);
+ /* Sergei will insert full text index caching here */
+ }
+#if defined(HAVE_MMAP) && defined(HAVE_MADVISE)
+ if (info->opt_flag & MEMMAP_USED)
+ madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM);
+#endif
+ break;
+ case HA_EXTRA_FLUSH_CACHE:
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ if ((error=flush_io_cache(&info->rec_cache)))
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info); /* Fatal error found */
+ }
+ }
+ break;
+ case HA_EXTRA_NO_READCHECK:
+ info->opt_flag&= ~READ_CHECK_USED; /* No readcheck */
+ break;
+ case HA_EXTRA_READCHECK:
+ info->opt_flag|= READ_CHECK_USED;
+ break;
+ case HA_EXTRA_KEYREAD: /* Read only keys to record */
+ case HA_EXTRA_REMEMBER_POS:
+ info->opt_flag |= REMEMBER_OLD_POS;
+ bmove((byte*) info->lastkey+share->base.max_key_length*2,
+ (byte*) info->lastkey,info->lastkey_length);
+ info->save_update= info->update;
+ info->save_lastinx= info->lastinx;
+ info->save_lastpos= info->cur_row.lastpos;
+ info->save_lastkey_length=info->lastkey_length;
+ if (function == HA_EXTRA_REMEMBER_POS)
+ break;
+ /* fall through */
+ case HA_EXTRA_KEYREAD_CHANGE_POS:
+ info->opt_flag |= KEY_READ_USED;
+ info->read_record= _ma_read_key_record;
+ break;
+ case HA_EXTRA_NO_KEYREAD:
+ case HA_EXTRA_RESTORE_POS:
+ if (info->opt_flag & REMEMBER_OLD_POS)
+ {
+ bmove((byte*) info->lastkey,
+ (byte*) info->lastkey+share->base.max_key_length*2,
+ info->save_lastkey_length);
+ info->update= info->save_update | HA_STATE_WRITTEN;
+ info->lastinx= info->save_lastinx;
+ info->cur_row.lastpos= info->save_lastpos;
+ info->lastkey_length=info->save_lastkey_length;
+ }
+ info->read_record= share->read_record;
+ info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS);
+ break;
+ case HA_EXTRA_NO_USER_CHANGE: /* Database is somehow locked against changes */
+ info->lock_type= F_EXTRA_LCK; /* Simulate as locked */
+ break;
+ case HA_EXTRA_WAIT_LOCK:
+ info->lock_wait=0;
+ break;
+ case HA_EXTRA_NO_WAIT_LOCK:
+ info->lock_wait=MY_DONT_WAIT;
+ break;
+ case HA_EXTRA_NO_KEYS:
+ if (info->lock_type == F_UNLCK)
+ {
+ error=1; /* Not possible if not locked */
+ break;
+ }
+ if (maria_is_any_key_active(share->state.key_map))
+ {
+ MARIA_KEYDEF *key=share->keyinfo;
+ uint i;
+ for (i=0 ; i < share->base.keys ; i++,key++)
+ {
+ if (!(key->flag & HA_NOSAME) && info->s->base.auto_key != i+1)
+ {
+ maria_clear_key_active(share->state.key_map, i);
+ info->update|= HA_STATE_CHANGED;
+ }
+ }
+
+ if (!share->changed)
+ {
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+ share->changed=1; /* Update on close */
+ if (!share->global_changed)
+ {
+ share->global_changed=1;
+ share->state.open_count++;
+ }
+ }
+ share->state.state= *info->state;
+ error=_ma_state_info_write(share->kfile,&share->state,1 | 2);
+ }
+ break;
+ case HA_EXTRA_FORCE_REOPEN:
+ pthread_mutex_lock(&THR_LOCK_maria);
+ share->last_version= 0L; /* Impossible version */
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ break;
+ case HA_EXTRA_PREPARE_FOR_DELETE:
+ pthread_mutex_lock(&THR_LOCK_maria);
+ share->last_version= 0L; /* Impossible version */
+#ifdef __WIN__
+ /* Close the isam and data files as Win32 can't drop an open table */
+ pthread_mutex_lock(&share->intern_lock);
+ if (flush_key_blocks(share->key_cache, share->kfile,
+ (function == HA_EXTRA_FORCE_REOPEN ?
+ FLUSH_RELEASE : FLUSH_IGNORE_CHANGED)))
+ {
+ error=my_errno;
+ share->changed=1;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info); /* Fatal error found */
+ }
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ error=end_io_cache(&info->rec_cache);
+ }
+ if (info->lock_type != F_UNLCK && ! info->was_locked)
+ {
+ info->was_locked=info->lock_type;
+ if (maria_lock_database(info,F_UNLCK))
+ error=my_errno;
+ info->lock_type = F_UNLCK;
+ }
+ if (share->kfile >= 0)
+ _ma_decrement_open_count(info);
+ if (share->kfile >= 0 && my_close(share->kfile,MYF(0)))
+ error=my_errno;
+ {
+ LIST *list_element ;
+ for (list_element=maria_open_list ;
+ list_element ;
+ list_element=list_element->next)
+ {
+ MARIA_HA *tmpinfo=(MARIA_HA*) list_element->data;
+ if (tmpinfo->s == info->s)
+ {
+ if (tmpinfo->dfile >= 0 && my_close(tmpinfo->dfile,MYF(0)))
+ error = my_errno;
+ tmpinfo->dfile= -1;
+ }
+ }
+ }
+ share->kfile= -1; /* Files aren't open anymore */
+ pthread_mutex_unlock(&share->intern_lock);
+#endif
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ break;
+ case HA_EXTRA_FLUSH:
+ if (!share->temporary)
+ flush_key_blocks(share->key_cache, share->kfile, FLUSH_KEEP);
+#ifdef HAVE_PWRITE
+ _ma_decrement_open_count(info);
+#endif
+ if (share->not_flushed)
+ {
+ share->not_flushed=0;
+ if (_ma_sync_table_files(info))
+ error= my_errno;
+ if (error)
+ {
+ share->changed=1;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info); /* Fatal error found */
+ }
+ }
+ if (share->base.blobs && info->rec_buff_size >
+ share->base.default_rec_buff_size)
+ {
+ info->rec_buff_size= 1; /* Force realloc */
+ _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ share->base.default_rec_buff_size);
+ }
+ break;
+ case HA_EXTRA_NORMAL: /* These aren't in use */
+ info->quick_mode=0;
+ break;
+ case HA_EXTRA_QUICK:
+ info->quick_mode=1;
+ break;
+ case HA_EXTRA_NO_ROWS:
+ if (!share->state.header.uniques)
+ info->opt_flag|= OPT_NO_ROWS;
+ break;
+ case HA_EXTRA_PRELOAD_BUFFER_SIZE:
+ info->preload_buff_size= *((ulong *) extra_arg);
+ break;
+ case HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
+ case HA_EXTRA_CHANGE_KEY_TO_DUP:
+ maria_extra_keyflag(info, function);
+ break;
+ case HA_EXTRA_MMAP:
+#ifdef HAVE_MMAP
+ pthread_mutex_lock(&share->intern_lock);
+ if (!share->file_map)
+ {
+ if (_ma_dynmap_file(info, share->state.state.data_file_length))
+ {
+ DBUG_PRINT("warning",("mmap failed: errno: %d",errno));
+ error= my_errno= errno;
+ }
+ else
+ {
+ share->file_read= _ma_mmap_pread;
+ share->file_write= _ma_mmap_pwrite;
+ }
+ }
+ pthread_mutex_unlock(&share->intern_lock);
+#endif
+ break;
+ case HA_EXTRA_MARK_AS_LOG_TABLE:
+ pthread_mutex_lock(&share->intern_lock);
+ share->is_log_table= TRUE;
+ pthread_mutex_unlock(&share->intern_lock);
+ break;
+ case HA_EXTRA_KEY_CACHE:
+ case HA_EXTRA_NO_KEY_CACHE:
+ default:
+ break;
+ }
+ {
+ char tmp[1];
+ tmp[0]=function;
+ }
+ DBUG_RETURN(error);
+} /* maria_extra */
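+
+/*
+  Usage sketch for the extra_arg parameter documented above (the wrapper
+  name is hypothetical, for illustration only): for HA_EXTRA_WRITE_CACHE
+  and HA_EXTRA_CACHE the argument, when given, points to the wanted cache
+  size in bytes; passing NULL falls back to my_default_record_cache_size.
+*/
+
+static int sketch_enable_write_cache(MARIA_HA *info, ulong cache_size)
+{
+  return maria_extra(info, HA_EXTRA_WRITE_CACHE, (void*) &cache_size);
+}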
+
+
+/*
+ Start/Stop Inserting Duplicates Into a Table, WL#1648.
+ */
+static void maria_extra_keyflag(MARIA_HA *info, enum ha_extra_function function)
+{
+ uint idx;
+
+ for (idx= 0; idx< info->s->base.keys; idx++)
+ {
+ switch (function) {
+ case HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
+ info->s->keyinfo[idx].flag|= HA_NOSAME;
+ break;
+ case HA_EXTRA_CHANGE_KEY_TO_DUP:
+ info->s->keyinfo[idx].flag&= ~(HA_NOSAME);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+
+int maria_reset(MARIA_HA *info)
+{
+ int error= 0;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("maria_reset");
+ /*
+ Free buffers and reset the following flags:
+ EXTRA_CACHE, EXTRA_WRITE_CACHE, EXTRA_KEYREAD, EXTRA_QUICK
+
+ If the row buffer cache is large (for dynamic tables), reduce it
+ to save memory.
+ */
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ error= end_io_cache(&info->rec_cache);
+ }
+ if (share->base.blobs && info->rec_buff_size >
+ share->base.default_rec_buff_size)
+ {
+ info->rec_buff_size= 1; /* Force realloc */
+ _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ share->base.default_rec_buff_size);
+ }
+#if defined(HAVE_MMAP) && defined(HAVE_MADVISE)
+ if (info->opt_flag & MEMMAP_USED)
+ madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM);
+#endif
+ info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS);
+ info->quick_mode=0;
+ info->lastinx= 0; /* Use first index as def */
+ info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->page_changed= 1;
+ info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND |
+ HA_STATE_PREV_FOUND);
+ DBUG_RETURN(error);
+}
+
+
+int _ma_sync_table_files(const MARIA_HA *info)
+{
+ return (my_sync(info->dfile, MYF(0)) ||
+ my_sync(info->s->kfile, MYF(0)));
+}
diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c
new file mode 100644
index 00000000000..06b044e9e38
--- /dev/null
+++ b/storage/maria/ma_ft_boolean_search.c
@@ -0,0 +1,964 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* TODO: add caching - pre-read several index entries at once */
+
+/*
+ Added optimization for full-text queries with plus-words. It was
+ implemented by sharing a maximal document id (max_docid) variable
+ inside a plus subtree. max_docid can be used by any word in the plus
+ subtree, but it can be updated by plus-words only.
+
+ The idea is: there is no need to search for a docid smaller than the
+ biggest docid inside the current plus subtree.
+
+ Examples:
+ +word1 word2
+ share same max_docid
+ max_docid updated by word1
+ +word1 +(word2 word3)
+ share same max_docid
+ max_docid updated by word1
+ +(word1 -word2) +(+word3 word4)
+ share same max_docid
+ max_docid updated by word3
+*/
+
+#define FT_CORE
+#include "ma_ftdefs.h"
+
+/* search with boolean queries */
+
+static double _wghts[11]=
+{
+ 0.131687242798354,
+ 0.197530864197531,
+ 0.296296296296296,
+ 0.444444444444444,
+ 0.666666666666667,
+ 1.000000000000000,
+ 1.500000000000000,
+ 2.250000000000000,
+ 3.375000000000000,
+ 5.062500000000000,
+ 7.593750000000000};
+static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */
+
+static double _nwghts[11]=
+{
+ -0.065843621399177,
+ -0.098765432098766,
+ -0.148148148148148,
+ -0.222222222222222,
+ -0.333333333333334,
+ -0.500000000000000,
+ -0.750000000000000,
+ -1.125000000000000,
+ -1.687500000000000,
+ -2.531250000000000,
+ -3.796875000000000};
+static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
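+
+/*
+  A minimal sketch (hypothetical helper, for illustration only) of how the
+  weight tables above are indexed: wghts and nwghts point at the middle of
+  the arrays, so weight_adjust values clamped to -5..5 are valid indexes
+  (see ftb_query_add_word() below).
+*/
+
+static float sketch_word_weight(int weight_adjust, my_bool negative_word)
+{
+  int r= weight_adjust;
+  if (r > 5)
+    r= 5;
+  if (r < -5)
+    r= -5;
+  return (float) (negative_word ? nwghts : wghts)[r];
+}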
+
+#define FTB_FLAG_TRUNC 1
+/* At most one of the following flags can be set */
+#define FTB_FLAG_YES 2
+#define FTB_FLAG_NO 4
+#define FTB_FLAG_WONLY 8
+
+typedef struct st_ftb_expr FTB_EXPR;
+struct st_ftb_expr
+{
+ FTB_EXPR *up;
+ uint flags;
+/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
+ my_off_t docid[2];
+ my_off_t max_docid;
+ float weight;
+ float cur_weight;
+ LIST *phrase; /* phrase words */
+ LIST *document; /* for phrase search */
+ uint yesses; /* number of "yes" words matched */
+ uint nos; /* number of "no" words matched */
+ uint ythresh; /* number of "yes" words in expr */
+ uint yweaks; /* number of "yes" words for scan only */
+};
+
+typedef struct st_ftb_word
+{
+ FTB_EXPR *up;
+ uint flags;
+/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
+ my_off_t docid[2]; /* for index search and for scan */
+ my_off_t key_root;
+ my_off_t *max_docid;
+ MARIA_KEYDEF *keyinfo;
+ struct st_ftb_word *prev;
+ float weight;
+ uint ndepth;
+ uint len;
+ uchar off;
+ byte word[1];
+} FTB_WORD;
+
+typedef struct st_ft_info
+{
+ struct _ft_vft *please;
+ MARIA_HA *info;
+ CHARSET_INFO *charset;
+ FTB_EXPR *root;
+ FTB_WORD **list;
+ FTB_WORD *last_word;
+ MEM_ROOT mem_root;
+ QUEUE queue;
+ TREE no_dupes;
+ my_off_t lastpos;
+ uint keynr;
+ uchar with_scan;
+ enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
+} FTB;
+
+static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
+{
+ int i;
+
+ /* if a==curdoc, take it as a < b */
+ if (v && a->docid[0] == *v)
+ return -1;
+
+ /* ORDER BY docid, ndepth DESC */
+ i=CMP_NUM(a->docid[0], b->docid[0]);
+ if (!i)
+ i=CMP_NUM(b->ndepth,a->ndepth);
+ return i;
+}
+
+static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
+{
+ /* ORDER BY word DESC, ndepth DESC */
+ int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
+ (uchar*) (*a)->word+1,(*a)->len-1,0,0);
+ if (!i)
+ i=CMP_NUM((*b)->ndepth,(*a)->ndepth);
+ return i;
+}
+
+
+typedef struct st_my_ftb_param
+{
+ FTB *ftb;
+ FTB_EXPR *ftbe;
+ byte *up_quot;
+ uint depth;
+} MY_FTB_PARAM;
+
+
+static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
+ char *word, int word_len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *info)
+{
+ MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
+ FTB_WORD *ftbw;
+ FTB_EXPR *ftbe, *tmp_expr;
+ FT_WORD *phrase_word;
+ LIST *tmp_element;
+ int r= info->weight_adjust;
+ float weight= (float)
+ (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
+
+ switch (info->type) {
+ case FT_TOKEN_WORD:
+ ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
+ sizeof(FTB_WORD) +
+ (info->trunc ? HA_MAX_KEY_BUFF :
+ word_len * ftb_param->ftb->charset->mbmaxlen +
+ HA_FT_WLEN +
+ ftb_param->ftb->info->s->rec_reflength));
+ ftbw->len= word_len + 1;
+ ftbw->flags= 0;
+ ftbw->off= 0;
+ if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES;
+ if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO;
+ if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC;
+ ftbw->weight= weight;
+ ftbw->up= ftb_param->ftbe;
+ ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR;
+ ftbw->ndepth= (info->yesno < 0) + ftb_param->depth;
+ ftbw->key_root= HA_OFFSET_ERROR;
+ memcpy(ftbw->word + 1, word, word_len);
+ ftbw->word[0]= word_len;
+ if (info->yesno > 0) ftbw->up->ythresh++;
+ ftb_param->ftb->queue.max_elements++;
+ ftbw->prev= ftb_param->ftb->last_word;
+ ftb_param->ftb->last_word= ftbw;
+ ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC);
+ for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up)
+ if (! (tmp_expr->flags & FTB_FLAG_YES))
+ break;
+ ftbw->max_docid= &tmp_expr->max_docid;
+ /* fall through */
+ case FT_TOKEN_STOPWORD:
+ if (! ftb_param->up_quot) break;
+ phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
+ tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
+ phrase_word->pos= word;
+ phrase_word->len= word_len;
+ tmp_element->data= (void *)phrase_word;
+ ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
+ /* Allocate document list at this point.
+ It allows us to avoid a huge amount of allocs/frees for each row. */
+ tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
+ tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
+ ftb_param->ftbe->document=
+ list_add(ftb_param->ftbe->document, tmp_element);
+ break;
+ case FT_TOKEN_LEFT_PAREN:
+ ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR));
+ ftbe->flags= 0;
+ if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES;
+ if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO;
+ ftbe->weight= weight;
+ ftbe->up= ftb_param->ftbe;
+ ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
+ ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR;
+ ftbe->phrase= NULL;
+ ftbe->document= 0;
+ if (info->quot) ftb_param->ftb->with_scan|= 2;
+ if (info->yesno > 0) ftbe->up->ythresh++;
+ ftb_param->ftbe= ftbe;
+ ftb_param->depth++;
+ ftb_param->up_quot= info->quot;
+ break;
+ case FT_TOKEN_RIGHT_PAREN:
+ if (ftb_param->ftbe->document)
+ {
+ /* Circuit document list */
+ for (tmp_element= ftb_param->ftbe->document;
+ tmp_element->next; tmp_element= tmp_element->next) /* no-op */;
+ tmp_element->next= ftb_param->ftbe->document;
+ ftb_param->ftbe->document->prev= tmp_element;
+ }
+ info->quot= 0;
+ if (ftb_param->ftbe->up)
+ {
+ DBUG_ASSERT(ftb_param->depth);
+ ftb_param->ftbe= ftb_param->ftbe->up;
+ ftb_param->depth--;
+ ftb_param->up_quot= 0;
+ }
+ break;
+ case FT_TOKEN_EOF:
+ default:
+ break;
+ }
+ return(0);
+}
+
+
+static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param,
+ char *query, int len)
+{
+ MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
+ MYSQL_FTPARSER_BOOLEAN_INFO info;
+ CHARSET_INFO *cs= ftb_param->ftb->charset;
+ char **start= &query;
+ char *end= query + len;
+ FT_WORD w;
+
+ info.prev= ' ';
+ info.quot= 0;
+ while (maria_ft_get_word(cs, start, end, &w, &info))
+ param->mysql_add_word(param, w.pos, w.len, &info);
+ return(0);
+}
+
+
+static void _ftb_parse_query(FTB *ftb, byte *query, uint len,
+ struct st_mysql_ftparser *parser)
+{
+ MYSQL_FTPARSER_PARAM *param;
+ MY_FTB_PARAM ftb_param;
+ DBUG_ENTER("_ftb_parse_query");
+ DBUG_ASSERT(parser);
+
+ if (ftb->state != UNINITIALIZED)
+ DBUG_VOID_RETURN;
+ if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
+ DBUG_VOID_RETURN;
+
+ ftb_param.ftb= ftb;
+ ftb_param.depth= 0;
+ ftb_param.ftbe= ftb->root;
+ ftb_param.up_quot= 0;
+
+ param->mysql_parse= ftb_parse_query_internal;
+ param->mysql_add_word= ftb_query_add_word;
+ param->mysql_ftparam= (void *)&ftb_param;
+ param->cs= ftb->charset;
+ param->doc= query;
+ param->length= len;
+ param->flags= 0;
+ param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
+ parser->parse(param);
+ DBUG_VOID_RETURN;
+}
+
+
+static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
+ const void *a,const void *b)
+{
+ return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
+}
+
+/* returns 1 if the search was finished (must-word wasn't found) */
+static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
+{
+ int r;
+ int subkeys=1;
+ my_bool can_go_down;
+ MARIA_HA *info=ftb->info;
+ uint off= 0, extra=HA_FT_WLEN+info->s->base.rec_reflength;
+ byte *lastkey_buf= ftbw->word+ftbw->off;
+
+ if (ftbw->flags & FTB_FLAG_TRUNC)
+ lastkey_buf+=ftbw->len;
+
+ if (init_search)
+ {
+ ftbw->key_root=info->s->state.key_root[ftb->keynr];
+ ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
+
+ r= _ma_search(info, ftbw->keyinfo, ftbw->word, ftbw->len,
+ SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
+ }
+ else
+ {
+ uint sflag= SEARCH_BIGGER;
+ if (ftbw->docid[0] < *ftbw->max_docid)
+ {
+ sflag|= SEARCH_SAME;
+ _ma_dpointer(info, (ftbw->word + ftbw->len + HA_FT_WLEN),
+ *ftbw->max_docid);
+ }
+ r= _ma_search(info, ftbw->keyinfo, lastkey_buf,
+ USE_WHOLE_KEY, sflag, ftbw->key_root);
+ }
+
+ can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC)));
+ /* Skip rows inserted by concurrent insert */
+ while (!r)
+ {
+ if (can_go_down)
+ {
+ /* going down ? */
+ off=info->lastkey_length-extra;
+ subkeys=ft_sintXkorr(info->lastkey+off);
+ }
+ if (subkeys<0 || info->cur_row.lastpos < info->state->data_file_length)
+ break;
+ r= _ma_search_next(info, ftbw->keyinfo, info->lastkey,
+ info->lastkey_length,
+ SEARCH_BIGGER, ftbw->key_root);
+ }
+
+ if (!r && !ftbw->off)
+ {
+ r= ha_compare_text(ftb->charset,
+ (uchar*) info->lastkey+1,
+ info->lastkey_length-extra-1,
+ (uchar*) ftbw->word+1,
+ ftbw->len-1,
+ (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0);
+ }
+
+ if (r) /* not found */
+ {
+ if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC))
+ {
+ ftbw->docid[0]=HA_OFFSET_ERROR;
+ if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0)
+ {
+ /*
+ This word MUST BE present in every document returned,
+ so we can stop the search right now
+ */
+ ftb->state=INDEX_DONE;
+ return 1; /* search is done */
+ }
+ else
+ return 0;
+ }
+
+ /* going up to the first-level tree to continue search there */
+ _ma_dpointer(info, (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
+ ftbw->key_root=info->s->state.key_root[ftb->keynr];
+ ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
+ ftbw->off=0;
+ return _ft2_search(ftb, ftbw, 0);
+ }
+
+ /* matching key found */
+ memcpy(lastkey_buf, info->lastkey, info->lastkey_length);
+ if (lastkey_buf == ftbw->word)
+ ftbw->len=info->lastkey_length-extra;
+
+ /* going down ? */
+ if (subkeys<0)
+ {
+ /*
+ yep, going down, to the second-level tree
+ TODO here: subkey-based optimization
+ */
+ ftbw->off=off;
+ ftbw->key_root= info->cur_row.lastpos;
+ ftbw->keyinfo=& info->s->ft2_keyinfo;
+ r= _ma_search_first(info, ftbw->keyinfo, ftbw->key_root);
+ DBUG_ASSERT(r==0); /* found something */
+ memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length);
+ }
+ ftbw->docid[0]= info->cur_row.lastpos;
+ if (ftbw->flags & FTB_FLAG_YES)
+ *ftbw->max_docid= info->cur_row.lastpos;
+ return 0;
+}
+
+static void _ftb_init_index_search(FT_INFO *ftb)
+{
+ int i;
+ FTB_WORD *ftbw;
+
+ if ((ftb->state != READY && ftb->state !=INDEX_DONE) ||
+ ftb->keynr == NO_SUCH_KEY)
+ return;
+ ftb->state=INDEX_SEARCH;
+
+ for (i=ftb->queue.elements; i; i--)
+ {
+ ftbw=(FTB_WORD *)(ftb->queue.root[i]);
+
+ if (ftbw->flags & FTB_FLAG_TRUNC)
+ {
+ /*
+ special treatment for truncation operator
+ 1. there are some (besides this) +words
+ | no need to search in the index, it can never ADD new rows
+ | to the result, and to remove half-matched rows we do scan anyway
+ 2. -trunc*
+ | same as 1.
+ 3. in 1 and 2, +/- need not be on the same expr. level,
+ but can be on any upper level, as in +word +(trunc1* trunc2*)
+ 4. otherwise
+ | We have to index-search for this prefix.
+ | It may cause duplicates, as in the index (sorted by <word,docid>)
+ | <aaaa,row1>
+ | <aabb,row2>
+ | <aacc,row1>
+ | Searching for "aa*" will find row1 twice...
+ */
+ FTB_EXPR *ftbe;
+ for (ftbe=(FTB_EXPR*)ftbw;
+ ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC);
+ ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up)
+ {
+ if (ftbe->flags & FTB_FLAG_NO || /* 2 */
+ ftbe->up->ythresh - ftbe->up->yweaks >1) /* 1 */
+ {
+ FTB_EXPR *top_ftbe=ftbe->up;
+ ftbw->docid[0]=HA_OFFSET_ERROR;
+ for (ftbe=(FTB_EXPR *)ftbw;
+ ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO);
+ ftbe=ftbe->up)
+ ftbe->up->yweaks++;
+ ftbe=0;
+ break;
+ }
+ }
+ if (!ftbe)
+ continue;
+ /* 4 */
+ if (!is_tree_inited(& ftb->no_dupes))
+ init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t),
+ _ftb_no_dupes_cmp,0,0,0);
+ else
+ reset_tree(& ftb->no_dupes);
+ }
+
+ ftbw->off=0; /* in case of reinit */
+ if (_ft2_search(ftb, ftbw, 1))
+ return;
+ }
+ queue_fix(& ftb->queue);
+}
+
+
+FT_INFO * maria_ft_init_boolean_search(MARIA_HA *info, uint keynr, byte *query,
+ uint query_len, CHARSET_INFO *cs)
+{
+ FTB *ftb;
+ FTB_EXPR *ftbe;
+ FTB_WORD *ftbw;
+
+ if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
+ return 0;
+ ftb->please= (struct _ft_vft *) & _ma_ft_vft_boolean;
+ ftb->state=UNINITIALIZED;
+ ftb->info=info;
+ ftb->keynr=keynr;
+ ftb->charset=cs;
+ DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset);
+ ftb->with_scan=0;
+ ftb->lastpos=HA_OFFSET_ERROR;
+ bzero(& ftb->no_dupes, sizeof(TREE));
+ ftb->last_word= 0;
+
+ init_alloc_root(&ftb->mem_root, 1024, 1024);
+ ftb->queue.max_elements= 0;
+ if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR))))
+ goto err;
+ ftbe->weight=1;
+ ftbe->flags=FTB_FLAG_YES;
+ ftbe->nos=1;
+ ftbe->up=0;
+ ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
+ ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
+ ftbe->phrase= NULL;
+ ftbe->document= 0;
+ ftb->root=ftbe;
+ _ftb_parse_query(ftb, query, query_len, keynr == NO_SUCH_KEY ?
+ &ft_default_parser :
+ info->s->keyinfo[keynr].parser);
+ /*
+ Hack: instead of init_queue, we'll use reinit queue to be able
+ to alloc queue with alloc_root()
+ */
+ if (! (ftb->queue.root= (byte **)alloc_root(&ftb->mem_root,
+ (ftb->queue.max_elements + 1) *
+ sizeof(void *))))
+ goto err;
+ reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
+ (int (*)(void*, byte*, byte*))FTB_WORD_cmp, 0);
+ for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
+ queue_insert(&ftb->queue, (byte *)ftbw);
+ ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
+ sizeof(FTB_WORD *)*ftb->queue.elements);
+ memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
+ qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
+ (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset);
+ if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
+ ftb->state=READY;
+ return ftb;
+err:
+ free_root(& ftb->mem_root, MYF(0));
+ my_free((gptr)ftb,MYF(0));
+ return 0;
+}
+
+
+typedef struct st_my_ftb_phrase_param
+{
+ LIST *phrase;
+ LIST *document;
+ CHARSET_INFO *cs;
+ uint phrase_length;
+ uint document_length;
+ uint match;
+} MY_FTB_PHRASE_PARAM;
+
+
+static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param,
+ char *word, int word_len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
+{
+ MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
+ FT_WORD *w= (FT_WORD *)phrase_param->document->data;
+ LIST *phrase, *document;
+ w->pos= word;
+ w->len= word_len;
+ phrase_param->document= phrase_param->document->prev;
+ if (phrase_param->phrase_length > phrase_param->document_length)
+ {
+ phrase_param->document_length++;
+ return 0;
+ }
+ /* TODO: rewrite phrase search to avoid
+ comparing the same word twice. */
+ for (phrase= phrase_param->phrase, document= phrase_param->document->next;
+ phrase; phrase= phrase->next, document= document->next)
+ {
+ FT_WORD *phrase_word= (FT_WORD *)phrase->data;
+ FT_WORD *document_word= (FT_WORD *)document->data;
+ if (my_strnncoll(phrase_param->cs,
+ (uchar*) phrase_word->pos, phrase_word->len,
+ (uchar*) document_word->pos, document_word->len))
+ return 0;
+ }
+ phrase_param->match++;
+ return 0;
+}
+
+
+static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param,
+ char *document, int len)
+{
+ FT_WORD word;
+ MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
+ const char *docend= document + len;
+ while (maria_ft_simple_get_word(phrase_param->cs, &document, docend, &word, FALSE))
+ {
+ param->mysql_add_word(param, word.pos, word.len, 0);
+ if (phrase_param->match)
+ return 1;
+ }
+ return 0;
+}
+
+
+/*
+ Checks if given buffer matches phrase list.
+
+ SYNOPSIS
+ _ftb_check_phrase()
+ s0 start of buffer
+ e0 end of buffer
+ phrase broken into list phrase
+ cs charset info
+
+ RETURN VALUE
+ 1 is returned if the phrase is found, 0 otherwise.
+*/
+
+static int _ftb_check_phrase(FTB *ftb, const byte *document, uint len,
+ FTB_EXPR *ftbe, struct st_mysql_ftparser *parser)
+{
+ MY_FTB_PHRASE_PARAM ftb_param;
+ MYSQL_FTPARSER_PARAM *param;
+ DBUG_ENTER("_ftb_check_phrase");
+ DBUG_ASSERT(parser);
+
+ if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 1)))
+ DBUG_RETURN(0);
+ ftb_param.phrase= ftbe->phrase;
+ ftb_param.document= ftbe->document;
+ ftb_param.cs= ftb->charset;
+ ftb_param.phrase_length= list_length(ftbe->phrase);
+ ftb_param.document_length= 1;
+ ftb_param.match= 0;
+
+ param->mysql_parse= ftb_check_phrase_internal;
+ param->mysql_add_word= ftb_phrase_add_word;
+ param->mysql_ftparam= (void *)&ftb_param;
+ param->cs= ftb->charset;
+ param->doc= (byte *)document;
+ param->length= len;
+ param->flags= 0;
+ param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
+ parser->parse(param);
+ DBUG_RETURN(ftb_param.match ? 1 : 0);
+}
+
+
+static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig)
+{
+ FT_SEG_ITERATOR ftsi;
+ FTB_EXPR *ftbe;
+ float weight=ftbw->weight;
+ int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0);
+ my_off_t curdoc=ftbw->docid[mode];
+ struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
+ &ft_default_parser :
+ ftb->info->s->keyinfo[ftb->keynr].parser;
+
+ for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
+ {
+ ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks);
+ if (ftbe->docid[mode] != curdoc)
+ {
+ ftbe->cur_weight=0;
+ ftbe->yesses=ftbe->nos=0;
+ ftbe->docid[mode]=curdoc;
+ }
+ if (ftbe->nos)
+ break;
+ if (yn & FTB_FLAG_YES)
+ {
+ weight /= ftbe->ythresh;
+ ftbe->cur_weight += weight;
+ if ((int) ++ftbe->yesses == ythresh)
+ {
+ yn=ftbe->flags;
+ weight=ftbe->cur_weight*ftbe->weight;
+ if (mode && ftbe->phrase)
+ {
+ int not_found=1;
+
+ memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
+ while (_ma_ft_segiterator(&ftsi) && not_found)
+ {
+ if (!ftsi.pos)
+ continue;
+ not_found = ! _ftb_check_phrase(ftb, ftsi.pos, ftsi.len,
+ ftbe, parser);
+ }
+ if (not_found) break;
+ } /* ftbe->quot */
+ }
+ else
+ break;
+ }
+ else
+ if (yn & FTB_FLAG_NO)
+ {
+ /*
+ NOTE: the queue's special sort function ensures that, for every
+ particular subexpression, all (yn & FTB_FLAG_NO) != 0 events
+ happen BEFORE all (yn & FTB_FLAG_YES) != 0 events, so an
+ already matched expression can never become unmatched again.
+ */
+ ++ftbe->nos;
+ break;
+ }
+ else
+ {
+ if (ftbe->ythresh)
+ weight/=3;
+ ftbe->cur_weight += weight;
+ if ((int) ftbe->yesses < ythresh)
+ break;
+ if (!(yn & FTB_FLAG_WONLY))
+ yn= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ;
+ weight*= ftbe->weight;
+ }
+ }
+}
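
_ftb_climb_the_tree() propagates a word hit up the expression tree: each node counts +word hits (yesses) against its threshold (ythresh), and a single -word hit (nos) disqualifies the node for the current document. A flat, single-node sketch of that accounting, with hypothetical TOY_ names:

#include <stdio.h>

typedef struct
{
  int ythresh;        /* number of required (+) words                   */
  int yesses;         /* required words seen for the current docid      */
  int nos;            /* forbidden (-) words seen for the current docid */
} TOY_EXPR;

/* Register one word hit; return 1 once the expression is satisfied. */
static int toy_hit(TOY_EXPR *e, int is_no_word)
{
  if (is_no_word)
  {
    e->nos++;                         /* one -word kills the match */
    return 0;
  }
  e->yesses++;
  return e->nos == 0 && e->yesses >= e->ythresh;
}

int main(void)
{
  TOY_EXPR e= { 2, 0, 0 };            /* query like: +apple +pie -cake */
  printf("%d\n", toy_hit(&e, 0));     /* apple -> 0, threshold not met */
  printf("%d\n", toy_hit(&e, 0));     /* pie   -> 1, both +words seen  */
  return 0;
}
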
+
+
+int maria_ft_boolean_read_next(FT_INFO *ftb, char *record)
+{
+ FTB_EXPR *ftbe;
+ FTB_WORD *ftbw;
+ MARIA_HA *info=ftb->info;
+ my_off_t curdoc;
+
+ if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
+ return -1;
+
+ /* black magic ON */
+ if ((int) _ma_check_index(info, ftb->keynr) < 0)
+ return my_errno;
+ if (_ma_readinfo(info, F_RDLCK, 1))
+ return my_errno;
+ /* black magic OFF */
+
+ if (!ftb->queue.elements)
+ return my_errno=HA_ERR_END_OF_FILE;
+
+ /* Attention!!! Address of a local variable is used here! See err: label */
+ ftb->queue.first_cmp_arg=(void *)&curdoc;
+
+ while (ftb->state == INDEX_SEARCH &&
+ (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) !=
+ HA_OFFSET_ERROR)
+ {
+ while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0])
+ {
+ _ftb_climb_the_tree(ftb, ftbw, 0);
+
+ /* update queue */
+ _ft2_search(ftb, ftbw, 0);
+ queue_replaced(& ftb->queue);
+ }
+
+ ftbe=ftb->root;
+ if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 &&
+ ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
+ {
+ /* curdoc matched ! */
+ if (is_tree_inited(&ftb->no_dupes) &&
+ tree_insert(&ftb->no_dupes, &curdoc, 0,
+ ftb->no_dupes.custom_arg)->count >1)
+ /* but it has already managed to get past this line once */
+ continue;
+
+ info->cur_row.lastpos= curdoc;
+ /* Clear all states, except that the table was updated */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ if (!(*info->read_record)(info, record, curdoc))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ if (ftb->with_scan && maria_ft_boolean_find_relevance(ftb,record,0)==0)
+ continue; /* no match */
+ my_errno=0;
+ goto err;
+ }
+ goto err;
+ }
+ }
+ ftb->state=INDEX_DONE;
+ my_errno=HA_ERR_END_OF_FILE;
+err:
+ ftb->queue.first_cmp_arg=(void *)0;
+ return my_errno;
+}
+
+
+typedef struct st_my_ftb_find_param
+{
+ FT_INFO *ftb;
+ FT_SEG_ITERATOR *ftsi;
+} MY_FTB_FIND_PARAM;
+
+
+static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
+ char *word, int len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
+{
+ MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
+ FT_INFO *ftb= ftb_param->ftb;
+ FTB_WORD *ftbw;
+ int a, b, c;
+ for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
+ {
+ ftbw= ftb->list[c];
+ if (ha_compare_text(ftb->charset, (uchar*)word, len,
+ (uchar*)ftbw->word+1, ftbw->len-1,
+ (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0)
+ b= c;
+ else
+ a= c;
+ }
+ for (; c >= 0; c--)
+ {
+ ftbw= ftb->list[c];
+ if (ha_compare_text(ftb->charset, (uchar*)word, len,
+ (uchar*)ftbw->word + 1,ftbw->len - 1,
+ (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0))
+ break;
+ if (ftbw->docid[1] == ftb->info->cur_row.lastpos)
+ continue;
+ ftbw->docid[1]= ftb->info->cur_row.lastpos;
+ _ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi);
+ }
+ return(0);
+}
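
The lookup above combines a binary search over the sorted word list with a backwards scan over every entry that still compares equal (a truncated word can match several of them). A minimal sketch of that pattern on a plain ascending int array, for illustration only; the names are hypothetical and the real code compares with the charset-aware ha_compare_text() and its own ordering conventions:

#include <stdio.h>

/* Return via *first/*last the inclusive range of entries equal to key,
   using a binary search followed by a backwards scan, or 0 if absent.
   arr[] is assumed sorted in ascending order. */
static int toy_equal_range(const int *arr, int n, int key,
                           int *first, int *last)
{
  int a= 0, b= n, c;
  while (b - a > 1)                       /* keep arr[a] <= key < arr[b] */
  {
    c= (a + b) / 2;
    if (arr[c] > key)
      b= c;
    else
      a= c;
  }
  if (n == 0 || arr[a] != key)
    return 0;
  *last= a;
  while (a > 0 && arr[a - 1] == key)      /* scan backwards over equal entries */
    a--;
  *first= a;
  return 1;
}

int main(void)
{
  int arr[]= { 1, 3, 3, 3, 7, 9 };
  int first, last;
  if (toy_equal_range(arr, 6, 3, &first, &last))
    printf("[%d..%d]\n", first, last);    /* prints [1..3] */
  return 0;
}
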
+
+
+static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param,
+ char *doc, int len)
+{
+ MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
+ FT_INFO *ftb= ftb_param->ftb;
+ char *end= doc + len;
+ FT_WORD w;
+ while (maria_ft_simple_get_word(ftb->charset, &doc, end, &w, TRUE))
+ param->mysql_add_word(param, w.pos, w.len, 0);
+ return(0);
+}
+
+
+float maria_ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
+{
+ FTB_EXPR *ftbe;
+ FT_SEG_ITERATOR ftsi, ftsi2;
+ MARIA_RECORD_POS docid= ftb->info->cur_row.lastpos;
+ MY_FTB_FIND_PARAM ftb_param;
+ MYSQL_FTPARSER_PARAM *param;
+ struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
+ &ft_default_parser :
+ ftb->info->s->keyinfo[ftb->keynr].parser;
+
+ if (docid == HA_OFFSET_ERROR)
+ return -2.0;
+ if (!ftb->queue.elements)
+ return 0;
+ if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
+ return 0;
+
+ if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
+ {
+ FTB_EXPR *x;
+ uint i;
+
+ for (i=0; i < ftb->queue.elements; i++)
+ {
+ ftb->list[i]->docid[1]=HA_OFFSET_ERROR;
+ for (x=ftb->list[i]->up; x; x=x->up)
+ x->docid[1]=HA_OFFSET_ERROR;
+ }
+ }
+
+ ftb->lastpos=docid;
+
+ if (ftb->keynr==NO_SUCH_KEY)
+ _ma_ft_segiterator_dummy_init(record, length, &ftsi);
+ else
+ _ma_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
+ memcpy(&ftsi2, &ftsi, sizeof(ftsi));
+
+ ftb_param.ftb= ftb;
+ ftb_param.ftsi= &ftsi2;
+ param->mysql_parse= ftb_find_relevance_parse;
+ param->mysql_add_word= ftb_find_relevance_add_word;
+ param->mysql_ftparam= (void *)&ftb_param;
+ param->flags= 0;
+ param->cs= ftb->charset;
+ param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
+
+ while (_ma_ft_segiterator(&ftsi))
+ {
+ if (!ftsi.pos)
+ continue;
+ param->doc= (byte *)ftsi.pos;
+ param->length= ftsi.len;
+ parser->parse(param);
+ }
+ ftbe=ftb->root;
+ if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
+ ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
+ { /* row matched ! */
+ return ftbe->cur_weight;
+ }
+ else
+ { /* match failed ! */
+ return 0.0;
+ }
+}
+
+
+void maria_ft_boolean_close_search(FT_INFO *ftb)
+{
+ if (is_tree_inited(& ftb->no_dupes))
+ {
+ delete_tree(& ftb->no_dupes);
+ }
+ free_root(& ftb->mem_root, MYF(0));
+ my_free((gptr)ftb,MYF(0));
+}
+
+
+float maria_ft_boolean_get_relevance(FT_INFO *ftb)
+{
+ return ftb->root->cur_weight;
+}
+
+
+void maria_ft_boolean_reinit_search(FT_INFO *ftb)
+{
+ _ftb_init_index_search(ftb);
+}
diff --git a/storage/maria/ma_ft_eval.c b/storage/maria/ma_ft_eval.c
new file mode 100644
index 00000000000..fe4900aeb64
--- /dev/null
+++ b/storage/maria/ma_ft_eval.c
@@ -0,0 +1,255 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code;
+ added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
+
+#include "ma_ftdefs.h"
+#include "maria_ft_eval.h"
+#include <stdarg.h>
+#include <my_getopt.h>
+
+static void print_error(int exit_code, const char *fmt,...);
+static void get_options(int argc, char *argv[]);
+static int create_record(char *pos, FILE *file);
+static void usage();
+
+static struct my_option my_long_options[] =
+{
+ {"", 's', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'q', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", '#', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+int main(int argc, char *argv[])
+{
+ MARIA_HA *file;
+ int i,j;
+
+ MY_INIT(argv[0]);
+ get_options(argc,argv);
+ bzero((char*)recinfo,sizeof(recinfo));
+
+ maria_init();
+ /* First define 2 columns */
+ recinfo[0].type=FIELD_SKIP_ENDSPACE;
+ recinfo[0].length=docid_length;
+ recinfo[1].type=FIELD_BLOB;
+ recinfo[1].length= 4+maria_portable_sizeof_char_ptr;
+
+ /* Define a key over the first column */
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[0].seg[0].type= HA_KEYTYPE_TEXT;
+ keyinfo[0].seg[0].flag= HA_BLOB_PART;
+ keyinfo[0].seg[0].start=recinfo[0].length;
+ keyinfo[0].seg[0].length=key_length;
+ keyinfo[0].seg[0].null_bit=0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].seg[0].bit_start=4;
+ keyinfo[0].seg[0].language=MY_CHARSET_CURRENT;
+ keyinfo[0].flag = HA_FULLTEXT;
+
+ if (!silent)
+ printf("- Creating isam-file\n");
+ if (maria_create(filename,1,keyinfo,2,recinfo,0,NULL,(MARIA_CREATE_INFO*) 0,0))
+ goto err;
+ if (!(file=maria_open(filename,2,0)))
+ goto err;
+ if (!silent)
+ printf("Initializing stopwords\n");
+ maria_ft_init_stopwords(stopwordlist);
+
+ if (!silent)
+ printf("- Writing key:s\n");
+
+ my_errno=0;
+ i=0;
+ while (create_record(record,df))
+ {
+ error=maria_write(file,record);
+ if (error)
+ printf("I= %2d maria_write: %d errno: %d\n",i,error,my_errno);
+ i++;
+ }
+ fclose(df);
+
+ if (maria_close(file)) goto err;
+ if (!silent)
+ printf("- Reopening file\n");
+ if (!(file=maria_open(filename,2,0))) goto err;
+ if (!silent)
+ printf("- Reading rows with key\n");
+ for (i=1;create_record(record,qf);i++)
+ {
+ FT_DOCLIST *result;
+ double w;
+ int t, err;
+
+ result=maria_ft_nlq_init_search(file,0,blob_record,(uint) strlen(blob_record),1);
+ if (!result)
+ {
+ printf("Query %d failed with errno %3d\n",i,my_errno);
+ goto err;
+ }
+ if (!silent)
+ printf("Query %d. Found: %d.\n",i,result->ndocs);
+ for (j=0;(err=maria_ft_nlq_read_next(result, read_record))==0;j++)
+ {
+ t=uint2korr(read_record);
+ w=maria_ft_nlq_get_relevance(result);
+ printf("%d %.*s %f\n",i,t,read_record+2,w);
+ }
+ if (err != HA_ERR_END_OF_FILE)
+ {
+ printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno);
+ goto err;
+ }
+ maria_ft_nlq_close_search(result);
+ }
+
+ if (maria_close(file)) goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return (0);
+
+ err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ return 1; /* skip warning */
+
+}
+
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ switch (optid) {
+ case 's':
+ if (stopwordlist && stopwordlist != maria_ft_precompiled_stopwords)
+ break;
+ {
+ FILE *f; char s[HA_FT_MAXLEN]; int i=0,n=SWL_INIT;
+
+ if (!(stopwordlist=(const char**) malloc(n*sizeof(char *))))
+ print_error(1,"malloc(%d)",n*sizeof(char *));
+ if (!(f=fopen(argument,"r")))
+ print_error(1,"fopen(%s)",argument);
+ while (!feof(f))
+ {
+ if (!(fgets(s,HA_FT_MAXLEN,f)))
+ print_error(1,"fgets(s,%d,%s)",HA_FT_MAXLEN,argument);
+ if (!(stopwordlist[i++]=strdup(s)))
+ print_error(1,"strdup(%s)",s);
+ if (i >= n)
+ {
+ n+=SWL_PLUS;
+ if (!(stopwordlist=(const char**) realloc((char*) stopwordlist,
+ n*sizeof(char *))))
+ print_error(1,"realloc(%d)",n*sizeof(char *));
+ }
+ }
+ fclose(f);
+ stopwordlist[i]=NULL;
+ break;
+ }
+ case 'q': silent=1; break;
+ case 'S': if (stopwordlist==maria_ft_precompiled_stopwords) stopwordlist=NULL; break;
+ case '#':
+ DBUG_PUSH (argument);
+ break;
+ case 'V':
+ case '?':
+ case 'h':
+ usage();
+ exit(1);
+ }
+ return 0;
+}
+
+
+static void get_options(int argc, char *argv[])
+{
+ int ho_error;
+
+ if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option)))
+ exit(ho_error);
+
+ if (!(d_file=argv[optind])) print_error(1,"No d_file");
+ if (!(df=fopen(d_file,"r")))
+ print_error(1,"fopen(%s)",d_file);
+ if (!(q_file=argv[optind+1])) print_error(1,"No q_file");
+ if (!(qf=fopen(q_file,"r")))
+ print_error(1,"fopen(%s)",q_file);
+ return;
+} /* get options */
+
+
+static int create_record(char *pos, FILE *file)
+{
+ uint tmp; char *ptr;
+
+ bzero((char *)pos,MAX_REC_LENGTH);
+
+ /* column 1 - VARCHAR */
+ if (!(fgets(pos+2,MAX_REC_LENGTH-32,file)))
+ {
+ if (feof(file))
+ return 0;
+ else
+ print_error(1,"fgets(docid) - 1");
+ }
+ tmp=(uint) strlen(pos+2)-1;
+ int2store(pos,tmp);
+ pos+=recinfo[0].length;
+
+ /* column 2 - BLOB */
+
+ if (!(fgets(blob_record,MAX_BLOB_LENGTH,file)))
+ print_error(1,"fgets(docid) - 2");
+ tmp=(uint) strlen(blob_record);
+ int4store(pos,tmp);
+ ptr=blob_record;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ return 1;
+}
+
+/* VARARGS */
+
+static void print_error(int exit_code, const char *fmt,...)
+{
+ va_list args;
+
+ va_start(args,fmt);
+ fprintf(stderr,"%s: error: ",my_progname);
+ VOID(vfprintf(stderr, fmt, args));
+ VOID(fputc('\n',stderr));
+ fflush(stderr);
+ va_end(args);
+ exit(exit_code);
+}
+
+
+static void usage()
+{
+ printf("%s [options]\n", my_progname);
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+}
diff --git a/storage/maria/ma_ft_eval.h b/storage/maria/ma_ft_eval.h
new file mode 100644
index 00000000000..d9b5c51642c
--- /dev/null
+++ b/storage/maria/ma_ft_eval.h
@@ -0,0 +1,42 @@
+/* Copyright (C) 2006 MySQL AB & Sergei A. Golubchik
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+const char **stopwordlist=maria_ft_precompiled_stopwords;
+
+#define MAX_REC_LENGTH 128
+#define MAX_BLOB_LENGTH 60000
+char record[MAX_REC_LENGTH], read_record[MAX_REC_LENGTH+MAX_BLOB_LENGTH];
+char blob_record[MAX_BLOB_LENGTH+20*20];
+
+char *filename= (char*) "EVAL";
+
+int silent=0, error=0;
+
+uint key_length=MAX_BLOB_LENGTH,docid_length=32;
+char *d_file, *q_file;
+FILE *df,*qf;
+
+MARIA_COLUMNDEF recinfo[3];
+MARIA_KEYDEF keyinfo[2];
+HA_KEYSEG keyseg[10];
+
+#define SWL_INIT 500
+#define SWL_PLUS 50
+
+#define MAX_LINE_LENGTH 128
+char line[MAX_LINE_LENGTH];
diff --git a/storage/maria/ma_ft_nlq_search.c b/storage/maria/ma_ft_nlq_search.c
new file mode 100644
index 00000000000..4c922516455
--- /dev/null
+++ b/storage/maria/ma_ft_nlq_search.c
@@ -0,0 +1,371 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+#define FT_CORE
+#include "ma_ftdefs.h"
+
+/* search with natural language queries */
+
+typedef struct ft_doc_rec
+{
+ my_off_t dpos;
+ double weight;
+} FT_DOC;
+
+struct st_ft_info
+{
+ struct _ft_vft *please;
+ MARIA_HA *info;
+ int ndocs;
+ int curdoc;
+ FT_DOC doc[1];
+};
+
+typedef struct st_all_in_one
+{
+ MARIA_HA *info;
+ uint keynr;
+ CHARSET_INFO *charset;
+ uchar *keybuff;
+ TREE dtree;
+} ALL_IN_ONE;
+
+typedef struct st_ft_superdoc
+{
+ FT_DOC doc;
+ FT_WORD *word_ptr;
+ double tmp_weight;
+} FT_SUPERDOC;
+
+static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)),
+ FT_SUPERDOC *p1, FT_SUPERDOC *p2)
+{
+ if (p1->doc.dpos < p2->doc.dpos)
+ return -1;
+ if (p1->doc.dpos == p2->doc.dpos)
+ return 0;
+ return 1;
+}
+
+static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
+{
+ int subkeys, r;
+ uint keylen, doc_cnt;
+ FT_SUPERDOC sdoc, *sptr;
+ TREE_ELEMENT *selem;
+ double gweight=1;
+ MARIA_HA *info= aio->info;
+ byte *keybuff= (byte*) aio->keybuff;
+ MARIA_KEYDEF *keyinfo=info->s->keyinfo+aio->keynr;
+ my_off_t key_root=info->s->state.key_root[aio->keynr];
+ uint extra=HA_FT_WLEN+info->s->base.rec_reflength;
+#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
+ float tmp_weight;
+#else
+#error
+#endif
+
+ DBUG_ENTER("walk_and_match");
+
+ word->weight=LWS_FOR_QUERY;
+
+ keylen= _ma_ft_make_key(info,aio->keynr,(char*) keybuff,word,0);
+ keylen-=HA_FT_WLEN;
+ doc_cnt=0;
+
+ /* Skip rows inserted by the current insert */
+ for (r= _ma_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root) ;
+ !r &&
+ (subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
+ info->cur_row.lastpos >= info->state->data_file_length ;
+ r= _ma_search_next(info, keyinfo, info->lastkey,
+ info->lastkey_length, SEARCH_BIGGER, key_root))
+ ;
+
+ info->update|= HA_STATE_AKTIV; /* for _ma_test_if_changed() */
+
+ /* The following should be safe, even if we compare doubles */
+ while (!r && gweight)
+ {
+
+ if (keylen &&
+ ha_compare_text(aio->charset,
+ (uchar*) info->lastkey+1, info->lastkey_length-extra-1,
+ (uchar*) keybuff+1, keylen-1, 0, 0))
+ break;
+
+ if (subkeys<0)
+ {
+ if (doc_cnt)
+ DBUG_RETURN(1); /* index is corrupted */
+ /*
+ TODO here: unsafe optimization, should this word
+ be skipped (based on subkeys) ?
+ */
+ keybuff+=keylen;
+ keyinfo=& info->s->ft2_keyinfo;
+ key_root= info->cur_row.lastpos;
+ keylen=0;
+ r= _ma_search_first(info, keyinfo, key_root);
+ goto do_skip;
+ }
+#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
+ tmp_weight=*(float*)&subkeys;
+#else
+#error
+#endif
+ /* The following should be safe, even if we compare doubles */
+ if (tmp_weight==0)
+ DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */
+
+ sdoc.doc.dpos= info->cur_row.lastpos;
+
+ /* saving document matched into dtree */
+ if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg)))
+ DBUG_RETURN(1);
+
+ sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem);
+
+ if (selem->count==1) /* document's first match */
+ sptr->doc.weight=0;
+ else
+ sptr->doc.weight+=sptr->tmp_weight*sptr->word_ptr->weight;
+
+ sptr->word_ptr=word;
+ sptr->tmp_weight=tmp_weight;
+
+ doc_cnt++;
+
+ gweight=word->weight*GWS_IN_USE;
+ if (gweight < 0 || doc_cnt > 2000000)
+ gweight=0;
+
+ if (_ma_test_if_changed(info) == 0)
+ r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
+ SEARCH_BIGGER, key_root);
+ else
+ r= _ma_search(info, keyinfo, info->lastkey, info->lastkey_length,
+ SEARCH_BIGGER, key_root);
+do_skip:
+ while ((subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 &&
+ !r && info->cur_row.lastpos >= info->state->data_file_length)
+ r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length,
+ SEARCH_BIGGER, key_root);
+
+ }
+ word->weight=gweight;
+
+ DBUG_RETURN(0);
+}
+
+
+static int walk_and_copy(FT_SUPERDOC *from,
+ uint32 count __attribute__((unused)), FT_DOC **to)
+{
+ DBUG_ENTER("walk_and_copy");
+ from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
+ (*to)->dpos=from->doc.dpos;
+ (*to)->weight=from->doc.weight;
+ (*to)++;
+ DBUG_RETURN(0);
+}
+
+static int walk_and_push(FT_SUPERDOC *from,
+ uint32 count __attribute__((unused)), QUEUE *best)
+{
+ DBUG_ENTER("walk_and_copy");
+ from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
+ set_if_smaller(best->elements, ft_query_expansion_limit-1);
+ queue_insert(best, (byte *)& from->doc);
+ DBUG_RETURN(0);
+}
+
+
+static int FT_DOC_cmp(void *unused __attribute__((unused)),
+ FT_DOC *a, FT_DOC *b)
+{
+ return sgn(b->weight - a->weight);
+}
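
FT_DOC_cmp() orders documents by descending weight by comparing b against a. A standalone sketch of the same comparator shape with the standard qsort() (the real code uses qsort2() with an extra argument and sgn() over the difference); the TOY_ names are illustrative:

#include <stdio.h>
#include <stdlib.h>

typedef struct { long dpos; double weight; } TOY_DOC;

/* Descending by weight: compare b to a, as FT_DOC_cmp() does. */
static int toy_doc_cmp(const void *pa, const void *pb)
{
  const TOY_DOC *a= pa, *b= pb;
  return (b->weight > a->weight) - (b->weight < a->weight);
}

int main(void)
{
  TOY_DOC docs[]= { {1, 0.3}, {2, 0.9}, {3, 0.5} };
  int i;
  qsort(docs, 3, sizeof(TOY_DOC), toy_doc_cmp);
  for (i= 0; i < 3; i++)
    printf("%ld %g\n", docs[i].dpos, docs[i].weight);   /* 2, 3, 1 */
  return 0;
}
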
+
+
+FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query,
+ uint query_len, uint flags, byte *record)
+{
+ TREE wtree;
+ ALL_IN_ONE aio;
+ FT_DOC *dptr;
+ FT_INFO *dlist=NULL;
+ MARIA_RECORD_POS saved_lastpos= info->cur_row.lastpos;
+ struct st_mysql_ftparser *parser;
+ MYSQL_FTPARSER_PARAM *ftparser_param;
+ DBUG_ENTER("maria_ft_init_nlq_search");
+
+ /* black magic ON */
+ if ((int) (keynr = _ma_check_index(info,keynr)) < 0)
+ DBUG_RETURN(NULL);
+ if (_ma_readinfo(info,F_RDLCK,1))
+ DBUG_RETURN(NULL);
+ /* black magic OFF */
+
+ aio.info=info;
+ aio.keynr=keynr;
+ aio.charset=info->s->keyinfo[keynr].seg->charset;
+ aio.keybuff= (uchar*) info->lastkey+info->s->base.max_key_length;
+ parser= info->s->keyinfo[keynr].parser;
+ if (! (ftparser_param= maria_ftparser_call_initializer(info, keynr, 0)))
+ goto err;
+
+ bzero(&wtree,sizeof(wtree));
+
+ init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0,
+ NULL, NULL);
+
+ maria_ft_parse_init(&wtree, aio.charset);
+ ftparser_param->flags= 0;
+ if (maria_ft_parse(&wtree, query, query_len, parser, ftparser_param,
+ &wtree.mem_root))
+ goto err;
+
+ if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
+ left_root_right))
+ goto err;
+
+ if (flags & FT_EXPAND && ft_query_expansion_limit)
+ {
+ QUEUE best;
+ init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp,
+ 0);
+ tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push,
+ &best, left_root_right);
+ while (best.elements)
+ {
+ my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos;
+ if (!(*info->read_record)(info, record, docid))
+ {
+ info->update|= HA_STATE_AKTIV;
+ ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY;
+ _ma_ft_parse(&wtree, info, keynr, record, ftparser_param,
+ &wtree.mem_root);
+ }
+ }
+ delete_queue(&best);
+ reset_tree(&aio.dtree);
+ if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
+ left_root_right))
+ goto err;
+
+ }
+
+ /*
+ If ndocs == 0, this does not allocate any RAM for FT_INFO.doc[],
+ so FT_INFO.doc[] must not be accessed in that case.
+ */
+ dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+
+ sizeof(FT_DOC)*
+ (int)(aio.dtree.elements_in_tree-1),
+ MYF(0));
+ if (!dlist)
+ goto err;
+
+ dlist->please= (struct _ft_vft *) & _ma_ft_vft_nlq;
+ dlist->ndocs=aio.dtree.elements_in_tree;
+ dlist->curdoc=-1;
+ dlist->info=aio.info;
+ dptr=dlist->doc;
+
+ tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy,
+ &dptr, left_root_right);
+
+ if (flags & FT_SORTED)
+ qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort2_cmp)&FT_DOC_cmp, 0);
+
+err:
+ delete_tree(&aio.dtree);
+ delete_tree(&wtree);
+ info->cur_row.lastpos= saved_lastpos;
+ DBUG_RETURN(dlist);
+}
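
The FT_INFO allocation above relies on the classic one-element-array idiom: st_ft_info ends with FT_DOC doc[1] and my_malloc() adds room for (ndocs - 1) more elements, which is also why doc[] must not be touched when ndocs == 0. A minimal sketch of the idiom with plain malloc(), using hypothetical TOY_ names:

#include <stdio.h>
#include <stdlib.h>

typedef struct { long dpos; double weight; } TOY_DOC;

typedef struct
{
  int ndocs;
  TOY_DOC doc[1];               /* really ndocs elements, allocated below */
} TOY_LIST;

static TOY_LIST *toy_list_alloc(int ndocs)
{
  /* One element is already inside the struct, so add ndocs - 1 more.
     For ndocs == 0 nothing extra is allocated and doc[] must not be used. */
  TOY_LIST *list= malloc(sizeof(TOY_LIST) +
                         sizeof(TOY_DOC) * (ndocs > 0 ? ndocs - 1 : 0));
  if (list)
    list->ndocs= ndocs;
  return list;
}

int main(void)
{
  TOY_LIST *list= toy_list_alloc(3);
  if (!list)
    return 1;
  list->doc[2].weight= 0.5;     /* last valid element */
  printf("%d %g\n", list->ndocs, list->doc[2].weight);
  free(list);
  return 0;
}
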
+
+
+int maria_ft_nlq_read_next(FT_INFO *handler, char *record)
+{
+ MARIA_HA *info= (MARIA_HA *) handler->info;
+
+ if (++handler->curdoc >= handler->ndocs)
+ {
+ --handler->curdoc;
+ return HA_ERR_END_OF_FILE;
+ }
+
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ info->cur_row.lastpos= handler->doc[handler->curdoc].dpos;
+ if (!(*info->read_record)(info, record, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ return 0;
+ }
+ return my_errno;
+}
+
+
+float maria_ft_nlq_find_relevance(FT_INFO *handler,
+ byte *record __attribute__((unused)),
+ uint length __attribute__((unused)))
+{
+ int a,b,c;
+ FT_DOC *docs=handler->doc;
+ MARIA_RECORD_POS docid= handler->info->cur_row.lastpos;
+
+ if (docid == HA_POS_ERROR)
+ return -5.0;
+
+ /* Assuming docs[] is sorted by dpos... */
+
+ for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2)
+ {
+ if (docs[c].dpos > docid)
+ b=c;
+ else
+ a=c;
+ }
+ /* bounds check to avoid accessing unallocated handler->doc */
+ if (a < handler->ndocs && docs[a].dpos == docid)
+ return (float) docs[a].weight;
+ else
+ return 0.0;
+}
+
+
+void maria_ft_nlq_close_search(FT_INFO *handler)
+{
+ my_free((gptr)handler,MYF(0));
+}
+
+
+float maria_ft_nlq_get_relevance(FT_INFO *handler)
+{
+ return (float) handler->doc[handler->curdoc].weight;
+}
+
+
+void maria_ft_nlq_reinit_search(FT_INFO *handler)
+{
+ handler->curdoc=-1;
+}
+
diff --git a/storage/maria/ma_ft_parser.c b/storage/maria/ma_ft_parser.c
new file mode 100644
index 00000000000..24713c1344f
--- /dev/null
+++ b/storage/maria/ma_ft_parser.c
@@ -0,0 +1,425 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+#include "ma_ftdefs.h"
+
+typedef struct st_maria_ft_docstat {
+ FT_WORD *list;
+ uint uniq;
+ double sum;
+} FT_DOCSTAT;
+
+
+typedef struct st_my_maria_ft_parser_param
+{
+ TREE *wtree;
+ MEM_ROOT *mem_root;
+} MY_FT_PARSER_PARAM;
+
+
+static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
+{
+ return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
+ (uchar*) w2->pos, w2->len, 0, 0);
+}
+
+static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
+{
+ word->weight=LWS_IN_USE;
+ docstat->sum+=word->weight;
+ memcpy_fixed((docstat->list)++,word,sizeof(FT_WORD));
+ return 0;
+}
+
+/* transforms tree of words into the array, applying normalization */
+
+FT_WORD * maria_ft_linearize(TREE *wtree, MEM_ROOT *mem_root)
+{
+ FT_WORD *wlist,*p;
+ FT_DOCSTAT docstat;
+ DBUG_ENTER("maria_ft_linearize");
+
+ if ((wlist=(FT_WORD *) alloc_root(mem_root, sizeof(FT_WORD)*
+ (1+wtree->elements_in_tree))))
+ {
+ docstat.list=wlist;
+ docstat.uniq=wtree->elements_in_tree;
+ docstat.sum=0;
+ tree_walk(wtree,(tree_walk_action)&walk_and_copy,&docstat,left_root_right);
+ }
+ delete_tree(wtree);
+ if (!wlist)
+ DBUG_RETURN(NULL);
+
+ docstat.list->pos=NULL;
+
+ for (p=wlist;p->pos;p++)
+ {
+ p->weight=PRENORM_IN_USE;
+ }
+
+ for (p=wlist;p->pos;p++)
+ {
+ p->weight/=NORM_IN_USE;
+ }
+
+ DBUG_RETURN(wlist);
+}
+
+my_bool maria_ft_boolean_check_syntax_string(const byte *str)
+{
+ uint i, j;
+
+ if (!str ||
+ (strlen(str)+1 != sizeof(ft_boolean_syntax)) ||
+ (str[0] != ' ' && str[1] != ' '))
+ return 1;
+ for (i=0; i<sizeof(ft_boolean_syntax); i++)
+ {
+ /* limiting to 7-bit ascii only */
+ if ((unsigned char)(str[i]) > 127 ||
+ my_isalnum(default_charset_info, str[i]))
+ return 1;
+ for (j=0; j<i; j++)
+ if (str[i] == str[j] && (i != 11 || j != 10))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ RETURN VALUE
+ 0 - eof
+ 1 - word found
+ 2 - left bracket
+ 3 - right bracket
+ 4 - stopword found
+*/
+byte maria_ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
+ FT_WORD *word, MYSQL_FTPARSER_BOOLEAN_INFO *param)
+{
+ byte *doc=*start;
+ int ctype;
+ uint mwc, length, mbl;
+
+ param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
+ param->weight_adjust= param->wasign= 0;
+ param->type= FT_TOKEN_EOF;
+
+ while (doc<end)
+ {
+ for (; doc < end; doc+= (mbl > 0 ? mbl : 1))
+ {
+ mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
+ if (true_word_char(ctype, *doc))
+ break;
+ if (*doc == FTB_RQUOT && param->quot)
+ {
+ param->quot=doc;
+ *start=doc+1;
+ param->type= FT_TOKEN_RIGHT_PAREN;
+ goto ret;
+ }
+ if (!param->quot)
+ {
+ if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
+ {
+ /* param->prev=' '; */
+ *start=doc+1;
+ if (*doc == FTB_LQUOT) param->quot=*start;
+ param->type= (*doc == FTB_RBR ? FT_TOKEN_RIGHT_PAREN : FT_TOKEN_LEFT_PAREN);
+ goto ret;
+ }
+ if (param->prev == ' ')
+ {
+ if (*doc == FTB_YES ) { param->yesno=+1; continue; } else
+ if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else
+ if (*doc == FTB_NO ) { param->yesno=-1; continue; } else
+ if (*doc == FTB_INC ) { param->weight_adjust++; continue; } else
+ if (*doc == FTB_DEC ) { param->weight_adjust--; continue; } else
+ if (*doc == FTB_NEG ) { param->wasign= !param->wasign; continue; }
+ }
+ }
+ param->prev=*doc;
+ param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
+ param->weight_adjust= param->wasign= 0;
+ }
+
+ mwc=length=0;
+ for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
+ {
+ mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
+ if (true_word_char(ctype, *doc))
+ mwc=0;
+ else if (!misc_word_char(*doc) || mwc)
+ break;
+ else
+ mwc++;
+ }
+ param->prev='A'; /* make sure prev is a true_word_char */
+ word->len= (uint)(doc-word->pos) - mwc;
+ if ((param->trunc=(doc<end && *doc == FTB_TRUNC)))
+ doc++;
+
+ if (((length >= ft_min_word_len && !is_stopword(word->pos, word->len))
+ || param->trunc) && length < ft_max_word_len)
+ {
+ *start=doc;
+ param->type= FT_TOKEN_WORD;
+ goto ret;
+ }
+ else if (length) /* make sure length > 0 (if start contains spaces only) */
+ {
+ *start= doc;
+ param->type= FT_TOKEN_STOPWORD;
+ goto ret;
+ }
+ }
+ if (param->quot)
+ {
+ param->quot=*start=doc;
+ param->type= 3; /* FT_TOKEN_RIGHT_PAREN */
+ goto ret;
+ }
+ret:
+ return param->type;
+}
+
+byte maria_ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
+ FT_WORD *word, my_bool skip_stopwords)
+{
+ byte *doc= *start;
+ uint mwc, length, mbl;
+ int ctype;
+ DBUG_ENTER("maria_ft_simple_get_word");
+
+ do
+ {
+ for (;; doc+= (mbl > 0 ? mbl : 1))
+ {
+ if (doc >= end)
+ DBUG_RETURN(0);
+ mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
+ if (true_word_char(ctype, *doc))
+ break;
+ }
+
+ mwc= length= 0;
+ for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1))
+ {
+ mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
+ if (true_word_char(ctype, *doc))
+ mwc= 0;
+ else if (!misc_word_char(*doc) || mwc)
+ break;
+ else
+ mwc++;
+ }
+
+ word->len= (uint)(doc-word->pos) - mwc;
+
+ if (skip_stopwords == FALSE ||
+ (length >= ft_min_word_len && length < ft_max_word_len &&
+ !is_stopword(word->pos, word->len)))
+ {
+ *start= doc;
+ DBUG_RETURN(1);
+ }
+ } while (doc < end);
+ DBUG_RETURN(0);
+}
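
maria_ft_simple_get_word() walks the buffer in two phases: skip characters until a true word character is found, then collect the word, letting single "misc" characters (counted in mwc) appear inside but not at the end of a word. A byte-oriented sketch of the same two-phase loop, using isalnum() and '\'' as stand-ins for the charset-aware true_word_char()/misc_word_char(); all toy_ names are illustrative:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Extract the next word from *start..end; return 1 and set word/len,
   or 0 at end of buffer.  Single quotes may appear inside a word
   ("don't") but trailing ones are stripped, mirroring the mwc logic. */
static int toy_get_word(const char **start, const char *end,
                        const char **word, int *len)
{
  const char *doc= *start;
  int mwc;

  for (;;)
  {
    for (;; doc++)                          /* phase 1: find a word char */
    {
      if (doc >= end)
        return 0;
      if (isalnum((unsigned char) *doc))
        break;
    }
    *word= doc;
    for (mwc= 0; doc < end; doc++)          /* phase 2: collect the word */
    {
      if (isalnum((unsigned char) *doc))
        mwc= 0;
      else if (*doc != '\'' || mwc)
        break;
      else
        mwc++;
    }
    *len= (int) (doc - *word) - mwc;        /* drop trailing misc chars */
    if (*len > 0)
    {
      *start= doc;
      return 1;
    }
  }
}

int main(void)
{
  const char *text= "don't stop...", *p= text, *w;
  const char *end= text + strlen(text);
  int len;
  while (toy_get_word(&p, end, &w, &len))
    printf("%.*s\n", len, w);               /* prints: don't / stop */
  return 0;
}
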
+
+void maria_ft_parse_init(TREE *wtree, CHARSET_INFO *cs)
+{
+ DBUG_ENTER("maria_ft_parse_init");
+ if (!is_tree_inited(wtree))
+ init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, cs);
+ DBUG_VOID_RETURN;
+}
+
+
+static int maria_ft_add_word(MYSQL_FTPARSER_PARAM *param,
+ char *word, int word_len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
+{
+ TREE *wtree;
+ FT_WORD w;
+ MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam;
+ DBUG_ENTER("maria_ft_add_word");
+ wtree= ft_param->wtree;
+ if (param->flags & MYSQL_FTFLAGS_NEED_COPY)
+ {
+ byte *ptr;
+ DBUG_ASSERT(wtree->with_delete == 0);
+ ptr= (byte *)alloc_root(ft_param->mem_root, word_len);
+ memcpy(ptr, word, word_len);
+ w.pos= ptr;
+ }
+ else
+ w.pos= word;
+ w.len= word_len;
+ if (!tree_insert(wtree, &w, 0, wtree->custom_arg))
+ {
+ delete_tree(wtree);
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+
+static int maria_ft_parse_internal(MYSQL_FTPARSER_PARAM *param,
+ char *doc, int doc_len)
+{
+ byte *end=doc+doc_len;
+ MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam;
+ TREE *wtree= ft_param->wtree;
+ FT_WORD w;
+ DBUG_ENTER("maria_ft_parse_internal");
+
+ while (maria_ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE))
+ if (param->mysql_add_word(param, w.pos, w.len, 0))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+int maria_ft_parse(TREE *wtree, byte *doc, int doclen,
+ struct st_mysql_ftparser *parser,
+ MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root)
+{
+ MY_FT_PARSER_PARAM my_param;
+ DBUG_ENTER("maria_ft_parse");
+ DBUG_ASSERT(parser);
+ my_param.wtree= wtree;
+ my_param.mem_root= mem_root;
+
+ param->mysql_parse= maria_ft_parse_internal;
+ param->mysql_add_word= maria_ft_add_word;
+ param->mysql_ftparam= &my_param;
+ param->cs= wtree->custom_arg;
+ param->doc= doc;
+ param->length= doclen;
+ param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
+ DBUG_RETURN(parser->parse(param));
+}
+
+
+#define MAX_PARAM_NR 2
+MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info,
+ uint keynr, uint paramnr)
+{
+ uint32 ftparser_nr;
+ struct st_mysql_ftparser *parser;
+ if (! info->ftparser_param)
+ {
+ /* info->ftparser_param cannot be zero after initialization, because
+ it always includes the built-in fulltext parser, and the built-in
+ parser can be called even if the table has no fulltext indexes and
+ no varchar/text fields. */
+ if (! info->s->ftparsers)
+ {
+ /* It's OK that this shared structure is modified without mutex
+ locks, because all threads would set the same variables to the
+ same values. */
+ uint i, j, keys= info->s->state.header.keys, ftparsers= 1;
+ for (i= 0; i < keys; i++)
+ {
+ MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i];
+ if (keyinfo->flag & HA_FULLTEXT)
+ {
+ for (j= 0;; j++)
+ {
+ if (j == i)
+ {
+ keyinfo->ftparser_nr= ftparsers++;
+ break;
+ }
+ if (info->s->keyinfo[j].flag & HA_FULLTEXT &&
+ keyinfo->parser == info->s->keyinfo[j].parser)
+ {
+ keyinfo->ftparser_nr= info->s->keyinfo[j].ftparser_nr;
+ break;
+ }
+ }
+ }
+ }
+ info->s->ftparsers= ftparsers;
+ }
+ /*
+ We have to allocate two MYSQL_FTPARSER_PARAM structures per plugin
+ because in a boolean search a parser is called recursively
+ ftb_find_relevance* calls ftb_check_phrase*
+ (MAX_PARAM_NR=2)
+ */
+ info->ftparser_param= (MYSQL_FTPARSER_PARAM *)
+ my_malloc(MAX_PARAM_NR * sizeof(MYSQL_FTPARSER_PARAM) *
+ info->s->ftparsers, MYF(MY_WME|MY_ZEROFILL));
+ init_alloc_root(&info->ft_memroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0);
+ if (! info->ftparser_param)
+ return 0;
+ }
+ if (keynr == NO_SUCH_KEY)
+ {
+ ftparser_nr= 0;
+ parser= &ft_default_parser;
+ }
+ else
+ {
+ ftparser_nr= info->s->keyinfo[keynr].ftparser_nr;
+ parser= info->s->keyinfo[keynr].parser;
+ }
+ DBUG_ASSERT(paramnr < MAX_PARAM_NR);
+ ftparser_nr= ftparser_nr*MAX_PARAM_NR + paramnr;
+ if (! info->ftparser_param[ftparser_nr].mysql_add_word)
+ {
+ /* Note that mysql_add_word is used here as a flag:
+ mysql_add_word == 0 - parser is not initialized
+ mysql_add_word != 0 - parser is initialized, or no
+ initialization is needed. */
+ info->ftparser_param[ftparser_nr].mysql_add_word= (void *)1;
+ if (parser->init && parser->init(&info->ftparser_param[ftparser_nr]))
+ return 0;
+ }
+ return &info->ftparser_param[ftparser_nr];
+}
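
The parameter array allocated above is a flat 2-D layout: MAX_PARAM_NR (here 2) slots per distinct parser, addressed as ftparser_nr * MAX_PARAM_NR + paramnr, so the outer relevance pass and the nested phrase check each get their own slot. A tiny sketch of that indexing, with hypothetical names:

#include <stdio.h>

#define SLOTS_PER_PARSER 2            /* mirrors MAX_PARAM_NR */

/* Flat index of (parser_nr, param_nr) in a [nparsers * SLOTS_PER_PARSER]
   array: slot 0 is the outer search, slot 1 the nested phrase check. */
static unsigned toy_param_index(unsigned parser_nr, unsigned param_nr)
{
  return parser_nr * SLOTS_PER_PARSER + param_nr;
}

int main(void)
{
  unsigned parser, param;
  for (parser= 0; parser < 2; parser++)
    for (param= 0; param < SLOTS_PER_PARSER; param++)
      printf("parser %u, param %u -> slot %u\n",
             parser, param, toy_param_index(parser, param));
  return 0;
}
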
+
+
+void maria_ftparser_call_deinitializer(MARIA_HA *info)
+{
+ uint i, j, keys= info->s->state.header.keys;
+ free_root(&info->ft_memroot, MYF(0));
+ if (! info->ftparser_param)
+ return;
+ for (i= 0; i < keys; i++)
+ {
+ MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i];
+ for (j=0; j < MAX_PARAM_NR; j++)
+ {
+ MYSQL_FTPARSER_PARAM *ftparser_param=
+ &info->ftparser_param[keyinfo->ftparser_nr*MAX_PARAM_NR + j];
+ if (keyinfo->flag & HA_FULLTEXT && ftparser_param->mysql_add_word)
+ {
+ if (keyinfo->parser->deinit)
+ keyinfo->parser->deinit(ftparser_param);
+ ftparser_param->mysql_add_word= 0;
+ }
+ else
+ break;
+ }
+ }
+}
diff --git a/storage/maria/ma_ft_stem.c b/storage/maria/ma_ft_stem.c
new file mode 100644
index 00000000000..7a2f8cfd7c5
--- /dev/null
+++ b/storage/maria/ma_ft_stem.c
@@ -0,0 +1,19 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* multilingual stem */
diff --git a/storage/maria/ma_ft_test1.c b/storage/maria/ma_ft_test1.c
new file mode 100644
index 00000000000..595c31e774c
--- /dev/null
+++ b/storage/maria/ma_ft_test1.c
@@ -0,0 +1,318 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code;
+ added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
+
+#include "ma_ftdefs.h"
+#include "maria_ft_test1.h"
+#include <my_getopt.h>
+
+static int key_field=FIELD_VARCHAR,extra_field=FIELD_SKIP_ENDSPACE;
+static uint key_length=200,extra_length=50;
+static int key_type=HA_KEYTYPE_TEXT;
+static int verbose=0,silent=0,skip_update=0,
+ no_keys=0,no_stopwords=0,no_search=0,no_fulltext=0;
+static int create_flag=0,error=0;
+
+#define MAX_REC_LENGTH 300
+static char record[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH];
+
+static int run_test(const char *filename);
+static void get_options(int argc, char *argv[]);
+static void create_record(char *, int);
+static void usage();
+
+static struct my_option my_long_options[] =
+{
+ {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 's', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'N', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'K', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'F', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", 'U', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"", '#', "", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+int main(int argc, char *argv[])
+{
+ MY_INIT(argv[0]);
+
+ get_options(argc,argv);
+ maria_init();
+
+ exit(run_test("FT1"));
+}
+
+static MARIA_COLUMNDEF recinfo[3];
+static MARIA_KEYDEF keyinfo[2];
+static HA_KEYSEG keyseg[10];
+
+static int run_test(const char *filename)
+{
+ MARIA_HA *file;
+ int i,j;
+ my_off_t pos;
+
+ bzero((char*) recinfo,sizeof(recinfo));
+
+ /* First define 2 columns */
+ recinfo[0].type=extra_field;
+ recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + maria_portable_sizeof_char_ptr :
+ extra_length);
+ if (extra_field == FIELD_VARCHAR)
+ recinfo[0].length+= HA_VARCHAR_PACKLENGTH(extra_length);
+ recinfo[1].type=key_field;
+ recinfo[1].length= (key_field == FIELD_BLOB ? 4+maria_portable_sizeof_char_ptr :
+ key_length);
+ if (key_field == FIELD_VARCHAR)
+ recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);
+
+ /* Define a key over the first column */
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[0].seg[0].type= key_type;
+ keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB) ? HA_BLOB_PART:
+ (key_field == FIELD_VARCHAR) ? HA_VAR_LENGTH_PART:0;
+ keyinfo[0].seg[0].start=recinfo[0].length;
+ keyinfo[0].seg[0].length=key_length;
+ keyinfo[0].seg[0].null_bit= 0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].seg[0].language= default_charset_info->number;
+ keyinfo[0].flag = (no_fulltext?HA_PACK_KEY:HA_FULLTEXT);
+
+ if (!silent)
+ printf("- Creating isam-file\n");
+ if (maria_create(filename,(no_keys?0:1),keyinfo,2,recinfo,0,NULL,
+ (MARIA_CREATE_INFO*) 0, create_flag))
+ goto err;
+ if (!(file=maria_open(filename,2,0)))
+ goto err;
+
+ if (!silent)
+ printf("- %s stopwords\n",no_stopwords?"Skipping":"Initializing");
+ maria_ft_init_stopwords(no_stopwords?NULL:maria_ft_precompiled_stopwords);
+
+ if (!silent)
+ printf("- Writing key:s\n");
+
+ my_errno=0;
+ for (i=NUPD ; i<NDATAS; i++ )
+ {
+ create_record(record,i);
+ error=maria_write(file,record);
+ if (verbose || error)
+ printf("I= %2d maria_write: %d errno: %d, record: %s\n",
+ i,error,my_errno,data[i].f0);
+ }
+
+ if (!skip_update)
+ {
+ if (!silent)
+ printf("- Updating rows\n");
+
+ /* Read through all rows and update them */
+ pos=(ha_rows) 0;
+ i=0;
+ while ((error=maria_rrnd(file,read_record,pos)) == 0)
+ {
+ create_record(record,NUPD-i-1);
+ if (maria_update(file,read_record,record))
+ {
+ printf("Can't update row: %.*s, error: %d\n",
+ keyinfo[0].seg[0].length,record,my_errno);
+ }
+ if(++i == NUPD) break;
+ pos=HA_OFFSET_ERROR;
+ }
+ if (i != NUPD)
+ printf("Found %d of %d rows\n", i,NUPD);
+ }
+
+ if (maria_close(file)) goto err;
+ if(no_search) return 0;
+ if (!silent)
+ printf("- Reopening file\n");
+ if (!(file=maria_open(filename,2,0))) goto err;
+ if (!silent)
+ printf("- Reading rows with key\n");
+ for (i=0 ; i < NQUERIES ; i++)
+ {
+ FT_DOCLIST *result;
+ result=maria_ft_nlq_init_search(file,0,(char*) query[i],strlen(query[i]),1);
+ if(!result)
+ {
+ printf("Query %d: `%s' failed with errno %3d\n",i,query[i],my_errno);
+ continue;
+ }
+ printf("Query %d: `%s'. Found: %d. Top five documents:\n",
+ i,query[i],result->ndocs);
+ for (j=0;j<5;j++)
+ {
+ double w; int err;
+ err= maria_ft_nlq_read_next(result, read_record);
+ if (err==HA_ERR_END_OF_FILE)
+ {
+ printf("No more matches!\n");
+ break;
+ }
+ else if (err)
+ {
+ printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno);
+ break;
+ }
+ w=maria_ft_nlq_get_relevance(result);
+ if (key_field == FIELD_VARCHAR)
+ {
+ uint l;
+ char *p;
+ p=recinfo[0].length+read_record;
+ l=uint2korr(p);
+ printf("%10.7f: %.*s\n",w,(int) l,p+2);
+ }
+ else
+ printf("%10.7f: %.*s\n",w,recinfo[1].length,
+ recinfo[0].length+read_record);
+ }
+ maria_ft_nlq_close_search(result);
+ }
+
+ if (maria_close(file)) goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return (0);
+err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ return 1; /* skip warning */
+}
+
+static char blob_key[MAX_REC_LENGTH];
+/* static char blob_record[MAX_REC_LENGTH+20*20]; */
+
+void create_record(char *pos, int n)
+{
+ bzero((char*) pos,MAX_REC_LENGTH);
+ if (recinfo[0].type == FIELD_BLOB)
+ {
+ uint tmp;
+ char *ptr;
+ strnmov(blob_key,data[n].f0,keyinfo[0].seg[0].length);
+ tmp=strlen(blob_key);
+ int4store(pos,tmp);
+ ptr=blob_key;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ pos+=recinfo[0].length;
+ }
+ else if (recinfo[0].type == FIELD_VARCHAR)
+ {
+ uint tmp;
+ /* -1 is here because pack_length is stored in seg->length */
+ uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1);
+ strnmov(pos+pack_length,data[n].f0,keyinfo[0].seg[0].length);
+ tmp=strlen(pos+pack_length);
+ if (pack_length == 1)
+ *pos= (char) tmp;
+ else
+ int2store(pos,tmp);
+ pos+=recinfo[0].length;
+ }
+ else
+ {
+ strnmov(pos,data[n].f0,keyinfo[0].seg[0].length);
+ pos+=recinfo[0].length;
+ }
+ if (recinfo[1].type == FIELD_BLOB)
+ {
+ uint tmp;
+ char *ptr;
+ strnmov(blob_key,data[n].f2,keyinfo[0].seg[0].length);
+ tmp=strlen(blob_key);
+ int4store(pos,tmp);
+ ptr=blob_key;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ pos+=recinfo[1].length;
+ }
+ else if (recinfo[1].type == FIELD_VARCHAR)
+ {
+ uint tmp;
+ /* -1 is here because pack_length is stored in seg->length */
+ uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1);
+ strnmov(pos+pack_length,data[n].f2,keyinfo[0].seg[0].length);
+ tmp=strlen(pos+pack_length);
+ if (pack_length == 1)
+ *pos= (char) tmp;
+ else
+ int2store(pos,tmp);
+ pos+=recinfo[1].length;
+ }
+ else
+ {
+ strnmov(pos,data[n].f2,keyinfo[0].seg[0].length);
+ pos+=recinfo[1].length;
+ }
+}
+
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ switch(optid) {
+ case 'v': verbose=1; break;
+ case 's': silent=1; break;
+ case 'F': no_fulltext=1; no_search=1;
+ case 'U': skip_update=1; break;
+ case 'K': no_keys=no_search=1; break;
+ case 'N': no_search=1; break;
+ case 'S': no_stopwords=1; break;
+ case '#':
+ DBUG_PUSH (argument);
+ break;
+ case 'V':
+ case '?':
+ case 'h':
+ usage();
+ exit(1);
+ }
+ return 0;
+}
+
+/* Read options */
+
+static void get_options(int argc,char *argv[])
+{
+ int ho_error;
+
+ if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option)))
+ exit(ho_error);
+ return;
+} /* get options */
+
+
+static void usage()
+{
+ printf("%s [options]\n", my_progname);
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+}
diff --git a/storage/maria/ma_ft_test1.h b/storage/maria/ma_ft_test1.h
new file mode 100644
index 00000000000..9449f063125
--- /dev/null
+++ b/storage/maria/ma_ft_test1.h
@@ -0,0 +1,421 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+#define NUPD 20
+#define NDATAS 389
+struct { const char *f0, *f2; } data[NDATAS] = {
+ {"1", "General Information about MySQL"},
+ {"1.1", "What is MySQL?"},
+ {"1.2", "About this manual"},
+ {"1.3", "History of MySQL"},
+ {"1.4", "The main features of MySQL"},
+ {"1.5", "General SQL information and tutorials"},
+ {"1.6", "Useful MySQL-related links"},
+ {"1.7", "What are stored procedures and triggers and so on?"},
+ {"2", "MySQL mailing lists and how to ask questions/give error (bug) reports"},
+ {"2.1", "Subscribing to/un-subscribing from the MySQL mailing list"},
+ {"2.2", "Asking questions or reporting bugs"},
+ {"2.3", "I think I have found a bug. What information do you need to help me?"},
+ {"2.3.1", "MySQL keeps crashing"},
+ {"2.4", "Guidelines for answering questions on the mailing list"},
+ {"3", "Licensing or When do I have/want to pay for MySQL?"},
+ {"3.1", "How much does MySQL cost?"},
+ {"3.2", "How do I get commercial support?"},
+ {"3.2.1", "Types of commercial support"},
+ {"3.2.1.1", "Basic email support"},
+ {"3.2.1.2", "Extended email support"},
+/*------------------------------- NUPD=20 -------------------------------*/
+ {"3.2.1.3", "Asking: Login support"},
+ {"3.2.1.4", "Extended login support"},
+ {"3.3", "How do I pay for licenses/support?"},
+ {"3.4", "Who do I contact when I want more information about licensing/support?"},
+ {"3.5", "What Copyright does MySQL use?"},
+ {"3.6", "When may I distribute MySQL commercially without a fee?"},
+ {"3.7", "I want to sell a product that can be configured to use MySQL"},
+ {"3.8", "I am running a commercial web server using MySQL"},
+ {"3.9", "Do I need a license to sell commercial Perl/tcl/PHP/Web+ etc applications?"},
+ {"3.10", "Possible future changes in the licensing"},
+ {"4", "Compiling and installing MySQL"},
+ {"4.1", "How do I get MySQL?"},
+ {"4.2", "Which MySQL version should I use?"},
+ {"4.3", "How/when will you release updates?"},
+ {"4.4", "What operating systems does MySQL support?"},
+ {"4.5", "Compiling MySQL from source code"},
+ {"4.5.1", "Quick installation overview"},
+ {"4.5.2", "Usual configure switches"},
+ {"4.5.3", "Applying a patch"},
+ {"4.6", "Problems compiling?"},
+ {"4.7", "General compilation notes"},
+ {"4.8", "MIT-pthreads notes (FreeBSD)"},
+ {"4.9", "Perl installation comments"},
+ {"4.10", "Special things to consider for some machine/OS combinations"},
+ {"4.10.1", "Solaris notes"},
+ {"4.10.2", "SunOS 4 notes"},
+ {"4.10.3", "Linux notes for all versions"},
+ {"4.10.3.1", "Linux-x86 notes"},
+ {"4.10.3.2", "RedHat 5.0"},
+ {"4.10.3.3", "RedHat 5.1"},
+ {"4.10.3.4", "Linux-Sparc notes"},
+ {"4.10.3.5", "Linux-Alpha notes"},
+ {"4.10.3.6", "MkLinux notes"},
+ {"4.10.4", "Alpha-DEC-Unix notes"},
+ {"4.10.5", "Alpha-DEC-OSF1 notes"},
+ {"4.10.6", "SGI-IRIX notes"},
+ {"4.10.7", "FreeBSD notes"},
+ {"4.10.7.1", "FreeBSD-3.0 notes"},
+ {"4.10.8", "BSD/OS 2.# notes"},
+ {"4.10.8.1", "BSD/OS 3.# notes"},
+ {"4.10.9", "SCO notes"},
+ {"4.10.10", "SCO Unixware 7.0 notes"},
+ {"4.10.11", "IBM-AIX notes"},
+ {"4.10.12", "HP-UX notes"},
+ {"4.11", "TcX binaries"},
+ {"4.12", "Win32 notes"},
+ {"4.13", "Installation instructions for MySQL binary releases"},
+ {"4.13.1", "How to get MySQL Perl support working"},
+ {"4.13.2", "Linux notes"},
+ {"4.13.3", "HP-UX notes"},
+ {"4.13.4", "Linking client libraries"},
+ {"4.14", "Problems running mysql_install_db"},
+ {"4.15", "Problems starting MySQL"},
+ {"4.16", "Automatic start/stop of MySQL"},
+ {"4.17", "Option files"},
+ {"5", "How standards-compatible is MySQL?"},
+ {"5.1", "What extensions has MySQL to ANSI SQL92?"},
+ {"5.2", "What functionality is missing in MySQL?"},
+ {"5.2.1", "Sub-selects"},
+ {"5.2.2", "SELECT INTO TABLE"},
+ {"5.2.3", "Transactions"},
+ {"5.2.4", "Triggers"},
+ {"5.2.5", "Foreign Keys"},
+ {"5.2.5.1", "Some reasons NOT to use FOREIGN KEYS"},
+ {"5.2.6", "Views"},
+ {"5.2.7", "-- as start of a comment"},
+ {"5.3", "What standards does MySQL follow?"},
+ {"5.4", "What functions exist only for compatibility?"},
+ {"5.5", "Limitations of BLOB and TEXT types"},
+ {"5.6", "How to cope without COMMIT-ROLLBACK"},
+ {"6", "The MySQL access privilege system"},
+ {"6.1", "What the privilege system does"},
+ {"6.2", "Connecting to the MySQL server"},
+ {"6.2.1", "Keeping your password secure"},
+ {"6.3", "Privileges provided by MySQL"},
+ {"6.4", "How the privilege system works"},
+ {"6.5", "The privilege tables"},
+ {"6.6", "Setting up the initial MySQL privileges"},
+ {"6.7", "Adding new user privileges to MySQL"},
+ {"6.8", "An example permission setup"},
+ {"6.9", "Causes of Access denied errors"},
+ {"6.10", "How to make MySQL secure against crackers"},
+ {"7", "MySQL language reference"},
+ {"7.1", "Literals: how to write strings and numbers"},
+ {"7.1.1", "Strings"},
+ {"7.1.2", "Numbers"},
+ {"7.1.3", "NULL values"},
+ {"7.1.4", "Database, table, index, column and alias names"},
+ {"7.1.4.1", "Case sensitivity in names"},
+ {"7.2", "Column types"},
+ {"7.2.1", "Column type storage requirements"},
+ {"7.2.5", "Numeric types"},
+ {"7.2.6", "Date and time types"},
+ {"7.2.6.1", "The DATE type"},
+ {"7.2.6.2", "The TIME type"},
+ {"7.2.6.3", "The DATETIME type"},
+ {"7.2.6.4", "The TIMESTAMP type"},
+ {"7.2.6.5", "The YEAR type"},
+ {"7.2.6.6", "Miscellaneous date and time properties"},
+ {"7.2.7", "String types"},
+ {"7.2.7.1", "The CHAR and VARCHAR types"},
+ {"7.2.7.2", "The BLOB and TEXT types"},
+ {"7.2.7.3", "The ENUM type"},
+ {"7.2.7.4", "The SET type"},
+ {"7.2.8", "Choosing the right type for a column"},
+ {"7.2.9", "Column indexes"},
+ {"7.2.10", "Multiple-column indexes"},
+ {"7.2.11", "Using column types from other database engines"},
+ {"7.3", "Functions for use in SELECT and WHERE clauses"},
+ {"7.3.1", "Grouping functions"},
+ {"7.3.2", "Normal arithmetic operations"},
+ {"7.3.3", "Bit functions"},
+ {"7.3.4", "Logical operations"},
+ {"7.3.5", "Comparison operators"},
+ {"7.3.6", "String comparison functions"},
+ {"7.3.7", "Control flow functions"},
+ {"7.3.8", "Mathematical functions"},
+ {"7.3.9", "String functions"},
+ {"7.3.10", "Date and time functions"},
+ {"7.3.11", "Miscellaneous functions"},
+ {"7.3.12", "Functions for use with GROUP BY clauses"},
+ {"7.4", "CREATE DATABASE syntax"},
+ {"7.5", "DROP DATABASE syntax"},
+ {"7.6", "CREATE TABLE syntax"},
+ {"7.7", "ALTER TABLE syntax"},
+ {"7.8", "OPTIMIZE TABLE syntax"},
+ {"7.9", "DROP TABLE syntax"},
+ {"7.10", "DELETE syntax"},
+ {"7.11", "SELECT syntax"},
+ {"7.12", "JOIN syntax"},
+ {"7.13", "INSERT syntax"},
+ {"7.14", "REPLACE syntax"},
+ {"7.15", "LOAD DATA INFILE syntax"},
+ {"7.16", "UPDATE syntax"},
+ {"7.17", "USE syntax"},
+ {"7.18", "SHOW syntax (Get information about tables, columns...)"},
+ {"7.19", "EXPLAIN syntax (Get information about a SELECT)"},
+ {"7.20", "DESCRIBE syntax (Get information about columns)"},
+ {"7.21", "LOCK TABLES/UNLOCK TABLES syntax"},
+ {"7.22", "SET OPTION syntax"},
+ {"7.23", "GRANT syntax (Compatibility function)"},
+ {"7.24", "CREATE INDEX syntax (Compatibility function)"},
+ {"7.25", "DROP INDEX syntax (Compatibility function)"},
+ {"7.26", "Comment syntax"},
+ {"7.27", "CREATE FUNCTION/DROP FUNCTION syntax"},
+ {"7.28", "Is MySQL picky about reserved words?"},
+ {"8", "Example SQL queries"},
+ {"8.1", "Queries from twin project"},
+ {"8.1.1", "Find all non-distributed twins"},
+ {"8.1.2", "Show a table on twin pair status"},
+ {"9", "How safe/stable is MySQL?"},
+ {"9.1", "How stable is MySQL?"},
+ {"9.2", "Why are there is so many releases of MySQL?"},
+ {"9.3", "Checking a table for errors"},
+ {"9.4", "How to repair tables"},
+ {"9.5", "Is there anything special to do when upgrading/downgrading MySQL?"},
+ {"9.5.1", "Upgrading from a 3.21 version to 3.22"},
+ {"9.5.2", "Upgrading from a 3.20 version to 3.21"},
+ {"9.5.3", "Upgrading to another architecture"},
+ {"9.6", "Year 2000 compliance"},
+ {"10", "MySQL Server functions"},
+ {"10.1", "What languages are supported by MySQL?"},
+ {"10.1.1", "Character set used for data &#38; sorting"},
+ {"10.2", "The update log"},
+ {"10.3", "How big can MySQL tables be?"},
+ {"11", "Getting maximum performance from MySQL"},
+ {"11.1", "How does one change the size of MySQL buffers?"},
+ {"11.2", "How compiling and linking affects the speed of MySQL"},
+ {"11.3", "How does MySQL use memory?"},
+ {"11.4", "How does MySQL use indexes?"},
+ {"11.5", "What optimizations are done on WHERE clauses?"},
+ {"11.6", "How does MySQL open &#38; close tables?"},
+ {"11.6.0.1", "What are the drawbacks of creating possibly thousands of tables in a database?"},
+ {"11.7", "How does MySQL lock tables?"},
+ {"11.8", "How should I arrange my table to be as fast/small as possible?"},
+ {"11.9", "What affects the speed of INSERT statements?"},
+ {"11.10", "What affects the speed DELETE statements?"},
+ {"11.11", "How do I get MySQL to run at full speed?"},
+ {"11.12", "What are the different row formats? Or, when should VARCHAR/CHAR be used?"},
+ {"11.13", "Why so many open tables?"},
+ {"12", "MySQL benchmark suite"},
+ {"13", "MySQL Utilites"},
+ {"13.1", "Overview of the different MySQL programs"},
+ {"13.2", "The MySQL table check, optimize and repair program"},
+ {"13.2.1", "isamchk memory use"},
+ {"13.2.2", "Getting low-level table information"},
+ {"13.3", "The MySQL compressed read-only table generator"},
+ {"14", "Adding new functions to MySQL"},
+ {"15", "MySQL ODBC Support"},
+ {"15.1", "Operating systems supported by MyODBC"},
+ {"15.2", "How to report problems with MyODBC"},
+ {"15.3", "Programs known to work with MyODBC"},
+ {"15.4", "How to fill in the various fields in the ODBC administrator program"},
+ {"15.5", "How to get the value of an AUTO_INCREMENT column in ODBC"},
+ {"16", "Problems and common errors"},
+ {"16.1", "Some common errors when using MySQL"},
+ {"16.1.1", "MySQL server has gone away error"},
+ {"16.1.2", "Can't connect to local MySQL server error"},
+ {"16.1.3", "Out of memory error"},
+ {"16.1.4", "Packet too large error"},
+ {"16.1.5", "The table is full error"},
+ {"16.1.6", "Commands out of sync error in client"},
+ {"16.1.7", "Removing user error"},
+ {"16.2", "How MySQL handles a full disk"},
+ {"16.3", "How to run SQL commands from a text file"},
+ {"16.4", "Where MySQL stores temporary files"},
+ {"16.5", "Access denied error"},
+ {"16.6", "How to run MySQL as a normal user"},
+ {"16.7", "Problems with file permissions"},
+ {"16.8", "File not found"},
+ {"16.9", "Problems using DATE columns"},
+ {"16.10", "Case sensitivity in searches"},
+ {"16.11", "Problems with NULL values"},
+ {"17", "Solving some common problems with MySQL"},
+ {"17.1", "Database replication"},
+ {"17.2", "Database backups"},
+ {"18", "MySQL client tools and API's"},
+ {"18.1", "MySQL C API"},
+ {"18.2", "C API datatypes"},
+ {"18.3", "C API function overview"},
+ {"18.4", "C API function descriptions"},
+ {"18.4.1", "mysql_affected_rows()"},
+ {"18.4.2", "mysql_close()"},
+ {"18.4.3", "mysql_connect()"},
+ {"18.4.4", "mysql_create_db()"},
+ {"18.4.5", "mysql_data_seek()"},
+ {"18.4.6", "mysql_debug()"},
+ {"18.4.7", "mysql_drop_db()"},
+ {"18.4.8", "mysql_dump_debug_info()"},
+ {"18.4.9", "mysql_eof()"},
+ {"18.4.10", "mysql_errno()"},
+ {"18.4.11", "mysql_error()"},
+ {"18.4.12", "mysql_escape_string()"},
+ {"18.4.13", "mysql_fetch_field()"},
+ {"18.4.14", "mysql_fetch_fields()"},
+ {"18.4.15", "mysql_fetch_field_direct()"},
+ {"18.4.16", "mysql_fetch_lengths()"},
+ {"18.4.17", "mysql_fetch_row()"},
+ {"18.4.18", "mysql_field_seek()"},
+ {"18.4.19", "mysql_field_tell()"},
+ {"18.4.20", "mysql_free_result()"},
+ {"18.4.21", "mysql_get_client_info()"},
+ {"18.4.22", "mysql_get_host_info()"},
+ {"18.4.23", "mysql_get_proto_info()"},
+ {"18.4.24", "mysql_get_server_info()"},
+ {"18.4.25", "mysql_info()"},
+ {"18.4.26", "mysql_init()"},
+ {"18.4.27", "mysql_insert_id()"},
+ {"18.4.28", "mysql_kill()"},
+ {"18.4.29", "mysql_list_dbs()"},
+ {"18.4.30", "mysql_list_fields()"},
+ {"18.4.31", "mysql_list_processes()"},
+ {"18.4.32", "mysql_list_tables()"},
+ {"18.4.33", "mysql_num_fields()"},
+ {"18.4.34", "mysql_num_rows()"},
+ {"18.4.35", "mysql_query()"},
+ {"18.4.36", "mysql_real_connect()"},
+ {"18.4.37", "mysql_real_query()"},
+ {"18.4.38", "mysql_reload()"},
+ {"18.4.39", "mysql_row_tell()"},
+ {"18.4.40", "mysql_select_db()"},
+ {"18.4.41", "mysql_shutdown()"},
+ {"18.4.42", "mysql_stat()"},
+ {"18.4.43", "mysql_store_result()"},
+ {"18.4.44", "mysql_thread_id()"},
+ {"18.4.45", "mysql_use_result()"},
+ {"18.4.46", "Why is it that after mysql_query() returns success, mysql_store_result() sometimes returns NULL?"},
+ {"18.4.47", "What results can I get from a query?"},
+ {"18.4.48", "How can I get the unique ID for the last inserted row?"},
+ {"18.4.49", "Problems linking with the C API"},
+ {"18.4.50", "How to make a thread-safe client"},
+ {"18.5", "MySQL Perl API's"},
+ {"18.5.1", "DBI with DBD::mysql"},
+ {"18.5.1.1", "The DBI interface"},
+ {"18.5.1.2", "More DBI/DBD information"},
+ {"18.6", "MySQL Java connectivity (JDBC)"},
+ {"18.7", "MySQL PHP API's"},
+ {"18.8", "MySQL C++ API's"},
+ {"18.9", "MySQL Python API's"},
+ {"18.10", "MySQL TCL API's"},
+ {"19", "How MySQL compares to other databases"},
+ {"19.1", "How MySQL compares to mSQL"},
+ {"19.1.1", "How to convert mSQL tools for MySQL"},
+ {"19.1.2", "How mSQL and MySQL client/server communications protocols differ"},
+ {"19.1.3", "How mSQL 2.0 SQL syntax differs from MySQL"},
+ {"19.2", "How MySQL compares to PostgreSQL"},
+ {"A", "Some users of MySQL"},
+ {"B", "Contributed programs"},
+ {"C", "Contributors to MySQL"},
+ {"D", "MySQL change history"},
+ {"19.3", "Changes in release 3.22.x (Alpha version)"},
+ {"19.3.1", "Changes in release 3.22.7"},
+ {"19.3.2", "Changes in release 3.22.6"},
+ {"19.3.3", "Changes in release 3.22.5"},
+ {"19.3.4", "Changes in release 3.22.4"},
+ {"19.3.5", "Changes in release 3.22.3"},
+ {"19.3.6", "Changes in release 3.22.2"},
+ {"19.3.7", "Changes in release 3.22.1"},
+ {"19.3.8", "Changes in release 3.22.0"},
+ {"19.4", "Changes in release 3.21.x"},
+ {"19.4.1", "Changes in release 3.21.33"},
+ {"19.4.2", "Changes in release 3.21.32"},
+ {"19.4.3", "Changes in release 3.21.31"},
+ {"19.4.4", "Changes in release 3.21.30"},
+ {"19.4.5", "Changes in release 3.21.29"},
+ {"19.4.6", "Changes in release 3.21.28"},
+ {"19.4.7", "Changes in release 3.21.27"},
+ {"19.4.8", "Changes in release 3.21.26"},
+ {"19.4.9", "Changes in release 3.21.25"},
+ {"19.4.10", "Changes in release 3.21.24"},
+ {"19.4.11", "Changes in release 3.21.23"},
+ {"19.4.12", "Changes in release 3.21.22"},
+ {"19.4.13", "Changes in release 3.21.21a"},
+ {"19.4.14", "Changes in release 3.21.21"},
+ {"19.4.15", "Changes in release 3.21.20"},
+ {"19.4.16", "Changes in release 3.21.19"},
+ {"19.4.17", "Changes in release 3.21.18"},
+ {"19.4.18", "Changes in release 3.21.17"},
+ {"19.4.19", "Changes in release 3.21.16"},
+ {"19.4.20", "Changes in release 3.21.15"},
+ {"19.4.21", "Changes in release 3.21.14b"},
+ {"19.4.22", "Changes in release 3.21.14a"},
+ {"19.4.23", "Changes in release 3.21.13"},
+ {"19.4.24", "Changes in release 3.21.12"},
+ {"19.4.25", "Changes in release 3.21.11"},
+ {"19.4.26", "Changes in release 3.21.10"},
+ {"19.4.27", "Changes in release 3.21.9"},
+ {"19.4.28", "Changes in release 3.21.8"},
+ {"19.4.29", "Changes in release 3.21.7"},
+ {"19.4.30", "Changes in release 3.21.6"},
+ {"19.4.31", "Changes in release 3.21.5"},
+ {"19.4.32", "Changes in release 3.21.4"},
+ {"19.4.33", "Changes in release 3.21.3"},
+ {"19.4.34", "Changes in release 3.21.2"},
+ {"19.4.35", "Changes in release 3.21.0"},
+ {"19.5", "Changes in release 3.20.x"},
+ {"19.5.1", "Changes in release 3.20.18"},
+ {"19.5.2", "Changes in release 3.20.17"},
+ {"19.5.3", "Changes in release 3.20.16"},
+ {"19.5.4", "Changes in release 3.20.15"},
+ {"19.5.5", "Changes in release 3.20.14"},
+ {"19.5.6", "Changes in release 3.20.13"},
+ {"19.5.7", "Changes in release 3.20.11"},
+ {"19.5.8", "Changes in release 3.20.10"},
+ {"19.5.9", "Changes in release 3.20.9"},
+ {"19.5.10", "Changes in release 3.20.8"},
+ {"19.5.11", "Changes in release 3.20.7"},
+ {"19.5.12", "Changes in release 3.20.6"},
+ {"19.5.13", "Changes in release 3.20.3"},
+ {"19.5.14", "Changes in release 3.20.0"},
+ {"19.6", "Changes in release 3.19.x"},
+ {"19.6.1", "Changes in release 3.19.5"},
+ {"19.6.2", "Changes in release 3.19.4"},
+ {"19.6.3", "Changes in release 3.19.3"},
+ {"E", "Known errors and design deficiencies in MySQL"},
+ {"F", "List of things we want to add to MySQL in the future (The TODO)"},
+ {"19.7", "Things that must done in the real near future"},
+ {"19.8", "Things that have to be done sometime"},
+ {"19.9", "Some things we don't have any plans to do"},
+ {"G", "Comments on porting to other systems"},
+ {"19.10", "Debugging MySQL"},
+ {"19.11", "Comments about RTS threads"},
+ {"19.12", "What is the difference between different thread packages?"},
+ {"H", "Description of MySQL regular expression syntax"},
+ {"I", "What is Unireg?"},
+ {"J", "The MySQL server license"},
+ {"K", "The MySQL license for Microsoft operating systems"},
+ {"*", "SQL command, type and function index"},
+ {"*", "Concept Index"}
+};
+
+#define NQUERIES 5
+const char *query[NQUERIES]={
+ "mysql information and manual",
+ "upgrading from previous version",
+ "column indexes",
+ "against about after more right the with/without", /* stopwords test */
+ "mysql license and copyright"
+};
diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c
new file mode 100644
index 00000000000..25f4d5a67a0
--- /dev/null
+++ b/storage/maria/ma_ft_update.c
@@ -0,0 +1,353 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* functions to work with full-text indices */
+
+#include "ma_ftdefs.h"
+#include <math.h>
+
+void _ma_ft_segiterator_init(MARIA_HA *info, uint keynr, const byte *record,
+ FT_SEG_ITERATOR *ftsi)
+{
+ DBUG_ENTER("_ma_ft_segiterator_init");
+
+ ftsi->num=info->s->keyinfo[keynr].keysegs;
+ ftsi->seg=info->s->keyinfo[keynr].seg;
+ ftsi->rec=record;
+ DBUG_VOID_RETURN;
+}
+
+void _ma_ft_segiterator_dummy_init(const byte *record, uint len,
+ FT_SEG_ITERATOR *ftsi)
+{
+ DBUG_ENTER("_ma_ft_segiterator_dummy_init");
+
+ ftsi->num=1;
+ ftsi->seg=0;
+ ftsi->pos=record;
+ ftsi->len=len;
+ DBUG_VOID_RETURN;
+}
+
+/*
+  This function breaks the convention "return 0 on success",
+  but it's easier to use like this
+
+    while (_ma_ft_segiterator(&ftsi))
+
+  so "1" means "OK" and "0" means "EOF"
+*/
+
+uint _ma_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
+{
+ DBUG_ENTER("_ma_ft_segiterator");
+
+ if (!ftsi->num)
+ DBUG_RETURN(0);
+
+ ftsi->num--;
+ if (!ftsi->seg)
+ DBUG_RETURN(1);
+
+ ftsi->seg--;
+
+ if (ftsi->seg->null_bit &&
+ (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit))
+ {
+ ftsi->pos=0;
+ DBUG_RETURN(1);
+ }
+ ftsi->pos= ftsi->rec+ftsi->seg->start;
+ if (ftsi->seg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint pack_length= (ftsi->seg->bit_start);
+ ftsi->len= (pack_length == 1 ? (uint) *(uchar*) ftsi->pos :
+ uint2korr(ftsi->pos));
+ ftsi->pos+= pack_length; /* Skip VARCHAR length */
+ DBUG_RETURN(1);
+ }
+ if (ftsi->seg->flag & HA_BLOB_PART)
+ {
+ ftsi->len= _ma_calc_blob_length(ftsi->seg->bit_start,ftsi->pos);
+ memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start,
+ sizeof(char*));
+ DBUG_RETURN(1);
+ }
+ ftsi->len=ftsi->seg->length;
+ DBUG_RETURN(1);
+}
+
+
+/* parses a document, i.e. calls maria_ft_parse for every keyseg */
+
+uint _ma_ft_parse(TREE *parsed, MARIA_HA *info, uint keynr, const byte *record,
+ MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root)
+{
+ FT_SEG_ITERATOR ftsi;
+ struct st_mysql_ftparser *parser;
+ DBUG_ENTER("_ma_ft_parse");
+
+ _ma_ft_segiterator_init(info, keynr, record, &ftsi);
+
+ maria_ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset);
+ parser= info->s->keyinfo[keynr].parser;
+ while (_ma_ft_segiterator(&ftsi))
+ {
+ if (ftsi.pos)
+ if (maria_ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, parser, param,
+ mem_root))
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+FT_WORD * _ma_ft_parserecord(MARIA_HA *info, uint keynr, const byte *record,
+ MEM_ROOT *mem_root)
+{
+ TREE ptree;
+ MYSQL_FTPARSER_PARAM *param;
+ DBUG_ENTER("_ma_ft_parserecord");
+ if (! (param= maria_ftparser_call_initializer(info, keynr, 0)))
+ DBUG_RETURN(NULL);
+ bzero((char*) &ptree, sizeof(ptree));
+ param->flags= 0;
+ if (_ma_ft_parse(&ptree, info, keynr, record, param, mem_root))
+ DBUG_RETURN(NULL);
+
+ DBUG_RETURN(maria_ft_linearize(&ptree, mem_root));
+}
+
+static int _ma_ft_store(MARIA_HA *info, uint keynr, byte *keybuf,
+ FT_WORD *wlist, my_off_t filepos)
+{
+ uint key_length;
+ DBUG_ENTER("_ma_ft_store");
+
+ for (; wlist->pos; wlist++)
+ {
+ key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos);
+ if (_ma_ck_write(info, keynr, keybuf, key_length))
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+}
+
+static int _ma_ft_erase(MARIA_HA *info, uint keynr, byte *keybuf,
+ FT_WORD *wlist, my_off_t filepos)
+{
+ uint key_length, err=0;
+ DBUG_ENTER("_ma_ft_erase");
+
+ for (; wlist->pos; wlist++)
+ {
+ key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos);
+ if (_ma_ck_delete(info, keynr, keybuf, key_length))
+ err=1;
+ }
+ DBUG_RETURN(err);
+}
+
+/*
+  Compares the appropriate parts of two WORD_KEY keys directly out of the
+  records; returns 1 if they are different
+*/
+
+#define THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT 1
+#define GEE_THEY_ARE_ABSOLUTELY_IDENTICAL 0
+
+int _ma_ft_cmp(MARIA_HA *info, uint keynr, const byte *rec1, const byte *rec2)
+{
+ FT_SEG_ITERATOR ftsi1, ftsi2;
+ CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
+ DBUG_ENTER("_ma_ft_cmp");
+
+ _ma_ft_segiterator_init(info, keynr, rec1, &ftsi1);
+ _ma_ft_segiterator_init(info, keynr, rec2, &ftsi2);
+
+ while (_ma_ft_segiterator(&ftsi1) && _ma_ft_segiterator(&ftsi2))
+ {
+ if ((ftsi1.pos != ftsi2.pos) &&
+ (!ftsi1.pos || !ftsi2.pos ||
+ ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
+ (uchar*) ftsi2.pos,ftsi2.len,0,0)))
+ DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
+ }
+ DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL);
+}
+
+
+/* update a document entry */
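+/*
+  The word lists produced by _ma_ft_parserecord() are sorted, so the loop
+  below is a plain merge: a word present only in the old record has its key
+  deleted, a word present only in the new record gets a key written, and a
+  word present in both but with a changed weight is deleted and re-inserted.
+*/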
+
+int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf,
+ const byte *oldrec, const byte *newrec, my_off_t pos)
+{
+ int error= -1;
+ FT_WORD *oldlist,*newlist, *old_word, *new_word;
+ CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
+ uint key_length;
+ int cmp, cmp2;
+ DBUG_ENTER("_ma_ft_update");
+
+ if (!(old_word=oldlist=_ma_ft_parserecord(info, keynr, oldrec,
+ &info->ft_memroot)) ||
+ !(new_word=newlist=_ma_ft_parserecord(info, keynr, newrec,
+ &info->ft_memroot)))
+ goto err;
+
+ error=0;
+ while(old_word->pos && new_word->pos)
+ {
+ cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len,
+ (uchar*) new_word->pos,new_word->len,0,0);
+ cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
+
+ if (cmp < 0 || cmp2)
+ {
+ key_length= _ma_ft_make_key(info,keynr,keybuf,old_word,pos);
+ if ((error= _ma_ck_delete(info,keynr, keybuf,key_length)))
+ goto err;
+ }
+ if (cmp > 0 || cmp2)
+ {
+ key_length= _ma_ft_make_key(info, keynr, keybuf, new_word,pos);
+ if ((error= _ma_ck_write(info, keynr, keybuf,key_length)))
+ goto err;
+ }
+ if (cmp<=0) old_word++;
+ if (cmp>=0) new_word++;
+ }
+ if (old_word->pos)
+ error= _ma_ft_erase(info,keynr,keybuf,old_word,pos);
+ else if (new_word->pos)
+ error= _ma_ft_store(info,keynr,keybuf,new_word,pos);
+
+err:
+ free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE));
+ DBUG_RETURN(error);
+}
+
+
+/* adds a document to the collection */
+
+int _ma_ft_add(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record,
+ my_off_t pos)
+{
+ int error= -1;
+ FT_WORD *wlist;
+ DBUG_ENTER("_ma_ft_add");
+ DBUG_PRINT("enter",("keynr: %d",keynr));
+
+ if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot)))
+ error= _ma_ft_store(info,keynr,keybuf,wlist,pos);
+ free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE));
+ DBUG_PRINT("exit",("Return: %d",error));
+ DBUG_RETURN(error);
+}
+
+
+/* removes a document from the collection */
+
+int _ma_ft_del(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record,
+ my_off_t pos)
+{
+ int error= -1;
+ FT_WORD *wlist;
+ DBUG_ENTER("_ma_ft_del");
+ DBUG_PRINT("enter",("keynr: %d",keynr));
+
+ if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot)))
+ error= _ma_ft_erase(info,keynr,keybuf,wlist,pos);
+ free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE));
+ DBUG_PRINT("exit",("Return: %d",error));
+ DBUG_RETURN(error);
+}
+
+
+uint _ma_ft_make_key(MARIA_HA *info, uint keynr, byte *keybuf, FT_WORD *wptr,
+ my_off_t filepos)
+{
+ byte buf[HA_FT_MAXBYTELEN+16];
+ DBUG_ENTER("_ma_ft_make_key");
+
+#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
+ {
+ float weight=(float) ((filepos==HA_OFFSET_ERROR) ? 0 : wptr->weight);
+ mi_float4store(buf,weight);
+ }
+#else
+#error
+#endif
+
+ int2store(buf+HA_FT_WLEN,wptr->len);
+ memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len);
+ DBUG_RETURN(_ma_make_key(info, keynr, keybuf, buf, filepos));
+}
+
+
+/*
+ convert key value to ft2
+*/
+
+uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, byte *key)
+{
+ my_off_t root;
+ DYNAMIC_ARRAY *da=info->ft1_to_ft2;
+ MARIA_KEYDEF *keyinfo=&info->s->ft2_keyinfo;
+ byte *key_ptr= (byte*) dynamic_array_ptr(da, 0), *end;
+ uint length, key_length;
+ DBUG_ENTER("_ma_ft_convert_to_ft2");
+
+ /* we'll generate one pageful at once, and insert the rest one-by-one */
+ /* calculating the length of this page ...*/
+ length=(keyinfo->block_length-2) / keyinfo->keylength;
+ set_if_smaller(length, da->elements);
+ length=length * keyinfo->keylength;
+
+ get_key_full_length_rdonly(key_length, key);
+ while (_ma_ck_delete(info, keynr, key, key_length) == 0)
+ {
+ /*
+ nothing to do here.
+ _ma_ck_delete() will populate info->ft1_to_ft2 with deleted keys
+ */
+ }
+
+ /* creating pageful of keys */
+ maria_putint(info->buff,length+2,0);
+ memcpy(info->buff+2, key_ptr, length);
+ info->keybuff_used=info->page_changed=1; /* info->buff is used */
+ if ((root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR ||
+ _ma_write_keypage(info,keyinfo,root,DFLT_INIT_HITS,info->buff))
+ DBUG_RETURN(-1);
+
+ /* inserting the rest of key values */
+ end= (byte*) dynamic_array_ptr(da, da->elements);
+ for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength)
+ if(_ma_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME))
+ DBUG_RETURN(-1);
+
+ /* now, writing the word key entry */
+ ft_intXstore(key+key_length, - (int) da->elements);
+ _ma_dpointer(info, key+key_length+HA_FT_WLEN, root);
+
+ DBUG_RETURN(_ma_ck_real_write_btree(info,
+ info->s->keyinfo+keynr,
+ key, 0,
+ &info->s->state.key_root[keynr],
+ SEARCH_SAME));
+}
diff --git a/storage/maria/ma_ftdefs.h b/storage/maria/ma_ftdefs.h
new file mode 100644
index 00000000000..def7e92e6e0
--- /dev/null
+++ b/storage/maria/ma_ftdefs.h
@@ -0,0 +1,153 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* some definitions for full-text indices */
+
+#include "ma_fulltext.h"
+#include <m_ctype.h>
+#include <my_tree.h>
+#include <queues.h>
+#include <mysql/plugin.h>
+
+#define true_word_char(ctype, character) \
+ ((ctype) & (_MY_U | _MY_L | _MY_NMR) || \
+ (character) == '_')
+#define misc_word_char(X) 0
+
+#define FT_MAX_WORD_LEN_FOR_SORT 31
+
+#define FTPARSER_MEMROOT_ALLOC_SIZE 65536
+
+#define COMPILE_STOPWORDS_IN
+
+/* Interested readers may consult SMART
+   (ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z)
+   for an excellent implementation of the vector space model we use.
+   It also demonstrates the usage of different weighting techniques.
+   This code, though, is completely original and is not based on the
+   SMART code, but was in some cases inspired by it.
+
+ NORM_PIVOT was taken from the article
+ A.Singhal, C.Buckley, M.Mitra, "Pivoted Document Length Normalization",
+ ACM SIGIR'96, 21-29, 1996
+ */
+
+#define LWS_FOR_QUERY LWS_TF
+#define LWS_IN_USE LWS_LOG
+#define PRENORM_IN_USE PRENORM_AVG
+#define NORM_IN_USE NORM_PIVOT
+#define GWS_IN_USE GWS_PROB
+/*==============================================================*/
+#define LWS_TF (count)
+#define LWS_BINARY (count>0)
+#define LWS_SQUARE (count*count)
+#define LWS_LOG (count?(log( (double) count)+1):0)
+/*--------------------------------------------------------------*/
+#define PRENORM_NONE (p->weight)
+#define PRENORM_MAX (p->weight/docstat.max)
+#define PRENORM_AUG (0.4+0.6*p->weight/docstat.max)
+#define PRENORM_AVG (p->weight/docstat.sum*docstat.uniq)
+#define PRENORM_AVGLOG ((1+log(p->weight))/(1+log(docstat.sum/docstat.uniq)))
+/*--------------------------------------------------------------*/
+#define NORM_NONE (1)
+#define NORM_SUM (docstat.nsum)
+#define NORM_COS (sqrt(docstat.nsum2))
+
+#define PIVOT_VAL (0.0115)
+#define NORM_PIVOT (1+PIVOT_VAL*docstat.uniq)
+/*---------------------------------------------------------------*/
+#define GWS_NORM (1/sqrt(sum2))
+#define GWS_GFIDF (sum/doc_cnt)
+/* Mysterious, but without the (double) cast GWS_IDF performs better :-o */
+#define GWS_IDF log(aio->info->state->records/doc_cnt)
+#define GWS_IDF1 log((double)aio->info->state->records/doc_cnt)
+#define GWS_PROB ((aio->info->state->records > doc_cnt) ? log(((double)(aio->info->state->records-doc_cnt))/doc_cnt) : 0 )
+#define GWS_FREQ (1.0/doc_cnt)
+#define GWS_SQUARED pow(log((double)aio->info->state->records/doc_cnt),2)
+#define GWS_CUBIC pow(log((double)aio->info->state->records/doc_cnt),3)
+#define GWS_ENTROPY (1-(suml/sum-log(sum))/log(aio->info->state->records))
+/*=================================================================*/
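+
+/*
+  Rough numeric illustration of the global weight selected above (GWS_PROB),
+  with made-up figures: in a table of 1000 rows, a word occurring in
+  doc_cnt=10 rows gets log((1000-10)/10) ~= 4.6, while a word occurring in
+  500 rows gets log(500/500) = 0, so very common words contribute almost
+  nothing to the relevance ranking.
+*/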
+
+/* Boolean search operators */
+#define FTB_YES (ft_boolean_syntax[0])
+#define FTB_EGAL (ft_boolean_syntax[1])
+#define FTB_NO (ft_boolean_syntax[2])
+#define FTB_INC (ft_boolean_syntax[3])
+#define FTB_DEC (ft_boolean_syntax[4])
+#define FTB_LBR (ft_boolean_syntax[5])
+#define FTB_RBR (ft_boolean_syntax[6])
+#define FTB_NEG (ft_boolean_syntax[7])
+#define FTB_TRUNC (ft_boolean_syntax[8])
+#define FTB_LQUOT (ft_boolean_syntax[10])
+#define FTB_RQUOT (ft_boolean_syntax[11])
+
+typedef struct st_maria_ft_word {
+ byte * pos;
+ uint len;
+ double weight;
+} FT_WORD;
+
+int is_stopword(char *word, uint len);
+
+uint _ma_ft_make_key(MARIA_HA *, uint , byte *, FT_WORD *, my_off_t);
+
+byte maria_ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *,
+ MYSQL_FTPARSER_BOOLEAN_INFO *);
+byte maria_ft_simple_get_word(CHARSET_INFO *, byte **, const byte *,
+ FT_WORD *, my_bool);
+
+typedef struct _st_maria_ft_seg_iterator {
+ uint num, len;
+ HA_KEYSEG *seg;
+ const byte *rec, *pos;
+} FT_SEG_ITERATOR;
+
+void _ma_ft_segiterator_init(MARIA_HA *, uint, const byte *, FT_SEG_ITERATOR *);
+void _ma_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *);
+uint _ma_ft_segiterator(FT_SEG_ITERATOR *);
+
+void maria_ft_parse_init(TREE *, CHARSET_INFO *);
+int maria_ft_parse(TREE *, byte *, int, struct st_mysql_ftparser *parser,
+ MYSQL_FTPARSER_PARAM *, MEM_ROOT *);
+FT_WORD * maria_ft_linearize(TREE *, MEM_ROOT *);
+FT_WORD * _ma_ft_parserecord(MARIA_HA *, uint, const byte *, MEM_ROOT *);
+uint _ma_ft_parse(TREE *, MARIA_HA *, uint, const byte *,
+ MYSQL_FTPARSER_PARAM *, MEM_ROOT *);
+
+FT_INFO *maria_ft_init_nlq_search(MARIA_HA *, uint, byte *, uint, uint, byte *);
+FT_INFO *maria_ft_init_boolean_search(MARIA_HA *, uint, byte *, uint, CHARSET_INFO *);
+
+extern const struct _ft_vft _ma_ft_vft_nlq;
+int maria_ft_nlq_read_next(FT_INFO *, char *);
+float maria_ft_nlq_find_relevance(FT_INFO *, byte *, uint);
+void maria_ft_nlq_close_search(FT_INFO *);
+float maria_ft_nlq_get_relevance(FT_INFO *);
+my_off_t maria_ft_nlq_get_docid(FT_INFO *);
+void maria_ft_nlq_reinit_search(FT_INFO *);
+
+extern const struct _ft_vft _ma_ft_vft_boolean;
+int maria_ft_boolean_read_next(FT_INFO *, char *);
+float maria_ft_boolean_find_relevance(FT_INFO *, byte *, uint);
+void maria_ft_boolean_close_search(FT_INFO *);
+float maria_ft_boolean_get_relevance(FT_INFO *);
+my_off_t maria_ft_boolean_get_docid(FT_INFO *);
+void maria_ft_boolean_reinit_search(FT_INFO *);
+extern MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info,
+ uint keynr,
+ uint paramnr);
+extern void maria_ftparser_call_deinitializer(MARIA_HA *info);
diff --git a/storage/maria/ma_fulltext.h b/storage/maria/ma_fulltext.h
new file mode 100644
index 00000000000..cf21471b316
--- /dev/null
+++ b/storage/maria/ma_fulltext.h
@@ -0,0 +1,28 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/* some definitions for full-text indices */
+
+#include "maria_def.h"
+#include "ft_global.h"
+
+int _ma_ft_cmp(MARIA_HA *, uint, const byte *, const byte *);
+int _ma_ft_add(MARIA_HA *, uint, byte *, const byte *, my_off_t);
+int _ma_ft_del(MARIA_HA *, uint, byte *, const byte *, my_off_t);
+
+uint _ma_ft_convert_to_ft2(MARIA_HA *, uint, byte *);
diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c
new file mode 100644
index 00000000000..397cd2465d4
--- /dev/null
+++ b/storage/maria/ma_info.c
@@ -0,0 +1,137 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Return useful base information for an open table */
+
+#include "maria_def.h"
+#ifdef __WIN__
+#include <sys/stat.h>
+#endif
+
+ /* Get position to last record */
+
+MARIA_RECORD_POS maria_position(MARIA_HA *info)
+{
+ return info->cur_row.lastpos;
+}
+
+
+/* Get information about the table */
+/* If flag == 2 one gets only current info (no sync from database) */
+
+int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag)
+{
+ MY_STAT state;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("maria_status");
+
+ x->recpos= info->cur_row.lastpos;
+ if (flag == HA_STATUS_POS)
+ DBUG_RETURN(0); /* Compatible with ISAM */
+ if (!(flag & HA_STATUS_NO_LOCK))
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ VOID(_ma_readinfo(info,F_RDLCK,0));
+ fast_ma_writeinfo(info);
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+ if (flag & HA_STATUS_VARIABLE)
+ {
+ x->records = info->state->records;
+ x->deleted = info->state->del;
+ x->delete_length = info->state->empty;
+ x->data_file_length =info->state->data_file_length;
+ x->index_file_length=info->state->key_file_length;
+
+ x->keys = share->state.header.keys;
+ x->check_time = share->state.check_time;
+ x->mean_reclength = info->state->records ?
+ (ulong) ((info->state->data_file_length-info->state->empty)/
+ info->state->records) : (ulong) share->min_pack_length;
+ }
+ if (flag & HA_STATUS_ERRKEY)
+ {
+ x->errkey= info->errkey;
+ x->dup_key_pos= info->dup_key_pos;
+ }
+ if (flag & HA_STATUS_CONST)
+ {
+ x->reclength = share->base.reclength;
+ x->max_data_file_length=share->base.max_data_file_length;
+ x->max_index_file_length=info->s->base.max_key_file_length;
+ x->filenr = info->dfile;
+ x->options = share->options;
+ x->create_time=share->state.create_time;
+ x->reflength= maria_get_pointer_length(share->base.max_data_file_length,
+ maria_data_pointer_size);
+ x->record_offset= ((share->options &
+ (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ?
+ 0L : share->base.pack_reclength);
+ x->sortkey= -1; /* No clustering */
+ x->rec_per_key = share->state.rec_per_key_part;
+ x->key_map = share->state.key_map;
+ x->data_file_name = share->data_file_name;
+ x->index_file_name = share->index_file_name;
+ }
+ if ((flag & HA_STATUS_TIME) && !my_fstat(info->dfile,&state,MYF(0)))
+ x->update_time=state.st_mtime;
+ else
+ x->update_time=0;
+ if (flag & HA_STATUS_AUTO)
+ {
+ x->auto_increment= share->state.auto_increment+1;
+ if (!x->auto_increment) /* This shouldn't happen */
+ x->auto_increment= ~(ulonglong) 0;
+ }
+ DBUG_RETURN(0);
+}
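+
+/*
+  Usage sketch (illustrative only, error handling omitted): to fetch the row
+  counts together with the static limits one could call
+
+    MARIA_INFO x;
+    maria_status(info, &x, HA_STATUS_VARIABLE | HA_STATUS_CONST);
+
+  and then read x.records, x.reclength and so on.
+*/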
+
+
+/*
+ Write a message to the error log.
+
+ SYNOPSIS
+ _ma_report_error()
+ file_name Name of table file (e.g. index_file_name).
+ errcode Error number.
+
+ DESCRIPTION
+ This function supplies my_error() with a table name. Most error
+ messages need one. Since string arguments in error messages are limited
+    to 64 characters by convention, we ensure that in case of truncation
+    the end of the index file path is in the message. This contains
+ the most valuable information (the table name and the database name).
+
+ RETURN
+ void
+*/
+
+void _ma_report_error(int errcode, const char *file_name)
+{
+ uint length;
+ DBUG_ENTER("_ma_report_error");
+ DBUG_PRINT("enter",("errcode %d, table '%s'", errcode, file_name));
+
+ if ((length= strlen(file_name)) > 64)
+ {
+ uint dir_length= dirname_length(file_name);
+ file_name+= dir_length;
+ if ((length-= dir_length) > 64)
+ file_name+= length - 64;
+ }
+ my_error(errcode, MYF(ME_NOREFRESH), file_name);
+ DBUG_VOID_RETURN;
+}
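+
+/*
+  Example with a hypothetical file name: for
+  "/data/mysql/db1/some_really_long_table_name.MAI" the directory part is
+  stripped first, leaving "some_really_long_table_name.MAI"; if that is
+  still longer than 64 characters, only its last 64 bytes are kept, so the
+  end of the name always makes it into the error message.
+*/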
diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c
new file mode 100644
index 00000000000..679aa8c6f8f
--- /dev/null
+++ b/storage/maria/ma_init.c
@@ -0,0 +1,60 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Initialize a maria-database */
+
+#include "maria_def.h"
+#include <ft_global.h>
+#include "ma_blockrec.h"
+
+my_bool maria_inited= FALSE;
+pthread_mutex_t THR_LOCK_maria;
+
+/*
+ Initialize maria
+
+ SYNOPSIS
+ maria_init()
+
+ TODO
+    Open log files and do recovery if needed
+
+ RETURN
+ 0 ok
+ # error number
+*/
+
+int maria_init(void)
+{
+ if (!maria_inited)
+ {
+ maria_inited= TRUE;
+ pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW);
+ _ma_init_block_record_data();
+ }
+ return 0;
+}
+
+
+void maria_end(void)
+{
+ if (maria_inited)
+ {
+ maria_inited= FALSE;
+ ft_free_stopwords();
+ pthread_mutex_destroy(&THR_LOCK_maria);
+ }
+}
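+
+/*
+  Typical usage sketch (error handling omitted):
+
+    if (maria_init())
+      return 1;
+    ... open tables and do work ...
+    maria_end();
+
+  maria_init() is idempotent: a second call is a no-op while maria_inited
+  is still TRUE.
+*/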
diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c
new file mode 100644
index 00000000000..036fd305c4d
--- /dev/null
+++ b/storage/maria/ma_key.c
@@ -0,0 +1,591 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Functions to handle keys */
+
+#include "maria_def.h"
+#include "m_ctype.h"
+#include "ma_sp_defs.h"
+#ifdef HAVE_IEEEFP_H
+#include <ieeefp.h>
+#endif
+
+#define CHECK_KEYS /* Enable safety checks */
+
+#define FIX_LENGTH(cs, pos, length, char_length) \
+ do { \
+ if (length > char_length) \
+ char_length= my_charpos(cs, pos, pos+length, char_length); \
+ set_if_smaller(char_length,length); \
+ } while(0)
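+
+/*
+  FIX_LENGTH converts a character count back into a byte length: in the
+  callers below char_length starts out as the number of characters that fit
+  into the segment (length/mbmaxlen for multi-byte charsets), my_charpos()
+  maps that count to a byte offset inside the value, and the result is then
+  capped by the segment's byte length.
+*/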
+
+static int _ma_put_key_in_record(MARIA_HA *info,uint keynr,byte *record);
+
+/*
+  Make an internal key from a record
+
+ SYNOPSIS
+ _ma_make_key()
+    info		MARIA handler
+ keynr key number
+ key Store created key here
+ record Record
+ filepos Position to record in the data file
+
+ RETURN
+ Length of key
+*/
+
+uint _ma_make_key(register MARIA_HA *info, uint keynr, byte *key,
+ const byte *record, MARIA_RECORD_POS filepos)
+{
+ const byte *pos;
+ byte *start;
+ reg1 HA_KEYSEG *keyseg;
+ my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT;
+ DBUG_ENTER("_ma_make_key");
+
+ if (info->s->keyinfo[keynr].flag & HA_SPATIAL)
+ {
+ /*
+ TODO: nulls processing
+ */
+#ifdef HAVE_SPATIAL
+ DBUG_RETURN(_ma_sp_make_key(info,keynr, key,record,filepos));
+#else
+ DBUG_ASSERT(0); /* maria_open should check that this never happens*/
+#endif
+ }
+
+ start=key;
+ for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++)
+ {
+ enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type;
+ uint length=keyseg->length;
+ uint char_length;
+ CHARSET_INFO *cs=keyseg->charset;
+
+ if (keyseg->null_bit)
+ {
+ if (record[keyseg->null_pos] & keyseg->null_bit)
+ {
+ *key++= 0; /* NULL in key */
+ continue;
+ }
+ *key++=1; /* Not NULL */
+ }
+
+ char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen :
+ length);
+
+ pos= record+keyseg->start;
+ if (type == HA_KEYTYPE_BIT)
+ {
+ if (keyseg->bit_length)
+ {
+ uchar bits= get_rec_bits((uchar*) record + keyseg->bit_pos,
+ keyseg->bit_start, keyseg->bit_length);
+ *key++= (char) bits;
+ length--;
+ }
+ memcpy(key, pos, length);
+ key+= length;
+ continue;
+ }
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ if (type != HA_KEYTYPE_NUM)
+ {
+ length= cs->cset->lengthsp(cs, pos, length);
+ }
+ else
+ {
+ const byte *end= pos + length;
+ while (pos < end && pos[0] == ' ')
+ pos++;
+ length= (uint) (end-pos);
+ }
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ memcpy(key, pos, (size_t) char_length);
+ key+=char_length;
+ continue;
+ }
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint pack_length= (keyseg->bit_start == 1 ? 1 : 2);
+ uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos :
+ uint2korr(pos));
+ pos+= pack_length; /* Skip VARCHAR length */
+ set_if_smaller(length,tmp_length);
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ memcpy(key,pos,(size_t) char_length);
+ key+= char_length;
+ continue;
+ }
+ else if (keyseg->flag & HA_BLOB_PART)
+ {
+ uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos);
+ memcpy_fixed(&pos,pos+keyseg->bit_start,sizeof(char*));
+ set_if_smaller(length,tmp_length);
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ memcpy(key,pos,(size_t) char_length);
+ key+= char_length;
+ continue;
+ }
+ else if (keyseg->flag & HA_SWAP_KEY)
+ { /* Numerical column */
+#ifdef HAVE_ISNAN
+ if (type == HA_KEYTYPE_FLOAT)
+ {
+ float nr;
+ float4get(nr,pos);
+ if (isnan(nr))
+ {
+ /* Replace NAN with zero */
+ bzero(key,length);
+ key+=length;
+ continue;
+ }
+ }
+ else if (type == HA_KEYTYPE_DOUBLE)
+ {
+ double nr;
+ float8get(nr,pos);
+ if (isnan(nr))
+ {
+ bzero(key,length);
+ key+=length;
+ continue;
+ }
+ }
+#endif
+ pos+=length;
+ while (length--)
+ {
+ *key++ = *--pos;
+ }
+ continue;
+ }
+ FIX_LENGTH(cs, pos, length, char_length);
+ memcpy(key, pos, char_length);
+ if (length > char_length)
+ cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' ');
+ key+= length;
+ }
+ _ma_dpointer(info,key,filepos);
+ DBUG_PRINT("exit",("keynr: %d",keynr));
+ DBUG_DUMP("key",start,(uint) (key-start)+keyseg->length);
+ DBUG_EXECUTE("key",
+ _ma_print_key(DBUG_FILE,info->s->keyinfo[keynr].seg,start,
+ (uint) (key-start)););
+ DBUG_RETURN((uint) (key-start)); /* Return keylength */
+} /* _ma_make_key */
+
+
+/*
+  Pack a key to internal format from the given format (c_rkey)
+
+ SYNOPSIS
+ _ma_pack_key()
+ info MARIA handler
+ uint keynr key number
+ key Store packed key here
+ old Not packed key
+ k_length Length of 'old' to use
+ last_used_keyseg out parameter. May be NULL
+
+ RETURN
+ length of packed key
+
+    last_used_keyseg	Store pointer to the keyseg after the last used one
+*/
+
+uint _ma_pack_key(register MARIA_HA *info, uint keynr, byte *key,
+ const byte *old, uint k_length, HA_KEYSEG **last_used_keyseg)
+{
+ byte *start_key=key;
+ HA_KEYSEG *keyseg;
+ my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT;
+ DBUG_ENTER("_ma_pack_key");
+
+ for (keyseg=info->s->keyinfo[keynr].seg ;
+ keyseg->type && (int) k_length > 0;
+ old+=keyseg->length, keyseg++)
+ {
+ enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type;
+ uint length=min((uint) keyseg->length,(uint) k_length);
+ uint char_length;
+ const byte *pos;
+ CHARSET_INFO *cs=keyseg->charset;
+
+ if (keyseg->null_bit)
+ {
+ k_length--;
+ if (!(*key++= (char) 1-*old++)) /* Copy null marker */
+ {
+ k_length-=length;
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
+ {
+ k_length-=2; /* Skip length */
+ old+= 2;
+ }
+ continue; /* Found NULL */
+ }
+ }
+ char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen :
+ length);
+ pos= old;
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ const byte *end= pos + length;
+ if (type != HA_KEYTYPE_NUM)
+ {
+ while (end > pos && end[-1] == ' ')
+ end--;
+ }
+ else
+ {
+ while (pos < end && pos[0] == ' ')
+ pos++;
+ }
+ k_length-=length;
+ length=(uint) (end-pos);
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ memcpy(key,pos,(size_t) char_length);
+ key+= char_length;
+ continue;
+ }
+ else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART))
+ {
+ /* Length of key-part used with maria_rkey() always 2 */
+ uint tmp_length=uint2korr(pos);
+ k_length-= 2+length;
+ pos+=2;
+ set_if_smaller(length,tmp_length); /* Safety */
+ FIX_LENGTH(cs, pos, length, char_length);
+ store_key_length_inc(key,char_length);
+ old+=2; /* Skip length */
+ memcpy(key, pos,(size_t) char_length);
+ key+= char_length;
+ continue;
+ }
+ else if (keyseg->flag & HA_SWAP_KEY)
+ { /* Numerical column */
+ pos+=length;
+ k_length-=length;
+ while (length--)
+ {
+ *key++ = *--pos;
+ }
+ continue;
+ }
+ FIX_LENGTH(cs, pos, length, char_length);
+ memcpy(key, pos, char_length);
+ if (length > char_length)
+ cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' ');
+ key+= length;
+ k_length-=length;
+ }
+ if (last_used_keyseg)
+ *last_used_keyseg= keyseg;
+
+#ifdef NOT_USED
+ if (keyseg->type)
+ {
+ /* Part-key ; fill with ASCII 0 for easier searching */
+ length= (uint) -k_length; /* unused part of last key */
+ do
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ length++;
+ if (keyseg->flag & HA_SPACE_PACK)
+ length+=2;
+ else
+ length+= keyseg->length;
+ keyseg++;
+ } while (keyseg->type);
+ bzero(key,length);
+ key+=length;
+ }
+#endif
+ DBUG_RETURN((uint) (key-start_key));
+} /* _ma_pack_key */
+
+
+
+/*
+ Store found key in record
+
+ SYNOPSIS
+ _ma_put_key_in_record()
+ info MARIA handler
+ keynr Key number that was used
+ record Store key here
+
+ Last read key is in info->lastkey
+
+ NOTES
+ Used when only-keyread is wanted
+
+ RETURN
+ 0 ok
+ 1 error
+*/
+
+static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr,
+ byte *record)
+{
+ reg2 byte *key;
+ byte *pos,*key_end;
+ reg1 HA_KEYSEG *keyseg;
+ byte *blob_ptr;
+ DBUG_ENTER("_ma_put_key_in_record");
+
+ blob_ptr= info->lastkey2; /* Place to put blob parts */
+  key=info->lastkey;                            /* Key that was read */
+ key_end=key+info->lastkey_length;
+ for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++)
+ {
+ if (keyseg->null_bit)
+ {
+ if (!*key++)
+ {
+ record[keyseg->null_pos]|= keyseg->null_bit;
+ continue;
+ }
+ record[keyseg->null_pos]&= ~keyseg->null_bit;
+ }
+ if (keyseg->type == HA_KEYTYPE_BIT)
+ {
+ uint length= keyseg->length;
+
+ if (keyseg->bit_length)
+ {
+ byte bits= *key++;
+ set_rec_bits(bits, record + keyseg->bit_pos, keyseg->bit_start,
+ keyseg->bit_length);
+ length--;
+ }
+ else
+ {
+ clr_rec_bits(record + keyseg->bit_pos, keyseg->bit_start,
+ keyseg->bit_length);
+ }
+ memcpy(record + keyseg->start, key, length);
+ key+= length;
+ continue;
+ }
+ if (keyseg->flag & HA_SPACE_PACK)
+ {
+ uint length;
+ get_key_length(length,key);
+#ifdef CHECK_KEYS
+ if (length > keyseg->length || key+length > key_end)
+ goto err;
+#endif
+ pos= record+keyseg->start;
+ if (keyseg->type != (int) HA_KEYTYPE_NUM)
+ {
+ memcpy(pos,key,(size_t) length);
+ keyseg->charset->cset->fill(keyseg->charset,
+ pos + length, keyseg->length - length,
+ ' ');
+ }
+ else
+ {
+ bfill(pos,keyseg->length-length,' ');
+ memcpy(pos+keyseg->length-length,key,(size_t) length);
+ }
+ key+=length;
+ continue;
+ }
+
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint length;
+ get_key_length(length,key);
+#ifdef CHECK_KEYS
+ if (length > keyseg->length || key+length > key_end)
+ goto err;
+#endif
+ /* Store key length */
+ if (keyseg->bit_start == 1)
+ *(uchar*) (record+keyseg->start)= (uchar) length;
+ else
+ int2store(record+keyseg->start, length);
+ /* And key data */
+ memcpy(record+keyseg->start + keyseg->bit_start, key, length);
+ key+= length;
+ }
+ else if (keyseg->flag & HA_BLOB_PART)
+ {
+ uint length;
+ get_key_length(length,key);
+#ifdef CHECK_KEYS
+ if (length > keyseg->length || key+length > key_end)
+ goto err;
+#endif
+ memcpy(record+keyseg->start+keyseg->bit_start,
+ (char*) &blob_ptr,sizeof(char*));
+ memcpy(blob_ptr,key,length);
+ blob_ptr+=length;
+
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+
+ _ma_store_blob_length(record+keyseg->start,
+ (uint) keyseg->bit_start,length);
+ key+=length;
+ }
+ else if (keyseg->flag & HA_SWAP_KEY)
+ {
+ byte *to= record+keyseg->start+keyseg->length;
+ byte *end= key+keyseg->length;
+#ifdef CHECK_KEYS
+ if (end > key_end)
+ goto err;
+#endif
+ do
+ {
+ *--to= *key++;
+ } while (key != end);
+ continue;
+ }
+ else
+ {
+#ifdef CHECK_KEYS
+ if (key+keyseg->length > key_end)
+ goto err;
+#endif
+ memcpy(record+keyseg->start, key, (size_t) keyseg->length);
+ key+= keyseg->length;
+ }
+ }
+ DBUG_RETURN(0);
+
+err:
+ DBUG_RETURN(1); /* Crashed row */
+} /* _ma_put_key_in_record */
+
+
+ /* Here when key reads are used */
+
+int _ma_read_key_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos)
+{
+ fast_ma_writeinfo(info);
+ if (filepos != HA_OFFSET_ERROR)
+ {
+ if (info->lastinx >= 0)
+ { /* Read only key */
+ if (_ma_put_key_in_record(info,(uint) info->lastinx,buf))
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ return -1;
+ }
+ info->update|= HA_STATE_AKTIV; /* We should find a record */
+ return 0;
+ }
+ my_errno=HA_ERR_WRONG_INDEX;
+ }
+ return(-1); /* Wrong data to read */
+}
+
+
+/*
+ Retrieve auto_increment info
+
+ SYNOPSIS
+ retrieve_auto_increment()
+ info Maria handler
+ record Row to update
+
+ IMPLEMENTATION
+ For signed columns we don't retrieve the auto increment value if it's
+ less than zero.
+*/
+
+ulonglong ma_retrieve_auto_increment(MARIA_HA *info,const byte *record)
+{
+ ulonglong value= 0; /* Store unsigned values here */
+ longlong s_value= 0; /* Store signed values here */
+ HA_KEYSEG *keyseg= info->s->keyinfo[info->s->base.auto_key-1].seg;
+ const byte *key= record + keyseg->start;
+
+ switch (keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ s_value= (longlong) *(char*)key;
+ break;
+ case HA_KEYTYPE_BINARY:
+ value=(ulonglong) *(uchar*) key;
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ s_value= (longlong) sint2korr(key);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ value=(ulonglong) uint2korr(key);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ s_value= (longlong) sint4korr(key);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ value=(ulonglong) uint4korr(key);
+ break;
+ case HA_KEYTYPE_INT24:
+ s_value= (longlong) sint3korr(key);
+ break;
+ case HA_KEYTYPE_UINT24:
+ value=(ulonglong) uint3korr(key);
+ break;
+ case HA_KEYTYPE_FLOAT: /* This shouldn't be used */
+ {
+ float f_1;
+ float4get(f_1,key);
+ /* Ignore negative values */
+ value = (f_1 < (float) 0.0) ? 0 : (ulonglong) f_1;
+ break;
+ }
+ case HA_KEYTYPE_DOUBLE: /* This shouldn't be used */
+ {
+ double f_1;
+ float8get(f_1,key);
+ /* Ignore negative values */
+ value = (f_1 < 0.0) ? 0 : (ulonglong) f_1;
+ break;
+ }
+ case HA_KEYTYPE_LONGLONG:
+ s_value= sint8korr(key);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ value= uint8korr(key);
+ break;
+ default:
+ DBUG_ASSERT(0);
+ value=0; /* Error */
+ break;
+ }
+
+ /*
+    The following code works because if s_value < 0 then value is 0
+ and if s_value == 0 then value will contain either s_value or the
+ correct value.
+ */
+ return (s_value > 0) ? (ulonglong) s_value : value;
+}
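+
+/*
+  Example (hypothetical row): for a signed INT auto_increment column holding
+  -5, keyseg->type is HA_KEYTYPE_LONG_INT, s_value becomes -5 while value
+  stays 0, so the function returns 0 and the negative value is ignored as
+  described above.
+*/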
diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c
new file mode 100644
index 00000000000..64725baae86
--- /dev/null
+++ b/storage/maria/ma_keycache.c
@@ -0,0 +1,164 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Key cache assignments
+*/
+
+#include "maria_def.h"
+
+/*
+ Assign pages of the index file for a table to a key cache
+
+ SYNOPSIS
+ maria_assign_to_key_cache()
+ info open table
+ key_map map of indexes to assign to the key cache
+ key_cache_ptr pointer to the key cache handle
+ assign_lock Mutex to lock during assignment
+
+  PREREQUISITES
+    One must have a READ lock or a WRITE lock on the table when calling
+    the function to ensure that there are no other writers to it.
+
+ The caller must also ensure that one doesn't call this function from
+ two different threads with the same table.
+
+ NOTES
+ At present pages for all indexes must be assigned to the same key cache.
+ In future only pages for indexes specified in the key_map parameter
+ of the table will be assigned to the specified key cache.
+
+ RETURN VALUE
+ 0 If a success
+ # Error code
+*/
+
+int maria_assign_to_key_cache(MARIA_HA *info,
+ ulonglong key_map __attribute__((unused)),
+ KEY_CACHE *key_cache)
+{
+ int error= 0;
+ MARIA_SHARE* share= info->s;
+ DBUG_ENTER("maria_assign_to_key_cache");
+ DBUG_PRINT("enter",
+ ("old_key_cache_handle: 0x%lx new_key_cache_handle: 0x%lx",
+ (long) share->key_cache, (long) key_cache));
+
+ /*
+ Skip operation if we didn't change key cache. This can happen if we
+ call this for all open instances of the same table
+ */
+ if (share->key_cache == key_cache)
+ DBUG_RETURN(0);
+
+ /*
+ First flush all blocks for the table in the old key cache.
+ This is to ensure that the disk is consistent with the data pages
+ in memory (which may not be the case if the table uses delayed_key_write)
+
+ Note that some other read thread may still fill in the key cache with
+ new blocks during this call and after, but this doesn't matter as
+    all threads will start using the new key cache for their next call to
+    the maria library and we know that there will not be any changed blocks
+ in the old key cache.
+ */
+
+ if (flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE))
+ {
+ error= my_errno;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info); /* Mark that table must be checked */
+ }
+
+ /*
+ Flush the new key cache for this file. This is needed to ensure
+    that there are no old blocks (with outdated data) left in the new key
+ cache from an earlier assign_to_keycache operation
+
+    (This can never fail as there is never any unwritten data in the
+ new key cache)
+ */
+ (void) flush_key_blocks(key_cache, share->kfile, FLUSH_RELEASE);
+
+ /*
+ ensure that setting the key cache and changing the multi_key_cache
+    is done atomically
+ */
+ pthread_mutex_lock(&share->intern_lock);
+ /*
+ Tell all threads to use the new key cache
+    This should be seen at the latest at the next call to a maria function.
+ */
+ share->key_cache= key_cache;
+
+ /* store the key cache in the global hash structure for future opens */
+ if (multi_key_cache_set(share->unique_file_name, share->unique_name_length,
+ share->key_cache))
+ error= my_errno;
+ pthread_mutex_unlock(&share->intern_lock);
+ DBUG_RETURN(error);
+}
+
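+/*
+  Usage sketch (with a hypothetical key cache handle 'new_key_cache'): while
+  holding a read or write lock on the table, a caller simply does
+
+    maria_assign_to_key_cache(info, (ulonglong) ~0, new_key_cache);
+
+  which is exactly what maria_change_key_cache() below does for every open
+  table instance that still uses the old key cache.
+*/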
+
+/*
+ Change all MARIA entries that uses one key cache to another key cache
+
+ SYNOPSIS
+ maria_change_key_cache()
+ old_key_cache Old key cache
+ new_key_cache New key cache
+
+ NOTES
+ This is used when we delete one key cache.
+
+    To handle the case where some other thread tries to open a MARIA
+ table associated with the to-be-deleted key cache while this operation
+ is running, we have to call 'multi_key_cache_change()' from this
+ function while we have a lock on the MARIA table list structure.
+
+ This is safe as long as it's only MARIA that is using this specific
+ key cache.
+*/
+
+
+void maria_change_key_cache(KEY_CACHE *old_key_cache,
+ KEY_CACHE *new_key_cache)
+{
+ LIST *pos;
+ DBUG_ENTER("maria_change_key_cache");
+
+ /*
+ Lock list to ensure that no one can close the table while we manipulate it
+ */
+ pthread_mutex_lock(&THR_LOCK_maria);
+ for (pos=maria_open_list ; pos ; pos=pos->next)
+ {
+ MARIA_HA *info= (MARIA_HA*) pos->data;
+ MARIA_SHARE *share= info->s;
+ if (share->key_cache == old_key_cache)
+ maria_assign_to_key_cache(info, (ulonglong) ~0, new_key_cache);
+ }
+
+ /*
+ We have to do the following call while we have the lock on the
+ MARIA list structure to ensure that another thread is not trying to
+    open a new table that will be associated with the old key cache
+ */
+ multi_key_cache_change(old_key_cache, new_key_cache);
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ DBUG_VOID_RETURN;
+}
diff --git a/storage/maria/ma_least_recently_dirtied.c b/storage/maria/ma_least_recently_dirtied.c
new file mode 100644
index 00000000000..170e59a601a
--- /dev/null
+++ b/storage/maria/ma_least_recently_dirtied.c
@@ -0,0 +1,106 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3261 Maria - background flushing of the least-recently-dirtied pages
+ First version written by Guilhem Bichot on 2006-04-27.
+ Does not compile yet.
+*/
+
+/*
+ To be part of the page cache.
+ The pseudocode below is dependent on the page cache
+  which is being designed in WL#3134. It is not clear if I need to do page
+ copies, as the page cache already keeps page copies.
+ So, this code will move to the page cache and take inspiration from its
+ methods. Below is just to give the idea of what could be done.
+  And I should compare my ideas with WL#3134.
+*/
+
+/* Here is the implementation of this module */
+
+#include "page_cache.h"
+#include "least_recently_dirtied.h"
+
+/*
+ This thread does background flush of pieces of the LRD, and serves
+ requests for asynchronous checkpoints.
+ Just launch it when engine starts.
+  MikaelR questioned why the same thread does two different jobs; the risk
+  is that while a checkpoint happens no LRD flushing happens.
+ For now, we only do checkpoints - no LRD flushing (to be done when the
+ second version of the page cache is ready WL#3077).
+ Reasons to delay:
+ - Recovery will work (just slower)
+  - the new page cache may be different, so the work would have to be re-done
+ - current pagecache probably has issues with flushing when somebody is
+ writing to the table being flushed - better avoid that.
+*/
+pthread_handler_decl background_flush_and_checkpoint_thread()
+{
+ while (this_thread_not_killed)
+ {
+    /* note that we don't care about the checkpoint's success */
+ (void)execute_asynchronous_checkpoint_if_any();
+ sleep(5);
+ /*
+ in the final version, we will not sleep but call flush_pages_from_LRD()
+ repeatedly. If there are no dirty pages, we'll make sure to not have a
+ tight loop probing for checkpoint requests.
+ */
+ }
+}
+
+/* The rest of this file will not serve in first version */
+
+/*
+ flushes only the first pages of the LRD.
+ max_this_number could be FLUSH_CACHE (of mf_pagecache.c) for example.
+*/
+flush_pages_from_LRD(uint max_this_number, LSN max_this_lsn)
+{
+ /*
+    One rule that must be observed is "a page must be flushed to disk before
+    it is removed from the LRD" (otherwise the checkpoint is incomplete: corruption).
+ */
+
+ /*
+ Build a list of pages to flush:
+ changed_blocks[i] is roughly sorted by descending rec_lsn,
+ so we could do a merge sort of changed_blocks[] lists, stopping after we
+ have the max_this_number first elements or after we have found a page with
+ rec_lsn > max_this_lsn.
+ Then do like pagecache_flush_blocks_int() does (beware! this time we are
+ not alone on the file! there may be dangers! TODO: sort this out).
+ */
+
+ /*
+    MikaelR observed that Linux's file cache may never fsync to
+ disk until this cache is full, at which point it decides to empty the
+ cache, making the machine very slow. A solution was to fsync after writing
+ 2 MB.
+ */
+}
+
+/*
+  Note that when we flush all pages from the LRD up to rec_lsn>=max_lsn,
+ this is approximate because the LRD list may
+ not be exactly sorted by rec_lsn (because for a big row, all pages of the
+ row are inserted into the LRD with rec_lsn being the LSN of the REDO for the
+ first page, so if there are concurrent insertions, the last page of the big
+ row may have a smaller rec_lsn than the previous pages inserted by
+ concurrent inserters).
+*/
diff --git a/storage/maria/ma_least_recently_dirtied.h b/storage/maria/ma_least_recently_dirtied.h
new file mode 100644
index 00000000000..f6d7420febc
--- /dev/null
+++ b/storage/maria/ma_least_recently_dirtied.h
@@ -0,0 +1,26 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3261 Maria - background flushing of the least-recently-dirtied pages
+ First version written by Guilhem Bichot on 2006-04-27.
+ Does not compile yet.
+*/
+
+/* This is the interface of this module. */
+
+/* flushes all pages from the LRD up to approximately rec_lsn>=max_lsn */
+int flush_all_LRD_to_lsn(LSN max_lsn);
diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c
new file mode 100644
index 00000000000..6a0bbe82dcb
--- /dev/null
+++ b/storage/maria/ma_locking.c
@@ -0,0 +1,512 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+  Locking of isam tables.
+  Reads info from an isam table. Must be the first request before doing any
+  further calls to any isam function. Is used to allow many processes to use
+  the same isam database.
+*/
+
+#include "ma_ftdefs.h"
+
+ /* lock table by F_UNLCK, F_RDLCK or F_WRLCK */
+
+int maria_lock_database(MARIA_HA *info, int lock_type)
+{
+ int error;
+ uint count;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("maria_lock_database");
+ DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u "
+ "global_changed: %d open_count: %u name: '%s'",
+ lock_type, info->lock_type, share->r_locks,
+ share->w_locks,
+ share->global_changed, share->state.open_count,
+ share->index_file_name));
+ if (share->options & HA_OPTION_READ_ONLY_DATA ||
+ info->lock_type == lock_type)
+ DBUG_RETURN(0);
+ if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */
+ {
+ ++share->w_locks;
+ ++share->tot_locks;
+ info->lock_type= lock_type;
+ DBUG_RETURN(0);
+ }
+
+ error=0;
+ pthread_mutex_lock(&share->intern_lock);
+ if (share->kfile >= 0) /* May only be false on windows */
+ {
+ switch (lock_type) {
+ case F_UNLCK:
+ maria_ftparser_call_deinitializer(info);
+ if (info->lock_type == F_RDLCK)
+ count= --share->r_locks;
+ else
+ count= --share->w_locks;
+ --share->tot_locks;
+ if (info->lock_type == F_WRLCK && !share->w_locks)
+ {
+ if (!share->delay_key_write && flush_key_blocks(share->key_cache,
+ share->kfile,
+ FLUSH_KEEP))
+ {
+ error= my_errno;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ /* Mark that table must be checked */
+ maria_mark_crashed(info);
+ }
+ if (share->data_file_type == BLOCK_RECORD &&
+ flush_key_blocks(share->key_cache, info->dfile, FLUSH_KEEP))
+ {
+ error= my_errno;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ /* Mark that table must be checked */
+ maria_mark_crashed(info);
+ }
+ }
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ if (end_io_cache(&info->rec_cache))
+ {
+ error=my_errno;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ }
+ }
+ if (!count)
+ {
+ DBUG_PRINT("info",("changed: %u w_locks: %u",
+ (uint) share->changed, share->w_locks));
+ if (share->changed && !share->w_locks)
+ {
+#ifdef HAVE_MMAP
+ if ((info->s->mmaped_length !=
+ info->s->state.state.data_file_length) &&
+ (info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
+ {
+ if (info->s->concurrent_insert)
+ rw_wrlock(&info->s->mmap_lock);
+ _ma_remap_file(info, info->s->state.state.data_file_length);
+ info->s->nonmmaped_inserts= 0;
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->mmap_lock);
+ }
+#endif
+ share->state.process= share->last_process=share->this_process;
+ share->state.unique= info->last_unique= info->this_unique;
+ share->state.update_count= info->last_loop= ++info->this_loop;
+ if (_ma_state_info_write(share->kfile, &share->state, 1))
+ error=my_errno;
+ share->changed=0;
+ if (maria_flush)
+ {
+ if (_ma_sync_table_files(info))
+ error= my_errno;
+ }
+ else
+ share->not_flushed=1;
+ if (error)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ }
+ }
+ }
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ info->lock_type= F_UNLCK;
+ break;
+ case F_RDLCK:
+ if (info->lock_type == F_WRLCK)
+ {
+ /*
+ Change RW to READONLY
+
+ mysqld does not turn write locks to read locks,
+ so we're never here in mysqld.
+ */
+ share->w_locks--;
+ share->r_locks++;
+ info->lock_type=lock_type;
+ break;
+ }
+ if (!share->r_locks && !share->w_locks)
+ {
+ if (_ma_state_info_read_dsk(share->kfile, &share->state, 1))
+ {
+ error=my_errno;
+ break;
+ }
+ }
+ VOID(_ma_test_if_changed(info));
+ share->r_locks++;
+ share->tot_locks++;
+ info->lock_type=lock_type;
+ break;
+ case F_WRLCK:
+ if (info->lock_type == F_RDLCK)
+ { /* Change READONLY to RW */
+ if (share->r_locks == 1)
+ {
+ share->r_locks--;
+ share->w_locks++;
+ info->lock_type=lock_type;
+ break;
+ }
+ }
+ if (!(share->options & HA_OPTION_READ_ONLY_DATA))
+ {
+ if (!share->w_locks)
+ {
+ if (!share->r_locks)
+ {
+ if (_ma_state_info_read_dsk(share->kfile, &share->state, 1))
+ {
+ error=my_errno;
+ break;
+ }
+ }
+ }
+ }
+ VOID(_ma_test_if_changed(info));
+
+ info->lock_type=lock_type;
+ info->invalidator=info->s->invalidator;
+ share->w_locks++;
+ share->tot_locks++;
+ break;
+ default:
+ break; /* Impossible */
+ }
+ }
+#ifdef __WIN__
+ else
+ {
+ /*
+ Check for bad file descriptors if this table is part
+ of a merge union. Failing to capture this may cause
+ a crash on windows if the table is renamed and
+ later on referenced by the merge table.
+ */
+ if( info->owned_by_merge && (info->s)->kfile < 0 )
+ {
+ error = HA_ERR_NO_SUCH_TABLE;
+ }
+ }
+#endif
+ pthread_mutex_unlock(&share->intern_lock);
+ DBUG_RETURN(error);
+} /* maria_lock_database */
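+
+/*
+  Minimal usage sketch (hypothetical, not part of the module): how a caller
+  would drive the locking protocol above. It assumes 'info' is a MARIA_HA
+  handle already obtained from maria_open(); the reads in the middle are only
+  hinted at and error handling is reduced to returning the first error.
+*/
+#ifdef MARIA_LOCK_USAGE_SKETCH
+static int read_under_lock_sketch(MARIA_HA *info)
+{
+  int error;
+  if ((error= maria_lock_database(info, F_RDLCK)))   /* take a read lock */
+    return error;
+  /* ... maria_rkey()/maria_rrnd() calls would go here ... */
+  return maria_lock_database(info, F_UNLCK);         /* release the lock */
+}
+#endif /* MARIA_LOCK_USAGE_SKETCH */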
+
+
+/****************************************************************************
+ The following functions are called by thr_lock() in threaded applications
+****************************************************************************/
+
+/*
+ Create a copy of the current status for the table
+
+ SYNOPSIS
+ _ma_get_status()
+      param              Pointer to Maria handler
+ concurrent_insert Set to 1 if we are going to do concurrent inserts
+ (THR_WRITE_CONCURRENT_INSERT was used)
+*/
+
+void _ma_get_status(void* param, int concurrent_insert)
+{
+ MARIA_HA *info=(MARIA_HA*) param;
+ DBUG_ENTER("_ma_get_status");
+ DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d",
+ (long) info->s->state.state.key_file_length,
+ (long) info->s->state.state.data_file_length,
+ concurrent_insert));
+#ifndef DBUG_OFF
+ if (info->state->key_file_length > info->s->state.state.key_file_length ||
+ info->state->data_file_length > info->s->state.state.data_file_length)
+ DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
+ (long) info->state->key_file_length,
+ (long) info->state->data_file_length));
+#endif
+ info->save_state=info->s->state.state;
+ info->state= &info->save_state;
+ info->append_insert_at_end= concurrent_insert;
+ DBUG_VOID_RETURN;
+}
+
+
+void _ma_update_status(void* param)
+{
+ MARIA_HA *info=(MARIA_HA*) param;
+ /*
+ Because someone may have closed the table we point at, we only
+    update the state if it's our own state. This isn't a problem as
+ we are always pointing at our own lock or at a read lock.
+ (This is enforced by thr_multi_lock.c)
+ */
+ if (info->state == &info->save_state)
+ {
+#ifndef DBUG_OFF
+ DBUG_PRINT("info",("updating status: key_file: %ld data_file: %ld",
+ (long) info->state->key_file_length,
+ (long) info->state->data_file_length));
+ if (info->state->key_file_length < info->s->state.state.key_file_length ||
+ info->state->data_file_length < info->s->state.state.data_file_length)
+ DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
+ (long) info->s->state.state.key_file_length,
+ (long) info->s->state.state.data_file_length));
+#endif
+ info->s->state.state= *info->state;
+ info->state= &info->s->state.state;
+ }
+ info->append_insert_at_end= 0;
+
+ /*
+ We have to flush the write cache here as other threads may start
+ reading the table before maria_lock_database() is called
+ */
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ if (end_io_cache(&info->rec_cache))
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ }
+ info->opt_flag&= ~WRITE_CACHE_USED;
+ }
+}
+
+void _ma_copy_status(void* to,void *from)
+{
+ ((MARIA_HA*) to)->state= &((MARIA_HA*) from)->save_state;
+}
+
+
+/*
+ Check if should allow concurrent inserts
+
+ IMPLEMENTATION
+    Allow concurrent inserts if we don't have a hole in the table or
+    if there is no active write lock, there are active read locks and
+    maria_concurrent_insert == 2. In this last case the new
+    row(s) are inserted at the end of the file instead of filling up the hole.
+
+    The last case is to allow inserts into a heavily read-used table
+    even if there are holes.
+
+  NOTES
+    If there is an rtree index in the table, concurrent inserts are
+    disabled in maria_open()
+
+ RETURN
+ 0 ok to use concurrent inserts
+ 1 not ok
+*/
+
+my_bool _ma_check_status(void *param)
+{
+ MARIA_HA *info=(MARIA_HA*) param;
+ /*
+ The test for w_locks == 1 is here because this thread has already done an
+    external lock (in other words: w_locks == 1 means no other thread has
+ a write lock)
+ */
+ DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u",
+ (long) info->s->state.dellink, (uint) info->s->r_locks,
+ (uint) info->s->w_locks));
+ return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR ||
+ (maria_concurrent_insert == 2 && info->s->r_locks &&
+ info->s->w_locks == 1));
+}
+
+
+/****************************************************************************
+ ** functions to read / write the state
+****************************************************************************/
+
+int _ma_readinfo(register MARIA_HA *info, int lock_type, int check_keybuffer)
+{
+ DBUG_ENTER("_ma_readinfo");
+
+ if (info->lock_type == F_UNLCK)
+ {
+ MARIA_SHARE *share=info->s;
+ if (!share->tot_locks)
+ {
+ if (_ma_state_info_read_dsk(share->kfile, &share->state, 1))
+ {
+ int error=my_errno ? my_errno : -1;
+ my_errno=error;
+ DBUG_RETURN(1);
+ }
+ }
+ if (check_keybuffer)
+ VOID(_ma_test_if_changed(info));
+ info->invalidator=info->s->invalidator;
+ }
+ else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
+ {
+ my_errno=EACCES; /* Not allowed to change */
+ DBUG_RETURN(-1); /* when have read_lock() */
+ }
+ DBUG_RETURN(0);
+} /* _ma_readinfo */
+
+
+/*
+  Every isam function that updates the isam database MUST end with this
+ request
+*/
+
+int _ma_writeinfo(register MARIA_HA *info, uint operation)
+{
+ int error,olderror;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_writeinfo");
+ DBUG_PRINT("info",("operation: %u tot_locks: %u", operation,
+ share->tot_locks));
+
+ error=0;
+ if (share->tot_locks == 0)
+ {
+ if (operation)
+ { /* Two threads can't be here */
+ olderror= my_errno; /* Remember last error */
+ share->state.process= share->last_process= share->this_process;
+ share->state.unique= info->last_unique= info->this_unique;
+ share->state.update_count= info->last_loop= ++info->this_loop;
+ if ((error= _ma_state_info_write(share->kfile, &share->state, 1)))
+ olderror=my_errno;
+#ifdef __WIN__
+ if (maria_flush)
+ {
+ _commit(share->kfile);
+ _commit(info->dfile);
+ }
+#endif
+ my_errno=olderror;
+ }
+ }
+ else if (operation)
+ share->changed= 1; /* Mark keyfile changed */
+ DBUG_RETURN(error);
+} /* _ma_writeinfo */
+
+
+ /* Test if someone has changed the database */
+ /* (Should be called after readinfo) */
+
+int _ma_test_if_changed(register MARIA_HA *info)
+{
+ MARIA_SHARE *share=info->s;
+ if (share->state.process != share->last_process ||
+ share->state.unique != info->last_unique ||
+ share->state.update_count != info->last_loop)
+ { /* Keyfile has changed */
+ DBUG_PRINT("info",("index file changed"));
+ if (share->state.process != share->this_process)
+ VOID(flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE));
+ share->last_process=share->state.process;
+ info->last_unique= share->state.unique;
+ info->last_loop= share->state.update_count;
+ info->update|= HA_STATE_WRITTEN; /* Must use file on next */
+ info->data_changed= 1; /* For maria_is_changed */
+ return 1;
+ }
+ return (!(info->update & HA_STATE_AKTIV) ||
+ (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
+ HA_STATE_KEY_CHANGED)));
+} /* _ma_test_if_changed */
+
+
+/*
+ Put a mark in the .MYI file that someone is updating the table
+
+
+ DOCUMENTATION
+
+  state.open_count in the .MYI file is used in the following way:
+ - For the first change of the .MYI file in this process open_count is
+ incremented by maria_mark_file_change(). (We have a write lock on the file
+ when this happens)
+ - In maria_close() it's decremented by _ma_decrement_open_count() if it
+ was incremented in the same process.
+
+  This means that if we are the only process using the file, the open_count
+ tells us if the MARIA file wasn't properly closed. (This is true if
+ my_disable_locking is set).
+*/
+
+
+int _ma_mark_file_changed(MARIA_HA *info)
+{
+ char buff[3];
+ register MARIA_SHARE *share=info->s;
+ DBUG_ENTER("_ma_mark_file_changed");
+
+ if (!(share->state.changed & STATE_CHANGED) || ! share->global_changed)
+ {
+ share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
+ STATE_NOT_OPTIMIZED_KEYS);
+ if (!share->global_changed)
+ {
+ share->global_changed=1;
+ share->state.open_count++;
+ }
+ if (!share->temporary)
+ {
+ mi_int2store(buff,share->state.open_count);
+ buff[2]=1; /* Mark that it's changed */
+ DBUG_RETURN(my_pwrite(share->kfile,buff,sizeof(buff),
+ sizeof(share->state.header),
+ MYF(MY_NABP)));
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
+  call. In this context the following code should be safe!
+ */
+
+int _ma_decrement_open_count(MARIA_HA *info)
+{
+ char buff[2];
+ register MARIA_SHARE *share=info->s;
+ int lock_error=0,write_error=0;
+ if (share->global_changed)
+ {
+ uint old_lock=info->lock_type;
+ share->global_changed=0;
+ lock_error=maria_lock_database(info,F_WRLCK);
+    /* It's not fatal even if we couldn't get the lock! */
+ if (share->state.open_count > 0)
+ {
+ share->state.open_count--;
+ mi_int2store(buff,share->state.open_count);
+ write_error=my_pwrite(share->kfile,buff,sizeof(buff),
+ sizeof(share->state.header),
+ MYF(MY_NABP));
+ }
+ if (!lock_error)
+ lock_error=maria_lock_database(info,old_lock);
+ }
+ return test(lock_error || write_error);
+}
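+
+/*
+  Illustrative sketch (hypothetical, not used by the code): how the
+  open_count mechanism documented above can be used at open time to detect an
+  unclean shutdown, assuming share->state has already been read from the
+  index file header.
+*/
+#ifdef OPEN_COUNT_CHECK_SKETCH
+static my_bool was_closed_properly_sketch(MARIA_SHARE *share)
+{
+  /*
+    If we are the only user of the file and open_count is still non-zero,
+    some process marked the file as changed and never decremented the
+    counter on close, so the table may need to be checked.
+  */
+  return (my_bool) (share->state.open_count == 0);
+}
+#endif /* OPEN_COUNT_CHECK_SKETCH */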
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
new file mode 100644
index 00000000000..1e9afc7571d
--- /dev/null
+++ b/storage/maria/ma_loghandler.c
@@ -0,0 +1,5296 @@
+#include "maria_def.h"
+
+/* number of opened log files in the pagecache (should be at least 2) */
+#define OPENED_FILES_NUM 3
+
+/* records buffer size (should be LOG_PAGE_SIZE * n) */
+#define TRANSLOG_WRITE_BUFFER (1024*1024)
+/* min chunk length */
+#define TRANSLOG_MIN_CHUNK 3
+/*
+ Number of buffers used by loghandler
+
+  Should be at least 4, because one thread can block up to 2 buffers in
+  normal circumstances (less than half of one and all of the other, or one
+  just switched plus the other). If we hit the end of the file in the middle
+  and have to switch buffers it can be 3, plus 1 buffer for flushing/writing.
+  We use a bigger number here for higher concurrency.
+*/
+#define TRANSLOG_BUFFERS_NO 5
+/* number of bytes (+ header) which can be unused on first page in sequence */
+#define TRANSLOG_MINCHUNK_CONTENT 1
+/* version of log file */
+#define TRANSLOG_VERSION_ID 10000 /* 1.00.00 */
+
+#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */
+
+/* QQ: For temporary debugging */
+#define UNRECOVERABLE_ERROR(E) \
+ do { \
+ DBUG_PRINT("error", E); \
+ printf E; \
+ putchar('\n'); \
+  } while (0)
+
+
+
+/* record part descriptor */
+struct st_translog_part
+{
+ translog_size_t len;
+ byte *buff;
+};
+
+
+/* record parts descriptor */
+struct st_translog_parts
+{
+ /* full record length */
+ translog_size_t record_length;
+ /* full record length with chunk headers */
+ translog_size_t total_record_length;
+ /* array of parts (st_translog_part) */
+ DYNAMIC_ARRAY parts;
+ /* current part index */
+ uint current;
+};
+
+/* log write buffer descriptor */
+struct st_translog_buffer
+{
+ LSN last_lsn;
+ /* This buffer offset in the file */
+ TRANSLOG_ADDRESS offset;
+ /*
+    How much has been written (or will be written when
+    copy_to_buffer_in_progress becomes 0) to this buffer
+ */
+ translog_size_t size;
+ /* File handler for this buffer */
+ File file;
+ /* Threads which are waiting for buffer filling/freeing */
+ WQUEUE waiting_filling_buffer;
+  /* Number of records which are in copy progress */
+ uint copy_to_buffer_in_progress;
+ /* list of waiting buffer ready threads */
+ struct st_my_thread_var *waiting_flush;
+ struct st_translog_buffer *overlay;
+#ifndef DBUG_OFF
+ uint buffer_no;
+#endif
+  /* lock for the buffer. Current buffer also locks the handler */
+ pthread_mutex_t mutex;
+ /* IO cache for current log */
+ byte buffer[TRANSLOG_WRITE_BUFFER];
+};
+
+
+struct st_buffer_cursor
+{
+ /* pointer on the buffer */
+ byte *ptr;
+ /* current buffer */
+ struct st_translog_buffer *buffer;
+ /* current page fill */
+ uint16 current_page_fill;
+  /* how many times this page has been finished for writing */
+ uint16 write_counter;
+ /* previous write offset */
+ uint16 previous_offset;
+ /* Number of current buffer */
+ uint8 buffer_no;
+ my_bool chaser, protected;
+};
+
+
+struct st_translog_descriptor
+{
+ /* *** Parameters of the log handler *** */
+
+ /* Page cache for the log reads */
+ PAGECACHE *pagecache;
+ /* Flags */
+ uint flags;
+  /* max size of one log file (for new log creation) */
+ uint32 log_file_max_size;
+ /* server version */
+ uint32 server_version;
+ /* server ID */
+ uint32 server_id;
+ /* Loghandler's buffer capacity in case of chunk 2 filling */
+ uint32 buffer_capacity_chunk_2;
+ /* Half of the buffer capacity in case of chunk 2 filling */
+ uint32 half_buffer_capacity_chunk_2;
+ /* Page overhead calculated by flags */
+ uint16 page_overhead;
+ /* Page capacity calculated by flags (TRANSLOG_PAGE_SIZE-page_overhead-1) */
+ uint16 page_capacity_chunk_2;
+ /* Directory to store files */
+ char directory[FN_REFLEN];
+
+ /* *** Current state of the log handler *** */
+  /*
+    Descriptors of the current log file and of the (OPENED_FILES_NUM-1)
+    previous ones kept in the page cache
+  */
+ File log_file_num[OPENED_FILES_NUM];
+ File directory_fd;
+ /* buffers for log writing */
+ struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
+ /*
+    horizon - visible end of the log (here it is the absolute end of the log:
+    the position where the next chunk can start)
+ */
+ TRANSLOG_ADDRESS horizon;
+ /* horizon buffer cursor */
+ struct st_buffer_cursor bc;
+
+ /* Last flushed LSN */
+ LSN flushed;
+ LSN sent_to_file;
+ pthread_mutex_t sent_to_file_lock;
+};
+
+static struct st_translog_descriptor log_descriptor;
+
+/* Marker for end of log */
+static byte end_of_log= 0;
+
+/* record classes */
+enum record_class
+{
+ LOGRECTYPE_NOT_ALLOWED,
+ LOGRECTYPE_VARIABLE_LENGTH,
+ LOGRECTYPE_PSEUDOFIXEDLENGTH,
+ LOGRECTYPE_FIXEDLENGTH
+};
+
+/* chunk types */
+#define TRANSLOG_CHUNK_LSN   0x00      /* 0 chunk referred to as LSN (head or tail) */
+#define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */
+#define TRANSLOG_CHUNK_NOHDR (2 << 6) /* 2 no head chunk (till page end) */
+#define TRANSLOG_CHUNK_LNGTH (3 << 6) /* 3 chunk with chunk length */
+#define TRANSLOG_CHUNK_TYPE (3 << 6) /* Mask to get chunk type */
+#define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */
+
+/* compressed (relative) LSN constants */
+#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */
+#define TRANSLOG_CLSN_MAX_LEN 5 /* Maximum length of compressed LSN */
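+
+/*
+  Illustrative sketch (hypothetical, not used by the code): the first byte of
+  a chunk packs the chunk type into its two high bits and the record type
+  into its six low bits, so both can be recovered with the masks above, e.g.:
+*/
+#ifdef CHUNK_BYTE_SKETCH
+static void decompose_chunk_byte_sketch(byte head, uint *chunk_type,
+                                        uint *rec_type)
+{
+  *chunk_type= head & TRANSLOG_CHUNK_TYPE;  /* one of TRANSLOG_CHUNK_* */
+  *rec_type= head & TRANSLOG_REC_TYPE;      /* index into the descriptors */
+}
+#endif /* CHUNK_BYTE_SKETCH */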
+
+typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type,
+ void *tcb,
+ struct st_translog_parts *parts);
+
+typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type,
+ void *tcb,
+ LSN *lsn,
+ struct st_translog_parts *parts);
+
+typedef uint16(*read_rec_hook) (enum translog_record_type type,
+ uint16 read_length, uchar *read_buff,
+ byte *decoded_buff);
+
+/*
+ Descriptor of log record type
+ Note: Don't reorder because of constructs later...
+*/
+struct st_log_record_type_descriptor
+{
+ /* internal class of the record */
+ enum record_class class;
+ /* length for fixed-size record, or maximum length of pseudo-fixed */
+ uint16 fixed_length;
+  /* how much of the record body (part of the header too) is read with it */
+ uint16 read_header_len;
+ /* HOOK for writing the record called before lock */
+ prewrite_rec_hook prewrite_hook;
+ /* HOOK for writing the record called when LSN is known */
+ inwrite_rec_hook inwrite_hook;
+ /* HOOK for reading headers */
+ read_rec_hook read_hook;
+ /*
+    For pseudo-fixed records: the number of compressed LSNs followed by
+    the system header
+ */
+ int16 compressed_LSN;
+};
+
+
+static struct st_log_record_type_descriptor
+ log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]=
+{
+ /*LOGREC_RESERVED_FOR_CHUNKS23= 0 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INSERT_ROW_HEAD= 1 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INSERT_ROW_TAIL= 2 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INSERT_ROW_BLOB= 3 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INSERT_ROW_BLOBS= 4 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_PURGE_ROW= 5 */
+ {LOGRECTYPE_FIXEDLENGTH, 9, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_PURGE_BLOCKS= 6 */
+ {LOGRECTYPE_FIXEDLENGTH, 10, 10, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_DELETE_ROW= 7 */
+ {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_UPDATE_ROW_HEAD= 8 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INDEX= 9 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_UNDELETE_ROW= 10 */
+ {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, NULL, NULL, 0},
+ /*LOGREC_CLR_END= 11 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1},
+ /*LOGREC_PURGE_END= 12 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1},
+ /*LOGREC_UNDO_ROW_INSERT= 13 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 14, 14, NULL, NULL, NULL, 1},
+ /*LOGREC_UNDO_ROW_DELETE= 14 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 19, 19, NULL, NULL, NULL, 2},
+ /*LOGREC_UNDO_ROW_UPDATE= 15 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 14, NULL, NULL, NULL, 2},
+ /*LOGREC_UNDO_KEY_INSERT= 16 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, NULL, NULL, 1},
+ /*LOGREC_UNDO_KEY_DELETE= 17 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, NULL, NULL, 2},
+ /*LOGREC_PREPARE= 18 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_PREPARE_WITH_UNDO_PURGE= 19 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 5, NULL, NULL, NULL, 1},
+ /*LOGREC_COMMIT= 20 */
+ {LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_COMMIT_WITH_UNDO_PURGE= 21 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1},
+ /*LOGREC_CHECKPOINT_PAGE= 22 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 6, NULL, NULL, NULL, 0},
+ /*LOGREC_CHECKPOINT_TRAN= 23 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_CHECKPOINT_TABL= 24 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_CREATE_TABLE= 25 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_RENAME_TABLE= 26 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_DROP_TABLE= 27 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_TRUNCATE_TABLE= 28 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_FILE_ID= 29 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 4, NULL, NULL, NULL, 0},
+ /*LOGREC_LONG_TRANSACTION_ID= 30 */
+ {LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0},
+ /*31 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*32 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*33 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*34 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*35 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*36 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*37 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*38 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*39 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*40 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*41 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*42 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*43 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*44 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*45 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*46 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*47 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*48 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*49 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*50 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*51 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*52 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*53 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*54 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*55 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*56 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*57 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*58 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*59 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*60 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*61 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*62 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_RESERVED_FUTURE_EXTENSION= 63 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}
+};
+
+/* page overhead for each possible combination of flags */
+static uint page_overhead[TRANSLOG_FLAGS_NUM];
+
+typedef struct st_translog_validator_data
+{
+ TRANSLOG_ADDRESS *addr;
+ my_bool was_recovered;
+} TRANSLOG_VALIDATOR_DATA;
+
+
+const char *maria_data_root;
+
+
+/*
+  Check cursor/buffer consistency
+
+ SYNOPSIS
+ translog_check_cursor
+ cursor cursor which will be checked
+*/
+
+#ifndef DBUG_OFF
+static void translog_check_cursor(struct st_buffer_cursor *cursor)
+{
+ DBUG_ASSERT(cursor->chaser ||
+ ((ulong) (cursor->ptr - cursor->buffer->buffer) ==
+ cursor->buffer->size));
+ DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
+ DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
+ cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
+}
+#endif
+
+/*
+ Get file name of the log by log number
+
+ SYNOPSIS
+ translog_filename_by_fileno()
+ file_no Number of the log we want to open
+ path Pointer to buffer where file name will be
+                    stored (must be at least FN_REFLEN bytes)
+ RETURN
+ pointer to path
+*/
+
+static char *translog_filename_by_fileno(uint32 file_no, char *path)
+{
+ char file_name[10 + 8 + 1]; /* See my_sprintf */
+ char *res;
+ DBUG_ENTER("translog_filename_by_fileno");
+ DBUG_ASSERT(file_no <= 0xfffffff);
+ my_sprintf(file_name, (file_name, "maria_log.%08u", file_no));
+ res= fn_format(path, file_name, log_descriptor.directory, "", MYF(MY_WME));
+ DBUG_PRINT("info", ("Path: '%s' path: 0x%lx res: 0x%lx",
+ res, (ulong) path, (ulong) res));
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Open log file with given number without cache
+
+ SYNOPSIS
+ open_logfile_by_number_no_cache()
+ file_no Number of the log we want to open
+
+ RETURN
+ -1 error
+ # file descriptor number
+*/
+
+static File open_logfile_by_number_no_cache(uint32 file_no)
+{
+ File file;
+ char path[FN_REFLEN];
+ DBUG_ENTER("open_logfile_by_number_no_cache");
+
+ /* Todo: add O_DIRECT to open flags (when buffer is aligned) */
+ if ((file= my_open(translog_filename_by_fileno(file_no, path),
+ O_CREAT | O_BINARY | O_RDWR,
+ MYF(MY_WME))) < 0)
+ {
+ UNRECOVERABLE_ERROR(("Error %d during opening file '%s'", errno, path));
+ DBUG_RETURN(-1);
+ }
+ DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
+ DBUG_RETURN(file);
+}
+
+
+/*
+ Write log file page header in the just opened new log file
+
+ SYNOPSIS
+ translog_write_file_header();
+
+ NOTES
+ First page is just a marker page; We don't store any real log data in it.
+
+ RETURN
+ 0 OK
+ 1 ERROR
+*/
+
+uchar NEAR maria_trans_file_magic[]=
+{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
+ 'L', 'O', 'G' };
+
+static my_bool translog_write_file_header()
+{
+ ulonglong timestamp;
+ byte page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
+ DBUG_ENTER("translog_write_file_header");
+
+ /* file tag */
+ memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
+ page+= sizeof(maria_trans_file_magic);
+ /* timestamp */
+ timestamp= my_getsystime();
+ int8store(page, timestamp);
+ page+= 8;
+ /* maria version */
+ int4store(page, TRANSLOG_VERSION_ID);
+ page+= 4;
+ /* mysql version (MYSQL_VERSION_ID) */
+ int4store(page, log_descriptor.server_version);
+ page+= 4;
+ /* server ID */
+ int4store(page, log_descriptor.server_id);
+ page+= 4;
+ /* loghandler page_size/DISK_DRIVE_SECTOR_SIZE */
+ int2store(page, TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE);
+ page+= 2;
+ /* file number */
+ int3store(page, LSN_FILE_NO(log_descriptor.horizon));
+ page+= 3;
+ bzero(page, sizeof(page_buff) - (page- page_buff));
+
+ DBUG_RETURN(my_pwrite(log_descriptor.log_file_num[0], page_buff,
+ sizeof(page_buff), 0, MYF(MY_WME | MY_NABP)) != 0);
+}
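+
+/*
+  Illustrative sketch (hypothetical, not used by the code): reading back the
+  header written above, to make the layout explicit. The struct and the
+  function are invented for illustration; only the field order and sizes are
+  taken from translog_write_file_header().
+*/
+#ifdef LOG_FILE_HEADER_SKETCH
+struct st_log_file_header_sketch
+{
+  ulonglong timestamp;
+  uint32 translog_version;
+  uint32 mysql_version;
+  uint32 server_id;
+  uint16 page_size_in_sectors;
+  uint32 file_number;
+};
+
+static my_bool
+translog_read_file_header_sketch(byte *page,
+                                 struct st_log_file_header_sketch *header)
+{
+  if (memcmp(page, maria_trans_file_magic, sizeof(maria_trans_file_magic)))
+    return 1;                                /* not a Maria transaction log */
+  page+= sizeof(maria_trans_file_magic);
+  header->timestamp= uint8korr(page);              page+= 8;
+  header->translog_version= uint4korr(page);       page+= 4;
+  header->mysql_version= uint4korr(page);          page+= 4;
+  header->server_id= uint4korr(page);              page+= 4;
+  header->page_size_in_sectors= uint2korr(page);   page+= 2;
+  header->file_number= uint3korr(page);
+  return 0;
+}
+#endif /* LOG_FILE_HEADER_SKETCH */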
+
+
+/*
+ Initialize transaction log file buffer
+
+ SYNOPSIS
+ translog_buffer_init()
+ buffer The buffer to initialize
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_buffer_init(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_init");
+  /* No LSN has been written to this buffer yet */
+ buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN;
+ /* This Buffer File */
+ buffer->file= -1;
+ buffer->overlay= 0;
+ /* IO cache for current log */
+ bzero(buffer->buffer, TRANSLOG_WRITE_BUFFER);
+ /* Buffer size */
+ buffer->size= 0;
+ /* cond of thread which is waiting for buffer filling */
+ buffer->waiting_filling_buffer.last_thread= 0;
+  /* Number of records which are in copy progress */
+ buffer->copy_to_buffer_in_progress= 0;
+ /* list of waiting buffer ready threads */
+ buffer->waiting_flush= 0;
+  /* lock for the buffer. Current buffer also locks the handler */
+ if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Close transaction log file by descriptor
+
+ SYNOPSIS
+ translog_close_log_file()
+ file file descriptor
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_close_log_file(File file)
+{
+ int rc;
+ PAGECACHE_FILE fl;
+ fl.file= file;
+ flush_pagecache_blocks(log_descriptor.pagecache, &fl, FLUSH_RELEASE);
+ /*
+ Sync file when we close it
+    TODO: sync only if we have changed the log
+ */
+ rc= my_sync(file, MYF(MY_WME));
+ rc|= my_close(file, MYF(MY_WME));
+ return test(rc);
+}
+
+
+/*
+ Create and fill header of new file
+
+ SYNOPSIS
+ translog_create_new_file()
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_create_new_file()
+{
+ int i;
+ uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
+ DBUG_ENTER("translog_create_new_file");
+
+ if (log_descriptor.log_file_num[OPENED_FILES_NUM - 1] != -1 &&
+ translog_close_log_file(log_descriptor.log_file_num[OPENED_FILES_NUM -
+ 1]))
+ DBUG_RETURN(1);
+ for (i= OPENED_FILES_NUM - 1; i > 0; i--)
+ log_descriptor.log_file_num[i]= log_descriptor.log_file_num[i - 1];
+
+ if ((log_descriptor.log_file_num[0]=
+ open_logfile_by_number_no_cache(file_no)) == -1 ||
+ translog_write_file_header())
+ DBUG_RETURN(1);
+
+ if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, file_no,
+ CONTROL_FILE_UPDATE_ONLY_LOGNO))
+ DBUG_RETURN(1);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Lock the loghandler buffer
+
+ SYNOPSIS
+ translog_buffer_lock()
+ buffer This buffer which should be locked
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+#ifndef DBUG_OFF
+static my_bool translog_buffer_lock(struct st_translog_buffer *buffer)
+{
+ int res;
+ DBUG_ENTER("translog_buffer_lock");
+ DBUG_PRINT("enter",
+ ("Lock buffer #%u: (0x%lx) mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) &buffer->mutex));
+ res= (pthread_mutex_lock(&buffer->mutex) != 0);
+ DBUG_RETURN(res);
+}
+#else
+#define translog_buffer_lock(B) \
+ pthread_mutex_lock(&B->mutex);
+#endif
+
+
+/*
+ Unlock the loghandler buffer
+
+ SYNOPSIS
+ translog_buffer_unlock()
+ buffer This buffer which should be unlocked
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+#ifndef DBUG_OFF
+static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer)
+{
+ int res;
+ DBUG_ENTER("translog_buffer_unlock");
+ DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx) "
+ "mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) &buffer->mutex));
+
+ res= (pthread_mutex_unlock(&buffer->mutex) != 0);
+ DBUG_PRINT("enter", ("Unlocked buffer... #%u: 0x%lx mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) &buffer->mutex));
+ DBUG_RETURN(res);
+}
+#else
+#define translog_buffer_unlock(B) \
+ pthread_mutex_unlock(&B->mutex);
+#endif
+
+
+/*
+ Write a header on the page
+
+ SYNOPSIS
+ translog_new_page_header()
+ horizon Where to write the page
+ cursor Where to write the page
+
+ NOTE
+ - space for page header should be checked before
+*/
+
+static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ byte *ptr;
+
+ DBUG_ENTER("translog_new_page_header");
+ DBUG_ASSERT(cursor->ptr);
+
+ cursor->protected= 0;
+
+ ptr= cursor->ptr;
+ /* Page number */
+ int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
+ ptr+= 3;
+ /* File number */
+ int3store(ptr, LSN_FILE_NO(*horizon));
+ ptr+= 3;
+ *(ptr++)= (byte) log_descriptor.flags;
+ if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
+ {
+#ifndef DBUG_OFF
+ DBUG_PRINT("info", ("write 0x11223344 CRC to (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(*horizon),
+ (ulong) LSN_OFFSET(*horizon)));
+ /* This will be overwritten by real CRC; This is just for debugging */
+ int4store(ptr, 0x11223344);
+#endif
+ /* CRC will be put when page is finished */
+ ptr+= CRC_LENGTH;
+ }
+ if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
+ {
+ time_t tm;
+ uint16 tmp_time= time(&tm);
+ int2store(ptr, tmp_time);
+ ptr+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
+ }
+ {
+ uint len= (ptr - cursor->ptr);
+ (*horizon)+= len; /* it is increasing of offset part of the address */
+ cursor->current_page_fill= len;
+ if (!cursor->chaser)
+ cursor->buffer->size+= len;
+ }
+ cursor->ptr= ptr;
+ DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ DBUG_EXECUTE("info", translog_check_cursor(cursor););
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Put sector protection on the page image
+
+ SYNOPSIS
+ translog_put_sector_protection()
+ page reference on the page content
+ cursor cursor of the buffer
+
+ NOTES
+ We put a sector protection on all following sectors on the page,
+ except the first sector that is protected by page header.
+*/
+
+static void translog_put_sector_protection(byte *page,
+ struct st_buffer_cursor *cursor)
+{
+ byte *table= page + log_descriptor.page_overhead -
+ (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
+ uint16 value= uint2korr(table) + cursor->write_counter;
+ uint16 last_protected_sector= ((cursor->previous_offset - 1) /
+ DISK_DRIVE_SECTOR_SIZE);
+ uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
+ uint i, offset;
+ DBUG_ENTER("translog_put_sector_protection");
+
+ if (start_sector == 0)
+ start_sector= 1; /* First sector is protected */
+
+ DBUG_PRINT("enter", ("Write counter:%u value:%u offset:%u, "
+ "last protected:%u start sector:%u",
+ (uint) cursor->write_counter,
+ (uint) value,
+ (uint) cursor->previous_offset,
+ (uint) last_protected_sector, (uint) start_sector));
+ if (last_protected_sector == start_sector)
+ {
+ i= last_protected_sector * 2;
+ offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
+ /* restore data, because we modified sector which was protected */
+ if (offset < cursor->previous_offset)
+ page[offset]= table[i];
+ offset++;
+ if (offset < cursor->previous_offset)
+ page[offset]= table[i + 1];
+ }
+ for (i= start_sector * 2, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
+ i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
+ (i+= 2), (offset+= DISK_DRIVE_SECTOR_SIZE))
+ {
+ DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x%x",
+ i / 2, offset, (uint) page[offset],
+ (uint) page[offset + 1]));
+ table[i]= page[offset];
+ table[i + 1]= page[offset + 1];
+ int2store(page + offset, value);
+ DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x%x",
+ i / 2, offset, (uint) page[offset],
+ (uint) page[offset + 1]));
+ }
+ DBUG_VOID_RETURN;
+}
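+
+/*
+  Note on the mechanism above (summary, derived from this function and from
+  translog_page_validator()): every DISK_DRIVE_SECTOR_SIZE-sized sector of
+  the page is made to start with a 2-byte counter derived from the page's
+  write counter, while the two original bytes of each sector are saved in the
+  table in the page header. If the OS writes the page only partially, the
+  sectors that were not rewritten keep a stale counter;
+  translog_page_validator() detects the discontinuity, restores the saved
+  bytes of the consistent sectors and calls
+  translog_recover_page_up_to_sector() to wipe the chunks that fall beyond
+  the last consistent sector.
+*/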
+
+
+/*
+ Calculate CRC32 of given area
+
+ SYNOPSIS
+ translog_crc()
+ area Pointer of the area beginning
+ length The Area length
+
+ RETURN
+ CRC32
+*/
+
+static uint32 translog_crc(byte *area, uint length)
+{
+ return crc32(0L, area, length);
+}
+
+
+/*
+ Finish current page with zeros
+
+ SYNOPSIS
+ translog_finish_page()
+ horizon \ horizon & buffer pointers
+ cursor /
+*/
+
+static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
+ byte *page= cursor->ptr -cursor->current_page_fill;
+ DBUG_ENTER("translog_finish_page");
+ DBUG_PRINT("enter", ("Buffer: #%u 0x%lx "
+ "Buffer addr: (%lu,0x%lx) "
+ "Page addr: (%lu,0x%lx) "
+ "size:%lu (%lu) Pg:%u left:%u",
+ (uint) cursor->buffer_no, (ulong) cursor->buffer,
+ (ulong) LSN_FILE_NO(cursor->buffer->offset),
+ (ulong) LSN_OFFSET(cursor->buffer->offset),
+ (ulong) LSN_FILE_NO(*horizon),
+ (ulong) (LSN_OFFSET(*horizon) -
+ cursor->current_page_fill),
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer),
+ (uint) cursor->current_page_fill, (uint) left));
+ DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset));
+ DBUG_EXECUTE("info", translog_check_cursor(cursor););
+ if (cursor->protected)
+ {
+ DBUG_PRINT("info", ("Already protected and finished"));
+ DBUG_VOID_RETURN;
+ }
+ cursor->protected= 1;
+
+ DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
+ if (left != 0)
+ {
+ DBUG_PRINT("info", ("left: %u", (uint) left));
+ bzero(cursor->ptr, left);
+ cursor->ptr +=left;
+ (*horizon)+= left; /* offset increasing */
+ if (!cursor->chaser)
+ cursor->buffer->size+= left;
+ cursor->current_page_fill= 0;
+ DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx "
+ "chaser: %d Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no,
+ (ulong) cursor->buffer, cursor->chaser,
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ DBUG_EXECUTE("info", translog_check_cursor(cursor););
+ }
+ if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
+ {
+ translog_put_sector_protection(page, cursor);
+ DBUG_PRINT("info", ("drop write_counter"));
+ cursor->write_counter= 0;
+ cursor->previous_offset= 0;
+ }
+ if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
+ {
+ uint32 crc= translog_crc(page + log_descriptor.page_overhead,
+ TRANSLOG_PAGE_SIZE -
+ log_descriptor.page_overhead);
+ DBUG_PRINT("info", ("CRC: %lx", (ulong) crc));
+ /* We have page number, file number and flag before crc */
+ int4store(page + 3 + 3 + 1, crc);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+  Wait until all threads have finished filling this buffer
+
+ SYNOPSIS
+ translog_wait_for_writers()
+      buffer       The buffer to be checked
+
+ NOTE
+ This buffer should be locked
+*/
+
+static void translog_wait_for_writers(struct st_translog_buffer *buffer)
+{
+ struct st_my_thread_var *thread= my_thread_var;
+ DBUG_ENTER("translog_wait_for_writers");
+ DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (int) buffer->copy_to_buffer_in_progress));
+
+ while (buffer->copy_to_buffer_in_progress)
+ {
+ DBUG_PRINT("info", ("wait for writers... "
+ "buffer: #%u 0x%lx "
+ "mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) &buffer->mutex));
+ DBUG_ASSERT(buffer->file != -1);
+ wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread,
+ &buffer->mutex);
+ DBUG_PRINT("info", ("wait for writers done "
+ "buffer: #%u 0x%lx "
+ "mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) &buffer->mutex));
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+
+ Wait for buffer to become free
+
+ SYNOPSIS
+ translog_wait_for_buffer_free()
+ buffer The buffer we are waiting for
+
+ NOTE
+ - this buffer should be locked
+*/
+
+static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
+{
+ struct st_my_thread_var *thread= my_thread_var;
+ DBUG_ENTER("translog_wait_for_buffer_free");
+ DBUG_PRINT("enter", ("Buffer: #%u 0x%lx copies in progress: %u "
+ "File: %d size: 0x%lu",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (int) buffer->copy_to_buffer_in_progress,
+ buffer->file, (ulong) buffer->size));
+
+ translog_wait_for_writers(buffer);
+
+ while (buffer->file != -1)
+ {
+ DBUG_PRINT("info", ("wait for writers... "
+ "buffer: #%u 0x%lx "
+ "mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) &buffer->mutex));
+ wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread,
+ &buffer->mutex);
+ DBUG_PRINT("info", ("wait for writers done. "
+ "buffer: #%u 0x%lx "
+ "mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) &buffer->mutex));
+ }
+ DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Initialize the cursor for a buffer
+
+ SYNOPSIS
+ translog_cursor_init()
+ buffer The buffer
+ cursor It's cursor
+      cursor               Its cursor
+*/
+
+static void translog_cursor_init(struct st_buffer_cursor *cursor,
+ struct st_translog_buffer *buffer,
+ uint8 buffer_no)
+{
+ DBUG_ENTER("translog_cursor_init");
+ cursor->ptr= buffer->buffer;
+ cursor->buffer= buffer;
+ cursor->buffer_no= buffer_no;
+ cursor->current_page_fill= 0;
+ cursor->chaser= (cursor != &log_descriptor.bc);
+ cursor->write_counter= 0;
+ cursor->previous_offset= 0;
+ cursor->protected= 0;
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Initialize buffer for current file
+
+ SYNOPSIS
+ translog_start_buffer()
+ buffer The buffer
+      cursor               Its cursor
+ buffer_no Number of buffer
+*/
+
+static void translog_start_buffer(struct st_translog_buffer *buffer,
+ struct st_buffer_cursor *cursor,
+ uint buffer_no)
+{
+ DBUG_ENTER("translog_start_buffer");
+ DBUG_PRINT("enter",
+ ("Assign buffer: #%u (0x%lx) to file: %d offset: 0x%lx(%lu)",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ log_descriptor.log_file_num[0],
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon)));
+ DBUG_ASSERT(buffer_no == buffer->buffer_no);
+ buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN;
+ buffer->offset= log_descriptor.horizon;
+ buffer->file= log_descriptor.log_file_num[0];
+ buffer->overlay= 0;
+ buffer->size= 0;
+ translog_cursor_init(cursor, buffer, buffer_no);
+ DBUG_PRINT("info", ("init cursor #%u: 0x%lx chaser: %d Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ DBUG_EXECUTE("info", translog_check_cursor(cursor););
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Switch to the next buffer in a chain
+
+ SYNOPSIS
+ translog_buffer_next()
+      horizon           \ Pointers to current position in file and buffer
+      cursor            /
+      new_file          Also start a new file
+
+ NOTE:
+ - loghandler should be locked
+    - after return, new and old buffers are still locked
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ my_bool new_file)
+{
+ uint old_buffer_no= cursor->buffer_no;
+ uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
+ my_bool chasing= cursor->chaser;
+ DBUG_ENTER("translog_buffer_next");
+
+ DBUG_PRINT("info", ("horizon: (%lu,0x%lx) chasing: %d",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon), chasing));
+
+ DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);
+
+ translog_finish_page(horizon, cursor);
+
+ if (!chasing)
+ {
+ translog_buffer_lock(new_buffer);
+ translog_wait_for_buffer_free(new_buffer);
+ }
+#ifndef DBUG_OFF
+ else
+ DBUG_ASSERT(new_buffer->file != 0);
+#endif
+ if (new_file)
+ {
+ /* move the horizon to the next file and its header page */
+ (*horizon)+= LSN_ONE_FILE;
+ (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
+ if (!chasing && translog_create_new_file())
+ {
+ DBUG_RETURN(1);
+ }
+ }
+
+ /* prepare next page */
+ if (chasing)
+ translog_cursor_init(cursor, new_buffer, new_buffer_no);
+ else
+ translog_start_buffer(new_buffer, cursor, new_buffer_no);
+ translog_new_page_header(horizon, cursor);
+ DBUG_RETURN(0);
+}
+
+
+/*
+  Set max LSN sent to file
+
+ SYNOPSIS
+ translog_set_sent_to_file()
+ lsn LSN to assign
+*/
+
+static void translog_set_sent_to_file(LSN *lsn)
+{
+ DBUG_ENTER("translog_set_sent_to_file");
+ pthread_mutex_lock(&log_descriptor.sent_to_file_lock);
+ DBUG_ASSERT(cmp_translog_addr(*lsn, log_descriptor.sent_to_file) >= 0);
+ log_descriptor.sent_to_file= *lsn;
+ pthread_mutex_unlock(&log_descriptor.sent_to_file_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+  Get max LSN sent to file
+
+ SYNOPSIS
+ translog_get_sent_to_file()
+      lsn              Where to return the LSN
+*/
+
+static void translog_get_sent_to_file(LSN *lsn)
+{
+ DBUG_ENTER("translog_get_sent_to_file");
+ pthread_mutex_lock(&log_descriptor.sent_to_file_lock);
+ *lsn= log_descriptor.sent_to_file;
+ pthread_mutex_unlock(&log_descriptor.sent_to_file_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Get first chunk address on the given page
+
+ SYNOPSIS
+ translog_get_first_chunk_offset()
+ page The page where to find first chunk
+
+ RETURN
+ first chunk offset
+*/
+
+static uint16 translog_get_first_chunk_offset(byte *page)
+{
+ uint16 page_header= 7;
+ DBUG_ENTER("translog_get_first_chunk_offset");
+
+ if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
+ page_header+= 4;
+ if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
+ page_header+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
+ DBUG_RETURN(page_header);
+}
+
+
+/*
+ Write coded length of record
+
+ SYNOPSIS
+ translog_write_variable_record_1group_code_len
+ dst Destination buffer pointer
+ length Length which should be coded
+ header_len Calculated total header length
+*/
+
+static void
+translog_write_variable_record_1group_code_len(byte *dst,
+ translog_size_t length,
+ uint16 header_len)
+{
+ switch (header_len) {
+ case 6: /* (5 + 1) */
+ DBUG_ASSERT(length <= 250);
+ *dst= (uint8) length;
+ return;
+ case 8: /* (5 + 3) */
+ DBUG_ASSERT(length <= 0xFFFF);
+ *dst= 251;
+ int2store(dst + 1, length);
+ return;
+ case 9: /* (5 + 4) */
+ DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
+ *dst= 252;
+ int3store(dst + 1, length);
+ return;
+ case 10: /* (5 + 5) */
+ *dst= 253;
+ int4store(dst + 1, length);
+ return;
+ default:
+ DBUG_ASSERT(0);
+ }
+ return;
+}
+
+
+/*
+ Decode record data length and advance given pointer to the next field
+
+ SYNOPSIS
+ translog_variable_record_1group_decode_len()
+ src The pointer to the pointer to the length beginning
+
+ RETURN
+ decoded length
+*/
+
+static translog_size_t translog_variable_record_1group_decode_len(byte **src)
+{
+ uint8 first= (uint8) (**src);
+ switch (first) {
+ case 251:
+ (*src)+= 3;
+ return (uint2korr((*src) - 2));
+ case 252:
+ (*src)+= 4;
+ return (uint3korr((*src) - 3));
+ case 253:
+ (*src)+= 5;
+ return (uint4korr((*src) - 4));
+ case 254:
+ case 255:
+ DBUG_ASSERT(0); /* reserved for future use */
+ return (0);
+ default:
+ (*src)++;
+ return (first);
+ }
+}
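+
+/*
+  Illustrative sketch (hypothetical, not used by the code): how the two
+  functions above pair up. The length is stored in 1, 3, 4 or 5 bytes using
+  the marker values 251/252/253; the "5 +" in header_len is presumably the
+  fixed part of the record header that precedes the coded length, and the
+  selection below simply mirrors the encoder's assertions.
+*/
+#ifdef LENGTH_CODING_SKETCH
+static void length_coding_round_trip_sketch(translog_size_t length)
+{
+  byte buff[5], *ptr= buff;
+  uint16 header_len;
+  if (length <= 250)
+    header_len= 5 + 1;
+  else if (length <= 0xFFFF)
+    header_len= 5 + 3;
+  else if (length <= (ulong) 0xFFFFFF)
+    header_len= 5 + 4;
+  else
+    header_len= 5 + 5;
+  translog_write_variable_record_1group_code_len(buff, length, header_len);
+  DBUG_ASSERT(translog_variable_record_1group_decode_len(&ptr) == length);
+}
+#endif /* LENGTH_CODING_SKETCH */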
+
+
+/*
+ Get total length of this chunk (not only body)
+
+ SYNOPSIS
+ translog_get_total_chunk_length()
+ page The page where chunk placed
+ offset Offset of the chunk on this place
+
+ RETURN
+ total length of the chunk
+*/
+
+static uint16 translog_get_total_chunk_length(byte *page, uint16 offset)
+{
+ DBUG_ENTER("translog_get_total_chunk_length");
+ switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
+ case TRANSLOG_CHUNK_LSN:
+ {
+ /* 0 chunk referred as LSN (head or tail) */
+ translog_size_t rec_len;
+ byte *start= page + offset;
+ byte *ptr= start + 1 + 2;
+ uint16 chunk_len, header_len, page_rest;
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
+ rec_len= translog_variable_record_1group_decode_len(&ptr);
+ chunk_len= uint2korr(ptr);
+ header_len= (ptr -start) + 2;
+ DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
+ (ulong) rec_len, (uint) chunk_len, (uint) header_len));
+ if (chunk_len)
+ {
+ DBUG_PRINT("info", ("chunk len: %u + %u = %u",
+ (uint) header_len, (uint) chunk_len,
+ (uint) (chunk_len + header_len)));
+ DBUG_RETURN(chunk_len + header_len);
+ }
+ page_rest= TRANSLOG_PAGE_SIZE - offset;
+ DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
+ if (rec_len + header_len < page_rest)
+ DBUG_RETURN(rec_len + header_len);
+ DBUG_RETURN(page_rest);
+ break;
+ }
+ case TRANSLOG_CHUNK_FIXED:
+ {
+ byte *ptr;
+ uint type= page[offset] & TRANSLOG_REC_TYPE;
+ uint length;
+ int i;
+ /* 1 (pseudo)fixed record (also LSN) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
+ DBUG_ASSERT(log_record_type_descriptor[type].class ==
+ LOGRECTYPE_FIXEDLENGTH ||
+ log_record_type_descriptor[type].class ==
+ LOGRECTYPE_PSEUDOFIXEDLENGTH);
+ if (log_record_type_descriptor[type].class == LOGRECTYPE_FIXEDLENGTH)
+ {
+ DBUG_PRINT("info",
+ ("Fixed length: %u",
+ (uint) (log_record_type_descriptor[type].fixed_length + 3)));
+ DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
+ }
+ {
+ ptr= page + offset + 3; /* first compressed LSN */
+ length= log_record_type_descriptor[type].fixed_length + 3;
+ for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++)
+ {
+ /* first 2 bits is length - 2 */
+ uint len= ((((uint8) (*ptr)) & TRANSLOG_CLSN_LEN_BITS) >> 6) + 2;
+ ptr+= len;
+ /* subtract economized bytes */
+ length-= (TRANSLOG_CLSN_MAX_LEN - len);
+ }
+ DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
+ DBUG_RETURN(length);
+ }
+ break;
+ }
+ case TRANSLOG_CHUNK_NOHDR:
+ /* 2 no header chunk (till page end) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR length: %u",
+ (uint) (TRANSLOG_PAGE_SIZE - offset)));
+ DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
+ break;
+ case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
+ DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
+ DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3));
+ DBUG_RETURN(uint2korr(page + offset + 1) + 3);
+ break;
+  default:
+    DBUG_ASSERT(0);
+  }
+  DBUG_RETURN(0);                               /* keep compiler quiet */
+}
+
+
+/*
+ Flush given buffer
+
+ SYNOPSIS
+ translog_buffer_flush()
+ buffer This buffer should be flushed
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
+{
+ uint32 i;
+ DBUG_ENTER("translog_buffer_flush");
+ DBUG_PRINT("enter",
+ ("Buffer: #%u 0x%lx: "
+ "file: %d offset: (%lu,0x%lx) size: %lu",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ buffer->file,
+ (ulong) LSN_FILE_NO(buffer->offset),
+ (ulong) LSN_OFFSET(buffer->offset),
+ (ulong) buffer->size));
+
+ DBUG_ASSERT(buffer->file != -1);
+
+ translog_wait_for_writers(buffer);
+ if (buffer->overlay && buffer->overlay->file != -1)
+ {
+ struct st_translog_buffer *overlay= buffer->overlay;
+ translog_buffer_unlock(buffer);
+ translog_buffer_lock(overlay);
+ translog_wait_for_buffer_free(overlay);
+ translog_buffer_unlock(overlay);
+ translog_buffer_lock(buffer);
+ }
+
+ for (i= 0; i < buffer->size; i+= TRANSLOG_PAGE_SIZE)
+ {
+ PAGECACHE_FILE file;
+ file.file= buffer->file;
+ if (pagecache_write(log_descriptor.pagecache,
+ &file,
+ (LSN_OFFSET(buffer->offset) + i) / TRANSLOG_PAGE_SIZE,
+ 3,
+ buffer->buffer + i,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED, PAGECACHE_WRITE_DONE, 0))
+ {
+ UNRECOVERABLE_ERROR(("Can't write page (%lu,0x%lx) to pagecache",
+ (ulong) buffer->file,
+ (ulong) (LSN_OFFSET(buffer->offset)+ i)));
+ }
+ }
+ if (my_pwrite(buffer->file, (char*) buffer->buffer,
+ buffer->size, LSN_OFFSET(buffer->offset),
+ MYF(MY_WME | MY_NABP)))
+ {
+ UNRECOVERABLE_ERROR(("Can't write buffer (%lu,0x%lx) size %lu "
+ "to the disk (%d)",
+ (ulong) buffer->file,
+ (ulong) LSN_OFFSET(buffer->offset),
+ (ulong) buffer->size, errno));
+ DBUG_RETURN(1);
+ }
+ if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */
+ translog_set_sent_to_file(&buffer->last_lsn);
+
+ /* Free buffer */
+ buffer->file= -1;
+ buffer->overlay= 0;
+ if (buffer->waiting_filling_buffer.last_thread)
+ {
+ wqueue_release_queue(&buffer->waiting_filling_buffer);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Recover page with sector protection (wipe out failed chunks)
+
+  SYNOPSIS
+ translog_recover_page_up_to_sector()
+ page reference on the page
+ offset offset of failed sector
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_recover_page_up_to_sector(byte *page, uint16 offset)
+{
+ uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
+ DBUG_ENTER("translog_recover_page_up_to_sector");
+ DBUG_PRINT("enter", ("offset: %u first chunk: %u",
+ (uint) offset, (uint) chunk_offset));
+
+ while (page[chunk_offset] != '\0' && chunk_offset < offset)
+ {
+ uint16 chunk_length;
+ if ((chunk_length=
+ translog_get_total_chunk_length(page, chunk_offset)) == 0)
+ {
+      UNRECOVERABLE_ERROR(("can't get chunk length (offset %u)",
+ (uint) chunk_offset));
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("chunk: offset: %u length %u",
+ (uint) chunk_offset, (uint) chunk_length));
+ if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
+ {
+ UNRECOVERABLE_ERROR(("damaged chunk (offset %u) in trusted area",
+ (uint) chunk_offset));
+ DBUG_RETURN(1);
+ }
+ chunk_offset+= chunk_length;
+ }
+
+ valid_chunk_end= chunk_offset;
+ /* end of trusted area - sector parsing */
+ while (page[chunk_offset] != '\0')
+ {
+ uint16 chunk_length;
+ if ((chunk_length=
+ translog_get_total_chunk_length(page, chunk_offset)) == 0)
+ break;
+
+ DBUG_PRINT("info", ("chunk: offset: %u length %u",
+ (uint) chunk_offset, (uint) chunk_length));
+ if (((ulong) chunk_offset) + ((ulong) chunk_length) >
+ (uint) (offset + DISK_DRIVE_SECTOR_SIZE))
+ break;
+
+ chunk_offset+= chunk_length;
+ valid_chunk_end= chunk_offset;
+ }
+ DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));
+
+ bzero(page + valid_chunk_end, TRANSLOG_PAGE_SIZE - valid_chunk_end);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Log page validator
+
+ SYNOPSIS
+ translog_page_validator()
+ page_addr The page to check
+      data             Data needed for validation (the address in this case)
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+static my_bool translog_page_validator(byte *page_addr, gptr data_ptr)
+{
+ uint this_page_page_overhead;
+ uint flags;
+ byte *page= (byte*) page_addr, *page_pos;
+ TRANSLOG_VALIDATOR_DATA *data= (TRANSLOG_VALIDATOR_DATA *) data_ptr;
+ TRANSLOG_ADDRESS addr= *(data->addr);
+ DBUG_ENTER("translog_page_validator");
+
+ data->was_recovered= 0;
+
+ if (uint3korr(page) != LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE ||
+ uint3korr(page + 3) != LSN_FILE_NO(addr))
+ {
+ UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
+ "page address written in the page is incorrect: "
+ "File %lu instead of %lu or page %lu instead of %lu",
+ (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr),
+ (ulong) uint3korr(page + 3), (ulong) LSN_FILE_NO(addr),
+ (ulong) uint3korr(page),
+ (ulong) LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE));
+ DBUG_RETURN(1);
+ }
+ flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
+ this_page_page_overhead= page_overhead[flags];
+ if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
+ TRANSLOG_RECORD_CRC))
+ {
+ UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
+ "Garbage in the page flags field detected : %x",
+ (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr),
+ (uint) flags));
+ DBUG_RETURN(1);
+ }
+ page_pos= page + (3 + 3 + 1);
+ if (flags & TRANSLOG_PAGE_CRC)
+ {
+ uint32 crc= translog_crc(page + this_page_page_overhead,
+ TRANSLOG_PAGE_SIZE -
+ this_page_page_overhead);
+ if (crc != uint4korr(page_pos))
+ {
+ UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
+ "CRC mismatch: calculated: %lx on the page %lx",
+ (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr),
+ (ulong) crc, (ulong) uint4korr(page_pos)));
+ DBUG_RETURN(1);
+ }
+ page_pos+= CRC_LENGTH; /* Skip crc */
+ }
+ if (flags & TRANSLOG_SECTOR_PROTECTION)
+ {
+ uint i, offset;
+ byte *table= page_pos;
+ uint16 current= uint2korr(table);
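+ /*
+ Sector protection check: when the page was written, the first two
+ bytes of every disk sector were replaced by a counter and the original
+ bytes were saved in this table. A sector whose counter jumped too far
+ from the previous one was not written completely; for the other
+ sectors the original bytes are restored below.
+ */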
+ for (i= 2, offset= DISK_DRIVE_SECTOR_SIZE;
+ i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2;
+ i+= 2, offset+= DISK_DRIVE_SECTOR_SIZE)
+ {
+ /*
+ TODO: add chunk counting for "suspecting" sectors (difference is
+ more than 1-2)
+ */
+ uint16 test= uint2korr(page + offset);
+ DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
+ "read: 0x%x stored: 0x%x%x",
+ i / 2, offset, (ulong) current,
+ (uint) uint2korr(page + offset), (uint) table[i],
+ (uint) table[i + 1]));
+ if (((test < current) &&
+ (LL(0xFFFF) - current + test > DISK_DRIVE_SECTOR_SIZE / 3)) ||
+ ((test >= current) &&
+ (test - current > DISK_DRIVE_SECTOR_SIZE / 3)))
+ {
+ if (translog_recover_page_up_to_sector(page, offset))
+ DBUG_RETURN(1);
+ data->was_recovered= 1;
+ DBUG_RETURN(0);
+ }
+
+ /* Restore the original bytes on the page */
+ page[offset]= table[i];
+ page[offset + 1]= table[i + 1];
+ current= test;
+ DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
+ "read: 0x%x stored: 0x%x%x",
+ i / 2, offset, (ulong) current,
+ (uint) uint2korr(page + offset), (uint) table[i],
+ (uint) table[i + 1]));
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Get log page by file number and offset of the beginning of the page
+
+ SYNOPSIS
+ translog_get_page()
+ data validator data, which contains the page address
+ buffer Buffer in which to place the page
+ (might not be used in some cache implementations)
+
+ RETURN
+ NULL - Error
+ # pointer to the page (possibly into the page cache) which should be used to read this page
+*/
+
+static byte *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, byte *buffer)
+{
+ TRANSLOG_ADDRESS addr= *(data->addr);
+ uint cache_index;
+ uint32 file_no= LSN_FILE_NO(addr);
+ DBUG_ENTER("translog_get_page");
+ DBUG_PRINT("enter", ("File: %lu Offset: %lu(0x%lx)",
+ (ulong) file_no,
+ (ulong) LSN_OFFSET(addr),
+ (ulong) LSN_OFFSET(addr)));
+
+ /* it really is a page address */
+ DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
+
+ if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - file_no) <
+ OPENED_FILES_NUM)
+ {
+ PAGECACHE_FILE file;
+ /* file in the cache */
+ if (log_descriptor.log_file_num[cache_index] == -1)
+ {
+ if ((log_descriptor.log_file_num[cache_index]=
+ open_logfile_by_number_no_cache(file_no)) == -1)
+ DBUG_RETURN(NULL);
+ }
+ file.file= log_descriptor.log_file_num[cache_index];
+
+ buffer= (byte*)
+ pagecache_valid_read(log_descriptor.pagecache, &file,
+ LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
+ 3, (char*) buffer,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0,
+ &translog_page_validator, (gptr) data);
+ }
+ else
+ {
+ /*
+ TODO: WE KEEP THE LAST OPENED_FILES_NUM FILES IN THE LOG CACHE, NOT
+ THE LAST USED FILES. THIS WILL BE A NOTABLE PROBLEM IF WE ARE
+ FOLLOWING AN UNDO CHAIN THAT GOES OVER MANY OLD LOG FILES. WE WILL
+ PROBABLY NEED SPECIAL HANDLING OF THIS OR HAVE A FILO FOR THE LOG
+ FILES.
+ */
+
+ File file= open_logfile_by_number_no_cache(file_no);
+ if (file == -1)
+ DBUG_RETURN(NULL);
+ if (my_pread(file, (char*) buffer, TRANSLOG_PAGE_SIZE,
+ LSN_OFFSET(addr), MYF(MY_FNABP | MY_WME)))
+ buffer= NULL;
+ else if (translog_page_validator((byte*) buffer, (gptr) data))
+ buffer= NULL;
+ my_close(file, MYF(MY_WME));
+ }
+ DBUG_RETURN(buffer);
+}
+
+
+/*
+ Finds last page of the given log file
+
+ SYNOPSIS
+ translog_get_last_page_addr()
+ addr Address structure to fill; on entry it contains the
+ file number of the log file
+ last_page_ok Set to 1 if the last page was complete
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
+ my_bool *last_page_ok)
+{
+ MY_STAT stat_buff, *stat;
+ char path[FN_REFLEN];
+ uint32 rec_offset;
+ uint32 file_no= LSN_FILE_NO(*addr);
+ DBUG_ENTER("translog_get_last_page_addr");
+
+ if (!(stat= my_stat(translog_filename_by_fileno(file_no, path),
+ &stat_buff, MYF(MY_WME))))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("File size: %lu", (ulong) stat->st_size));
+ if (stat->st_size > TRANSLOG_PAGE_SIZE)
+ {
+ rec_offset= (((stat->st_size / TRANSLOG_PAGE_SIZE) - 1) *
+ TRANSLOG_PAGE_SIZE);
+ *last_page_ok= (stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE);
+ }
+ else
+ {
+ *last_page_ok= 0;
+ rec_offset= 0;
+ }
+ *addr= MAKE_LSN(file_no, rec_offset);
+ DBUG_PRINT("info", ("Last page: 0x%lx ok: %d", (ulong) rec_offset,
+ *last_page_ok));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Get the number of bytes needed to store the record length
+
+ SYNOPSIS
+ translog_variable_record_length_bytes()
+ length Record length which will be coded
+
+ RETURN
+ 1,3,4,5 - number of bytes to store given length
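+
+ NOTE
+ A few examples, derived from the thresholds in the code below:
+ length 200 -> 1 byte, 1000 -> 3 bytes, 100000 -> 4 bytes,
+ 20000000 -> 5 bytes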
+*/
+
+static uint translog_variable_record_length_bytes(translog_size_t length)
+{
+ if (length < 250)
+ return 1;
+ if (length < 0xFFFF)
+ return 3;
+ if (length < (ulong) 0xFFFFFF)
+ return 4;
+ return 5;
+}
+
+
+/*
+ Get the length of this chunk's header
+
+ SYNOPSIS
+ translog_get_chunk_header_length()
+ page The page where chunk placed
+ offset Offset of the chunk on the page
+
+ RETURN
+ # length of the chunk header
+ 0 Error
+*/
+
+static uint16 translog_get_chunk_header_length(byte *page, uint16 offset)
+{
+ DBUG_ENTER("translog_get_chunk_header_length");
+ page+= offset;
+ switch (*page & TRANSLOG_CHUNK_TYPE) {
+ case TRANSLOG_CHUNK_LSN:
+ {
+ /* chunk type 0: a chunk referred to by LSN (head or tail) */
+ translog_size_t rec_len;
+ byte *start= page;
+ byte *ptr= start + 1 + 2;
+ uint16 chunk_len, header_len;
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
+ rec_len= translog_variable_record_1group_decode_len(&ptr);
+ chunk_len= uint2korr(ptr);
+ header_len= (ptr - start) +2;
+ DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
+ (ulong) rec_len, (uint) chunk_len, (uint) header_len));
+ if (chunk_len)
+ {
+ /* TODO: find header end */
+ DBUG_ASSERT(0);
+ }
+ DBUG_RETURN(header_len);
+ break;
+ }
+ case TRANSLOG_CHUNK_FIXED:
+ {
+ /* 1 (pseudo)fixed record (also LSN) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
+ DBUG_RETURN(3);
+ }
+ case TRANSLOG_CHUNK_NOHDR:
+ /* 2 no header chunk (till page end) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
+ DBUG_RETURN(1);
+ break;
+ case TRANSLOG_CHUNK_LNGTH:
+ /* 3 chunk with chunk length */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
+ DBUG_RETURN(3);
+ break;
+ default:
+ DBUG_ASSERT(0);
+ DBUG_RETURN(0); /* impossible chunk type; keeps the compiler happy */
+ }
+}
+
+
+/*
+ Initialize transaction log
+
+ SYNOPSIS
+ translog_init()
+ directory Directory where log files are put
+ log_file_max_size max size of one log file (for new log creation)
+ server_version version of MySQL server (MYSQL_VERSION_ID)
+ server_id server ID (replication & Co)
+ pagecache Page cache for the log reads
+ flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
+ TRANSLOG_RECORD_CRC)
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+my_bool translog_init(const char *directory,
+ uint32 log_file_max_size,
+ uint32 server_version,
+ uint32 server_id, PAGECACHE *pagecache, uint flags)
+{
+ int i;
+ int old_log_was_recovered= 0, logs_found= 0;
+ uint old_flags= flags;
+ TRANSLOG_ADDRESS sure_page, last_page, last_valid_page;
+ DBUG_ENTER("translog_init");
+
+
+ if (pthread_mutex_init(&log_descriptor.sent_to_file_lock,
+ MY_MUTEX_INIT_FAST))
+ DBUG_RETURN(1);
+
+ /* Directory to store files */
+ unpack_dirname(log_descriptor.directory, directory);
+
+ if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
+ O_RDONLY, MYF(MY_WME))) < 0)
+ {
+ UNRECOVERABLE_ERROR(("Error %d during opening directory '%s'",
+ errno, log_descriptor.directory));
+ DBUG_RETURN(1);
+ }
+
+ /* max size of one log file (for new log creation) */
+ log_descriptor.log_file_max_size=
+ log_file_max_size - (log_file_max_size % TRANSLOG_PAGE_SIZE);
+ /* server version */
+ log_descriptor.server_version= server_version;
+ /* server ID */
+ log_descriptor.server_id= server_id;
+ /* Page cache for the log reads */
+ log_descriptor.pagecache= pagecache;
+ /* Flags */
+ DBUG_ASSERT((flags &
+ ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
+ TRANSLOG_RECORD_CRC)) == 0);
+ log_descriptor.flags= flags;
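+ /*
+ Precompute the page overhead for every flag combination. The base of
+ 7 bytes corresponds to the page header checked by
+ translog_page_validator(): 3-byte page number, 3-byte file number and
+ 1-byte flags; a page CRC adds CRC_LENGTH bytes and sector protection
+ adds 2 bytes per disk sector.
+ */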
+ for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
+ {
+ page_overhead[i]= 7;
+ if (i & TRANSLOG_PAGE_CRC)
+ page_overhead[i]+= CRC_LENGTH;
+ if (i & TRANSLOG_SECTOR_PROTECTION)
+ page_overhead[i]+= (TRANSLOG_PAGE_SIZE /
+ DISK_DRIVE_SECTOR_SIZE) * 2;
+ }
+ log_descriptor.page_overhead= page_overhead[flags];
+ log_descriptor.page_capacity_chunk_2=
+ TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
+ DBUG_ASSERT(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
+ log_descriptor.buffer_capacity_chunk_2=
+ (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
+ log_descriptor.page_capacity_chunk_2;
+ log_descriptor.half_buffer_capacity_chunk_2=
+ log_descriptor.buffer_capacity_chunk_2 / 2;
+ DBUG_PRINT("info",
+ ("Overhead: %u pc2: %u bc2: %u, bc2/2: %u",
+ log_descriptor.page_overhead,
+ log_descriptor.page_capacity_chunk_2,
+ log_descriptor.buffer_capacity_chunk_2,
+ log_descriptor.half_buffer_capacity_chunk_2));
+
+ /* *** Current state of the log handler *** */
+
+ /* Init log handler file handlers cache */
+ for (i= 0; i < OPENED_FILES_NUM; i++)
+ log_descriptor.log_file_num[i]= -1;
+
+ /* just to init it somehow */
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+
+ /* Buffers for log writing */
+ for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
+ {
+ if (translog_buffer_init(log_descriptor.buffers + i))
+ DBUG_RETURN(1);
+#ifndef DBUG_OFF
+ log_descriptor.buffers[i].buffer_no= (uint8) i;
+#endif
+ DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx",
+ i, (ulong) (log_descriptor.buffers + i)));
+ }
+
+ logs_found= (last_logno != CONTROL_FILE_IMPOSSIBLE_FILENO);
+
+ if (logs_found)
+ {
+ my_bool pageok;
+ /*
+ TODO: scan directory for maria_log.XXXXXXXX files and find
+ highest XXXXXXXX & set logs_found
+ TODO: check that last checkpoint within present log addresses space
+
+ find the log end
+ */
+ if (LSN_FILE_NO(last_checkpoint_lsn) == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
+ /* there were no checkpoints; we will read from the beginning */
+ sure_page= (LSN_ONE_FILE | TRANSLOG_PAGE_SIZE);
+ }
+ else
+ {
+ sure_page= last_checkpoint_lsn;
+ DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
+ sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
+ }
+ log_descriptor.horizon= last_page= MAKE_LSN(last_logno,0);
+ if (translog_get_last_page_addr(&last_page, &pageok))
+ DBUG_RETURN(1);
+ if (LSN_OFFSET(last_page) == 0)
+ {
+ if (LSN_FILE_NO(last_page) == 1)
+ {
+ logs_found= 0; /* file #1 has no pages */
+ }
+ else
+ {
+ last_page-= LSN_ONE_FILE;
+ if (translog_get_last_page_addr(&last_page, &pageok))
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ if (logs_found)
+ {
+ TRANSLOG_ADDRESS current_page= sure_page;
+ my_bool pageok;
+
+ DBUG_ASSERT(sure_page <= last_page);
+
+ /* TODO: check page size */
+
+ last_valid_page= CONTROL_FILE_IMPOSSIBLE_LSN;
+ /* scan and validate pages */
+ do
+ {
+ TRANSLOG_ADDRESS current_file_last_page;
+ current_file_last_page= current_page;
+ if (translog_get_last_page_addr(&current_file_last_page, &pageok))
+ DBUG_RETURN(1);
+ if (!pageok)
+ {
+ DBUG_PRINT("error", ("File %lu have no complete last page",
+ (ulong) LSN_FILE_NO(current_file_last_page)));
+ old_log_was_recovered= 1;
+ /* This file was not written to the end, so it should be the last one */
+ last_page= current_file_last_page;
+ /* TODO: issue warning */
+ }
+ do
+ {
+ TRANSLOG_VALIDATOR_DATA data;
+ byte buffer[TRANSLOG_PAGE_SIZE], *page;
+ data.addr= &current_page;
+ if ((page= translog_get_page(&data, buffer)) == NULL)
+ DBUG_RETURN(1);
+ if (data.was_recovered)
+ {
+ DBUG_PRINT("error", ("file no: %lu (%d) "
+ "rec_offset: 0x%lx (%lu) (%d)",
+ (ulong) LSN_FILE_NO(current_page),
+ (uint3korr(page + 3) !=
+ LSN_FILE_NO(current_page)),
+ (ulong) LSN_OFFSET(current_page),
+ (ulong) (LSN_OFFSET(current_page) /
+ TRANSLOG_PAGE_SIZE),
+ (uint3korr(page) !=
+ LSN_OFFSET(current_page) /
+ TRANSLOG_PAGE_SIZE)));
+ old_log_was_recovered= 1;
+ break;
+ }
+ old_flags= page[TRANSLOG_PAGE_FLAGS];
+ last_valid_page= current_page;
+ current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
+ } while (current_page <= current_file_last_page);
+ current_page+= LSN_ONE_FILE;
+ current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
+ } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
+ !old_log_was_recovered);
+ if (last_valid_page == CONTROL_FILE_IMPOSSIBLE_LSN)
+ {
+ /* Panic!!! Even the page which should be valid is invalid */
+ /* TODO: issue error */
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("Last valid page is in file: %lu "
+ "offset: %lu (0x%lx) "
+ "Logs found: %d was recovered: %d "
+ "flags match: %d",
+ (ulong) LSN_FILE_NO(last_valid_page),
+ (ulong) LSN_OFFSET(last_valid_page),
+ (ulong) LSN_OFFSET(last_valid_page),
+ logs_found, old_log_was_recovered,
+ (old_flags == flags)));
+
+ /* TODO: check server ID */
+ if (logs_found && !old_log_was_recovered && old_flags == flags)
+ {
+ TRANSLOG_VALIDATOR_DATA data;
+ byte buffer[TRANSLOG_PAGE_SIZE], *page;
+ uint16 chunk_offset;
+ data.addr= &last_valid_page;
+ /* continue old log */
+ DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
+ LSN_FILE_NO(log_descriptor.horizon));
+ if ((page= translog_get_page(&data, buffer)) == NULL ||
+ (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
+ DBUG_RETURN(1);
+
+ /* Put the filled part of the old page into the buffer */
+ log_descriptor.horizon= last_valid_page;
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ /*
+ Free space is filled with 0, and the first byte of a
+ real chunk can't be 0
+ */
+ while (chunk_offset < TRANSLOG_PAGE_SIZE && page[chunk_offset] != '\0')
+ {
+ uint16 chunk_length;
+ if ((chunk_length=
+ translog_get_total_chunk_length(page, chunk_offset)) == 0)
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("chunk: offset: %u length: %u",
+ (uint) chunk_offset, (uint) chunk_length));
+ chunk_offset+= chunk_length;
+
+ /* chunk can't cross the page border */
+ DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
+ }
+ memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
+ log_descriptor.bc.buffer->size+= chunk_offset;
+ log_descriptor.bc.ptr+= chunk_offset;
+ log_descriptor.bc.current_page_fill= chunk_offset;
+ log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
+ (chunk_offset +
+ LSN_OFFSET(last_valid_page)));
+ DBUG_PRINT("info", ("Move Page #%u: 0x%lx chaser: %d Size: %lu (%lu)",
+ (uint) log_descriptor.bc.buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.chaser,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
+ buffer->buffer)));
+ DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc););
+ }
+ }
+ DBUG_PRINT("info", ("Logs found: %d was recovered: %d",
+ logs_found, old_log_was_recovered));
+ if (!logs_found)
+ {
+ /* Start new log system from scratch */
+ /* Used space */
+ log_descriptor.horizon= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* header page */
+ /* Current logs file number in page cache */
+ if ((log_descriptor.log_file_num[0]=
+ open_logfile_by_number_no_cache(1)) == -1 ||
+ translog_write_file_header())
+ DBUG_RETURN(1);
+ if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, 1,
+ CONTROL_FILE_UPDATE_ONLY_LOGNO))
+ DBUG_RETURN(1);
+ /* assign buffer 0 */
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
+ }
+ else if (old_log_was_recovered || old_flags != flags)
+ {
+ /* leave the damaged file untouched */
+ log_descriptor.horizon+= LSN_ONE_FILE;
+ /* header page */
+ log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
+ TRANSLOG_PAGE_SIZE);
+ if (translog_create_new_file())
+ DBUG_RETURN(1);
+ /*
+ Buffer system left untouched after recovery => we should init it
+ (starting from buffer 0)
+ */
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
+ }
+
+ /* all LSNs that are on disk are flushed */
+ log_descriptor.sent_to_file= log_descriptor.flushed= log_descriptor.horizon;
+ /*
+ horizon is (potentially) the address of the next LSN; we need to
+ decrease it to signal that all LSNs before it are flushed
+ */
+ log_descriptor.flushed--; /* offset decreased */
+ log_descriptor.sent_to_file--; /* offset decreased */
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Free transaction log file buffer
+
+ SYNOPSIS
+ translog_buffer_destroy()
+ buffer The buffer to free
+
+ NOTE
+ This buffer should be locked
+*/
+
+static void translog_buffer_destroy(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_destroy");
+ DBUG_PRINT("enter",
+ ("Buffer #%u: 0x%lx file: %d offset: (%lu,0x%lx) size: %lu",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ buffer->file,
+ (ulong) LSN_FILE_NO(buffer->offset),
+ (ulong) LSN_OFFSET(buffer->offset),
+ (ulong) buffer->size));
+ DBUG_ASSERT(buffer->waiting_filling_buffer.last_thread == 0);
+ if (buffer->file != -1)
+ {
+ /*
+ We ignore errors here because we can't do anything about them
+ (we are shutting down)
+ */
+ translog_buffer_flush(buffer);
+ }
+ DBUG_PRINT("info", ("Unlock mutex: 0x%lx", (ulong) &buffer->mutex));
+ pthread_mutex_unlock(&buffer->mutex);
+ DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex));
+ pthread_mutex_destroy(&buffer->mutex);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Free log handler resources
+
+ SYNOPSIS
+ translog_destroy()
+*/
+
+void translog_destroy()
+{
+ uint i;
+ DBUG_ENTER("translog_destroy");
+ if (log_descriptor.bc.buffer->file != -1)
+ translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
+
+ for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
+ {
+ struct st_translog_buffer *buffer= log_descriptor.buffers + i;
+ /*
+ Lock the buffer just for safety, there should not be other
+ threads running.
+ */
+ translog_buffer_lock(buffer);
+ translog_buffer_destroy(buffer);
+ }
+ /* close files */
+ for (i= 0; i < OPENED_FILES_NUM; i++)
+ {
+ if (log_descriptor.log_file_num[i] != -1)
+ translog_close_log_file(log_descriptor.log_file_num[i]);
+ }
+ pthread_mutex_destroy(&log_descriptor.sent_to_file_lock);
+ my_close(log_descriptor.directory_fd, MYF(MY_WME));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Lock the loghandler
+
+ SYNOPSIS
+ translog_lock()
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_lock()
+{
+ struct st_translog_buffer *current_buffer;
+ DBUG_ENTER("translog_lock");
+
+ /*
+ Locking the loghandler means locking the current buffer, but the buffer
+ can change while we wait for the lock, so we have to re-check it
+ */
+ for (;;)
+ {
+ current_buffer= log_descriptor.bc.buffer;
+ if (translog_buffer_lock(current_buffer))
+ DBUG_RETURN(1);
+ if (log_descriptor.bc.buffer == current_buffer)
+ break;
+ translog_buffer_unlock(current_buffer);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Unlock the loghandler
+
+ SYNOPSIS
+ translog_unlock()
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static inline my_bool translog_unlock()
+{
+ DBUG_ENTER("translog_unlock");
+ translog_buffer_unlock(log_descriptor.bc.buffer);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Start new page
+
+ SYNOPSIS
+ translog_page_next()
+ horizon \ Position in file and buffer where we are
+ cursor /
+ prev_buffer The buffer which has to be flushed, if any, is assigned
+ here. This is always set (NULL if no flush is needed).
+
+ NOTE
+ handler should be locked
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ struct st_translog_buffer **prev_buffer)
+{
+ struct st_translog_buffer *buffer= cursor->buffer;
+ DBUG_ENTER("translog_page_next");
+
+ if ((cursor->ptr +TRANSLOG_PAGE_SIZE >
+ cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) ||
+ (LSN_OFFSET(*horizon) >
+ log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE))
+ {
+ DBUG_PRINT("info", ("Switch to next buffer Buffer Size: %lu (%lu) => %d "
+ "File size: %lu max: %lu => %d",
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer),
+ (cursor->ptr + TRANSLOG_PAGE_SIZE >
+ cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER),
+ (ulong) LSN_OFFSET(*horizon),
+ (ulong) log_descriptor.log_file_max_size,
+ (LSN_OFFSET(*horizon) >
+ (log_descriptor.log_file_max_size -
+ TRANSLOG_PAGE_SIZE))));
+ if (translog_buffer_next(horizon, cursor,
+ LSN_OFFSET(*horizon) >
+ (log_descriptor.log_file_max_size -
+ TRANSLOG_PAGE_SIZE)))
+ DBUG_RETURN(1);
+ *prev_buffer= buffer;
+ DBUG_PRINT("info", ("Buffer #%u (0x%lu): have to be flushed",
+ (uint) buffer->buffer_no, (ulong) buffer));
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu): "
+ "Buffer Size: %lu (%lu)",
+ (uint) buffer->buffer_no,
+ (ulong) buffer,
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ translog_finish_page(horizon, cursor);
+ translog_new_page_header(horizon, cursor);
+ *prev_buffer= NULL;
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write data of given length to the current page
+
+ SYNOPSIS
+ translog_write_data_on_page()
+ horizon \ Pointers on file and buffer
+ cursor /
+ length IN length of the chunk
+ buffer buffer with data
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ translog_size_t length,
+ byte *buffer)
+{
+ DBUG_ENTER("translog_write_data_on_page");
+ DBUG_PRINT("enter", ("Chunk length: %lu Page size %u",
+ (ulong) length, (uint) cursor->current_page_fill));
+ DBUG_ASSERT(length > 0);
+ DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer +
+ TRANSLOG_WRITE_BUFFER);
+
+ memcpy(cursor->ptr, buffer, length);
+ cursor->ptr+= length;
+ (*horizon)+= length; /* adds offset */
+ cursor->current_page_fill+= length;
+ if (!cursor->chaser)
+ cursor->buffer->size+= length;
+ DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx "
+ "chaser: %d Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ DBUG_EXECUTE("info", translog_check_cursor(cursor););
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write data from parts of given length to the current page
+
+ SYNOPSIS
+ translog_write_parts_on_page()
+ horizon \ Pointers on file and buffer
+ cursor /
+ length IN length of the chunk
+ parts IN/OUT chunk source
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ translog_size_t length,
+ struct st_translog_parts *parts)
+{
+ translog_size_t left= length;
+ uint cur= (uint) parts->current;
+ DBUG_ENTER("translog_write_parts_on_page");
+ DBUG_PRINT("enter", ("Chunk length: %lu parts: %u of %u. Page size: %u "
+ "Buffer size: %lu (%lu)",
+ (ulong) length,
+ (uint) (cur + 1), (uint) parts->parts.elements,
+ (uint) cursor->current_page_fill,
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer)));
+ DBUG_ASSERT(length > 0);
+ DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer +
+ TRANSLOG_WRITE_BUFFER);
+
+ do
+ {
+ translog_size_t len;
+ struct st_translog_part *part;
+ byte *buff;
+
+ DBUG_ASSERT(cur < parts->parts.elements);
+ part= dynamic_element(&parts->parts, cur, struct st_translog_part *);
+ buff= part->buff;
+ DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu",
+ (uint) (cur + 1), (ulong) part->len, (ulong) left));
+
+ if (part->len > left)
+ {
+ /* we should write less than the current part contains */
+ len= left;
+ part->len-= len;
+ part->buff+= len;
+ DBUG_PRINT("info", ("Set new part: %u Length: %lu",
+ (uint) (cur + 1), (ulong) part->len));
+ }
+ else
+ {
+ len= part->len;
+ cur++;
+ DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
+ }
+ DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx %u",
+ (ulong) cursor->ptr, (ulong)buff, (uint)len));
+ memcpy(cursor->ptr, buff, len);
+ left-= len;
+ cursor->ptr+= len;
+ } while (left);
+
+ DBUG_PRINT("info", ("Horizon: (%lu,0x%lx) Length %lu(0x%lx)",
+ (ulong) LSN_FILE_NO(*horizon),
+ (ulong) LSN_OFFSET(*horizon),
+ (ulong) length, (ulong) length));
+ parts->current= cur;
+ (*horizon)+= length; /* offset increasing */
+ cursor->current_page_fill+= length;
+ if (!cursor->chaser)
+ cursor->buffer->size+= length;
+ DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx "
+ "chaser: %d Size: %lu (%lu) "
+ "Horizon: (%lu,0x%lx) buff offset: 0x%lx",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr - cursor->buffer->buffer),
+ (ulong) LSN_FILE_NO(*horizon),
+ (ulong) LSN_OFFSET(*horizon),
+ (ulong) (LSN_OFFSET(cursor->buffer->offset) +
+ cursor->buffer->size)));
+ DBUG_EXECUTE("info", translog_check_cursor(cursor););
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Put 1 group chunk type 0 header into parts array
+
+ SYNOPSIS
+ translog_write_variable_record_1group_header()
+ parts Descriptor of record source parts
+ type The log record type
+ short_trid Short transaction ID or 0 if it does not apply
+ header_length Calculated header length of chunk type 0
+ chunk0_header Buffer for the chunk header writing
+*/
+
+static void
+translog_write_variable_record_1group_header(struct st_translog_parts *parts,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ uint16 header_length,
+ byte *chunk0_header)
+{
+ struct st_translog_part part;
+ DBUG_ASSERT(parts->current != 0); /* first part is left for header */
+ parts->total_record_length+= (part.len= header_length);
+ part.buff= chunk0_header;
+ /* puts chunk type */
+ *chunk0_header= (byte) (type | TRANSLOG_CHUNK_LSN);
+ int2store(chunk0_header + 1, short_trid);
+ /* puts record length */
+ translog_write_variable_record_1group_code_len(chunk0_header + 3,
+ parts->record_length,
+ header_length);
+ /* put 0 as the chunk length, which indicates a 1-group record */
+ int2store(chunk0_header + header_length - 2, 0);
+ parts->current--;
+ set_dynamic(&parts->parts, (gptr) &part, parts->current);
+}
+
+
+/*
+ Increase number of writers for this buffer
+
+ SYNOPSIS
+ translog_buffer_increase_writers()
+ buffer target buffer
+*/
+
+static inline void
+translog_buffer_increase_writers(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_increase_writers");
+ buffer->copy_to_buffer_in_progress++;
+ DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx: %d",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ buffer->copy_to_buffer_in_progress));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Decrease number of writers for this buffer
+
+ SYNOPSIS
+ translog_buffer_decrease_writers()
+ buffer target buffer
+*/
+
+
+static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_decrease_writers");
+ buffer->copy_to_buffer_in_progress--;
+ DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx: %d",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ buffer->copy_to_buffer_in_progress));
+ if (buffer->copy_to_buffer_in_progress == 0 &&
+ buffer->waiting_filling_buffer.last_thread != NULL)
+ wqueue_release_queue(&buffer->waiting_filling_buffer);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Put chunk 2 from new page beginning
+
+ SYNOPSIS
+ translog_write_variable_record_chunk2_page()
+ parts Descriptor of record source parts
+ horizon \ Pointers on file position and buffer
+ cursor /
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
+ TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ struct st_translog_buffer *buffer_to_flush;
+ int rc;
+ byte chunk2_header[1];
+ DBUG_ENTER("translog_write_variable_record_chunk2_page");
+ chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
+
+ rc= translog_page_next(horizon, cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+ if (rc)
+ DBUG_RETURN(1);
+
+ /* Puts chunk type */
+ translog_write_data_on_page(horizon, cursor, 1, chunk2_header);
+ /* Puts chunk body */
+ translog_write_parts_on_page(horizon, cursor,
+ log_descriptor.page_capacity_chunk_2, parts);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Put chunk 3 of requested length in the buffer from new page beginning
+
+ SYNOPSIS
+ translog_write_variable_record_chunk3_page()
+ parts Descriptor of record source parts
+ length Length of this chunk
+ horizon \ Pointers on file position and buffer
+ cursor /
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
+ uint16 length,
+ TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ struct st_translog_buffer *buffer_to_flush;
+ struct st_translog_part part;
+ int rc;
+ byte chunk3_header[1 + 2];
+ DBUG_ENTER("translog_write_variable_record_chunk3_page");
+
+ rc= translog_page_next(horizon, cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+ if (rc)
+ DBUG_RETURN(1);
+ if (length == 0)
+ {
+ /* It was a call to write the page header only (no data for chunk 3) */
+ DBUG_PRINT("info", ("It is a call to make page header only"));
+ DBUG_RETURN(0);
+ }
+
+ DBUG_ASSERT(parts->current != 0); /* first part is left for header */
+ parts->total_record_length+= (part.len= 1 + 2);
+ part.buff= chunk3_header;
+ /* Puts chunk type */
+ *chunk3_header= (byte) (TRANSLOG_CHUNK_LNGTH);
+ /* Puts chunk length */
+ int2store(chunk3_header + 1, length);
+ parts->current--;
+ set_dynamic(&parts->parts, (gptr) &part, parts->current);
+
+ translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
+ DBUG_RETURN(0);
+}
+
+/*
+ Move the log pointer (horizon) forward by the given number of full pages
+ starting from the next page, plus the given amount of data on the last page
+
+ SYNOPSIS
+ translog_advance_pointer()
+ pages Number of full pages starting from the next one
+ last_page_data Amount of data to be placed on the last page
+
+ RETURN
+ 0 OK
+ 1 Error
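+
+ NOTE
+ A sketch of the arithmetic (see the offset calculation below): the
+ pointer is moved over the rest of the current page, then over the given
+ number of full pages, and finally page_overhead + last_page_data bytes
+ into the following page.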
+*/
+
+static my_bool translog_advance_pointer(uint pages, uint16 last_page_data)
+{
+ translog_size_t last_page_offset= (log_descriptor.page_overhead +
+ last_page_data);
+ translog_size_t offset= (TRANSLOG_PAGE_SIZE -
+ log_descriptor.bc.current_page_fill +
+ pages * TRANSLOG_PAGE_SIZE + last_page_offset);
+ translog_size_t buffer_end_offset, file_end_offset, min_offset;
+ DBUG_ENTER("translog_advance_pointer");
+ DBUG_PRINT("enter", ("Pointer: (%lu, 0x%lx) + %u + %u pages + %u + %u",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (uint) (TRANSLOG_PAGE_SIZE -
+ log_descriptor.bc.current_page_fill),
+ pages, (uint) log_descriptor.page_overhead,
+ (uint) last_page_data));
+
+ for (;;)
+ {
+ uint8 new_buffer_no;
+ struct st_translog_buffer *new_buffer;
+ struct st_translog_buffer *old_buffer;
+ buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size;
+ file_end_offset= (log_descriptor.log_file_max_size -
+ LSN_OFFSET(log_descriptor.horizon));
+ DBUG_PRINT("info", ("offset: %lu buffer_end_offs: %lu, "
+ "file_end_offs: %lu",
+ (ulong) offset, (ulong) buffer_end_offset,
+ (ulong) file_end_offset));
+ DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = "
+ "0x%lx (0x%lx)",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (uint) log_descriptor.bc.buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset),
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (LSN_OFFSET(log_descriptor.bc.buffer->offset) +
+ log_descriptor.bc.buffer->size),
+ (ulong) LSN_OFFSET(log_descriptor.horizon)));
+ DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) +
+ log_descriptor.bc.buffer->size ==
+ LSN_OFFSET(log_descriptor.horizon));
+
+ if (offset <= buffer_end_offset && offset <= file_end_offset)
+ break;
+ old_buffer= log_descriptor.bc.buffer;
+ new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ new_buffer= log_descriptor.buffers + new_buffer_no;
+
+ translog_buffer_lock(new_buffer);
+ translog_wait_for_buffer_free(new_buffer);
+
+ min_offset= min(buffer_end_offset, file_end_offset);
+ /* TODO: check is it ptr or size enough */
+ log_descriptor.bc.buffer->size+= min_offset;
+ log_descriptor.bc.ptr+= min_offset;
+ DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.chaser,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
+ buffer->buffer)));
+ DBUG_ASSERT((ulong) (log_descriptor.bc.ptr -
+ log_descriptor.bc.buffer->buffer) ==
+ log_descriptor.bc.buffer->size);
+ DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
+ log_descriptor.bc.buffer_no);
+ translog_buffer_increase_writers(log_descriptor.bc.buffer);
+
+ if (file_end_offset <= buffer_end_offset)
+ {
+ log_descriptor.horizon+= LSN_ONE_FILE;
+ log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
+ TRANSLOG_PAGE_SIZE);
+ DBUG_PRINT("info", ("New file: %lu",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon)));
+ if (translog_create_new_file())
+ {
+ DBUG_RETURN(1);
+ }
+ }
+ else
+ {
+ DBUG_PRINT("info", ("The same file"));
+ log_descriptor.horizon+= min_offset; /* offset increasing */
+ }
+ translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
+ if (translog_buffer_unlock(old_buffer))
+ DBUG_RETURN(1);
+ offset-= min_offset;
+ }
+ log_descriptor.bc.ptr+= offset;
+ log_descriptor.bc.buffer->size+= offset;
+ translog_buffer_increase_writers(log_descriptor.bc.buffer);
+ log_descriptor.horizon+= offset; /* offset increasing */
+ log_descriptor.bc.current_page_fill= last_page_offset;
+ DBUG_PRINT("info", ("drop write_counter"));
+ log_descriptor.bc.write_counter= 0;
+ log_descriptor.bc.previous_offset= 0;
+ DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu) "
+ "offset: %u last page: %u",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.chaser,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -
+ log_descriptor.bc.buffer->
+ buffer), (uint) offset,
+ (uint) last_page_offset));
+ DBUG_PRINT("info",
+ ("pointer moved to: (%lu, 0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon)));
+ DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc););
+ log_descriptor.bc.protected= 0;
+ DBUG_RETURN(0);
+}
+
+
+
+/*
+ Get page rest
+
+ SYNOPSIS
+ translog_get_current_page_rest()
+
+ NOTE loghandler should be locked
+
+ RETURN
+ number of bytes left on the current page
+*/
+
+#define translog_get_current_page_rest() \
+ (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill)
+
+/*
+ Get buffer rest in full pages
+
+ SYNOPSIS
+ translog_get_current_buffer_rest()
+
+ NOTE loghandler should be locked
+
+ RETURN
+ number of full pages left on the current buffer
+*/
+
+#define translog_get_current_buffer_rest() \
+ ((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER - \
+ log_descriptor.bc.ptr) / \
+ TRANSLOG_PAGE_SIZE)
+
+/*
+ Calculate possible group size without first (current) page
+
+ SYNOPSIS
+ translog_get_current_group_size()
+
+ NOTE loghandler should be locked
+
+ RETURN
+ group size without first (current) page
+*/
+
+static translog_size_t translog_get_current_group_size()
+{
+ /* buffer rest in full pages */
+ translog_size_t buffer_rest= translog_get_current_buffer_rest();
+ DBUG_ENTER("translog_get_current_group_size");
+ DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
+
+ buffer_rest*= log_descriptor.page_capacity_chunk_2;
+ /* if less than half of the buffer is free, we can also use the next buffer */
+ if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
+ {
+ DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
+ (ulong) buffer_rest,
+ (ulong) log_descriptor.buffer_capacity_chunk_2));
+ buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
+ }
+
+ DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
+
+ DBUG_RETURN(buffer_rest);
+}
+
+
+/*
+ Write variable record in 1 group
+
+ SYNOPSIS
+ translog_write_variable_record_1group()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Short transaction ID or 0 if it does not apply
+ parts Descriptor of record source parts
+ buffer_to_flush Buffer which has to be flushed if it is not 0
+ header_length Calculated header length of chunk type 0
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_1group(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ struct st_translog_buffer
+ *buffer_to_flush, uint16 header_length,
+ void *tcb)
+{
+ TRANSLOG_ADDRESS horizon;
+ struct st_buffer_cursor cursor;
+ int rc= 0;
+ uint i;
+ translog_size_t record_rest, full_pages, first_page;
+ uint additional_chunk3_page= 0;
+ byte chunk0_header[1 + 2 + 5 + 2];
+ DBUG_ENTER("translog_write_variable_record_1group");
+
+ *lsn= horizon= log_descriptor.horizon;
+ if (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook)(type, tcb, lsn, parts))
+ {
+ translog_unlock();
+ DBUG_RETURN(1);
+ }
+ cursor= log_descriptor.bc;
+ cursor.chaser= 1;
+
+ /* Advance the pointer to be able to unlock the loghandler */
+ first_page= translog_get_current_page_rest();
+ record_rest= parts->record_length - (first_page - header_length);
+ full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
+ record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
+
+ if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
+ {
+ DBUG_PRINT("info", ("2 chunks type 3 is needed"));
+ /* We will write 2 chunks type 3 at the end of this group */
+ additional_chunk3_page= 1;
+ record_rest= 1;
+ }
+
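+ /*
+ A sketch of the resulting layout, derived from the calculation above:
+ the first (current) page gets the chunk type 0 header plus
+ first_page - header_length bytes of the record, each of the full_pages
+ pages carries page_capacity_chunk_2 bytes as a chunk type 2, and the
+ last record_rest bytes (if any) go into a chunk type 3 with a 3-byte
+ header (preceded by an extra chunk type 3 page in the corner case
+ handled above).
+ */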
+ DBUG_PRINT("info", ("first_page: %u (%u) full_pages: %u (%lu) "
+ "additional: %u (%u) rest %u = %u",
+ first_page, first_page - header_length,
+ full_pages,
+ (ulong) full_pages *
+ log_descriptor.page_capacity_chunk_2,
+ additional_chunk3_page,
+ additional_chunk3_page *
+ (log_descriptor.page_capacity_chunk_2 - 1),
+ record_rest, parts->record_length));
+ /* record_rest + 3 is chunk type 3 overhead + record_rest */
+ rc|= translog_advance_pointer(full_pages + additional_chunk3_page,
+ (record_rest ? record_rest + 3 : 0));
+ log_descriptor.bc.buffer->last_lsn= *lsn;
+
+ rc|= translog_unlock();
+
+ /*
+ Check if we switched buffers and need to process the old one (the current
+ buffer is already unlocked => we will not delay other threads)
+ */
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+ if (rc)
+ DBUG_RETURN(1);
+
+ translog_write_variable_record_1group_header(parts, type, short_trid,
+ header_length, chunk0_header);
+
+ /* fill the pages */
+ translog_write_parts_on_page(&horizon, &cursor, first_page, parts);
+
+
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon)));
+
+ for (i= 0; i < full_pages; i++)
+ {
+ if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
+ DBUG_RETURN(1);
+
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon)));
+ }
+
+ if (additional_chunk3_page)
+ {
+ if (translog_write_variable_record_chunk3_page(parts,
+ log_descriptor.
+ page_capacity_chunk_2 - 2,
+ &horizon, &cursor))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon)));
+ DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
+ }
+
+ if (translog_write_variable_record_chunk3_page(parts,
+ record_rest,
+ &horizon, &cursor))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon)));
+
+ if (!(rc= translog_buffer_lock(cursor.buffer)))
+ {
+ /*
+ Check if we wrote something on the first (not full) page and need to
+ reconstruct the CRC and sector protection
+ */
+ translog_buffer_decrease_writers(cursor.buffer);
+ }
+ rc|= translog_buffer_unlock(cursor.buffer);
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Write variable record in 1 chunk
+
+ SYNOPSIS
+ translog_write_variable_record_1chunk()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Short transaction ID or 0 if it does not apply
+ parts Descriptor of record source parts
+ buffer_to_flush Buffer which has to be flushed if it is not 0
+ header_length Calculated header length of chunk type 0
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_1chunk(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ struct st_translog_buffer
+ *buffer_to_flush, uint16 header_length,
+ void *tcb)
+{
+ int rc;
+ byte chunk0_header[1 + 2 + 5 + 2];
+ DBUG_ENTER("translog_write_variable_record_1chunk");
+
+ translog_write_variable_record_1group_header(parts, type, short_trid,
+ header_length, chunk0_header);
+
+ *lsn= log_descriptor.horizon;
+ if (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook)(type, tcb,
+ lsn, parts))
+ {
+ translog_unlock();
+ DBUG_RETURN(1);
+ }
+
+ rc= translog_write_parts_on_page(&log_descriptor.horizon,
+ &log_descriptor.bc,
+ parts->total_record_length, parts);
+ log_descriptor.bc.buffer->last_lsn= *lsn;
+ rc|= translog_unlock();
+
+ /*
+ check if we switched buffers and need to process the old one (the current
+ buffer is already unlocked => we will not delay other threads)
+ */
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Calculate and write LSN difference (compressed LSN)
+
+ SYNOPSIS
+ translog_put_LSN_diff()
+ base_lsn LSN from which we calculate difference
+ lsn LSN to encode
+ dst Result will be written to dst[-pack_length] .. dst[-1]
+
+ NOTE:
+ To store an LSN in a compact way we will use the following compression:
+
+ If a log record has LSN1, and it contains LSN2 as a back reference,
+ then instead of LSN2 we write LSN1-LSN2, encoded as:
+
+ two bits the number N (see below)
+ 14 bits the high part of the difference
+ N bytes the rest of the difference
+
+ That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
+ is stored in the first two bits.
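+
+ A small worked example, derived from the code below: with base_lsn and
+ lsn in the same log file and a difference of 0x100 (<= 0x3FFF), the
+ difference is stored in two bytes: 0x01 0x00 (the two upper bits of the
+ first byte are 00, i.e. N = 0 extra bytes).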
+
+ RETURN
+ # pointer on coded LSN
+ NULL Error
+*/
+
+static byte *translog_put_LSN_diff(LSN base_lsn, LSN lsn, byte *dst)
+{
+ DBUG_ENTER("translog_put_LSN_diff");
+ DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx) val: (0x%lu,0x%lx) dst: 0x%lx",
+ (ulong) LSN_FILE_NO(base_lsn),
+ (ulong) LSN_OFFSET(base_lsn),
+ (ulong) LSN_FILE_NO(lsn),
+ (ulong) LSN_OFFSET(lsn), (ulong) dst));
+ if (LSN_FILE_NO(base_lsn) == LSN_FILE_NO(lsn))
+ {
+ uint32 diff;
+ DBUG_ASSERT(base_lsn > lsn);
+ diff= base_lsn - lsn;
+ DBUG_PRINT("info", ("File is the same. Diff: 0x%lx", (ulong) diff));
+ if (diff <= 0x3FFF)
+ {
+ dst-= 2;
+ /*
+ Note we store this high byte first to ensure that first byte has
+ 0 in the 3 upper bits.
+ */
+ dst[0]= diff >> 8;
+ dst[1]= (diff & 0xFF);
+ }
+ else if (diff <= 0x3FFFFF)
+ {
+ dst-= 3;
+ dst[0]= 0x40 | (diff >> 16);
+ int2store(dst + 1, diff & 0xFFFF);
+ }
+ else if (diff <= 0x3FFFFFFF)
+ {
+ dst-= 4;
+ dst[0]= 0x80 | (diff >> 24);
+ int3store(dst + 1, diff & 0xFFFFFF);
+ }
+ else
+ {
+ dst-= 5;
+ dst[0]= 0xC0;
+ int4store(dst + 1, diff);
+ }
+ }
+ else
+ {
+ uint32 diff;
+ uint32 offset_diff;
+ ulonglong base_offset= LSN_OFFSET(base_lsn);
+ DBUG_ASSERT(base_lsn > lsn);
+ diff= LSN_FILE_NO(base_lsn) - LSN_FILE_NO(lsn);
+ DBUG_PRINT("info", ("File is different. Diff: 0x%lx", (ulong) diff));
+
+ if (base_offset < LSN_OFFSET(lsn))
+ {
+ /* take 1 from file offset */
+ diff--;
+ base_offset+= LL(0x100000000);
+ }
+ offset_diff= base_offset - LSN_OFFSET(lsn);
+ if (diff > 0x3f)
+ {
+ /*TODO: error - too long transaction - panic!!! */
+ UNRECOVERABLE_ERROR(("Too big file diff: %lu", (ulong) diff));
+ DBUG_RETURN(NULL);
+ }
+ dst-= 5;
+ *dst= (0xC0 | diff);
+ int4store(dst + 1, offset_diff);
+ }
+ DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst));
+ DBUG_RETURN(dst);
+}
+
+
+/*
+ Get LSN from LSN-difference (compressed LSN)
+
+ SYNOPSIS
+ translog_get_LSN_from_diff()
+ base_lsn LSN from which we calculate difference
+ src pointer to coded lsn
+ dst pointer to buffer where to write 7byte LSN
+
+ NOTE:
+ To store an LSN in a compact way we will use the following compression:
+
+ If a log record has LSN1, and it contains LSN2 as a back reference,
+ then instead of LSN2 we write LSN1-LSN2, encoded as:
+
+ two bits the number N (see below)
+ 14 bits the high part of the difference
+ N bytes the rest of the difference
+
+ That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
+ is stored in the first two bits.
+
+ RETURN
+ # pointer into the source buffer just after the coded LSN
+ NULL Error
+*/
+
+static byte *translog_get_LSN_from_diff(LSN base_lsn, byte *src, byte *dst)
+{
+ LSN lsn;
+ uint32 diff;
+ uint32 first_byte;
+ uint32 file_no, rec_offset;
+ uint8 code;
+ DBUG_ENTER("translog_get_LSN_from_diff");
+ DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx) src: 0x%lx dst 0x%lx",
+ (ulong) LSN_FILE_NO(base_lsn),
+ (ulong) LSN_OFFSET(base_lsn),
+ (ulong) src, (ulong) dst));
+ first_byte= *((uint8*) src);
+ code= first_byte >> 6; /* Length is in the 2 uppermost bits */
+ first_byte&= 0x3F;
+ src++; /* Skip length + encode */
+ file_no= LSN_FILE_NO(base_lsn); /* Assume relative */
+ DBUG_PRINT("info", ("code: %u first byte: %lu",
+ (uint) code, (ulong) first_byte));
+ switch (code) {
+ case 0:
+ rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src));
+ break;
+ case 1:
+ diff= uint2korr(src);
+ rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff);
+ break;
+ case 2:
+ diff= uint3korr(src);
+ rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff);
+ break;
+ case 3:
+ {
+ ulonglong base_offset= LSN_OFFSET(base_lsn);
+ diff= uint4korr(src);
+ if (diff > LSN_OFFSET(base_lsn))
+ {
+ /* take 1 from file offset */
+ first_byte++;
+ base_offset+= LL(0x100000000);
+ }
+ file_no= LSN_FILE_NO(base_lsn) - first_byte;
+ rec_offset= base_offset - diff;
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ DBUG_RETURN(NULL);
+ }
+ lsn= MAKE_LSN(file_no, rec_offset);
+ src+= code + 1;
+ lsn_store(dst, lsn);
+ DBUG_PRINT("info", ("new src: 0x%lx", (ulong) dst));
+ DBUG_RETURN(src);
+}
+
+
+/*
+ Encode relative LSNs listed in the parameters
+
+ SYNOPSIS
+ translog_relative_LSN_encode()
+ parts Parts list with encoded LSN(s)
+ base_lsn LSN which is base for encoding
+ lsns number of LSN(s) to encode
+ compressed_LSNs buffer which can be used for storing compressed LSN(s)
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts,
+ LSN base_lsn,
+ uint lsns, byte *compressed_LSNs)
+{
+ struct st_translog_part *part;
+ uint lsns_len= lsns * LSN_STORE_SIZE;
+
+ DBUG_ENTER("translog_relative_LSN_encode");
+
+ part= dynamic_element(&parts->parts, parts->current,
+ struct st_translog_part *);
+ /* collect all LSN(s) into one part if they are split over several parts */
+ if (part->len < lsns_len)
+ {
+ uint copied= part->len;
+ DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs));
+ memcpy(compressed_LSNs, part->buff, part->len);
+ do
+ {
+ struct st_translog_part *next_part;
+ next_part= dynamic_element(&parts->parts, parts->current + 1,
+ struct st_translog_part *);
+ if ((next_part->len + copied) < lsns_len)
+ {
+ memcpy(compressed_LSNs + copied, next_part->buff, next_part->len);
+ copied+= next_part->len;
+ delete_dynamic_element(&parts->parts, parts->current + 1);
+ }
+ else
+ {
+ uint len= lsns_len - copied;
+ memcpy(compressed_LSNs + copied, next_part->buff, len);
+ copied= lsns_len;
+ next_part->buff+= len;
+ next_part->len-= len;
+ }
+ } while (copied < lsns_len);
+ part->len= lsns_len;
+ part->buff= compressed_LSNs;
+ }
+ {
+ /* Compress */
+ LSN ref;
+ uint economy;
+ byte *ref_ptr= part->buff + lsns_len - LSN_STORE_SIZE;
+ byte *dst_ptr= part->buff + lsns_len;
+ for (; ref_ptr >= part->buff ; ref_ptr-= LSN_STORE_SIZE)
+ {
+ ref= lsn_korr(ref_ptr);
+ if ((dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr)) == NULL)
+ DBUG_RETURN(1);
+ }
+ /* Note that dst_ptr grew downward! */
+ economy= (uint) (dst_ptr - part->buff);
+ DBUG_PRINT("info", ("Economy: %u", economy));
+ part->len-= economy;
+ parts->record_length-= economy;
+ parts->total_record_length-= economy;
+ part->buff= dst_ptr;
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write multi-group variable-size record
+
+ SYNOPSIS
+ translog_write_variable_record_mgroup()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Short transaction ID or 0 if it does not apply
+ parts Descriptor of record source parts
+ buffer_to_flush Buffer which has to be flushed if it is not 0
+ header_length Header length calculated for 1 group
+ buffer_rest Amount of data, beyond the first page, which we plan
+ to write as full pages
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool
+translog_write_variable_record_mgroup(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ struct st_translog_buffer
+ *buffer_to_flush,
+ uint16 header_length,
+ translog_size_t buffer_rest,
+ void *tcb)
+{
+ TRANSLOG_ADDRESS horizon;
+ struct st_buffer_cursor cursor;
+ int rc= 0;
+ uint i, chunk2_page, full_pages;
+ uint curr_group= 0;
+ translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
+ translog_size_t done= 0;
+ struct st_translog_group_descriptor group;
+ DYNAMIC_ARRAY groups;
+ uint16 chunk3_size;
+ uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
+ uint16 last_page_capacity;
+ my_bool new_page_before_chunk0= 1, first_chunk0= 1;
+ byte chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
+ byte chunk2_header[1];
+ uint header_fixed_part= header_length + 2;
+ uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
+ DBUG_ENTER("translog_write_variable_record_mgroup");
+
+ chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
+
+ if (init_dynamic_array(&groups, sizeof(struct st_translog_group_descriptor),
+ 10, 10 CALLER_INFO))
+ {
+ translog_unlock();
+ UNRECOVERABLE_ERROR(("init array failed"));
+ DBUG_RETURN(1);
+ }
+
+ first_page= translog_get_current_page_rest();
+ record_rest= parts->record_length - (first_page - 1);
+ DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));
+
+ if (record_rest < buffer_rest)
+ {
+ DBUG_PRINT("info", ("too many free space because changing header"));
+ buffer_rest-= log_descriptor.page_capacity_chunk_2;
+ DBUG_ASSERT(record_rest >= buffer_rest);
+ }
+
+ do
+ {
+ group.addr= horizon= log_descriptor.horizon;
+ cursor= log_descriptor.bc;
+ cursor.chaser= 1;
+ if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
+ {
+ /* a uint8 can not count more than 255 full pages; 256 is the max number of chunks in a multi-chunk group */
+ full_pages= 255;
+ buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
+ }
+ /*
+ group chunks =
+ full pages + the first page (which can actually be full, too).
+ But here we store the number of chunks - 1
+ */
+ group.num= full_pages;
+ if (insert_dynamic(&groups, (gptr) &group))
+ {
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ goto err_unlock;
+ }
+
+ DBUG_PRINT("info", ("chunk: #%u first_page: %u (%u) "
+ "full_pages: %lu (%lu) "
+ "Left %lu",
+ groups.elements,
+ first_page, first_page - 1,
+ (ulong) full_pages,
+ (ulong) (full_pages *
+ log_descriptor.page_capacity_chunk_2),
+ (ulong)(parts->record_length - (first_page - 1 +
+ buffer_rest) -
+ done)));
+ rc|= translog_advance_pointer(full_pages, 0);
+
+ rc|= translog_unlock();
+
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ buffer_to_flush= NULL;
+ }
+ if (rc)
+ {
+ UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
+ goto err;
+ }
+
+ translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
+ translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
+ "Left %lu",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon),
+ (ulong) (parts->record_length - (first_page - 1) -
+ done)));
+
+ for (i= 0; i < full_pages; i++)
+ {
+ if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
+ goto err;
+
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) "
+ "local: (%lu,0x%lx) "
+ "Left: %lu",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon),
+ (ulong) (parts->record_length - (first_page - 1) -
+ i * log_descriptor.page_capacity_chunk_2 -
+ done)));
+ }
+
+ done+= (first_page - 1 + buffer_rest);
+
+ /* TODO: make separate function for following */
+ rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ buffer_to_flush= NULL;
+ }
+ if (rc)
+ {
+ UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
+ goto err;
+ }
+ rc= translog_buffer_lock(cursor.buffer);
+ if (!rc)
+ translog_buffer_decrease_writers(cursor.buffer);
+ rc|= translog_buffer_unlock(cursor.buffer);
+ if (rc)
+ goto err;
+
+ translog_lock();
+
+ first_page= translog_get_current_page_rest();
+ buffer_rest= translog_get_current_group_size();
+ } while (first_page + buffer_rest < (uint) (parts->record_length - done));
+
+ group.addr= horizon= log_descriptor.horizon;
+ cursor= log_descriptor.bc;
+ cursor.chaser= 1;
+ group.num= 0; /* 0 because it does not matter */
+ if (insert_dynamic(&groups, (gptr) &group))
+ {
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ goto err_unlock;
+ }
+ record_rest= parts->record_length - done;
+ DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
+ if (first_page <= record_rest + 1)
+ {
+ chunk2_page= 1;
+ record_rest-= (first_page - 1);
+ full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
+ record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
+ last_page_capacity= page_capacity;
+ }
+ else
+ {
+ chunk2_page= full_pages= 0;
+ last_page_capacity= first_page;
+ }
+ chunk3_size= 0;
+ chunk3_pages= 0;
+ if (last_page_capacity > record_rest + 1 && record_rest != 0)
+ {
+ if (last_page_capacity >
+ record_rest + header_fixed_part + groups.elements * (7 + 1))
+ {
+ /* 1 record of type 0 */
+ chunk3_pages= 0;
+ }
+ else
+ {
+ chunk3_pages= 1;
+ if (record_rest + 2 == last_page_capacity)
+ {
+ chunk3_size= record_rest - 1;
+ record_rest= 1;
+ }
+ else
+ {
+ chunk3_size= record_rest;
+ record_rest= 0;
+ }
+ }
+ }
+ /*
+ The first non-full page will hold a type 0 chunk only if it fits in it
+ with all its headers
+ */
+ while (page_capacity <
+ record_rest + header_fixed_part +
+ (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
+ chunk0_pages++;
+ DBUG_PRINT("info", ("chunk0_pages: %u groups %u groups per full page: %u "
+ "Group on last page: %u",
+ chunk0_pages, groups.elements,
+ groups_per_page,
+ (groups.elements -
+ ((page_capacity - header_fixed_part) / (7 + 1)) *
+ (chunk0_pages - 1))));
+ DBUG_PRINT("info", ("first_page: %u chunk2: %u full_pages: %u (%lu) "
+ "chunk3: %u (%u) rest: %u",
+ first_page,
+ chunk2_page, full_pages,
+ (ulong) full_pages *
+ log_descriptor.page_capacity_chunk_2,
+ chunk3_pages, (uint) chunk3_size, (uint) record_rest));
+ rc= translog_advance_pointer(full_pages + chunk3_pages +
+ (chunk0_pages - 1),
+ record_rest + header_fixed_part +
+ (groups.elements -
+ ((page_capacity -
+ header_fixed_part) / (7 + 1)) *
+ (chunk0_pages - 1)) * (7 + 1));
+ rc|= translog_unlock();
+ if (rc)
+ goto err;
+
+ if (chunk2_page)
+ {
+ DBUG_PRINT("info", ("chunk 2 to finish first page"));
+ translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
+ translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
+ "Left: %lu",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon),
+ (ulong) (parts->record_length - (first_page - 1) -
+ done)));
+ }
+ else if (chunk3_pages)
+ {
+ DBUG_PRINT("info", ("chunk 3"));
+ DBUG_ASSERT(full_pages == 0);
+ byte chunk3_header[3];
+ chunk3_pages= 0;
+ chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
+ int2store(chunk3_header + 1, chunk3_size);
+ translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
+ translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
+ "Left: %lu",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon),
+ (ulong) (parts->record_length - chunk3_size - done)));
+ }
+ else
+ {
+ DBUG_PRINT("info", ("no new_page_before_chunk0"));
+ new_page_before_chunk0= 0;
+ }
+
+ for (i= 0; i < full_pages; i++)
+ {
+ DBUG_ASSERT(chunk2_page != 0);
+ if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
+ goto err;
+
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) "
+ "Left: %lu",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon),
+ (ulong) (parts->record_length - (first_page - 1) -
+ i * log_descriptor.page_capacity_chunk_2 -
+ done)));
+ }
+
+ if (chunk3_pages &&
+ translog_write_variable_record_chunk3_page(parts,
+ chunk3_size,
+ &horizon, &cursor))
+ goto err;
+ DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon),
+ (ulong) LSN_FILE_NO(horizon),
+ (ulong) LSN_OFFSET(horizon)));
+
+
+ *chunk0_header= (byte) (type |TRANSLOG_CHUNK_LSN);
+ int2store(chunk0_header + 1, short_trid);
+ translog_write_variable_record_1group_code_len(chunk0_header + 3,
+ parts->record_length,
+ header_length);
+ do
+ {
+ int limit;
+ if (new_page_before_chunk0)
+ {
+ rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ buffer_to_flush= NULL;
+ }
+ if (rc)
+ {
+ UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
+ goto err;
+ }
+ }
+ new_page_before_chunk0= 1;
+
+ if (first_chunk0)
+ {
+ first_chunk0= 0;
+ *lsn= horizon;
+ if (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook) (type, tcb,
+ lsn, parts))
+ goto err;
+ }
+
+ /*
+ A first non-full page will hold the type 0 chunk only if it fits in it
+ with all its headers => the first page is full or the number of groups is
+ less than the number that fits on a full page.
+ */
+ limit= (groups_per_page < groups.elements - curr_group ?
+ groups_per_page : groups.elements - curr_group);
+ DBUG_PRINT("info", ("Groups: %u curr: %u limit: %u",
+ (uint) groups.elements, (uint) curr_group,
+ (uint) limit));
+
+ if (chunk0_pages == 1)
+ {
+ DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
+ (uint) limit, (uint) record_rest,
+ (uint) (2 + limit * (7 + 1) + record_rest)));
+ int2store(chunk0_header + header_length - 2,
+ 2 + limit * (7 + 1) + record_rest);
+ }
+ else
+ {
+ DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
+ (uint) limit, (uint) (2 + limit * (7 + 1))));
+ int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
+ }
+ int2store(chunk0_header + header_length, groups.elements - curr_group);
+ translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
+ chunk0_header);
+ for (i= curr_group; i < limit + curr_group; i++)
+ {
+ struct st_translog_group_descriptor *grp_ptr;
+ grp_ptr= dynamic_element(&groups, i,
+ struct st_translog_group_descriptor *);
+ lsn_store(group_desc, grp_ptr->addr);
+ group_desc[7]= grp_ptr->num;
+ translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
+ }
+
+ if (chunk0_pages == 1 && record_rest != 0)
+ translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);
+
+ chunk0_pages--;
+ curr_group+= limit;
+
+ } while (chunk0_pages != 0);
+ rc= translog_buffer_lock(cursor.buffer);
+ if (cmp_translog_addr(cursor.buffer->last_lsn, *lsn) < 0)
+ cursor.buffer->last_lsn= *lsn;
+ translog_buffer_decrease_writers(cursor.buffer);
+ rc|= translog_buffer_unlock(cursor.buffer);
+
+ delete_dynamic(&groups);
+ DBUG_RETURN(rc);
+
+err_unlock:
+ translog_unlock();
+err:
+ delete_dynamic(&groups);
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Write the variable length log record
+
+ SYNOPSIS
+ translog_write_variable_record()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Short transaction ID or 0 if it is not applicable
+ parts Descriptor of record source parts
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_write_variable_record(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ void *tcb)
+{
+ struct st_translog_buffer *buffer_to_flush= NULL;
+ uint header_length1= 1 + 2 + 2 +
+ translog_variable_record_length_bytes(parts->record_length);
+ ulong buffer_rest;
+ uint page_rest;
+ /* Max number of such LSNs per record is 2 */
+ byte compressed_LSNs[2 * LSN_STORE_SIZE];
+
+ DBUG_ENTER("translog_write_variable_record");
+
+ translog_lock();
+ DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon)));
+ page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
+ DBUG_PRINT("info", ("header length: %u page_rest: %u",
+ header_length1, page_rest));
+
+ /*
+ The header and the part which we should read have to fit in one chunk
+ TODO: allow dividing the readable header
+ */
+ if (page_rest <
+ (header_length1 + log_record_type_descriptor[type].read_header_len))
+ {
+ DBUG_PRINT("info",
+ ("Next page, size: %u header: %u + %u",
+ log_descriptor.bc.current_page_fill,
+ header_length1,
+ log_record_type_descriptor[type].read_header_len));
+ translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
+ &buffer_to_flush);
+ /* Chunk 2 header is 1 byte, so full page capacity will be one byte more */
+ page_rest= log_descriptor.page_capacity_chunk_2 + 1;
+ DBUG_PRINT("info", ("page_rest: %u", page_rest));
+ }
+
+ /*
+ To minimize the compressed size we always compress relative to the
+ very first chunk address (log_descriptor.horizon for now)
+ */
+ if (log_record_type_descriptor[type].compressed_LSN > 0)
+ {
+ if (translog_relative_LSN_encode(parts, log_descriptor.horizon,
+ log_record_type_descriptor[type].
+ compressed_LSN, compressed_LSNs))
+ {
+ translog_unlock();
+ if (buffer_to_flush != NULL)
+ {
+ /*
+ This is just an attempt to finish the log nicely in case of error, so
+ we do not check the results of the following functions, because we are
+ going to return an error state in any case
+ */
+ translog_buffer_flush(buffer_to_flush);
+ translog_buffer_unlock(buffer_to_flush);
+ }
+ DBUG_RETURN(1);
+ }
+ /* recalculate header length after compression */
+ header_length1= 1 + 2 + 2 +
+ translog_variable_record_length_bytes(parts->record_length);
+ DBUG_PRINT("info", ("after compressing LSN(s) header length: %u "
+ "record length: %lu",
+ header_length1, (ulong)parts->record_length));
+ }
+
+ /* TODO: check space on current page for header + few bytes */
+ if (page_rest >= parts->record_length + header_length1)
+ {
+ /* the following function calls translog_unlock() */
+ DBUG_RETURN(translog_write_variable_record_1chunk(lsn, type, short_trid,
+ parts, buffer_to_flush,
+ header_length1, tcb));
+ }
+
+ buffer_rest= translog_get_current_group_size();
+
+ if (buffer_rest >= parts->record_length + header_length1 - page_rest)
+ {
+ /* the following function calls translog_unlock() */
+ DBUG_RETURN(translog_write_variable_record_1group(lsn, type, short_trid,
+ parts, buffer_to_flush,
+ header_length1, tcb));
+ }
+ /* the following function calls translog_unlock() */
+ DBUG_RETURN(translog_write_variable_record_mgroup(lsn, type, short_trid,
+ parts, buffer_to_flush,
+ header_length1,
+ buffer_rest, tcb));
+}
+
+
+/*
+ Write the fixed and pseudo-fixed log record
+
+ SYNOPSIS
+ translog_write_fixed_record()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Short transaction ID or 0 if it is not applicable
+ parts Descriptor of record source parts
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_write_fixed_record(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ void *tcb)
+{
+ struct st_translog_buffer *buffer_to_flush= NULL;
+ byte chunk1_header[1 + 2];
+ /* Max number of such LSNs per record is 2 */
+ byte compressed_LSNs[2 * LSN_STORE_SIZE];
+ struct st_translog_part part;
+ int rc;
+ DBUG_ENTER("translog_write_fixed_record");
+ DBUG_ASSERT((log_record_type_descriptor[type].class ==
+ LOGRECTYPE_FIXEDLENGTH &&
+ parts->record_length ==
+ log_record_type_descriptor[type].fixed_length) ||
+ (log_record_type_descriptor[type].class ==
+ LOGRECTYPE_PSEUDOFIXEDLENGTH &&
+ (parts->record_length -
+ log_record_type_descriptor[type].compressed_LSN * 2) <=
+ log_record_type_descriptor[type].fixed_length));
+
+ translog_lock();
+ DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) LSN_OFFSET(log_descriptor.horizon)));
+
+ DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
+ DBUG_PRINT("info",
+ ("Page size: %u record: %u next cond: %d",
+ log_descriptor.bc.current_page_fill,
+ (parts->record_length -
+ log_record_type_descriptor[type].compressed_LSN * 2 + 3),
+ ((((uint) log_descriptor.bc.current_page_fill) +
+ (parts->record_length -
+ log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
+ TRANSLOG_PAGE_SIZE)));
+ /*
+ check that there is enough room on the current page:
+ (log_record_type_descriptor[type].fixed_length - bytes saved by
+ compressed LSNs) bytes
+ */
+ if ((((uint) log_descriptor.bc.current_page_fill) +
+ (parts->record_length -
+ log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
+ TRANSLOG_PAGE_SIZE)
+ {
+ DBUG_PRINT("info", ("Next page"));
+ translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
+ &buffer_to_flush);
+ }
+
+ *lsn= log_descriptor.horizon;
+ if (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook) (type, tcb,
+ lsn, parts))
+ {
+ rc= 1;
+ goto err;
+ }
+
+ /* compress LSNs */
+ if (log_record_type_descriptor[type].class == LOGRECTYPE_PSEUDOFIXEDLENGTH)
+ {
+ DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
+ if (translog_relative_LSN_encode(parts, *lsn,
+ log_record_type_descriptor[type].
+ compressed_LSN, compressed_LSNs))
+ {
+ rc= 1;
+ goto err;
+ }
+ }
+
+ /*
+ Write the whole record at once (we know that there is enough place on
+ the destination page)
+ */
+ DBUG_ASSERT(parts->current != 0); /* first part is left for header */
+ parts->total_record_length+= (part.len= 1 + 2);
+ part.buff= chunk1_header;
+ *chunk1_header= (byte) (type | TRANSLOG_CHUNK_FIXED);
+ int2store(chunk1_header + 1, short_trid);
+ parts->current--;
+ set_dynamic(&parts->parts, (gptr) &part, parts->current);
+
+ rc= translog_write_parts_on_page(&log_descriptor.horizon,
+ &log_descriptor.bc,
+ parts->total_record_length, parts);
+
+ log_descriptor.bc.buffer->last_lsn= *lsn;
+
+err:
+ rc|= translog_unlock();
+
+ /*
+ check if we switched buffers and need to process the old one (the
+ current buffer is already unlocked => we will not delay other threads)
+ */
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Write the log record
+
+ SYNOPSIS
+ translog_write_record()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Short transaction ID or 0 if it is not applicable
+ tcb Transaction control block pointer for hooks by
+ record log type
+ partN_length length of the N-th part of the record
+ partN_buffer pointer to the buffer of the N-th part
+ 0 a zero length marks the end of the parts
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
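+
+/*
+  Illustrative usage sketch (editor's note, not part of the original
+  comment): appending a record assembled from two buffers and then making
+  it durable.  The record type, the short_trid value and the 'err' label
+  are placeholders, and it is assumed the chosen type is registered so
+  that its hooks (if any) accept a NULL tcb.
+
+    LSN lsn;
+    byte head[8], body[64];
+    if (translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_HEAD,
+                              (SHORT_TRANSACTION_ID) 1, NULL,
+                              (translog_size_t) sizeof(head), head,
+                              (translog_size_t) sizeof(body), body,
+                              (translog_size_t) 0) ||
+        translog_flush(lsn))
+      goto err;
+*/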
+
+my_bool translog_write_record(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ void *tcb,
+ translog_size_t part1_length,
+ byte *part1_buff, ...)
+{
+ struct st_translog_parts parts;
+ struct st_translog_part part;
+ va_list pvar;
+ int rc;
+ DBUG_ENTER("translog_write_record");
+ DBUG_PRINT("enter", ("type: %u ShortTrID: %u",
+ (uint) type, (uint)short_trid));
+
+ /* move information about parts into dynamic array */
+ if (init_dynamic_array(&parts.parts, sizeof(struct st_translog_part),
+ 10, 10 CALLER_INFO))
+ {
+ UNRECOVERABLE_ERROR(("init array failed"));
+ DBUG_RETURN(1);
+ }
+
+ /* reserve place for header */
+ parts.current= 1;
+ part.len= 0;
+ part.buff= 0;
+ if (insert_dynamic(&parts.parts, (gptr) &part))
+ {
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ DBUG_RETURN(1);
+ }
+
+ parts.record_length= part.len= part1_length;
+ part.buff= part1_buff;
+ if (insert_dynamic(&parts.parts, (gptr) &part))
+ {
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("record length: %lu %lu ...",
+ (ulong) parts.record_length,
+ (ulong) parts.total_record_length));
+
+ /* count record length */
+ va_start(pvar, part1_buff);
+ for (;;)
+ {
+ part.len= va_arg(pvar, translog_size_t);
+ if (part.len == 0)
+ break;
+ parts.record_length+= part.len;
+ part.buff= va_arg(pvar, byte*);
+ if (insert_dynamic(&parts.parts, (gptr) &part))
+ {
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("record length: %lu %lu ...",
+ (ulong) parts.record_length,
+ (ulong) parts.total_record_length));
+ }
+ va_end(pvar);
+
+ /*
+ Start total_record_length from record_length; the chunk overhead will
+ be added later
+ */
+ parts.total_record_length= parts.record_length;
+ DBUG_PRINT("info", ("record length: %lu %lu",
+ (ulong) parts.record_length,
+ (ulong) parts.total_record_length));
+
+ /* process these parts */
+ if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
+ (*log_record_type_descriptor[type].prewrite_hook) (type, tcb,
+ &parts))))
+ {
+ switch (log_record_type_descriptor[type].class) {
+ case LOGRECTYPE_VARIABLE_LENGTH:
+ rc= translog_write_variable_record(lsn, type, short_trid, &parts, tcb);
+ break;
+ case LOGRECTYPE_PSEUDOFIXEDLENGTH:
+ case LOGRECTYPE_FIXEDLENGTH:
+ rc= translog_write_fixed_record(lsn, type, short_trid, &parts, tcb);
+ break;
+ case LOGRECTYPE_NOT_ALLOWED:
+ default:
+ DBUG_ASSERT(0);
+ rc= 1;
+ }
+ }
+
+ delete_dynamic(&parts.parts);
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Decode compressed (relative) LSN(s)
+
+ SYNOPSIS
+ translog_relative_LSN_decode()
+ base_lsn base LSN the LSN(s) were encoded against
+ src Decode LSN(s) from here
+ dst Put decoded LSNs here
+ lsns number of LSN(s)
+
+ RETURN
+ position in the source buffer after the decoded LSN(s)
+*/
+
+static byte *translog_relative_LSN_decode(LSN base_lsn,
+ byte *src, byte *dst, uint lsns)
+{
+ uint i;
+ for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
+ {
+ src= translog_get_LSN_from_diff(base_lsn, src, dst);
+ }
+ return src;
+}
+
+/*
+ Get the header of a fixed/pseudo-fixed length record and call the hook for its processing
+
+ SYNOPSIS
+ translog_fixed_length_header()
+ page Pointer to the buffer with page where LSN chunk is
+ placed
+ page_offset Offset of the first chunk in the page
+ buff Buffer to be filled with header data
+
+ RETURN
+ 0 error
+ # number of bytes of the decoded part of the header stored in
+ TRANSLOG_HEADER_BUFFER::header
+*/
+
+translog_size_t translog_fixed_length_header(byte *page,
+ translog_size_t page_offset,
+ TRANSLOG_HEADER_BUFFER *buff)
+{
+ struct st_log_record_type_descriptor *desc=
+ log_record_type_descriptor + buff->type;
+ byte *src= page + page_offset + 3;
+ byte *dst= buff->header;
+ byte *start= src;
+ uint lsns= desc->compressed_LSN;
+ uint length= desc->fixed_length + (lsns * 2);
+
+ DBUG_ENTER("translog_fixed_length_header");
+
+ buff->record_length= length;
+
+ if (desc->class == LOGRECTYPE_PSEUDOFIXEDLENGTH)
+ {
+ DBUG_ASSERT(lsns > 0);
+ src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
+ lsns*= LSN_STORE_SIZE;
+ dst+= lsns;
+ length-= lsns;
+ buff->compressed_LSN_economy= (uint16) (lsns - (src - start));
+ }
+ else
+ buff->compressed_LSN_economy= 0;
+
+ memcpy(dst, src, length);
+ buff->non_header_data_start_offset= page_offset +
+ ((src + length) - (page + page_offset));
+ buff->non_header_data_len= 0;
+ DBUG_RETURN(buff->record_length);
+}
+
+
+/*
+ Free resources used by TRANSLOG_HEADER_BUFFER
+
+ SYNOPSIS
+ translog_free_record_header();
+*/
+
+void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
+{
+ DBUG_ENTER("translog_free_record_header");
+ if (buff->groups_no != 0)
+ {
+ my_free((gptr) buff->groups, MYF(0));
+ buff->groups_no= 0;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Set current horizon in the scanner data structure
+
+ SYNOPSIS
+ translog_scanner_set_horizon()
+ scanner Information about current chunk during scanning
+*/
+
+static void translog_scanner_set_horizon(struct st_translog_scanner_data
+ *scanner)
+{
+ translog_lock();
+ scanner->horizon= log_descriptor.horizon;
+ translog_unlock();
+}
+
+
+/*
+ Set last page in the scanner data structure
+
+ SYNOPSIS
+ translog_scanner_set_last_page()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA
+ *scanner)
+{
+ my_bool page_ok;
+ scanner->last_file_page= scanner->page_addr;
+ return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok));
+}
+
+
+/*
+ Initialize reader scanner
+
+ SYNOPSIS
+ translog_init_scanner()
+ lsn LSN with which the scanner has to be initialized
+ fixed_horizon true if it is OK not to read records which were
+ written after the scanning began
+ scanner scanner which has to be initialized
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+my_bool translog_init_scanner(LSN lsn,
+ my_bool fixed_horizon,
+ struct st_translog_scanner_data *scanner)
+{
+ TRANSLOG_VALIDATOR_DATA data;
+ DBUG_ENTER("translog_init_scanner");
+ DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(lsn),
+ (ulong) LSN_OFFSET(lsn)));
+ DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
+
+ data.addr= &scanner->page_addr;
+ data.was_recovered= 0;
+
+ scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
+
+ scanner->fixed_horizon= fixed_horizon;
+
+ translog_scanner_set_horizon(scanner);
+ DBUG_PRINT("info", ("horizon: (0x%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(scanner->horizon),
+ (ulong) LSN_OFFSET(scanner->horizon)));
+
+ /* lsn < horizon */
+ DBUG_ASSERT(lsn < scanner->horizon);
+
+ scanner->page_addr= lsn;
+ scanner->page_addr-= scanner->page_offset; /*decrease offset */
+
+ if (translog_scanner_set_last_page(scanner))
+ DBUG_RETURN(1);
+
+ if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL)
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Checks End of the Log
+
+ SYNOPSIS
+ translog_scanner_eol()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 1 End of the Log
+ 0 OK
+*/
+static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
+{
+ DBUG_ENTER("translog_scanner_eol");
+ DBUG_PRINT("enter",
+ ("Horizon: (%lu, 0x%lx) Current: (%lu, 0x%lx+0x%x=0x%lx)",
+ (ulong) LSN_FILE_NO(scanner->horizon),
+ (ulong) LSN_OFFSET(scanner->horizon),
+ (ulong) LSN_FILE_NO(scanner->page_addr),
+ (ulong) LSN_OFFSET(scanner->page_addr),
+ (uint) scanner->page_offset,
+ (ulong) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
+ if (scanner->horizon > (scanner->page_addr +
+ scanner->page_offset))
+ {
+ DBUG_PRINT("info", ("Horizon is not reached"));
+ DBUG_RETURN(0);
+ }
+ if (scanner->fixed_horizon)
+ {
+ DBUG_PRINT("info", ("Horizon is fixed and reached"));
+ DBUG_RETURN(1);
+ }
+ translog_scanner_set_horizon(scanner);
+ DBUG_PRINT("info",
+ ("Horizon is re-read, EOL: %d",
+ scanner->horizon <= (scanner->page_addr +
+ scanner->page_offset)));
+ DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
+ scanner->page_offset));
+}
+
+
+/*
+ Checks End of the Page
+
+ SYNOPSIS
+ translog_scanner_eop()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 1 End of the Page
+ 0 OK
+*/
+static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
+{
+ DBUG_ENTER("translog_scanner_eop");
+ DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
+ scanner->page[scanner->page_offset] == 0);
+}
+
+
+/*
+ Checks End of the File (i.e. we are scanning the last page, which does
+ not mean the end of this page)
+
+ SYNOPSIS
+ translog_scanner_eof()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 1 End of the File
+ 0 OK
+*/
+static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
+{
+ DBUG_ENTER("translog_scanner_eof");
+ DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
+ LSN_FILE_NO(scanner->last_file_page));
+ DBUG_PRINT("enter", ("curr Page: 0x%lx last page: 0x%lx "
+ "normal EOF: %d",
+ (ulong) LSN_OFFSET(scanner->page_addr),
+ (ulong) LSN_OFFSET(scanner->last_file_page),
+ LSN_OFFSET(scanner->page_addr) ==
+ LSN_OFFSET(scanner->last_file_page)));
+ /*
+ TODO: detect damaged file EOF,
+ TODO: issue warning if damaged file EOF detected
+ */
+ DBUG_RETURN(scanner->page_addr ==
+ scanner->last_file_page);
+}
+
+
+/*
+ Move scanner to the next chunk
+
+ SYNOPSIS
+ translog_get_next_chunk()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool
+translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
+{
+ uint16 len;
+ TRANSLOG_VALIDATOR_DATA data;
+ DBUG_ENTER("translog_get_next_chunk");
+
+ if ((len= translog_get_total_chunk_length(scanner->page,
+ scanner->page_offset)) == 0)
+ DBUG_RETURN(1);
+ scanner->page_offset+= len;
+
+ if (translog_scanner_eol(scanner))
+ {
+ scanner->page= &end_of_log;
+ scanner->page_offset= 0;
+ DBUG_RETURN(0);
+ }
+ if (translog_scanner_eop(scanner))
+ {
+ if (translog_scanner_eof(scanner))
+ {
+ DBUG_PRINT("info", ("horizon: (%lu,0x%lx) pageaddr: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(scanner->horizon),
+ (ulong) LSN_OFFSET(scanner->horizon),
+ (ulong) LSN_FILE_NO(scanner->page_addr),
+ (ulong) LSN_OFFSET(scanner->page_addr)));
+ /* if it is the end of the log it has to have been caught before */
+ DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
+ LSN_FILE_NO(scanner->page_addr));
+ scanner->page_addr+= LSN_ONE_FILE;
+ scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
+ TRANSLOG_PAGE_SIZE);
+ if (translog_scanner_set_last_page(scanner))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
+ }
+
+ data.addr= &scanner->page_addr;
+ data.was_recovered= 0;
+ if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL)
+ DBUG_RETURN(1);
+
+ scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
+ if (translog_scanner_eol(scanner))
+ {
+ scanner->page= &end_of_log;
+ scanner->page_offset= 0;
+ DBUG_RETURN(0);
+ }
+ DBUG_ASSERT(scanner->page[scanner->page_offset]);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Get the header of a variable length record and call the hook for its processing
+
+ SYNOPSIS
+ translog_variable_length_header()
+ page Pointer to the buffer with page where LSN chunk is
+ placed
+ page_offset Offset of the first chunk in the page
+ buff Buffer to be filled with header data
+ scanner If present, it will be moved to the header page if
+ that differs from the LSN page
+
+ RETURN
+ 0 error
+ # number of bytes of the decoded part of the header stored in
+ TRANSLOG_HEADER_BUFFER::header
+*/
+
+translog_size_t translog_variable_length_header(byte *page,
+ translog_size_t page_offset,
+ TRANSLOG_HEADER_BUFFER *buff,
+ TRANSLOG_SCANNER_DATA
+ *scanner)
+{
+ struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
+ buff->type);
+ byte *src= page + page_offset + 1 + 2;
+ byte *dst= buff->header;
+ LSN base_lsn;
+ uint lsns= desc->compressed_LSN;
+ uint16 chunk_len;
+ uint16 length= desc->read_header_len + (lsns * 2);
+ uint16 buffer_length= length;
+ uint16 body_len;
+ TRANSLOG_SCANNER_DATA internal_scanner;
+
+ DBUG_ENTER("translog_variable_length_header");
+
+ buff->record_length= translog_variable_record_1group_decode_len(&src);
+ chunk_len= uint2korr(src);
+ DBUG_PRINT("info", ("rec len: %lu chunk len: %u length: %u bufflen: %u",
+ (ulong) buff->record_length, (uint) chunk_len,
+ (uint) length, (uint) buffer_length));
+ if (chunk_len == 0)
+ {
+ uint16 page_rest;
+ DBUG_PRINT("info", ("1 group"));
+ src+= 2;
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+
+ base_lsn= buff->lsn;
+ body_len= min(page_rest, buff->record_length);
+ }
+ else
+ {
+ uint grp_no, curr;
+ uint header_to_skip;
+ uint16 page_rest;
+
+ DBUG_PRINT("info", ("multi-group"));
+ grp_no= buff->groups_no= uint2korr(src + 2);
+ if (!(buff->groups=
+ (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no,
+ MYF(0))))
+ DBUG_RETURN(0);
+ DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
+ src+= (2 + 2);
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+ curr= 0;
+ header_to_skip= src - (page + page_offset);
+ buff->chunk0_pages= 0;
+
+ for (;;)
+ {
+ uint i, read= grp_no;
+
+ buff->chunk0_pages++;
+ if (page_rest < grp_no * (7 + 1))
+ read= page_rest / (7 + 1);
+ DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u "
+ "start from: %u",
+ buff->chunk0_pages, read, grp_no, curr));
+ for (i= 0; i < read; i++, curr++)
+ {
+ DBUG_ASSERT(curr < buff->groups_no);
+ buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
+ buff->groups[curr].num= src[i * (7 + 1) + 7];
+ DBUG_PRINT("info", ("group #%u (%lu,0x%lx) chunks: %u",
+ curr,
+ (ulong) LSN_FILE_NO(buff->groups[curr].addr),
+ (ulong) LSN_OFFSET(buff->groups[curr].addr),
+ (uint) buff->groups[curr].num));
+ }
+ grp_no-= read;
+ if (grp_no == 0)
+ {
+ if (scanner)
+ {
+ buff->chunk0_data_addr= scanner->page_addr;
+ buff->chunk0_data_addr+= (page_offset + header_to_skip +
+ read * (7 + 1)); /* offset increased */
+ }
+ else
+ {
+ buff->chunk0_data_addr= buff->lsn;
+ /* offset increased */
+ buff->chunk0_data_addr+= (header_to_skip + read * (7 + 1));
+ }
+ buff->chunk0_data_len= chunk_len - 2 - read * (7 + 1);
+ DBUG_PRINT("info", ("Data address: (%lu,0x%lx) len: %u",
+ (ulong) LSN_FILE_NO(buff->chunk0_data_addr),
+ (ulong) LSN_OFFSET(buff->chunk0_data_addr),
+ buff->chunk0_data_len));
+ break;
+ }
+ if (scanner == NULL)
+ {
+ DBUG_PRINT("info", ("use internal scanner for header reading"));
+ scanner= &internal_scanner;
+ translog_init_scanner(buff->lsn, 1, scanner);
+ }
+ translog_get_next_chunk(scanner);
+ page= scanner->page;
+ page_offset= scanner->page_offset;
+ src= page + page_offset + header_to_skip;
+ chunk_len= uint2korr(src - 2 - 2);
+ DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+ }
+
+ if (scanner == NULL)
+ {
+ DBUG_PRINT("info", ("use internal scanner"));
+ scanner= &internal_scanner;
+ }
+
+ base_lsn= buff->groups[0].addr;
+ translog_init_scanner(base_lsn, 1, scanner);
+ /* first group chunk is always chunk type 2 */
+ page= scanner->page;
+ page_offset= scanner->page_offset;
+ src= page + page_offset + 1;
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+ body_len= page_rest;
+ }
+ if (lsns)
+ {
+ byte *start= src;
+ src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
+ lsns*= LSN_STORE_SIZE;
+ dst+= lsns;
+ length-= lsns;
+ buff->record_length+= (buff->compressed_LSN_economy=
+ (uint16) (lsns - (src - start)));
+ DBUG_PRINT("info", ("lsns: %u length: %u economy: %u new length: %lu",
+ lsns / LSN_STORE_SIZE, (uint) length,
+ (uint) buff->compressed_LSN_economy,
+ (ulong) buff->record_length));
+ body_len-= (src - start);
+ }
+ else
+ buff->compressed_LSN_economy= 0;
+
+ DBUG_ASSERT(body_len >= length);
+ body_len-= length;
+ memcpy(dst, src, length);
+ buff->non_header_data_start_offset= src + length - page;
+ buff->non_header_data_len= body_len;
+ DBUG_PRINT("info", ("non_header_data_start_offset: %u len: %u buffer: %u",
+ buff->non_header_data_start_offset,
+ buff->non_header_data_len, buffer_length));
+ DBUG_RETURN(buffer_length);
+}
+
+
+/*
+ Read record header from the given buffer
+
+ SYNOPSIS
+ translog_read_record_header_from_buffer()
+ page page content buffer
+ page_offset offset of the chunk in the page
+ buff destination buffer
+ scanner If this is set the scanner will be moved to the
+ record header page (it differs from the LSN page in case of
+ multi-group records)
+*/
+
+translog_size_t
+translog_read_record_header_from_buffer(byte *page,
+ uint16 page_offset,
+ TRANSLOG_HEADER_BUFFER *buff,
+ TRANSLOG_SCANNER_DATA *scanner)
+{
+ DBUG_ENTER("translog_read_record_header_from_buffer");
+ DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
+ TRANSLOG_CHUNK_LSN ||
+ (page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
+ TRANSLOG_CHUNK_FIXED);
+ buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
+ buff->short_trid= uint2korr(page + page_offset + 1);
+ DBUG_PRINT("info", ("Type %u, Sort TrID %u, LSN (%lu,0x%lx)",
+ (uint) buff->type, (uint)buff->short_trid,
+ (ulong) LSN_FILE_NO(buff->lsn),
+ (ulong) LSN_OFFSET(buff->lsn)));
+ /* Read required bytes from the header and call hook */
+ switch (log_record_type_descriptor[buff->type].class) {
+ case LOGRECTYPE_VARIABLE_LENGTH:
+ DBUG_RETURN(translog_variable_length_header(page, page_offset, buff,
+ scanner));
+ case LOGRECTYPE_PSEUDOFIXEDLENGTH:
+ case LOGRECTYPE_FIXEDLENGTH:
+ DBUG_RETURN(translog_fixed_length_header(page, page_offset, buff));
+ default:
+ DBUG_ASSERT(0);
+ }
+ DBUG_RETURN(0); /* purecov: deadcode */
+}
+
+
+/*
+ Read record header and some fixed part of a record (the part depend on
+ record type).
+
+ SYNOPSIS
+ translog_read_record_header()
+ lsn log record serial number (address of the record)
+ buff log record header buffer
+
+ NOTE
+ - Some types of records can be read completely by this call
+ - The "decoded" header is stored in TRANSLOG_HEADER_BUFFER::header
+ (relative LSNs are translated to absolute ones); some fields can be
+ added (like the actual header length in the record if the header has
+ variable length)
+
+ RETURN
+ 0 error
+ # number of bytes of the decoded part of the header stored in
+ TRANSLOG_HEADER_BUFFER::header
+*/
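+
+/*
+  Illustrative usage sketch (editor's note): reading a record header by
+  its LSN.  'lsn' is assumed to hold a valid record address and the 'err'
+  label is a placeholder.
+
+    TRANSLOG_HEADER_BUFFER buff;
+    translog_size_t len;
+    if ((len= translog_read_record_header(lsn, &buff)) == 0)
+      goto err;
+    ... use buff.type, buff.short_trid, buff.record_length and the first
+    'len' bytes of buff.header ...
+    translog_free_record_header(&buff);
+*/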
+
+translog_size_t translog_read_record_header(LSN lsn,
+ TRANSLOG_HEADER_BUFFER *buff)
+{
+ byte buffer[TRANSLOG_PAGE_SIZE], *page;
+ translog_size_t page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
+ TRANSLOG_ADDRESS addr;
+ TRANSLOG_VALIDATOR_DATA data;
+ DBUG_ENTER("translog_read_record_header");
+ DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn)));
+ DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
+
+ buff->lsn= lsn;
+ buff->groups_no= 0;
+ data.addr= &addr;
+ data.was_recovered= 0;
+ addr= lsn;
+ addr-= page_offset; /* offset decreasing */
+ if (!(page= translog_get_page(&data, buffer)))
+ DBUG_RETURN(0);
+
+ DBUG_RETURN(translog_read_record_header_from_buffer(page, page_offset,
+ buff, 0));
+}
+
+
+/*
+ Read record header and some fixed part of a record (the part depend on
+ record type).
+
+ SYNOPSIS
+ translog_read_record_header_scan()
+ scan scanner position to read
+ buff log record header buffer
+ move_scanner request to move scanner to the header position
+
+ NOTE
+ - Some types of records can be read completely by this call
+ - The "decoded" header is stored in TRANSLOG_HEADER_BUFFER::header
+ (relative LSNs are translated to absolute ones); some fields can be
+ added (like the actual header length in the record if the header has
+ variable length)
+
+ RETURN
+ 0 error
+ # number of bytes of the decoded part of the header stored in
+ TRANSLOG_HEADER_BUFFER::header
+*/
+
+translog_size_t
+translog_read_record_header_scan(TRANSLOG_SCANNER_DATA
+ *scanner,
+ TRANSLOG_HEADER_BUFFER *buff,
+ my_bool move_scanner)
+{
+ DBUG_ENTER("translog_read_record_header_scan");
+ DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) "
+ "Lst: (%lu,0x%lx) Offset: %u(%x) fixed %d",
+ (ulong) LSN_FILE_NO(scanner->page_addr),
+ (ulong) LSN_OFFSET(scanner->page_addr),
+ (ulong) LSN_FILE_NO(scanner->horizon),
+ (ulong) LSN_OFFSET(scanner->horizon),
+ (ulong) LSN_FILE_NO(scanner->last_file_page),
+ (ulong) LSN_OFFSET(scanner->last_file_page),
+ (uint) scanner->page_offset,
+ (uint) scanner->page_offset, scanner->fixed_horizon));
+ buff->groups_no= 0;
+ buff->lsn= scanner->page_addr;
+ buff->lsn+= scanner->page_offset; /* offset increasing */
+ DBUG_RETURN(translog_read_record_header_from_buffer(scanner->page,
+ scanner->page_offset,
+ buff,
+ (move_scanner ?
+ scanner : 0)));
+}
+
+
+/*
+ Read record header and some fixed part of the next record (the part
+ depend on record type).
+
+ SYNOPSIS
+ translog_read_next_record_header()
+ scanner data for scanning; scanning continues from the
+ scanner's current position
+ buff log record header buffer
+
+ NOTE
+ - it is like translog_read_record_header, but reads the next record, so see
+ its NOTES.
+ - in case of end of the log buff->lsn will be set to
+ (CONTROL_FILE_IMPOSSIBLE_LSN)
+
+ RETURN
+ 0 error
+ TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 End of the log
+ # number of bytes of the decoded part
+ of the header stored in
+ TRANSLOG_HEADER_BUFFER::header
+*/
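+
+/*
+  Illustrative usage sketch (editor's note): scanning all records starting
+  from a known LSN.  'start_lsn' and the 'err' label are placeholders;
+  error handling is schematic.
+
+    TRANSLOG_SCANNER_DATA scanner;
+    TRANSLOG_HEADER_BUFFER buff;
+    translog_size_t len;
+    if (translog_init_scanner(start_lsn, 1, &scanner))
+      goto err;
+    for (;;)
+    {
+      if ((len= translog_read_next_record_header(&scanner, &buff)) == 0)
+        goto err;
+      if (len == TRANSLOG_RECORD_HEADER_MAX_SIZE + 1)
+        break;                          (end of the log reached)
+      ... process buff ...
+      translog_free_record_header(&buff);
+    }
+*/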
+
+translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA
+ *scanner,
+ TRANSLOG_HEADER_BUFFER *buff)
+{
+ uint8 chunk_type;
+
+ buff->groups_no= 0; /* to be sure that we will free it right */
+
+ DBUG_ENTER("translog_read_next_record_header");
+ DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner));
+ DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) "
+ "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d",
+ (ulong) LSN_FILE_NO(scanner->page_addr),
+ (ulong) LSN_OFFSET(scanner->page_addr),
+ (ulong) LSN_FILE_NO(scanner->horizon),
+ (ulong) LSN_OFFSET(scanner->horizon),
+ (ulong) LSN_FILE_NO(scanner->last_file_page),
+ (ulong) LSN_OFFSET(scanner->last_file_page),
+ (uint) scanner->page_offset,
+ (uint) scanner->page_offset, scanner->fixed_horizon));
+
+ do
+ {
+ if (translog_get_next_chunk(scanner))
+ DBUG_RETURN(0);
+ chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE;
+ DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type,
+ (uint) scanner->page[scanner->page_offset]));
+ } while (chunk_type != TRANSLOG_CHUNK_LSN && chunk_type !=
+ TRANSLOG_CHUNK_FIXED && scanner->page[scanner->page_offset] != 0);
+
+ if (scanner->page[scanner->page_offset] == 0)
+ {
+ /* Last record was read */
+ buff->lsn= CONTROL_FILE_IMPOSSIBLE_LSN;
+ /* Return 'end of log' marker */
+ DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1);
+ }
+ DBUG_RETURN(translog_read_record_header_scan(scanner, buff, 0));
+}
+
+
+/*
+ Moves the record data reader to the next chunk and fills the data
+ reader with information about that chunk.
+
+ SYNOPSIS
+ translog_record_read_next_chunk()
+ data data cursor
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_record_read_next_chunk(struct st_translog_reader_data
+ *data)
+{
+ translog_size_t new_current_offset= data->current_offset + data->chunk_size;
+ uint16 chunk_header_len, chunk_len;
+ uint8 type;
+ DBUG_ENTER("translog_record_read_next_chunk");
+
+ if (data->eor)
+ {
+ DBUG_PRINT("info", ("end of the record flag set"));
+ DBUG_RETURN(1);
+ }
+
+ if (data->header.groups_no &&
+ data->header.groups_no - 1 != data->current_group &&
+ data->header.groups[data->current_group].num == data->current_chunk)
+ {
+ /* Goto next group */
+ data->current_group++;
+ data->current_chunk= 0;
+ DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
+ translog_init_scanner(data->header.groups[data->current_group].addr,
+ 1, &data->scanner);
+ }
+ else
+ {
+ data->current_chunk++;
+ if (translog_get_next_chunk(&data->scanner))
+ DBUG_RETURN(1);
+ }
+ type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
+
+ if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
+ {
+ DBUG_PRINT("info",
+ ("Last chunk: data len: %u offset: %u group: %u of %u",
+ data->header.chunk0_data_len, data->scanner.page_offset,
+ data->current_group, data->header.groups_no - 1));
+ DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
+ DBUG_ASSERT(data->header.lsn ==
+ data->scanner.page_addr + data->scanner.page_offset);
+ translog_init_scanner(data->header.chunk0_data_addr, 1, &data->scanner);
+ data->chunk_size= data->header.chunk0_data_len;
+ data->body_offset= data->scanner.page_offset;
+ data->current_offset= new_current_offset;
+ data->eor= 1;
+ DBUG_RETURN(0);
+ }
+
+ if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
+ {
+ data->eor= 1;
+ DBUG_RETURN(1); /* End of record */
+ }
+
+ chunk_header_len=
+ translog_get_chunk_header_length(data->scanner.page,
+ data->scanner.page_offset);
+ chunk_len= translog_get_total_chunk_length(data->scanner.page,
+ data->scanner.page_offset);
+ data->chunk_size= chunk_len - chunk_header_len;
+ data->body_offset= data->scanner.page_offset + chunk_header_len;
+ data->current_offset= new_current_offset;
+ DBUG_PRINT("info", ("grp: %u chunk: %u body_offset: %u chunk_size: %u "
+ "current_offset: %lu",
+ (uint) data->current_group,
+ (uint) data->current_chunk,
+ (uint) data->body_offset,
+ (uint) data->chunk_size, (ulong) data->current_offset));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Initialize record reader data from LSN
+
+ SYNOPSIS
+ translog_init_reader_data()
+ lsn reference to LSN we should start from
+ data reader data to initialize
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_init_reader_data(LSN lsn,
+ struct st_translog_reader_data *data)
+{
+ DBUG_ENTER("translog_init_reader_data");
+ if (translog_init_scanner(lsn, 1, &data->scanner) ||
+ !(data->read_header=
+ translog_read_record_header_scan(&data->scanner, &data->header, 1)))
+ {
+ DBUG_RETURN(1);
+ }
+ data->body_offset= data->header.non_header_data_start_offset;
+ data->chunk_size= data->header.non_header_data_len;
+ data->current_offset= data->read_header;
+ data->current_group= 0;
+ data->current_chunk= 0;
+ data->eor= 0;
+ DBUG_PRINT("info", ("read_header: %u "
+ "body_offset: %u chunk_size: %u current_offset: %lu",
+ (uint) data->read_header,
+ (uint) data->body_offset,
+ (uint) data->chunk_size, (ulong) data->current_offset));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Read a part of the record.
+
+ SYNOPSIS
+ translog_read_record()
+ lsn log record serial number (address of the record)
+ offset Offset from the beginning of the record (as read
+ by translog_read_record_header).
+ length Length of the record part which has to be read.
+ buffer Buffer where to read the record part (has to be at
+ least 'length' bytes long)
+ data reader data; if NULL an internal one is used
+
+ RETURN
+ length of data actually read
+*/
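+
+/*
+  Illustrative usage sketch (editor's note): reading a whole record after
+  its header.  Offset 0 starts at the record beginning, so the decoded
+  header bytes are returned first; the my_malloc() buffer size and the
+  'err' label are schematic.
+
+    TRANSLOG_HEADER_BUFFER buff;
+    byte *record;
+    translog_size_t got;
+    if (translog_read_record_header(lsn, &buff) == 0)
+      goto err;
+    if ((record= (byte*) my_malloc(buff.record_length, MYF(MY_WME))))
+    {
+      got= translog_read_record(lsn, 0, buff.record_length, record, NULL);
+      ... use the first 'got' bytes of 'record' ...
+      my_free((gptr) record, MYF(0));
+    }
+    translog_free_record_header(&buff);
+*/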
+
+translog_size_t translog_read_record(LSN lsn,
+ translog_size_t offset,
+ translog_size_t length,
+ byte *buffer,
+ struct st_translog_reader_data *data)
+{
+ translog_size_t requested_length= length;
+ translog_size_t end= offset + length;
+ struct st_translog_reader_data internal_data;
+ DBUG_ENTER("translog_read_record");
+
+ if (data == NULL)
+ {
+ DBUG_ASSERT(lsn != CONTROL_FILE_IMPOSSIBLE_LSN);
+ data= &internal_data;
+ }
+ if (lsn ||
+ (offset < data->current_offset &&
+ !(offset < data->read_header && offset + length < data->read_header)))
+ {
+ if (translog_init_reader_data(lsn, data))
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("info", ("Offset: %lu length: %lu "
+ "Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) "
+ "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d",
+ (ulong) offset, (ulong) length,
+ (ulong) LSN_FILE_NO(data->scanner.page_addr),
+ (ulong) LSN_OFFSET(data->scanner.page_addr),
+ (ulong) LSN_FILE_NO(data->scanner.horizon),
+ (ulong) LSN_OFFSET(data->scanner.horizon),
+ (ulong) LSN_FILE_NO(data->scanner.last_file_page),
+ (ulong) LSN_OFFSET(data->scanner.last_file_page),
+ (uint) data->scanner.page_offset,
+ (uint) data->scanner.page_offset,
+ data->scanner.fixed_horizon));
+ if (offset < data->read_header)
+ {
+ uint16 len= min(data->read_header, end) - offset;
+ DBUG_PRINT("info",
+ ("enter header offset: %lu length: %lu",
+ (ulong) offset, (ulong) length));
+ memcpy(buffer, data->header.header + offset, len);
+ length-= len;
+ if (length == 0)
+ DBUG_RETURN(requested_length);
+ offset+= len;
+ buffer+= len;
+ DBUG_PRINT("info",
+ ("len: %u offset: %lu curr: %lu length: %lu",
+ len, (ulong) offset, (ulong) data->current_offset,
+ (ulong) length));
+ }
+ /* TODO: find first page which we should read by offset */
+
+ /* read the record chunk by chunk */
+ for(;;)
+ {
+ uint page_end= data->current_offset + data->chunk_size;
+ DBUG_PRINT("info",
+ ("enter body offset: %lu curr: %lu "
+ "length: %lu page_end: %lu",
+ (ulong) offset, (ulong) data->current_offset, (ulong) length,
+ (ulong) page_end));
+ if (offset < page_end)
+ {
+ uint len= page_end - offset;
+ DBUG_ASSERT(offset >= data->current_offset);
+ memcpy(buffer,
+ data->scanner.page + data->body_offset +
+ (offset - data->current_offset), len);
+ length-= len;
+ if (length == 0)
+ DBUG_RETURN(requested_length);
+ offset+= len;
+ buffer+= len;
+ DBUG_PRINT("info",
+ ("len: %u offset: %lu curr: %lu length: %lu",
+ len, (ulong) offset, (ulong) data->current_offset,
+ (ulong) length));
+ }
+ if (translog_record_read_next_chunk(data))
+ DBUG_RETURN(requested_length - length);
+ }
+}
+
+
+/*
+ Force skipping to the next buffer
+
+ SYNOPSIS
+ translog_force_current_buffer_to_finish()
+*/
+
+static void translog_force_current_buffer_to_finish()
+{
+ TRANSLOG_ADDRESS new_buff_beginning;
+ uint16 old_buffer_no= log_descriptor.bc.buffer_no;
+ uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
+ new_buffer_no);
+ struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
+ byte *data= log_descriptor.bc.ptr -log_descriptor.bc.current_page_fill;
+ uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
+ uint16 current_page_fill, write_counter, previous_offset;
+ DBUG_ENTER("translog_force_current_buffer_to_finish");
+ DBUG_PRINT("enter", ("Buffer #%u 0x%lx "
+ "Buffer addr: (%lu,0x%lx) "
+ "Page addr: (%lu,0x%lx) "
+ "New Buff: (%lu,0x%lx) "
+ "size: %lu (%lu) Pg: %u left: %u",
+ (uint) log_descriptor.bc.buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ (ulong) LSN_FILE_NO(log_descriptor.bc.buffer->offset),
+ (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset),
+ (ulong) LSN_FILE_NO(log_descriptor.horizon),
+ (ulong) (LSN_OFFSET(log_descriptor.horizon) -
+ log_descriptor.bc.current_page_fill),
+ (ulong) LSN_FILE_NO(new_buff_beginning),
+ (ulong) LSN_OFFSET(new_buff_beginning),
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
+ buffer->buffer),
+ (uint) log_descriptor.bc.current_page_fill,
+ (uint) left));
+
+ new_buff_beginning= log_descriptor.bc.buffer->offset;
+ new_buff_beginning+= log_descriptor.bc.buffer->size; /* increase offset */
+
+ DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
+ DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
+ LSN_FILE_NO(log_descriptor.bc.buffer->offset));
+ DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc););
+ DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
+ if (left != 0)
+ {
+ /*
+ TODO: if 'left' is so small that it can't hold any other record
+ then do not move the page
+ */
+ DBUG_PRINT("info", ("left: %u", (uint) left));
+
+ /* decrease offset */
+ new_buff_beginning-= log_descriptor.bc.current_page_fill;
+ current_page_fill= log_descriptor.bc.current_page_fill;
+
+ bzero(log_descriptor.bc.ptr, left);
+ log_descriptor.bc.buffer->size+= left;
+ DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx "
+ "Size: %lu",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ (ulong) log_descriptor.bc.buffer->size));
+ DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
+ log_descriptor.bc.buffer_no);
+ }
+ else
+ {
+ left= 0;
+ log_descriptor.bc.current_page_fill= 0;
+ }
+
+ translog_buffer_lock(new_buffer);
+ translog_wait_for_buffer_free(new_buffer);
+
+ write_counter= log_descriptor.bc.write_counter;
+ previous_offset= log_descriptor.bc.previous_offset;
+ translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
+ log_descriptor.bc.buffer->offset= new_buff_beginning;
+ log_descriptor.bc.write_counter= write_counter;
+ log_descriptor.bc.previous_offset= previous_offset;
+
+ if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
+ {
+ translog_put_sector_protection(data, &log_descriptor.bc);
+ if (left)
+ {
+ log_descriptor.bc.write_counter++;
+ log_descriptor.bc.previous_offset= current_page_fill;
+ }
+ else
+ {
+ DBUG_PRINT("info", ("drop write_counter"));
+ log_descriptor.bc.write_counter= 0;
+ log_descriptor.bc.previous_offset= 0;
+ }
+ }
+
+ if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
+ {
+ uint32 crc= translog_crc(data + log_descriptor.page_overhead,
+ TRANSLOG_PAGE_SIZE -
+ log_descriptor.page_overhead);
+ DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc));
+ int4store(data + 3 + 3 + 1, crc);
+ }
+
+ if (left)
+ {
+ memcpy(new_buffer->buffer, data, current_page_fill);
+ log_descriptor.bc.ptr +=current_page_fill;
+ log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
+ current_page_fill;
+ new_buffer->overlay= old_buffer;
+ }
+ else
+ translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Flush the log up to given LSN (included)
+
+ SYNOPSIS
+ translog_flush()
+ lsn log record serial number up to which (inclusive)
+ the log have to be flushed
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
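+
+/*
+  Typical call pattern (editor's illustrative note): a group of records is
+  made durable by flushing up to (and including) the LSN returned by
+  translog_write_record() for the last of them, e.g.
+
+    if (translog_flush(commit_lsn))
+      goto err;
+
+  where 'commit_lsn' and the 'err' label are placeholders.
+*/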
+
+my_bool translog_flush(LSN lsn)
+{
+ LSN old_flushed, sent_to_file;
+ int rc= 0;
+ uint i;
+ my_bool full_circle= 0;
+ DBUG_ENTER("translog_flush");
+ DBUG_PRINT("enter", ("Flush up to LSN: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(lsn),
+ (ulong) LSN_OFFSET(lsn)));
+
+ translog_lock();
+ old_flushed= log_descriptor.flushed;
+ for (;;)
+ {
+ uint16 buffer_no= log_descriptor.bc.buffer_no;
+ uint16 buffer_start= buffer_no;
+ struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
+ struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
+ /* we can't flush in future */
+ DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, lsn) >= 0);
+ if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
+ {
+ DBUG_PRINT("info", ("already flushed: (%lu,0x%lx)",
+ (ulong) LSN_FILE_NO(log_descriptor.flushed),
+ (ulong) LSN_OFFSET(log_descriptor.flushed)));
+ translog_unlock();
+ DBUG_RETURN(0);
+ }
+ /* send to the file if it is not sent */
+ translog_get_sent_to_file(&sent_to_file);
+ if (cmp_translog_addr(sent_to_file, lsn) >= 0)
+ break;
+
+ do
+ {
+ buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ buffer= log_descriptor.buffers + buffer_no;
+ translog_buffer_lock(buffer);
+ translog_buffer_unlock(buffer_unlock);
+ buffer_unlock= buffer;
+ if (buffer->file != -1)
+ {
+ buffer_unlock= NULL;
+ if (buffer_start == buffer_no)
+ {
+ /* we made a circle */
+ full_circle= 1;
+ translog_force_current_buffer_to_finish();
+ }
+ break;
+ }
+ } while ((buffer_start != buffer_no) &&
+ cmp_translog_addr(log_descriptor.flushed, lsn) < 0);
+ if (buffer_unlock != NULL)
+ translog_buffer_unlock(buffer_unlock);
+ rc= translog_buffer_flush(buffer);
+ translog_buffer_unlock(buffer);
+ if (rc)
+ DBUG_RETURN(1);
+ if (!full_circle)
+ translog_lock();
+ }
+
+ for (i= LSN_FILE_NO(old_flushed); i <= LSN_FILE_NO(lsn); i++)
+ {
+ uint cache_index;
+ File file;
+
+ if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - i) <
+ OPENED_FILES_NUM)
+ {
+ /* file in the cache */
+ if (log_descriptor.log_file_num[cache_index] == -1)
+ {
+ if ((log_descriptor.log_file_num[cache_index]=
+ open_logfile_by_number_no_cache(i)) == -1)
+ {
+ translog_unlock();
+ DBUG_RETURN(1);
+ }
+ }
+ file= log_descriptor.log_file_num[cache_index];
+ rc|= my_sync(file, MYF(MY_WME));
+ }
+ /* We sync file when we are closing it => do nothing if file closed */
+ }
+ log_descriptor.flushed= sent_to_file;
+ rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME));
+ translog_unlock();
+ DBUG_RETURN(rc);
+}
diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h
new file mode 100644
index 00000000000..3646afcf52a
--- /dev/null
+++ b/storage/maria/ma_loghandler.h
@@ -0,0 +1,182 @@
+
+/* Transaction log flags */
+#define TRANSLOG_PAGE_CRC 1
+#define TRANSLOG_SECTOR_PROTECTION (1<<1)
+#define TRANSLOG_RECORD_CRC (1<<2)
+#define TRANSLOG_FLAGS_NUM ((TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | \
+ TRANSLOG_RECORD_CRC) + 1)
+
+/*
+ Page size in transaction log
+ It should be a power of 2 and a multiple of DISK_DRIVE_SECTOR_SIZE
+ (DISK_DRIVE_SECTOR_SIZE * 2^N)
+*/
+#define TRANSLOG_PAGE_SIZE (8*1024)
+
+#include "ma_loghandler_lsn.h"
+
+/* short transaction ID type */
+typedef uint16 SHORT_TRANSACTION_ID;
+
+/* Length of CRC at end of pages */
+#define CRC_LENGTH 4
+/*
+ Disk drive sector size (we assume that writing a sector
+ to disk is an atomic operation)
+*/
+#define DISK_DRIVE_SECTOR_SIZE 512
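+
+/*
+  For example (editor's note): with the defaults above,
+  TRANSLOG_PAGE_SIZE = 8*1024 = 8192 = DISK_DRIVE_SECTOR_SIZE * 2^4,
+  which satisfies the "power of 2, multiple of the sector size" rule.
+*/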
+
+/* types of records in the transaction log */
+enum translog_record_type
+{
+ LOGREC_RESERVED_FOR_CHUNKS23= 0,
+ LOGREC_REDO_INSERT_ROW_HEAD= 1,
+ LOGREC_REDO_INSERT_ROW_TAIL= 2,
+ LOGREC_REDO_INSERT_ROW_BLOB= 3,
+ LOGREC_REDO_INSERT_ROW_BLOBS= 4,
+ LOGREC_REDO_PURGE_ROW= 5,
+ LOGREC_REDO_PURGE_BLOCKS= 6,
+ LOGREC_REDO_DELETE_ROW= 7,
+ LOGREC_REDO_UPDATE_ROW_HEAD= 8,
+ LOGREC_REDO_INDEX= 9,
+ LOGREC_REDO_UNDELETE_ROW= 10,
+ LOGREC_CLR_END= 11,
+ LOGREC_PURGE_END= 12,
+ LOGREC_UNDO_ROW_INSERT= 13,
+ LOGREC_UNDO_ROW_DELETE= 14,
+ LOGREC_UNDO_ROW_UPDATE= 15,
+ LOGREC_UNDO_KEY_INSERT= 16,
+ LOGREC_UNDO_KEY_DELETE= 17,
+ LOGREC_PREPARE= 18,
+ LOGREC_PREPARE_WITH_UNDO_PURGE= 19,
+ LOGREC_COMMIT= 20,
+ LOGREC_COMMIT_WITH_UNDO_PURGE= 21,
+ LOGREC_CHECKPOINT_PAGE= 22,
+ LOGREC_CHECKPOINT_TRAN= 23,
+ LOGREC_CHECKPOINT_TABL= 24,
+ LOGREC_REDO_CREATE_TABLE= 25,
+ LOGREC_REDO_RENAME_TABLE= 26,
+ LOGREC_REDO_DROP_TABLE= 27,
+ LOGREC_REDO_TRUNCATE_TABLE= 28,
+ LOGREC_FILE_ID= 29,
+ LOGREC_LONG_TRANSACTION_ID= 30,
+ LOGREC_RESERVED_FUTURE_EXTENSION= 63
+};
+#define LOGREC_NUMBER_OF_TYPES 64 /* Maximum, can't be extended */
+
+/* Size of log file; One log file is restricted to 4G */
+typedef uint32 translog_size_t;
+
+#define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024
+
+typedef struct st_translog_group_descriptor
+{
+ TRANSLOG_ADDRESS addr;
+ uint8 num;
+} TRANSLOG_GROUP;
+
+
+typedef struct st_translog_header_buffer
+{
+ /* LSN of the read record */
+ LSN lsn;
+ /* array of groups descriptors, can be used only if groups_no > 0 */
+ TRANSLOG_GROUP *groups;
+ /* short transaction ID or 0 if it has no sense for the record */
+ SHORT_TRANSACTION_ID short_trid;
+ /*
+ The record length in the buffer (including the read header, but
+ excluding the hidden part of the record: type, short TrID, length)
+ */
+ translog_size_t record_length;
+ /*
+ Buffer for the decoded header of the record (its content depends on
+ the record type)
+ */
+ byte header[TRANSLOG_RECORD_HEADER_MAX_SIZE];
+ /* number of groups listed in the record */
+ uint groups_no;
+ /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */
+ uint chunk0_pages;
+ /* type of the read record */
+ enum translog_record_type type;
+ /* chunk 0 data address (valid only if groups_no > 0) */
+ TRANSLOG_ADDRESS chunk0_data_addr;
+ /*
+ Real compressed LSN(s) size economy (<number of LSN(s)>*7 - <real_size>)
+ */
+ uint16 compressed_LSN_economy;
+ /* page offset of the first byte of non-header (body) data */
+ uint16 non_header_data_start_offset;
+ /* non read body data length in this first chunk */
+ uint16 non_header_data_len;
+ /* chunk 0 data size (valid only if groups_no > 0) */
+ uint16 chunk0_data_len;
+} TRANSLOG_HEADER_BUFFER;
+
+
+typedef struct st_translog_scanner_data
+{
+ byte buffer[TRANSLOG_PAGE_SIZE]; /* buffer for page content */
+ TRANSLOG_ADDRESS page_addr; /* current page address */
+ /* end of the log which we saw last time */
+ TRANSLOG_ADDRESS horizon;
+ TRANSLOG_ADDRESS last_file_page; /* Last page in this file */
+ byte *page; /* page content pointer */
+ /* offset of the chunk in the page */
+ translog_size_t page_offset;
+ /* set horizon only once at init */
+ my_bool fixed_horizon;
+} TRANSLOG_SCANNER_DATA;
+
+
+struct st_translog_reader_data
+{
+ TRANSLOG_HEADER_BUFFER header; /* Header */
+ TRANSLOG_SCANNER_DATA scanner; /* chunks scanner */
+ translog_size_t body_offset; /* current chunk body offset */
+ /* data offset from the record beginning */
+ translog_size_t current_offset;
+ /* number of bytes read in header */
+ uint16 read_header;
+ uint16 chunk_size; /* current chunk size */
+ uint current_group; /* current group */
+ uint current_chunk; /* current chunk in the group */
+ my_bool eor; /* end of the record */
+};
+
+
+my_bool translog_init(const char *directory, uint32 log_file_max_size,
+ uint32 server_version, uint32 server_id,
+ PAGECACHE *pagecache, uint flags);
+
+my_bool translog_write_record(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ void *tcb,
+ translog_size_t part1_length,
+ byte *part1_buff, ...);
+
+void translog_destroy();
+
+translog_size_t translog_read_record_header(LSN lsn,
+ TRANSLOG_HEADER_BUFFER *buff);
+
+void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff);
+
+translog_size_t translog_read_record(LSN lsn,
+ translog_size_t offset,
+ translog_size_t length,
+ byte *buffer,
+ struct st_translog_reader_data *data);
+
+my_bool translog_flush(LSN lsn);
+
+my_bool translog_init_scanner(LSN lsn,
+ my_bool fixed_horizon,
+ struct st_translog_scanner_data *scanner);
+
+translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA
+ *scanner,
+ TRANSLOG_HEADER_BUFFER *buff);
+
diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h
new file mode 100644
index 00000000000..1789d3ce61b
--- /dev/null
+++ b/storage/maria/ma_loghandler_lsn.h
@@ -0,0 +1,56 @@
+#ifndef _ma_loghandler_lsn_h
+#define _ma_loghandler_lsn_h
+
+/*
+ Transaction log record address:
+ file_no << 32 | offset
+ file_no is only 3 bytes, so we can use a signed integer to make
+ comparison simpler.
+*/
+typedef int64 TRANSLOG_ADDRESS;
+
+/*
+ Compare addresses
+ A1 > A2 -> result > 0
+ A1 == A2 -> 0
+ A1 < A2 -> result < 0
+*/
+#define cmp_translog_addr(A1,A2) ((A1) - (A2))
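+
+/*
+  For example (editor's illustrative note), "is everything up to 'lsn'
+  already flushed?" is expressed as:
+
+    if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
+      ... nothing to do ...
+*/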
+
+/* LSN type (address of a certain log record chunk) */
+typedef TRANSLOG_ADDRESS LSN;
+
+/* Gets file number part of a LSN/log address */
+#define LSN_FILE_NO(L) ((L) >> 32)
+
+/* Gets raw file number part of a LSN/log address */
+#define LSN_FINE_NO_PART(L) ((L) & ((int64)0xFFFFFF00000000LL))
+
+/* Gets record offset of a LSN/log address */
+#define LSN_OFFSET(L) ((L) & 0xFFFFFFFFL)
+
+/* Makes lsn/log address from file number and record offset */
+#define MAKE_LSN(F,S) ((((uint64)(F)) << 32) | (S))
+
+/* checks LSN */
+#define LSN_VALID(L) DBUG_ASSERT((L) >= 0 && (L) < (uint64)0xFFFFFFFFFFFFFFLL)
+
+/* size of stored LSN on a disk */
+#define LSN_STORE_SIZE 7
+
+/* Puts LSN into buffer (dst) */
+#define lsn_store(dst, lsn) \
+ do { \
+ int3store((dst), LSN_FILE_NO(lsn)); \
+ int4store((dst) + 3, LSN_OFFSET(lsn)); \
+ } while (0)
+
+/* Unpacks LSN from the buffer (P) */
+#define lsn_korr(P) MAKE_LSN(uint3korr(P), uint4korr((P) + 3))
+
+/* what we need to add to an LSN to advance it by one file */
+#define LSN_ONE_FILE ((int64)0x100000000LL)
+
+#define LSN_REPLACE_OFFSET(L, S) (LSN_FINE_NO_PART(L) | (S))
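+
+/*
+  Worked example (editor's note): the address of offset 0x40 in log file 3
+  is MAKE_LSN(3, 0x40) == 0x300000040; LSN_FILE_NO() and LSN_OFFSET()
+  recover 3 and 0x40 from it, and lsn_store()/lsn_korr() round-trip it
+  through the 7 byte on-disk format:
+
+    byte buff[LSN_STORE_SIZE];
+    LSN lsn= MAKE_LSN(3, 0x40);
+    lsn_store(buff, lsn);
+    DBUG_ASSERT(lsn_korr(buff) == lsn &&
+                LSN_FILE_NO(lsn) == 3 && LSN_OFFSET(lsn) == 0x40);
+*/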
+
+#endif
diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c
new file mode 100644
index 00000000000..19113196f15
--- /dev/null
+++ b/storage/maria/ma_open.c
@@ -0,0 +1,1324 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Open a MARIA database */
+
+#include "ma_fulltext.h"
+#include "ma_sp_defs.h"
+#include "ma_rt_index.h"
+#include "ma_blockrec.h"
+#include <m_ctype.h>
+
+#if defined(MSDOS) || defined(__WIN__)
+#ifdef __WIN__
+#include <fcntl.h>
+#else
+#include <process.h> /* Prototype for getpid */
+#endif
+#endif
+#ifdef VMS
+#include "static.c"
+#endif
+
+static void setup_key_functions(MARIA_KEYDEF *keyinfo);
+static my_bool maria_scan_init_dummy(MARIA_HA *info);
+static void maria_scan_end_dummy(MARIA_HA *info);
+static my_bool maria_once_init_dummy(MARIA_SHARE *, File);
+static my_bool maria_once_end_dummy(MARIA_SHARE *);
+static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base);
+
+#define get_next_element(to,pos,size) { memcpy((char*) to,pos,(size_t) size); \
+ pos+=size;}
+
+
+#define disk_pos_assert(pos, end_pos) \
+if (pos > end_pos) \
+{ \
+ my_errno=HA_ERR_CRASHED; \
+ goto err; \
+}
+
+
+/******************************************************************************
+** Return the shared struct if the table is already open.
+** In MySQL the server will handle version issues.
+******************************************************************************/
+
+MARIA_HA *_ma_test_if_reopen(char *filename)
+{
+ LIST *pos;
+
+ for (pos=maria_open_list ; pos ; pos=pos->next)
+ {
+ MARIA_HA *info=(MARIA_HA*) pos->data;
+ MARIA_SHARE *share=info->s;
+ if (!strcmp(share->unique_file_name,filename) && share->last_version)
+ return info;
+ }
+ return 0;
+}
+
+
+/******************************************************************************
+ open a MARIA database.
+  See my_base.h for the open_flags argument.
+  If open_flags has HA_OPEN_ABORT_IF_CRASHED set, abort if the table
+  is marked crashed or if we are not using locking and the table doesn't
+  have an open count of 0.
+******************************************************************************/
+
+MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
+{
+ int kfile,open_mode,save_errno,have_rtree=0;
+ uint i,j,len,errpos,head_length,base_pos,info_length,keys,
+ key_parts,unique_key_parts,fulltext_keys,uniques;
+ char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
+ data_name[FN_REFLEN];
+ char *disk_cache, *disk_pos, *end_pos;
+ MARIA_HA info,*m_info,*old_info;
+ MARIA_SHARE share_buff,*share;
+ ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG];
+ my_off_t key_root[HA_MAX_POSSIBLE_KEY];
+ ulonglong max_key_file_length, max_data_file_length;
+ DBUG_ENTER("maria_open");
+
+ LINT_INIT(m_info);
+ kfile= -1;
+ errpos= 0;
+ head_length=sizeof(share_buff.state.header);
+ bzero((byte*) &info,sizeof(info));
+
+ my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME),MYF(0));
+ pthread_mutex_lock(&THR_LOCK_maria);
+ if (!(old_info=_ma_test_if_reopen(name_buff)))
+ {
+ share= &share_buff;
+ bzero((gptr) &share_buff,sizeof(share_buff));
+ share_buff.state.rec_per_key_part=rec_per_key_part;
+ share_buff.state.key_root=key_root;
+ share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff),
+ maria_key_cache);
+
+ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_open",
+ if (strstr(name, "/t1"))
+ {
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ });
+ if ((kfile=my_open(name_buff,(open_mode=O_RDWR) | O_SHARE,MYF(0))) < 0)
+ {
+ if ((errno != EROFS && errno != EACCES) ||
+ mode != O_RDONLY ||
+ (kfile=my_open(name_buff,(open_mode=O_RDONLY) | O_SHARE,MYF(0))) < 0)
+ goto err;
+ }
+ share->mode=open_mode;
+ errpos= 1;
+ if (my_read(kfile,(char*) share->state.header.file_version,head_length,
+ MYF(MY_NABP)))
+ {
+ my_errno= HA_ERR_NOT_A_TABLE;
+ goto err;
+ }
+ if (memcmp((byte*) share->state.header.file_version,
+ (byte*) maria_file_magic, 4))
+ {
+ DBUG_PRINT("error",("Wrong header in %s",name_buff));
+ DBUG_DUMP("error_dump",(char*) share->state.header.file_version,
+ head_length);
+ my_errno=HA_ERR_NOT_A_TABLE;
+ goto err;
+ }
+ share->options= mi_uint2korr(share->state.header.options);
+ if (share->options &
+ ~(HA_OPTION_PACK_RECORD | HA_OPTION_PACK_KEYS |
+ HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA |
+ HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM |
+ HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE |
+ HA_OPTION_RELIES_ON_SQL_LAYER))
+ {
+ DBUG_PRINT("error",("wrong options: 0x%lx", share->options));
+ my_errno=HA_ERR_OLD_FILE;
+ goto err;
+ }
+ if ((share->options & HA_OPTION_RELIES_ON_SQL_LAYER) &&
+ ! (open_flags & HA_OPEN_FROM_SQL_LAYER))
+ {
+      DBUG_PRINT("error", ("table cannot be opened from non-sql layer"));
+ my_errno= HA_ERR_UNSUPPORTED;
+ goto err;
+ }
+ /* Don't call realpath() if the name can't be a link */
+ if (!strcmp(name_buff, org_name) ||
+ my_readlink(index_name, org_name, MYF(0)) == -1)
+ (void) strmov(index_name, org_name);
+ *strrchr(org_name, '.')= '\0';
+ (void) fn_format(data_name,org_name,"",MARIA_NAME_DEXT,
+ MY_APPEND_EXT|MY_UNPACK_FILENAME|MY_RESOLVE_SYMLINKS);
+
+ info_length=mi_uint2korr(share->state.header.header_length);
+ base_pos= mi_uint2korr(share->state.header.base_pos);
+ if (!(disk_cache=(char*) my_alloca(info_length+128)))
+ {
+ my_errno=ENOMEM;
+ goto err;
+ }
+ end_pos=disk_cache+info_length;
+ errpos= 2;
+
+ VOID(my_seek(kfile,0L,MY_SEEK_SET,MYF(0)));
+ errpos= 3;
+ if (my_read(kfile,disk_cache,info_length,MYF(MY_NABP)))
+ {
+ my_errno=HA_ERR_CRASHED;
+ goto err;
+ }
+ len=mi_uint2korr(share->state.header.state_info_length);
+ keys= (uint) share->state.header.keys;
+ uniques= (uint) share->state.header.uniques;
+ fulltext_keys= (uint) share->state.header.fulltext_keys;
+ key_parts= mi_uint2korr(share->state.header.key_parts);
+ unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts);
+ if (len != MARIA_STATE_INFO_SIZE)
+ {
+ DBUG_PRINT("warning",
+ ("saved_state_info_length: %d state_info_length: %d",
+ len,MARIA_STATE_INFO_SIZE));
+ }
+ share->state_diff_length=len-MARIA_STATE_INFO_SIZE;
+
+ _ma_state_info_read(disk_cache, &share->state);
+ len= mi_uint2korr(share->state.header.base_info_length);
+ if (len != MARIA_BASE_INFO_SIZE)
+ {
+ DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d",
+ len,MARIA_BASE_INFO_SIZE));
+ }
+ disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base);
+ share->state.state_length=base_pos;
+
+ if (!(open_flags & HA_OPEN_FOR_REPAIR) &&
+ ((share->state.changed & STATE_CRASHED) ||
+ ((open_flags & HA_OPEN_ABORT_IF_CRASHED) &&
+ (my_disable_locking && share->state.open_count))))
+ {
+ DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u "
+ "changed: %u open_count: %u !locking: %d",
+ open_flags, share->state.changed,
+ share->state.open_count, my_disable_locking));
+ my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
+ HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
+ goto err;
+ }
+
+ /* sanity check */
+ if (share->base.keystart > 65535 || share->base.rec_reflength > 8)
+ {
+ my_errno=HA_ERR_CRASHED;
+ goto err;
+ }
+
+ key_parts+=fulltext_keys*FT_SEGS;
+ if (share->base.max_key_length > HA_MAX_KEY_BUFF || keys > MARIA_MAX_KEY ||
+ key_parts >= MARIA_MAX_KEY * HA_MAX_KEY_SEG)
+ {
+ DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts));
+ my_errno=HA_ERR_UNSUPPORTED;
+ goto err;
+ }
+
+ /* Correct max_file_length based on length of sizeof(off_t) */
+ max_data_file_length=
+ (share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ?
+ (((ulonglong) 1 << (share->base.rec_reflength*8))-1) :
+ (_ma_safe_mul(share->base.pack_reclength,
+ (ulonglong) 1 << (share->base.rec_reflength*8))-1);
+
+ max_key_file_length=
+ _ma_safe_mul(MARIA_MIN_KEY_BLOCK_LENGTH,
+ ((ulonglong) 1 << (share->base.key_reflength*8))-1);
+#if SIZEOF_OFF_T == 4
+ set_if_smaller(max_data_file_length, INT_MAX32);
+ set_if_smaller(max_key_file_length, INT_MAX32);
+#endif
+ share->base.max_data_file_length=(my_off_t) max_data_file_length;
+ share->base.max_key_file_length=(my_off_t) max_key_file_length;
+
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ share->base.max_key_length+=2; /* For safety */
+
+ if (!my_multi_malloc(MY_WME,
+ &share,sizeof(*share),
+ &share->state.rec_per_key_part,sizeof(long)*key_parts,
+ &share->keyinfo,keys*sizeof(MARIA_KEYDEF),
+ &share->uniqueinfo,uniques*sizeof(MARIA_UNIQUEDEF),
+ &share->keyparts,
+ (key_parts+unique_key_parts+keys+uniques) *
+ sizeof(HA_KEYSEG),
+ &share->rec,
+ (share->base.fields+1)*sizeof(MARIA_COLUMNDEF),
+ &share->blobs,sizeof(MARIA_BLOB)*share->base.blobs,
+ &share->unique_file_name,strlen(name_buff)+1,
+ &share->index_file_name,strlen(index_name)+1,
+ &share->data_file_name,strlen(data_name)+1,
+ &share->state.key_root,keys*sizeof(my_off_t),
+#ifdef THREAD
+ &share->key_root_lock,sizeof(rw_lock_t)*keys,
+#endif
+ &share->mmap_lock,sizeof(rw_lock_t),
+ NullS))
+ goto err;
+ errpos= 4;
+
+ *share=share_buff;
+ memcpy((char*) share->state.rec_per_key_part,
+ (char*) rec_per_key_part, sizeof(long)*key_parts);
+ memcpy((char*) share->state.key_root,
+ (char*) key_root, sizeof(my_off_t)*keys);
+ strmov(share->unique_file_name, name_buff);
+ share->unique_name_length= strlen(name_buff);
+ strmov(share->index_file_name, index_name);
+ strmov(share->data_file_name, data_name);
+
+ share->block_size= maria_block_size;
+ {
+ HA_KEYSEG *pos=share->keyparts;
+ for (i=0 ; i < keys ; i++)
+ {
+ share->keyinfo[i].share= share;
+ disk_pos=_ma_keydef_read(disk_pos, &share->keyinfo[i]);
+ disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE,
+ end_pos);
+ if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE)
+ have_rtree=1;
+ set_if_smaller(share->block_size,share->keyinfo[i].block_length);
+ share->keyinfo[i].seg=pos;
+ for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++)
+ {
+ disk_pos=_ma_keyseg_read(disk_pos, pos);
+
+ if (pos->type == HA_KEYTYPE_TEXT ||
+ pos->type == HA_KEYTYPE_VARTEXT1 ||
+ pos->type == HA_KEYTYPE_VARTEXT2)
+ {
+ if (!pos->language)
+ pos->charset=default_charset_info;
+ else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
+ {
+ my_errno=HA_ERR_UNKNOWN_CHARSET;
+ goto err;
+ }
+ }
+ else if (pos->type == HA_KEYTYPE_BINARY)
+ pos->charset= &my_charset_bin;
+ }
+ if (share->keyinfo[i].flag & HA_SPATIAL)
+ {
+#ifdef HAVE_SPATIAL
+ uint sp_segs=SPDIMS*2;
+ share->keyinfo[i].seg=pos-sp_segs;
+ share->keyinfo[i].keysegs--;
+#else
+ my_errno=HA_ERR_UNSUPPORTED;
+ goto err;
+#endif
+ }
+ else if (share->keyinfo[i].flag & HA_FULLTEXT)
+ {
+ if (!fulltext_keys)
+ { /* 4.0 compatibility code, to be removed in 5.0 */
+ share->keyinfo[i].seg=pos-FT_SEGS;
+ share->keyinfo[i].keysegs-=FT_SEGS;
+ }
+ else
+ {
+ uint j;
+ share->keyinfo[i].seg=pos;
+ for (j=0; j < FT_SEGS; j++)
+ {
+ *pos= ft_keysegs[j];
+ pos[0].language= pos[-1].language;
+ if (!(pos[0].charset= pos[-1].charset))
+ {
+ my_errno=HA_ERR_CRASHED;
+ goto err;
+ }
+ pos++;
+ }
+ }
+ if (!share->ft2_keyinfo.seg)
+ {
+ memcpy(& share->ft2_keyinfo, & share->keyinfo[i], sizeof(MARIA_KEYDEF));
+ share->ft2_keyinfo.keysegs=1;
+ share->ft2_keyinfo.flag=0;
+ share->ft2_keyinfo.keylength=
+ share->ft2_keyinfo.minlength=
+ share->ft2_keyinfo.maxlength=HA_FT_WLEN+share->base.rec_reflength;
+ share->ft2_keyinfo.seg=pos-1;
+ share->ft2_keyinfo.end=pos;
+ setup_key_functions(& share->ft2_keyinfo);
+ }
+ }
+ setup_key_functions(share->keyinfo+i);
+ share->keyinfo[i].end=pos;
+ pos->type=HA_KEYTYPE_END; /* End */
+ pos->length=share->base.rec_reflength;
+ pos->null_bit=0;
+ pos->flag=0; /* For purify */
+ pos++;
+ }
+ for (i=0 ; i < uniques ; i++)
+ {
+ disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]);
+ disk_pos_assert(disk_pos + share->uniqueinfo[i].keysegs *
+ HA_KEYSEG_SIZE, end_pos);
+ share->uniqueinfo[i].seg=pos;
+ for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++)
+ {
+ disk_pos=_ma_keyseg_read(disk_pos, pos);
+ if (pos->type == HA_KEYTYPE_TEXT ||
+ pos->type == HA_KEYTYPE_VARTEXT1 ||
+ pos->type == HA_KEYTYPE_VARTEXT2)
+ {
+ if (!pos->language)
+ pos->charset=default_charset_info;
+ else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
+ {
+ my_errno=HA_ERR_UNKNOWN_CHARSET;
+ goto err;
+ }
+ }
+ }
+ share->uniqueinfo[i].end=pos;
+ pos->type=HA_KEYTYPE_END; /* End */
+ pos->null_bit=0;
+ pos->flag=0;
+ pos++;
+ }
+ share->ftparsers= 0;
+ }
+ share->data_file_type= share->state.header.data_file_type;
+ share->base_length= (BASE_ROW_HEADER_SIZE +
+ share->base.is_nulls_extended +
+ share->base.null_bytes +
+ share->base.pack_bytes +
+ test(share->options & HA_OPTION_CHECKSUM));
+ if (share->base.transactional)
+ share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE;
+ share->base.default_rec_buff_size= max(share->base.pack_reclength,
+ share->base.max_key_length);
+ if (share->data_file_type == DYNAMIC_RECORD)
+ {
+ share->base.extra_rec_buff_size=
+ (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER) + MARIA_SPLIT_LENGTH +
+ MARIA_REC_BUFF_OFFSET);
+ share->base.default_rec_buff_size+= share->base.extra_rec_buff_size;
+ }
+ disk_pos_assert(disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE,
+ end_pos);
+ for (i= j= 0 ; i < share->base.fields ; i++)
+ {
+ disk_pos=_ma_recinfo_read(disk_pos,&share->rec[i]);
+ share->rec[i].pack_type=0;
+ share->rec[i].huff_tree=0;
+ if (share->rec[i].type == (int) FIELD_BLOB)
+ {
+ share->blobs[j].pack_length=
+          share->rec[i].length-maria_portable_sizeof_char_ptr;
+ share->blobs[j].offset= share->rec[i].offset;
+ j++;
+ }
+ }
+ share->rec[i].type=(int) FIELD_LAST; /* End marker */
+
+ if (_ma_open_datafile(&info, share, -1))
+ goto err;
+ errpos= 5;
+
+ share->kfile=kfile;
+ share->this_process=(ulong) getpid();
+ share->last_process= share->state.process;
+ share->base.key_parts=key_parts;
+ share->base.all_key_parts=key_parts+unique_key_parts;
+ if (!(share->last_version=share->state.version))
+ share->last_version=1; /* Safety */
+ share->rec_reflength=share->base.rec_reflength; /* May be changed */
+ share->base.margin_key_file_length=(share->base.max_key_file_length -
+ (keys ? MARIA_INDEX_BLOCK_MARGIN *
+ share->block_size * keys : 0));
+ share->block_size= share->base.block_size;
+ my_afree((gptr) disk_cache);
+ _ma_setup_functions(share);
+ if ((*share->once_init)(share, info.dfile))
+ goto err;
+ share->is_log_table= FALSE;
+#ifdef THREAD
+ thr_lock_init(&share->lock);
+ VOID(pthread_mutex_init(&share->intern_lock,MY_MUTEX_INIT_FAST));
+ for (i=0; i<keys; i++)
+ VOID(my_rwlock_init(&share->key_root_lock[i], NULL));
+ VOID(my_rwlock_init(&share->mmap_lock, NULL));
+ if (!thr_lock_inited)
+ {
+ /* Probably a single threaded program; Don't use concurrent inserts */
+ maria_concurrent_insert=0;
+ }
+ else if (maria_concurrent_insert)
+ {
+ share->concurrent_insert=
+ ((share->options & (HA_OPTION_READ_ONLY_DATA | HA_OPTION_TMP_TABLE |
+ HA_OPTION_COMPRESS_RECORD |
+ HA_OPTION_TEMP_COMPRESS_RECORD)) ||
+ (open_flags & HA_OPEN_TMP_TABLE) ||
+ share->data_file_type == BLOCK_RECORD ||
+ have_rtree) ? 0 : 1;
+ if (share->concurrent_insert)
+ {
+ share->lock.get_status=_ma_get_status;
+ share->lock.copy_status=_ma_copy_status;
+ share->lock.update_status=_ma_update_status;
+ share->lock.check_status=_ma_check_status;
+ }
+ }
+#endif
+ }
+ else
+ {
+ share= old_info->s;
+ if (mode == O_RDWR && share->mode == O_RDONLY)
+ {
+ my_errno=EACCES; /* Can't open in write mode */
+ goto err;
+ }
+ if (share->data_file_type == BLOCK_RECORD)
+ info.dfile= share->bitmap.file;
+ else if (_ma_open_datafile(&info, share, old_info->dfile))
+ goto err;
+ errpos= 5;
+ have_rtree= old_info->maria_rtree_recursion_state != NULL;
+ }
+
+ /* alloc and set up private structure parts */
+ if (!my_multi_malloc(MY_WME,
+ &m_info,sizeof(MARIA_HA),
+ &info.blobs,sizeof(MARIA_BLOB)*share->base.blobs,
+ &info.buff,(share->base.max_key_block_length*2+
+ share->base.max_key_length),
+ &info.lastkey,share->base.max_key_length*3+1,
+ &info.first_mbr_key, share->base.max_key_length,
+ &info.filename,strlen(name)+1,
+ &info.maria_rtree_recursion_state,have_rtree ? 1024 : 0,
+ NullS))
+ goto err;
+ errpos= 6;
+
+ if (!have_rtree)
+ info.maria_rtree_recursion_state= NULL;
+
+ strmov(info.filename,name);
+ memcpy(info.blobs,share->blobs,sizeof(MARIA_BLOB)*share->base.blobs);
+ info.lastkey2=info.lastkey+share->base.max_key_length;
+
+ info.s=share;
+ info.cur_row.lastpos= HA_OFFSET_ERROR;
+ info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND);
+ info.opt_flag=READ_CHECK_USED;
+ info.this_unique= (ulong) info.dfile; /* Uniq number in process */
+ if (share->data_file_type == COMPRESSED_RECORD)
+ info.this_unique= share->state.unique;
+ info.this_loop=0; /* Update counter */
+ info.last_unique= share->state.unique;
+ info.last_loop= share->state.update_count;
+ if (mode == O_RDONLY)
+ share->options|=HA_OPTION_READ_ONLY_DATA;
+ info.lock_type=F_UNLCK;
+ info.quick_mode=0;
+ info.bulk_insert=0;
+ info.ft1_to_ft2=0;
+ info.errkey= -1;
+ info.page_changed=1;
+ info.keyread_buff= info.buff + share->base.max_key_block_length;
+ if ((*share->init)(&info))
+ goto err;
+
+ pthread_mutex_lock(&share->intern_lock);
+ info.read_record= share->read_record;
+ share->reopen++;
+ share->write_flag=MYF(MY_NABP | MY_WAIT_IF_FULL);
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ info.lock_type=F_RDLCK;
+ share->r_locks++;
+ share->tot_locks++;
+ }
+ if ((open_flags & HA_OPEN_TMP_TABLE) ||
+ (share->options & HA_OPTION_TMP_TABLE))
+ {
+ share->temporary= share->delay_key_write= 1;
+
+ share->write_flag=MYF(MY_NABP);
+ share->w_locks++; /* We don't have to update status */
+ share->tot_locks++;
+ info.lock_type=F_WRLCK;
+ }
+ if (((open_flags & HA_OPEN_DELAY_KEY_WRITE) ||
+ (share->options & HA_OPTION_DELAY_KEY_WRITE)) &&
+ maria_delay_key_write)
+ share->delay_key_write=1;
+
+ info.state= &share->state.state; /* Change global values by default */
+ pthread_mutex_unlock(&share->intern_lock);
+
+ /* Allocate buffer for one record */
+  /* prerequisites: info->rec_buff == 0 && info->rec_buff_size == 0 */
+ if (_ma_alloc_buffer(&info.rec_buff, &info.rec_buff_size,
+ share->base.default_rec_buff_size))
+ goto err;
+
+ bzero(info.rec_buff, share->base.default_rec_buff_size);
+
+ *m_info=info;
+#ifdef THREAD
+ thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
+#endif
+ m_info->open_list.data=(void*) m_info;
+ maria_open_list=list_add(maria_open_list,&m_info->open_list);
+
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ DBUG_RETURN(m_info);
+
+err:
+ save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
+ if ((save_errno == HA_ERR_CRASHED) ||
+ (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
+ (save_errno == HA_ERR_CRASHED_ON_REPAIR))
+ _ma_report_error(save_errno, name);
+ switch (errpos) {
+ case 6:
+ (*share->end)(&info);
+ my_free((gptr) m_info,MYF(0));
+ /* fall through */
+ case 5:
+ if (share->data_file_type != BLOCK_RECORD)
+ VOID(my_close(info.dfile,MYF(0)));
+ if (old_info)
+ break; /* Don't remove open table */
+ (*share->once_end)(share);
+ /* fall through */
+ case 4:
+ my_free((gptr) share,MYF(0));
+ /* fall through */
+ case 3:
+ /* fall through */
+ case 2:
+ my_afree((gptr) disk_cache);
+ /* fall through */
+ case 1:
+ VOID(my_close(kfile,MYF(0)));
+ /* fall through */
+ case 0:
+ default:
+ break;
+ }
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ my_errno=save_errno;
+ DBUG_RETURN (NULL);
+} /* maria_open */
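The errpos counter above drives a staged-cleanup idiom: every successfully acquired resource bumps errpos, and the err: label unwinds them in reverse order through a fall-through switch. A minimal sketch of the same pattern with made-up resources (not engine code):

#include <stdlib.h>

/* Hypothetical example of the errpos/fall-through cleanup idiom;
   'a' and 'b' stand in for the file handles and buffers of maria_open(). */
int open_two_buffers(char **pa, char **pb)
{
  int errpos= 0;
  char *a= NULL, *b= NULL;

  if (!(a= malloc(16)))
    goto err;
  errpos= 1;                        /* 'a' must be freed if we fail later */
  if (!(b= malloc(32)))
    goto err;
  errpos= 2;                        /* 'b' must be freed too */

  *pa= a;
  *pb= b;
  return 0;

err:
  switch (errpos) {                 /* unwind in reverse acquisition order */
  case 2:
    free(b);
    /* fall through */
  case 1:
    free(a);
    /* fall through */
  case 0:
  default:
    break;
  }
  return 1;
}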
+
+
+/*
+ Reallocate a buffer, if the current buffer is not large enough
+*/
+
+my_bool _ma_alloc_buffer(byte **old_addr, my_size_t *old_size,
+ my_size_t new_size)
+{
+ if (*old_size < new_size)
+ {
+ byte *addr;
+ if (!(addr= (byte*) my_realloc((gptr) *old_addr, new_size,
+ MYF(MY_ALLOW_ZERO_PTR))))
+ return 1;
+ *old_addr= addr;
+ *old_size= new_size;
+ }
+ return 0;
+}
+
+
+ulonglong _ma_safe_mul(ulonglong a, ulonglong b)
+{
+ ulonglong max_val= ~ (ulonglong) 0; /* my_off_t is unsigned */
+
+ if (!a || max_val / a < b)
+ return max_val;
+ return a*b;
+}
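The guard in _ma_safe_mul() works because, for unsigned a > 0, a*b exceeds max_val exactly when b > max_val / a (integer division). A small worked check of that condition:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint64_t max_val= ~(uint64_t) 0;
  uint64_t a= UINT64_C(1) << 40;        /* 2^40 */
  uint64_t b= UINT64_C(1) << 30;        /* 2^30: product would need 70 bits */

  if (!a || max_val / a < b)
    printf("overflow: saturate to %llu\n", (unsigned long long) max_val);
  else
    printf("fits: %llu\n", (unsigned long long) (a * b));
  return 0;
}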
+
+ /* Set up functions in structs */
+
+void _ma_setup_functions(register MARIA_SHARE *share)
+{
+ share->once_init= maria_once_init_dummy;
+ share->once_end= maria_once_end_dummy;
+ share->init= maria_scan_init_dummy;
+ share->end= maria_scan_end_dummy;
+ share->scan_init= maria_scan_init_dummy;
+ share->scan_end= maria_scan_end_dummy;
+ share->write_record_init= _ma_write_init_default;
+ share->write_record_abort= _ma_write_abort_default;
+
+ switch (share->data_file_type) {
+ case COMPRESSED_RECORD:
+ share->read_record= _ma_read_pack_record;
+ share->scan= _ma_read_rnd_pack_record;
+ share->once_init= _ma_once_init_pack_row;
+ share->once_end= _ma_once_end_pack_row;
+    /* Calculate checksum according to how the original row was stored */
+ if (share->state.header.org_data_file_type == STATIC_RECORD)
+ share->calc_checksum= _ma_static_checksum;
+ else
+ share->calc_checksum= _ma_checksum;
+ share->calc_write_checksum= share->calc_checksum;
+ break;
+ case DYNAMIC_RECORD:
+ share->read_record= _ma_read_dynamic_record;
+ share->scan= _ma_read_rnd_dynamic_record;
+ share->delete_record= _ma_delete_dynamic_record;
+ share->compare_record= _ma_cmp_dynamic_record;
+ share->compare_unique= _ma_cmp_dynamic_unique;
+ share->calc_checksum= share->calc_write_checksum= _ma_checksum;
+    /* add the pack-bit bytes to pack_reclength for faster allocation */
+ share->base.pack_reclength+= share->base.pack_bytes;
+ if (share->base.blobs)
+ {
+ share->update_record= _ma_update_blob_record;
+ share->write_record= _ma_write_blob_record;
+ }
+ else
+ {
+ share->write_record= _ma_write_dynamic_record;
+ share->update_record= _ma_update_dynamic_record;
+ }
+ break;
+ case STATIC_RECORD:
+ share->read_record= _ma_read_static_record;
+ share->scan= _ma_read_rnd_static_record;
+ share->delete_record= _ma_delete_static_record;
+ share->compare_record= _ma_cmp_static_record;
+ share->update_record= _ma_update_static_record;
+ share->write_record= _ma_write_static_record;
+ share->compare_unique= _ma_cmp_static_unique;
+ share->calc_checksum= share->calc_write_checksum= _ma_static_checksum;
+ break;
+ case BLOCK_RECORD:
+ share->once_init= _ma_once_init_block_row;
+ share->once_end= _ma_once_end_block_row;
+ share->init= _ma_init_block_row;
+ share->end= _ma_end_block_row;
+ share->write_record_init= _ma_write_init_block_record;
+ share->write_record_abort= _ma_write_abort_block_record;
+ share->scan_init= _ma_scan_init_block_record;
+ share->scan_end= _ma_scan_end_block_record;
+ share->read_record= _ma_read_block_record;
+ share->scan= _ma_scan_block_record;
+ share->delete_record= _ma_delete_block_record;
+ share->compare_record= _ma_compare_block_record;
+ share->update_record= _ma_update_block_record;
+ share->write_record= _ma_write_block_record;
+ share->compare_unique= _ma_cmp_block_unique;
+ share->calc_checksum= _ma_checksum;
+ share->calc_write_checksum= 0;
+ break;
+ }
+ share->file_read= _ma_nommap_pread;
+ share->file_write= _ma_nommap_pwrite;
+ if (!(share->options & HA_OPTION_CHECKSUM) &&
+ share->data_file_type != COMPRESSED_RECORD)
+ share->calc_checksum= share->calc_write_checksum= 0;
+ return;
+}
+
+
+static void setup_key_functions(register MARIA_KEYDEF *keyinfo)
+{
+ if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
+ {
+#ifdef HAVE_RTREE_KEYS
+ keyinfo->ck_insert = maria_rtree_insert;
+ keyinfo->ck_delete = maria_rtree_delete;
+#else
+ DBUG_ASSERT(0); /* maria_open should check it never happens */
+#endif
+ }
+ else
+ {
+ keyinfo->ck_insert = _ma_ck_write;
+ keyinfo->ck_delete = _ma_ck_delete;
+ }
+ if (keyinfo->flag & HA_BINARY_PACK_KEY)
+ { /* Simple prefix compression */
+ keyinfo->bin_search= _ma_seq_search;
+ keyinfo->get_key= _ma_get_binary_pack_key;
+ keyinfo->pack_key= _ma_calc_bin_pack_key_length;
+ keyinfo->store_key= _ma_store_bin_pack_key;
+ }
+ else if (keyinfo->flag & HA_VAR_LENGTH_KEY)
+ {
+ keyinfo->get_key= _ma_get_pack_key;
+ if (keyinfo->seg[0].flag & HA_PACK_KEY)
+ { /* Prefix compression */
+ if (!keyinfo->seg->charset || use_strnxfrm(keyinfo->seg->charset) ||
+ (keyinfo->seg->flag & HA_NULL_PART))
+ keyinfo->bin_search= _ma_seq_search;
+ else
+ keyinfo->bin_search= _ma_prefix_search;
+ keyinfo->pack_key= _ma_calc_var_pack_key_length;
+ keyinfo->store_key= _ma_store_var_pack_key;
+ }
+ else
+ {
+ keyinfo->bin_search= _ma_seq_search;
+ keyinfo->pack_key= _ma_calc_var_key_length; /* Variable length key */
+ keyinfo->store_key= _ma_store_static_key;
+ }
+ }
+ else
+ {
+ keyinfo->bin_search= _ma_bin_search;
+ keyinfo->get_key= _ma_get_static_key;
+ keyinfo->pack_key= _ma_calc_static_key_length;
+ keyinfo->store_key= _ma_store_static_key;
+ }
+ return;
+}
+
+
+/*
+  Function to store the state header in the index file (.MAI)
+*/
+
+uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite)
+{
+ uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
+ uchar *ptr=buff;
+ uint i, keys= (uint) state->header.keys;
+ DBUG_ENTER("_ma_state_info_write");
+
+ memcpy_fixed(ptr,&state->header,sizeof(state->header));
+ ptr+=sizeof(state->header);
+
+ /* open_count must be first because of _ma_mark_file_changed ! */
+ mi_int2store(ptr,state->open_count); ptr+= 2;
+ *ptr++= (uchar)state->changed;
+ *ptr++= state->sortkey;
+ mi_rowstore(ptr,state->state.records); ptr+= 8;
+ mi_rowstore(ptr,state->state.del); ptr+= 8;
+ mi_rowstore(ptr,state->split); ptr+= 8;
+ mi_sizestore(ptr,state->dellink); ptr+= 8;
+ mi_sizestore(ptr,state->first_bitmap_with_space); ptr+= 8;
+ mi_sizestore(ptr,state->state.key_file_length); ptr+= 8;
+ mi_sizestore(ptr,state->state.data_file_length); ptr+= 8;
+ mi_sizestore(ptr,state->state.empty); ptr+= 8;
+ mi_sizestore(ptr,state->state.key_empty); ptr+= 8;
+ mi_int8store(ptr,state->auto_increment); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->state.checksum); ptr+= 8;
+ mi_int4store(ptr,state->process); ptr+= 4;
+ mi_int4store(ptr,state->unique); ptr+= 4;
+ mi_int4store(ptr,state->status); ptr+= 4;
+ mi_int4store(ptr,state->update_count); ptr+= 4;
+
+ ptr+= state->state_diff_length;
+
+ for (i=0; i < keys; i++)
+ {
+ mi_sizestore(ptr,state->key_root[i]); ptr+= 8;
+ }
+ mi_sizestore(ptr,state->key_del); ptr+= 8;
+ if (pWrite & 2) /* From maria_chk */
+ {
+ uint key_parts= mi_uint2korr(state->header.key_parts);
+ mi_int4store(ptr,state->sec_index_changed); ptr+= 4;
+ mi_int4store(ptr,state->sec_index_used); ptr+= 4;
+ mi_int4store(ptr,state->version); ptr+= 4;
+ mi_int8store(ptr,state->key_map); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->create_time); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->recover_time); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->check_time); ptr+= 8;
+ mi_sizestore(ptr,state->rec_per_key_rows); ptr+= 8;
+ for (i=0 ; i < key_parts ; i++)
+ {
+ mi_int4store(ptr,state->rec_per_key_part[i]); ptr+=4;
+ }
+ }
+
+ if (pWrite & 1)
+ DBUG_RETURN(my_pwrite(file,(char*) buff, (uint) (ptr-buff), 0L,
+ MYF(MY_NABP | MY_THREADSAFE)));
+ DBUG_RETURN(my_write(file, (char*) buff, (uint) (ptr-buff),
+ MYF(MY_NABP)));
+}
+
+
+byte *_ma_state_info_read(byte *ptr, MARIA_STATE_INFO *state)
+{
+ uint i,keys,key_parts;
+ memcpy_fixed(&state->header,ptr, sizeof(state->header));
+ ptr+= sizeof(state->header);
+ keys= (uint) state->header.keys;
+ key_parts= mi_uint2korr(state->header.key_parts);
+
+ state->open_count = mi_uint2korr(ptr); ptr+= 2;
+ state->changed= (my_bool) *ptr++;
+ state->sortkey= (uint) *ptr++;
+ state->state.records= mi_rowkorr(ptr); ptr+= 8;
+ state->state.del = mi_rowkorr(ptr); ptr+= 8;
+ state->split = mi_rowkorr(ptr); ptr+= 8;
+ state->dellink= mi_sizekorr(ptr); ptr+= 8;
+ state->first_bitmap_with_space= mi_sizekorr(ptr); ptr+= 8;
+ state->state.key_file_length = mi_sizekorr(ptr); ptr+= 8;
+ state->state.data_file_length= mi_sizekorr(ptr); ptr+= 8;
+ state->state.empty = mi_sizekorr(ptr); ptr+= 8;
+ state->state.key_empty= mi_sizekorr(ptr); ptr+= 8;
+ state->auto_increment=mi_uint8korr(ptr); ptr+= 8;
+ state->state.checksum=(ha_checksum) mi_uint8korr(ptr);ptr+= 8;
+ state->process= mi_uint4korr(ptr); ptr+= 4;
+ state->unique = mi_uint4korr(ptr); ptr+= 4;
+ state->status = mi_uint4korr(ptr); ptr+= 4;
+ state->update_count=mi_uint4korr(ptr); ptr+= 4;
+
+ ptr+= state->state_diff_length;
+
+ for (i=0; i < keys; i++)
+ {
+ state->key_root[i]= mi_sizekorr(ptr); ptr+= 8;
+ }
+ state->key_del= mi_sizekorr(ptr); ptr+= 8;
+ state->sec_index_changed = mi_uint4korr(ptr); ptr+= 4;
+ state->sec_index_used = mi_uint4korr(ptr); ptr+= 4;
+ state->version = mi_uint4korr(ptr); ptr+= 4;
+ state->key_map = mi_uint8korr(ptr); ptr+= 8;
+ state->create_time = (time_t) mi_sizekorr(ptr); ptr+= 8;
+ state->recover_time =(time_t) mi_sizekorr(ptr); ptr+= 8;
+ state->check_time = (time_t) mi_sizekorr(ptr); ptr+= 8;
+ state->rec_per_key_rows=mi_sizekorr(ptr); ptr+= 8;
+ for (i=0 ; i < key_parts ; i++)
+ {
+ state->rec_per_key_part[i]= mi_uint4korr(ptr); ptr+=4;
+ }
+ return ptr;
+}
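The _ma_state_info_write()/_ma_state_info_read() pair relies on the store/korr macros being exact inverses with fixed widths; the mi_* variants write in a machine-independent byte order. A tiny round-trip sketch of that pattern with a hand-rolled 2-byte field standing in for mi_int2store/mi_uint2korr (which, as assumed here, store the high byte first):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  unsigned char buf[2];
  uint16_t open_count= 0x0102;
  uint16_t back;

  buf[0]= (unsigned char) (open_count >> 8);    /* high byte first */
  buf[1]= (unsigned char) open_count;
  back= (uint16_t) ((buf[0] << 8) | buf[1]);    /* the matching 'korr' read */

  printf("%#x -> %02x %02x -> %#x\n", open_count, buf[0], buf[1], back);
  return 0;
}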
+
+
+uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state, my_bool pRead)
+{
+ char buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
+
+ if (!maria_single_user)
+ {
+ if (pRead)
+ {
+ if (my_pread(file, buff, state->state_length,0L, MYF(MY_NABP)))
+ return (MY_FILE_ERROR);
+ }
+ else if (my_read(file, buff, state->state_length,MYF(MY_NABP)))
+ return (MY_FILE_ERROR);
+ _ma_state_info_read(buff, state);
+ }
+ return 0;
+}
+
+
+/****************************************************************************
+** store and read of MARIA_BASE_INFO
+****************************************************************************/
+
+uint _ma_base_info_write(File file, MARIA_BASE_INFO *base)
+{
+ uchar buff[MARIA_BASE_INFO_SIZE], *ptr=buff;
+
+ mi_sizestore(ptr,base->keystart); ptr+= 8;
+ mi_sizestore(ptr,base->max_data_file_length); ptr+= 8;
+ mi_sizestore(ptr,base->max_key_file_length); ptr+= 8;
+ mi_rowstore(ptr,base->records); ptr+= 8;
+ mi_rowstore(ptr,base->reloc); ptr+= 8;
+ mi_int4store(ptr,base->mean_row_length); ptr+= 4;
+ mi_int4store(ptr,base->reclength); ptr+= 4;
+ mi_int4store(ptr,base->pack_reclength); ptr+= 4;
+ mi_int4store(ptr,base->min_pack_length); ptr+= 4;
+ mi_int4store(ptr,base->max_pack_length); ptr+= 4;
+ mi_int4store(ptr,base->min_block_length); ptr+= 4;
+ mi_int2store(ptr,base->fields); ptr+= 2;
+ mi_int2store(ptr,base->fixed_not_null_fields); ptr+= 2;
+ mi_int2store(ptr,base->fixed_not_null_fields_length); ptr+= 2;
+ mi_int2store(ptr,base->max_field_lengths); ptr+= 2;
+ mi_int2store(ptr,base->pack_fields); ptr+= 2;
+ mi_int2store(ptr,0); ptr+= 2;
+ mi_int2store(ptr,base->null_bytes); ptr+= 2;
+ mi_int2store(ptr,base->original_null_bytes); ptr+= 2;
+ mi_int2store(ptr,base->field_offsets); ptr+= 2;
+ mi_int2store(ptr,base->min_row_length); ptr+= 2;
+ mi_int2store(ptr,base->block_size); ptr+= 2;
+ *ptr++= base->rec_reflength;
+ *ptr++= base->key_reflength;
+ *ptr++= base->keys;
+ *ptr++= base->auto_key;
+ *ptr++= base->transactional;
+ *ptr++= 0; /* Reserved */
+ mi_int2store(ptr,base->pack_bytes); ptr+= 2;
+ mi_int2store(ptr,base->blobs); ptr+= 2;
+ mi_int2store(ptr,base->max_key_block_length); ptr+= 2;
+ mi_int2store(ptr,base->max_key_length); ptr+= 2;
+ mi_int2store(ptr,base->extra_alloc_bytes); ptr+= 2;
+ *ptr++= base->extra_alloc_procent;
+ bzero(ptr,16); ptr+= 16; /* extra */
+ DBUG_ASSERT((ptr - buff) == MARIA_BASE_INFO_SIZE);
+ return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP));
+}
+
+
+static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base)
+{
+ base->keystart= mi_sizekorr(ptr); ptr+= 8;
+ base->max_data_file_length= mi_sizekorr(ptr); ptr+= 8;
+ base->max_key_file_length= mi_sizekorr(ptr); ptr+= 8;
+ base->records= (ha_rows) mi_sizekorr(ptr); ptr+= 8;
+ base->reloc= (ha_rows) mi_sizekorr(ptr); ptr+= 8;
+ base->mean_row_length= mi_uint4korr(ptr); ptr+= 4;
+ base->reclength= mi_uint4korr(ptr); ptr+= 4;
+ base->pack_reclength= mi_uint4korr(ptr); ptr+= 4;
+ base->min_pack_length= mi_uint4korr(ptr); ptr+= 4;
+ base->max_pack_length= mi_uint4korr(ptr); ptr+= 4;
+ base->min_block_length= mi_uint4korr(ptr); ptr+= 4;
+ base->fields= mi_uint2korr(ptr); ptr+= 2;
+ base->fixed_not_null_fields= mi_uint2korr(ptr); ptr+= 2;
+ base->fixed_not_null_fields_length= mi_uint2korr(ptr);ptr+= 2;
+ base->max_field_lengths= mi_uint2korr(ptr); ptr+= 2;
+ base->pack_fields= mi_uint2korr(ptr); ptr+= 2;
+ ptr+= 2;
+ base->null_bytes= mi_uint2korr(ptr); ptr+= 2;
+ base->original_null_bytes= mi_uint2korr(ptr); ptr+= 2;
+ base->field_offsets= mi_uint2korr(ptr); ptr+= 2;
+ base->min_row_length= mi_uint2korr(ptr); ptr+= 2;
+ base->block_size= mi_uint2korr(ptr); ptr+= 2;
+
+ base->rec_reflength= *ptr++;
+ base->key_reflength= *ptr++;
+ base->keys= *ptr++;
+ base->auto_key= *ptr++;
+ base->transactional= *ptr++;
+ ptr++;
+ base->pack_bytes= mi_uint2korr(ptr); ptr+= 2;
+ base->blobs= mi_uint2korr(ptr); ptr+= 2;
+ base->max_key_block_length= mi_uint2korr(ptr); ptr+= 2;
+ base->max_key_length= mi_uint2korr(ptr); ptr+= 2;
+ base->extra_alloc_bytes= mi_uint2korr(ptr); ptr+= 2;
+ base->extra_alloc_procent= *ptr++;
+ ptr+= 16;
+ return ptr;
+}
+
+/*--------------------------------------------------------------------------
+ maria_keydef
+---------------------------------------------------------------------------*/
+
+uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef)
+{
+ uchar buff[MARIA_KEYDEF_SIZE];
+ uchar *ptr=buff;
+
+ *ptr++= (uchar) keydef->keysegs;
+ *ptr++= keydef->key_alg; /* Rtree or Btree */
+ mi_int2store(ptr,keydef->flag); ptr+= 2;
+ mi_int2store(ptr,keydef->block_length); ptr+= 2;
+ mi_int2store(ptr,keydef->keylength); ptr+= 2;
+ mi_int2store(ptr,keydef->minlength); ptr+= 2;
+ mi_int2store(ptr,keydef->maxlength); ptr+= 2;
+ return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP));
+}
+
+char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef)
+{
+ keydef->keysegs = (uint) *ptr++;
+ keydef->key_alg = *ptr++; /* Rtree or Btree */
+
+ keydef->flag = mi_uint2korr(ptr); ptr+= 2;
+ keydef->block_length = mi_uint2korr(ptr); ptr+= 2;
+ keydef->keylength = mi_uint2korr(ptr); ptr+= 2;
+ keydef->minlength = mi_uint2korr(ptr); ptr+= 2;
+ keydef->maxlength = mi_uint2korr(ptr); ptr+= 2;
+ keydef->underflow_block_length=keydef->block_length/3;
+ keydef->version = 0; /* Not saved */
+ keydef->parser = &ft_default_parser;
+ keydef->ftparser_nr = 0;
+ return ptr;
+}
+
+/***************************************************************************
+** maria_keyseg
+***************************************************************************/
+
+int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg)
+{
+ uchar buff[HA_KEYSEG_SIZE];
+ uchar *ptr=buff;
+ ulong pos;
+
+ *ptr++= keyseg->type;
+ *ptr++= keyseg->language;
+ *ptr++= keyseg->null_bit;
+ *ptr++= keyseg->bit_start;
+ *ptr++= keyseg->bit_end;
+ *ptr++= keyseg->bit_length;
+ mi_int2store(ptr,keyseg->flag); ptr+= 2;
+ mi_int2store(ptr,keyseg->length); ptr+= 2;
+ mi_int4store(ptr,keyseg->start); ptr+= 4;
+ pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos;
+ mi_int4store(ptr, pos);
+ ptr+=4;
+
+ return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP));
+}
+
+
+char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg)
+{
+ keyseg->type = *ptr++;
+ keyseg->language = *ptr++;
+ keyseg->null_bit = *ptr++;
+ keyseg->bit_start = *ptr++;
+ keyseg->bit_end = *ptr++;
+ keyseg->bit_length = *ptr++;
+ keyseg->flag = mi_uint2korr(ptr); ptr+= 2;
+ keyseg->length = mi_uint2korr(ptr); ptr+= 2;
+ keyseg->start = mi_uint4korr(ptr); ptr+= 4;
+ keyseg->null_pos = mi_uint4korr(ptr); ptr+= 4;
+ keyseg->charset=0; /* Will be filled in later */
+ if (keyseg->null_bit)
+ keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7));
+ else
+ {
+ keyseg->bit_pos= (uint16)keyseg->null_pos;
+ keyseg->null_pos= 0;
+ }
+ return ptr;
+}
+
+/*--------------------------------------------------------------------------
+ maria_uniquedef
+---------------------------------------------------------------------------*/
+
+uint _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *def)
+{
+ uchar buff[MARIA_UNIQUEDEF_SIZE];
+ uchar *ptr=buff;
+
+ mi_int2store(ptr,def->keysegs); ptr+=2;
+ *ptr++= (uchar) def->key;
+ *ptr++ = (uchar) def->null_are_equal;
+
+ return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP));
+}
+
+char *_ma_uniquedef_read(char *ptr, MARIA_UNIQUEDEF *def)
+{
+ def->keysegs = mi_uint2korr(ptr);
+ def->key = ptr[2];
+ def->null_are_equal=ptr[3];
+ return ptr+4; /* 1 extra byte */
+}
+
+/***************************************************************************
+** MARIA_COLUMNDEF
+***************************************************************************/
+
+uint _ma_recinfo_write(File file, MARIA_COLUMNDEF *recinfo)
+{
+ uchar buff[MARIA_COLUMNDEF_SIZE];
+ uchar *ptr=buff;
+
+ mi_int6store(ptr,recinfo->offset); ptr+= 6;
+ mi_int2store(ptr,recinfo->type); ptr+= 2;
+ mi_int2store(ptr,recinfo->length); ptr+= 2;
+ mi_int2store(ptr,recinfo->fill_length); ptr+= 2;
+ mi_int2store(ptr,recinfo->null_pos); ptr+= 2;
+ mi_int2store(ptr,recinfo->empty_pos); ptr+= 2;
+ (*ptr++)= recinfo->null_bit;
+ (*ptr++)= recinfo->empty_bit;
+ return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP));
+}
+
+char *_ma_recinfo_read(char *ptr, MARIA_COLUMNDEF *recinfo)
+{
+ recinfo->offset= mi_uint6korr(ptr); ptr+= 6;
+ recinfo->type= mi_sint2korr(ptr); ptr+= 2;
+ recinfo->length= mi_uint2korr(ptr); ptr+= 2;
+ recinfo->fill_length= mi_uint2korr(ptr); ptr+= 2;
+ recinfo->null_pos= mi_uint2korr(ptr); ptr+= 2;
+ recinfo->empty_pos= mi_uint2korr(ptr); ptr+= 2;
+ recinfo->null_bit= (uint8) *ptr++;
+ recinfo->empty_bit= (uint8) *ptr++;
+ return ptr;
+}
+
+/**************************************************************************
+ Open data file
+ We can't use dup() here as the data file descriptors need to have different
+ active seek-positions.
+
+  The argument file_to_dup is here for the future, in case some OS provides
+  a dup()-like call that would give us two independent file descriptors.
+*************************************************************************/
+
+int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share,
+ File file_to_dup __attribute__((unused)))
+{
+ info->dfile= my_open(share->data_file_name, share->mode | O_SHARE,
+ MYF(MY_WME));
+ return info->dfile >= 0 ? 0 : 1;
+}
+
+
+int _ma_open_keyfile(MARIA_SHARE *share)
+{
+ if ((share->kfile=my_open(share->unique_file_name, share->mode | O_SHARE,
+ MYF(MY_WME))) < 0)
+ return 1;
+ return 0;
+}
+
+
+/*
+ Disable all indexes.
+
+ SYNOPSIS
+ maria_disable_indexes()
+ info A pointer to the MARIA storage engine MARIA_HA struct.
+
+ DESCRIPTION
+ Disable all indexes.
+
+ RETURN
+ 0 ok
+*/
+
+int maria_disable_indexes(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+
+ maria_clear_all_keys_active(share->state.key_map);
+ return 0;
+}
+
+
+/*
+ Enable all indexes
+
+ SYNOPSIS
+ maria_enable_indexes()
+ info A pointer to the MARIA storage engine MARIA_HA struct.
+
+ DESCRIPTION
+ Enable all indexes. The indexes might have been disabled
+ by maria_disable_index() before.
+ The function works only if both data and indexes are empty,
+ otherwise a repair is required.
+ To be sure, call handler::delete_all_rows() before.
+
+ RETURN
+ 0 ok
+ HA_ERR_CRASHED data or index is non-empty.
+*/
+
+int maria_enable_indexes(MARIA_HA *info)
+{
+ int error= 0;
+ MARIA_SHARE *share= info->s;
+
+ if (share->state.state.data_file_length ||
+ (share->state.state.key_file_length != share->base.keystart))
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ error= HA_ERR_CRASHED;
+ }
+ else
+ maria_set_all_keys_active(share->state.key_map, share->base.keys);
+ return error;
+}
+
+
+/*
+ Test if indexes are disabled.
+
+ SYNOPSIS
+ maria_indexes_are_disabled()
+ info A pointer to the MARIA storage engine MARIA_HA struct.
+
+ DESCRIPTION
+ Test if indexes are disabled.
+
+ RETURN
+ 0 indexes are not disabled
+ 1 all indexes are disabled
+ 2 non-unique indexes are disabled
+*/
+
+int maria_indexes_are_disabled(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+
+ /*
+    No keys or all are enabled. keys is the number of keys. Left shifted
+    it gives us only one bit set; when decreased by one, it gives us all
+    bits below that one set, while that bit itself gets unset.
+ */
+ if (!share->base.keys ||
+ (maria_is_all_keys_active(share->state.key_map, share->base.keys)))
+ return 0;
+
+  /* All are disabled */
+  if (! maria_is_any_key_active(share->state.key_map))
+    return 1;
+
+ /*
+    We have keys: some enabled, some disabled.
+    Don't check whether only non-unique keys are disabled; just return 2.
+ */
+ return 2;
+}
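maria_indexes_are_disabled() reduces to plain bitmask tests on key_map (bit i set meaning index i is enabled). A standalone sketch of the three outcomes, assuming the maria_is_*_keys_active() helpers behave as the usual mask comparisons:

#include <stdint.h>
#include <stdio.h>

/* 0: nothing disabled, 1: everything disabled, 2: mixed (demo assumes keys < 64) */
static int demo_indexes_are_disabled(uint64_t key_map, unsigned keys)
{
  uint64_t all_active= keys ? (UINT64_C(1) << keys) - 1 : 0;

  if (!keys || key_map == all_active)
    return 0;
  if (!key_map)
    return 1;
  return 2;
}

int main(void)
{
  printf("%d %d %d\n",
         demo_indexes_are_disabled(0x7, 3),   /* all three enabled  -> 0 */
         demo_indexes_are_disabled(0x0, 3),   /* all three disabled -> 1 */
         demo_indexes_are_disabled(0x5, 3));  /* mixed              -> 2 */
  return 0;
}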
+
+
+static my_bool maria_scan_init_dummy(MARIA_HA *info __attribute__((unused)))
+{
+ return 0;
+}
+
+static void maria_scan_end_dummy(MARIA_HA *info __attribute__((unused)))
+{
+}
+
+static my_bool maria_once_init_dummy(MARIA_SHARE *share
+ __attribute__((unused)),
+ File dfile __attribute__((unused)))
+{
+ return 0;
+}
+
+static my_bool maria_once_end_dummy(MARIA_SHARE *share __attribute__((unused)))
+{
+ return 0;
+}
diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c
new file mode 100644
index 00000000000..7c875e7b91d
--- /dev/null
+++ b/storage/maria/ma_packrec.c
@@ -0,0 +1,1426 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+ /* Functions to handle compressed records */
+
+#include "maria_def.h"
+
+#define IS_CHAR ((uint) 32768) /* Bit if char (not offset) in tree */
+
+#if INT_MAX > 65536L
+#define BITS_SAVED 32
+#define MAX_QUICK_TABLE_BITS 9 /* Because we may shift in 24 bits */
+#else
+#define BITS_SAVED 16
+#define MAX_QUICK_TABLE_BITS 6
+#endif
+
+#define get_bit(BU) ((BU)->bits ? \
+ (BU)->current_byte & ((maria_bit_type) 1 << --(BU)->bits) :\
+ (fill_buffer(BU), (BU)->bits= BITS_SAVED-1,\
+ (BU)->current_byte & ((maria_bit_type) 1 << (BITS_SAVED-1))))
+#define skip_to_next_byte(BU) ((BU)->bits&=~7)
+#define get_bits(BU,count) (((BU)->bits >= count) ? (((BU)->current_byte >> ((BU)->bits-=count)) & mask[count]) : fill_and_get_bits(BU,count))
+
+#define decode_bytes_test_bit(bit) \
+ if (low_byte & (1 << (7-bit))) \
+ pos++; \
+ if (*pos & IS_CHAR) \
+ { bits-=(bit+1); break; } \
+ pos+= *pos
+
+#define OFFSET_TABLE_SIZE 512
+
+static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file,
+ pbool fix_keys);
+static uint read_huff_table(MARIA_BIT_BUFF *bit_buff,
+ MARIA_DECODE_TREE *decode_tree,
+ uint16 **decode_table,byte **intervall_buff,
+ uint16 *tmp_buff);
+static void make_quick_table(uint16 *to_table,uint16 *decode_table,
+ uint *next_free,uint value,uint bits,
+ uint max_bits);
+static void fill_quick_table(uint16 *table,uint bits, uint max_bits,
+ uint value);
+static uint copy_decode_table(uint16 *to_pos,uint offset,
+ uint16 *decode_table);
+static uint find_longest_bitstream(uint16 *table, uint16 *end);
+static void (*get_unpack_function(MARIA_COLUMNDEF *rec))(MARIA_COLUMNDEF *field,
+ MARIA_BIT_BUFF *buff,
+ byte *to,
+ byte *end);
+static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_skip_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_space_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end);
+static void uf_endspace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_space_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end);
+static void uf_prespace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_space_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_zerofill_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_constant(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_intervall(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end);
+static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end);
+static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end);
+static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to,byte *end);
+static uint decode_pos(MARIA_BIT_BUFF *bit_buff,
+ MARIA_DECODE_TREE *decode_tree);
+static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff,uchar *buffer,
+ uint length);
+static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff,uint count);
+static void fill_buffer(MARIA_BIT_BUFF *bit_buff);
+static uint max_bit(uint value);
+static uint read_pack_length(uint version, const uchar *buf, ulong *length);
+#ifdef HAVE_MMAP
+static uchar *_ma_mempack_get_block_info(MARIA_HA *maria,
+ MARIA_BIT_BUFF *bit_buff,
+ MARIA_BLOCK_INFO *info,
+ byte **rec_buff_p,
+ my_size_t *rec_buff_size_p,
+ uchar *header);
+#endif
+
+static maria_bit_type mask[]=
+{
+ 0x00000000,
+ 0x00000001, 0x00000003, 0x00000007, 0x0000000f,
+ 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
+ 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
+ 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
+#if BITS_SAVED > 16
+ 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
+ 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
+ 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
+ 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff,
+#endif
+};
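The get_bits() macro above consumes bits from the most significant end of the preloaded word, and mask[count] is simply (1 << count) - 1. A simplified standalone model of that arithmetic (buffer refill via fill_buffer() is omitted here):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t current_byte; unsigned bits; } demo_bit_buff;

/* Same arithmetic as get_bits() when enough bits are already loaded */
static uint32_t demo_get_bits(demo_bit_buff *bu, unsigned count)
{
  bu->bits-= count;
  return (bu->current_byte >> bu->bits) & ((UINT32_C(1) << count) - 1);
}

int main(void)
{
  demo_bit_buff bu= { 0xA5000000u, 32 };   /* top byte is 1010 0101 */
  uint32_t first= demo_get_bits(&bu, 4);   /* 1010 -> 10 */
  uint32_t second= demo_get_bits(&bu, 4);  /* 0101 -> 5  */

  printf("%u %u\n", first, second);
  return 0;
}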
+
+
+my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile)
+{
+ share->options|= HA_OPTION_READ_ONLY_DATA;
+ if (_ma_read_pack_info(share, dfile,
+ (pbool)
+ test(!(share->options &
+ (HA_OPTION_PACK_RECORD |
+ HA_OPTION_TEMP_COMPRESS_RECORD)))))
+ return 1;
+ return 0;
+}
+
+my_bool _ma_once_end_pack_row(MARIA_SHARE *share)
+{
+ if (share->decode_trees)
+ {
+ my_free((gptr) share->decode_trees,MYF(0));
+ my_free((gptr) share->decode_tables,MYF(0));
+ }
+ return 0;
+}
+
+
+/* Read all packed info, allocate memory and fix field structs */
+
+static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file,
+ pbool fix_keys)
+{
+ int diff_length;
+ uint i,trees,huff_tree_bits,rec_reflength,length;
+ uint16 *decode_table,*tmp_buff;
+ ulong elements,intervall_length;
+ char *disk_cache,*intervall_buff;
+ uchar header[32];
+ MARIA_BIT_BUFF bit_buff;
+ DBUG_ENTER("_ma_read_pack_info");
+
+ if (maria_quick_table_bits < 4)
+ maria_quick_table_bits=4;
+ else if (maria_quick_table_bits > MAX_QUICK_TABLE_BITS)
+ maria_quick_table_bits=MAX_QUICK_TABLE_BITS;
+
+ my_errno=0;
+ if (my_read(file,(byte*) header,sizeof(header),MYF(MY_NABP)))
+ {
+ if (!my_errno)
+ my_errno=HA_ERR_END_OF_FILE;
+ goto err0;
+ }
+ if (memcmp((byte*) header, (byte*) maria_pack_file_magic, 3))
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err0;
+ }
+ share->pack.version= header[3];
+ share->pack.header_length= uint4korr(header+4);
+ share->min_pack_length=(uint) uint4korr(header+8);
+ share->max_pack_length=(uint) uint4korr(header+12);
+ set_if_bigger(share->base.pack_reclength,share->max_pack_length);
+ elements=uint4korr(header+16);
+ intervall_length=uint4korr(header+20);
+ trees=uint2korr(header+24);
+ share->pack.ref_length=header[26];
+ rec_reflength=header[27];
+ diff_length=(int) rec_reflength - (int) share->base.rec_reflength;
+ if (fix_keys)
+ share->rec_reflength=rec_reflength;
+ share->base.min_block_length=share->min_pack_length+1;
+ if (share->min_pack_length > 254)
+ share->base.min_block_length+=2;
+
+ if (!(share->decode_trees=(MARIA_DECODE_TREE*)
+ my_malloc((uint) (trees*sizeof(MARIA_DECODE_TREE)+
+ intervall_length*sizeof(byte)),
+ MYF(MY_WME))))
+ goto err0;
+ intervall_buff=(byte*) (share->decode_trees+trees);
+
+ length=(uint) (elements*2+trees*(1 << maria_quick_table_bits));
+ if (!(share->decode_tables=(uint16*)
+ my_malloc((length+OFFSET_TABLE_SIZE)*sizeof(uint16)+
+ (uint) (share->pack.header_length+7),
+ MYF(MY_WME | MY_ZEROFILL))))
+ goto err1;
+ tmp_buff=share->decode_tables+length;
+ disk_cache=(byte*) (tmp_buff+OFFSET_TABLE_SIZE);
+
+ if (my_read(file,disk_cache,
+ (uint) (share->pack.header_length-sizeof(header)),
+ MYF(MY_NABP)))
+ goto err2;
+
+ huff_tree_bits=max_bit(trees ? trees-1 : 0);
+ init_bit_buffer(&bit_buff, (uchar*) disk_cache,
+ (uint) (share->pack.header_length-sizeof(header)));
+ /* Read new info for each field */
+ for (i=0 ; i < share->base.fields ; i++)
+ {
+ share->rec[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,5);
+ share->rec[i].pack_type=(uint) get_bits(&bit_buff,6);
+ share->rec[i].space_length_bits=get_bits(&bit_buff,5);
+ share->rec[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff,
+ huff_tree_bits);
+ share->rec[i].unpack= get_unpack_function(share->rec+i);
+ }
+ skip_to_next_byte(&bit_buff);
+ decode_table=share->decode_tables;
+ for (i=0 ; i < trees ; i++)
+ if (read_huff_table(&bit_buff,share->decode_trees+i,&decode_table,
+ &intervall_buff,tmp_buff))
+ goto err3;
+ decode_table=(uint16*)
+ my_realloc((gptr) share->decode_tables,
+ (uint) ((byte*) decode_table - (byte*) share->decode_tables),
+ MYF(MY_HOLD_ON_ERROR));
+ {
+ long diff=PTR_BYTE_DIFF(decode_table,share->decode_tables);
+ share->decode_tables=decode_table;
+ for (i=0 ; i < trees ; i++)
+ share->decode_trees[i].table=ADD_TO_PTR(share->decode_trees[i].table,
+ diff, uint16*);
+ }
+
+ /* Fix record-ref-length for keys */
+ if (fix_keys)
+ {
+ for (i=0 ; i < share->base.keys ; i++)
+ {
+ MARIA_KEYDEF *keyinfo= &share->keyinfo[i];
+ keyinfo->keylength+= (uint16) diff_length;
+ keyinfo->minlength+= (uint16) diff_length;
+ keyinfo->maxlength+= (uint16) diff_length;
+ keyinfo->seg[keyinfo->flag & HA_FULLTEXT ?
+ FT_SEGS : keyinfo->keysegs].length= (uint16) rec_reflength;
+ }
+ if (share->ft2_keyinfo.seg)
+ {
+ MARIA_KEYDEF *ft2_keyinfo= &share->ft2_keyinfo;
+ ft2_keyinfo->keylength+= (uint16) diff_length;
+ ft2_keyinfo->minlength+= (uint16) diff_length;
+ ft2_keyinfo->maxlength+= (uint16) diff_length;
+ }
+ }
+
+ if (bit_buff.error || bit_buff.pos < bit_buff.end)
+ goto err3;
+
+ DBUG_RETURN(0);
+
+err3:
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+err2:
+ my_free((gptr) share->decode_tables,MYF(0));
+err1:
+ my_free((gptr) share->decode_trees,MYF(0));
+err0:
+ DBUG_RETURN(1);
+}
+
+
+ /* Read one huff-code-table from datafile */
+
+static uint read_huff_table(MARIA_BIT_BUFF *bit_buff,
+ MARIA_DECODE_TREE *decode_tree,
+ uint16 **decode_table, byte **intervall_buff,
+ uint16 *tmp_buff)
+{
+ uint min_chr,elements,char_bits,offset_bits,size,intervall_length,table_bits,
+ next_free_offset;
+ uint16 *ptr,*end;
+
+ LINT_INIT(ptr);
+ if (!get_bits(bit_buff,1))
+ {
+ min_chr=get_bits(bit_buff,8);
+ elements=get_bits(bit_buff,9);
+ char_bits=get_bits(bit_buff,5);
+ offset_bits=get_bits(bit_buff,5);
+ intervall_length=0;
+ ptr=tmp_buff;
+ }
+ else
+ {
+ min_chr=0;
+ elements=get_bits(bit_buff,15);
+ intervall_length=get_bits(bit_buff,16);
+ char_bits=get_bits(bit_buff,5);
+ offset_bits=get_bits(bit_buff,5);
+ decode_tree->quick_table_bits=0;
+ ptr= *decode_table;
+ }
+ size=elements*2-2;
+
+ for (end=ptr+size ; ptr < end ; ptr++)
+ {
+ if (get_bit(bit_buff))
+ *ptr= (uint16) get_bits(bit_buff,offset_bits);
+ else
+ *ptr= (uint16) (IS_CHAR + (get_bits(bit_buff,char_bits) + min_chr));
+ }
+ skip_to_next_byte(bit_buff);
+
+ decode_tree->table= *decode_table;
+ decode_tree->intervalls= *intervall_buff;
+ if (! intervall_length)
+ {
+ table_bits=find_longest_bitstream(tmp_buff, tmp_buff+OFFSET_TABLE_SIZE);
+ if (table_bits == (uint) ~0)
+ return 1;
+ if (table_bits > maria_quick_table_bits)
+ table_bits=maria_quick_table_bits;
+ next_free_offset= (1 << table_bits);
+ make_quick_table(*decode_table,tmp_buff,&next_free_offset,0,table_bits,
+ table_bits);
+ (*decode_table)+= next_free_offset;
+ decode_tree->quick_table_bits=table_bits;
+ }
+ else
+ {
+ (*decode_table)=end;
+ bit_buff->pos-= bit_buff->bits/8;
+ memcpy(*intervall_buff,bit_buff->pos,(size_t) intervall_length);
+ (*intervall_buff)+=intervall_length;
+ bit_buff->pos+=intervall_length;
+ bit_buff->bits=0;
+ }
+ return 0;
+}
+
+
+static void make_quick_table(uint16 *to_table, uint16 *decode_table,
+ uint *next_free_offset, uint value, uint bits,
+ uint max_bits)
+{
+ if (!bits--)
+ {
+ to_table[value]= (uint16) *next_free_offset;
+ *next_free_offset=copy_decode_table(to_table, *next_free_offset,
+ decode_table);
+ return;
+ }
+ if (!(*decode_table & IS_CHAR))
+ {
+ make_quick_table(to_table,decode_table+ *decode_table,
+ next_free_offset,value,bits,max_bits);
+ }
+ else
+ fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table);
+ decode_table++;
+ value|= (1 << bits);
+ if (!(*decode_table & IS_CHAR))
+ {
+ make_quick_table(to_table,decode_table+ *decode_table,
+ next_free_offset,value,bits,max_bits);
+ }
+ else
+ fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table);
+ return;
+}
+
+
+static void fill_quick_table(uint16 *table, uint bits, uint max_bits,
+ uint value)
+{
+ uint16 *end;
+ value|=(max_bits-bits) << 8;
+ for (end=table+ (1 << bits) ;
+ table < end ;
+ *table++ = (uint16) value | IS_CHAR) ;
+}
+
+
+static uint copy_decode_table(uint16 *to_pos, uint offset,
+ uint16 *decode_table)
+{
+ uint prev_offset;
+ prev_offset= offset;
+
+ if (!(*decode_table & IS_CHAR))
+ {
+ to_pos[offset]=2;
+ offset=copy_decode_table(to_pos,offset+2,decode_table+ *decode_table);
+ }
+ else
+ {
+ to_pos[offset]= *decode_table;
+ offset+=2;
+ }
+ decode_table++;
+
+ if (!(*decode_table & IS_CHAR))
+ {
+ to_pos[prev_offset+1]=(uint16) (offset-prev_offset-1);
+ offset=copy_decode_table(to_pos,offset,decode_table+ *decode_table);
+ }
+ else
+ to_pos[prev_offset+1]= *decode_table;
+ return offset;
+}
+
+
+static uint find_longest_bitstream(uint16 *table, uint16 *end)
+{
+ uint length=1,length2;
+ if (!(*table & IS_CHAR))
+ {
+ uint16 *next= table + *table;
+ if (next > end || next == table)
+ return ~0;
+ length=find_longest_bitstream(next, end)+1;
+ }
+ table++;
+ if (!(*table & IS_CHAR))
+ {
+ uint16 *next= table + *table;
+ if (next > end || next == table)
+ return ~0;
+ length2=find_longest_bitstream(table+ *table, end)+1;
+ length=max(length,length2);
+ }
+ return length;
+}
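The tables built by make_quick_table()/copy_decode_table() and checked by find_longest_bitstream() encode the Huffman tree as pairs of uint16 cells, one per bit value: a cell with IS_CHAR set is a leaf carrying the decoded value, otherwise the cell holds a forward offset (relative to the cell itself, as the code above appears to assume) to the next node. A small hand-built sketch of walking such a table:

#include <stdint.h>
#include <stdio.h>

#define DEMO_IS_CHAR ((uint16_t) 32768)

int main(void)
{
  /* Tree encoding 'a' as bit 0, 'b' as bits 10, 'c' as bits 11.
     Node 0: cell[0] = leaf 'a', cell[1] = offset 1 (child node starts
     one cell after cell[1]).  Node 1: two leaves, 'b' and 'c'. */
  uint16_t table[]= { DEMO_IS_CHAR + 'a', 1,
                      DEMO_IS_CHAR + 'b', DEMO_IS_CHAR + 'c' };
  int bits[]= { 1, 0 };                       /* decode the bit string "10" */
  const uint16_t *pos= table;
  int i;

  for (i= 0; i < 2; i++)
  {
    pos+= bits[i];                            /* pick the cell for this bit */
    if (*pos & DEMO_IS_CHAR)
    {
      printf("decoded '%c'\n", (char) (*pos & 0xFF));
      break;
    }
    pos+= *pos;                               /* follow offset to child node */
  }
  return 0;
}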
+
+
+/*
+ Read record from datafile.
+
+ SYNOPSIS
+ _ma_read_pack_record()
+ info A pointer to MARIA_HA.
+ filepos File offset of the record.
+ buf RETURN The buffer to receive the record.
+
+ RETURN
+ 0 on success
+ HA_ERR_WRONG_IN_RECORD or -1 on error
+*/
+
+int _ma_read_pack_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos)
+{
+ MARIA_BLOCK_INFO block_info;
+ File file;
+  DBUG_ENTER("_ma_read_pack_record");
+
+ if (filepos == HA_OFFSET_ERROR)
+ DBUG_RETURN(-1); /* _search() didn't find record */
+
+ file=info->dfile;
+ if (_ma_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, &info->rec_buff_size, file,
+ filepos))
+ goto err;
+ if (my_read(file,(byte*) info->rec_buff + block_info.offset ,
+ block_info.rec_len - block_info.offset, MYF(MY_NABP)))
+ goto panic;
+ info->update|= HA_STATE_AKTIV;
+ DBUG_RETURN(_ma_pack_rec_unpack(info,&info->bit_buff, buf,
+ info->rec_buff, block_info.rec_len));
+panic:
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+err:
+ DBUG_RETURN(-1);
+}
+
+
+
+int _ma_pack_rec_unpack(register MARIA_HA *info, MARIA_BIT_BUFF *bit_buff,
+ register byte *to, byte *from, ulong reclength)
+{
+ byte *end_field;
+ reg3 MARIA_COLUMNDEF *end;
+ MARIA_COLUMNDEF *current_field;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("_ma_pack_rec_unpack");
+
+ if (info->s->base.null_bytes)
+ {
+ memcpy(to, from, info->s->base.null_bytes);
+ to+= info->s->base.null_bytes;
+ from+= info->s->base.null_bytes;
+ reclength-= info->s->base.null_bytes;
+ }
+ init_bit_buffer(bit_buff, (uchar*) from, reclength);
+ for (current_field=share->rec, end=current_field+share->base.fields ;
+ current_field < end ;
+ current_field++,to=end_field)
+ {
+ end_field=to+current_field->length;
+ (*current_field->unpack)(current_field, bit_buff, to, end_field);
+ }
+ if (!bit_buff->error &&
+ bit_buff->pos - bit_buff->bits / 8 == bit_buff->end)
+ DBUG_RETURN(0);
+ info->update&= ~HA_STATE_AKTIV;
+ DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD);
+} /* _ma_pack_rec_unpack */
+
+
+ /* Return function to unpack field */
+
+static void (*get_unpack_function(MARIA_COLUMNDEF *rec))
+ (MARIA_COLUMNDEF *, MARIA_BIT_BUFF *, byte *, byte *)
+{
+ switch (rec->base_type) {
+ case FIELD_SKIP_ZERO:
+ if (rec->pack_type & PACK_TYPE_ZERO_FILL)
+ return &uf_zerofill_skip_zero;
+ return &uf_skip_zero;
+ case FIELD_NORMAL:
+ if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
+ return &uf_space_normal;
+ if (rec->pack_type & PACK_TYPE_ZERO_FILL)
+ return &uf_zerofill_normal;
+ return &decode_bytes;
+ case FIELD_SKIP_ENDSPACE:
+ if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
+ {
+ if (rec->pack_type & PACK_TYPE_SELECTED)
+ return &uf_space_endspace_selected;
+ return &uf_space_endspace;
+ }
+ if (rec->pack_type & PACK_TYPE_SELECTED)
+ return &uf_endspace_selected;
+ return &uf_endspace;
+ case FIELD_SKIP_PRESPACE:
+ if (rec->pack_type & PACK_TYPE_SPACE_FIELDS)
+ {
+ if (rec->pack_type & PACK_TYPE_SELECTED)
+ return &uf_space_prespace_selected;
+ return &uf_space_prespace;
+ }
+ if (rec->pack_type & PACK_TYPE_SELECTED)
+ return &uf_prespace_selected;
+ return &uf_prespace;
+ case FIELD_CONSTANT:
+ return &uf_constant;
+ case FIELD_INTERVALL:
+ return &uf_intervall;
+ case FIELD_ZERO:
+ case FIELD_CHECK:
+ return &uf_zero;
+ case FIELD_BLOB:
+ return &uf_blob;
+ case FIELD_VARCHAR:
+ if (rec->length <= 256) /* 255 + 1 byte length */
+ return &uf_varchar1;
+ return &uf_varchar2;
+ case FIELD_LAST:
+ default:
+ return 0; /* This should never happen */
+ }
+}
+
+ /* The different functions to unpack a field */
+
+static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ if (get_bit(bit_buff))
+ bzero((char*) to,(uint) (end-to));
+ else
+ {
+ end-=rec->space_length_bits;
+ decode_bytes(rec,bit_buff,to,end);
+ bzero((char*) end,rec->space_length_bits);
+ }
+}
+
+static void uf_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ if (get_bit(bit_buff))
+ bzero((char*) to,(uint) (end-to));
+ else
+ decode_bytes(rec,bit_buff,to,end);
+}
+
+static void uf_space_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ if (get_bit(bit_buff))
+ bfill((byte*) to,(end-to),' ');
+ else
+ decode_bytes(rec,bit_buff,to,end);
+}
+
+static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ bfill((byte*) to,(end-to),' ');
+ else
+ {
+ if (get_bit(bit_buff))
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to,end-spaces);
+ bfill((byte*) end-spaces,spaces,' ');
+ }
+ else
+ decode_bytes(rec,bit_buff,to,end);
+ }
+}
+
+static void uf_endspace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to,end-spaces);
+ bfill((byte*) end-spaces,spaces,' ');
+ }
+ else
+ decode_bytes(rec,bit_buff,to,end);
+}
+
+static void uf_space_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ bfill((byte*) to,(end-to),' ');
+ else
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to,end-spaces);
+ bfill((byte*) end-spaces,spaces,' ');
+ }
+}
+
+static void uf_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ uint spaces;
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to,end-spaces);
+ bfill((byte*) end-spaces,spaces,' ');
+}
+
+static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ bfill((byte*) to,(end-to),' ');
+ else
+ {
+ if (get_bit(bit_buff))
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ bfill((byte*) to,spaces,' ');
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to+spaces,end);
+ }
+ else
+ decode_bytes(rec,bit_buff,to,end);
+ }
+}
+
+
+static void uf_prespace_selected(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ bfill((byte*) to,spaces,' ');
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to+spaces,end);
+ }
+ else
+ decode_bytes(rec,bit_buff,to,end);
+}
+
+
+static void uf_space_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ uint spaces;
+ if (get_bit(bit_buff))
+ bfill((byte*) to,(end-to),' ');
+ else
+ {
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ bfill((byte*) to,spaces,' ');
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to+spaces,end);
+ }
+}
+
+static void uf_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ uint spaces;
+ if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end)
+ {
+ bit_buff->error=1;
+ return;
+ }
+ bfill((byte*) to,spaces,' ');
+ if (to+spaces != end)
+ decode_bytes(rec,bit_buff,to+spaces,end);
+}
+
+static void uf_zerofill_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ end-=rec->space_length_bits;
+ decode_bytes(rec,bit_buff, to, end);
+ bzero((char*) end,rec->space_length_bits);
+}
+
+static void uf_constant(MARIA_COLUMNDEF *rec,
+ MARIA_BIT_BUFF *bit_buff __attribute__((unused)),
+ byte *to, byte *end)
+{
+ memcpy(to,rec->huff_tree->intervalls,(size_t) (end-to));
+}
+
+static void uf_intervall(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to,
+ byte *end)
+{
+ reg1 uint field_length=(uint) (end-to);
+ memcpy(to,rec->huff_tree->intervalls+field_length*decode_pos(bit_buff,
+ rec->huff_tree),
+ (size_t) field_length);
+}
+
+
+/*ARGSUSED*/
+static void uf_zero(MARIA_COLUMNDEF *rec __attribute__((unused)),
+ MARIA_BIT_BUFF *bit_buff __attribute__((unused)),
+ byte *to, byte *end)
+{
+ bzero(to, (uint) (end-to));
+}
+
+static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ if (get_bit(bit_buff))
+ bzero(to, (uint) (end-to));
+ else
+ {
+ ulong length=get_bits(bit_buff,rec->space_length_bits);
+ uint pack_length=(uint) (end-to)-maria_portable_sizeof_char_ptr;
+ if (bit_buff->blob_pos+length > bit_buff->blob_end)
+ {
+ bit_buff->error=1;
+ bzero((byte*) to,(end-to));
+ return;
+ }
+ decode_bytes(rec,bit_buff,(byte*) bit_buff->blob_pos,
+ (byte*) bit_buff->blob_pos+length);
+ _ma_store_blob_length((byte*) to,pack_length,length);
+ memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos,
+ sizeof(char*));
+ bit_buff->blob_pos+=length;
+ }
+}
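+
+/*
+  A rough summary of the packed blob format handled by uf_blob() above:
+  a single bit (1 = empty blob) followed, when non-empty, by the blob
+  length in space_length_bits bits and the Huffman-coded blob bytes.
+  The bytes are decoded into the shared blob buffer (bit_buff->blob_pos)
+  and the record gets the stored length plus a pointer to that buffer.
+*/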
+
+
+static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end __attribute__((unused)))
+{
+ if (get_bit(bit_buff))
+ to[0]= 0; /* Zero lengths */
+ else
+ {
+ ulong length=get_bits(bit_buff,rec->space_length_bits);
+ *to= (char) length;
+ decode_bytes(rec,bit_buff,to+1,to+1+length);
+ }
+}
+
+
+static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end __attribute__((unused)))
+{
+ if (get_bit(bit_buff))
+ to[0]=to[1]=0; /* Zero lengths */
+ else
+ {
+ ulong length=get_bits(bit_buff,rec->space_length_bits);
+ int2store(to,length);
+ decode_bytes(rec,bit_buff,to+2,to+2+length);
+ }
+}
+
+ /* Functions to decode a buffer of bits */
+
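+/*
+  A rough summary of the two-level lookup used by decode_bytes() below
+  (the tables are built by make_quick_table() earlier in this file):
+    1. The next quick_table_bits bits of the stream index
+       decode_tree->table directly. If the entry has IS_CHAR set, its low
+       byte is the decoded character and bits 8-12 hold the number of
+       bits actually consumed.
+    2. Otherwise the entry is an offset into the same table, where the
+       rest of the Huffman tree is stored, and decoding continues one bit
+       at a time through the decode_bytes_test_bit() macros.
+*/
+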
+#if BITS_SAVED == 64
+
+static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ reg1 uint bits,low_byte;
+ reg3 uint16 *pos;
+ reg4 uint table_bits,table_and;
+ MARIA_DECODE_TREE *decode_tree;
+
+ decode_tree=rec->decode_tree;
+ bits=bit_buff->bits; /* Save in reg for quicker access */
+ table_bits=decode_tree->quick_table_bits;
+ table_and= (1 << table_bits)-1;
+
+ do
+ {
+ if (bits <= 32)
+ {
+ if (bit_buff->pos > bit_buff->end+4)
+ {
+ bit_buff->error=1;
+ return; /* Can't be right */
+ }
+ bit_buff->current_byte= (bit_buff->current_byte << 32) +
+ ((((uint) bit_buff->pos[3])) +
+ (((uint) bit_buff->pos[2]) << 8) +
+ (((uint) bit_buff->pos[1]) << 16) +
+ (((uint) bit_buff->pos[0]) << 24));
+ bit_buff->pos+=4;
+ bits+=32;
+ }
+ /* First use info in quick_table */
+ low_byte=(uint) (bit_buff->current_byte >> (bits - table_bits)) & table_and;
+ low_byte=decode_tree->table[low_byte];
+ if (low_byte & IS_CHAR)
+ {
+ *to++ = (char) (low_byte & 255); /* Found char in quick table */
+ bits-= ((low_byte >> 8) & 31); /* Remove bits used */
+ }
+ else
+ { /* Map through rest of decode-table */
+ pos=decode_tree->table+low_byte;
+ bits-=table_bits;
+ for (;;)
+ {
+ low_byte=(uint) (bit_buff->current_byte >> (bits-8));
+ decode_bytes_test_bit(0);
+ decode_bytes_test_bit(1);
+ decode_bytes_test_bit(2);
+ decode_bytes_test_bit(3);
+ decode_bytes_test_bit(4);
+ decode_bytes_test_bit(5);
+ decode_bytes_test_bit(6);
+ decode_bytes_test_bit(7);
+ bits-=8;
+ }
+ *to++ = (char) *pos;
+ }
+ } while (to != end);
+
+ bit_buff->bits=bits;
+ return;
+}
+
+#else
+
+static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *end)
+{
+ reg1 uint bits,low_byte;
+ reg3 uint16 *pos;
+ reg4 uint table_bits,table_and;
+ MARIA_DECODE_TREE *decode_tree;
+
+ decode_tree=rec->huff_tree;
+ bits=bit_buff->bits; /* Save in reg for quicker access */
+ table_bits=decode_tree->quick_table_bits;
+ table_and= (1 << table_bits)-1;
+
+ do
+ {
+ if (bits < table_bits)
+ {
+ if (bit_buff->pos > bit_buff->end+1)
+ {
+ bit_buff->error=1;
+ return; /* Can't be right */
+ }
+#if BITS_SAVED == 32
+ bit_buff->current_byte= (bit_buff->current_byte << 24) +
+ (((uint) ((uchar) bit_buff->pos[2]))) +
+ (((uint) ((uchar) bit_buff->pos[1])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[0])) << 16);
+ bit_buff->pos+=3;
+ bits+=24;
+#else
+ if (bits) /* We must have at least 9 bits */
+ {
+ bit_buff->current_byte= (bit_buff->current_byte << 8) +
+ (uint) ((uchar) bit_buff->pos[0]);
+ bit_buff->pos++;
+ bits+=8;
+ }
+ else
+ {
+ bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) +
+ ((uint) ((uchar) bit_buff->pos[1]));
+ bit_buff->pos+=2;
+ bits+=16;
+ }
+#endif
+ }
+ /* First use info in quick_table */
+ low_byte=(bit_buff->current_byte >> (bits - table_bits)) & table_and;
+ low_byte=decode_tree->table[low_byte];
+ if (low_byte & IS_CHAR)
+ {
+ *to++ = (low_byte & 255); /* Found char in quick table */
+ bits-= ((low_byte >> 8) & 31); /* Remove bits used */
+ }
+ else
+ { /* Map through rest of decode-table */
+ pos=decode_tree->table+low_byte;
+ bits-=table_bits;
+ for (;;)
+ {
+ if (bits < 8)
+ { /* We don't need to check end */
+#if BITS_SAVED == 32
+ bit_buff->current_byte= (bit_buff->current_byte << 24) +
+ (((uint) ((uchar) bit_buff->pos[2]))) +
+ (((uint) ((uchar) bit_buff->pos[1])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[0])) << 16);
+ bit_buff->pos+=3;
+ bits+=24;
+#else
+ bit_buff->current_byte= (bit_buff->current_byte << 8) +
+ (uint) ((uchar) bit_buff->pos[0]);
+ bit_buff->pos+=1;
+ bits+=8;
+#endif
+ }
+ low_byte=(uint) (bit_buff->current_byte >> (bits-8));
+ decode_bytes_test_bit(0);
+ decode_bytes_test_bit(1);
+ decode_bytes_test_bit(2);
+ decode_bytes_test_bit(3);
+ decode_bytes_test_bit(4);
+ decode_bytes_test_bit(5);
+ decode_bytes_test_bit(6);
+ decode_bytes_test_bit(7);
+ bits-=8;
+ }
+ *to++ = (char) *pos;
+ }
+ } while (to != end);
+
+ bit_buff->bits=bits;
+ return;
+}
+#endif /* BITS_SAVED == 64 */
+
+
+static uint decode_pos(MARIA_BIT_BUFF *bit_buff,
+ MARIA_DECODE_TREE *decode_tree)
+{
+ uint16 *pos=decode_tree->table;
+ for (;;)
+ {
+ if (get_bit(bit_buff))
+ pos++;
+ if (*pos & IS_CHAR)
+ return (uint) (*pos & ~IS_CHAR);
+ pos+= *pos;
+ }
+}
+
+
+int _ma_read_rnd_pack_record(MARIA_HA *info,
+ byte *buf,
+ register MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks)
+{
+ File file;
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("_ma_read_rnd_pack_record");
+
+ if (filepos >= info->state->data_file_length)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ goto err;
+ }
+
+ file= info->dfile;
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(&info->rec_cache, (byte*) block_info.header,
+ filepos, share->pack.ref_length,
+ skip_deleted_blocks ? READING_NEXT : 0))
+ goto err;
+ file= -1;
+ }
+ if (_ma_pack_get_block_info(info, &info->bit_buff, &block_info,
+ &info->rec_buff, &info->rec_buff_size,
+ file, filepos))
+ goto err; /* Error code is already set */
+#ifndef DBUG_OFF
+ if (block_info.rec_len > share->max_pack_length)
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err;
+ }
+#endif
+
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(&info->rec_cache, (byte*) info->rec_buff,
+ block_info.filepos, block_info.rec_len,
+ skip_deleted_blocks ? READING_NEXT : 0))
+ goto err;
+ }
+ else
+ {
+ if (my_read(info->dfile,(byte*) info->rec_buff + block_info.offset,
+ block_info.rec_len-block_info.offset,
+ MYF(MY_NABP)))
+ goto err;
+ }
+ info->packed_length= block_info.rec_len;
+ info->cur_row.lastpos= filepos;
+ info->cur_row.nextpos= block_info.filepos+block_info.rec_len;
+ info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
+
+ DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf,
+ info->rec_buff, block_info.rec_len));
+ err:
+ DBUG_RETURN(my_errno);
+}
+
+
+ /* Read and process header from a huff-record-file */
+
+uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff,
+ MARIA_BLOCK_INFO *info,
+ byte **rec_buff_p, my_size_t *rec_buff_size_p,
+ File file, my_off_t filepos)
+{
+ uchar *header= info->header;
+ uint head_length,ref_length;
+ LINT_INIT(ref_length);
+
+ if (file >= 0)
+ {
+ ref_length=maria->s->pack.ref_length;
+ /*
+ We can't use my_pread() here because maria_read_rnd_pack_record assumes
+ position is ok
+ */
+ VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
+ if (my_read(file,(char*) header,ref_length,MYF(MY_NABP)))
+ return BLOCK_FATAL_ERROR;
+ DBUG_DUMP("header",(byte*) header,ref_length);
+ }
+ head_length= read_pack_length((uint) maria->s->pack.version, header,
+ &info->rec_len);
+ if (maria->s->base.blobs)
+ {
+ head_length+= read_pack_length((uint) maria->s->pack.version,
+ header + head_length, &info->blob_len);
+ if (_ma_alloc_buffer(rec_buff_p, rec_buff_size_p,
+ info->rec_len + info->blob_len +
+ maria->s->base.extra_rec_buff_size))
+ return BLOCK_FATAL_ERROR; /* not enough memory */
+ bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len;
+ bit_buff->blob_end= bit_buff->blob_pos + info->blob_len;
+ maria->blob_length=info->blob_len;
+ }
+ info->filepos=filepos+head_length;
+ if (file > 0)
+ {
+ info->offset=min(info->rec_len, ref_length - head_length);
+ memcpy(*rec_buff_p, header + head_length, info->offset);
+ }
+ return 0;
+}
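+
+/*
+  Layout of the block header parsed above: first the packed record length
+  (1 to 5 bytes, see read_pack_length() at the end of this file), then, if
+  the table has blobs, the packed total blob length. When the header was
+  read straight from the data file, any of the ref_length - head_length
+  bytes that follow already belong to the packed row body; they are copied
+  into the record buffer and reported back as info->offset.
+*/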
+
+
+ /* Routines for the bit buffer */
+ /* Note: the buffer must be 6 bytes bigger than the longest row */
+
+static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff, uchar *buffer,
+ uint length)
+{
+ bit_buff->pos=buffer;
+ bit_buff->end=buffer+length;
+ bit_buff->bits=bit_buff->error=0;
+ bit_buff->current_byte=0; /* Avoid purify errors */
+}
+
+static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff, uint count)
+{
+ uint tmp;
+ count-=bit_buff->bits;
+ tmp=(bit_buff->current_byte & mask[bit_buff->bits]) << count;
+ fill_buffer(bit_buff);
+ bit_buff->bits=BITS_SAVED - count;
+ return tmp+(bit_buff->current_byte >> (BITS_SAVED - count));
+}
+
+ /* Fill in empty bit_buff->current_byte from buffer */
+ /* Sets bit_buff->error if buffer is exhausted */
+
+static void fill_buffer(MARIA_BIT_BUFF *bit_buff)
+{
+ if (bit_buff->pos >= bit_buff->end)
+ {
+ bit_buff->error= 1;
+ bit_buff->current_byte=0;
+ return;
+ }
+#if BITS_SAVED == 64
+ bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) +
+ (((uint) ((uchar) bit_buff->pos[6])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[5])) << 16) +
+ (((uint) ((uchar) bit_buff->pos[4])) << 24) +
+ ((ulonglong)
+ ((((uint) ((uchar) bit_buff->pos[3]))) +
+ (((uint) ((uchar) bit_buff->pos[2])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[1])) << 16) +
+ (((uint) ((uchar) bit_buff->pos[0])) << 24)) << 32));
+ bit_buff->pos+=8;
+#else
+#if BITS_SAVED == 32
+ bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) +
+ (((uint) ((uchar) bit_buff->pos[2])) << 8) +
+ (((uint) ((uchar) bit_buff->pos[1])) << 16) +
+ (((uint) ((uchar) bit_buff->pos[0])) << 24));
+ bit_buff->pos+=4;
+#else
+ bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1]))+
+ (((uint) ((uchar) bit_buff->pos[0])) << 8));
+ bit_buff->pos+=2;
+#endif
+#endif
+}
+
+ /* Get number of bits needed to represent a value */
+
+static uint max_bit(register uint value)
+{
+ reg2 uint power=1;
+
+ while ((value>>=1))
+ power++;
+ return (power);
+}
+
+
+/*****************************************************************************
+ Some redefined functions to handle files when we are using memmap
+*****************************************************************************/
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef HAVE_MMAP
+
+static int _ma_read_mempack_record(MARIA_HA *info, byte *buf,
+ MARIA_RECORD_POS filepos);
+static int _ma_read_rnd_mempack_record(MARIA_HA*, byte *, MARIA_RECORD_POS,
+ my_bool);
+
+my_bool _ma_memmap_file(MARIA_HA *info)
+{
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("maria_memmap_file");
+
+ if (!info->s->file_map)
+ {
+ if (my_seek(info->dfile,0L,MY_SEEK_END,MYF(0)) <
+ share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN)
+ {
+ DBUG_PRINT("warning",("File isn't extended for memmap"));
+ DBUG_RETURN(0);
+ }
+ if (_ma_dynmap_file(info, share->state.state.data_file_length))
+ DBUG_RETURN(0);
+ }
+ info->opt_flag|= MEMMAP_USED;
+ info->read_record= share->read_record= _ma_read_mempack_record;
+ share->scan= _ma_read_rnd_mempack_record;
+ DBUG_RETURN(1);
+}
+
+
+void _ma_unmap_file(MARIA_HA *info)
+{
+ VOID(my_munmap(info->s->file_map,
+ (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN));
+}
+
+
+static uchar *
+_ma_mempack_get_block_info(MARIA_HA *maria,
+ MARIA_BIT_BUFF *bit_buff,
+ MARIA_BLOCK_INFO *info,
+ byte **rec_buff_p,
+ my_size_t *rec_buff_size_p,
+ uchar *header)
+{
+ header+= read_pack_length((uint) maria->s->pack.version, header,
+ &info->rec_len);
+ if (maria->s->base.blobs)
+ {
+ header+= read_pack_length((uint) maria->s->pack.version, header,
+ &info->blob_len);
+ /* _ma_alloc_buffer sets my_errno on error */
+ if (_ma_alloc_buffer(rec_buff_p, rec_buff_size_p,
+ info->blob_len + maria->s->base.extra_rec_buff_size))
+ return 0; /* not enough memory */
+ bit_buff->blob_pos= (uchar*) *rec_buff_p;
+ bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len;
+ }
+ return header;
+}
+
+
+static int _ma_read_mempack_record(MARIA_HA *info, byte *buf,
+ MARIA_RECORD_POS filepos)
+{
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share=info->s;
+ byte *pos;
+ DBUG_ENTER("maria_read_mempack_record");
+
+ if (filepos == HA_OFFSET_ERROR)
+ DBUG_RETURN(-1); /* _search() didn't find record */
+
+ if (!(pos= (byte*) _ma_mempack_get_block_info(info, &info->bit_buff,
+ &block_info, &info->rec_buff,
+ &info->rec_buff_size,
+ (uchar*) share->file_map+
+ filepos)))
+ DBUG_RETURN(-1);
+ DBUG_RETURN(_ma_pack_rec_unpack(info, &info->bit_buff, buf,
+ pos, block_info.rec_len));
+}
+
+
+/*ARGSUSED*/
+static int _ma_read_rnd_mempack_record(MARIA_HA *info,
+ byte *buf,
+ register MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks
+ __attribute__((unused)))
+{
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share=info->s;
+ byte *pos,*start;
+ DBUG_ENTER("_ma_read_rnd_mempack_record");
+
+ if (filepos >= share->state.state.data_file_length)
+ {
+ my_errno=HA_ERR_END_OF_FILE;
+ goto err;
+ }
+ if (!(pos= (byte*) _ma_mempack_get_block_info(info, &info->bit_buff,
+ &block_info,
+ &info->rec_buff,
+ &info->rec_buff_size,
+ (uchar*)
+ (start= share->file_map +
+ filepos))))
+ goto err;
+#ifndef DBUG_OFF
+ if (block_info.rec_len > info->s->max_pack_length)
+ {
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ goto err;
+ }
+#endif
+ info->packed_length=block_info.rec_len;
+ info->cur_row.lastpos= filepos;
+ info->cur_row.nextpos= filepos+(uint) (pos-start)+block_info.rec_len;
+ info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
+
+ DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf,
+ pos, block_info.rec_len));
+ err:
+ DBUG_RETURN(my_errno);
+}
+
+#endif /* HAVE_MMAP */
+
+ /* Save length of row */
+
+uint _ma_save_pack_length(uint version, byte *block_buff, ulong length)
+{
+ if (length < 254)
+ {
+ *(uchar*) block_buff= (uchar) length;
+ return 1;
+ }
+ if (length <= 65535)
+ {
+ *(uchar*) block_buff=254;
+ int2store(block_buff+1,(uint) length);
+ return 3;
+ }
+ *(uchar*) block_buff=255;
+ if (version == 1) /* old format */
+ {
+ DBUG_ASSERT(length <= 0xFFFFFF);
+ int3store(block_buff + 1, (ulong) length);
+ return 4;
+ }
+ else
+ {
+ int4store(block_buff + 1, (ulong) length);
+ return 5;
+ }
+}
+
+
+static uint read_pack_length(uint version, const uchar *buf, ulong *length)
+{
+ if (buf[0] < 254)
+ {
+ *length= buf[0];
+ return 1;
+ }
+ else if (buf[0] == 254)
+ {
+ *length= uint2korr(buf + 1);
+ return 3;
+ }
+ if (version == 1) /* old format */
+ {
+ *length= uint3korr(buf + 1);
+ return 4;
+ }
+ else
+ {
+ *length= uint4korr(buf + 1);
+ return 5;
+ }
+}
+
+
+uint _ma_calc_pack_length(uint version, ulong length)
+{
+ return (length < 254) ? 1 : (length < 65536) ? 3 : (version == 1) ? 4 : 5;
+}
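+
+/*
+  Illustrative sketch of the pack-length encoding implemented by the three
+  functions above (the byte values assume the little-endian int2store /
+  int3store / int4store macros; the numbers are examples only):
+    length = 200                  -> C8              (1 byte)
+    length = 1000                 -> FE E8 03        (3 bytes)
+    length = 100000, version == 1 -> FF A0 86 01     (4 bytes)
+    length = 100000, version != 1 -> FF A0 86 01 00  (5 bytes)
+  A round trip could be checked with something like:
+
+    byte buff[5];
+    ulong length;
+    uint stored= _ma_save_pack_length(2, buff, 100000UL);
+    uint read=   read_pack_length(2, (uchar*) buff, &length);
+    DBUG_ASSERT(stored == read && length == 100000UL &&
+                stored == _ma_calc_pack_length(2, 100000UL));
+*/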
diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c
new file mode 100644
index 00000000000..d0a11ad08ab
--- /dev/null
+++ b/storage/maria/ma_page.c
@@ -0,0 +1,158 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Read and write key blocks */
+
+#include "maria_def.h"
+
+ /* Fetch a key-page in memory */
+
+byte *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t page, int level,
+ byte *buff, int return_buffer)
+{
+ byte *tmp;
+ uint page_size;
+ DBUG_ENTER("_ma_fetch_keypage");
+ DBUG_PRINT("enter",("page: %ld", (long) page));
+
+ tmp= key_cache_read(info->s->key_cache, info->s->kfile, page, level, buff,
+ info->s->block_size, info->s->block_size,
+ return_buffer);
+ if (tmp == info->buff)
+ info->keybuff_used=1;
+ else if (!tmp)
+ {
+ DBUG_PRINT("error",("Got errno: %d from key_cache_read",my_errno));
+ info->last_keypage=HA_OFFSET_ERROR;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0);
+ }
+ info->last_keypage=page;
+ page_size=maria_getint(tmp);
+ if (page_size < 4 || page_size > keyinfo->block_length)
+ {
+ DBUG_PRINT("error",("page %lu had wrong page length: %u",
+ (ulong) page, page_size));
+ DBUG_DUMP("page", (char*) tmp, keyinfo->block_length);
+ info->last_keypage = HA_OFFSET_ERROR;
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ tmp= 0;
+ }
+ DBUG_RETURN(tmp);
+} /* _ma_fetch_keypage */
+
+
+ /* Write a key-page on disk */
+
+int _ma_write_keypage(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ my_off_t page, int level, byte *buff)
+{
+ reg3 uint length;
+ DBUG_ENTER("_ma_write_keypage");
+
+#ifndef FAST /* Safety check */
+ if (page < info->s->base.keystart ||
+ page+keyinfo->block_length > info->state->key_file_length ||
+ (page & (MARIA_MIN_KEY_BLOCK_LENGTH-1)))
+ {
+ DBUG_PRINT("error",("Trying to write inside key status region: key_start: %lu length: %lu page: %lu",
+ (long) info->s->base.keystart,
+ (long) info->state->key_file_length,
+ (long) page));
+ my_errno=EINVAL;
+ DBUG_RETURN((-1));
+ }
+ DBUG_PRINT("page",("write page at: %lu",(long) page));
+ DBUG_DUMP("buff",(byte*) buff,maria_getint(buff));
+#endif
+
+ if ((length=keyinfo->block_length) > IO_SIZE*2 &&
+ info->state->key_file_length != page+length)
+ length= ((maria_getint(buff)+IO_SIZE-1) & (uint) ~(IO_SIZE-1));
+#ifdef HAVE_purify
+ {
+ length=maria_getint(buff);
+ bzero((byte*) buff+length,keyinfo->block_length-length);
+ length=keyinfo->block_length;
+ }
+#endif
+ DBUG_RETURN((key_cache_write(info->s->key_cache,
+ info->s->kfile,page, level, (byte*) buff,length,
+ (uint) keyinfo->block_length,
+ (int) ((info->lock_type != F_UNLCK) ||
+ info->s->delay_key_write))));
+} /* _ma_write_keypage */
+
+
+ /* Remove page from disk */
+
+int _ma_dispose(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos,
+ int level)
+{
+ my_off_t old_link;
+ char buff[8];
+ DBUG_ENTER("_ma_dispose");
+ DBUG_PRINT("enter",("pos: %ld", (long) pos));
+
+ old_link= info->s->state.key_del;
+ info->s->state.key_del= pos;
+ mi_sizestore(buff,old_link);
+ info->s->state.changed|= STATE_NOT_SORTED_PAGES;
+ DBUG_RETURN(key_cache_write(info->s->key_cache,
+ info->s->kfile, pos , level, buff,
+ sizeof(buff),
+ (uint) keyinfo->block_length,
+ (int) (info->lock_type != F_UNLCK)));
+} /* _ma_dispose */
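+
+/*
+  Note: _ma_dispose() does not shrink the index file; it pushes the page
+  onto a single-linked free list. The previous head of the list
+  (state.key_del) is stored in the first 8 bytes of the freed page, and
+  _ma_new() below pops pages from this list before it extends the file.
+*/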
+
+
+ /* Make new page on disk */
+
+my_off_t _ma_new(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level)
+{
+ my_off_t pos;
+ byte buff[8];
+ DBUG_ENTER("_ma_new");
+
+ if ((pos= info->s->state.key_del) == HA_OFFSET_ERROR)
+ {
+ if (info->state->key_file_length >=
+ info->s->base.max_key_file_length - keyinfo->block_length)
+ {
+ my_errno=HA_ERR_INDEX_FILE_FULL;
+ DBUG_RETURN(HA_OFFSET_ERROR);
+ }
+ pos=info->state->key_file_length;
+ info->state->key_file_length+= keyinfo->block_length;
+ }
+ else
+ {
+ if (!key_cache_read(info->s->key_cache,
+ info->s->kfile, pos, level,
+ buff,
+ (uint) sizeof(buff),
+ (uint) keyinfo->block_length,0))
+ pos= HA_OFFSET_ERROR;
+ else
+ info->s->state.key_del= mi_sizekorr(buff);
+ }
+ info->s->state.changed|= STATE_NOT_SORTED_PAGES;
+ DBUG_PRINT("exit",("Pos: %ld",(long) pos));
+ DBUG_RETURN(pos);
+} /* _ma_new */
diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c
new file mode 100644
index 00000000000..c1312cb1e77
--- /dev/null
+++ b/storage/maria/ma_panic.c
@@ -0,0 +1,126 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "ma_fulltext.h"
+
+/*
+ Stop usage of Maria
+
+ SYNOPSIS
+ maria_panic()
+ flag HA_PANIC_CLOSE: All maria files (tables and log) are closed.
+ maria_end() is called.
+ HA_PANIC_WRITE: All maria files are unlocked and
+ all changed data in single user maria is
+ written to file
+ HA_PANIC_READ All maria files that were locked when
+ maria_panic(HA_PANIC_WRITE) was done are
+ locked again. A maria_readinfo() is done for
+ all single user files to get changes
+ in the database
+
+ RETURN
+ 0 ok
+ # error number in case of error
+*/
+
+int maria_panic(enum ha_panic_function flag)
+{
+ int error=0;
+ LIST *list_element,*next_open;
+ MARIA_HA *info;
+ DBUG_ENTER("maria_panic");
+
+ if (!maria_inited)
+ DBUG_RETURN(0);
+ pthread_mutex_lock(&THR_LOCK_maria);
+ for (list_element=maria_open_list ; list_element ; list_element=next_open)
+ {
+ next_open=list_element->next; /* Save if close */
+ info=(MARIA_HA*) list_element->data;
+ switch (flag) {
+ case HA_PANIC_CLOSE:
+ pthread_mutex_unlock(&THR_LOCK_maria); /* Not exactly right... */
+ if (maria_close(info))
+ error=my_errno;
+ pthread_mutex_lock(&THR_LOCK_maria);
+ break;
+ case HA_PANIC_WRITE: /* Do this to free databases */
+#ifdef CANT_OPEN_FILES_TWICE
+ if (info->s->options & HA_OPTION_READ_ONLY_DATA)
+ break;
+#endif
+ if (flush_key_blocks(info->s->key_cache, info->s->kfile, FLUSH_RELEASE))
+ error=my_errno;
+ if (info->opt_flag & WRITE_CACHE_USED)
+ if (flush_io_cache(&info->rec_cache))
+ error=my_errno;
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (flush_io_cache(&info->rec_cache))
+ error=my_errno;
+ reinit_io_cache(&info->rec_cache,READ_CACHE,0,
+ (pbool) (info->lock_type != F_UNLCK),1);
+ }
+ if (info->lock_type != F_UNLCK && ! info->was_locked)
+ {
+ info->was_locked=info->lock_type;
+ if (maria_lock_database(info,F_UNLCK))
+ error=my_errno;
+ }
+#ifdef CANT_OPEN_FILES_TWICE
+ if (info->s->kfile >= 0 && my_close(info->s->kfile,MYF(0)))
+ error = my_errno;
+ if (info->dfile >= 0 && my_close(info->dfile,MYF(0)))
+ error = my_errno;
+ info->s->kfile=info->dfile= -1; /* Files aren't open anymore */
+ break;
+#endif
+ case HA_PANIC_READ: /* Restore to before WRITE */
+#ifdef CANT_OPEN_FILES_TWICE
+ { /* Open closed files */
+ char name_buff[FN_REFLEN];
+ if (info->s->kfile < 0)
+ if ((info->s->kfile= my_open(fn_format(name_buff,info->filename,"",
+ N_NAME_IEXT,4),info->mode,
+ MYF(MY_WME))) < 0)
+ error = my_errno;
+ if (info->dfile < 0)
+ {
+ if ((info->dfile= my_open(fn_format(name_buff,info->filename,"",
+ N_NAME_DEXT,4),info->mode,
+ MYF(MY_WME))) < 0)
+ error = my_errno;
+ info->rec_cache.file=info->dfile;
+ }
+ }
+#endif
+ if (info->was_locked)
+ {
+ if (maria_lock_database(info, info->was_locked))
+ error=my_errno;
+ info->was_locked=0;
+ }
+ break;
+ }
+ }
+ pthread_mutex_unlock(&THR_LOCK_maria);
+ if (flag == HA_PANIC_CLOSE)
+ maria_end();
+ if (!error)
+ DBUG_RETURN(0);
+ DBUG_RETURN(my_errno=error);
+} /* maria_panic */
diff --git a/storage/maria/ma_preload.c b/storage/maria/ma_preload.c
new file mode 100644
index 00000000000..f387f2b7de3
--- /dev/null
+++ b/storage/maria/ma_preload.c
@@ -0,0 +1,117 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Preload indexes into key cache
+*/
+
+#include "maria_def.h"
+
+
+/*
+ Preload pages of the index file for a table into the key cache
+
+ SYNOPSIS
+ maria_preload()
+ info open table
+ key_map map of indexes to preload into key cache
+ ignore_leaves only non-leaves pages are to be preloaded
+
+ RETURN VALUE
+ 0 if a success. error code - otherwise.
+
+ NOTES.
+ At present pages for all indexes are preloaded.
+ In the future only pages for indexes specified in the key_map parameter
+ of the table will be preloaded.
+*/
+
+int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves)
+{
+ uint i;
+ ulong length, block_length= 0;
+ uchar *buff= NULL;
+ MARIA_SHARE* share= info->s;
+ uint keys= share->state.header.keys;
+ MARIA_KEYDEF *keyinfo= share->keyinfo;
+ my_off_t key_file_length= share->state.state.key_file_length;
+ my_off_t pos= share->base.keystart;
+ DBUG_ENTER("maria_preload");
+
+ if (!keys || !maria_is_any_key_active(key_map) || key_file_length == pos)
+ DBUG_RETURN(0);
+
+ block_length= keyinfo[0].block_length;
+
+ /* Check whether all indexes use the same block size */
+ for (i= 1 ; i < keys ; i++)
+ {
+ if (keyinfo[i].block_length != block_length)
+ DBUG_RETURN(my_errno= HA_ERR_NON_UNIQUE_BLOCK_SIZE);
+ }
+
+ length= info->preload_buff_size/block_length * block_length;
+ set_if_bigger(length, block_length);
+
+ if (!(buff= (uchar *) my_malloc(length, MYF(MY_WME))))
+ DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM);
+
+ if (flush_key_blocks(share->key_cache,share->kfile, FLUSH_RELEASE))
+ goto err;
+
+ do
+ {
+ /* Read the next block of index file into the preload buffer */
+ if ((my_off_t) length > (key_file_length-pos))
+ length= (ulong) (key_file_length-pos);
+ if (my_pread(share->kfile, (byte*) buff, length, pos, MYF(MY_FAE|MY_FNABP)))
+ goto err;
+
+ if (ignore_leaves)
+ {
+ uchar *end= buff+length;
+ do
+ {
+ if (_ma_test_if_nod(buff))
+ {
+ if (key_cache_insert(share->key_cache,
+ share->kfile, pos, DFLT_INIT_HITS,
+ (byte*) buff, block_length))
+ goto err;
+ }
+ pos+= block_length;
+ }
+ while ((buff+= block_length) != end);
+ buff= end-length;
+ }
+ else
+ {
+ if (key_cache_insert(share->key_cache,
+ share->kfile, pos, DFLT_INIT_HITS,
+ (byte*) buff, length))
+ goto err;
+ pos+= length;
+ }
+ }
+ while (pos != key_file_length);
+
+ my_free((char*) buff, MYF(0));
+ DBUG_RETURN(0);
+
+err:
+ my_free((char*) buff, MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_RETURN(my_errno= errno);
+}
diff --git a/storage/maria/ma_range.c b/storage/maria/ma_range.c
new file mode 100644
index 00000000000..d3806aa79bc
--- /dev/null
+++ b/storage/maria/ma_range.c
@@ -0,0 +1,258 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Gives an approximate number of how many records there are between two keys.
+ Used when optimizing queries.
+ */
+
+#include "maria_def.h"
+#include "ma_rt_index.h"
+
+static ha_rows _ma_record_pos(MARIA_HA *info,const byte *key,uint key_len,
+ enum ha_rkey_function search_flag);
+static double _ma_search_pos(MARIA_HA *info,MARIA_KEYDEF *keyinfo, byte *key,
+ uint key_len,uint nextflag, my_off_t pos);
+static uint _ma_keynr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page,
+ byte *keypos, uint *ret_max_key);
+
+
+/*
+ Estimate how many records there is in a given range
+
+ SYNOPSIS
+ maria_records_in_range()
+ info MARIA handler
+ inx Index to use
+ min_key Min key. Is = 0 if no min range
+ max_key Max key. Is = 0 if no max range
+
+ NOTES
+ We should ONLY return 0 if there are no rows in the range
+
+ RETURN
+ HA_POS_ERROR error (or we can't estimate number of rows)
+ number Estimated number of rows
+*/
+
+
+ha_rows maria_records_in_range(MARIA_HA *info, int inx, key_range *min_key,
+ key_range *max_key)
+{
+ ha_rows start_pos,end_pos,res;
+ DBUG_ENTER("maria_records_in_range");
+
+ if ((inx = _ma_check_index(info,inx)) < 0)
+ DBUG_RETURN(HA_POS_ERROR);
+
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(HA_POS_ERROR);
+ info->update&= (HA_STATE_CHANGED+HA_STATE_ROW_CHANGED);
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->key_root_lock[inx]);
+
+ switch(info->s->keyinfo[inx].key_alg){
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ {
+ byte *key_buff;
+ uint start_key_len;
+
+ /*
+ The problem is that the optimizer doesn't support
+ RTree keys properly at the moment.
+ Hope this will be fixed some day.
+ For now, a NULL min_key means that the request was not
+ prepared for an RTree key and BTree behaviour is
+ expected from it. As the RTree code cannot handle
+ such a request, we return an error.
+ */
+ if (!min_key)
+ {
+ res= HA_POS_ERROR;
+ break;
+ }
+ key_buff= info->lastkey+info->s->base.max_key_length;
+ start_key_len= _ma_pack_key(info,inx, key_buff,
+ min_key->key, min_key->length,
+ (HA_KEYSEG**) 0);
+ res= maria_rtree_estimate(info, inx, key_buff, start_key_len,
+ maria_read_vec[min_key->flag]);
+ res= res ? res : 1; /* Don't return 0 */
+ break;
+ }
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ start_pos= (min_key ?
+ _ma_record_pos(info, min_key->key, min_key->length,
+ min_key->flag) :
+ (ha_rows) 0);
+ end_pos= (max_key ?
+ _ma_record_pos(info, max_key->key, max_key->length,
+ max_key->flag) :
+ info->state->records+ (ha_rows) 1);
+ res= (end_pos < start_pos ? (ha_rows) 0 :
+ (end_pos == start_pos ? (ha_rows) 1 : end_pos-start_pos));
+ if (start_pos == HA_POS_ERROR || end_pos == HA_POS_ERROR)
+ res=HA_POS_ERROR;
+ }
+
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->key_root_lock[inx]);
+ fast_ma_writeinfo(info);
+
+ DBUG_PRINT("info",("records: %ld",(ulong) (res)));
+ DBUG_RETURN(res);
+}
+
+
+ /* Find relative position (in records) for key in index-tree */
+
+static ha_rows _ma_record_pos(MARIA_HA *info, const byte *key, uint key_len,
+ enum ha_rkey_function search_flag)
+{
+ uint inx=(uint) info->lastinx, nextflag;
+ MARIA_KEYDEF *keyinfo=info->s->keyinfo+inx;
+ byte *key_buff;
+ double pos;
+ DBUG_ENTER("_ma_record_pos");
+ DBUG_PRINT("enter",("search_flag: %d",search_flag));
+
+ if (key_len == 0)
+ key_len= USE_WHOLE_KEY;
+ key_buff=info->lastkey+info->s->base.max_key_length;
+ key_len= _ma_pack_key(info, inx, key_buff, key, key_len,
+ (HA_KEYSEG**) 0);
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg,
+ key_buff, key_len););
+ nextflag=maria_read_vec[search_flag];
+ if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST)))
+ key_len=USE_WHOLE_KEY;
+
+ pos= _ma_search_pos(info,keyinfo, key_buff, key_len,
+ nextflag | SEARCH_SAVE_BUFF,
+ info->s->state.key_root[inx]);
+ if (pos >= 0.0)
+ {
+ DBUG_PRINT("exit",("pos: %ld",(ulong) (pos*info->state->records)));
+ DBUG_RETURN((ulong) (pos*info->state->records+0.5));
+ }
+ DBUG_RETURN(HA_POS_ERROR);
+}
+
+
+ /* This is a modified version of _ma_search */
+ /* Returns offset for key in the index table (decimal 0.0 <= x <= 1.0) */
+
+static double _ma_search_pos(register MARIA_HA *info,
+ register MARIA_KEYDEF *keyinfo,
+ byte *key, uint key_len, uint nextflag,
+ register my_off_t pos)
+{
+ int flag;
+ uint nod_flag,keynr,max_keynr;
+ my_bool after_key;
+ byte *keypos, *buff;
+ double offset;
+ DBUG_ENTER("_ma_search_pos");
+
+ if (pos == HA_OFFSET_ERROR)
+ DBUG_RETURN(0.5);
+
+ if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,1)))
+ goto err;
+ flag=(*keyinfo->bin_search)(info, keyinfo, buff, key, key_len, nextflag,
+ &keypos,info->lastkey, &after_key);
+ nod_flag=_ma_test_if_nod(buff);
+ keynr= _ma_keynr(info,keyinfo,buff,keypos,&max_keynr);
+
+ if (flag)
+ {
+ if (flag == MARIA_FOUND_WRONG_KEY)
+ DBUG_RETURN(-1); /* error */
+ /*
+ Didn't find a match. keypos points at the next (bigger) key.
+ Try to find a smaller, better matching key.
+ Matches keynr + [0-1]
+ */
+ if (flag > 0 && ! nod_flag)
+ offset= 1.0;
+ else if ((offset= _ma_search_pos(info,keyinfo,key,key_len,nextflag,
+ _ma_kpos(nod_flag,keypos))) < 0)
+ DBUG_RETURN(offset);
+ }
+ else
+ {
+ /*
+ Found match. Keypos points at the start of the found key
+ Matches keynr+1
+ */
+ offset=1.0; /* Matches keynr+1 */
+ if ((nextflag & SEARCH_FIND) && nod_flag &&
+ ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME ||
+ key_len != USE_WHOLE_KEY))
+ {
+ /*
+ There may be identical keys in the tree. Try to match one of those.
+ Matches keynr + [0-1]
+ */
+ if ((offset= _ma_search_pos(info,keyinfo,key,key_len,SEARCH_FIND,
+ _ma_kpos(nod_flag,keypos))) < 0)
+ DBUG_RETURN(offset); /* Read error */
+ }
+ }
+ DBUG_PRINT("info",("keynr: %d offset: %g max_keynr: %d nod: %d flag: %d",
+ keynr,offset,max_keynr,nod_flag,flag));
+ DBUG_RETURN((keynr+offset)/(max_keynr+1));
+err:
+ DBUG_PRINT("exit",("Error: %d",my_errno));
+ DBUG_RETURN (-1.0);
+}
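+
+/*
+  Worked example of the estimate above (the numbers are illustrative):
+  if a page holds max_keynr = 9 keys, the search key compares right after
+  key number 3 and the recursion into that child returns 0.25, the call
+  returns (3 + 0.25) / (9 + 1) = 0.325, i.e. about 32.5% of the keys are
+  expected to lie before the search key. _ma_record_pos() then multiplies
+  this fraction by info->state->records.
+*/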
+
+
+ /* Get key number of current key and max number of keys in node */
+
+static uint _ma_keynr(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ byte *page, byte *keypos, uint *ret_max_key)
+{
+ uint nod_flag,keynr,max_key;
+ byte t_buff[HA_MAX_KEY_BUFF],*end;
+
+ end= page+maria_getint(page);
+ nod_flag=_ma_test_if_nod(page);
+ page+=2+nod_flag;
+
+ if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ {
+ *ret_max_key= (uint) (end-page)/(keyinfo->keylength+nod_flag);
+ return (uint) (keypos-page)/(keyinfo->keylength+nod_flag);
+ }
+
+ max_key=keynr=0;
+ t_buff[0]=0; /* Safety */
+ while (page < end)
+ {
+ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff))
+ return 0; /* Error */
+ max_key++;
+ if (page == keypos)
+ keynr=max_key;
+ }
+ *ret_max_key=max_key;
+ return(keynr);
+}
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
new file mode 100644
index 00000000000..b6739b86874
--- /dev/null
+++ b/storage/maria/ma_recovery.c
@@ -0,0 +1,268 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3072 Maria recovery
+ First version written by Guilhem Bichot on 2006-04-27.
+ Does not compile yet.
+*/
+
+/* Here is the implementation of this module */
+
+#include "page_cache.h"
+#include "least_recently_dirtied.h"
+#include "transaction.h"
+#include "share.h"
+#include "log.h"
+
+typedef struct st_record_type_properties {
+ /* used for debug error messages or "maria_read_log" command-line tool: */
+ char *name,
+ my_bool record_ends_group;
+ /* a function to execute when we see the record during the REDO phase */
+ int (*record_execute_in_redo_phase)(RECORD *); /* param will be record header instead later */
+ /* a function to execute when we see the record during the UNDO phase */
+ int (*record_execute_in_undo_phase)(RECORD *); /* param will be record header instead later */
+} RECORD_TYPE_PROPERTIES;
+
+int no_op(RECORD *record) { return 0; }
+
+RECORD_TYPE_PROPERTIES all_record_type_properties[]=
+{
+ /* listed here in the order of the "log records type" enumeration */
+ {"REDO_INSERT_HEAD", FALSE, redo_insert_head_execute_in_redo_phase, no_op},
+ ...,
+ {"UNDO_INSERT" , TRUE , undo_insert_execute_in_redo_phase, undo_insert_execute_in_undo_phase},
+ {"COMMIT", , TRUE , commit_execute_in_redo_phase, no_op},
+ ...
+};
+
+int redo_insert_head_execute_in_redo_phase(RECORD *record)
+{
+ /* write the data to the proper page */
+}
+
+int undo_insert_execute_in_redo_phase(RECORD *record)
+{
+ trans_table[short_trans_id].undo_lsn= record.lsn;
+ /* don't restore the old version of the row */
+}
+
+int undo_insert_execute_in_undo_phase(RECORD *record)
+{
+ /* restore the old version of the row */
+ trans_table[short_trans_id].undo_lsn= record.prev_undo_lsn;
+}
+
+int commit_execute_in_redo_phase(RECORD *record)
+{
+ trans_table[short_trans_id].state= COMMITTED;
+ /*
+ and that's all: the delete/update handler should not be woken up! as there
+ may be REDO for purge further in the log.
+ */
+}
+
+#define record_ends_group(R) \
+ all_record_type_properties[(R)->type].record_ends_group
+
+#define execute_log_record_in_redo_phase(R) \
+ all_record_type_properties[(R).type].record_execute_in_redo_phase(R)
+
+
+int recovery()
+{
+ control_file_create_or_open();
+ /*
+ init log handler: tell it that we are going to do large reads of the
+ log, sequential and backward. Log handler could decide to alloc a big
+ read-only IO_CACHE for this, or use its usual page cache.
+ */
+
+ /* read checkpoint log record from log handler */
+ RECORD *checkpoint_record= log_read_record(last_checkpoint_lsn_at_start);
+
+ /* parse this record, build structs (dirty_pages, transactions table, file_map) */
+ /*
+ read log records (note: sometimes only the header is needed, for ex during
+ REDO phase only the header of UNDO is needed, not the 4G blob in the
+ variable-length part, so I could use that; however for PREPARE (which is a
+ variable-length record) I'll need to read the full record in the REDO
+ phase):
+ */
+
+ /**** REDO PHASE *****/
+
+ record= log_read_record(min(rec_lsn, ...)); /* later, read only header */
+
+ /*
+ if log handler knows the end LSN of the log, we could print here how many
+ MB of log we have to read (to give an idea of the time), and print
+ progress notes.
+ */
+
+ while (record != NULL)
+ {
+ /*
+ A complete group is a set of log records with an "end mark" record
+ (e.g. a set of REDOs for an operation, terminated by an UNDO for this
+ operation); if there is no "end mark" record the group is incomplete
+ and won't be executed.
+ */
+ if (record_ends_group(record))
+ {
+ if (trans_table[record.short_trans_id].group_start_lsn != 0)
+ {
+ /*
+ There is a complete group for this transaction, containing more than
+ this event.
+ We're going to read recently read log records:
+ for this log_read_record() to be efficient (not touch the disk),
+ log handler could cache recently read pages
+ (can just use an IO_CACHE of 10 MB to read the log, or the normal
+ log handler page cache).
+ Without it only OS file cache will help.
+ */
+ record2=
+ log_read_record(trans_table[record.short_trans_id].group_start_lsn);
+
+ do
+ {
+ if (record2.short_trans_id == record.short_trans_id)
+ execute_log_record_in_redo_phase(record2); /* it's in our group */
+ record2= log_read_next_record();
+ }
+ while (record2.lsn < record.lsn);
+ trans_table[record.short_trans_id].group_start_lsn= 0; /* group finished */
+ }
+ execute_log_record_in_redo_phase(record);
+ }
+ else /* record does not end group */
+ {
+ /* just record the fact, can't know if can execute yet */
+ if (trans_table[short_trans_id].group_start_lsn == 0) /* group not yet started */
+ trans_table[short_trans_id].group_start_lsn= record.lsn;
+ }
+
+ /*
+ Later we can optimize: instead of "execute_log_record(record2)", do
+ copy_record_into_exec_buffer(record2):
+ this will just copy record into a multi-record (10 MB?) memory buffer,
+ and when buffer is full, will do sorting of REDOs per
+ page id and execute them.
+ This sorting will enable us to do more sequential reads of the
+ data/index pages.
+ Note that updating bitmap pages (when we have executed a REDO for a page
+ we update its bitmap page) may break the sequential read of pages,
+ so maybe we should read and cache bitmap pages in the beginning.
+ Or ok the sequence will be broken, but quickly all bitmap pages will be
+ in memory and so the sequence will not be broken anymore.
+ Sorting could even determine, based on the physical device of the files
+ ("st_dev" in stat()), that some files should be taken by
+ different threads, if we want to do parallelism.
+ */
+ /*
+ Here's how to read a complete variable-length record if needed:
+ <sanja> read the header, allocate buffer of record length, read whole
+ record.
+ */
+ record= log_read_next_record();
+ }
+
+ /*
+ Earlier or here, create true transactions in TM.
+ If done earlier, note that TM should not wake up the delete/update handler
+ when it receives a commit info, as existing REDO for purge may exist in
+ the log, and so the delete/update handler may do changes which conflict
+ with these REDOs.
+ Even if done here, better to not wake it up now as we're going to free the
+ page cache.
+
+ MikaelR suggests: support checkpoints during REDO phase too: do checkpoint
+ after a certain amount of log records have been executed. This helps
+ against repeated crashes. Those checkpoints could not be user-requested
+ (as engine is not communicating during the REDO phase), so they would be
+ automatic: this changes the original assumption that we don't write to the
+ log while in the REDO phase, but why not. How often should we checkpoint?
+ */
+
+ /*
+ We want to have two steps:
+ engine->recover_with_max_memory();
+ next_engine->recover_with_max_memory();
+ engine->init_with_normal_memory();
+ next_engine->init_with_normal_memory();
+ So: in recover_with_max_memory() allocate a giant page cache, do REDO
+ phase, then all page cache is flushed and emptied and freed (only retain
+ small structures like TM): take full checkpoint, which is useful if
+ next engine crashes in its recovery the next second.
+ Destroy all shares (maria_close()), then at init_with_normal_memory() we
+ do this:
+ */
+
+ /**** UNDO PHASE *****/
+
+ print_information_to_error_log(nb of trans to roll back, nb of prepared trans);
+
+ /*
+ Launch one or more threads to do the background rollback. Don't wait for
+ them to complete their rollback (background rollback; for debugging, we
+ can have an option which waits). Set a counter (total_of_rollback_threads)
+ to the number of threads to launch.
+
+ Note that InnoDB's rollback-in-background works as long as InnoDB is the
+ last engine to recover, otherwise MySQL will refuse new connections until
+ the last engine has recovered so it's not "background" from the user's
+ point of view. InnoDB is near top of sys_table_types so all others
+ (e.g. BDB) recover after it... So it's really "online rollback" only if
+ InnoDB is the only engine.
+ */
+
+ /* wake up delete/update handler */
+ /* tell the TM that it can now accept new transactions */
+
+ /*
+ mark that checkpoint requests are now allowed.
+ */
+}
+
+pthread_handler_decl rollback_background_thread()
+{
+ /*
+ execute the normal runtime-rollback code for a bunch of transactions.
+ */
+ while (trans in list_of_trans_to_rollback_by_this_thread)
+ {
+ while (trans->undo_lsn != 0)
+ {
+ /* this is the normal runtime-rollback code: */
+ record= log_read_record(trans->undo_lsn);
+ execute_log_record_in_undo_phase(record);
+ trans->undo_lsn= record.prev_undo_lsn;
+ }
+ /* remove trans from list */
+ }
+ lock_mutex(rollback_threads); /* or atomic counter */
+ if (--total_of_rollback_threads == 0)
+ {
+ /*
+ All rollback threads are done. Print "rollback finished" to the error
+ log and take a full checkpoint.
+ */
+ }
+ unlock_mutex(rollback_threads);
+ pthread_exit();
+}
diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h
new file mode 100644
index 00000000000..05026f4b52a
--- /dev/null
+++ b/storage/maria/ma_recovery.h
@@ -0,0 +1,26 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ WL#3072 Maria recovery
+ First version written by Guilhem Bichot on 2006-04-27.
+ Does not compile yet.
+*/
+
+/* This is the interface of this module. */
+
+/* Performs recovery of the engine at start */
+int recovery();
diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c
new file mode 100644
index 00000000000..5d89cc063d7
--- /dev/null
+++ b/storage/maria/ma_rename.c
@@ -0,0 +1,89 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Rename a table
+*/
+
+#include "ma_fulltext.h"
+
+int maria_rename(const char *old_name, const char *new_name)
+{
+ char from[FN_REFLEN],to[FN_REFLEN];
+ int data_file_rename_error;
+#ifdef USE_RAID
+ uint raid_type=0,raid_chunks=0;
+#endif
+ DBUG_ENTER("maria_rename");
+
+#ifdef EXTRA_DEBUG
+ _ma_check_table_is_closed(old_name,"rename old_table");
+ _ma_check_table_is_closed(new_name,"rename new table2");
+#endif
+ /* LOCKTODO take X-lock on table here */
+#ifdef USE_RAID
+ {
+ MARIA_HA *info;
+ if (!(info=maria_open(old_name, O_RDONLY, 0)))
+ DBUG_RETURN(my_errno);
+ raid_type = info->s->base.raid_type;
+ raid_chunks = info->s->base.raid_chunks;
+ maria_close(info);
+ }
+#ifdef EXTRA_DEBUG
+ _ma_check_table_is_closed(old_name,"rename raidcheck");
+#endif
+#endif /* USE_RAID */
+
+ fn_format(from,old_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ fn_format(to,new_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ /*
+ RECOVERYTODO log the two renames below. Update
+ ZeroDirtyPagesLSN of the table on disk (=> sync the files), this is
+ needed so that Recovery does not pick a wrong table.
+ Then do the file renames.
+ For this log record to be of any use for Recovery, we need the upper MySQL
+ layer to be crash-safe in DDLs; when it is we should reconsider the moment
+ of writing this log record, how to use it in Recovery, and force the log.
+ For now this record is only informative. But ZeroDirtyPagesLSN is
+ critically needed!
+ */
+ if (my_rename_with_symlink(from, to, MYF(MY_WME | MY_SYNC_DIR)))
+ DBUG_RETURN(my_errno);
+ fn_format(from,old_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+ fn_format(to,new_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
+#ifdef USE_RAID
+ if (raid_type)
+ data_file_rename_error= my_raid_rename(from, to, raid_chunks,
+ MYF(MY_WME | MY_SYNC_DIR));
+ else
+#endif
+ data_file_rename_error=
+ my_rename_with_symlink(from, to, MYF(MY_WME | MY_SYNC_DIR));
+ if (data_file_rename_error)
+ {
+ /*
+ now we have a renamed index file and a non-renamed data file, try to
+ undo the rename of the index file.
+ */
+ data_file_rename_error= my_errno;
+ fn_format(from, old_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT));
+ fn_format(to, new_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT));
+ my_rename_with_symlink(to, from, MYF(MY_WME | MY_SYNC_DIR));
+ }
+ DBUG_RETURN(data_file_rename_error);
+
+}
diff --git a/storage/maria/ma_rfirst.c b/storage/maria/ma_rfirst.c
new file mode 100644
index 00000000000..6fa8af75c40
--- /dev/null
+++ b/storage/maria/ma_rfirst.c
@@ -0,0 +1,27 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+ /* Read first row through a specific key */
+
+int maria_rfirst(MARIA_HA *info, byte *buf, int inx)
+{
+ DBUG_ENTER("maria_rfirst");
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->update|= HA_STATE_PREV_FOUND;
+ DBUG_RETURN(maria_rnext(info,buf,inx));
+} /* maria_rfirst */
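
A hedged sketch of how maria_rfirst() combines with maria_rnext() (defined further below) for a full forward scan; the handler and record buffer are assumed to come from maria_open() elsewhere, and scan_index_forward is an illustrative name:

  /* Sketch: forward scan of index 0, first key to last.  'record_buf' must
     be large enough for one row (sized from the share in real code). */
  static void scan_index_forward(MARIA_HA *info, byte *record_buf)
  {
    int error= maria_rfirst(info, record_buf, 0);   /* position on first key */
    while (!error)
    {
      /* ... use record_buf ... */
      error= maria_rnext(info, record_buf, 0);      /* step to the next key */
    }
    /* the loop ends with error == HA_ERR_END_OF_FILE when the index is done */
  }
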
diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c
new file mode 100644
index 00000000000..ad27d3c286c
--- /dev/null
+++ b/storage/maria/ma_rkey.c
@@ -0,0 +1,179 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Read record based on a key */
+
+#include "maria_def.h"
+#include "ma_rt_index.h"
+
+ /* Read a record using a key */
+ /* The ordinary search_flag is 0; return an error if no record matches the key */
+
+int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key,
+ uint key_len, enum ha_rkey_function search_flag)
+{
+ byte *key_buff;
+ MARIA_SHARE *share=info->s;
+ MARIA_KEYDEF *keyinfo;
+ HA_KEYSEG *last_used_keyseg;
+ uint pack_key_length, use_key_length, nextflag;
+ DBUG_ENTER("maria_rkey");
+ DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d",
+ (long) info, (long) buf, inx, search_flag));
+
+ if ((inx = _ma_check_index(info,inx)) < 0)
+ DBUG_RETURN(my_errno);
+
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ info->last_key_func= search_flag;
+ keyinfo= share->keyinfo + inx;
+
+ if (info->once_flags & USE_PACKED_KEYS)
+ {
+ info->once_flags&= ~USE_PACKED_KEYS; /* Reset flag */
+ /*
+      The key is already packed; this happens when we are using a MERGE table
+ */
+ key_buff= info->lastkey+info->s->base.max_key_length;
+ pack_key_length= key_len;
+ bmove(key_buff,key,key_len);
+ last_used_keyseg= 0;
+ }
+ else
+ {
+ if (key_len == 0)
+ key_len=USE_WHOLE_KEY;
+ /* Save the packed key for later use in the second buffer of lastkey. */
+ key_buff=info->lastkey+info->s->base.max_key_length;
+ pack_key_length= _ma_pack_key(info,(uint) inx, key_buff, key,
+ key_len, &last_used_keyseg);
+ /* Save packed_key_length for use by the MERGE engine. */
+ info->pack_key_length= pack_key_length;
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg,
+ key_buff, pack_key_length););
+ }
+
+ if (fast_ma_readinfo(info))
+ goto err;
+ if (share->concurrent_insert)
+ rw_rdlock(&share->key_root_lock[inx]);
+
+ nextflag=maria_read_vec[search_flag];
+ use_key_length=pack_key_length;
+ if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST)))
+ use_key_length=USE_WHOLE_KEY;
+
+ switch (info->s->keyinfo[inx].key_alg) {
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ if (maria_rtree_find_first(info,inx,key_buff,use_key_length,nextflag) < 0)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ }
+ break;
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ if (!_ma_search(info, keyinfo, key_buff, use_key_length,
+ maria_read_vec[search_flag],
+ info->s->state.key_root[inx]) &&
+ share->concurrent_insert)
+ {
+ /*
+        If we are searching for a partial key (or using >, >=, < or <=) and
+        the found position is outside the data file, we need to continue
+        searching for the first key inside the data file
+ */
+ if (info->cur_row.lastpos >= info->state->data_file_length &&
+ (search_flag != HA_READ_KEY_EXACT ||
+ last_used_keyseg != keyinfo->seg + keyinfo->keysegs))
+ {
+ do
+ {
+ uint not_used[2];
+ /*
+            Skip rows that were inserted by other threads since we got the
+            lock. Note that this can only happen if we are not searching for
+            a full-length exact key, because the keys are sorted according
+            to position.
+ */
+ if (_ma_search_next(info, keyinfo, info->lastkey,
+ info->lastkey_length,
+ maria_readnext_vec[search_flag],
+ info->s->state.key_root[inx]))
+ break;
+ /*
+ Check that the found key does still match the search.
+ _ma_search_next() delivers the next key regardless of its
+ value.
+ */
+ if (search_flag == HA_READ_KEY_EXACT &&
+ ha_key_cmp(keyinfo->seg, (uchar*) key_buff,
+ (uchar*) info->lastkey, use_key_length,
+ SEARCH_FIND, not_used))
+ {
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ break;
+ }
+ } while (info->cur_row.lastpos >= info->state->data_file_length);
+ }
+ }
+ }
+ if (share->concurrent_insert)
+ rw_unlock(&share->key_root_lock[inx]);
+
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR)
+ {
+ fast_ma_writeinfo(info);
+ goto err;
+ }
+
+ /* Calculate length of the found key; Used by maria_rnext_same */
+ if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg)
+ info->last_rkey_length= _ma_keylength_part(keyinfo, info->lastkey,
+ last_used_keyseg);
+ else
+ info->last_rkey_length= pack_key_length;
+
+
+  /* Check if the caller doesn't want the record back, only the status */
+ if (!buf)
+ {
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(0);
+ }
+ if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+
+ info->cur_row.lastpos= HA_OFFSET_ERROR; /* Didn't find row */
+
+err:
+ /* Store last used key as a base for read next */
+ memcpy(info->lastkey,key_buff,pack_key_length);
+ info->last_rkey_length= pack_key_length;
+ bzero((char*) info->lastkey+pack_key_length,info->s->base.rec_reflength);
+ info->lastkey_length=pack_key_length+info->s->base.rec_reflength;
+
+ if (search_flag == HA_READ_AFTER_KEY)
+ info->update|=HA_STATE_NEXT_FOUND; /* Previous gives last row */
+ DBUG_RETURN(my_errno);
+} /* maria_rkey */
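
A hedged sketch of the usual call pattern for maria_rkey(); HA_READ_KEY_EXACT and the key_len == 0 convention ("use the whole key") are taken from the code above, while lookup_exact is an illustrative name:

  /* Sketch: exact-match lookup on index 'inx'.  Fills 'record_buf' and
     returns 0 on success, otherwise my_errno (e.g. HA_ERR_KEY_NOT_FOUND). */
  static int lookup_exact(MARIA_HA *info, byte *record_buf, int inx,
                          const byte *key)
  {
    return maria_rkey(info, record_buf, inx, key, 0, HA_READ_KEY_EXACT);
  }
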
diff --git a/storage/maria/ma_rlast.c b/storage/maria/ma_rlast.c
new file mode 100644
index 00000000000..504cc89aed3
--- /dev/null
+++ b/storage/maria/ma_rlast.c
@@ -0,0 +1,27 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+ /* Read last row through a specific key */
+
+int maria_rlast(MARIA_HA *info, byte *buf, int inx)
+{
+ DBUG_ENTER("maria_rlast");
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->update|= HA_STATE_NEXT_FOUND;
+ DBUG_RETURN(maria_rprev(info,buf,inx));
+} /* maria_rlast */
diff --git a/storage/maria/ma_rnext.c b/storage/maria/ma_rnext.c
new file mode 100644
index 00000000000..c7feded933e
--- /dev/null
+++ b/storage/maria/ma_rnext.c
@@ -0,0 +1,123 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#include "ma_rt_index.h"
+
+ /*
+ Read next row with the same key as previous read
+ One may have done a write, update or delete of the previous row.
+ NOTE! Even if one changes the previous row, the next read is done
+ based on the position of the last used key!
+ */
+
+int maria_rnext(MARIA_HA *info, byte *buf, int inx)
+{
+ int error,changed;
+ uint flag;
+ DBUG_ENTER("maria_rnext");
+
+ if ((inx = _ma_check_index(info,inx)) < 0)
+ DBUG_RETURN(my_errno);
+ flag=SEARCH_BIGGER; /* Read next */
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR &&
+ info->update & HA_STATE_PREV_FOUND)
+ flag=0; /* Read first */
+
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(my_errno);
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->key_root_lock[inx]);
+ changed= _ma_test_if_changed(info);
+ if (!flag)
+ {
+    switch (info->s->keyinfo[inx].key_alg) {
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ error=maria_rtree_get_first(info,inx,info->lastkey_length);
+ break;
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ error= _ma_search_first(info,info->s->keyinfo+inx,
+ info->s->state.key_root[inx]);
+ break;
+ }
+ }
+ else
+ {
+ switch (info->s->keyinfo[inx].key_alg) {
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ /*
+        Note that rtree doesn't support the table being changed since the
+        last call, so we do need to skip rows inserted by other threads,
+        as in the btree case
+ */
+ error= maria_rtree_get_next(info,inx,info->lastkey_length);
+ break;
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ if (!changed)
+ error= _ma_search_next(info,info->s->keyinfo+inx,info->lastkey,
+ info->lastkey_length,flag,
+ info->s->state.key_root[inx]);
+ else
+ error= _ma_search(info,info->s->keyinfo+inx,info->lastkey,
+ USE_WHOLE_KEY,flag, info->s->state.key_root[inx]);
+ }
+ }
+
+ if (info->s->concurrent_insert)
+ {
+ if (!error)
+ {
+ while (info->cur_row.lastpos >= info->state->data_file_length)
+ {
+ /* Skip rows inserted by other threads since we got a lock */
+ if ((error= _ma_search_next(info,info->s->keyinfo+inx,
+ info->lastkey,
+ info->lastkey_length,
+ SEARCH_BIGGER,
+ info->s->state.key_root[inx])))
+ break;
+ }
+ }
+ rw_unlock(&info->s->key_root_lock[inx]);
+ }
+ /* Don't clear if database-changed */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ info->update|= HA_STATE_NEXT_FOUND;
+
+ if (error)
+ {
+ if (my_errno == HA_ERR_KEY_NOT_FOUND)
+ my_errno=HA_ERR_END_OF_FILE;
+ }
+ else if (!buf)
+ {
+ DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
+ }
+ else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("error",("Got error: %d, errno: %d",error, my_errno));
+ DBUG_RETURN(my_errno);
+} /* maria_rnext */
diff --git a/storage/maria/ma_rnext_same.c b/storage/maria/ma_rnext_same.c
new file mode 100644
index 00000000000..a5ce0cfe15c
--- /dev/null
+++ b/storage/maria/ma_rnext_same.c
@@ -0,0 +1,108 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+#include "ma_rt_index.h"
+
+/*
+ Read next row with the same key as previous read, but abort if
+ the key changes.
+ One may have done a write, update or delete of the previous row.
+
+ NOTE! Even if one changes the previous row, the next read is done
+ based on the position of the last used key!
+*/
+
+int maria_rnext_same(MARIA_HA *info, byte *buf)
+{
+ int error;
+ uint inx,not_used[2];
+ MARIA_KEYDEF *keyinfo;
+ DBUG_ENTER("maria_rnext_same");
+
+ if ((int) (inx= info->lastinx) < 0 ||
+ info->cur_row.lastpos == HA_OFFSET_ERROR)
+ DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX);
+ keyinfo= info->s->keyinfo+inx;
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(my_errno);
+
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->key_root_lock[inx]);
+
+ switch (keyinfo->key_alg)
+ {
+#ifdef HAVE_RTREE_KEYS
+ case HA_KEY_ALG_RTREE:
+ if ((error=maria_rtree_find_next(info,inx,
+ maria_read_vec[info->last_key_func])))
+ {
+ error=1;
+ my_errno=HA_ERR_END_OF_FILE;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ break;
+ }
+ break;
+#endif
+ case HA_KEY_ALG_BTREE:
+ default:
+ if (!(info->update & HA_STATE_RNEXT_SAME))
+ {
+ /* First rnext_same; Store old key */
+ memcpy(info->lastkey2,info->lastkey,info->last_rkey_length);
+ }
+ for (;;)
+ {
+ if ((error= _ma_search_next(info,keyinfo,info->lastkey,
+ info->lastkey_length,SEARCH_BIGGER,
+ info->s->state.key_root[inx])))
+ break;
+ if (ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey,
+ (uchar*) info->lastkey2,
+ info->last_rkey_length, SEARCH_FIND, not_used))
+ {
+ error=1;
+ my_errno=HA_ERR_END_OF_FILE;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ break;
+ }
+ /* Skip rows that are inserted by other threads since we got a lock */
+ if (info->cur_row.lastpos < info->state->data_file_length)
+ break;
+ }
+ }
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->key_root_lock[inx]);
+ /* Don't clear if database-changed */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME;
+
+ if (error)
+ {
+ if (my_errno == HA_ERR_KEY_NOT_FOUND)
+ my_errno=HA_ERR_END_OF_FILE;
+ }
+ else if (!buf)
+ {
+ DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
+ }
+ else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+ DBUG_RETURN(my_errno);
+} /* maria_rnext_same */
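
A sketch of how maria_rkey() and maria_rnext_same() are meant to combine to visit every row sharing one key value (visit_duplicates is an illustrative name; error handling is abbreviated):

  /* Sketch: iterate all rows whose key on index 'inx' equals 'key'. */
  static void visit_duplicates(MARIA_HA *info, byte *record_buf, int inx,
                               const byte *key, uint key_len)
  {
    int error= maria_rkey(info, record_buf, inx, key, key_len,
                          HA_READ_KEY_EXACT);
    while (!error)
    {
      /* ... use record_buf ... */
      /* returns HA_ERR_END_OF_FILE once the key value changes */
      error= maria_rnext_same(info, record_buf);
    }
  }
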
diff --git a/storage/maria/ma_rprev.c b/storage/maria/ma_rprev.c
new file mode 100644
index 00000000000..ea562359ded
--- /dev/null
+++ b/storage/maria/ma_rprev.c
@@ -0,0 +1,89 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+ /*
+ Read previous row with the same key as previous read
+ One may have done a write, update or delete of the previous row.
+ NOTE! Even if one changes the previous row, the next read is done
+ based on the position of the last used key!
+ */
+
+int maria_rprev(MARIA_HA *info, byte *buf, int inx)
+{
+ int error,changed;
+ register uint flag;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("maria_rprev");
+
+ if ((inx = _ma_check_index(info,inx)) < 0)
+ DBUG_RETURN(my_errno);
+ flag=SEARCH_SMALLER; /* Read previous */
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR &&
+ info->update & HA_STATE_NEXT_FOUND)
+ flag=0; /* Read last */
+
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(my_errno);
+ changed= _ma_test_if_changed(info);
+ if (share->concurrent_insert)
+ rw_rdlock(&share->key_root_lock[inx]);
+ if (!flag)
+ error= _ma_search_last(info, share->keyinfo+inx,
+ share->state.key_root[inx]);
+ else if (!changed)
+ error= _ma_search_next(info,share->keyinfo+inx,info->lastkey,
+ info->lastkey_length,flag,
+ share->state.key_root[inx]);
+ else
+ error= _ma_search(info,share->keyinfo+inx,info->lastkey,
+ USE_WHOLE_KEY, flag, share->state.key_root[inx]);
+
+ if (share->concurrent_insert)
+ {
+ if (!error)
+ {
+ while (info->cur_row.lastpos >= info->state->data_file_length)
+ {
+ /* Skip rows that are inserted by other threads since we got a lock */
+ if ((error= _ma_search_next(info,share->keyinfo+inx,info->lastkey,
+ info->lastkey_length,
+ SEARCH_SMALLER,
+ share->state.key_root[inx])))
+ break;
+ }
+ }
+ rw_unlock(&share->key_root_lock[inx]);
+ }
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ info->update|= HA_STATE_PREV_FOUND;
+ if (error)
+ {
+ if (my_errno == HA_ERR_KEY_NOT_FOUND)
+ my_errno=HA_ERR_END_OF_FILE;
+ }
+ else if (!buf)
+ {
+ DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0);
+ }
+ else if (!(*info->read_record)(info, buf, info->cur_row.lastpos))
+ {
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ DBUG_RETURN(0);
+ }
+ DBUG_RETURN(my_errno);
+} /* maria_rprev */
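
The backward counterpart of the forward-scan sketch given after ma_rfirst.c, again purely illustrative:

  /* Sketch: reverse scan of index 0, last key to first. */
  static void scan_index_backward(MARIA_HA *info, byte *record_buf)
  {
    int error= maria_rlast(info, record_buf, 0);    /* position on last key */
    while (!error)
    {
      /* ... use record_buf ... */
      error= maria_rprev(info, record_buf, 0);      /* step to previous key */
    }
    /* ends with HA_ERR_END_OF_FILE at the start of the index */
  }
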
diff --git a/storage/maria/ma_rrnd.c b/storage/maria/ma_rrnd.c
new file mode 100644
index 00000000000..33940d5f23f
--- /dev/null
+++ b/storage/maria/ma_rrnd.c
@@ -0,0 +1,54 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Read a record with random access. The position of the record must be
+   supplied by the caller; reading "the next record" by passing
+   HA_OFFSET_ERROR is currently disabled (see the NOT_USED block below) */
+
+
+#include "maria_def.h"
+
+/*
+ Read a row based on position.
+
+ RETURN
+ 0 Ok.
+ HA_ERR_RECORD_DELETED Record is deleted.
+ HA_ERR_END_OF_FILE EOF.
+*/
+
+int maria_rrnd(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos)
+{
+ DBUG_ENTER("maria_rrnd");
+
+ DBUG_ASSERT(filepos != HA_OFFSET_ERROR);
+#ifdef NOT_USED
+ if (filepos == HA_OFFSET_ERROR)
+ {
+ skip_deleted_blocks=1;
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR) /* First read ? */
+ filepos= info->s->pack.header_length; /* Read first record */
+ else
+ filepos= info->cur_row.nextpos;
+ }
+#endif
+
+ /* Init all but update-flag */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(my_errno);
+
+ DBUG_RETURN((*info->s->read_record)(info, buf, filepos));
+}
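
A short hedged sketch of positional access with maria_rrnd(); the position is assumed to have been saved from an earlier read (info->cur_row.lastpos), which is how the upper layer normally obtains it, and read_at_position is an illustrative name:

  /* Sketch: re-read the row stored at 'pos'.  Returns 0,
     HA_ERR_RECORD_DELETED or HA_ERR_END_OF_FILE as documented above. */
  static int read_at_position(MARIA_HA *info, byte *record_buf,
                              MARIA_RECORD_POS pos)
  {
    return maria_rrnd(info, record_buf, pos);
  }
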
diff --git a/storage/maria/ma_rsame.c b/storage/maria/ma_rsame.c
new file mode 100644
index 00000000000..7556c1e7332
--- /dev/null
+++ b/storage/maria/ma_rsame.c
@@ -0,0 +1,70 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+/*
+  Re-read the current row, using either a read by position or a read by key
+
+ NOTES
+ If inx >= 0 find record using key
+
+ RETURN
+ 0 Ok
+ HA_ERR_KEY_NOT_FOUND Row is deleted
+ HA_ERR_END_OF_FILE End of file
+*/
+
+
+int maria_rsame(MARIA_HA *info, byte *record, int inx)
+{
+ DBUG_ENTER("maria_rsame");
+
+ if (inx != -1 && ! maria_is_key_active(info->s->state.key_map, inx))
+ {
+ DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX);
+ }
+ if (info->cur_row.lastpos == HA_OFFSET_ERROR ||
+ info->update & HA_STATE_DELETED)
+ {
+ DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No current record */
+ }
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ /* Read row from data file */
+ if (fast_ma_readinfo(info))
+ DBUG_RETURN(my_errno);
+
+ if (inx >= 0)
+ {
+ info->lastinx=inx;
+ info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record,
+ info->cur_row.lastpos);
+ if (info->s->concurrent_insert)
+ rw_rdlock(&info->s->key_root_lock[inx]);
+ VOID(_ma_search(info,info->s->keyinfo+inx,info->lastkey, USE_WHOLE_KEY,
+ SEARCH_SAME,
+ info->s->state.key_root[inx]));
+ if (info->s->concurrent_insert)
+ rw_unlock(&info->s->key_root_lock[inx]);
+ }
+
+ if (!(*info->read_record)(info, record, info->cur_row.lastpos))
+ DBUG_RETURN(0);
+ if (my_errno == HA_ERR_RECORD_DELETED)
+ my_errno=HA_ERR_KEY_NOT_FOUND;
+ DBUG_RETURN(my_errno);
+} /* maria_rsame */
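
A hedged sketch of maria_rsame(): with inx >= 0 the key is rebuilt from the row already held in 'record' and the index position is refreshed, while with inx == -1 only the data file is re-read (refresh_current_row is an illustrative name):

  /* Sketch: re-read the row the handler is currently positioned on.
     'record' must already hold the row's previous contents when inx >= 0,
     since the search key is rebuilt from it. */
  static int refresh_current_row(MARIA_HA *info, byte *record, int inx)
  {
    /* 0 on success; HA_ERR_KEY_NOT_FOUND if the row was deleted meanwhile */
    return maria_rsame(info, record, inx);
  }
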
diff --git a/storage/maria/ma_rsamepos.c b/storage/maria/ma_rsamepos.c
new file mode 100644
index 00000000000..92138693c62
--- /dev/null
+++ b/storage/maria/ma_rsamepos.c
@@ -0,0 +1,59 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Read a record by position and update the key position */
+/* Like maria_rsame(), but the position is supplied by the caller */
+
+#include "maria_def.h"
+
+
+ /*
+ ** If inx >= 0 update index pointer
+ ** Returns one of the following values:
+ ** 0 = Ok.
+ ** HA_ERR_KEY_NOT_FOUND = Row is deleted
+ ** HA_ERR_END_OF_FILE = End of file
+ */
+
+int maria_rsame_with_pos(MARIA_HA *info, byte *record, int inx,
+ MARIA_RECORD_POS filepos)
+{
+ DBUG_ENTER("maria_rsame_with_pos");
+ DBUG_PRINT("enter",("index: %d filepos: %ld", inx, (long) filepos));
+
+ if (inx < -1 ||
+ (inx >= 0 && ! maria_is_key_active(info->s->state.key_map, inx)))
+ {
+ DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX);
+ }
+
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ if ((*info->s->read_record)(info, record, filepos))
+ {
+ if (my_errno == HA_ERR_RECORD_DELETED)
+ my_errno=HA_ERR_KEY_NOT_FOUND;
+ DBUG_RETURN(my_errno);
+ }
+ info->cur_row.lastpos= filepos;
+ info->lastinx= inx;
+ if (inx >= 0)
+ {
+ info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record,
+ info->cur_row.lastpos);
+ info->update|=HA_STATE_KEY_CHANGED; /* Don't use indexposition */
+ }
+ DBUG_RETURN(0);
+} /* maria_rsame_with_pos */
diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c
new file mode 100644
index 00000000000..8e8ec6c991b
--- /dev/null
+++ b/storage/maria/ma_rt_index.c
@@ -0,0 +1,1093 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_RTREE_KEYS
+
+#include "ma_rt_index.h"
+#include "ma_rt_key.h"
+#include "ma_rt_mbr.h"
+
+#define REINSERT_BUFFER_INC 10
+#define PICK_BY_AREA
+/*#define PICK_BY_PERIMETER*/
+
+typedef struct st_page_level
+{
+ uint level;
+ my_off_t offs;
+} stPageLevel;
+
+typedef struct st_page_list
+{
+ ulong n_pages;
+ ulong m_pages;
+ stPageLevel *pages;
+} stPageList;
+
+
+/*
+ Find next key in r-tree according to search_flag recursively
+
+ NOTES
+ Used in maria_rtree_find_first() and maria_rtree_find_next()
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uint search_flag,
+ uint nod_cmp_flag, my_off_t page, int level)
+{
+ uint nod_flag;
+ int res;
+ byte *page_buf, *k, *last;
+ int k_len;
+ uint *saved_key = (uint*) (info->maria_rtree_recursion_state) + level;
+
+ if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length)))
+ {
+ my_errno = HA_ERR_OUT_OF_MEM;
+ return -1;
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0))
+ goto err1;
+ nod_flag = _ma_test_if_nod(page_buf);
+
+ k_len = keyinfo->keylength - info->s->base.rec_reflength;
+
+ if(info->maria_rtree_recursion_depth >= level)
+ {
+ k= page_buf + *saved_key;
+ }
+ else
+ {
+ k = rt_PAGE_FIRST_KEY(page_buf, nod_flag);
+ }
+ last= rt_PAGE_END(page_buf);
+
+ for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag))
+ {
+ if (nod_flag)
+ {
+ /* this is an internal node in the tree */
+ if (!(res = maria_rtree_key_cmp(keyinfo->seg,
+ info->first_mbr_key, k,
+ info->last_rkey_length, nod_cmp_flag)))
+ {
+ switch ((res = maria_rtree_find_req(info, keyinfo, search_flag,
+ nod_cmp_flag,
+ _ma_kpos(nod_flag, k),
+ level + 1)))
+ {
+ case 0: /* found - exit from recursion */
+ *saved_key = k - page_buf;
+ goto ok;
+ case 1: /* not found - continue searching */
+ info->maria_rtree_recursion_depth = level;
+ break;
+ default: /* error */
+ case -1:
+ goto err1;
+ }
+ }
+ }
+ else
+ {
+ /* this is a leaf */
+ if (!maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key,
+ k, info->last_rkey_length, search_flag))
+ {
+ byte *after_key = (byte*) rt_PAGE_NEXT_KEY(k, k_len, nod_flag);
+ info->cur_row.lastpos = _ma_dpos(info, 0, after_key);
+ info->lastkey_length = k_len + info->s->base.rec_reflength;
+ memcpy(info->lastkey, k, info->lastkey_length);
+ info->maria_rtree_recursion_depth = level;
+ *saved_key = last - page_buf;
+
+ if (after_key < last)
+ {
+ info->int_keypos = info->buff;
+ info->int_maxpos = info->buff + (last - after_key);
+ memcpy(info->buff, after_key, last - after_key);
+ info->keybuff_used = 0;
+ }
+ else
+ {
+ info->keybuff_used = 1;
+ }
+
+ res = 0;
+ goto ok;
+ }
+ }
+ }
+ info->cur_row.lastpos = HA_OFFSET_ERROR;
+ my_errno = HA_ERR_KEY_NOT_FOUND;
+ res = 1;
+
+ok:
+ my_afree((byte*)page_buf);
+ return res;
+
+err1:
+ my_afree((byte*)page_buf);
+ info->cur_row.lastpos = HA_OFFSET_ERROR;
+ return -1;
+}
+
+
+/*
+ Find first key in r-tree according to search_flag condition
+
+ SYNOPSIS
+ maria_rtree_find_first()
+ info Handler to MARIA file
+ uint keynr Key number to use
+ key Key to search for
+ key_length Length of 'key'
+ search_flag Bitmap of flags how to do the search
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+int maria_rtree_find_first(MARIA_HA *info, uint keynr, byte *key,
+ uint key_length, uint search_flag)
+{
+ my_off_t root;
+ uint nod_cmp_flag;
+ MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr;
+
+ if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ /*
+    Save the searched key, including the data pointer.
+ The data pointer is required if the search_flag contains MBR_DATA.
+ */
+ memcpy(info->first_mbr_key, key, keyinfo->keylength);
+ info->last_rkey_length = key_length;
+
+ info->maria_rtree_recursion_depth = -1;
+ info->keybuff_used = 1;
+
+ nod_cmp_flag= ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ?
+ MBR_WITHIN : MBR_INTERSECT);
+ return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root,
+ 0);
+}
+
+
+/*
+ Find next key in r-tree according to search_flag condition
+
+ SYNOPSIS
+ maria_rtree_find_next()
+ info Handler to MARIA file
+ uint keynr Key number to use
+ search_flag Bitmap of flags how to do the search
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag)
+{
+ my_off_t root;
+ uint nod_cmp_flag;
+ MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr;
+
+ if (info->update & HA_STATE_DELETED)
+ return maria_rtree_find_first(info, keynr, info->lastkey,
+ info->lastkey_length,
+ search_flag);
+
+ if (!info->keybuff_used)
+ {
+ byte *key= info->int_keypos;
+
+ while (key < info->int_maxpos)
+ {
+ if (!maria_rtree_key_cmp(keyinfo->seg,
+ info->first_mbr_key, key,
+ info->last_rkey_length, search_flag))
+ {
+ byte *after_key= key + keyinfo->keylength;
+
+ info->cur_row.lastpos= _ma_dpos(info, 0, after_key);
+ memcpy(info->lastkey, key, info->lastkey_length);
+
+ if (after_key < info->int_maxpos)
+ info->int_keypos= after_key;
+ else
+ info->keybuff_used= 1;
+ return 0;
+ }
+ key+= keyinfo->keylength;
+ }
+ }
+ if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ nod_cmp_flag = ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ?
+ MBR_WITHIN : MBR_INTERSECT);
+ return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, 0);
+}
+
+
+/*
+ Get next key in r-tree recursively
+
+ NOTES
+ Used in maria_rtree_get_first() and maria_rtree_get_next()
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint key_length,
+ my_off_t page, int level)
+{
+ byte *page_buf, *last, *k;
+ uint nod_flag, k_len;
+ int res;
+ uint *saved_key= (uint*) (info->maria_rtree_recursion_state) + level;
+
+ if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length)))
+ return -1;
+ if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0))
+ goto err1;
+ nod_flag = _ma_test_if_nod(page_buf);
+
+ k_len = keyinfo->keylength - info->s->base.rec_reflength;
+
+ if(info->maria_rtree_recursion_depth >= level)
+ {
+ k = page_buf + *saved_key;
+ if (!nod_flag)
+ {
+ /* Only leaf pages contain data references. */
+ /* Need to check next key with data reference. */
+ k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag);
+ }
+ }
+ else
+ {
+ k = rt_PAGE_FIRST_KEY(page_buf, nod_flag);
+ }
+ last = rt_PAGE_END(page_buf);
+
+ for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag))
+ {
+ if (nod_flag)
+ {
+ /* this is an internal node in the tree */
+ switch ((res = maria_rtree_get_req(info, keyinfo, key_length,
+ _ma_kpos(nod_flag, k), level + 1)))
+ {
+ case 0: /* found - exit from recursion */
+ *saved_key = k - page_buf;
+ goto ok;
+ case 1: /* not found - continue searching */
+ info->maria_rtree_recursion_depth = level;
+ break;
+ default:
+ case -1: /* error */
+ goto err1;
+ }
+ }
+ else
+ {
+ /* this is a leaf */
+ byte *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag);
+ info->cur_row.lastpos = _ma_dpos(info, 0, after_key);
+ info->lastkey_length = k_len + info->s->base.rec_reflength;
+ memcpy(info->lastkey, k, info->lastkey_length);
+
+ info->maria_rtree_recursion_depth = level;
+ *saved_key = k - page_buf;
+
+ if (after_key < last)
+ {
+ info->int_keypos = (byte*) saved_key;
+ memcpy(info->buff, page_buf, keyinfo->block_length);
+ info->int_maxpos = rt_PAGE_END(info->buff);
+ info->keybuff_used = 0;
+ }
+ else
+ {
+ info->keybuff_used = 1;
+ }
+
+ res = 0;
+ goto ok;
+ }
+ }
+ info->cur_row.lastpos = HA_OFFSET_ERROR;
+ my_errno = HA_ERR_KEY_NOT_FOUND;
+ res = 1;
+
+ok:
+ my_afree((byte*)page_buf);
+ return res;
+
+err1:
+ my_afree((byte*)page_buf);
+ info->cur_row.lastpos = HA_OFFSET_ERROR;
+ return -1;
+}
+
+
+/*
+ Get first key in r-tree
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length)
+{
+ my_off_t root;
+ MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr;
+
+ if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ info->maria_rtree_recursion_depth = -1;
+ info->keybuff_used = 1;
+
+ return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0);
+}
+
+
+/*
+ Get next key in r-tree
+
+ RETURN
+ -1 Error
+ 0 Found
+ 1 Not found
+*/
+
+int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length)
+{
+ my_off_t root;
+ MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr;
+
+ if (!info->keybuff_used)
+ {
+ uint k_len = keyinfo->keylength - info->s->base.rec_reflength;
+ /* rt_PAGE_NEXT_KEY(info->int_keypos) */
+ byte *key = info->buff + *(int*)info->int_keypos + k_len +
+ info->s->base.rec_reflength;
+ /* rt_PAGE_NEXT_KEY(key) */
+ byte *after_key = key + k_len + info->s->base.rec_reflength;
+
+ info->cur_row.lastpos = _ma_dpos(info, 0, after_key);
+ info->lastkey_length = k_len + info->s->base.rec_reflength;
+ memcpy(info->lastkey, key, k_len + info->s->base.rec_reflength);
+
+ *(int*)info->int_keypos = key - info->buff;
+ if (after_key >= info->int_maxpos)
+ {
+ info->keybuff_used = 1;
+ }
+
+ return 0;
+ }
+ else
+ {
+ if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0);
+ }
+}
+
+
+/*
+ Choose non-leaf better key for insertion
+*/
+
+#ifdef PICK_BY_PERIMETER
+static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key,
+ uint key_length, byte *page_buf,
+ uint nod_flag)
+{
+ double increase;
+ double best_incr = DBL_MAX;
+ double perimeter;
+ double best_perimeter;
+ uchar *best_key;
+ uchar *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag);
+ uchar *last = rt_PAGE_END(page_buf);
+
+ LINT_INIT(best_perimeter);
+ LINT_INIT(best_key);
+
+ for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag))
+ {
+ if ((increase = maria_rtree_perimeter_increase(keyinfo->seg, k, key, key_length,
+ &perimeter)) == -1)
+ return NULL;
+ if ((increase < best_incr)||
+ (increase == best_incr && perimeter < best_perimeter))
+ {
+ best_key = k;
+ best_perimeter= perimeter;
+ best_incr = increase;
+ }
+ }
+ return best_key;
+}
+
+#endif /*PICK_BY_PERIMETER*/
+
+#ifdef PICK_BY_AREA
+static byte *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key,
+ uint key_length, byte *page_buf,
+ uint nod_flag)
+{
+ double increase;
+ double best_incr = DBL_MAX;
+ double area;
+ double best_area;
+ byte *best_key;
+ byte *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag);
+ byte *last = rt_PAGE_END(page_buf);
+
+ LINT_INIT(best_area);
+ LINT_INIT(best_key);
+
+ for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag))
+ {
+ /* The following is safe as -1.0 is an exact number */
+ if ((increase = maria_rtree_area_increase(keyinfo->seg, k, key, key_length,
+ &area)) == -1.0)
+ return NULL;
+ /* The following should be safe, even if we compare doubles */
+ if (increase < best_incr)
+ {
+ best_key = k;
+ best_area = area;
+ best_incr = increase;
+ }
+ else
+ {
+ /* The following should be safe, even if we compare doubles */
+ if ((increase == best_incr) && (area < best_area))
+ {
+ best_key = k;
+ best_area = area;
+ best_incr = increase;
+ }
+ }
+ }
+ return best_key;
+}
+
+#endif /*PICK_BY_AREA*/
+
+/*
+ Go down and insert key into tree
+
+ RETURN
+ -1 Error
+ 0 Child was not split
+ 1 Child was split
+*/
+
+static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key,
+ uint key_length, my_off_t page,
+ my_off_t *new_page,
+ int ins_level, int level)
+{
+ uint nod_flag;
+ int res;
+ byte *page_buf, *k;
+
+ if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length +
+ HA_MAX_KEY_BUFF)))
+ {
+ my_errno = HA_ERR_OUT_OF_MEM;
+ return -1;
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0))
+ goto err1;
+ nod_flag = _ma_test_if_nod(page_buf);
+
+ if ((ins_level == -1 && nod_flag) || /* key: go down to leaf */
+ (ins_level > -1 && ins_level > level)) /* branch: go down to ins_level */
+ {
+ if ((k = maria_rtree_pick_key(info, keyinfo, key, key_length, page_buf,
+ nod_flag)) == NULL)
+ goto err1;
+ switch ((res = maria_rtree_insert_req(info, keyinfo, key, key_length,
+ _ma_kpos(nod_flag, k), new_page,
+ ins_level, level + 1)))
+ {
+ case 0: /* child was not split */
+ {
+ maria_rtree_combine_rect(keyinfo->seg, k, key, k, key_length);
+ if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf))
+ goto err1;
+ goto ok;
+ }
+ case 1: /* child was split */
+ {
+ byte *new_key = page_buf + keyinfo->block_length + nod_flag;
+ /* set proper MBR for key */
+ if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length,
+ _ma_kpos(nod_flag, k)))
+ goto err1;
+ /* add new key for new page */
+ _ma_kpointer(info, new_key - nod_flag, *new_page);
+ if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length,
+ *new_page))
+ goto err1;
+ res = maria_rtree_add_key(info, keyinfo, new_key, key_length,
+ page_buf, new_page);
+ if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf))
+ goto err1;
+ goto ok;
+ }
+ default:
+ case -1: /* error */
+ {
+ goto err1;
+ }
+ }
+ }
+ else
+ {
+ res = maria_rtree_add_key(info, keyinfo, key, key_length, page_buf,
+ new_page);
+ if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf))
+ goto err1;
+ }
+
+ok:
+ my_afree(page_buf);
+ return res;
+
+err1:
+ my_afree(page_buf);
+ return -1;
+}
+
+
+/*
+ Insert key into the tree
+
+ RETURN
+ -1 Error
+ 0 Root was not split
+ 1 Root was split
+*/
+
+static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, byte *key,
+ uint key_length, int ins_level)
+{
+ my_off_t old_root;
+ MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr;
+ int res;
+ my_off_t new_page;
+
+ if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ int res;
+
+ if ((old_root = _ma_new(info, keyinfo, DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ return -1;
+ info->keybuff_used = 1;
+ maria_putint(info->buff, 2, 0);
+ res = maria_rtree_add_key(info, keyinfo, key, key_length, info->buff, NULL);
+ if (_ma_write_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, info->buff))
+ return 1;
+ info->s->state.key_root[keynr] = old_root;
+ return res;
+ }
+
+ switch ((res = maria_rtree_insert_req(info, keyinfo, key, key_length,
+ old_root, &new_page, ins_level, 0)))
+ {
+ case 0: /* root was not split */
+ {
+ break;
+ }
+ case 1: /* root was split, grow a new root */
+ {
+ byte *new_root_buf, *new_key;
+ my_off_t new_root;
+ uint nod_flag = info->s->base.key_reflength;
+
+ if (!(new_root_buf= (byte*) my_alloca((uint)keyinfo->block_length +
+ HA_MAX_KEY_BUFF)))
+ {
+ my_errno = HA_ERR_OUT_OF_MEM;
+ return -1;
+ }
+
+ maria_putint(new_root_buf, 2, nod_flag);
+ if ((new_root = _ma_new(info, keyinfo, DFLT_INIT_HITS)) ==
+ HA_OFFSET_ERROR)
+ goto err1;
+
+ new_key = new_root_buf + keyinfo->block_length + nod_flag;
+
+ _ma_kpointer(info, new_key - nod_flag, old_root);
+ if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length,
+ old_root))
+ goto err1;
+ if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf,
+ NULL)
+ == -1)
+ goto err1;
+ _ma_kpointer(info, new_key - nod_flag, new_page);
+ if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length,
+ new_page))
+ goto err1;
+ if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf,
+ NULL)
+ == -1)
+ goto err1;
+ if (_ma_write_keypage(info, keyinfo, new_root,
+ DFLT_INIT_HITS, new_root_buf))
+ goto err1;
+ info->s->state.key_root[keynr] = new_root;
+
+ my_afree((byte*)new_root_buf);
+ break;
+err1:
+ my_afree((byte*)new_root_buf);
+ return -1;
+ }
+ default:
+ case -1: /* error */
+ {
+ break;
+ }
+ }
+ return res;
+}
+
+
+/*
+ Insert key into the tree - interface function
+
+ RETURN
+ -1 Error
+ 0 OK
+*/
+
+int maria_rtree_insert(MARIA_HA *info, uint keynr, byte *key, uint key_length)
+{
+ return (!key_length ||
+ (maria_rtree_insert_level(info, keynr, key, key_length, -1) == -1)) ?
+ -1 : 0;
+}
+
+
+/*
+ Fill reinsert page buffer
+
+ RETURN
+ -1 Error
+ 0 OK
+*/
+
+static int maria_rtree_fill_reinsert_list(stPageList *ReinsertList, my_off_t page,
+ int level)
+{
+ if (ReinsertList->n_pages == ReinsertList->m_pages)
+ {
+ ReinsertList->m_pages += REINSERT_BUFFER_INC;
+ if (!(ReinsertList->pages = (stPageLevel*)my_realloc((gptr)ReinsertList->pages,
+ ReinsertList->m_pages * sizeof(stPageLevel), MYF(MY_ALLOW_ZERO_PTR))))
+ goto err1;
+ }
+ /* save page to ReinsertList */
+ ReinsertList->pages[ReinsertList->n_pages].offs = page;
+ ReinsertList->pages[ReinsertList->n_pages].level = level;
+ ReinsertList->n_pages++;
+ return 0;
+
+err1:
+ return -1;
+}
+
+
+/*
+ Go down and delete key from the tree
+
+ RETURN
+ -1 Error
+ 0 Deleted
+ 1 Not found
+ 2 Empty leaf
+*/
+
+static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key,
+ uint key_length, my_off_t page,
+ uint *page_size,
+ stPageList *ReinsertList, int level)
+{
+ ulong i;
+ uint nod_flag;
+ int res;
+ byte *page_buf, *last, *k;
+
+ if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length)))
+ {
+ my_errno = HA_ERR_OUT_OF_MEM;
+ return -1;
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0))
+ goto err1;
+ nod_flag = _ma_test_if_nod(page_buf);
+
+ k = rt_PAGE_FIRST_KEY(page_buf, nod_flag);
+ last = rt_PAGE_END(page_buf);
+
+ for (i = 0; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag), i++)
+ {
+ if (nod_flag)
+ {
+ /* not leaf */
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN))
+ {
+ switch ((res = maria_rtree_delete_req(info, keyinfo, key, key_length,
+ _ma_kpos(nod_flag, k), page_size, ReinsertList, level + 1)))
+ {
+ case 0: /* deleted */
+ {
+ /* test page filling */
+ if (*page_size + key_length >=
+ rt_PAGE_MIN_SIZE(keyinfo->block_length))
+ {
+ /* OK */
+ if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length,
+ _ma_kpos(nod_flag, k)))
+ goto err1;
+ if (_ma_write_keypage(info, keyinfo, page,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ }
+ else
+ {
+            /* too small: delete key & add its descendant to the reinsert list */
+ if (maria_rtree_fill_reinsert_list(ReinsertList,
+ _ma_kpos(nod_flag, k),
+ level + 1))
+ goto err1;
+ maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag);
+ if (_ma_write_keypage(info, keyinfo, page,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ *page_size = maria_getint(page_buf);
+ }
+
+ goto ok;
+ }
+ case 1: /* not found - continue searching */
+ {
+ break;
+ }
+ case 2: /* vacuous case: last key in the leaf */
+ {
+ maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag);
+ if (_ma_write_keypage(info, keyinfo, page,
+ DFLT_INIT_HITS, page_buf))
+ goto err1;
+ *page_size = maria_getint(page_buf);
+ res = 0;
+ goto ok;
+ }
+ default: /* error */
+ case -1:
+ {
+ goto err1;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* leaf */
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_EQUAL | MBR_DATA))
+ {
+ maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag);
+ *page_size = maria_getint(page_buf);
+ if (*page_size == 2)
+ {
+ /* last key in the leaf */
+ res = 2;
+ if (_ma_dispose(info, keyinfo, page, DFLT_INIT_HITS))
+ goto err1;
+ }
+ else
+ {
+ res = 0;
+ if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf))
+ goto err1;
+ }
+ goto ok;
+ }
+ }
+ }
+ res = 1;
+
+ok:
+ my_afree((byte*)page_buf);
+ return res;
+
+err1:
+ my_afree((byte*)page_buf);
+ return -1;
+}
+
+
+/*
+ Delete key - interface function
+
+ RETURN
+ -1 Error
+ 0 Deleted
+*/
+
+int maria_rtree_delete(MARIA_HA *info, uint keynr, byte *key, uint key_length)
+{
+ uint page_size;
+ stPageList ReinsertList;
+ my_off_t old_root;
+ MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr;
+
+ if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ ReinsertList.pages = NULL;
+ ReinsertList.n_pages = 0;
+ ReinsertList.m_pages = 0;
+
+ switch (maria_rtree_delete_req(info, keyinfo, key, key_length, old_root,
+ &page_size, &ReinsertList, 0))
+ {
+ case 2:
+ {
+ info->s->state.key_root[keynr] = HA_OFFSET_ERROR;
+ return 0;
+ }
+ case 0:
+ {
+ uint nod_flag;
+ ulong i;
+ for (i = 0; i < ReinsertList.n_pages; ++i)
+ {
+ uint nod_flag;
+ byte *page_buf, *k, *last;
+
+ if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length)))
+ {
+ my_errno = HA_ERR_OUT_OF_MEM;
+ goto err1;
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, ReinsertList.pages[i].offs,
+ DFLT_INIT_HITS, page_buf, 0))
+ goto err1;
+ nod_flag = _ma_test_if_nod(page_buf);
+ k = rt_PAGE_FIRST_KEY(page_buf, nod_flag);
+ last = rt_PAGE_END(page_buf);
+ for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag))
+ {
+ if (maria_rtree_insert_level(info, keynr, k, key_length,
+ ReinsertList.pages[i].level) == -1)
+ {
+ my_afree(page_buf);
+ goto err1;
+ }
+ }
+ my_afree(page_buf);
+ if (_ma_dispose(info, keyinfo, ReinsertList.pages[i].offs,
+ DFLT_INIT_HITS))
+ goto err1;
+ }
+ if (ReinsertList.pages)
+ my_free((byte*) ReinsertList.pages, MYF(0));
+
+ /* check for redundant root (not leaf, 1 child) and eliminate */
+ if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ goto err1;
+ if (!_ma_fetch_keypage(info, keyinfo, old_root, DFLT_INIT_HITS,
+ info->buff, 0))
+ goto err1;
+ nod_flag = _ma_test_if_nod(info->buff);
+ page_size = maria_getint(info->buff);
+ if (nod_flag && (page_size == 2 + key_length + nod_flag))
+ {
+ my_off_t new_root = _ma_kpos(nod_flag,
+ rt_PAGE_FIRST_KEY(info->buff, nod_flag));
+ if (_ma_dispose(info, keyinfo, old_root, DFLT_INIT_HITS))
+ goto err1;
+ info->s->state.key_root[keynr] = new_root;
+ }
+ info->update= HA_STATE_DELETED;
+ return 0;
+
+err1:
+ return -1;
+ }
+ case 1: /* not found */
+ {
+ my_errno = HA_ERR_KEY_NOT_FOUND;
+ return -1;
+ }
+ default:
+ case -1: /* error */
+ return -1;
+ }
+}
+
+
+/*
+ Estimate number of suitable keys in the tree
+
+ RETURN
+ estimated value
+*/
+
+ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, byte *key,
+ uint key_length, uint flag)
+{
+ MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr;
+ my_off_t root;
+ uint i = 0;
+ uint nod_flag, k_len;
+ byte *page_buf, *k, *last;
+ double area = 0;
+ ha_rows res = 0;
+
+ if (flag & MBR_DISJOINT)
+ return info->state->records;
+
+ if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ return HA_POS_ERROR;
+ if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length)))
+ return HA_POS_ERROR;
+ if (!_ma_fetch_keypage(info, keyinfo, root, DFLT_INIT_HITS, page_buf, 0))
+ goto err1;
+ nod_flag = _ma_test_if_nod(page_buf);
+
+ k_len = keyinfo->keylength - info->s->base.rec_reflength;
+
+ k = rt_PAGE_FIRST_KEY(page_buf, nod_flag);
+ last = rt_PAGE_END(page_buf);
+
+ for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag), i++)
+ {
+ if (nod_flag)
+ {
+ double k_area = maria_rtree_rect_volume(keyinfo->seg, k, key_length);
+
+ /* The following should be safe, even if we compare doubles */
+ if (k_area == 0)
+ {
+ if (flag & (MBR_CONTAIN | MBR_INTERSECT))
+ {
+ area += 1;
+ }
+ else if (flag & (MBR_WITHIN | MBR_EQUAL))
+ {
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length,
+ MBR_WITHIN))
+ area += 1;
+ }
+ else
+ goto err1;
+ }
+ else
+ {
+ if (flag & (MBR_CONTAIN | MBR_INTERSECT))
+ {
+ area+= maria_rtree_overlapping_area(keyinfo->seg, key, k,
+ key_length) / k_area;
+ }
+ else if (flag & (MBR_WITHIN | MBR_EQUAL))
+ {
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length,
+ MBR_WITHIN))
+ area+= (maria_rtree_rect_volume(keyinfo->seg, key, key_length) /
+ k_area);
+ }
+ else
+ goto err1;
+ }
+ }
+ else
+ {
+ if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, flag))
+ ++res;
+ }
+ }
+ if (nod_flag)
+ {
+ if (i)
+ res = (ha_rows) (area / i * info->state->records);
+ else
+ res = HA_POS_ERROR;
+ }
+
+ my_afree((byte*)page_buf);
+ return res;
+
+err1:
+ my_afree(page_buf);
+ return HA_POS_ERROR;
+}
+
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_rt_index.h b/storage/maria/ma_rt_index.h
new file mode 100644
index 00000000000..76b6c6e230c
--- /dev/null
+++ b/storage/maria/ma_rt_index.h
@@ -0,0 +1,48 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _rt_index_h
+#define _rt_index_h
+
+#ifdef HAVE_RTREE_KEYS
+
+#define rt_PAGE_FIRST_KEY(page, nod_flag) (page + 2 + nod_flag)
+#define rt_PAGE_NEXT_KEY(key, key_length, nod_flag) (key + key_length + \
+ (nod_flag ? nod_flag : info->s->base.rec_reflength))
+#define rt_PAGE_END(page) (page + maria_getint(page))
+
+#define rt_PAGE_MIN_SIZE(block_length) ((uint)(block_length) / 3)
+
+int maria_rtree_insert(MARIA_HA *info, uint keynr, byte *key, uint key_length);
+int maria_rtree_delete(MARIA_HA *info, uint keynr, byte *key, uint key_length);
+
+int maria_rtree_find_first(MARIA_HA *info, uint keynr, byte *key,
+ uint key_length, uint search_flag);
+int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag);
+
+int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length);
+int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length);
+
+ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, byte *key,
+ uint key_length, uint flag);
+
+int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page,
+ byte *key, uint key_length,
+ my_off_t *new_page_offs);
+
+#endif /*HAVE_RTREE_KEYS*/
+#endif /* _rt_index_h */
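
The rt_PAGE_* macros above assume a MARIA_HA pointer named 'info' in scope, since rt_PAGE_NEXT_KEY falls back to info->s->base.rec_reflength on leaf pages. A hedged sketch of the page-iteration idiom they support, mirroring the loops in ma_rt_index.c (walk_rtree_page is an illustrative name):

  /* Sketch: walk all keys of one fetched R-tree page buffer. */
  static void walk_rtree_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
                              byte *page_buf)
  {
    uint nod_flag= _ma_test_if_nod(page_buf);        /* 0 on leaf pages */
    uint k_len= keyinfo->keylength - info->s->base.rec_reflength;
    byte *k= rt_PAGE_FIRST_KEY(page_buf, nod_flag);
    byte *last= rt_PAGE_END(page_buf);

    for (; k < last; k= rt_PAGE_NEXT_KEY(k, k_len, nod_flag))
    {
      /* internal nodes: _ma_kpos(nod_flag, k) gives the child page;
         leaves: _ma_dpos() gives the row position, as in the code above */
    }
  }
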
diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c
new file mode 100644
index 00000000000..1453195d263
--- /dev/null
+++ b/storage/maria/ma_rt_key.c
@@ -0,0 +1,102 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_RTREE_KEYS
+#include "ma_rt_index.h"
+#include "ma_rt_key.h"
+#include "ma_rt_mbr.h"
+
+/*
+ Add key to the page
+
+ RESULT VALUES
+ -1 Error
+ 0 Not split
+ 1 Split
+*/
+
+int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key,
+ uint key_length, byte *page_buf, my_off_t *new_page)
+{
+ uint page_size = maria_getint(page_buf);
+ uint nod_flag = _ma_test_if_nod(page_buf);
+
+ if (page_size + key_length + info->s->base.rec_reflength <=
+ keyinfo->block_length)
+ {
+ /* split won't be necessary */
+ if (nod_flag)
+ {
+ /* save key */
+ memcpy(rt_PAGE_END(page_buf), key - nod_flag, key_length + nod_flag);
+ page_size += key_length + nod_flag;
+ }
+ else
+ {
+ /* save key */
+ memcpy(rt_PAGE_END(page_buf), key, key_length +
+ info->s->base.rec_reflength);
+ page_size += key_length + info->s->base.rec_reflength;
+ }
+ maria_putint(page_buf, page_size, nod_flag);
+ return 0;
+ }
+
+ return (maria_rtree_split_page(info, keyinfo, page_buf, key, key_length,
+ new_page) ? -1 : 1);
+}
+
+
+/*
+ Delete key from the page
+*/
+
+int maria_rtree_delete_key(MARIA_HA *info, byte *page_buf, byte *key,
+ uint key_length, uint nod_flag)
+{
+ uint16 page_size = maria_getint(page_buf);
+ byte *key_start;
+
+ key_start= key - nod_flag;
+ if (!nod_flag)
+ key_length += info->s->base.rec_reflength;
+
+ memmove(key_start, key + key_length, page_size - key_length -
+ (key - page_buf));
+ page_size-= key_length + nod_flag;
+
+ maria_putint(page_buf, page_size, nod_flag);
+ return 0;
+}
+
+
+/*
+ Calculate and store key MBR
+*/
+
+int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key,
+ uint key_length, my_off_t child_page)
+{
+ if (!_ma_fetch_keypage(info, keyinfo, child_page,
+ DFLT_INIT_HITS, info->buff, 0))
+ return -1;
+
+ return maria_rtree_page_mbr(info, keyinfo->seg, info->buff, key, key_length);
+}
+
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_rt_key.h b/storage/maria/ma_rt_key.h
new file mode 100644
index 00000000000..f44251782c1
--- /dev/null
+++ b/storage/maria/ma_rt_key.h
@@ -0,0 +1,33 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Ramil Kalimullin, who has a shared copyright to this code */
+
+#ifndef _rt_key_h
+#define _rt_key_h
+
+#ifdef HAVE_RTREE_KEYS
+
+int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key,
+ uint key_length, byte *page_buf, my_off_t *new_page);
+int maria_rtree_delete_key(MARIA_HA *info, byte *page, byte *key,
+ uint key_length, uint nod_flag);
+int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key,
+ uint key_length, my_off_t child_page);
+
+#endif /*HAVE_RTREE_KEYS*/
+#endif /* _rt_key_h */
diff --git a/storage/maria/ma_rt_mbr.c b/storage/maria/ma_rt_mbr.c
new file mode 100644
index 00000000000..851618a4300
--- /dev/null
+++ b/storage/maria/ma_rt_mbr.c
@@ -0,0 +1,807 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_RTREE_KEYS
+
+#include "ma_rt_index.h"
+#include "ma_rt_mbr.h"
+
+#define INTERSECT_CMP(amin, amax, bmin, bmax) ((amin > bmax) || (bmin > amax))
+#define CONTAIN_CMP(amin, amax, bmin, bmax) ((bmin > amin) || (bmax < amax))
+#define WITHIN_CMP(amin, amax, bmin, bmax) ((amin > bmin) || (amax < bmax))
+#define DISJOINT_CMP(amin, amax, bmin, bmax) ((amin <= bmax) && (bmin <= amax))
+#define EQUAL_CMP(amin, amax, bmin, bmax) ((amin != bmin) || (amax != bmax))
+
+#define FCMP(A, B) ((int)(A) - (int)(B))
+#define p_inc(A, B, X) {A += X; B += X;}
+
+#define RT_CMP(nextflag) \
+ if (nextflag & MBR_INTERSECT) \
+ { \
+ if (INTERSECT_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ } \
+ else if (nextflag & MBR_CONTAIN) \
+ { \
+ if (CONTAIN_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ } \
+ else if (nextflag & MBR_WITHIN) \
+ { \
+ if (WITHIN_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ } \
+ else if (nextflag & MBR_EQUAL) \
+ { \
+ if (EQUAL_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ } \
+ else if (nextflag & MBR_DISJOINT) \
+ { \
+ if (DISJOINT_CMP(amin, amax, bmin, bmax)) \
+ return 1; \
+ }\
+ else /* if unknown comparison operator */ \
+ { \
+ DBUG_ASSERT(0); \
+ }
+
+#define RT_CMP_KORR(type, korr_func, len, nextflag) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin = korr_func(a); \
+ bmin = korr_func(b); \
+ amax = korr_func(a+len); \
+ bmax = korr_func(b+len); \
+ RT_CMP(nextflag); \
+}
+
+#define RT_CMP_GET(type, get_func, len, nextflag) \
+{ \
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ RT_CMP(nextflag); \
+}
+
+/*
+  Compares two keys a and b depending on nextflag
+  nextflag can contain these flags:
+   MBR_INTERSECT(a,b)  a overlaps b
+   MBR_CONTAIN(a,b)    a contains b
+   MBR_DISJOINT(a,b)   a is disjoint from b
+   MBR_WITHIN(a,b)     a is within b
+   MBR_EQUAL(a,b)      all coordinates of the MBRs are equal
+   MBR_DATA(a,b)       the data references are the same
+  Returns 0 if the keys satisfy the condition given by nextflag,
+  non-zero otherwise.
+*/
+
+int maria_rtree_key_cmp(HA_KEYSEG *keyseg, byte *b, byte *a, uint key_length,
+ uint nextflag)
+{
+ for (; (int) key_length > 0; keyseg += 2 )
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_CMP_KORR(int8, mi_sint1korr, 1, nextflag);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_CMP_KORR(uint8, mi_uint1korr, 1, nextflag);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_CMP_KORR(int16, mi_sint2korr, 2, nextflag);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_CMP_KORR(uint16, mi_uint2korr, 2, nextflag);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_CMP_KORR(int32, mi_sint3korr, 3, nextflag);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_CMP_KORR(uint32, mi_uint3korr, 3, nextflag);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_CMP_KORR(int32, mi_sint4korr, 4, nextflag);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_CMP_KORR(uint32, mi_uint4korr, 4, nextflag);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_CMP_KORR(longlong, mi_sint8korr, 8, nextflag)
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_CMP_KORR(ulonglong, mi_uint8korr, 8, nextflag)
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ /* The following should be safe, even if we compare doubles */
+ RT_CMP_GET(float, mi_float4get, 4, nextflag);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_CMP_GET(double, mi_float8get, 8, nextflag);
+ break;
+ case HA_KEYTYPE_END:
+ goto end;
+ default:
+ return 1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ }
+
+end:
+ if (nextflag & MBR_DATA)
+ {
+ byte *end = a + keyseg->length;
+ do
+ {
+ if (*a++ != *b++)
+ return FCMP(a[-1], b[-1]);
+ } while (a != end);
+ }
+ return 0;
+}
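+
+/*
+  Minimal usage sketch (hypothetical caller, key and handler names): to
+  test whether two MBR keys overlap, a caller would typically do
+
+    if (!maria_rtree_key_cmp(keyinfo->seg, key_a, key_b, key_length,
+                             MBR_INTERSECT))
+      handle_overlap();
+
+  i.e. a return value of 0 means the requested relation holds.
+*/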
+
+#define RT_VOL_KORR(type, korr_func, len, cast) \
+{ \
+ type amin, amax; \
+ amin = korr_func(a); \
+ amax = korr_func(a+len); \
+ res *= (cast(amax) - cast(amin)); \
+}
+
+#define RT_VOL_GET(type, get_func, len, cast) \
+{ \
+ type amin, amax; \
+ get_func(amin, a); \
+ get_func(amax, a+len); \
+ res *= (cast(amax) - cast(amin)); \
+}
+
+/*
+ Calculates rectangle volume
+*/
+double maria_rtree_rect_volume(HA_KEYSEG *keyseg, byte *a, uint key_length)
+{
+ double res = 1;
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_VOL_KORR(int8, mi_sint1korr, 1, (double));
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_VOL_KORR(uint8, mi_uint1korr, 1, (double));
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_VOL_KORR(int16, mi_sint2korr, 2, (double));
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_VOL_KORR(uint16, mi_uint2korr, 2, (double));
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_VOL_KORR(int32, mi_sint3korr, 3, (double));
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_VOL_KORR(uint32, mi_uint3korr, 3, (double));
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_VOL_KORR(int32, mi_sint4korr, 4, (double));
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_VOL_KORR(uint32, mi_uint4korr, 4, (double));
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_VOL_KORR(longlong, mi_sint8korr, 8, (double));
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_VOL_KORR(longlong, mi_sint8korr, 8, ulonglong2double);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_VOL_GET(float, mi_float4get, 4, (double));
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_VOL_GET(double, mi_float8get, 8, (double));
+ break;
+ case HA_KEYTYPE_END:
+ key_length = 0;
+ break;
+ default:
+ return -1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ }
+ return res;
+}
+
+#define RT_D_MBR_KORR(type, korr_func, len, cast) \
+{ \
+ type amin, amax; \
+ amin = korr_func(a); \
+ amax = korr_func(a+len); \
+ *res++ = cast(amin); \
+ *res++ = cast(amax); \
+}
+
+#define RT_D_MBR_GET(type, get_func, len, cast) \
+{ \
+ type amin, amax; \
+ get_func(amin, a); \
+ get_func(amax, a+len); \
+ *res++ = cast(amin); \
+ *res++ = cast(amax); \
+}
+
+
+/*
+ Creates an MBR as an array of doubles.
+*/
+
+int maria_rtree_d_mbr(HA_KEYSEG *keyseg, byte *a, uint key_length, double *res)
+{
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_D_MBR_KORR(int8, mi_sint1korr, 1, (double));
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_D_MBR_KORR(uint8, mi_uint1korr, 1, (double));
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_D_MBR_KORR(int16, mi_sint2korr, 2, (double));
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_D_MBR_KORR(uint16, mi_uint2korr, 2, (double));
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_D_MBR_KORR(int32, mi_sint3korr, 3, (double));
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_D_MBR_KORR(uint32, mi_uint3korr, 3, (double));
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_D_MBR_KORR(int32, mi_sint4korr, 4, (double));
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_D_MBR_KORR(uint32, mi_uint4korr, 4, (double));
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_D_MBR_KORR(longlong, mi_sint8korr, 8, (double));
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_D_MBR_KORR(longlong, mi_sint8korr, 8, ulonglong2double);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_D_MBR_GET(float, mi_float4get, 4, (double));
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_D_MBR_GET(double, mi_float8get, 8, (double));
+ break;
+ case HA_KEYTYPE_END:
+ key_length = 0;
+ break;
+ default:
+ return 1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ }
+ return 0;
+}
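+
+/*
+  Layout sketch (assuming a 2-dimensional key): res receives the MBR as
+  consecutive min/max pairs, i.e. res[0]=xmin, res[1]=xmax, res[2]=ymin,
+  res[3]=ymax, so the caller must provide room for two doubles per key
+  dimension.
+*/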
+
+#define RT_COMB_KORR(type, korr_func, store_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin = korr_func(a); \
+ bmin = korr_func(b); \
+ amax = korr_func(a+len); \
+ bmax = korr_func(b+len); \
+ amin = min(amin, bmin); \
+ amax = max(amax, bmax); \
+ store_func(c, amin); \
+ store_func(c+len, amax); \
+}
+
+#define RT_COMB_GET(type, get_func, store_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ amin = min(amin, bmin); \
+ amax = max(amax, bmax); \
+ store_func(c, amin); \
+ store_func(c+len, amax); \
+}
+
+/*
+  Creates the common minimal bounding rectangle
+  for the two input rectangles a and b.
+  The result is written to c.
+*/
+
+int maria_rtree_combine_rect(HA_KEYSEG *keyseg, byte* a, byte* b, byte* c,
+ uint key_length)
+{
+ for ( ; (int) key_length > 0 ; keyseg += 2)
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_COMB_KORR(int8, mi_sint1korr, mi_int1store, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_COMB_KORR(uint8, mi_uint1korr, mi_int1store, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_COMB_KORR(int16, mi_sint2korr, mi_int2store, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_COMB_KORR(uint16, mi_uint2korr, mi_int2store, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_COMB_KORR(int32, mi_sint3korr, mi_int3store, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_COMB_KORR(uint32, mi_uint3korr, mi_int3store, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_COMB_KORR(int32, mi_sint4korr, mi_int4store, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_COMB_KORR(uint32, mi_uint4korr, mi_int4store, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_COMB_KORR(longlong, mi_sint8korr, mi_int8store, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_COMB_KORR(ulonglong, mi_uint8korr, mi_int8store, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_COMB_GET(float, mi_float4get, mi_float4store, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_COMB_GET(double, mi_float8get, mi_float8store, 8);
+ break;
+ case HA_KEYTYPE_END:
+ return 0;
+ default:
+ return 1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ c+= keyseg_length;
+ }
+ return 0;
+}
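+
+/*
+  Worked example (hypothetical values, one key dimension): combining
+  a = [0, 10] with b = [5, 15] stores c = [0, 15], i.e. the minimum of the
+  lower bounds and the maximum of the upper bounds per dimension.
+*/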
+
+
+#define RT_OVL_AREA_KORR(type, korr_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin = korr_func(a); \
+ bmin = korr_func(b); \
+ amax = korr_func(a+len); \
+ bmax = korr_func(b+len); \
+ amin = max(amin, bmin); \
+ amax = min(amax, bmax); \
+ if (amin >= amax) \
+ return 0; \
+ res *= amax - amin; \
+}
+
+#define RT_OVL_AREA_GET(type, get_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ amin = max(amin, bmin); \
+ amax = min(amax, bmax); \
+ if (amin >= amax) \
+ return 0; \
+ res *= amax - amin; \
+}
+
+/*
+  Calculates overlapping area of two MBRs a & b
+*/
+double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, byte* a, byte* b,
+ uint key_length)
+{
+ double res = 1;
+ for (; (int) key_length > 0 ; keyseg += 2)
+ {
+ uint32 keyseg_length;
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_OVL_AREA_KORR(int8, mi_sint1korr, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_OVL_AREA_KORR(uint8, mi_uint1korr, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_OVL_AREA_KORR(int16, mi_sint2korr, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_OVL_AREA_KORR(uint16, mi_uint2korr, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_OVL_AREA_KORR(int32, mi_sint3korr, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_OVL_AREA_KORR(uint32, mi_uint3korr, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_OVL_AREA_KORR(int32, mi_sint4korr, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_OVL_AREA_KORR(uint32, mi_uint4korr, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_OVL_AREA_GET(float, mi_float4get, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_OVL_AREA_GET(double, mi_float8get, 8);
+ break;
+ case HA_KEYTYPE_END:
+ return res;
+ default:
+ return -1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ }
+ return res;
+}
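+
+/*
+  Worked example (hypothetical values, two key dimensions): for
+  a = [0,10]x[0,10] and b = [5,15]x[5,15] the overlap is [5,10]x[5,10],
+  so the result is 25.  As soon as the MBRs do not overlap in some
+  dimension, the function returns 0.
+*/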
+
+#define RT_AREA_INC_KORR(type, korr_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin = korr_func(a); \
+ bmin = korr_func(b); \
+ amax = korr_func(a+len); \
+ bmax = korr_func(b+len); \
+ a_area *= (((double)amax) - ((double)amin)); \
+ loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+}
+
+#define RT_AREA_INC_GET(type, get_func, len)\
+{\
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ a_area *= (((double)amax) - ((double)amin)); \
+ loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+}
+
+/*
+ Calculates MBR_AREA(a+b) - MBR_AREA(a)
+*/
+
+double maria_rtree_area_increase(HA_KEYSEG *keyseg, byte *a, byte *b,
+ uint key_length, double *ab_area)
+{
+ double a_area= 1.0;
+ double loc_ab_area= 1.0;
+
+ *ab_area= 1.0;
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ uint32 keyseg_length;
+
+ if (keyseg->null_bit) /* Handle NULL part */
+ return -1;
+
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_AREA_INC_KORR(int8, mi_sint1korr, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_AREA_INC_KORR(uint8, mi_uint1korr, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_AREA_INC_KORR(int16, mi_sint2korr, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_AREA_INC_KORR(uint16, mi_uint2korr, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_AREA_INC_KORR(int32, mi_sint3korr, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_AREA_INC_KORR(int32, mi_uint3korr, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_AREA_INC_KORR(int32, mi_sint4korr, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_AREA_INC_KORR(uint32, mi_uint4korr, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_AREA_INC_KORR(longlong, mi_sint8korr, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_AREA_INC_KORR(longlong, mi_sint8korr, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_AREA_INC_GET(float, mi_float4get, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_AREA_INC_GET(double, mi_float8get, 8);
+ break;
+ case HA_KEYTYPE_END:
+ goto safe_end;
+ default:
+ return -1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ }
+safe_end:
+ *ab_area= loc_ab_area;
+ return loc_ab_area - a_area;
+}
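+
+/*
+  Worked example (hypothetical values, two key dimensions): for
+  a = [0,10]x[0,10] and b = [5,15]x[5,15], area(a) = 100 and
+  area(a+b) = 15*15 = 225, so *ab_area is set to 225 and 125 is returned.
+  The insert path can use this to prefer the subtree whose MBR grows the
+  least.
+*/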
+
+#define RT_PERIM_INC_KORR(type, korr_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin = korr_func(a); \
+ bmin = korr_func(b); \
+ amax = korr_func(a+len); \
+ bmax = korr_func(b+len); \
+ a_perim+= (((double)amax) - ((double)amin)); \
+ *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+}
+
+#define RT_PERIM_INC_GET(type, get_func, len)\
+{\
+ type amin, amax, bmin, bmax; \
+ get_func(amin, a); \
+ get_func(bmin, b); \
+ get_func(amax, a+len); \
+ get_func(bmax, b+len); \
+ a_perim+= (((double)amax) - ((double)amin)); \
+ *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+}
+
+/*
+  Calculates MBR_PERIMETER(a+b) - MBR_PERIMETER(a)
+*/
+double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, byte* a, byte* b,
+ uint key_length, double *ab_perim)
+{
+ double a_perim = 0.0;
+
+ *ab_perim= 0.0;
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ uint32 keyseg_length;
+
+ if (keyseg->null_bit) /* Handle NULL part */
+ return -1;
+
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_PERIM_INC_KORR(int8, mi_sint1korr, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_PERIM_INC_KORR(uint8, mi_uint1korr, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_PERIM_INC_KORR(int16, mi_sint2korr, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_PERIM_INC_KORR(uint16, mi_uint2korr, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_PERIM_INC_KORR(int32, mi_sint3korr, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_PERIM_INC_KORR(int32, mi_uint3korr, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_PERIM_INC_KORR(int32, mi_sint4korr, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_PERIM_INC_KORR(uint32, mi_uint4korr, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_PERIM_INC_GET(float, mi_float4get, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_PERIM_INC_GET(double, mi_float8get, 8);
+ break;
+ case HA_KEYTYPE_END:
+ return *ab_perim - a_perim;
+ default:
+ return -1;
+ }
+ keyseg_length= keyseg->length * 2;
+ key_length-= keyseg_length;
+ a+= keyseg_length;
+ b+= keyseg_length;
+ }
+ return *ab_perim - a_perim;
+}
+
+
+#define RT_PAGE_MBR_KORR(type, korr_func, store_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ amin = korr_func(k + inc); \
+ amax = korr_func(k + inc + len); \
+ k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \
+ for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \
+{ \
+ bmin = korr_func(k + inc); \
+ bmax = korr_func(k + inc + len); \
+ if (amin > bmin) \
+ amin = bmin; \
+ if (amax < bmax) \
+ amax = bmax; \
+} \
+ store_func(c, amin); \
+ c += len; \
+ store_func(c, amax); \
+ c += len; \
+ inc += 2 * len; \
+}
+
+#define RT_PAGE_MBR_GET(type, get_func, store_func, len) \
+{ \
+ type amin, amax, bmin, bmax; \
+ get_func(amin, k + inc); \
+ get_func(amax, k + inc + len); \
+ k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \
+ for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \
+{ \
+ get_func(bmin, k + inc); \
+ get_func(bmax, k + inc + len); \
+ if (amin > bmin) \
+ amin = bmin; \
+ if (amax < bmax) \
+ amax = bmax; \
+} \
+ store_func(c, amin); \
+ c += len; \
+ store_func(c, amax); \
+ c += len; \
+ inc += 2 * len; \
+}
+
+/*
+ Calculates key page total MBR = MBR(key1) + MBR(key2) + ...
+*/
+int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, byte *page_buf,
+ byte *c, uint key_length)
+{
+ uint inc = 0;
+ uint k_len = key_length;
+ uint nod_flag = _ma_test_if_nod(page_buf);
+ byte *k;
+ byte *last = rt_PAGE_END(page_buf);
+
+ for (; (int)key_length > 0; keyseg += 2)
+ {
+ key_length -= keyseg->length * 2;
+
+ /* Handle NULL part */
+ if (keyseg->null_bit)
+ {
+ return 1;
+ }
+
+ k = rt_PAGE_FIRST_KEY(page_buf, nod_flag);
+
+ switch ((enum ha_base_keytype) keyseg->type) {
+ case HA_KEYTYPE_INT8:
+ RT_PAGE_MBR_KORR(int8, mi_sint1korr, mi_int1store, 1);
+ break;
+ case HA_KEYTYPE_BINARY:
+ RT_PAGE_MBR_KORR(uint8, mi_uint1korr, mi_int1store, 1);
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ RT_PAGE_MBR_KORR(int16, mi_sint2korr, mi_int2store, 2);
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ RT_PAGE_MBR_KORR(uint16, mi_uint2korr, mi_int2store, 2);
+ break;
+ case HA_KEYTYPE_INT24:
+ RT_PAGE_MBR_KORR(int32, mi_sint3korr, mi_int3store, 3);
+ break;
+ case HA_KEYTYPE_UINT24:
+ RT_PAGE_MBR_KORR(uint32, mi_uint3korr, mi_int3store, 3);
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ RT_PAGE_MBR_KORR(int32, mi_sint4korr, mi_int4store, 4);
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ RT_PAGE_MBR_KORR(uint32, mi_uint4korr, mi_int4store, 4);
+ break;
+#ifdef HAVE_LONG_LONG
+ case HA_KEYTYPE_LONGLONG:
+ RT_PAGE_MBR_KORR(longlong, mi_sint8korr, mi_int8store, 8);
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ RT_PAGE_MBR_KORR(ulonglong, mi_uint8korr, mi_int8store, 8);
+ break;
+#endif
+ case HA_KEYTYPE_FLOAT:
+ RT_PAGE_MBR_GET(float, mi_float4get, mi_float4store, 4);
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ RT_PAGE_MBR_GET(double, mi_float8get, mi_float8store, 8);
+ break;
+ case HA_KEYTYPE_END:
+ return 0;
+ default:
+ return 1;
+ }
+ }
+ return 0;
+}
+
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_rt_mbr.h b/storage/maria/ma_rt_mbr.h
new file mode 100644
index 00000000000..3282ee0d7a3
--- /dev/null
+++ b/storage/maria/ma_rt_mbr.h
@@ -0,0 +1,39 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _rt_mbr_h
+#define _rt_mbr_h
+
+#ifdef HAVE_RTREE_KEYS
+
+int maria_rtree_key_cmp(HA_KEYSEG *keyseg, byte *a, byte *b, uint key_length,
+ uint nextflag);
+int maria_rtree_combine_rect(HA_KEYSEG *keyseg,byte *, byte *, byte*,
+ uint key_length);
+double maria_rtree_rect_volume(HA_KEYSEG *keyseg, byte*, uint key_length);
+int maria_rtree_d_mbr(HA_KEYSEG *keyseg, byte *a, uint key_length,
+ double *res);
+double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, byte *a, byte *b,
+ uint key_length);
+double maria_rtree_area_increase(HA_KEYSEG *keyseg, byte *a, byte *b,
+ uint key_length, double *ab_area);
+double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, byte* a, byte* b,
+ uint key_length, double *ab_perim);
+int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, byte *page_buf,
+ byte* c, uint key_length);
+#endif /*HAVE_RTREE_KEYS*/
+#endif /* _rt_mbr_h */
diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c
new file mode 100644
index 00000000000..00c8d18f5e5
--- /dev/null
+++ b/storage/maria/ma_rt_split.c
@@ -0,0 +1,351 @@
+/* Copyright (C) 2006 MySQL AB & Alexey Botchkov & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_RTREE_KEYS
+
+#include "ma_rt_index.h"
+#include "ma_rt_key.h"
+#include "ma_rt_mbr.h"
+
+typedef struct
+{
+ double square;
+ int n_node;
+ byte *key;
+ double *coords;
+} SplitStruct;
+
+inline static double *reserve_coords(double **d_buffer, int n_dim)
+{
+ double *coords = *d_buffer;
+ (*d_buffer) += n_dim * 2;
+ return coords;
+}
+
+static void mbr_join(double *a, const double *b, int n_dim)
+{
+ double *end = a + n_dim * 2;
+ do
+ {
+ if (a[0] > b[0])
+ a[0] = b[0];
+
+ if (a[1] < b[1])
+ a[1] = b[1];
+
+ a += 2;
+ b += 2;
+ }while (a != end);
+}
+
+/*
+  Computes the area ("square") of the MBR that is the join of a and b
+*/
+static double mbr_join_square(const double *a, const double *b, int n_dim)
+{
+ const double *end = a + n_dim * 2;
+ double square = 1.0;
+ do
+ {
+ square *=
+ ((a[1] < b[1]) ? b[1] : a[1]) - ((a[0] > b[0]) ? b[0] : a[0]);
+
+ a += 2;
+ b += 2;
+ }while (a != end);
+
+ return square;
+}
+
+static double count_square(const double *a, int n_dim)
+{
+ const double *end = a + n_dim * 2;
+ double square = 1.0;
+ do
+ {
+ square *= a[1] - a[0];
+ a += 2;
+ }while (a != end);
+ return square;
+}
+
+inline static void copy_coords(double *dst, const double *src, int n_dim)
+{
+ memcpy(dst, src, sizeof(double) * (n_dim * 2));
+}
+
+/*
+  Select the two seed entries around which the two groups are built
+*/
+static void pick_seeds(SplitStruct *node, int n_entries,
+ SplitStruct **seed_a, SplitStruct **seed_b, int n_dim)
+{
+ SplitStruct *cur1;
+ SplitStruct *lim1 = node + (n_entries - 1);
+ SplitStruct *cur2;
+ SplitStruct *lim2 = node + n_entries;
+
+ double max_d = -DBL_MAX;
+ double d;
+
+ for (cur1 = node; cur1 < lim1; ++cur1)
+ {
+ for (cur2=cur1 + 1; cur2 < lim2; ++cur2)
+ {
+
+ d = mbr_join_square(cur1->coords, cur2->coords, n_dim) - cur1->square -
+ cur2->square;
+ if (d > max_d)
+ {
+ max_d = d;
+ *seed_a = cur1;
+ *seed_b = cur2;
+ }
+ }
+ }
+}
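+
+/*
+  Note on the strategy used above: the pair whose combined MBR wastes the
+  most area (join area minus the two individual areas) becomes the seeds,
+  which corresponds to the quadratic split from Guttman's R-tree paper.
+*/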
+
+/*
+  Select the next entry to place and the group it should be added to
+*/
+static void pick_next(SplitStruct *node, int n_entries, double *g1, double *g2,
+ SplitStruct **choice, int *n_group, int n_dim)
+{
+ SplitStruct *cur = node;
+ SplitStruct *end = node + n_entries;
+
+ double max_diff = -DBL_MAX;
+
+ for (; cur<end; ++cur)
+ {
+ double diff;
+ double abs_diff;
+
+ if (cur->n_node)
+ {
+ continue;
+ }
+
+ diff = mbr_join_square(g1, cur->coords, n_dim) -
+ mbr_join_square(g2, cur->coords, n_dim);
+
+ abs_diff = fabs(diff);
+ if (abs_diff > max_diff)
+ {
+ max_diff = abs_diff;
+ *n_group = 1 + (diff > 0);
+ *choice = cur;
+ }
+ }
+}
+
+/*
+  Assign all entries that are not yet in a group to group n_group
+*/
+static void mark_all_entries(SplitStruct *node, int n_entries, int n_group)
+{
+ SplitStruct *cur = node;
+ SplitStruct *end = node + n_entries;
+ for (; cur<end; ++cur)
+ {
+ if (cur->n_node)
+ {
+ continue;
+ }
+ cur->n_node = n_group;
+ }
+}
+
+static int split_maria_rtree_node(SplitStruct *node, int n_entries,
+ int all_size, /* Total key's size */
+ int key_size,
+ int min_size, /* Minimal group size */
+ int size1, int size2 /* initial group sizes */,
+ double **d_buffer, int n_dim)
+{
+ SplitStruct *cur;
+ SplitStruct *a;
+ SplitStruct *b;
+ double *g1 = reserve_coords(d_buffer, n_dim);
+ double *g2 = reserve_coords(d_buffer, n_dim);
+ SplitStruct *next;
+ int next_node;
+ int i;
+ SplitStruct *end = node + n_entries;
+
+ if (all_size < min_size * 2)
+ {
+ return 1;
+ }
+
+ cur = node;
+ for (; cur<end; ++cur)
+ {
+ cur->square = count_square(cur->coords, n_dim);
+ cur->n_node = 0;
+ }
+
+ pick_seeds(node, n_entries, &a, &b, n_dim);
+ a->n_node = 1;
+ b->n_node = 2;
+
+
+ copy_coords(g1, a->coords, n_dim);
+ size1 += key_size;
+ copy_coords(g2, b->coords, n_dim);
+ size2 += key_size;
+
+
+ for (i=n_entries - 2; i>0; --i)
+ {
+ if (all_size - (size2 + key_size) < min_size) /* Can't write into group 2 */
+ {
+ mark_all_entries(node, n_entries, 1);
+ break;
+ }
+
+ if (all_size - (size1 + key_size) < min_size) /* Can't write into group 1 */
+ {
+ mark_all_entries(node, n_entries, 2);
+ break;
+ }
+
+ pick_next(node, n_entries, g1, g2, &next, &next_node, n_dim);
+ if (next_node == 1)
+ {
+ size1 += key_size;
+ mbr_join(g1, next->coords, n_dim);
+ }
+ else
+ {
+ size2 += key_size;
+ mbr_join(g2, next->coords, n_dim);
+ }
+ next->n_node = next_node;
+ }
+
+ return 0;
+}
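+
+/*
+  Summary of the split (as implemented above): two seed entries are picked,
+  then the remaining entries are assigned one by one by pick_next(); once
+  one group becomes so large that the other could no longer reach min_size,
+  all still unassigned entries are forced into the other group.  1 is
+  returned only when the page is too small to form two minimal groups.
+*/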
+
+int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *page, byte *key,
+ uint key_length, my_off_t *new_page_offs)
+{
+ int n1, n2; /* Number of items in groups */
+
+ SplitStruct *task;
+ SplitStruct *cur;
+ SplitStruct *stop;
+ double *coord_buf;
+ double *next_coord;
+ double *old_coord;
+ int n_dim;
+ byte *source_cur, *cur1, *cur2;
+ byte *new_page;
+ int err_code= 0;
+ uint nod_flag= _ma_test_if_nod(page);
+ uint full_length= key_length + (nod_flag ? nod_flag :
+ info->s->base.rec_reflength);
+ int max_keys= (maria_getint(page)-2) / (full_length);
+
+ n_dim = keyinfo->keysegs / 2;
+
+ if (!(coord_buf= (double*) my_alloca(n_dim * 2 * sizeof(double) *
+ (max_keys + 1 + 4) +
+ sizeof(SplitStruct) * (max_keys + 1))))
+ return -1;
+
+ task= (SplitStruct *)(coord_buf + n_dim * 2 * (max_keys + 1 + 4));
+
+ next_coord = coord_buf;
+
+ stop = task + max_keys;
+ source_cur = rt_PAGE_FIRST_KEY(page, nod_flag);
+
+ for (cur = task; cur < stop; ++cur, source_cur = rt_PAGE_NEXT_KEY(source_cur,
+ key_length, nod_flag))
+ {
+ cur->coords = reserve_coords(&next_coord, n_dim);
+ cur->key = source_cur;
+ maria_rtree_d_mbr(keyinfo->seg, source_cur, key_length, cur->coords);
+ }
+
+ cur->coords = reserve_coords(&next_coord, n_dim);
+ maria_rtree_d_mbr(keyinfo->seg, key, key_length, cur->coords);
+ cur->key = key;
+
+ old_coord = next_coord;
+
+ if (split_maria_rtree_node(task, max_keys + 1,
+ maria_getint(page) + full_length + 2, full_length,
+ rt_PAGE_MIN_SIZE(keyinfo->block_length),
+ 2, 2, &next_coord, n_dim))
+ {
+ err_code = 1;
+ goto split_err;
+ }
+
+ if (!(new_page = (byte*) my_alloca((uint)keyinfo->block_length)))
+ {
+ err_code= -1;
+ goto split_err;
+ }
+
+ stop = task + (max_keys + 1);
+ cur1 = rt_PAGE_FIRST_KEY(page, nod_flag);
+ cur2 = rt_PAGE_FIRST_KEY(new_page, nod_flag);
+
+ n1= n2 = 0;
+ for (cur = task; cur < stop; ++cur)
+ {
+ byte *to;
+ if (cur->n_node == 1)
+ {
+ to = cur1;
+ cur1 = rt_PAGE_NEXT_KEY(cur1, key_length, nod_flag);
+ ++n1;
+ }
+ else
+ {
+ to = cur2;
+ cur2 = rt_PAGE_NEXT_KEY(cur2, key_length, nod_flag);
+ ++n2;
+ }
+ if (to != cur->key)
+ memcpy(to - nod_flag, cur->key - nod_flag, full_length);
+ }
+
+ maria_putint(page, 2 + n1 * full_length, nod_flag);
+ maria_putint(new_page, 2 + n2 * full_length, nod_flag);
+
+ if ((*new_page_offs= _ma_new(info, keyinfo, DFLT_INIT_HITS)) ==
+ HA_OFFSET_ERROR)
+ err_code= -1;
+ else
+ err_code= _ma_write_keypage(info, keyinfo, *new_page_offs,
+ DFLT_INIT_HITS, new_page);
+
+ my_afree((byte*)new_page);
+
+split_err:
+ my_afree((byte*) coord_buf);
+ return err_code;
+}
+
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_rt_test.c b/storage/maria/ma_rt_test.c
new file mode 100644
index 00000000000..04b0c88c222
--- /dev/null
+++ b/storage/maria/ma_rt_test.c
@@ -0,0 +1,474 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Testing of the basic functions of a MARIA rtree table */
+/* Written by Alex Barkov who has a shared copyright to this code */
+
+
+#include "maria.h"
+
+#ifdef HAVE_RTREE_KEYS
+
+#include "ma_rt_index.h"
+
+#define MAX_REC_LENGTH 1024
+#define ndims 2
+#define KEYALG HA_KEY_ALG_RTREE
+
+static int read_with_pos(MARIA_HA * file, int silent);
+static void create_record(char *record,uint rownr);
+static void create_record1(char *record,uint rownr);
+static void print_record(char * record,my_off_t offs,const char * tail);
+static int run_test(const char *filename);
+
+static double rt_data[]=
+{
+ /*1*/ 0,10,0,10,
+ /*2*/ 5,15,0,10,
+ /*3*/ 0,10,5,15,
+ /*4*/ 10,20,10,20,
+ /*5*/ 0,10,0,10,
+ /*6*/ 5,15,0,10,
+ /*7*/ 0,10,5,15,
+ /*8*/ 10,20,10,20,
+ /*9*/ 0,10,0,10,
+ /*10*/ 5,15,0,10,
+ /*11*/ 0,10,5,15,
+ /*12*/ 10,20,10,20,
+ /*13*/ 0,10,0,10,
+ /*14*/ 5,15,0,10,
+ /*15*/ 0,10,5,15,
+ /*16*/ 10,20,10,20,
+ /*17*/ 5,15,0,10,
+ /*18*/ 0,10,5,15,
+ /*19*/ 10,20,10,20,
+ /*20*/ 0,10,0,10,
+
+ /*1*/ 100,110,0,10,
+ /*2*/ 105,115,0,10,
+ /*3*/ 100,110,5,15,
+ /*4*/ 110,120,10,20,
+ /*5*/ 100,110,0,10,
+ /*6*/ 105,115,0,10,
+ /*7*/ 100,110,5,15,
+ /*8*/ 110,120,10,20,
+ /*9*/ 100,110,0,10,
+ /*10*/ 105,115,0,10,
+ /*11*/ 100,110,5,15,
+ /*12*/ 110,120,10,20,
+ /*13*/ 100,110,0,10,
+ /*14*/ 105,115,0,10,
+ /*15*/ 100,110,5,15,
+ /*16*/ 110,120,10,20,
+ /*17*/ 105,115,0,10,
+ /*18*/ 100,110,5,15,
+ /*19*/ 110,120,10,20,
+ /*20*/ 100,110,0,10,
+ -1
+};
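+
+/*
+  Each row above presumably describes one 2-D rectangle as
+  xmin, xmax, ymin, ymax; the second half repeats the same shapes shifted
+  by 100 along the first dimension.  The trailing -1 appears to be just an
+  end marker; the record count is taken from sizeof(rt_data) in run_test().
+*/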
+
+int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused)))
+{
+ MY_INIT(argv[0]);
+ maria_init();
+ exit(run_test("rt_test"));
+}
+
+
+static int run_test(const char *filename)
+{
+ MARIA_HA *file;
+ MARIA_UNIQUEDEF uniquedef;
+ MARIA_CREATE_INFO create_info;
+ MARIA_COLUMNDEF recinfo[20];
+ MARIA_KEYDEF keyinfo[20];
+ HA_KEYSEG keyseg[20];
+ key_range range;
+
+ int silent=0;
+ int opt_unique=0;
+ int create_flag=0;
+ int key_type=HA_KEYTYPE_DOUBLE;
+ int key_length=8;
+ int null_fields=0;
+ int nrecords=sizeof(rt_data)/(sizeof(double)*4);/* 3000;*/
+ int rec_length=0;
+ int uniques=0;
+ int i;
+ int error;
+ int row_count=0;
+ char record[MAX_REC_LENGTH];
+ char read_record[MAX_REC_LENGTH];
+ int upd= 10;
+ ha_rows hrows;
+
+ /* Define a column for NULLs and DEL markers*/
+
+ recinfo[0].type=FIELD_NORMAL;
+ recinfo[0].length=1; /* For NULL bits */
+ rec_length=1;
+
+ /* Define 2*ndims columns for coordinates*/
+
+ for (i=1; i<=2*ndims ;i++){
+ recinfo[i].type=FIELD_NORMAL;
+ recinfo[i].length=key_length;
+ rec_length+=key_length;
+ }
+
+ /* Define a key with 2*ndims segments */
+
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=2*ndims;
+ keyinfo[0].flag=0;
+ keyinfo[0].key_alg=KEYALG;
+
+ for (i=0; i<2*ndims; i++){
+ keyinfo[0].seg[i].type= key_type;
+ keyinfo[0].seg[i].flag=0; /* Things like HA_REVERSE_SORT */
+ keyinfo[0].seg[i].start= (key_length*i)+1;
+ keyinfo[0].seg[i].length=key_length;
+ keyinfo[0].seg[i].null_bit= null_fields ? 2 : 0;
+ keyinfo[0].seg[i].null_pos=0;
+ keyinfo[0].seg[i].language=default_charset_info->number;
+ }
+
+ if (!silent)
+ printf("- Creating isam-file\n");
+
+ bzero((char*) &create_info,sizeof(create_info));
+ create_info.max_rows=10000000;
+
+ if (maria_create(filename,
+ DYNAMIC_RECORD,
+ 1, /* keys */
+ keyinfo,
+ 1+2*ndims+opt_unique, /* columns */
+ recinfo,uniques,&uniquedef,&create_info,create_flag))
+ goto err;
+
+ if (!silent)
+ printf("- Open isam-file\n");
+
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED)))
+ goto err;
+
+ if (!silent)
+    printf("- Writing keys\n");
+
+ for (i=0; i<nrecords; i++ )
+ {
+ create_record(record,i);
+ error=maria_write(file,record);
+ print_record(record,maria_position(file),"\n");
+ if (!error)
+ {
+ row_count++;
+ }
+ else
+ {
+ printf("maria_write: %d\n", error);
+ goto err;
+ }
+ }
+
+ if ((error=read_with_pos(file,silent)))
+ goto err;
+
+ if (!silent)
+ printf("- Reading rows with key\n");
+
+ for (i=0 ; i < nrecords ; i++)
+ {
+ my_errno=0;
+ create_record(record,i);
+
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rkey(file,read_record,0,record+1,0,HA_READ_MBR_EQUAL);
+
+ if (error && error!=HA_ERR_KEY_NOT_FOUND)
+ {
+ printf(" maria_rkey: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ if (error == HA_ERR_KEY_NOT_FOUND)
+ {
+ print_record(record,maria_position(file)," NOT FOUND\n");
+ continue;
+ }
+ print_record(read_record,maria_position(file),"\n");
+ }
+
+ if (!silent)
+ printf("- Deleting rows\n");
+ for (i=0; i < nrecords/4; i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file),"\n");
+
+ error=maria_delete(file,read_record);
+ if (error)
+ {
+ printf("pos: %2d maria_delete: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ }
+
+ if (!silent)
+ printf("- Updating rows with position\n");
+ for (i=0; i < (nrecords - nrecords/4) ; i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ if (error==HA_ERR_RECORD_DELETED)
+ continue;
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file),"");
+ create_record(record,i+nrecords*upd);
+ printf("\t-> ");
+ print_record(record,maria_position(file),"\n");
+ error=maria_update(file,read_record,record);
+ if (error)
+ {
+ printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ }
+
+ if ((error=read_with_pos(file,silent)))
+ goto err;
+
+ if (!silent)
+ printf("- Test maria_rkey then a sequence of maria_rnext_same\n");
+
+ create_record(record, nrecords*4/5);
+ print_record(record,0," search for\n");
+
+ if ((error=maria_rkey(file,read_record,0,record+1,0,HA_READ_MBR_INTERSECT)))
+ {
+ printf("maria_rkey: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rkey\n");
+ row_count=1;
+
+ for (;;)
+ {
+ if ((error=maria_rnext_same(file,read_record)))
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ printf("maria_next: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rnext_same\n");
+ row_count++;
+ }
+ printf(" %d rows\n",row_count);
+
+ if (!silent)
+ printf("- Test maria_rfirst then a sequence of maria_rnext\n");
+
+ error=maria_rfirst(file,read_record,0);
+ if (error)
+ {
+ printf("maria_rfirst: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ row_count=1;
+  print_record(read_record,maria_position(file)," maria_rfirst\n");
+
+ for (i=0;i<nrecords;i++)
+ {
+ if ((error=maria_rnext(file,read_record,0)))
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ printf("maria_next: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rnext\n");
+ row_count++;
+ }
+ printf(" %d rows\n",row_count);
+
+ if (!silent)
+ printf("- Test maria_records_in_range()\n");
+
+ create_record1(record, nrecords*4/5);
+ print_record(record,0,"\n");
+
+ range.key= record+1;
+ range.length= 1000; /* Big enough */
+ range.flag= HA_READ_MBR_INTERSECT;
+ hrows= maria_records_in_range(file,0, &range, (key_range*) 0);
+ printf(" %ld rows\n", (long) hrows);
+
+ if (maria_close(file)) goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return 0;
+
+err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ return 1; /* skip warning */
+}
+
+
+
+static int read_with_pos (MARIA_HA * file,int silent)
+{
+ int error;
+ int i;
+ char read_record[MAX_REC_LENGTH];
+
+ if (!silent)
+ printf("- Reading rows with position\n");
+ for (i=0;;i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ if (error==HA_ERR_RECORD_DELETED)
+ continue;
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ return error;
+ }
+ print_record(read_record,maria_position(file),"\n");
+ }
+ return 0;
+}
+
+
+#ifdef NOT_USED
+static void bprint_record(char * record,
+ my_off_t offs __attribute__((unused)),
+ const char * tail)
+{
+ int i;
+ char * pos;
+ i=(unsigned char)record[0];
+ printf("%02X ",i);
+
+ for( pos=record+1, i=0; i<32; i++,pos++){
+ int b=(unsigned char)*pos;
+ printf("%02X",b);
+ }
+ printf("%s",tail);
+}
+#endif
+
+
+static void print_record(char * record,
+ my_off_t offs __attribute__((unused)),
+ const char * tail)
+{
+ int i;
+ char * pos;
+ double c;
+
+ printf(" rec=(%d)",(unsigned char)record[0]);
+ for ( pos=record+1, i=0; i<2*ndims; i++)
+ {
+ memcpy(&c,pos,sizeof(c));
+ float8get(c,pos);
+ printf(" %.14g ",c);
+ pos+=sizeof(c);
+ }
+ printf("pos=%ld",(long int)offs);
+ printf("%s",tail);
+}
+
+
+
+static void create_record1(char *record,uint rownr)
+{
+ int i;
+ char * pos;
+ double c=rownr+10;
+
+ bzero((char*) record,MAX_REC_LENGTH);
+ record[0]=0x01; /* DEL marker */
+
+ for ( pos=record+1, i=0; i<2*ndims; i++)
+ {
+ memcpy(pos,&c,sizeof(c));
+ float8store(pos,c);
+ pos+=sizeof(c);
+ }
+}
+
+#ifdef NOT_USED
+
+static void create_record0(char *record,uint rownr)
+{
+ int i;
+ char * pos;
+ double c=rownr+10;
+ double c0=0;
+
+ bzero((char*) record,MAX_REC_LENGTH);
+ record[0]=0x01; /* DEL marker */
+
+ for ( pos=record+1, i=0; i<ndims; i++)
+ {
+ memcpy(pos,&c0,sizeof(c0));
+ float8store(pos,c0);
+ pos+=sizeof(c0);
+ memcpy(pos,&c,sizeof(c));
+ float8store(pos,c);
+ pos+=sizeof(c);
+ }
+}
+
+#endif
+
+static void create_record(char *record,uint rownr)
+{
+ int i;
+ char *pos;
+ double *data= rt_data+rownr*4;
+ record[0]=0x01; /* DEL marker */
+ for ( pos=record+1, i=0; i<ndims*2; i++)
+ {
+ float8store(pos,data[i]);
+ pos+=8;
+ }
+}
+
+#else
+int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused)))
+{
+ exit(0);
+}
+#endif /*HAVE_RTREE_KEYS*/
diff --git a/storage/maria/ma_scan.c b/storage/maria/ma_scan.c
new file mode 100644
index 00000000000..4538c87e2be
--- /dev/null
+++ b/storage/maria/ma_scan.c
@@ -0,0 +1,61 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Read through all rows sequentially */
+
+#include "maria_def.h"
+
+int maria_scan_init(register MARIA_HA *info)
+{
+ DBUG_ENTER("maria_scan_init");
+
+ info->cur_row.nextpos= info->s->pack.header_length; /* Read first record */
+ info->lastinx= -1; /* Can't forward or backward */
+ if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(my_errno);
+
+ if ((*info->s->scan_init)(info))
+ DBUG_RETURN(my_errno);
+ DBUG_RETURN(0);
+}
+
+/*
+ Read a row based on position.
+
+ SYNOPSIS
+ maria_scan()
+ info Maria handler
+ record Read data here
+
+ RETURN
+ 0 ok
+ HA_ERR_END_OF_FILE End of file
+ # Error code
+*/
+
+int maria_scan(MARIA_HA *info, byte *record)
+{
+ DBUG_ENTER("maria_scan");
+ /* Init all but update-flag */
+ info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+ DBUG_RETURN((*info->s->scan)(info, record, info->cur_row.nextpos, 1));
+}
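+
+/*
+  Typical calling pattern (a sketch, error handling simplified):
+
+    maria_scan_init(info);
+    while (!(error= maria_scan(info, record)))
+      process(record);
+    maria_scan_end(info);
+
+  A real caller may also need to skip HA_ERR_RECORD_DELETED before treating
+  a non-zero return value as the end of the scan.
+*/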
+
+
+void maria_scan_end(MARIA_HA *info)
+{
+ (*info->s->scan_end)(info);
+}
diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c
new file mode 100644
index 00000000000..d8738ae4639
--- /dev/null
+++ b/storage/maria/ma_search.c
@@ -0,0 +1,1906 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* key handling functions */
+
+#include "ma_fulltext.h"
+#include "m_ctype.h"
+
+static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *page,
+ byte *key, byte *keypos,
+ uint *return_key_length);
+
+ /* Check index */
+
+int _ma_check_index(MARIA_HA *info, int inx)
+{
+ if (inx == -1) /* Use last index */
+ inx=info->lastinx;
+ if (inx < 0 || ! maria_is_key_active(info->s->state.key_map, inx))
+ {
+ my_errno=HA_ERR_WRONG_INDEX;
+ return -1;
+ }
+ if (info->lastinx != inx) /* Index changed */
+ {
+ info->lastinx = inx;
+ info->page_changed=1;
+ info->update= ((info->update & (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED)) |
+ HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND);
+ }
+ if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache))
+ return(-1);
+ return(inx);
+} /* _ma_check_index */
+
+
+  /*
+  ** Search for a row by a key.
+  ** The position of the row is stored in info->cur_row.lastpos.
+  ** Return: -1 if not found
+  **          1 if one should continue the search on a higher level
+  */
+
+int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ byte *key, uint key_len, uint nextflag, register my_off_t pos)
+{
+ my_bool last_key;
+ int error,flag;
+ uint nod_flag;
+ byte *keypos,*maxpos;
+ byte lastkey[HA_MAX_KEY_BUFF],*buff;
+ DBUG_ENTER("_ma_search");
+ DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu",
+ (ulong) pos, nextflag, (ulong) info->cur_row.lastpos));
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_len););
+
+ if (pos == HA_OFFSET_ERROR)
+ {
+ my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ if (!(nextflag & (SEARCH_SMALLER | SEARCH_BIGGER | SEARCH_LAST)))
+ DBUG_RETURN(-1); /* Not found ; return error */
+ DBUG_RETURN(1); /* Search at upper levels */
+ }
+
+ if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,
+ info->keyread_buff,
+ test(!(nextflag & SEARCH_SAVE_BUFF)))))
+ goto err;
+ DBUG_DUMP("page", buff, maria_getint(buff));
+
+ flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag,
+ &keypos,lastkey, &last_key);
+ if (flag == MARIA_FOUND_WRONG_KEY)
+ DBUG_RETURN(-1);
+ nod_flag=_ma_test_if_nod(buff);
+ maxpos=buff+maria_getint(buff)-1;
+
+ if (flag)
+ {
+ if ((error= _ma_search(info,keyinfo,key,key_len,nextflag,
+ _ma_kpos(nod_flag,keypos))) <= 0)
+ DBUG_RETURN(error);
+
+ if (flag >0)
+ {
+ if (nextflag & (SEARCH_SMALLER | SEARCH_LAST) &&
+ keypos == buff+2+nod_flag)
+ DBUG_RETURN(1); /* Bigger than key */
+ }
+ else if (nextflag & SEARCH_BIGGER && keypos >= maxpos)
+ DBUG_RETURN(1); /* Smaller than key */
+ }
+ else
+ {
+ if ((nextflag & SEARCH_FIND) && nod_flag &&
+ ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME ||
+ key_len != USE_WHOLE_KEY))
+ {
+ if ((error= _ma_search(info,keyinfo,key,key_len,SEARCH_FIND,
+ _ma_kpos(nod_flag,keypos))) >= 0 ||
+ my_errno != HA_ERR_KEY_NOT_FOUND)
+ DBUG_RETURN(error);
+ info->last_keypage= HA_OFFSET_ERROR; /* Buffer not in mem */
+ }
+ }
+ if (pos != info->last_keypage)
+ {
+ byte *old_buff=buff;
+ if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,
+ info->keyread_buff,
+ test(!(nextflag & SEARCH_SAVE_BUFF)))))
+ goto err;
+ keypos=buff+(keypos-old_buff);
+ maxpos=buff+(maxpos-old_buff);
+ }
+
+ if ((nextflag & (SEARCH_SMALLER | SEARCH_LAST)) && flag != 0)
+ {
+ uint not_used[2];
+ if (_ma_get_prev_key(info,keyinfo, buff, info->lastkey, keypos,
+ &info->lastkey_length))
+ goto err;
+ if (!(nextflag & SEARCH_SMALLER) &&
+ ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, key_len,
+ SEARCH_FIND, not_used))
+ {
+ my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */
+ goto err;
+ }
+ }
+ else
+ {
+ info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey);
+ if (!info->lastkey_length)
+ goto err;
+ memcpy(info->lastkey,lastkey,info->lastkey_length);
+ }
+ info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length);
+ /* Save position for a possible read next / previous */
+ info->int_keypos= info->keyread_buff+ (keypos-buff);
+ info->int_maxpos= info->keyread_buff+ (maxpos-buff);
+ info->int_nod_flag=nod_flag;
+ info->int_keytree_version=keyinfo->version;
+ info->last_search_keypage=info->last_keypage;
+ info->page_changed=0;
+ info->keybuff_used= (info->keyread_buff != buff); /* If we have to reread */
+
+ DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos));
+ DBUG_RETURN(0);
+
+err:
+ DBUG_PRINT("exit",("Error: %d",my_errno));
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->page_changed=1;
+ DBUG_RETURN (-1);
+} /* _ma_search */
+
+
+  /* Search for a key in a page block */
+  /* If the key is packed, the smaller or identical key is put in buff */
+  /* ret_pos points to where the found or next bigger key starts */
+  /* ARGSUSED */
+
+int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, byte *page,
+ byte *key, uint key_len, uint comp_flag, byte **ret_pos,
+ byte *buff __attribute__((unused)), my_bool *last_key)
+{
+ reg4 int start,mid,end,save_end;
+ int flag;
+ uint totlength,nod_flag,not_used[2];
+ DBUG_ENTER("_ma_bin_search");
+
+ LINT_INIT(flag);
+ totlength=keyinfo->keylength+(nod_flag=_ma_test_if_nod(page));
+ start=0; mid=1;
+ save_end=end=(int) ((maria_getint(page)-2-nod_flag)/totlength-1);
+ DBUG_PRINT("test",("maria_getint: %d end: %d",maria_getint(page),end));
+ page+=2+nod_flag;
+
+ while (start != end)
+ {
+ mid= (start+end)/2;
+ if ((flag=ha_key_cmp(keyinfo->seg,(uchar*) page+(uint) mid*totlength,
+ (uchar*) key, key_len, comp_flag, not_used))
+ >= 0)
+ end=mid;
+ else
+ start=mid+1;
+ }
+ if (mid != start)
+ flag=ha_key_cmp(keyinfo->seg, (uchar*) page+(uint) start*totlength,
+ (uchar*) key, key_len, comp_flag, not_used);
+ if (flag < 0)
+ start++; /* point at next, bigger key */
+ *ret_pos=page+(uint) start*totlength;
+ *last_key= end == save_end;
+ DBUG_PRINT("exit",("flag: %d keypos: %d",flag,start));
+ DBUG_RETURN(flag);
+} /* _ma_bin_search */
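+
+/*
+  Note: this binary search relies on fixed-length keys, so the n:th key
+  starts at page + 2 + nod_flag + n * (keylength + nod_flag).  Packed or
+  variable-length keys are handled by _ma_seq_search() and
+  _ma_prefix_search() below instead.
+*/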
+
+
+/*
+ Locate a packed key in a key page.
+
+ SYNOPSIS
+ _ma_seq_search()
+ info Open table information.
+ keyinfo Key definition information.
+ page Key page (beginning).
+ key Search key.
+ key_len Length to use from search key or USE_WHOLE_KEY
+ comp_flag Search flags like SEARCH_SAME etc.
+ ret_pos RETURN Position in key page behind this key.
+ buff RETURN Copy of previous or identical unpacked key.
+ last_key RETURN If key is last in page.
+
+ DESCRIPTION
+ Used instead of _ma_bin_search() when key is packed.
+ Puts smaller or identical key in buff.
+ Key is searched sequentially.
+
+ RETURN
+ > 0 Key in 'buff' is smaller than search key.
+ 0 Key in 'buff' is identical to search key.
+ < 0 Not found.
+*/
+
+int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, byte *page,
+ byte *key, uint key_len, uint comp_flag, byte **ret_pos,
+ byte *buff, my_bool *last_key)
+{
+ int flag;
+ uint nod_flag,length,not_used[2];
+ byte t_buff[HA_MAX_KEY_BUFF],*end;
+ DBUG_ENTER("_ma_seq_search");
+
+ LINT_INIT(flag); LINT_INIT(length);
+ end= page+maria_getint(page);
+ nod_flag=_ma_test_if_nod(page);
+ page+=2+nod_flag;
+ *ret_pos=page;
+ t_buff[0]=0; /* Avoid bugs */
+ while (page < end)
+ {
+ length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff);
+ if (length == 0 || page > end)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_PRINT("error",
+ ("Found wrong key: length: %u page: 0x%lx end: 0x%lx",
+ length, (long) page, (long) end));
+ DBUG_RETURN(MARIA_FOUND_WRONG_KEY);
+ }
+ if ((flag= ha_key_cmp(keyinfo->seg, (uchar*) t_buff,(uchar*) key,
+ key_len,comp_flag, not_used)) >= 0)
+ break;
+#ifdef EXTRA_DEBUG
+ DBUG_PRINT("loop",("page: 0x%lx key: '%s' flag: %d", (long) page, t_buff,
+ flag));
+#endif
+ memcpy(buff,t_buff,length);
+ *ret_pos=page;
+ }
+ if (flag == 0)
+ memcpy(buff,t_buff,length); /* Result is first key */
+ *last_key= page == end;
+ DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos));
+ DBUG_RETURN(flag);
+} /* _ma_seq_search */
+
+
+int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ byte *page, byte *key, uint key_len, uint nextflag,
+ byte **ret_pos, byte *buff, my_bool *last_key)
+{
+ /*
+    my_flag is the raw comparison result, to be adjusted according to the
+    SEARCH_NO_FIND, SEARCH_LAST and HA_REVERSE_SORT flags.
+    flag is the value returned by ha_key_cmp and is treated as final.
+ */
+ int flag=0, my_flag=-1;
+ uint nod_flag, length, len, matched, cmplen, kseg_len;
+ uint prefix_len,suffix_len;
+ int key_len_skip, seg_len_pack, key_len_left;
+ byte *end;
+ uchar *kseg, *vseg, *saved_vseg, *saved_from;
+ uchar *sort_order= keyinfo->seg->charset->sort_order;
+ byte tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2;
+ byte *saved_to;
+ uint saved_length=0, saved_prefix_len=0;
+ uint length_pack;
+ DBUG_ENTER("_ma_prefix_search");
+
+ LINT_INIT(length);
+ LINT_INIT(prefix_len);
+ LINT_INIT(seg_len_pack);
+ LINT_INIT(saved_from);
+ LINT_INIT(saved_to);
+ LINT_INIT(saved_vseg);
+
+ t_buff[0]=0; /* Avoid bugs */
+ end= page+maria_getint(page);
+ nod_flag=_ma_test_if_nod(page);
+ page+=2+nod_flag;
+ *ret_pos=page;
+ kseg= (uchar*) key;
+
+ get_key_pack_length(kseg_len, length_pack, kseg);
+ key_len_skip=length_pack+kseg_len;
+ key_len_left=(int) key_len- (int) key_len_skip;
+  /* If key_len is 0, then length_pack is 1, then key_len_left is -1. */
+ cmplen=(key_len_left>=0) ? kseg_len : key_len-length_pack;
+ DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg));
+
+ /*
+ Keys are compressed the following way:
+
+    If the max length of the first key segment is <= 127 bytes, the prefix
+    is 1 byte, else it is 2 bytes.
+
+    (prefix) length   The high bit is set if this is a prefix for the prev key.
+    [suffix length]   Packed length of the suffix if the previous was a prefix.
+    (suffix) data     Key data bytes (past the common prefix or whole segment).
+    [next-key-seg]    Next key segments (([packed length], data), ...)
+    pointer           Reference to the data file (last_keyseg->length).
+ */
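+  /*
+    Worked example (hypothetical key values): assume the first key segment
+    is short enough for a 1-byte prefix length.  If the previous key on the
+    page starts with "maria" and the current key is "mariadb", the current
+    key can be stored roughly as
+      0x85   - high bit set, shared prefix length 5 ("maria")
+      0x02   - packed suffix length
+      "db"   - suffix data
+    followed by the remaining key segments and the data file pointer.  An
+    unpacked key instead starts with a plain length byte (high bit clear)
+    followed by the whole segment.
+  */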
+
+  matched=0;  /* how many chars from the prefix were already matched */
+ len=0; /* length of previous key unpacked */
+
+ while (page < end)
+ {
+ uint packed= *page & 128;
+
+ vseg= (uchar*) page;
+ if (keyinfo->seg->length >= 127)
+ {
+ suffix_len=mi_uint2korr(vseg) & 32767;
+ vseg+=2;
+ }
+ else
+ suffix_len= *vseg++ & 127;
+
+ if (packed)
+ {
+ if (suffix_len == 0)
+ {
+ /* == 0x80 or 0x8000, same key, prefix length == old key length. */
+ prefix_len=len;
+ }
+ else
+ {
+        /* > 0x80 or 0x8000: this is the prefix length; the packed suffix length follows. */
+ prefix_len=suffix_len;
+ get_key_length(suffix_len,vseg);
+ }
+ }
+ else
+ {
+ /* Not packed. No prefix used from last key. */
+ prefix_len=0;
+ }
+
+ len=prefix_len+suffix_len;
+ seg_len_pack=get_pack_length(len);
+ t_buff=tt_buff+3-seg_len_pack;
+ store_key_length(t_buff,len);
+
+ if (prefix_len > saved_prefix_len)
+ memcpy(t_buff+seg_len_pack+saved_prefix_len,saved_vseg,
+ prefix_len-saved_prefix_len);
+ saved_vseg=vseg;
+ saved_prefix_len=prefix_len;
+
+ DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack,
+ suffix_len,vseg));
+ {
+ uchar *from= vseg+suffix_len;
+ HA_KEYSEG *keyseg;
+ uint l;
+
+ for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ )
+ {
+
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ if (!(*from++))
+ continue;
+ }
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK))
+ {
+ get_key_length(l,from);
+ }
+ else
+ l=keyseg->length;
+
+ from+=l;
+ }
+ from+= keyseg->length;
+ page= (byte*) from+nod_flag;
+ length= (uint) (from-vseg);
+ }
+
+ if (page > end)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_PRINT("error",
+ ("Found wrong key: length: %u page: 0x%lx end: %lx",
+ length, (long) page, (long) end));
+ DBUG_RETURN(MARIA_FOUND_WRONG_KEY);
+ }
+
+ if (matched >= prefix_len)
+ {
+ /* We have to compare. But we can still skip part of the key */
+ uint left;
+ uchar *k= kseg+prefix_len;
+
+ /*
+ If prefix_len > cmplen then we are in the end-space comparison
+        phase. Do not try to access the key any more ==> left= 0.
+ */
+ left= ((len <= cmplen) ? suffix_len :
+ ((prefix_len < cmplen) ? cmplen - prefix_len : 0));
+
+ matched=prefix_len+left;
+
+ if (sort_order)
+ {
+ for (my_flag=0;left;left--)
+ if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++]))
+ break;
+ }
+ else
+ {
+ for (my_flag=0;left;left--)
+ if ((my_flag= (int) *vseg++ - (int) *k++))
+ break;
+ }
+
+ if (my_flag>0) /* mismatch */
+ break;
+ if (my_flag==0) /* match */
+ {
+          /*
+          **  len  cmplen  seg_left_len  more_segs
+          **   <                                    matched=len; continue search
+          **   >     =                              prefix ? found : (matched=len; continue search)
+          **   >     <            -                 ok, found
+          **   =     <            -                 ok, found
+          **   =     =            -                 ok, found
+          **   =     =            +                 next seg
+          */
+ if (len < cmplen)
+ {
+ if ((keyinfo->seg->type != HA_KEYTYPE_TEXT &&
+ keyinfo->seg->type != HA_KEYTYPE_VARTEXT1 &&
+ keyinfo->seg->type != HA_KEYTYPE_VARTEXT2))
+ my_flag= -1;
+ else
+ {
+ /* We have to compare k and vseg as if they were space extended */
+ uchar *end= k+ (cmplen - len);
+ for ( ; k < end && *k == ' '; k++) ;
+ if (k == end)
+ goto cmp_rest; /* should never happen */
+ if ((uchar) *k < (uchar) ' ')
+ {
+ my_flag= 1; /* Compared string is smaller */
+ break;
+ }
+ my_flag= -1; /* Continue searching */
+ }
+ }
+ else if (len > cmplen)
+ {
+ uchar *end;
+ if ((nextflag & SEARCH_PREFIX) && key_len_left == 0)
+ goto fix_flag;
+
+ /* We have to compare k and vseg as if they were space extended */
+ for (end=vseg + (len-cmplen) ;
+ vseg < end && *vseg == (byte) ' ';
+ vseg++, matched++) ;
+ DBUG_ASSERT(vseg < end);
+
+ if ((uchar) *vseg > (uchar) ' ')
+ {
+ my_flag= 1; /* Compared string is smaller */
+ break;
+ }
+ my_flag= -1; /* Continue searching */
+ }
+ else
+ {
+ cmp_rest:
+ if (key_len_left>0)
+ {
+ uint not_used[2];
+ if ((flag = ha_key_cmp(keyinfo->seg+1,vseg,
+ k, key_len_left, nextflag, not_used)) >= 0)
+ break;
+ }
+ else
+ {
+ /*
+ at this point flag == -1 if the lines below were already
+ visited and 0 otherwise, i.e. flag <= 0 always holds here
+ */
+ fix_flag:
+ DBUG_ASSERT(flag <= 0);
+ if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST))
+ flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1;
+ if (flag>=0)
+ break;
+ }
+ }
+ }
+ matched-=left;
+ }
+ /* else (matched < prefix_len) ---> do nothing. */
+
+ memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len);
+ saved_to= buff+saved_length;
+ saved_from= saved_vseg;
+ saved_length=length;
+ *ret_pos=page;
+ }
+ if (my_flag)
+ flag=(keyinfo->seg->flag & HA_REVERSE_SORT) ? -my_flag : my_flag;
+ if (flag == 0)
+ {
+ memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len);
+ saved_to= buff+saved_length;
+ saved_from= saved_vseg;
+ saved_length=length;
+ }
+ if (saved_length)
+ memcpy(saved_to, (byte*) saved_from, saved_length);
+
+ *last_key= page == end;
+
+ DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos));
+ DBUG_RETURN(flag);
+} /* _ma_prefix_search */
+
+
+ /* Get pos to a key_block */
+
+my_off_t _ma_kpos(uint nod_flag, byte *after_key)
+{
+ after_key-=nod_flag;
+ switch (nod_flag) {
+#if SIZEOF_OFF_T > 4
+ case 7:
+ return mi_uint7korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 6:
+ return mi_uint6korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 5:
+ return mi_uint5korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH;
+#else
+ case 7:
+ after_key++;
+ case 6:
+ after_key++;
+ case 5:
+ after_key++;
+#endif
+ case 4:
+ return ((my_off_t) mi_uint4korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 3:
+ return ((my_off_t) mi_uint3korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 2:
+ return (my_off_t) (mi_uint2korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH);
+ case 1:
+ return (uint) (*after_key)*MARIA_MIN_KEY_BLOCK_LENGTH;
+ case 0: /* At leaf page */
+ default: /* Impossible */
+ return(HA_OFFSET_ERROR);
+ }
+} /* _ma_kpos */
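+
+/*
+ Illustrative sketch, not part of the original file: how a 3-byte node
+ pointer (nod_flag == 3) could be decoded by hand. _ma_kpos() above does
+ the same through the big-endian mi_uintNkorr() macros; the function name
+ and the scale factor 1024 (an assumed value of MARIA_MIN_KEY_BLOCK_LENGTH)
+ are hypothetical and only serve the example.
+*/
+
+static my_off_t example_decode_kpos_3(const unsigned char *after_key,
+ uint nod_flag)
+{
+ const unsigned char *p= after_key - nod_flag; /* pointer is stored last */
+ my_off_t block= ((my_off_t) p[0] << 16) | /* big-endian: high byte first */
+ ((my_off_t) p[1] << 8) |
+ (my_off_t) p[2];
+ return block * 1024; /* assumed MARIA_MIN_KEY_BLOCK_LENGTH */
+}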
+
+
+ /* Save pos to a key_block */
+
+void _ma_kpointer(register MARIA_HA *info, register byte *buff, my_off_t pos)
+{
+ pos/=MARIA_MIN_KEY_BLOCK_LENGTH;
+ switch (info->s->base.key_reflength) {
+#if SIZEOF_OFF_T > 4
+ case 7: mi_int7store(buff,pos); break;
+ case 6: mi_int6store(buff,pos); break;
+ case 5: mi_int5store(buff,pos); break;
+#else
+ case 7: *buff++=0;
+ /* fall through */
+ case 6: *buff++=0;
+ /* fall through */
+ case 5: *buff++=0;
+ /* fall through */
+#endif
+ case 4: mi_int4store(buff,pos); break;
+ case 3: mi_int3store(buff,pos); break;
+ case 2: mi_int2store(buff,(uint) pos); break;
+ case 1: buff[0]= (char) (uchar) pos; break;
+ default: abort(); /* impossible */
+ }
+} /* _ma_kpointer */
+
+
+ /* Calc pos to a data-record from a key */
+
+
+my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, const byte *after_key)
+{
+ my_off_t pos;
+ after_key-=(nod_flag + info->s->rec_reflength);
+ switch (info->s->rec_reflength) {
+#if SIZEOF_OFF_T > 4
+ case 8: pos= (my_off_t) mi_uint8korr(after_key); break;
+ case 7: pos= (my_off_t) mi_uint7korr(after_key); break;
+ case 6: pos= (my_off_t) mi_uint6korr(after_key); break;
+ case 5: pos= (my_off_t) mi_uint5korr(after_key); break;
+#else
+ case 8: pos= (my_off_t) mi_uint4korr(after_key+4); break;
+ case 7: pos= (my_off_t) mi_uint4korr(after_key+3); break;
+ case 6: pos= (my_off_t) mi_uint4korr(after_key+2); break;
+ case 5: pos= (my_off_t) mi_uint4korr(after_key+1); break;
+#endif
+ case 4: pos= (my_off_t) mi_uint4korr(after_key); break;
+ case 3: pos= (my_off_t) mi_uint3korr(after_key); break;
+ case 2: pos= (my_off_t) mi_uint2korr(after_key); break;
+ default:
+ pos=0L; /* Shut compiler up */
+ }
+ return ((info->s->data_file_type == STATIC_RECORD) ?
+ pos * info->s->base.pack_reclength : pos);
+}
+
+
+/* Calc position from a record pointer ( in delete link chain ) */
+
+my_off_t _ma_rec_pos(MARIA_SHARE *s, byte *ptr)
+{
+ my_off_t pos;
+ switch (s->rec_reflength) {
+#if SIZEOF_OFF_T > 4
+ case 8:
+ pos= (my_off_t) mi_uint8korr(ptr);
+ if (pos == HA_OFFSET_ERROR)
+ return HA_OFFSET_ERROR; /* end of list */
+ break;
+ case 7:
+ pos= (my_off_t) mi_uint7korr(ptr);
+ if (pos == (((my_off_t) 1) << 56) -1)
+ return HA_OFFSET_ERROR; /* end of list */
+ break;
+ case 6:
+ pos= (my_off_t) mi_uint6korr(ptr);
+ if (pos == (((my_off_t) 1) << 48) -1)
+ return HA_OFFSET_ERROR; /* end of list */
+ break;
+ case 5:
+ pos= (my_off_t) mi_uint5korr(ptr);
+ if (pos == (((my_off_t) 1) << 40) -1)
+ return HA_OFFSET_ERROR; /* end of list */
+ break;
+#else
+ case 8:
+ case 7:
+ case 6:
+ case 5:
+ ptr+= (s->rec_reflength-4);
+ /* fall through */
+#endif
+ case 4:
+ pos= (my_off_t) mi_uint4korr(ptr);
+ if (pos == (my_off_t) (uint32) ~0L)
+ return HA_OFFSET_ERROR;
+ break;
+ case 3:
+ pos= (my_off_t) mi_uint3korr(ptr);
+ if (pos == (my_off_t) (1 << 24) -1)
+ return HA_OFFSET_ERROR;
+ break;
+ case 2:
+ pos= (my_off_t) mi_uint2korr(ptr);
+ if (pos == (my_off_t) (1 << 16) -1)
+ return HA_OFFSET_ERROR;
+ break;
+ default: abort(); /* Impossible */
+ }
+ return ((s->data_file_type == STATIC_RECORD) ?
+ pos * s->base.pack_reclength : pos);
+}
+
+
+ /* save position to record */
+
+void _ma_dpointer(MARIA_HA *info, byte *buff, my_off_t pos)
+{
+ if (info->s->data_file_type == STATIC_RECORD &&
+ pos != HA_OFFSET_ERROR)
+ pos/= info->s->base.pack_reclength;
+
+ switch (info->s->rec_reflength) {
+#if SIZEOF_OFF_T > 4
+ case 8: mi_int8store(buff,pos); break;
+ case 7: mi_int7store(buff,pos); break;
+ case 6: mi_int6store(buff,pos); break;
+ case 5: mi_int5store(buff,pos); break;
+#else
+ case 8: *buff++=0;
+ /* fall through */
+ case 7: *buff++=0;
+ /* fall through */
+ case 6: *buff++=0;
+ /* fall through */
+ case 5: *buff++=0;
+ /* fall through */
+#endif
+ case 4: mi_int4store(buff,pos); break;
+ case 3: mi_int3store(buff,pos); break;
+ case 2: mi_int2store(buff,(uint) pos); break;
+ default: abort(); /* Impossible */
+ }
+} /* _ma_dpointer */
+
+
+ /* Get key from key-block */
+ /* page points at previous key; it is advanced to point at next key */
+ /* key should contain previous key */
+ /* Returns length of found key + pointers */
+ /* nod_flag is set if we are on a node (non-leaf) page */
+
+ /* same as _ma_get_key but used with fixed length keys */
+
+uint _ma_get_static_key(register MARIA_KEYDEF *keyinfo, uint nod_flag,
+ register byte **page, register byte *key)
+{
+ memcpy((byte*) key,(byte*) *page,
+ (size_t) (keyinfo->keylength+nod_flag));
+ *page+=keyinfo->keylength+nod_flag;
+ return(keyinfo->keylength);
+} /* _ma_get_static_key */
+
+
+/*
+ get key which is packed against previous key or key with a NULL column.
+
+ SYNOPSIS
+ _ma_get_pack_key()
+ keyinfo key definition information.
+ nod_flag If nod: Length of node pointer, else zero.
+ page_pos RETURN position in key page behind this key.
+ key IN/OUT in: prev key, out: unpacked key.
+
+ RETURN
+ key_length + length of data pointer
+*/
+
+uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag,
+ register byte **page_pos, register byte *key)
+{
+ reg1 HA_KEYSEG *keyseg;
+ byte *start_key,*page=*page_pos;
+ uint length;
+
+ start_key=key;
+ for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++)
+ {
+ if (keyseg->flag & HA_PACK_KEY)
+ {
+ /* key with length, packed to previous key */
+ byte *start= key;
+ uint packed= *page & 128,tot_length,rest_length;
+ if (keyseg->length >= 127)
+ {
+ length=mi_uint2korr(page) & 32767;
+ page+=2;
+ }
+ else
+ length= *page++ & 127;
+
+ if (packed)
+ {
+ if (length > (uint) keyseg->length)
+ {
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ return 0; /* Error */
+ }
+ if (length == 0) /* Same key */
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ *key++=1; /* Can't be NULL */
+ get_key_length(length,key);
+ key+= length; /* Same diff_key as prev */
+ if (length > keyseg->length)
+ {
+ DBUG_PRINT("error",
+ ("Found too long null packed key: %u of %u at 0x%lx",
+ length, keyseg->length, (long) *page_pos));
+ DBUG_DUMP("key",(char*) *page_pos,16);
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ return 0;
+ }
+ continue;
+ }
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ key++; /* Skip null marker*/
+ start++;
+ }
+
+ get_key_length(rest_length,page);
+ tot_length=rest_length+length;
+
+ /* If the stored length has changed, we must move the key */
+ if (tot_length >= 255 && *start != (char) 255)
+ {
+ /* length prefix changed from a length of one to a length of 3 */
+ bmove_upp((char*) key+length+3,(char*) key+length+1,length);
+ *key=255;
+ mi_int2store(key+1,tot_length);
+ key+=3+length;
+ }
+ else if (tot_length < 255 && *start == (char) 255)
+ {
+ bmove(key+1,key+3,length);
+ *key=tot_length;
+ key+=1+length;
+ }
+ else
+ {
+ store_key_length_inc(key,tot_length);
+ key+=length;
+ }
+ memcpy(key,page,rest_length);
+ page+=rest_length;
+ key+=rest_length;
+ continue;
+ }
+ else
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ if (!length--) /* Null part */
+ {
+ *key++=0;
+ continue;
+ }
+ *key++=1; /* Not null */
+ }
+ }
+ if (length > (uint) keyseg->length)
+ {
+ DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx",
+ length, keyseg->length, (long) *page_pos));
+ DBUG_DUMP("key",(char*) *page_pos,16);
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ return 0; /* Error */
+ }
+ store_key_length_inc(key,length);
+ }
+ else
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ if (!(*key++ = *page++))
+ continue;
+ }
+ if (keyseg->flag &
+ (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK))
+ {
+ byte *tmp=page;
+ get_key_length(length,tmp);
+ length+=(uint) (tmp-page);
+ }
+ else
+ length=keyseg->length;
+ }
+ memcpy((byte*) key,(byte*) page,(size_t) length);
+ key+=length;
+ page+=length;
+ }
+ length=keyseg->length+nod_flag;
+ bmove((byte*) key,(byte*) page,length);
+ *page_pos= page+length;
+ return ((uint) (key-start_key)+keyseg->length);
+} /* _ma_get_pack_key */
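+
+/*
+ Illustrative sketch, not part of the original file: how the packed length
+ prefix read at the top of _ma_get_pack_key() above could be parsed in
+ isolation. The high bit of the first byte marks a key that is packed
+ against the previous key; the remaining bits hold the length. When the
+ first key segment can be 127 bytes or longer, the prefix takes two
+ big-endian bytes instead of one. The function name is hypothetical.
+*/
+
+static uint example_parse_pack_prefix(const unsigned char **pos,
+ my_bool two_byte_prefix,
+ my_bool *packed)
+{
+ uint length;
+ *packed= (my_bool) (**pos & 128); /* high bit: packed against prev key */
+ if (two_byte_prefix)
+ {
+ length= (((uint) (*pos)[0] << 8) | (*pos)[1]) & 32767;
+ (*pos)+= 2;
+ }
+ else
+ length= (uint) (*(*pos)++ & 127);
+ return length;
+}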
+
+
+
+/* key that is packed relatively to previous */
+
+uint _ma_get_binary_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag,
+ register byte **page_pos, register byte *key)
+{
+ reg1 HA_KEYSEG *keyseg;
+ byte *start_key,*page,*page_end,*from,*from_end;
+ uint length,tmp;
+ DBUG_ENTER("_ma_get_binary_pack_key");
+
+ page= *page_pos;
+ page_end=page+HA_MAX_KEY_BUFF+1;
+ start_key=key;
+
+ /*
+ Keys are compressed the following way:
+
+ prefix length Packed length of prefix for the prev key. (1 or 3 bytes)
+ for each key segment:
+ [is null] Null indicator if can be null (1 byte, zero means null)
+ [length] Packed length if varlength (1 or 3 bytes)
+ pointer Reference to the data file (last_keyseg->length).
+ */
+ get_key_length(length,page);
+ if (length)
+ {
+ if (length > keyinfo->maxlength)
+ {
+ DBUG_PRINT("error",
+ ("Found too long binary packed key: %u of %u at 0x%lx",
+ length, keyinfo->maxlength, (long) *page_pos));
+ DBUG_DUMP("key",(char*) *page_pos,16);
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0); /* Wrong key */
+ }
+ from=key; from_end=key+length;
+ }
+ else
+ {
+ from=page; from_end=page_end; /* Not packed key */
+ }
+
+ /*
+ The trouble is that key is split in two parts:
+ The first part is in from ...from_end-1.
+ The second part starts at page
+ */
+ for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++)
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ if (from == from_end) { from=page; from_end=page_end; }
+ if (!(*key++ = *from++))
+ continue; /* Null part */
+ }
+ if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK))
+ {
+ /* Get length of dynamic length key part */
+ if (from == from_end) { from=page; from_end=page_end; }
+ if ((length= (*key++ = *from++)) == 255)
+ {
+ if (from == from_end) { from=page; from_end=page_end; }
+ length= (uint) ((*key++ = *from++)) << 8;
+ if (from == from_end) { from=page; from_end=page_end; }
+ length+= (uint) ((*key++ = *from++));
+ }
+ }
+ else
+ length=keyseg->length;
+
+ if ((tmp=(uint) (from_end-from)) <= length)
+ {
+ key+=tmp; /* Use old key */
+ length-=tmp;
+ from=page; from_end=page_end;
+ }
+ DBUG_PRINT("info",("key: 0x%lx from: 0x%lx length: %u",
+ (long) key, (long) from, length));
+ memmove((byte*) key, (byte*) from, (size_t) length);
+ key+=length;
+ from+=length;
+ }
+ length=keyseg->length+nod_flag;
+ if ((tmp=(uint) (from_end-from)) <= length)
+ {
+ memcpy(key+tmp,page,length-tmp); /* Get last part of key */
+ *page_pos= page+length-tmp;
+ }
+ else
+ {
+ if (from_end != page_end)
+ {
+ DBUG_PRINT("error",("Error when unpacking key"));
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0); /* Error */
+ }
+ memcpy((byte*) key,(byte*) from,(size_t) length);
+ *page_pos= from+length;
+ }
+ DBUG_RETURN((uint) (key-start_key)+keyseg->length);
+}
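+
+/*
+ Illustrative sketch, not part of the original file: the 1-or-3-byte
+ length encoding that the get_key_length() / store_key_length() helpers
+ used above are assumed to follow: lengths below 255 take a single byte,
+ longer lengths take a 0xFF marker followed by a 2-byte big-endian value.
+ The function name is hypothetical.
+*/
+
+static uint example_read_packed_length(const unsigned char **pos)
+{
+ uint length;
+ if (**pos != 255)
+ length= (uint) *(*pos)++; /* short form: one byte */
+ else
+ {
+ length= ((uint) (*pos)[1] << 8) | (*pos)[2]; /* 0xFF marker + 2 bytes */
+ (*pos)+= 3;
+ }
+ return length;
+}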
+
+
+ /* Get key at position without knowledge of previous key */
+ /* Returns pointer to next key */
+
+byte *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page,
+ byte *key, byte *keypos, uint *return_key_length)
+{
+ uint nod_flag;
+ DBUG_ENTER("_ma_get_key");
+
+ nod_flag=_ma_test_if_nod(page);
+ if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ {
+ bmove((byte*) key,(byte*) keypos,keyinfo->keylength+nod_flag);
+ DBUG_RETURN(keypos+keyinfo->keylength+nod_flag);
+ }
+ else
+ {
+ page+=2+nod_flag;
+ key[0]=0; /* safety */
+ while (page <= keypos)
+ {
+ *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key);
+ if (*return_key_length == 0)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0);
+ }
+ }
+ }
+ DBUG_PRINT("exit",("page: 0x%lx length: %u", (long) page,
+ *return_key_length));
+ DBUG_RETURN(page);
+} /* _ma_get_key */
+
+
+ /* Get key at position without knowledge of previous key */
+ /* Returns 0 if ok */
+
+static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *page, byte *key, byte *keypos,
+ uint *return_key_length)
+{
+ uint nod_flag;
+ DBUG_ENTER("_ma_get_prev_key");
+
+ nod_flag=_ma_test_if_nod(page);
+ if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ {
+ *return_key_length=keyinfo->keylength;
+ bmove((byte*) key,(byte*) keypos- *return_key_length-nod_flag,
+ *return_key_length);
+ DBUG_RETURN(0);
+ }
+ else
+ {
+ page+=2+nod_flag;
+ key[0]=0; /* safety */
+ while (page < keypos)
+ {
+ *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key);
+ if (*return_key_length == 0)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ DBUG_RETURN(0);
+} /* _ma_get_prev_key */
+
+
+
+ /* Get last key from key-page */
+ /* Return pointer to where key starts */
+
+byte *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page,
+ byte *lastkey, byte *endpos, uint *return_key_length)
+{
+ uint nod_flag;
+ byte *lastpos;
+ DBUG_ENTER("_ma_get_last_key");
+ DBUG_PRINT("enter",("page: 0x%lx endpos: 0x%lx", (long) page,
+ (long) endpos));
+
+ nod_flag=_ma_test_if_nod(page);
+ if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ {
+ lastpos=endpos-keyinfo->keylength-nod_flag;
+ *return_key_length=keyinfo->keylength;
+ if (lastpos > page)
+ bmove((byte*) lastkey,(byte*) lastpos,keyinfo->keylength+nod_flag);
+ }
+ else
+ {
+ lastpos=(page+=2+nod_flag);
+ lastkey[0]=0;
+ while (page < endpos)
+ {
+ lastpos=page;
+ *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,lastkey);
+ if (*return_key_length == 0)
+ {
+ DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx",
+ (long) page));
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0);
+ }
+ }
+ }
+ DBUG_PRINT("exit",("lastpos: 0x%lx length: %u", (long) lastpos,
+ *return_key_length));
+ DBUG_RETURN(lastpos);
+} /* _ma_get_last_key */
+
+
+ /* Calculate length of key */
+
+uint _ma_keylength(MARIA_KEYDEF *keyinfo, register const byte *key)
+{
+ reg1 HA_KEYSEG *keyseg;
+ const byte *start;
+
+ if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)))
+ return (keyinfo->keylength);
+
+ start= key;
+ for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ if (!*key++)
+ continue;
+ if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ {
+ uint length;
+ get_key_length(length,key);
+ key+=length;
+ }
+ else
+ key+= keyseg->length;
+ }
+ return((uint) (key-start)+keyseg->length);
+} /* _ma_keylength */
+
+
+/*
+ Calculate length of part key.
+
+ Used in maria_rkey() to find the key found for the key-part that was used.
+ This is needed in case of multi-byte character sets where we may search
+ after '0xDF' but find 'ss'
+*/
+
+uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const byte *key,
+ HA_KEYSEG *end)
+{
+ reg1 HA_KEYSEG *keyseg;
+ const byte *start= key;
+
+ for (keyseg=keyinfo->seg ; keyseg != end ; keyseg++)
+ {
+ if (keyseg->flag & HA_NULL_PART)
+ if (!*key++)
+ continue;
+ if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ {
+ uint length;
+ get_key_length(length,key);
+ key+=length;
+ }
+ else
+ key+= keyseg->length;
+ }
+ return (uint) (key-start);
+}
+
+
+/* Move a key */
+
+byte *_ma_move_key(MARIA_KEYDEF *keyinfo, byte *to, const byte *from)
+{
+ reg1 uint length;
+ memcpy(to, from, (size_t) (length= _ma_keylength(keyinfo, from)));
+ return to+length;
+}
+
+
+/*
+ Find next/previous record with same key
+
+ WARNING
+ This can't be used if the table was modified after the last read
+*/
+
+int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ byte *key, uint key_length, uint nextflag, my_off_t pos)
+{
+ int error;
+ uint nod_flag;
+ byte lastkey[HA_MAX_KEY_BUFF];
+ DBUG_ENTER("_ma_search_next");
+ DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu page_changed %d keybuff_used: %d",
+ nextflag, (ulong) info->cur_row.lastpos,
+ (ulong) info->int_keypos,
+ info->page_changed, info->keybuff_used));
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_length););
+
+ /*
+ Force a full read if we are at the last key or if we are not on a leaf
+ and the key tree has changed since we used it last time.
+ Note that even if the key tree has changed since the last read, we can
+ use the last read data from the leaf if we haven't used the buffer for
+ something else.
+ */
+
+ if (((nextflag & SEARCH_BIGGER) && info->int_keypos >= info->int_maxpos) ||
+ info->page_changed ||
+ (info->int_keytree_version != keyinfo->version &&
+ (info->int_nod_flag || info->keybuff_used)))
+ DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY,
+ nextflag | SEARCH_SAVE_BUFF, pos));
+
+ if (info->keybuff_used)
+ {
+ if (!_ma_fetch_keypage(info,keyinfo,info->last_search_keypage,
+ DFLT_INIT_HITS,info->keyread_buff,0))
+ DBUG_RETURN(-1);
+ info->keybuff_used=0;
+ }
+
+ /* Last used buffer is in info->keyread_buff */
+ nod_flag=_ma_test_if_nod(info->keyread_buff);
+
+ if (nextflag & SEARCH_BIGGER) /* Next key */
+ {
+ my_off_t tmp_pos= _ma_kpos(nod_flag,info->int_keypos);
+ if (tmp_pos != HA_OFFSET_ERROR)
+ {
+ if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY,
+ nextflag | SEARCH_SAVE_BUFF, tmp_pos)) <=0)
+ DBUG_RETURN(error);
+ }
+ memcpy(lastkey,key,key_length);
+ if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,
+ &info->int_keypos,lastkey)))
+ DBUG_RETURN(-1);
+ }
+ else /* Previous key */
+ {
+ uint length;
+ /* Find start of previous key */
+ info->int_keypos= _ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey,
+ info->int_keypos, &length);
+ if (!info->int_keypos)
+ DBUG_RETURN(-1);
+ if (info->int_keypos == info->keyread_buff+2)
+ DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY,
+ nextflag | SEARCH_SAVE_BUFF, pos));
+ if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY,
+ nextflag | SEARCH_SAVE_BUFF,
+ _ma_kpos(nod_flag,info->int_keypos))) <= 0)
+ DBUG_RETURN(error);
+
+ /* QQ: We should be able to optimize away the following call */
+ if (! _ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey,
+ info->int_keypos,&info->lastkey_length))
+ DBUG_RETURN(-1);
+ }
+ memcpy(info->lastkey,lastkey,info->lastkey_length);
+ info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length);
+ DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos));
+ DBUG_RETURN(0);
+} /* _ma_search_next */
+
+
+ /* Search for the position of the first row in an index */
+ /* This is stored in info->cur_row.lastpos */
+
+int _ma_search_first(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ register my_off_t pos)
+{
+ uint nod_flag;
+ byte *page;
+ DBUG_ENTER("_ma_search_first");
+
+ if (pos == HA_OFFSET_ERROR)
+ {
+ my_errno=HA_ERR_KEY_NOT_FOUND;
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ DBUG_RETURN(-1);
+ }
+
+ do
+ {
+ if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->keyread_buff,0))
+ {
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ DBUG_RETURN(-1);
+ }
+ nod_flag=_ma_test_if_nod(info->keyread_buff);
+ page=info->keyread_buff+2+nod_flag;
+ } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR);
+
+ if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,
+ info->lastkey)))
+ DBUG_RETURN(-1); /* Crashed */
+
+ info->int_keypos=page; info->int_maxpos=info->keyread_buff+maria_getint(info->keyread_buff)-1;
+ info->int_nod_flag=nod_flag;
+ info->int_keytree_version=keyinfo->version;
+ info->last_search_keypage=info->last_keypage;
+ info->page_changed=info->keybuff_used=0;
+ info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length);
+
+ DBUG_PRINT("exit",("found key at %lu", (ulong) info->cur_row.lastpos));
+ DBUG_RETURN(0);
+} /* _ma_search_first */
+
+
+ /* Search for the position of the last row in an index */
+ /* This is stored in info->cur_row.lastpos */
+
+int _ma_search_last(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ register my_off_t pos)
+{
+ uint nod_flag;
+ byte *buff,*page;
+ DBUG_ENTER("_ma_search_last");
+
+ if (pos == HA_OFFSET_ERROR)
+ {
+ my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ DBUG_RETURN(-1);
+ }
+
+ buff=info->keyread_buff;
+ do
+ {
+ if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,buff,0))
+ {
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ DBUG_RETURN(-1);
+ }
+ page= buff+maria_getint(buff);
+ nod_flag=_ma_test_if_nod(buff);
+ } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR);
+
+ if (!_ma_get_last_key(info,keyinfo,buff,info->lastkey,page,
+ &info->lastkey_length))
+ DBUG_RETURN(-1);
+ info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length);
+ info->int_keypos=info->int_maxpos=page;
+ info->int_nod_flag=nod_flag;
+ info->int_keytree_version=keyinfo->version;
+ info->last_search_keypage=info->last_keypage;
+ info->page_changed=info->keybuff_used=0;
+
+ DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos));
+ DBUG_RETURN(0);
+} /* _ma_search_last */
+
+
+
+/****************************************************************************
+**
+** Functions to store and pack a key in a page
+**
+** maria_calc_xx_key_length takes the following arguments:
+** nod_flag If nod: Length of nod-pointer
+** next_key Position in buffer after the new key
+** org_key Key that was before the next key in buffer
+** prev_key Last key before current key
+** key Key that will be stored
+** s_temp Information how next key will be packed
+****************************************************************************/
+
+/* Static length key */
+
+int
+_ma_calc_static_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag,
+ byte *next_pos __attribute__((unused)),
+ byte *org_key __attribute__((unused)),
+ byte *prev_key __attribute__((unused)),
+ const byte *key, MARIA_KEY_PARAM *s_temp)
+{
+ s_temp->key= key;
+ return (int) (s_temp->totlength=keyinfo->keylength+nod_flag);
+}
+
+/* Variable length key */
+
+int
+_ma_calc_var_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag,
+ byte *next_pos __attribute__((unused)),
+ byte *org_key __attribute__((unused)),
+ byte *prev_key __attribute__((unused)),
+ const byte *key, MARIA_KEY_PARAM *s_temp)
+{
+ s_temp->key= key;
+ return (int) (s_temp->totlength= _ma_keylength(keyinfo,key)+nod_flag);
+}
+
+/*
+ length of key with a variable length first segment which is prefix
+ compressed (maria_chk reports 'packed + stripped')
+
+ Keys are compressed the following way:
+
+ If the max length of the first key segment is <= 127 bytes, the prefix is
+ 1 byte, else it is 2 bytes
+
+ prefix byte(s) The high bit is set if this is a prefix for the prev key
+ length Packed length if the previous was a prefix byte
+ [length] data bytes ('length' bytes)
+ next-key-seg Next key segments
+
+ If the first segment can have NULL:
+ The length is 0 for NULLs and 1+length for non-null columns.
+
+*/
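+
+/*
+ Illustrative example with hypothetical values, assuming a 1-byte prefix:
+ if the previous key's first segment is "maria" and the new key's first
+ segment is "mark", the new key can be stored as
+
+ 0x83 prefix byte: high bit set, 3 bytes reused from the previous key
+ 0x01 packed length of the suffix
+ 'k' suffix byte
+ ... remaining key segments and the data file pointer
+
+ A key that is not packed against the previous key instead starts with a
+ plain length byte (high bit clear) followed by all of its bytes.
+*/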
+
+int
+_ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ byte *next_key,
+ byte *org_key, byte *prev_key, const byte *key,
+ MARIA_KEY_PARAM *s_temp)
+{
+ reg1 HA_KEYSEG *keyseg;
+ int length;
+ uint key_length,ref_length,org_key_length=0,
+ length_pack,new_key_length,diff_flag,pack_marker;
+ const byte *start,*end,*key_end;
+ uchar *sort_order;
+ bool same_length;
+
+ length_pack=s_temp->ref_length=s_temp->n_ref_length=s_temp->n_length=0;
+ same_length=0; keyseg=keyinfo->seg;
+ key_length= _ma_keylength(keyinfo,key)+nod_flag;
+
+ sort_order=0;
+ if ((keyinfo->flag & HA_FULLTEXT) &&
+ ((keyseg->type == HA_KEYTYPE_TEXT) ||
+ (keyseg->type == HA_KEYTYPE_VARTEXT1) ||
+ (keyseg->type == HA_KEYTYPE_VARTEXT2)) &&
+ !use_strnxfrm(keyseg->charset))
+ sort_order= keyseg->charset->sort_order;
+
+ /* diff_flag contains how many bytes are needed to pack the key length */
+ if (keyseg->length >= 127)
+ {
+ diff_flag=2;
+ pack_marker=32768;
+ }
+ else
+ {
+ diff_flag= 1;
+ pack_marker=128;
+ }
+ s_temp->pack_marker=pack_marker;
+
+ /* Handle the case where the first part can have NULL values */
+ if (keyseg->flag & HA_NULL_PART)
+ {
+ if (!*key++)
+ {
+ s_temp->key= key;
+ s_temp->key_length= 0;
+ s_temp->totlength= key_length-1+diff_flag;
+ s_temp->next_key_pos= 0; /* No next key */
+ return (s_temp->totlength);
+ }
+ s_temp->store_not_null=1;
+ key_length--; /* We don't store NULL */
+ if (prev_key && !*prev_key++)
+ org_key=prev_key=0; /* Can't pack against prev */
+ else if (org_key)
+ org_key++; /* Skip NULL */
+ }
+ else
+ s_temp->store_not_null=0;
+ s_temp->prev_key= org_key;
+
+ /* The key part will start with a packed length */
+
+ get_key_pack_length(new_key_length,length_pack,key);
+ end= key_end= key+ new_key_length;
+ start= key;
+
+ /* Calc how many characters are identical between this and the prev. key */
+ if (prev_key)
+ {
+ get_key_length(org_key_length,prev_key);
+ s_temp->prev_key=prev_key; /* Pointer at data */
+ /* Don't use key-pack if length == 0 */
+ if (new_key_length && new_key_length == org_key_length)
+ same_length=1;
+ else if (new_key_length > org_key_length)
+ end= key + org_key_length;
+
+ if (sort_order) /* SerG */
+ {
+ while (key < end &&
+ sort_order[* (uchar*) key] == sort_order[* (uchar*) prev_key])
+ {
+ key++; prev_key++;
+ }
+ }
+ else
+ {
+ while (key < end && *key == *prev_key)
+ {
+ key++; prev_key++;
+ }
+ }
+ }
+
+ s_temp->key=key;
+ s_temp->key_length= (uint) (key_end-key);
+
+ if (same_length && key == key_end)
+ {
+ /* identical variable length key */
+ s_temp->ref_length= pack_marker;
+ length=(int) key_length-(int) (key_end-start)-length_pack;
+ length+= diff_flag;
+ if (next_key)
+ { /* Can't combine with next */
+ s_temp->n_length= *next_key; /* Needed by _ma_store_key */
+ next_key=0;
+ }
+ }
+ else
+ {
+ if (start != key)
+ { /* Starts as prev key */
+ ref_length= (uint) (key-start);
+ s_temp->ref_length= ref_length + pack_marker;
+ length= (int) (key_length - ref_length);
+
+ length-= length_pack;
+ length+= diff_flag;
+ length+= ((new_key_length-ref_length) >= 255) ? 3 : 1;/* Rest_of_key */
+ }
+ else
+ {
+ s_temp->key_length+=s_temp->store_not_null; /* If null */
+ length= key_length - length_pack+ diff_flag;
+ }
+ }
+ s_temp->totlength=(uint) length;
+ s_temp->prev_length=0;
+ DBUG_PRINT("test",("tot_length: %u length: %d uniq_key_length: %u",
+ key_length, length, s_temp->key_length));
+
+ /* If there is a following key that doesn't have length 0, test if we can combine */
+ if ((s_temp->next_key_pos=next_key))
+ {
+ uint packed,n_length;
+
+ packed = *next_key & 128;
+ if (diff_flag == 2)
+ {
+ n_length= mi_uint2korr(next_key) & 32767; /* Length of next key */
+ next_key+=2;
+ }
+ else
+ n_length= *next_key++ & 127;
+ if (!packed)
+ n_length-= s_temp->store_not_null;
+
+ if (n_length || packed) /* Don't pack 0 length keys */
+ {
+ uint next_length_pack, new_ref_length=s_temp->ref_length;
+
+ if (packed)
+ {
+ /* If this is the first key and the next key is packed (only on delete) */
+ if (!prev_key && org_key)
+ {
+ get_key_length(org_key_length,org_key);
+ key=start;
+ if (sort_order) /* SerG */
+ {
+ while (key < end &&
+ sort_order[*(uchar*) key] == sort_order[*(uchar*) org_key])
+ {
+ key++; org_key++;
+ }
+ }
+ else
+ {
+ while (key < end && *key == *org_key)
+ {
+ key++; org_key++;
+ }
+ }
+ if ((new_ref_length= (uint) (key - start)))
+ new_ref_length+=pack_marker;
+ }
+
+ if (!n_length)
+ {
+ /*
+ We put a different key between two identical variable length keys
+ Extend next key to have same prefix as this key
+ */
+ if (new_ref_length) /* prefix of previous key */
+ { /* make next key longer */
+ s_temp->part_of_prev_key= new_ref_length;
+ s_temp->prev_length= org_key_length -
+ (new_ref_length-pack_marker);
+ s_temp->n_ref_length= s_temp->part_of_prev_key;
+ s_temp->n_length= s_temp->prev_length;
+ n_length= get_pack_length(s_temp->prev_length);
+ s_temp->prev_key+= (new_ref_length - pack_marker);
+ length+= s_temp->prev_length + n_length;
+ }
+ else
+ { /* Can't use prev key */
+ s_temp->part_of_prev_key=0;
+ s_temp->prev_length= org_key_length;
+ s_temp->n_ref_length=s_temp->n_length= org_key_length;
+ length+= org_key_length;
+ }
+ return (int) length;
+ }
+
+ ref_length=n_length;
+ /* Get information about not packed key suffix */
+ get_key_pack_length(n_length,next_length_pack,next_key);
+
+ /* Test if the new key has fewer characters that match the previous key */
+ if (!new_ref_length)
+ { /* Can't use prev key */
+ s_temp->part_of_prev_key= 0;
+ s_temp->prev_length= ref_length;
+ s_temp->n_ref_length= s_temp->n_length= n_length+ref_length;
+ return (int) length+ref_length-next_length_pack;
+ }
+ if (ref_length+pack_marker > new_ref_length)
+ {
+ uint new_pack_length=new_ref_length-pack_marker;
+ /* We must copy characters from the original key to the next key */
+ s_temp->part_of_prev_key= new_ref_length;
+ s_temp->prev_length= ref_length - new_pack_length;
+ s_temp->n_ref_length=s_temp->n_length=n_length + s_temp->prev_length;
+ s_temp->prev_key+= new_pack_length;
+ length-= (next_length_pack - get_pack_length(s_temp->n_length));
+ return (int) length + s_temp->prev_length;
+ }
+ }
+ else
+ {
+ /* Next key wasn't a prefix of previous key */
+ ref_length=0;
+ next_length_pack=0;
+ }
+ DBUG_PRINT("test",("length: %d next_key: 0x%lx", length,
+ (long) next_key));
+
+ {
+ uint tmp_length;
+ key=(start+=ref_length);
+ if (key+n_length < key_end) /* Normalize length based */
+ key_end= key+n_length;
+ if (sort_order) /* SerG */
+ {
+ while (key < key_end &&
+ sort_order[*(uchar*) key] == sort_order[*(uchar*) next_key])
+ {
+ key++; next_key++;
+ }
+ }
+ else
+ {
+ while (key < key_end && *key == *next_key)
+ {
+ key++; next_key++;
+ }
+ }
+ if (!(tmp_length=(uint) (key-start)))
+ { /* Key can't be re-packed */
+ s_temp->next_key_pos=0;
+ return length;
+ }
+ ref_length+=tmp_length;
+ n_length-=tmp_length;
+ length-=tmp_length+next_length_pack; /* We gained these chars */
+ }
+ if (n_length == 0 && ref_length == new_key_length)
+ {
+ s_temp->n_ref_length=pack_marker; /* Same as prev key */
+ }
+ else
+ {
+ s_temp->n_ref_length=ref_length | pack_marker;
+ length+= get_pack_length(n_length);
+ s_temp->n_length=n_length;
+ }
+ }
+ }
+ return length;
+}
+
+
+/* Length of key which is prefix compressed */
+
+int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ byte *next_key,
+ byte *org_key, byte *prev_key,
+ const byte *key,
+ MARIA_KEY_PARAM *s_temp)
+{
+ uint length,key_length,ref_length;
+
+ s_temp->totlength=key_length= _ma_keylength(keyinfo,key)+nod_flag;
+#ifdef HAVE_purify
+ s_temp->n_length= s_temp->n_ref_length=0; /* For valgrind */
+#endif
+ s_temp->key=key;
+ s_temp->prev_key=org_key;
+ if (prev_key) /* If not first key in block */
+ {
+ /* pack key against previous key */
+ /*
+ As keys may be identical when running a sort in maria_chk, we
+ have to guard against running past the end of the key
+ */
+ const byte *end;
+ end=key+key_length;
+ for ( ; *key == *prev_key && key < end; key++,prev_key++) ;
+ s_temp->ref_length= ref_length=(uint) (key-s_temp->key);
+ length=key_length - ref_length + get_pack_length(ref_length);
+ }
+ else
+ {
+ /* No previous key */
+ s_temp->ref_length=ref_length=0;
+ length=key_length+1;
+ }
+ if ((s_temp->next_key_pos=next_key)) /* If another key after */
+ {
+ /* pack key against next key */
+ uint next_length,next_length_pack;
+ get_key_pack_length(next_length,next_length_pack,next_key);
+
+ /* If this is the first key and the next key is packed (only on delete) */
+ if (!prev_key && org_key && next_length)
+ {
+ const byte *end;
+ for (key= s_temp->key, end=key+next_length ;
+ *key == *org_key && key < end;
+ key++,org_key++) ;
+ ref_length= (uint) (key - s_temp->key);
+ }
+
+ if (next_length > ref_length)
+ {
+ /*
+ We put a key with a different case between two keys with the same prefix.
+ Extend the next key to have the same prefix as this key.
+ */
+ s_temp->n_ref_length= ref_length;
+ s_temp->prev_length= next_length-ref_length;
+ s_temp->prev_key+= ref_length;
+ return (int) (length+ s_temp->prev_length - next_length_pack +
+ get_pack_length(ref_length));
+ }
+ /* Check how many characters are identical to next key */
+ key= s_temp->key+next_length;
+ while (*key++ == *next_key++) ;
+ if ((ref_length= (uint) (key - s_temp->key)-1) == next_length)
+ {
+ s_temp->next_key_pos=0;
+ return length; /* can't pack next key */
+ }
+ s_temp->prev_length=0;
+ s_temp->n_ref_length=ref_length;
+ return (int) (length-(ref_length - next_length) - next_length_pack +
+ get_pack_length(ref_length));
+ }
+ return (int) length;
+}
+
+
+/*
+** store a key packed with _ma_calc_xxx_key_length in the page buffer
+*/
+
+/* store key without compression */
+
+void _ma_store_static_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
+ register byte *key_pos,
+ register MARIA_KEY_PARAM *s_temp)
+{
+ memcpy((byte*) key_pos,(byte*) s_temp->key,(size_t) s_temp->totlength);
+}
+
+
+/* store variable length key with prefix compression */
+
+#define store_pack_length(test,pos,length) { \
+ if (test) { *((pos)++) = (uchar) (length); } else \
+ { *((pos)++) = (uchar) ((length) >> 8); *((pos)++) = (uchar) (length); } }
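+
+/*
+ Illustrative example with hypothetical values: with 1-byte prefixes
+ (pack_marker == 128) a ref_length of 3+128 is stored by the macro above
+ as the single byte 0x83; with 2-byte prefixes (pack_marker == 32768) a
+ ref_length of 3+32768 becomes the two bytes 0x80 0x03, high byte first.
+*/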
+
+
+void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
+ register byte *key_pos,
+ register MARIA_KEY_PARAM *s_temp)
+{
+ uint length;
+ byte *start;
+
+ start=key_pos;
+
+ if (s_temp->ref_length)
+ {
+ /* Packed against previous key */
+ store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->ref_length);
+ /* If not same key after */
+ if (s_temp->ref_length != s_temp->pack_marker)
+ store_key_length_inc(key_pos,s_temp->key_length);
+ }
+ else
+ {
+ /* Not packed against previous key */
+ store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->key_length);
+ }
+ bmove((byte*) key_pos,(byte*) s_temp->key,
+ (length=s_temp->totlength-(uint) (key_pos-start)));
+
+ if (!s_temp->next_key_pos) /* No following key */
+ return;
+ key_pos+=length;
+
+ if (s_temp->prev_length)
+ {
+ /* Extend next key because new key didn't have same prefix as prev key */
+ if (s_temp->part_of_prev_key)
+ {
+ store_pack_length(s_temp->pack_marker == 128,key_pos,
+ s_temp->part_of_prev_key);
+ store_key_length_inc(key_pos,s_temp->n_length);
+ }
+ else
+ {
+ s_temp->n_length+= s_temp->store_not_null;
+ store_pack_length(s_temp->pack_marker == 128,key_pos,
+ s_temp->n_length);
+ }
+ memcpy(key_pos, s_temp->prev_key, s_temp->prev_length);
+ }
+ else if (s_temp->n_ref_length)
+ {
+ store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_ref_length);
+ if (s_temp->n_ref_length == s_temp->pack_marker)
+ return; /* Identical key */
+ store_key_length(key_pos,s_temp->n_length);
+ }
+ else
+ {
+ s_temp->n_length+= s_temp->store_not_null;
+ store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_length);
+ }
+}
+
+
+/* store a key packed with binary prefix compression */
+
+void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
+ register byte *key_pos,
+ register MARIA_KEY_PARAM *s_temp)
+{
+ store_key_length_inc(key_pos,s_temp->ref_length);
+ memcpy((char*) key_pos,(char*) s_temp->key+s_temp->ref_length,
+ (size_t) s_temp->totlength-s_temp->ref_length);
+
+ if (s_temp->next_key_pos)
+ {
+ key_pos+=(uint) (s_temp->totlength-s_temp->ref_length);
+ store_key_length_inc(key_pos,s_temp->n_ref_length);
+ if (s_temp->prev_length) /* If we must extend key */
+ {
+ memcpy(key_pos,s_temp->prev_key,s_temp->prev_length);
+ }
+ }
+}
diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c
new file mode 100644
index 00000000000..9134645f847
--- /dev/null
+++ b/storage/maria/ma_sort.c
@@ -0,0 +1,1056 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Creates an index for a database by reading keys, sorting them and outputting
+ them in sorted order through MARIA_SORT_INFO functions.
+*/
+
+#include "ma_fulltext.h"
+#if defined(MSDOS) || defined(__WIN__)
+#include <fcntl.h>
+#else
+#include <stddef.h>
+#endif
+#include <queues.h>
+
+/* static variables */
+
+#undef MIN_SORT_MEMORY
+#undef MYF_RW
+#undef DISK_BUFFER_SIZE
+
+#define MERGEBUFF 15
+#define MERGEBUFF2 31
+#define MIN_SORT_MEMORY (4096-MALLOC_OVERHEAD)
+#define MYF_RW MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL)
+#define DISK_BUFFER_SIZE (IO_SIZE*16)
+
+
+/*
+ Pointers to functions that store and read keys from the temp file
+*/
+
+extern void print_error _VARARGS((const char *fmt,...));
+
+/* Functions defined in this file */
+
+static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info,uint keys,
+ byte **sort_keys,
+ DYNAMIC_ARRAY *buffpek,int *maxbuffer,
+ IO_CACHE *tempfile,
+ IO_CACHE *tempfile_for_exceptions);
+static int NEAR_F write_keys(MARIA_SORT_PARAM *info, byte **sort_keys,
+ uint count, BUFFPEK *buffpek,IO_CACHE *tempfile);
+static int NEAR_F write_key(MARIA_SORT_PARAM *info, byte *key,
+ IO_CACHE *tempfile);
+static int NEAR_F write_index(MARIA_SORT_PARAM *info, byte **sort_keys,
+ uint count);
+static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info,uint keys,
+ byte **sort_keys,
+ BUFFPEK *buffpek,int *maxbuffer,
+ IO_CACHE *t_file);
+static uint NEAR_F read_to_buffer(IO_CACHE *fromfile,BUFFPEK *buffpek,
+ uint sort_length);
+static int NEAR_F merge_buffers(MARIA_SORT_PARAM *info,uint keys,
+ IO_CACHE *from_file, IO_CACHE *to_file,
+ byte **sort_keys, BUFFPEK *lastbuff,
+ BUFFPEK *Fb, BUFFPEK *Tb);
+static int NEAR_F merge_index(MARIA_SORT_PARAM *,uint, byte **,BUFFPEK *, int,
+ IO_CACHE *);
+static int flush_maria_ft_buf(MARIA_SORT_PARAM *info);
+
+static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info, byte **sort_keys,
+ uint count, BUFFPEK *buffpek,
+ IO_CACHE *tempfile);
+static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile,BUFFPEK *buffpek,
+ uint sort_length);
+static int NEAR_F write_merge_key(MARIA_SORT_PARAM *info, IO_CACHE *to_file,
+ char *key, uint sort_length, uint count);
+static int NEAR_F write_merge_key_varlen(MARIA_SORT_PARAM *info,
+ IO_CACHE *to_file,
+ char* key, uint sort_length,
+ uint count);
+static inline int
+my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs);
+
+/*
+ Creates an index of sorted keys
+
+ SYNOPSIS
+ _ma_create_index_by_sort()
+ info Sort parameters
+ no_messages Set to 1 if no output
+ sortbuff_size Size of sort buffer to allocate
+
+ RESULT
+ 0 ok
+ <> 0 Error
+*/
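+
+/*
+ Illustrative sizing example with hypothetical numbers: with
+ records=100000, sort_length=20 and sizeof(char*)=8, keeping every key in
+ memory needs (records+1)*(20+8) bytes, roughly 2.8MB. If less memory is
+ available, the loop below instead picks a number of keys per chunk and a
+ number of temp-file chunks (maxbuffer) such that maxbuffer*(keys-1)
+ covers all records; the chunks are merged afterwards.
+*/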
+
+int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
+ ulong sortbuff_size)
+{
+ int error,maxbuffer,skr;
+ uint memavl,old_memavl,keys,sort_length;
+ DYNAMIC_ARRAY buffpek;
+ ha_rows records;
+ byte **sort_keys;
+ IO_CACHE tempfile, tempfile_for_exceptions;
+ DBUG_ENTER("_ma_create_index_by_sort");
+ DBUG_PRINT("enter",("sort_length: %d", info->key_length));
+
+ if (info->keyinfo->flag & HA_VAR_LENGTH_KEY)
+ {
+ info->write_keys= write_keys_varlen;
+ info->read_to_buffer=read_to_buffer_varlen;
+ info->write_key=write_merge_key_varlen;
+ }
+ else
+ {
+ info->write_keys= write_keys;
+ info->read_to_buffer=read_to_buffer;
+ info->write_key=write_merge_key;
+ }
+
+ my_b_clear(&tempfile);
+ my_b_clear(&tempfile_for_exceptions);
+ bzero((char*) &buffpek,sizeof(buffpek));
+ sort_keys= (byte **) NULL; error= 1;
+ maxbuffer=1;
+
+ memavl=max(sortbuff_size,MIN_SORT_MEMORY);
+ records= info->sort_info->max_records;
+ sort_length= info->key_length;
+ LINT_INIT(keys);
+
+ while (memavl >= MIN_SORT_MEMORY)
+ {
+ if ((my_off_t) (records+1)*(sort_length+sizeof(char*)) <=
+ (my_off_t) memavl)
+ keys= records+1;
+ else
+ do
+ {
+ skr=maxbuffer;
+ if (memavl < sizeof(BUFFPEK)*(uint) maxbuffer ||
+ (keys=(memavl-sizeof(BUFFPEK)*(uint) maxbuffer)/
+ (sort_length+sizeof(char*))) <= 1 ||
+ keys < (uint) maxbuffer)
+ {
+ _ma_check_print_error(info->sort_info->param,
+ "sort_buffer_size is to small");
+ goto err;
+ }
+ }
+ while ((maxbuffer= (int) (records/(keys-1)+1)) != skr);
+
+ if ((sort_keys=(byte**) my_malloc(keys*(sort_length+sizeof(char*))+
+ HA_FT_MAXBYTELEN, MYF(0))))
+ {
+ if (my_init_dynamic_array(&buffpek, sizeof(BUFFPEK), maxbuffer,
+ maxbuffer/2))
+ {
+ my_free((gptr) sort_keys,MYF(0));
+ sort_keys= 0;
+ }
+ else
+ break;
+ }
+ old_memavl=memavl;
+ if ((memavl=memavl/4*3) < MIN_SORT_MEMORY && old_memavl > MIN_SORT_MEMORY)
+ memavl=MIN_SORT_MEMORY;
+ }
+ if (memavl < MIN_SORT_MEMORY)
+ {
+ _ma_check_print_error(info->sort_info->param,"Sort buffer to small"); /* purecov: tested */
+ goto err; /* purecov: tested */
+ }
+ (*info->lock_in_memory)(info->sort_info->param);/* Everything is allocated */
+
+ if (!no_messages)
+ printf(" - Searching for keys, allocating buffer for %d keys\n",keys);
+
+ if ((records=find_all_keys(info,keys,sort_keys,&buffpek,&maxbuffer,
+ &tempfile,&tempfile_for_exceptions))
+ == HA_POS_ERROR)
+ goto err; /* purecov: tested */
+ if (maxbuffer == 0)
+ {
+ if (!no_messages)
+ printf(" - Dumping %lu keys\n", (ulong) records);
+ if (write_index(info,sort_keys, (uint) records))
+ goto err; /* purecov: inspected */
+ }
+ else
+ {
+ keys=(keys*(sort_length+sizeof(char*)))/sort_length;
+ if (maxbuffer >= MERGEBUFF2)
+ {
+ if (!no_messages)
+ printf(" - Merging %lu keys\n", (ulong) records); /* purecov: tested */
+ if (merge_many_buff(info,keys,sort_keys,
+ dynamic_element(&buffpek,0,BUFFPEK *),&maxbuffer,&tempfile))
+ goto err; /* purecov: inspected */
+ }
+ if (flush_io_cache(&tempfile) ||
+ reinit_io_cache(&tempfile,READ_CACHE,0L,0,0))
+ goto err; /* purecov: inspected */
+ if (!no_messages)
+ printf(" - Last merge and dumping keys\n"); /* purecov: tested */
+ if (merge_index(info,keys,sort_keys,dynamic_element(&buffpek,0,BUFFPEK *),
+ maxbuffer,&tempfile))
+ goto err; /* purecov: inspected */
+ }
+
+ if (flush_maria_ft_buf(info) || _ma_flush_pending_blocks(info))
+ goto err;
+
+ if (my_b_inited(&tempfile_for_exceptions))
+ {
+ MARIA_HA *index=info->sort_info->info;
+ uint keyno=info->key;
+ uint key_length, ref_length=index->s->rec_reflength;
+
+ if (!no_messages)
+ printf(" - Adding exceptions\n"); /* purecov: tested */
+ if (flush_io_cache(&tempfile_for_exceptions) ||
+ reinit_io_cache(&tempfile_for_exceptions,READ_CACHE,0L,0,0))
+ goto err;
+
+ while (!my_b_read(&tempfile_for_exceptions,(byte*)&key_length,
+ sizeof(key_length))
+ && !my_b_read(&tempfile_for_exceptions,(byte*)sort_keys,
+ (uint) key_length))
+ {
+ if (_ma_ck_write(index,keyno,(byte*) sort_keys,key_length-ref_length))
+ goto err;
+ }
+ }
+
+ error =0;
+
+err:
+ if (sort_keys)
+ my_free((gptr) sort_keys,MYF(0));
+ delete_dynamic(&buffpek);
+ close_cached_file(&tempfile);
+ close_cached_file(&tempfile_for_exceptions);
+
+ DBUG_RETURN(error ? -1 : 0);
+} /* _ma_create_index_by_sort */
+
+
+/* Search for all keys and place them in a temp file */
+
+static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info, uint keys,
+ byte **sort_keys, DYNAMIC_ARRAY *buffpek,
+ int *maxbuffer, IO_CACHE *tempfile,
+ IO_CACHE *tempfile_for_exceptions)
+{
+ int error;
+ uint idx;
+ DBUG_ENTER("find_all_keys");
+
+ idx=error=0;
+ sort_keys[0]= (byte*) (sort_keys+keys);
+
+ while (!(error=(*info->key_read)(info,sort_keys[idx])))
+ {
+ if (info->real_key_length > info->key_length)
+ {
+ if (write_key(info,sort_keys[idx],tempfile_for_exceptions))
+ DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
+ continue;
+ }
+
+ if (++idx == keys)
+ {
+ if (info->write_keys(info,sort_keys,idx-1,
+ (BUFFPEK *)alloc_dynamic(buffpek),
+ tempfile))
+ DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
+
+ sort_keys[0]=(byte*) (sort_keys+keys);
+ memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length);
+ idx=1;
+ }
+ sort_keys[idx]=sort_keys[idx-1]+info->key_length;
+ }
+ if (error > 0)
+ DBUG_RETURN(HA_POS_ERROR); /* Aborted by get_key */ /* purecov: inspected */
+ if (buffpek->elements)
+ {
+ if (info->write_keys(info,sort_keys,idx,(BUFFPEK *)alloc_dynamic(buffpek),
+ tempfile))
+ DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */
+ *maxbuffer=buffpek->elements-1;
+ }
+ else
+ *maxbuffer=0;
+
+ DBUG_RETURN((*maxbuffer)*(keys-1)+idx);
+} /* find_all_keys */
+
+
+#ifdef THREAD
+/* Search for all keys and place them in a temp file */
+
+pthread_handler_t _ma_thr_find_all_keys(void *arg)
+{
+ MARIA_SORT_PARAM *sort_param= (MARIA_SORT_PARAM*) arg;
+ int error;
+ uint memavl,old_memavl,keys,sort_length;
+ uint idx, maxbuffer;
+ byte **sort_keys=0;
+
+ LINT_INIT(keys);
+
+ error=1;
+
+ if (my_thread_init())
+ goto err;
+
+ { /* Add extra block since DBUG_ENTER declare variables */
+ DBUG_ENTER("_ma_thr_find_all_keys");
+ DBUG_PRINT("enter", ("master: %d", sort_param->master));
+ if (sort_param->sort_info->got_error)
+ goto err;
+
+ if (sort_param->keyinfo->flag & HA_VAR_LENGTH_KEY)
+ {
+ sort_param->write_keys= write_keys_varlen;
+ sort_param->read_to_buffer= read_to_buffer_varlen;
+ sort_param->write_key= write_merge_key_varlen;
+ }
+ else
+ {
+ sort_param->write_keys= write_keys;
+ sort_param->read_to_buffer= read_to_buffer;
+ sort_param->write_key= write_merge_key;
+ }
+
+ my_b_clear(&sort_param->tempfile);
+ my_b_clear(&sort_param->tempfile_for_exceptions);
+ bzero((char*) &sort_param->buffpek,sizeof(sort_param->buffpek));
+ bzero((char*) &sort_param->unique, sizeof(sort_param->unique));
+
+ memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY);
+ idx= sort_param->sort_info->max_records;
+ sort_length= sort_param->key_length;
+ maxbuffer= 1;
+
+ while (memavl >= MIN_SORT_MEMORY)
+ {
+ if ((my_off_t) (idx+1)*(sort_length+sizeof(char*)) <=
+ (my_off_t) memavl)
+ keys= idx+1;
+ else
+ {
+ uint skr;
+ do
+ {
+ skr= maxbuffer;
+ if (memavl < sizeof(BUFFPEK)*maxbuffer ||
+ (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/
+ (sort_length+sizeof(char*))) <= 1 ||
+ keys < (uint) maxbuffer)
+ {
+ _ma_check_print_error(sort_param->sort_info->param,
+ "sort_buffer_size is to small");
+ goto err;
+ }
+ }
+ while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr);
+ }
+ if ((sort_keys= (byte **)
+ my_malloc(keys*(sort_length+sizeof(char*))+
+ ((sort_param->keyinfo->flag & HA_FULLTEXT) ?
+ HA_FT_MAXBYTELEN : 0), MYF(0))))
+ {
+ if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK),
+ maxbuffer, maxbuffer/2))
+ {
+ my_free((gptr) sort_keys,MYF(0));
+ sort_keys= (byte **) NULL; /* for err: label */
+ }
+ else
+ break;
+ }
+ old_memavl= memavl;
+ if ((memavl= memavl/4*3) < MIN_SORT_MEMORY &&
+ old_memavl > MIN_SORT_MEMORY)
+ memavl= MIN_SORT_MEMORY;
+ }
+ if (memavl < MIN_SORT_MEMORY)
+ {
+ _ma_check_print_error(sort_param->sort_info->param,
+ "Sort buffer too small");
+ goto err; /* purecov: tested */
+ }
+
+ if (sort_param->sort_info->param->testflag & T_VERBOSE)
+ printf("Key %d - Allocating buffer for %d keys\n",
+ sort_param->key+1, keys);
+ sort_param->sort_keys= sort_keys;
+
+ idx= error= 0;
+ sort_keys[0]= (byte*) (sort_keys+keys);
+
+ DBUG_PRINT("info", ("reading keys"));
+ while (!(error= sort_param->sort_info->got_error) &&
+ !(error= (*sort_param->key_read)(sort_param, sort_keys[idx])))
+ {
+ if (sort_param->real_key_length > sort_param->key_length)
+ {
+ if (write_key(sort_param,sort_keys[idx],
+ &sort_param->tempfile_for_exceptions))
+ goto err;
+ continue;
+ }
+
+ if (++idx == keys)
+ {
+ if (sort_param->write_keys(sort_param, sort_keys, idx - 1,
+ (BUFFPEK *)alloc_dynamic(&sort_param->
+ buffpek),
+ &sort_param->tempfile))
+ goto err;
+ sort_keys[0]= (byte*) (sort_keys+keys);
+ memcpy(sort_keys[0], sort_keys[idx - 1],
+ (size_t) sort_param->key_length);
+ idx= 1;
+ }
+ sort_keys[idx]=sort_keys[idx - 1] + sort_param->key_length;
+ }
+ if (error > 0)
+ goto err;
+ if (sort_param->buffpek.elements)
+ {
+ if (sort_param->write_keys(sort_param,sort_keys, idx,
+ (BUFFPEK *) alloc_dynamic(&sort_param->
+ buffpek),
+ &sort_param->tempfile))
+ goto err;
+ sort_param->keys= (sort_param->buffpek.elements - 1) * (keys - 1) + idx;
+ }
+ else
+ sort_param->keys= idx;
+
+ sort_param->sort_keys_length= keys;
+ goto ok;
+
+err:
+ DBUG_PRINT("error", ("got some error"));
+ sort_param->sort_info->got_error= 1; /* no need to protect with a mutex */
+ my_free((gptr) sort_keys,MYF(MY_ALLOW_ZERO_PTR));
+ sort_param->sort_keys=0;
+ delete_dynamic(& sort_param->buffpek);
+ close_cached_file(&sort_param->tempfile);
+ close_cached_file(&sort_param->tempfile_for_exceptions);
+
+ok:
+ free_root(&sort_param->wordroot, MYF(0));
+ /*
+ Detach from the share if the writer is involved, so that others are
+ not blocked. This includes a flush of the write buffer. This will
+ also indicate EOF to the readers.
+ */
+ if (sort_param->sort_info->info->rec_cache.share)
+ remove_io_thread(&sort_param->sort_info->info->rec_cache);
+
+ /* Readers detach from the share, if any, so that others are not blocked. */
+ if (sort_param->read_cache.share)
+ remove_io_thread(&sort_param->read_cache);
+
+ pthread_mutex_lock(&sort_param->sort_info->mutex);
+ if (!--sort_param->sort_info->threads_running)
+ pthread_cond_signal(&sort_param->sort_info->cond);
+ pthread_mutex_unlock(&sort_param->sort_info->mutex);
+ DBUG_PRINT("exit", ("======== ending thread ========"));
+ }
+ my_thread_end();
+ return NULL;
+}
+
+
+int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param)
+{
+ MARIA_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ ulong length, keys;
+ ulong *rec_per_key_part=param->rec_per_key_part;
+ int got_error=sort_info->got_error;
+ uint i;
+ MARIA_HA *info=sort_info->info;
+ MARIA_SHARE *share=info->s;
+ MARIA_SORT_PARAM *sinfo;
+ byte *mergebuf=0;
+ DBUG_ENTER("_ma_thr_write_keys");
+ LINT_INIT(length);
+
+ for (i= 0, sinfo= sort_param ;
+ i < sort_info->total_keys ;
+ i++, rec_per_key_part+=sinfo->keyinfo->keysegs, sinfo++)
+ {
+ if (!sinfo->sort_keys)
+ {
+ got_error=1;
+ my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ continue;
+ }
+ if (!got_error)
+ {
+ maria_set_key_active(share->state.key_map, sinfo->key);
+
+ if (!sinfo->buffpek.elements)
+ {
+ if (param->testflag & T_VERBOSE)
+ {
+ printf("Key %d - Dumping %u keys\n",sinfo->key+1, sinfo->keys);
+ fflush(stdout);
+ }
+ if (write_index(sinfo, sinfo->sort_keys, sinfo->keys) ||
+ flush_maria_ft_buf(sinfo) || _ma_flush_pending_blocks(sinfo))
+ got_error=1;
+ }
+ if (!got_error && param->testflag & T_STATISTICS)
+ maria_update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique,
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
+ sinfo->notnull: NULL,
+ (ulonglong) info->state->records);
+ }
+ my_free((gptr) sinfo->sort_keys,MYF(0));
+ my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR));
+ sinfo->sort_keys=0;
+ }
+
+ for (i= 0, sinfo= sort_param ;
+ i < sort_info->total_keys ;
+ i++,
+ delete_dynamic(&sinfo->buffpek),
+ close_cached_file(&sinfo->tempfile),
+ close_cached_file(&sinfo->tempfile_for_exceptions),
+ sinfo++)
+ {
+ if (got_error)
+ continue;
+ if (sinfo->keyinfo->flag & HA_VAR_LENGTH_KEY)
+ {
+ sinfo->write_keys=write_keys_varlen;
+ sinfo->read_to_buffer=read_to_buffer_varlen;
+ sinfo->write_key=write_merge_key_varlen;
+ }
+ else
+ {
+ sinfo->write_keys=write_keys;
+ sinfo->read_to_buffer=read_to_buffer;
+ sinfo->write_key=write_merge_key;
+ }
+ if (sinfo->buffpek.elements)
+ {
+ uint maxbuffer=sinfo->buffpek.elements-1;
+ if (!mergebuf)
+ {
+ length=param->sort_buffer_length;
+ while (length >= MIN_SORT_MEMORY && !mergebuf)
+ {
+ mergebuf=my_malloc(length, MYF(0));
+ length=length*3/4;
+ }
+ if (!mergebuf)
+ {
+ got_error=1;
+ continue;
+ }
+ }
+ keys=length/sinfo->key_length;
+ if (maxbuffer >= MERGEBUFF2)
+ {
+ if (param->testflag & T_VERBOSE)
+ printf("Key %d - Merging %u keys\n",sinfo->key+1, sinfo->keys);
+ if (merge_many_buff(sinfo, keys, (byte **) mergebuf,
+ dynamic_element(&sinfo->buffpek, 0, BUFFPEK *),
+ (int*) &maxbuffer, &sinfo->tempfile))
+ {
+ got_error=1;
+ continue;
+ }
+ }
+ if (flush_io_cache(&sinfo->tempfile) ||
+ reinit_io_cache(&sinfo->tempfile,READ_CACHE,0L,0,0))
+ {
+ got_error=1;
+ continue;
+ }
+ if (param->testflag & T_VERBOSE)
+ printf("Key %d - Last merge and dumping keys\n", sinfo->key+1);
+ if (merge_index(sinfo, keys, (byte**) mergebuf,
+ dynamic_element(&sinfo->buffpek,0,BUFFPEK *),
+ maxbuffer,&sinfo->tempfile) ||
+ flush_maria_ft_buf(sinfo) ||
+ _ma_flush_pending_blocks(sinfo))
+ {
+ got_error=1;
+ continue;
+ }
+ }
+ if (my_b_inited(&sinfo->tempfile_for_exceptions))
+ {
+ uint key_length;
+
+ if (param->testflag & T_VERBOSE)
+ printf("Key %d - Dumping 'long' keys\n", sinfo->key+1);
+
+ if (flush_io_cache(&sinfo->tempfile_for_exceptions) ||
+ reinit_io_cache(&sinfo->tempfile_for_exceptions,READ_CACHE,0L,0,0))
+ {
+ got_error=1;
+ continue;
+ }
+
+ while (!got_error &&
+ !my_b_read(&sinfo->tempfile_for_exceptions,(byte*)&key_length,
+ sizeof(key_length)))
+ {
+ byte maria_ft_buf[HA_FT_MAXBYTELEN + HA_FT_WLEN + 10];
+ if (key_length > sizeof(maria_ft_buf) ||
+ my_b_read(&sinfo->tempfile_for_exceptions, (byte*)maria_ft_buf,
+ (uint)key_length) ||
+ _ma_ck_write(info, sinfo->key, maria_ft_buf,
+ key_length - info->s->rec_reflength))
+ got_error=1;
+ }
+ }
+ }
+ my_free((gptr) mergebuf,MYF(MY_ALLOW_ZERO_PTR));
+ DBUG_RETURN(got_error);
+}
+#endif /* THREAD */
+
+/* Write all keys in memory to file for later merge */
+
+static int write_keys(MARIA_SORT_PARAM *info, register byte **sort_keys,
+ uint count, BUFFPEK *buffpek, IO_CACHE *tempfile)
+{
+ byte **end;
+ uint sort_length=info->key_length;
+ DBUG_ENTER("write_keys");
+
+ qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp,
+ info);
+ if (!my_b_inited(tempfile) &&
+ open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
+ DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
+ DBUG_RETURN(1); /* purecov: inspected */
+
+ buffpek->file_pos=my_b_tell(tempfile);
+ buffpek->count=count;
+
+ for (end=sort_keys+count ; sort_keys != end ; sort_keys++)
+ {
+ if (my_b_write(tempfile, *sort_keys, (uint) sort_length))
+ DBUG_RETURN(1); /* purecov: inspected */
+ }
+ DBUG_RETURN(0);
+} /* write_keys */
+
+
+static inline int
+my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs)
+{
+ int err;
+ uint16 len= _ma_keylength(info->keyinfo, bufs);
+
+ /* The following is safe as this is a local file */
+ if ((err= my_b_write(to_file, (byte*)&len, sizeof(len))))
+ return (err);
+ if ((err= my_b_write(to_file,bufs, (uint) len)))
+ return (err);
+ return (0);
+}
+
+
+static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info,
+ register byte **sort_keys,
+ uint count, BUFFPEK *buffpek,
+ IO_CACHE *tempfile)
+{
+ byte **end;
+ int err;
+ DBUG_ENTER("write_keys_varlen");
+
+ qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp,
+ info);
+ if (!my_b_inited(tempfile) &&
+ open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
+ DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
+ DBUG_RETURN(1); /* purecov: inspected */
+
+ buffpek->file_pos=my_b_tell(tempfile);
+ buffpek->count=count;
+ for (end=sort_keys+count ; sort_keys != end ; sort_keys++)
+ {
+ if ((err= my_var_write(info,tempfile, *sort_keys)))
+ DBUG_RETURN(err);
+ }
+ DBUG_RETURN(0);
+} /* write_keys_varlen */
+
+
+static int NEAR_F write_key(MARIA_SORT_PARAM *info, byte *key,
+ IO_CACHE *tempfile)
+{
+ uint key_length=info->real_key_length;
+ DBUG_ENTER("write_key");
+
+ if (!my_b_inited(tempfile) &&
+ open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST",
+ DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
+ DBUG_RETURN(1);
+
+ if (my_b_write(tempfile, (byte*)&key_length,sizeof(key_length)) ||
+ my_b_write(tempfile, key, (uint) key_length))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+} /* write_key */
+
+
+/* Write index */
+
+static int NEAR_F write_index(MARIA_SORT_PARAM *info,
+ register byte **sort_keys,
+ register uint count)
+{
+ DBUG_ENTER("write_index");
+
+ qsort2((gptr) sort_keys,(size_t) count,sizeof(byte*),
+ (qsort2_cmp) info->key_cmp,info);
+ while (count--)
+ {
+ if ((*info->key_write)(info, *sort_keys++))
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ DBUG_RETURN(0);
+} /* write_index */
+
+
+ /* Merge buffers to make < MERGEBUFF2 buffers */
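+/*
+ A sketch of the pass structure, assuming the usual MyISAM-style constants
+ MERGEBUFF=15 and MERGEBUFF2=31 (the real values come from the header): each
+ pass merges groups of MERGEBUFF runs from one temp file into the other until
+ fewer than MERGEBUFF2 runs remain, e.g.
+
+ 100 runs -> ceil(100/15) = 7 runs (one pass, t_file -> t_file2)
+ 7 < MERGEBUFF2 -> loop ends; the final merge is done by merge_index()
+*/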
+
+static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info, uint keys,
+ byte **sort_keys, BUFFPEK *buffpek,
+ int *maxbuffer, IO_CACHE *t_file)
+{
+ register int i;
+ IO_CACHE t_file2, *from_file, *to_file, *temp;
+ BUFFPEK *lastbuff;
+ DBUG_ENTER("merge_many_buff");
+
+ if (*maxbuffer < MERGEBUFF2)
+ DBUG_RETURN(0); /* purecov: inspected */
+ if (flush_io_cache(t_file) ||
+ open_cached_file(&t_file2,my_tmpdir(info->tmpdir),"ST",
+ DISK_BUFFER_SIZE, info->sort_info->param->myf_rw))
+ DBUG_RETURN(1); /* purecov: inspected */
+
+ from_file= t_file ; to_file= &t_file2;
+ while (*maxbuffer >= MERGEBUFF2)
+ {
+ reinit_io_cache(from_file,READ_CACHE,0L,0,0);
+ reinit_io_cache(to_file,WRITE_CACHE,0L,0,0);
+ lastbuff=buffpek;
+ for (i=0 ; i <= *maxbuffer-MERGEBUFF*3/2 ; i+=MERGEBUFF)
+ {
+ if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++,
+ buffpek+i,buffpek+i+MERGEBUFF-1))
+ break; /* purecov: inspected */
+ }
+ if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++,
+ buffpek+i,buffpek+ *maxbuffer))
+ break; /* purecov: inspected */
+ if (flush_io_cache(to_file))
+ break; /* purecov: inspected */
+ temp=from_file; from_file=to_file; to_file=temp;
+ *maxbuffer= (int) (lastbuff-buffpek)-1;
+ }
+ close_cached_file(to_file); /* This holds old result */
+ if (to_file == t_file)
+ *t_file=t_file2; /* Copy result file */
+
+ DBUG_RETURN(*maxbuffer >= MERGEBUFF2); /* Return 1 if interrupted */
+} /* merge_many_buff */
+
+
+/*
+ Read data to buffer
+
+ SYNOPSIS
+ read_to_buffer()
+ fromfile File to read from
+ buffpek Where to read from
+ sort_length max length to read
+ RESULT
+ > 0 Number of bytes read
+ -1 Error
+*/
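+/*
+ Typical caller pattern (as merge_buffers() below does): the return value is
+ the number of bytes now available at buffpek->base, so it both advances the
+ consumer's position and doubles as the error marker:
+
+ if ((bytes= read_to_buffer(fromfile, buffpek, sort_length)) == (uint) -1)
+ goto err;
+ strpos+= bytes;
+*/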
+
+static uint NEAR_F read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
+ uint sort_length)
+{
+ register uint count;
+ uint length;
+
+ if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ {
+ if (my_pread(fromfile->file,(byte*) buffpek->base,
+ (length= sort_length*count),buffpek->file_pos,MYF_RW))
+ return((uint) -1); /* purecov: inspected */
+ buffpek->key=buffpek->base;
+ buffpek->file_pos+= length; /* New filepos */
+ buffpek->count-= count;
+ buffpek->mem_count= count;
+ }
+ return (count*sort_length);
+} /* read_to_buffer */
+
+static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek,
+ uint sort_length)
+{
+ register uint count;
+ uint16 length_of_key = 0;
+ uint idx;
+ byte *buffp;
+
+ if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ {
+ buffp= buffpek->base;
+
+ for (idx=1;idx<=count;idx++)
+ {
+ if (my_pread(fromfile->file,(byte*)&length_of_key,sizeof(length_of_key),
+ buffpek->file_pos,MYF_RW))
+ return((uint) -1);
+ buffpek->file_pos+=sizeof(length_of_key);
+ if (my_pread(fromfile->file,(byte*) buffp,length_of_key,
+ buffpek->file_pos,MYF_RW))
+ return((uint) -1);
+ buffpek->file_pos+=length_of_key;
+ buffp = buffp + sort_length;
+ }
+ buffpek->key=buffpek->base;
+ buffpek->count-= count;
+ buffpek->mem_count= count;
+ }
+ return (count*sort_length);
+} /* read_to_buffer_varlen */
+
+
+static int NEAR_F write_merge_key_varlen(MARIA_SORT_PARAM *info,
+ IO_CACHE *to_file,char* key,
+ uint sort_length, uint count)
+{
+ uint idx;
+
+ char *bufs = key;
+ for (idx=1;idx<=count;idx++)
+ {
+ int err;
+ if ((err= my_var_write(info,to_file, (byte*) bufs)))
+ return (err);
+ bufs=bufs+sort_length;
+ }
+ return(0);
+}
+
+
+static int NEAR_F write_merge_key(MARIA_SORT_PARAM *info __attribute__((unused)),
+ IO_CACHE *to_file, char* key,
+ uint sort_length, uint count)
+{
+ return my_b_write(to_file,(byte*) key,(uint) sort_length*count);
+}
+
+/*
+ Merge buffers to one buffer
+ If to_file == 0 then use info->key_write
+*/
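+/*
+ This is a classical k-way merge driven by a priority queue: every BUFFPEK in
+ [Fb, Tb] gets an equal slice of the sort buffer (maxcount keys) and the queue
+ is ordered on each buffer's smallest unread key. A simplified sketch of the
+ loop below (error handling and the buffer-space reuse trick omitted):
+
+ for each buffpek: read_to_buffer(); queue_insert(buffpek);
+ while (queue.elements > 1)
+ {
+ write the key at queue_top(); advance that buffer;
+ if its slice is empty: refill it from disk, or drop it from the queue;
+ queue_replaced() to restore the heap order;
+ }
+ flush the single remaining buffer;
+*/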
+
+static int NEAR_F
+merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
+ IO_CACHE *to_file, byte **sort_keys, BUFFPEK *lastbuff,
+ BUFFPEK *Fb, BUFFPEK *Tb)
+{
+ int error;
+ uint sort_length,maxcount;
+ ha_rows count;
+ my_off_t to_start_filepos;
+ byte *strpos;
+ BUFFPEK *buffpek,**refpek;
+ QUEUE queue;
+ volatile int *killed= _ma_killed_ptr(info->sort_info->param);
+ DBUG_ENTER("merge_buffers");
+
+ count=error=0;
+ maxcount=keys/((uint) (Tb-Fb) +1);
+ LINT_INIT(to_start_filepos);
+ if (to_file)
+ to_start_filepos=my_b_tell(to_file);
+ strpos= (byte*) sort_keys;
+ sort_length=info->key_length;
+
+ if (init_queue(&queue,(uint) (Tb-Fb)+1,offsetof(BUFFPEK,key),0,
+ (int (*)(void*, byte *,byte*)) info->key_cmp,
+ (void*) info))
+ DBUG_RETURN(1); /* purecov: inspected */
+
+ for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
+ {
+ count+= buffpek->count;
+ buffpek->base= strpos;
+ buffpek->max_keys=maxcount;
+ strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek,
+ sort_length));
+ if (error == -1)
+ goto err; /* purecov: inspected */
+ queue_insert(&queue,(char*) buffpek);
+ }
+
+ while (queue.elements > 1)
+ {
+ for (;;)
+ {
+ if (*killed)
+ {
+ error=1; goto err;
+ }
+ buffpek=(BUFFPEK*) queue_top(&queue);
+ if (to_file)
+ {
+ if (info->write_key(info,to_file,(byte*) buffpek->key,
+ (uint) sort_length,1))
+ {
+ error=1; goto err; /* purecov: inspected */
+ }
+ }
+ else
+ {
+ if ((*info->key_write)(info,(void*) buffpek->key))
+ {
+ error=1; goto err; /* purecov: inspected */
+ }
+ }
+ buffpek->key+=sort_length;
+ if (! --buffpek->mem_count)
+ {
+ if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length)))
+ {
+ byte *base= buffpek->base;
+ uint max_keys=buffpek->max_keys;
+
+ VOID(queue_remove(&queue,0));
+
+ /* Give the space used by the exhausted buffer to a neighbouring buffer */
+ for (refpek= (BUFFPEK**) &queue_top(&queue);
+ refpek <= (BUFFPEK**) &queue_end(&queue);
+ refpek++)
+ {
+ buffpek= *refpek;
+ if (buffpek->base+buffpek->max_keys*sort_length == base)
+ {
+ buffpek->max_keys+=max_keys;
+ break;
+ }
+ else if (base+max_keys*sort_length == buffpek->base)
+ {
+ buffpek->base=base;
+ buffpek->max_keys+=max_keys;
+ break;
+ }
+ }
+ break; /* One buffer has been removed */
+ }
+ }
+ else if (error == -1)
+ goto err; /* purecov: inspected */
+ queue_replaced(&queue); /* Top element has been replaced */
+ }
+ }
+ buffpek=(BUFFPEK*) queue_top(&queue);
+ buffpek->base= (byte*) sort_keys;
+ buffpek->max_keys=keys;
+ do
+ {
+ if (to_file)
+ {
+ if (info->write_key(info,to_file,(byte*) buffpek->key,
+ sort_length,buffpek->mem_count))
+ {
+ error=1; goto err; /* purecov: inspected */
+ }
+ }
+ else
+ {
+ register byte *end;
+ strpos= buffpek->key;
+ for (end= strpos+buffpek->mem_count*sort_length;
+ strpos != end ;
+ strpos+=sort_length)
+ {
+ if ((*info->key_write)(info, (byte*) strpos))
+ {
+ error=1; goto err; /* purecov: inspected */
+ }
+ }
+ }
+ }
+ while ((error=(int) info->read_to_buffer(from_file,buffpek,sort_length)) !=
+ -1 && error != 0);
+
+ lastbuff->count=count;
+ if (to_file)
+ lastbuff->file_pos=to_start_filepos;
+err:
+ delete_queue(&queue);
+ DBUG_RETURN(error);
+} /* merge_buffers */
+
+
+ /* Do a merge to output-file (save only positions) */
+
+static int NEAR_F
+merge_index(MARIA_SORT_PARAM *info, uint keys, byte **sort_keys,
+ BUFFPEK *buffpek, int maxbuffer, IO_CACHE *tempfile)
+{
+ DBUG_ENTER("merge_index");
+ if (merge_buffers(info,keys,tempfile,(IO_CACHE*) 0,sort_keys,buffpek,buffpek,
+ buffpek+maxbuffer))
+ DBUG_RETURN(1); /* purecov: inspected */
+ DBUG_RETURN(0);
+} /* merge_index */
+
+
+static int flush_maria_ft_buf(MARIA_SORT_PARAM *info)
+{
+ int err=0;
+ if (info->sort_info->ft_buf)
+ {
+ err=_ma_sort_ft_buf_flush(info);
+ my_free((gptr)info->sort_info->ft_buf, MYF(0));
+ info->sort_info->ft_buf=0;
+ }
+ return err;
+}
+
diff --git a/storage/maria/ma_sp_defs.h b/storage/maria/ma_sp_defs.h
new file mode 100644
index 00000000000..6aac741bb2c
--- /dev/null
+++ b/storage/maria/ma_sp_defs.h
@@ -0,0 +1,48 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB
+ & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _SP_DEFS_H
+#define _SP_DEFS_H
+
+#define SPDIMS 2
+#define SPTYPE HA_KEYTYPE_DOUBLE
+#define SPLEN 8
+
+#ifdef HAVE_SPATIAL
+
+enum wkbType
+{
+ wkbPoint = 1,
+ wkbLineString = 2,
+ wkbPolygon = 3,
+ wkbMultiPoint = 4,
+ wkbMultiLineString = 5,
+ wkbMultiPolygon = 6,
+ wkbGeometryCollection = 7
+};
+
+enum wkbByteOrder
+{
+ wkbXDR = 0, /* Big Endian */
+ wkbNDR = 1 /* Little Endian */
+};
+
+uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, byte *key,
+ const byte *record, my_off_t filepos);
+
+#endif /*HAVE_SPATIAL*/
+#endif /* _SP_DEFS_H */
diff --git a/storage/maria/ma_sp_key.c b/storage/maria/ma_sp_key.c
new file mode 100644
index 00000000000..b365f8deb0f
--- /dev/null
+++ b/storage/maria/ma_sp_key.c
@@ -0,0 +1,300 @@
+/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "maria_def.h"
+
+#ifdef HAVE_SPATIAL
+
+#include "ma_sp_defs.h"
+
+static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr);
+static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr);
+static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr);
+static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr);
+static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ double *mbr, int top);
+static int sp_mbr_from_wkb(uchar (*wkb), uint size, uint n_dims, double *mbr);
+
+static void get_double(double *d, const byte *pos)
+{
+ float8get(*d, pos);
+}
+
+uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, byte *key,
+ const byte *record, my_off_t filepos)
+{
+ HA_KEYSEG *keyseg;
+ MARIA_KEYDEF *keyinfo = &info->s->keyinfo[keynr];
+ uint len = 0;
+ byte *pos;
+ uint dlen;
+ uchar *dptr;
+ double mbr[SPDIMS * 2];
+ uint i;
+
+ keyseg = &keyinfo->seg[-1];
+ pos = (byte*)record + keyseg->start;
+
+ dlen = _ma_calc_blob_length(keyseg->bit_start, pos);
+ memcpy_fixed(&dptr, pos + keyseg->bit_start, sizeof(char*));
+ if (!dptr)
+ {
+ my_errno= HA_ERR_NULL_IN_SPATIAL;
+ return 0;
+ }
+ sp_mbr_from_wkb(dptr + 4, dlen - 4, SPDIMS, mbr); /* SRID */
+
+ for (i = 0, keyseg = keyinfo->seg; keyseg->type; keyseg++, i++)
+ {
+ uint length = keyseg->length;
+
+ pos = ((byte*)mbr) + keyseg->start;
+ if (keyseg->flag & HA_SWAP_KEY)
+ {
+#ifdef HAVE_ISNAN
+ if (keyseg->type == HA_KEYTYPE_FLOAT)
+ {
+ float nr;
+ float4get(nr, pos);
+ if (isnan(nr))
+ {
+ /* Replace NAN with zero */
+ bzero(key, length);
+ key+= length;
+ continue;
+ }
+ }
+ else if (keyseg->type == HA_KEYTYPE_DOUBLE)
+ {
+ double nr;
+ get_double(&nr, pos);
+ if (isnan(nr))
+ {
+ bzero(key, length);
+ key+= length;
+ continue;
+ }
+ }
+#endif
+ pos += length;
+ while (length--)
+ {
+ *key++ = *--pos;
+ }
+ }
+ else
+ {
+ memcpy((byte*)key, pos, length);
+ key += keyseg->length;
+ }
+ len += keyseg->length;
+ }
+ _ma_dpointer(info, key, filepos);
+ return len;
+}
+
+/*
+Calculate minimal bounding rectangle (mbr) of the spatial object
+stored in "well-known binary representation" (wkb) format.
+*/
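+/*
+ Layout note (inferred from sp_add_point_to_mbr() below): mbr holds a
+ {min, max} pair per dimension, so with SPDIMS == 2 it is
+ {xmin, xmax, ymin, ymax}. Starting the minima at DBL_MAX and the maxima at
+ -DBL_MAX guarantees that the first point examined replaces both bounds,
+ e.g. a single point (3, 7) yields mbr = {3, 3, 7, 7}.
+*/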
+static int sp_mbr_from_wkb(uchar *wkb, uint size, uint n_dims, double *mbr)
+{
+ uint i;
+
+ for (i=0; i < n_dims; ++i)
+ {
+ mbr[i * 2] = DBL_MAX;
+ mbr[i * 2 + 1] = -DBL_MAX;
+ }
+
+ return sp_get_geometry_mbr(&wkb, wkb + size, n_dims, mbr, 1);
+}
+
+/*
+ Add one point stored in wkb to mbr
+*/
+
+static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order __attribute__((unused)),
+ double *mbr)
+{
+ double ord;
+ double *mbr_end= mbr + n_dims * 2;
+
+ while (mbr < mbr_end)
+ {
+ if ((*wkb) > end - 8)
+ return -1;
+ get_double(&ord, (const byte*) *wkb);
+ (*wkb)+= 8;
+ if (ord < *mbr)
+ float8store((char*) mbr, ord);
+ mbr++;
+ if (ord > *mbr)
+ float8store((char*) mbr, ord);
+ mbr++;
+ }
+ return 0;
+}
+
+
+static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr)
+{
+ return sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr);
+}
+
+
+static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr)
+{
+ uint n_points;
+
+ n_points = uint4korr(*wkb);
+ (*wkb) += 4;
+ for (; n_points > 0; --n_points)
+ {
+ /* Add next point to mbr */
+ if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ return 0;
+}
+
+
+static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ uchar byte_order, double *mbr)
+{
+ uint n_linear_rings;
+ uint n_points;
+
+ n_linear_rings = uint4korr((*wkb));
+ (*wkb) += 4;
+
+ for (; n_linear_rings > 0; --n_linear_rings)
+ {
+ n_points = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_points > 0; --n_points)
+ {
+ /* Add next point to mbr */
+ if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims,
+ double *mbr, int top)
+{
+ int res;
+ uchar byte_order;
+ uint wkb_type;
+
+ byte_order = *(*wkb);
+ ++(*wkb);
+
+ wkb_type = uint4korr((*wkb));
+ (*wkb) += 4;
+
+ switch ((enum wkbType) wkb_type)
+ {
+ case wkbPoint:
+ res = sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr);
+ break;
+ case wkbLineString:
+ res = sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr);
+ break;
+ case wkbPolygon:
+ res = sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr);
+ break;
+ case wkbMultiPoint:
+ {
+ uint n_items;
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items)
+ {
+ byte_order = *(*wkb);
+ ++(*wkb);
+ (*wkb) += 4;
+ if (sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ res = 0;
+ break;
+ }
+ case wkbMultiLineString:
+ {
+ uint n_items;
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items)
+ {
+ byte_order = *(*wkb);
+ ++(*wkb);
+ (*wkb) += 4;
+ if (sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ res = 0;
+ break;
+ }
+ case wkbMultiPolygon:
+ {
+ uint n_items;
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items)
+ {
+ byte_order = *(*wkb);
+ ++(*wkb);
+ (*wkb) += 4;
+ if (sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr))
+ return -1;
+ }
+ res = 0;
+ break;
+ }
+ case wkbGeometryCollection:
+ {
+ uint n_items;
+
+ if (!top)
+ return -1;
+
+ n_items = uint4korr((*wkb));
+ (*wkb) += 4;
+ for (; n_items > 0; --n_items)
+ {
+ if (sp_get_geometry_mbr(wkb, end, n_dims, mbr, 0))
+ return -1;
+ }
+ res = 0;
+ break;
+ }
+ default:
+ res = -1;
+ }
+ return res;
+}
+
+#endif /*HAVE_SPATIAL*/
diff --git a/storage/maria/ma_sp_test.c b/storage/maria/ma_sp_test.c
new file mode 100644
index 00000000000..1ac1a74d7d7
--- /dev/null
+++ b/storage/maria/ma_sp_test.c
@@ -0,0 +1,569 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Testing of the basic functions of a MARIA spatial table */
+/* Written by Alex Barkov, who has a shared copyright to this code */
+
+#include "maria.h"
+
+#ifdef HAVE_SPATIAL
+#include "ma_sp_defs.h"
+
+#define MAX_REC_LENGTH 1024
+#define KEYALG HA_KEY_ALG_RTREE
+
+static void create_linestring(char *record,uint rownr);
+static void print_record(char * record,my_off_t offs,const char * tail);
+
+static void create_key(char *key,uint rownr);
+static void print_key(const char *key,const char * tail);
+
+static int run_test(const char *filename);
+static int read_with_pos(MARIA_HA * file, int silent);
+
+static int maria_rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points,
+ uchar *wkb);
+static void maria_rtree_PrintWKB(uchar *wkb, uint n_dims);
+
+static char blob_key[MAX_REC_LENGTH];
+
+
+int main(int argc __attribute__((unused)),char *argv[])
+{
+ MY_INIT(argv[0]);
+ maria_init();
+ exit(run_test("sp_test"));
+}
+
+
+int run_test(const char *filename)
+{
+ MARIA_HA *file;
+ MARIA_UNIQUEDEF uniquedef;
+ MARIA_CREATE_INFO create_info;
+ MARIA_COLUMNDEF recinfo[20];
+ MARIA_KEYDEF keyinfo[20];
+ HA_KEYSEG keyseg[20];
+ key_range min_range, max_range;
+ int silent=0;
+ int create_flag=0;
+ int null_fields=0;
+ int nrecords=30;
+ int uniques=0;
+ int i;
+ int error;
+ int row_count=0;
+ char record[MAX_REC_LENGTH];
+ char key[MAX_REC_LENGTH];
+ char read_record[MAX_REC_LENGTH];
+ int upd=10;
+ ha_rows hrows;
+
+ /* Define a column for NULLs and DEL markers*/
+
+ recinfo[0].type=FIELD_NORMAL;
+ recinfo[0].length=1; /* For NULL bits */
+
+
+ /* Define spatial column */
+
+ recinfo[1].type=FIELD_BLOB;
+ recinfo[1].length=4 + maria_portable_sizeof_char_ptr;
+
+
+
+ /* Define a key with 1 spatial segment */
+
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].flag=HA_SPATIAL;
+ keyinfo[0].key_alg=KEYALG;
+
+ keyinfo[0].seg[0].type= HA_KEYTYPE_BINARY;
+ keyinfo[0].seg[0].flag=0;
+ keyinfo[0].seg[0].start= 1;
+ keyinfo[0].seg[0].length=1; /* Spatial ignores it anyway */
+ keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].seg[0].language=default_charset_info->number;
+ keyinfo[0].seg[0].bit_start=4; /* Long BLOB */
+
+
+ if (!silent)
+ printf("- Creating isam-file\n");
+
+ bzero((char*) &create_info,sizeof(create_info));
+ create_info.max_rows=10000000;
+
+ if (maria_create(filename,
+ DYNAMIC_RECORD,
+ 1, /* keys */
+ keyinfo,
+ 2, /* columns */
+ recinfo,uniques,&uniquedef,&create_info,create_flag))
+ goto err;
+
+ if (!silent)
+ printf("- Open isam-file\n");
+
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED)))
+ goto err;
+
+ if (!silent)
+ printf("- Writing key:s\n");
+
+ for (i=0; i<nrecords; i++ )
+ {
+ create_linestring(record,i);
+ error=maria_write(file,record);
+ print_record(record,maria_position(file),"\n");
+ if (!error)
+ {
+ row_count++;
+ }
+ else
+ {
+ printf("maria_write: %d\n", error);
+ goto err;
+ }
+ }
+
+ if ((error=read_with_pos(file,silent)))
+ goto err;
+
+ if (!silent)
+ printf("- Deleting rows with position\n");
+ for (i=0; i < nrecords/4; i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file),"\n");
+ error=maria_delete(file,read_record);
+ if (error)
+ {
+ printf("pos: %2d maria_delete: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ }
+
+ if (!silent)
+ printf("- Updating rows with position\n");
+ for (i=0; i < nrecords/2 ; i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ if (error==HA_ERR_RECORD_DELETED)
+ continue;
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file),"");
+ create_linestring(record,i+nrecords*upd);
+ printf("\t-> ");
+ print_record(record,maria_position(file),"\n");
+ error=maria_update(file,read_record,record);
+ if (error)
+ {
+ printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno);
+ goto err;
+ }
+ }
+
+ if ((error=read_with_pos(file,silent)))
+ goto err;
+
+ if (!silent)
+ printf("- Test maria_rkey then a sequence of maria_rnext_same\n");
+
+ create_key(key, nrecords*4/5);
+ print_key(key," search for INTERSECT\n");
+
+ if ((error=maria_rkey(file,read_record,0,key,0,HA_READ_MBR_INTERSECT)))
+ {
+ printf("maria_rkey: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rkey\n");
+ row_count=1;
+
+ for (;;)
+ {
+ if ((error=maria_rnext_same(file,read_record)))
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ printf("maria_next: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rnext_same\n");
+ row_count++;
+ }
+ printf(" %d rows\n",row_count);
+
+ if (!silent)
+ printf("- Test maria_rfirst then a sequence of maria_rnext\n");
+
+ error=maria_rfirst(file,read_record,0);
+ if (error)
+ {
+ printf("maria_rfirst: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ row_count=1;
+ print_record(read_record,maria_position(file)," maria_frirst\n");
+
+ for(i=0;i<nrecords;i++) {
+ if ((error=maria_rnext(file,read_record,0)))
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ printf("maria_next: %3d errno: %3d\n",error,my_errno);
+ goto err;
+ }
+ print_record(read_record,maria_position(file)," maria_rnext\n");
+ row_count++;
+ }
+ printf(" %d rows\n",row_count);
+
+ if (!silent)
+ printf("- Test maria_records_in_range()\n");
+
+ create_key(key, nrecords*upd);
+ print_key(key," INTERSECT\n");
+ min_range.key= key;
+ min_range.length= 1000; /* Big enough */
+ min_range.flag= HA_READ_MBR_INTERSECT;
+ max_range.key= record+1;
+ max_range.length= 1000; /* Big enough */
+ max_range.flag= HA_READ_KEY_EXACT;
+ hrows= maria_records_in_range(file,0, &min_range, &max_range);
+ printf(" %ld rows\n", (long) hrows);
+
+ if (maria_close(file)) goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return 0;
+
+err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ maria_end();
+ return 1; /* skip warning */
+}
+
+
+static int read_with_pos (MARIA_HA * file,int silent)
+{
+ int error;
+ int i;
+ char read_record[MAX_REC_LENGTH];
+ int rows=0;
+
+ if (!silent)
+ printf("- Reading rows with position\n");
+ for (i=0;;i++)
+ {
+ my_errno=0;
+ bzero((char*) read_record,MAX_REC_LENGTH);
+ error=maria_rrnd(file,read_record,i == 0 ? 0L : HA_OFFSET_ERROR);
+ if (error)
+ {
+ if (error==HA_ERR_END_OF_FILE)
+ break;
+ if (error==HA_ERR_RECORD_DELETED)
+ continue;
+ printf("pos: %2d maria_rrnd: %3d errno: %3d\n",i,error,my_errno);
+ return error;
+ }
+ rows++;
+ print_record(read_record,maria_position(file),"\n");
+ }
+ printf(" %d rows\n",rows);
+ return 0;
+}
+
+
+#ifdef NOT_USED
+static void bprint_record(char * record,
+ my_off_t offs __attribute__((unused)),
+ const char * tail)
+{
+ int i;
+ char * pos;
+ i=(unsigned char)record[0];
+ printf("%02X ",i);
+
+ for( pos=record+1, i=0; i<32; i++,pos++)
+ {
+ int b=(unsigned char)*pos;
+ printf("%02X",b);
+ }
+ printf("%s",tail);
+}
+#endif
+
+
+static void print_record(char * record, my_off_t offs,const char * tail)
+{
+ char *pos;
+ char *ptr;
+ uint len;
+
+ printf(" rec=(%d)",(unsigned char)record[0]);
+ pos=record+1;
+ len=sint4korr(pos);
+ pos+=4;
+ printf(" len=%d ",len);
+ memcpy_fixed(&ptr,pos,sizeof(char*));
+ if (ptr)
+ maria_rtree_PrintWKB((uchar*) ptr,SPDIMS);
+ else
+ printf("<NULL> ");
+ printf(" offs=%ld ",(long int)offs);
+ printf("%s",tail);
+}
+
+
+#ifdef NOT_USED
+static void create_point(char *record,uint rownr)
+{
+ uint tmp;
+ char *ptr;
+ char *pos=record;
+ double x[200];
+ int i;
+
+ for(i=0;i<SPDIMS;i++)
+ x[i]=rownr;
+
+ bzero((char*) record,MAX_REC_LENGTH);
+ *pos=0x01; /* DEL marker */
+ pos++;
+
+ memset(blob_key,0,sizeof(blob_key));
+ tmp=maria_rtree_CreatePointWKB(x,SPDIMS,blob_key);
+
+ int4store(pos,tmp);
+ pos+=4;
+
+ ptr=blob_key;
+ memcpy_fixed(pos,&ptr,sizeof(char*));
+}
+#endif
+
+
+static void create_linestring(char *record,uint rownr)
+{
+ uint tmp;
+ char *ptr;
+ char *pos=record;
+ double x[200];
+ int i,j;
+ int npoints=2;
+
+ for(j=0;j<npoints;j++)
+ for(i=0;i<SPDIMS;i++)
+ x[i+j*SPDIMS]=rownr*j;
+
+ bzero((char*) record,MAX_REC_LENGTH);
+ *pos=0x01; /* DEL marker */
+ pos++;
+
+ memset(blob_key,0,sizeof(blob_key));
+ tmp=maria_rtree_CreateLineStringWKB(x,SPDIMS,npoints, (uchar*) blob_key);
+
+ int4store(pos,tmp);
+ pos+=4;
+
+ ptr=blob_key;
+ memcpy_fixed(pos,&ptr,sizeof(char*));
+}
+
+
+static void create_key(char *key,uint rownr)
+{
+ double c=rownr;
+ char *pos;
+ uint i;
+
+ bzero(key,MAX_REC_LENGTH);
+ for ( pos=key, i=0; i<2*SPDIMS; i++)
+ {
+ float8store(pos,c);
+ pos+=sizeof(c);
+ }
+}
+
+static void print_key(const char *key,const char * tail)
+{
+ double c;
+ uint i;
+
+ printf(" key=");
+ for (i=0; i<2*SPDIMS; i++)
+ {
+ float8get(c,key);
+ key+=sizeof(c);
+ printf("%.14g ",c);
+ }
+ printf("%s",tail);
+}
+
+
+#ifdef NOT_USED
+
+static int maria_rtree_CreatePointWKB(double *ords, uint n_dims, uchar *wkb)
+{
+ uint i;
+
+ *wkb = wkbXDR;
+ ++wkb;
+ int4store(wkb, wkbPoint);
+ wkb += 4;
+
+ for (i=0; i < n_dims; ++i)
+ {
+ float8store(wkb, ords[i]);
+ wkb += 8;
+ }
+ return 5 + n_dims * 8;
+}
+#endif
+
+
+static int maria_rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points,
+ uchar *wkb)
+{
+ uint i;
+ uint n_ords = n_dims * n_points;
+
+ *wkb = wkbXDR;
+ ++wkb;
+ int4store(wkb, wkbLineString);
+ wkb += 4;
+ int4store(wkb, n_points);
+ wkb += 4;
+ for (i=0; i < n_ords; ++i)
+ {
+ float8store(wkb, ords[i]);
+ wkb += 8;
+ }
+ return 9 + n_points * n_dims * 8;
+}
+
+
+static void maria_rtree_PrintWKB(uchar *wkb, uint n_dims)
+{
+ uint wkb_type;
+
+ ++wkb;
+ wkb_type = uint4korr(wkb);
+ wkb += 4;
+
+ switch ((enum wkbType)wkb_type)
+ {
+ case wkbPoint:
+ {
+ uint i;
+ double ord;
+
+ printf("POINT(");
+ for (i=0; i < n_dims; ++i)
+ {
+ float8get(ord, wkb);
+ wkb += 8;
+ printf("%.14g", ord);
+ if (i < n_dims - 1)
+ printf(" ");
+ else
+ printf(")");
+ }
+ break;
+ }
+ case wkbLineString:
+ {
+ uint p, i;
+ uint n_points;
+ double ord;
+
+ printf("LineString(");
+ n_points = uint4korr(wkb);
+ wkb += 4;
+ for (p=0; p < n_points; ++p)
+ {
+ for (i=0; i < n_dims; ++i)
+ {
+ float8get(ord, wkb);
+ wkb += 8;
+ printf("%.14g", ord);
+ if (i < n_dims - 1)
+ printf(" ");
+ }
+ if (p < n_points - 1)
+ printf(", ");
+ else
+ printf(")");
+ }
+ break;
+ }
+ case wkbPolygon:
+ {
+ printf("POLYGON(...)");
+ break;
+ }
+ case wkbMultiPoint:
+ {
+ printf("MULTIPOINT(...)");
+ break;
+ }
+ case wkbMultiLineString:
+ {
+ printf("MULTILINESTRING(...)");
+ break;
+ }
+ case wkbMultiPolygon:
+ {
+ printf("MULTIPOLYGON(...)");
+ break;
+ }
+ case wkbGeometryCollection:
+ {
+ printf("GEOMETRYCOLLECTION(...)");
+ break;
+ }
+ default:
+ {
+ printf("UNKNOWN GEOMETRY TYPE");
+ break;
+ }
+ }
+}
+
+#else
+int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused)))
+{
+ exit(0);
+}
+#endif /*HAVE_SPATIAL*/
diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c
new file mode 100644
index 00000000000..c5580e1e981
--- /dev/null
+++ b/storage/maria/ma_static.c
@@ -0,0 +1,67 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ Static variables for the MARIA library. All are defined here to make it
+ easy to build a shared library
+*/
+
+#ifndef _global_h
+#include "maria_def.h"
+#endif
+
+LIST *maria_open_list=0;
+uchar NEAR maria_file_magic[]=
+{ (uchar) 254, (uchar) 254, (uchar) 9, '\001', };
+uchar NEAR maria_pack_file_magic[]=
+{ (uchar) 254, (uchar) 254, (uchar) 10, '\001', };
+uint maria_quick_table_bits=9;
+ulong maria_block_size= MARIA_KEY_BLOCK_LENGTH;
+my_bool maria_flush= 0, maria_single_user= 0;
+my_bool maria_delay_key_write= 0;
+#if defined(THREAD) && !defined(DONT_USE_RW_LOCKS)
+ulong maria_concurrent_insert= 2;
+#else
+ulong maria_concurrent_insert= 0;
+#endif
+my_off_t maria_max_temp_length= MAX_FILE_SIZE;
+ulong maria_bulk_insert_tree_size=8192*1024;
+ulong maria_data_pointer_size= 4;
+
+KEY_CACHE maria_key_cache_var;
+KEY_CACHE *maria_key_cache= &maria_key_cache_var;
+
+/* Long enough for comparing whether a number is zero */
+byte maria_zero_string[]= {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+/*
+ maria_read_vec[] is used for converting between HA_READ_KEY_... flags and
+ SEARCH_ flags. The first positions correspond to ==, >=, <=, >, <
+*/
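+/*
+ A sketch of the intended indexing, assuming the usual ha_rkey_function order
+ (HA_READ_KEY_EXACT = 0, HA_READ_KEY_OR_NEXT = 1, ...):
+
+ search_flag= maria_read_vec[HA_READ_KEY_OR_NEXT];
+ == SEARCH_FIND | SEARCH_BIGGER, i.e. the key itself or the next bigger one
+*/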
+
+uint NEAR maria_read_vec[]=
+{
+ SEARCH_FIND, SEARCH_FIND | SEARCH_BIGGER, SEARCH_FIND | SEARCH_SMALLER,
+ SEARCH_NO_FIND | SEARCH_BIGGER, SEARCH_NO_FIND | SEARCH_SMALLER,
+ SEARCH_FIND | SEARCH_PREFIX, SEARCH_LAST, SEARCH_LAST | SEARCH_SMALLER,
+ MBR_CONTAIN, MBR_INTERSECT, MBR_WITHIN, MBR_DISJOINT, MBR_EQUAL
+};
+
+uint NEAR maria_readnext_vec[]=
+{
+ SEARCH_BIGGER, SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_BIGGER, SEARCH_SMALLER,
+ SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_SMALLER
+};
diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c
new file mode 100644
index 00000000000..72c5e10d9ab
--- /dev/null
+++ b/storage/maria/ma_statrec.c
@@ -0,0 +1,300 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+ /* Functions to handle fixed-length-records */
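+/*
+ On-disk layout sketch, as implied by the code below: byte 0 of every
+ fixed-length row doubles as the "row exists" marker (the test programs call
+ it the DEL marker). _ma_delete_static_record() sets it to 0 and stores a
+ pointer to the previously deleted row in the following rec_reflength bytes,
+ so free rows form a chain starting at share->state.dellink, which
+ _ma_write_static_record() pops before extending the data file.
+*/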
+
+#include "maria_def.h"
+
+
+my_bool _ma_write_static_record(MARIA_HA *info, const byte *record)
+{
+ byte temp[8]; /* max pointer length */
+ if (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end)
+ {
+ my_off_t filepos=info->s->state.dellink;
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ if (info->s->file_read(info,(char*) &temp[0],info->s->base.rec_reflength,
+ info->s->state.dellink+1,
+ MYF(MY_NABP)))
+ goto err;
+ info->s->state.dellink= _ma_rec_pos(info->s,temp);
+ info->state->del--;
+ info->state->empty-=info->s->base.pack_reclength;
+ if (info->s->file_write(info, (char*) record, info->s->base.reclength,
+ filepos,
+ MYF(MY_NABP)))
+ goto err;
+ }
+ else
+ {
+ if (info->state->data_file_length > info->s->base.max_data_file_length-
+ info->s->base.pack_reclength)
+ {
+ my_errno=HA_ERR_RECORD_FILE_FULL;
+ return(2);
+ }
+ if (info->opt_flag & WRITE_CACHE_USED)
+ { /* Cache in use */
+ if (my_b_write(&info->rec_cache, (byte*) record,
+ info->s->base.reclength))
+ goto err;
+ if (info->s->base.pack_reclength != info->s->base.reclength)
+ {
+ uint length=info->s->base.pack_reclength - info->s->base.reclength;
+ bzero((char*) temp,length);
+ if (my_b_write(&info->rec_cache, (byte*) temp,length))
+ goto err;
+ }
+ }
+ else
+ {
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ if (info->s->file_write(info,(char*) record,info->s->base.reclength,
+ info->state->data_file_length,
+ info->s->write_flag))
+ goto err;
+ if (info->s->base.pack_reclength != info->s->base.reclength)
+ {
+ uint length=info->s->base.pack_reclength - info->s->base.reclength;
+ bzero((char*) temp,length);
+ if (info->s->file_write(info, (byte*) temp,length,
+ info->state->data_file_length+
+ info->s->base.reclength,
+ info->s->write_flag))
+ goto err;
+ }
+ }
+ info->state->data_file_length+=info->s->base.pack_reclength;
+ info->s->state.split++;
+ }
+ return 0;
+ err:
+ return 1;
+}
+
+my_bool _ma_update_static_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const byte *record)
+{
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ return (info->s->file_write(info,
+ (char*) record,info->s->base.reclength,
+ pos,
+ MYF(MY_NABP)) != 0);
+}
+
+
+my_bool _ma_delete_static_record(MARIA_HA *info)
+{
+ byte temp[9]; /* 1+sizeof(uint32) */
+ info->state->del++;
+ info->state->empty+=info->s->base.pack_reclength;
+ temp[0]= '\0'; /* Mark that record is deleted */
+ _ma_dpointer(info,temp+1,info->s->state.dellink);
+ info->s->state.dellink= info->cur_row.lastpos;
+ info->rec_cache.seek_not_done=1;
+ return (info->s->file_write(info, temp, 1+info->s->rec_reflength,
+ info->cur_row.lastpos, MYF(MY_NABP)) != 0);
+}
+
+
+my_bool _ma_cmp_static_record(register MARIA_HA *info,
+ register const byte *old)
+{
+ DBUG_ENTER("_ma_cmp_static_record");
+
+ /* We are going to do changes; don't let anybody disturb */
+ dont_break(); /* Don't allow SIGHUP or SIGINT */
+
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ if (flush_io_cache(&info->rec_cache))
+ {
+ DBUG_RETURN(1);
+ }
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ }
+
+ if ((info->opt_flag & READ_CHECK_USED))
+ { /* If check isn't disabled */
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ if (info->s->file_read(info, (char*) info->rec_buff,
+ info->s->base.reclength,
+ info->cur_row.lastpos,
+ MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ if (memcmp((byte*) info->rec_buff, (byte*) old,
+ (uint) info->s->base.reclength))
+ {
+ DBUG_DUMP("read",old,info->s->base.reclength);
+ DBUG_DUMP("disk",info->rec_buff,info->s->base.reclength);
+ my_errno=HA_ERR_RECORD_CHANGED; /* Record has changed */
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const byte *record, MARIA_RECORD_POS pos)
+{
+ DBUG_ENTER("_ma_cmp_static_unique");
+
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+ if (info->s->file_read(info, (char*) info->rec_buff, info->s->base.reclength,
+ pos, MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ DBUG_RETURN(_ma_unique_comp(def, record, (byte*) info->rec_buff,
+ def->null_are_equal));
+}
+
+
+/*
+ Read a fixed-length-record
+
+ RETURN
+ 0 Ok
+ 1 Record is deleted
+ -1 on read-error or locking-error
+*/
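+/*
+ A minimal caller sketch (hypothetical; real callers such as
+ _ma_read_rnd_static_record() below also handle caching and locking):
+
+ switch (_ma_read_static_record(info, record, pos)) {
+ case 0: record is valid; break;
+ case 1: row deleted, my_errno == HA_ERR_RECORD_DELETED; break;
+ default: read or locking error, my_errno is set; break;
+ }
+*/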
+
+int _ma_read_static_record(register MARIA_HA *info, register byte *record,
+ MARIA_RECORD_POS pos)
+{
+ int error;
+
+ if (pos != HA_OFFSET_ERROR)
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file <= pos &&
+ flush_io_cache(&info->rec_cache))
+ return(-1);
+ info->rec_cache.seek_not_done=1; /* We have done a seek */
+
+ error=info->s->file_read(info,(char*) record,info->s->base.reclength,
+ pos, MYF(MY_NABP)) != 0;
+ fast_ma_writeinfo(info);
+ if (! error)
+ {
+ if (!*record)
+ {
+ my_errno=HA_ERR_RECORD_DELETED;
+ return(1); /* Record is deleted */
+ }
+ info->update|= HA_STATE_AKTIV; /* Record is read */
+ return(0);
+ }
+ return(-1); /* Error on read */
+ }
+ fast_ma_writeinfo(info); /* No such record */
+ return(-1);
+}
+
+
+
+int _ma_read_rnd_static_record(MARIA_HA *info, byte *buf,
+ MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks)
+{
+ int locked,error,cache_read;
+ uint cache_length;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("_ma_read_rnd_static_record");
+
+ cache_read=0;
+ cache_length=0;
+ if (info->opt_flag & READ_CACHE_USED)
+ { /* Cache in use */
+ if (filepos == my_b_tell(&info->rec_cache) &&
+ (skip_deleted_blocks || !filepos))
+ {
+ cache_read=1; /* Read record using cache */
+ cache_length=(uint) (info->rec_cache.read_end - info->rec_cache.read_pos);
+ }
+ else
+ info->rec_cache.seek_not_done=1; /* Filepos is changed */
+ }
+ locked=0;
+ if (info->lock_type == F_UNLCK)
+ {
+ if (filepos >= info->state->data_file_length)
+ { /* Test if new records */
+ if (_ma_readinfo(info,F_RDLCK,0))
+ DBUG_RETURN(my_errno);
+ locked=1;
+ }
+ else
+ { /* We don't need new info */
+#ifndef UNSAFE_LOCKING
+ if ((! cache_read || share->base.reclength > cache_length) &&
+ share->tot_locks == 0)
+ { /* record not in cache */
+ locked=1;
+ }
+#else
+ info->tmp_lock_type=F_RDLCK;
+#endif
+ }
+ }
+ if (filepos >= info->state->data_file_length)
+ {
+ DBUG_PRINT("test",("filepos: %ld (%ld) records: %ld del: %ld",
+ (long) filepos/share->base.reclength, (long) filepos,
+ (long) info->state->records, (long) info->state->del));
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(my_errno=HA_ERR_END_OF_FILE);
+ }
+ info->cur_row.lastpos= filepos;
+ info->cur_row.nextpos= filepos+share->base.pack_reclength;
+
+ if (! cache_read) /* No caching */
+ {
+ if ((error= _ma_read_static_record(info, buf, filepos)))
+ {
+ if (error > 0)
+ error=my_errno=HA_ERR_RECORD_DELETED;
+ else
+ error=my_errno;
+ }
+ DBUG_RETURN(error);
+ }
+
+ /* Read record with caching */
+ error=my_b_read(&info->rec_cache,(byte*) buf,share->base.reclength);
+ if (info->s->base.pack_reclength != info->s->base.reclength && !error)
+ {
+ char tmp[8]; /* Skip filler bytes */
+ error=my_b_read(&info->rec_cache,(byte*) tmp,
+ info->s->base.pack_reclength - info->s->base.reclength);
+ }
+ if (locked)
+ VOID(_ma_writeinfo(info,0)); /* Unlock keyfile */
+ if (!error)
+ {
+ if (!buf[0])
+ { /* Record is removed */
+ DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED);
+ }
+ /* Found and may be updated */
+ info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
+ DBUG_RETURN(0);
+ }
+ /* my_errno should be set if rec_cache.error == -1 */
+ if (info->rec_cache.error != -1 || my_errno == 0)
+ my_errno=HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(my_errno); /* Something wrong (EOF?) */
+}
diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c
new file mode 100644
index 00000000000..0f37391c1d4
--- /dev/null
+++ b/storage/maria/ma_test1.c
@@ -0,0 +1,714 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Testing of the basic functions of a MARIA table */
+
+#include "maria.h"
+#include <my_getopt.h>
+#include <m_string.h>
+
+#define MAX_REC_LENGTH 1024
+
+static void usage();
+
+static int rec_pointer_size=0, flags[50];
+static int key_field=FIELD_SKIP_PRESPACE,extra_field=FIELD_SKIP_ENDSPACE;
+static int key_type=HA_KEYTYPE_NUM;
+static int create_flag=0;
+static enum data_file_type record_type= DYNAMIC_RECORD;
+
+static uint insert_count, update_count, remove_count;
+static uint pack_keys=0, pack_seg=0, key_length;
+static uint unique_key=HA_NOSAME;
+static my_bool key_cacheing, null_fields, silent, skip_update, opt_unique,
+ verbose, skip_delete;
+static MARIA_COLUMNDEF recinfo[4];
+static MARIA_KEYDEF keyinfo[10];
+static HA_KEYSEG keyseg[10];
+static HA_KEYSEG uniqueseg[10];
+
+static int run_test(const char *filename);
+static void get_options(int argc, char *argv[]);
+static void create_key(char *key,uint rownr);
+static void create_record(char *record,uint rownr);
+static void update_record(char *record);
+
+int main(int argc,char *argv[])
+{
+ MY_INIT(argv[0]);
+ my_init();
+ maria_init();
+ get_options(argc,argv);
+ if (key_cacheing) /* Parse options before testing the -K flag */
+ init_key_cache(maria_key_cache,KEY_CACHE_BLOCK_SIZE,IO_SIZE*16,0,0);
+
+ exit(run_test("test1"));
+}
+
+
+static int run_test(const char *filename)
+{
+ MARIA_HA *file;
+ int i,j,error,deleted,rec_length,uniques=0;
+ ha_rows found,row_count;
+ char record[MAX_REC_LENGTH],key[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH];
+ MARIA_UNIQUEDEF uniquedef;
+ MARIA_CREATE_INFO create_info;
+
+ bzero((char*) recinfo,sizeof(recinfo));
+ bzero((char*) &create_info,sizeof(create_info));
+
+ /* First define 2 columns */
+ create_info.null_bytes= 1;
+ recinfo[0].type= key_field;
+ recinfo[0].length= (key_field == FIELD_BLOB ? 4+maria_portable_sizeof_char_ptr :
+ key_length);
+ if (key_field == FIELD_VARCHAR)
+ recinfo[0].length+= HA_VARCHAR_PACKLENGTH(key_length);
+ recinfo[1].type=extra_field;
+ recinfo[1].length= (extra_field == FIELD_BLOB ? 4 + maria_portable_sizeof_char_ptr : 24);
+ if (extra_field == FIELD_VARCHAR)
+ recinfo[1].length+= HA_VARCHAR_PACKLENGTH(recinfo[1].length);
+ if (opt_unique)
+ {
+ recinfo[2].type=FIELD_CHECK;
+ recinfo[2].length=MARIA_UNIQUE_HASH_LENGTH;
+ }
+ rec_length= recinfo[0].length+recinfo[1].length+recinfo[2].length;
+
+ if (key_type == HA_KEYTYPE_VARTEXT1 &&
+ key_length > 255)
+ key_type= HA_KEYTYPE_VARTEXT2;
+
+ /* Define a key over the first column */
+ keyinfo[0].seg=keyseg;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[0].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[0].seg[0].type= key_type;
+ keyinfo[0].seg[0].flag= pack_seg;
+ keyinfo[0].seg[0].start=1;
+ keyinfo[0].seg[0].length=key_length;
+ keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].seg[0].language= default_charset_info->number;
+ if (pack_seg & HA_BLOB_PART)
+ {
+ keyinfo[0].seg[0].bit_start=4; /* Length of blob length */
+ }
+ keyinfo[0].flag = (uint8) (pack_keys | unique_key);
+
+ bzero((byte*) flags,sizeof(flags));
+ if (opt_unique)
+ {
+ uint start;
+ uniques=1;
+ bzero((char*) &uniquedef,sizeof(uniquedef));
+ bzero((char*) uniqueseg,sizeof(uniqueseg));
+ uniquedef.seg=uniqueseg;
+ uniquedef.keysegs=2;
+
+ /* Make a unique over all columns (except first NULL fields) */
+ for (i=0, start=1 ; i < 2 ; i++)
+ {
+ uniqueseg[i].start=start;
+ start+=recinfo[i].length;
+ uniqueseg[i].length=recinfo[i].length;
+ uniqueseg[i].language= default_charset_info->number;
+ }
+ uniqueseg[0].type= key_type;
+ uniqueseg[0].null_bit= null_fields ? 2 : 0;
+ uniqueseg[1].type= HA_KEYTYPE_TEXT;
+ if (extra_field == FIELD_BLOB)
+ {
+ uniqueseg[1].length=0; /* The whole blob */
+ uniqueseg[1].bit_start=4; /* long blob */
+ uniqueseg[1].flag|= HA_BLOB_PART;
+ }
+ else if (extra_field == FIELD_VARCHAR)
+ {
+ uniqueseg[1].flag|= HA_VAR_LENGTH_PART;
+ uniqueseg[1].type= (HA_VARCHAR_PACKLENGTH(recinfo[1].length-1) == 1 ?
+ HA_KEYTYPE_VARTEXT1 : HA_KEYTYPE_VARTEXT2);
+ }
+ }
+ else
+ uniques=0;
+
+ if (!silent)
+ printf("- Creating maria file\n");
+ create_info.max_rows=(ulong) (rec_pointer_size ?
+ (1L << (rec_pointer_size*8))/40 :
+ 0);
+ if (maria_create(filename, record_type, 1, keyinfo,2+opt_unique,recinfo,
+ uniques, &uniquedef, &create_info,
+ create_flag))
+ goto err;
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED)))
+ goto err;
+ if (!silent)
+ printf("- Writing key:s\n");
+
+ my_errno=0;
+ row_count=deleted=0;
+ for (i=49 ; i>=1 ; i-=2 )
+ {
+ if (insert_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; }
+ j=i%25 +1;
+ create_record(record,j);
+ error=maria_write(file,record);
+ if (!error)
+ row_count++;
+ flags[j]=1;
+ if (verbose || error)
+ printf("J= %2d maria_write: %d errno: %d\n", j,error,my_errno);
+ }
+
+ /* Insert 2 rows with null values */
+ if (null_fields)
+ {
+ create_record(record,0);
+ error=maria_write(file,record);
+ if (!error)
+ row_count++;
+ if (verbose || error)
+ printf("J= NULL maria_write: %d errno: %d\n", error,my_errno);
+ error=maria_write(file,record);
+ if (!error)
+ row_count++;
+ if (verbose || error)
+ printf("J= NULL maria_write: %d errno: %d\n", error,my_errno);
+ flags[0]=2;
+ }
+
+ if (!skip_update)
+ {
+ if (opt_unique)
+ {
+ if (!silent)
+ printf("- Checking unique constraint\n");
+ create_record(record,j);
+ if (!maria_write(file,record) || my_errno != HA_ERR_FOUND_DUPP_UNIQUE)
+ {
+ printf("unique check failed\n");
+ }
+ }
+ if (!silent)
+ printf("- Updating rows\n");
+
+ /* Update first last row to force extend of file */
+ if (maria_rsame(file,read_record,-1))
+ {
+ printf("Can't find last row with maria_rsame\n");
+ }
+ else
+ {
+ memcpy(record,read_record,rec_length);
+ update_record(record);
+ if (maria_update(file,read_record,record))
+ {
+ printf("Can't update last row: %.*s\n",
+ keyinfo[0].seg[0].length,read_record+1);
+ }
+ }
+
+ /* Read through all rows and update them */
+ assert(maria_scan_init(file) == 0);
+
+ found=0;
+ while ((error= maria_scan(file,read_record)) == 0)
+ {
+ if (update_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; }
+ memcpy(record,read_record,rec_length);
+ update_record(record);
+ if (maria_update(file,read_record,record))
+ {
+ printf("Can't update row: %.*s, error: %d\n",
+ keyinfo[0].seg[0].length,record+1,my_errno);
+ }
+ found++;
+ }
+ if (found != row_count)
+ printf("Found %ld of %ld rows\n", (ulong) found, (ulong) row_count);
+ maria_scan_end(file);
+ }
+
+ if (!silent)
+ printf("- Reopening file\n");
+ if (maria_close(file)) goto err;
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) goto err;
+ if (!skip_delete)
+ {
+ if (!silent)
+ printf("- Removing keys\n");
+
+ for (i=0 ; i <= 10 ; i++)
+ {
+ /* testing */
+ if (remove_count-- == 0)
+ {
+ fprintf(stderr,
+ "delete-rows number of rows deleted; Going down hard!\n");
+ VOID(maria_close(file));
+ exit(0) ;
+ }
+ j=i*2;
+ if (!flags[j])
+ continue;
+ create_key(key,j);
+ my_errno=0;
+ if ((error = maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)))
+ {
+ if (verbose || (flags[j] >= 1 ||
+ (error && my_errno != HA_ERR_KEY_NOT_FOUND)))
+ printf("key: '%.*s' maria_rkey: %3d errno: %3d\n",
+ (int) key_length,key+test(null_fields),error,my_errno);
+ }
+ else
+ {
+ error=maria_delete(file,read_record);
+ if (verbose || error)
+ printf("key: '%.*s' maria_delete: %3d errno: %3d\n",
+ (int) key_length, key+test(null_fields), error, my_errno);
+ if (! error)
+ {
+ deleted++;
+ flags[j]--;
+ }
+ }
+ }
+ }
+ if (!silent)
+ printf("- Reading rows with key\n");
+ record[1]= 0; /* For nicer printf */
+ for (i=0 ; i <= 25 ; i++)
+ {
+ create_key(key,i);
+ my_errno=0;
+ error=maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT);
+ if (verbose ||
+ (error == 0 && flags[i] == 0 && unique_key) ||
+ (error && (flags[i] != 0 || my_errno != HA_ERR_KEY_NOT_FOUND)))
+ {
+ printf("key: '%.*s' maria_rkey: %3d errno: %3d record: %s\n",
+ (int) key_length,key+test(null_fields),error,my_errno,record+1);
+ }
+ }
+
+ if (!silent)
+ printf("- Reading rows with position\n");
+ if (maria_scan_init(file))
+ {
+ fprintf(stderr, "maria_scan_init failed\n");
+ goto err;
+ }
+
+ for (i=1,found=0 ; i <= 30 ; i++)
+ {
+ my_errno=0;
+ if ((error= maria_scan(file, read_record)) == HA_ERR_END_OF_FILE)
+ {
+ if (found != row_count-deleted)
+ printf("Found only %ld of %ld rows\n", (ulong) found,
+ (ulong) (row_count - deleted));
+ break;
+ }
+ if (!error)
+ found++;
+ if (verbose || (error != 0 && error != HA_ERR_RECORD_DELETED &&
+ error != HA_ERR_END_OF_FILE))
+ {
+ printf("pos: %2d maria_rrnd: %3d errno: %3d record: %s\n",
+ i-1,error,my_errno,read_record+1);
+ }
+ }
+ if (maria_close(file))
+ goto err;
+ maria_end();
+ my_end(MY_CHECK_ERROR);
+
+ return (0);
+err:
+ printf("got error: %3d when using maria-database\n",my_errno);
+ return 1; /* skip warning */
+}
+
+
+static void create_key_part(char *key,uint rownr)
+{
+ if (!unique_key)
+ rownr&=7; /* Some identical keys */
+ if (keyinfo[0].seg[0].type == HA_KEYTYPE_NUM)
+ {
+ sprintf(key,"%*d",keyinfo[0].seg[0].length,rownr);
+ }
+ else if (keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT1 ||
+ keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT2)
+ { /* Alpha record */
+ /* Create a key that may be easily packed */
+ bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B');
+ sprintf(key+keyinfo[0].seg[0].length-2,"%-2d",rownr);
+ if ((rownr & 7) == 0)
+ {
+ /* Change the key to force a unpack of the next key */
+ bfill(key+3,keyinfo[0].seg[0].length-5,rownr < 10 ? 'a' : 'b');
+ }
+ }
+ else
+ { /* Alpha record */
+ if (keyinfo[0].seg[0].flag & HA_SPACE_PACK)
+ sprintf(key,"%-*d",keyinfo[0].seg[0].length,rownr);
+ else
+ {
+ /* Create a key that may be easily packed */
+ bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B');
+ sprintf(key+keyinfo[0].seg[0].length-2,"%-2d",rownr);
+ if ((rownr & 7) == 0)
+ {
+ /* Change the key to force a unpack of the next key */
+ key[1]= (rownr < 10 ? 'a' : 'b');
+ }
+ }
+ }
+}
+
+
+static void create_key(char *key,uint rownr)
+{
+ if (keyinfo[0].seg[0].null_bit)
+ {
+ if (rownr == 0)
+ {
+ key[0]=1; /* null key */
+ key[1]=0; /* For easy print of key */
+ return;
+ }
+ *key++=0;
+ }
+ if (keyinfo[0].seg[0].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
+ {
+ uint tmp;
+ create_key_part(key+2,rownr);
+ tmp=strlen(key+2);
+ int2store(key,tmp);
+ }
+ else
+ create_key_part(key,rownr);
+}
+
+
+static char blob_key[MAX_REC_LENGTH];
+static char blob_record[MAX_REC_LENGTH+20*20];
+
+
+static void create_record(char *record,uint rownr)
+{
+ char *pos;
+ bzero((char*) record,MAX_REC_LENGTH);
+ record[0]=1; /* delete marker */
+ if (rownr == 0 && keyinfo[0].seg[0].null_bit)
+ record[0]|=keyinfo[0].seg[0].null_bit; /* Null key */
+
+ pos=record+1;
+ if (recinfo[0].type == FIELD_BLOB)
+ {
+ uint tmp;
+ char *ptr;
+ create_key_part(blob_key,rownr);
+ tmp=strlen(blob_key);
+ int4store(pos,tmp);
+ ptr=blob_key;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ pos+=recinfo[0].length;
+ }
+ else if (recinfo[0].type == FIELD_VARCHAR)
+ {
+ uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1);
+ create_key_part(pos+pack_length,rownr);
+ tmp= strlen(pos+pack_length);
+ if (pack_length == 1)
+ *(uchar*) pos= (uchar) tmp;
+ else
+ int2store(pos,tmp);
+ pos+= recinfo[0].length;
+ }
+ else
+ {
+ create_key_part(pos,rownr);
+ pos+=recinfo[0].length;
+ }
+ if (recinfo[1].type == FIELD_BLOB)
+ {
+ uint tmp;
+ char *ptr;
+ sprintf(blob_record,"... row: %d", rownr);
+ strappend(blob_record,max(MAX_REC_LENGTH-rownr,10),' ');
+ tmp=strlen(blob_record);
+ int4store(pos,tmp);
+ ptr=blob_record;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*));
+ }
+ else if (recinfo[1].type == FIELD_VARCHAR)
+ {
+ uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1);
+ sprintf(pos+pack_length, "... row: %d", rownr);
+ tmp= strlen(pos+pack_length);
+ if (pack_length == 1)
+ *(uchar*) pos= (uchar) tmp;
+ else
+ int2store(pos,tmp);
+ }
+ else
+ {
+ sprintf(pos,"... row: %d", rownr);
+ strappend(pos,recinfo[1].length,' ');
+ }
+}
+
+/* change row to test re-packing of rows and reallocation of keys */
+
+static void update_record(char *record)
+{
+ char *pos=record+1;
+ if (recinfo[0].type == FIELD_BLOB)
+ {
+ char *column,*ptr;
+ int length;
+ length=uint4korr(pos); /* Long blob */
+ memcpy_fixed(&column,pos+4,sizeof(char*));
+ memcpy(blob_key,column,length); /* Move old key */
+ ptr=blob_key;
+ memcpy_fixed(pos+4,&ptr,sizeof(char*)); /* Store pointer to new key */
+ if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM)
+ default_charset_info->cset->casedn(default_charset_info,
+ blob_key, length, blob_key, length);
+ pos+=recinfo[0].length;
+ }
+ else if (recinfo[0].type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1);
+ uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos);
+ default_charset_info->cset->casedn(default_charset_info,
+ pos + pack_length, length,
+ pos + pack_length, length);
+ pos+=recinfo[0].length;
+ }
+ else
+ {
+ if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM)
+ default_charset_info->cset->casedn(default_charset_info,
+ pos, keyinfo[0].seg[0].length,
+ pos, keyinfo[0].seg[0].length);
+ pos+=recinfo[0].length;
+ }
+
+ if (recinfo[1].type == FIELD_BLOB)
+ {
+ char *column;
+ int length;
+ length=uint4korr(pos);
+ memcpy_fixed(&column,pos+4,sizeof(char*));
+ memcpy(blob_record,column,length);
+ bfill(blob_record+length,20,'.'); /* Make it larger */
+ length+=20;
+ int4store(pos,length);
+ column=blob_record;
+ memcpy_fixed(pos+4,&column,sizeof(char*));
+ }
+ else if (recinfo[1].type == FIELD_VARCHAR)
+ {
+ /* Second field is longer than 10 characters */
+ uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1);
+ uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos);
+ pos= record+ recinfo[1].offset;
+ bfill(pos+pack_length+length,recinfo[1].length-length-pack_length,'.');
+ length=recinfo[1].length-pack_length;
+ if (pack_length == 1)
+ *(uchar*) pos= (uchar) length;
+ else
+ int2store(pos,length);
+ }
+ else
+ {
+ bfill(pos+recinfo[1].length-10,10,'.');
+ }
+}
+
+
+static struct my_option my_long_options[] =
+{
+ {"checksum", 'c', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifndef DBUG_OFF
+ {"debug", '#', "Undocumented",
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"delete-rows", 'd', "Abort after this many rows has been deleted",
+ (gptr*) &remove_count, (gptr*) &remove_count, 0, GET_UINT, REQUIRED_ARG,
+ 1000, 0, 0, 0, 0, 0},
+ {"help", '?', "Display help and exit",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"insert-rows", 'i', "Undocumented", (gptr*) &insert_count,
+ (gptr*) &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0},
+ {"key-alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-binary-pack", 'B', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-blob", 'b', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-cache", 'K', "Undocumented", (gptr*) &key_cacheing,
+ (gptr*) &key_cacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-length", 'k', "Undocumented", (gptr*) &key_length, (gptr*) &key_length,
+ 0, GET_UINT, REQUIRED_ARG, 6, 0, 0, 0, 0, 0},
+ {"key-multiple", 'm', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-prefix_pack", 'P', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-space_pack", 'p', "Undocumented",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"key-varchar", 'w', "Test VARCHAR keys",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"null-fields", 'N', "Define fields with NULL",
+ (gptr*) &null_fields, (gptr*) &null_fields, 0, GET_BOOL, NO_ARG,
+ 0, 0, 0, 0, 0, 0},
+ {"row-fixed-size", 'S', "Fixed size records",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"rows-in-block", 'M', "Store rows in block format",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"row-pointer-size", 'R', "Undocumented", (gptr*) &rec_pointer_size,
+ (gptr*) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"silent", 's', "Undocumented",
+ (gptr*) &silent, (gptr*) &silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"skip-delete", 'U', "Don't test deletes", (gptr*) &skip_delete,
+ (gptr*) &skip_delete, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"skip-update", 'D', "Don't test updates", (gptr*) &skip_update,
+ (gptr*) &skip_update, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"unique", 'C', "Undocumented", (gptr*) &opt_unique, (gptr*) &opt_unique, 0,
+ GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"update-rows", 'u', "Undocumented", (gptr*) &update_count,
+ (gptr*) &update_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0},
+ {"verbose", 'v', "Be more verbose", (gptr*) &verbose, (gptr*) &verbose, 0,
+ GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"version", 'V', "Print version number and exit",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
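+/*
+  Handle one command line option
+
+  Called by handle_options() in get_options() for every option found;
+  maps the option onto the global flags used when creating the table.
+*/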
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ switch(optid) {
+ case 'a':
+ key_type= HA_KEYTYPE_TEXT;
+ break;
+ case 'c':
+ create_flag|= HA_CREATE_CHECKSUM;
+ break;
+ case 'R': /* Length of record pointer */
+ if (rec_pointer_size > 3)
+ rec_pointer_size=0;
+ break;
+ case 'P':
+ pack_keys= HA_PACK_KEY; /* Use prefix compression */
+ break;
+ case 'B':
+ pack_keys= HA_BINARY_PACK_KEY; /* Use binary compression */
+ break;
+ case 'M':
+ record_type= BLOCK_RECORD;
+ break;
+ case 'S':
+ if (key_field == FIELD_VARCHAR)
+ {
+ create_flag=0; /* Static sized varchar */
+ record_type= STATIC_RECORD;
+ }
+ else if (key_field != FIELD_BLOB)
+ {
+ key_field=FIELD_NORMAL; /* static-size record */
+ extra_field=FIELD_NORMAL;
+ record_type= STATIC_RECORD;
+ }
+ break;
+ case 'p':
+ pack_keys=HA_PACK_KEY; /* Use prefix + space packing */
+ pack_seg=HA_SPACE_PACK;
+ key_type=HA_KEYTYPE_TEXT;
+ break;
+ case 'm':
+ unique_key=0;
+ break;
+ case 'b':
+ key_field=FIELD_BLOB; /* blob key */
+ extra_field= FIELD_BLOB;
+ pack_seg|= HA_BLOB_PART;
+ key_type= HA_KEYTYPE_VARTEXT1;
+ if (record_type == STATIC_RECORD)
+ record_type= DYNAMIC_RECORD;
+ break;
+ case 'k':
+ if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH)
+ {
+ fprintf(stderr,"Wrong key length\n");
+ exit(1);
+ }
+ break;
+ case 'w':
+ key_field=FIELD_VARCHAR; /* varchar keys */
+ extra_field= FIELD_VARCHAR;
+ key_type= HA_KEYTYPE_VARTEXT1;
+ pack_seg|= HA_VAR_LENGTH_PART;
+ if (record_type == STATIC_RECORD)
+ record_type= DYNAMIC_RECORD;
+ break;
+ case 'K': /* Use key caching */
+ key_cacheing=1;
+ break;
+ case 'V':
+ printf("test1 Ver 1.2 \n");
+ exit(0);
+ case '#':
+ DBUG_PUSH (argument);
+ break;
+ case '?':
+ usage();
+ exit(1);
+ }
+ return 0;
+}
+
+
+/* Read options */
+
+static void get_options(int argc, char *argv[])
+{
+ int ho_error;
+
+ if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option)))
+ exit(ho_error);
+
+ return;
+} /* get options */
+
+
+static void usage()
+{
+ printf("Usage: %s [options]\n\n", my_progname);
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+}
diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c
new file mode 100644
index 00000000000..46b8c710d4a
--- /dev/null
+++ b/storage/maria/ma_test2.c
@@ -0,0 +1,1100 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Test of the ISAM database: big test */
+
+#ifndef USE_MY_FUNC /* We want to be able to dbug this !! */
+#define USE_MY_FUNC
+#endif
+#ifdef DBUG_OFF
+#undef DBUG_OFF
+#endif
+#ifndef SAFEMALLOC
+#define SAFEMALLOC
+#endif
+#include "maria_def.h"
+#include <m_ctype.h>
+#include <my_bit.h>
+
+#define STANDARD_LENGTH 37
+#define MARIA_KEYS 6
+#define MAX_PARTS 4
+#if !defined(MSDOS) && !defined(labs)
+#define labs(a) abs(a)
+#endif
+
+static void get_options(int argc, char *argv[]);
+static uint rnd(uint max_value);
+static void fix_length(byte *record,uint length);
+static void put_blob_in_record(char *blob_pos,char **blob_buffer,
+ ulong *length);
+static void copy_key(struct st_maria_info *info,uint inx,
+ uchar *record,uchar *key);
+
+static int verbose=0,testflag=0,
+ first_key=0,async_io=0,key_cacheing=0,write_cacheing=0,locking=0,
+ rec_pointer_size=0,pack_fields=1,silent=0,
+ opt_quick_mode=0;
+static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1;
+static int create_flag= 0, srand_arg= 0;
+static ulong key_cache_size=IO_SIZE*16;
+static uint key_cache_block_size= KEY_CACHE_BLOCK_SIZE;
+static enum data_file_type record_type= DYNAMIC_RECORD;
+
+static uint keys=MARIA_KEYS,recant=1000;
+static uint use_blob=0;
+static uint16 key1[1001],key3[5000];
+static char record[300],record2[300],key[100],key2[100],
+ read_record[300],read_record2[300],read_record3[300];
+static HA_KEYSEG glob_keyseg[MARIA_KEYS][MAX_PARTS];
+
+ /* Test program */
+
+int main(int argc, char *argv[])
+{
+ uint i;
+ int j,n1,n2,n3,error,k;
+ uint write_count,update,dupp_keys,opt_delete,start,length,blob_pos,
+ reclength,ant,found_parts;
+ my_off_t lastpos;
+ ha_rows range_records,records;
+ MARIA_HA *file;
+ MARIA_KEYDEF keyinfo[10];
+ MARIA_COLUMNDEF recinfo[10];
+ MARIA_INFO info;
+ const char *filename;
+ char *blob_buffer;
+ MARIA_CREATE_INFO create_info;
+ MY_INIT(argv[0]);
+
+ filename= "test2";
+ get_options(argc,argv);
+ if (! async_io)
+ my_disable_async_io=1;
+
+ maria_init();
+ reclength=STANDARD_LENGTH+60+(use_blob ? 8 : 0);
+ blob_pos=STANDARD_LENGTH+60;
+ keyinfo[0].seg= &glob_keyseg[0][0];
+ keyinfo[0].seg[0].start=0;
+ keyinfo[0].seg[0].length=6;
+ keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[0].seg[0].language= default_charset_info->number;
+ keyinfo[0].seg[0].flag=(uint8) pack_seg;
+ keyinfo[0].seg[0].null_bit=0;
+ keyinfo[0].seg[0].null_pos=0;
+ keyinfo[0].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].flag = pack_type;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[1].seg= &glob_keyseg[1][0];
+ keyinfo[1].seg[0].start=7;
+ keyinfo[1].seg[0].length=6;
+ keyinfo[1].seg[0].type=HA_KEYTYPE_BINARY;
+ keyinfo[1].seg[0].flag=0;
+ keyinfo[1].seg[0].null_bit=0;
+ keyinfo[1].seg[0].null_pos=0;
+ keyinfo[1].seg[1].start=0; /* two part key */
+ keyinfo[1].seg[1].length=6;
+ keyinfo[1].seg[1].type=HA_KEYTYPE_NUM;
+ keyinfo[1].seg[1].flag=HA_REVERSE_SORT;
+ keyinfo[1].seg[1].null_bit=0;
+ keyinfo[1].seg[1].null_pos=0;
+ keyinfo[1].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[1].keysegs=2;
+ keyinfo[1].flag =0;
+ keyinfo[1].block_length= MARIA_MIN_KEY_BLOCK_LENGTH; /* Diff blocklength */
+ keyinfo[2].seg= &glob_keyseg[2][0];
+ keyinfo[2].seg[0].start=12;
+ keyinfo[2].seg[0].length=8;
+ keyinfo[2].seg[0].type=HA_KEYTYPE_BINARY;
+ keyinfo[2].seg[0].flag=HA_REVERSE_SORT;
+ keyinfo[2].seg[0].null_bit=0;
+ keyinfo[2].seg[0].null_pos=0;
+ keyinfo[2].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[2].keysegs=1;
+ keyinfo[2].flag =HA_NOSAME;
+ keyinfo[2].block_length= 0; /* Default block length */
+ keyinfo[3].seg= &glob_keyseg[3][0];
+ keyinfo[3].seg[0].start=0;
+ keyinfo[3].seg[0].length=reclength-(use_blob ? 8 : 0);
+ keyinfo[3].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[3].seg[0].language=default_charset_info->number;
+ keyinfo[3].seg[0].flag=(uint8) pack_seg;
+ keyinfo[3].seg[0].null_bit=0;
+ keyinfo[3].seg[0].null_pos=0;
+ keyinfo[3].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[3].keysegs=1;
+ keyinfo[3].flag = pack_type;
+ keyinfo[3].block_length= 0; /* Default block length */
+ keyinfo[4].seg= &glob_keyseg[4][0];
+ keyinfo[4].seg[0].start=0;
+ keyinfo[4].seg[0].length=5;
+ keyinfo[4].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[4].seg[0].language=default_charset_info->number;
+ keyinfo[4].seg[0].flag=0;
+ keyinfo[4].seg[0].null_bit=0;
+ keyinfo[4].seg[0].null_pos=0;
+ keyinfo[4].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[4].keysegs=1;
+ keyinfo[4].flag = pack_type;
+ keyinfo[4].block_length= 0; /* Default block length */
+ keyinfo[5].seg= &glob_keyseg[5][0];
+ keyinfo[5].seg[0].start=0;
+ keyinfo[5].seg[0].length=4;
+ keyinfo[5].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[5].seg[0].language=default_charset_info->number;
+ keyinfo[5].seg[0].flag=pack_seg;
+ keyinfo[5].seg[0].null_bit=0;
+ keyinfo[5].seg[0].null_pos=0;
+ keyinfo[5].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[5].keysegs=1;
+ keyinfo[5].flag = pack_type;
+ keyinfo[5].block_length= 0; /* Default block length */
+
+ recinfo[0].type=pack_fields ? FIELD_SKIP_PRESPACE : 0;
+ recinfo[0].length=7;
+ recinfo[0].null_bit=0;
+ recinfo[0].null_pos=0;
+ recinfo[1].type=pack_fields ? FIELD_SKIP_PRESPACE : 0;
+ recinfo[1].length=5;
+ recinfo[1].null_bit=0;
+ recinfo[1].null_pos=0;
+ recinfo[2].type=pack_fields ? FIELD_SKIP_PRESPACE : 0;
+ recinfo[2].length=9;
+ recinfo[2].null_bit=0;
+ recinfo[2].null_pos=0;
+ recinfo[3].type=FIELD_NORMAL;
+ recinfo[3].length=STANDARD_LENGTH-7-5-9-4;
+ recinfo[3].null_bit=0;
+ recinfo[3].null_pos=0;
+ recinfo[4].type=pack_fields ? FIELD_SKIP_ZERO : 0;
+ recinfo[4].length=4;
+ recinfo[4].null_bit=0;
+ recinfo[4].null_pos=0;
+ recinfo[5].type=pack_fields ? FIELD_SKIP_ENDSPACE : 0;
+ recinfo[5].length=60;
+ recinfo[5].null_bit=0;
+ recinfo[5].null_pos=0;
+ if (use_blob)
+ {
+ recinfo[6].type=FIELD_BLOB;
+ recinfo[6].length=4+maria_portable_sizeof_char_ptr;
+ recinfo[6].null_bit=0;
+ recinfo[6].null_pos=0;
+ }
+
+ write_count=update=dupp_keys=opt_delete=0;
+ blob_buffer=0;
+
+ for (i=1000 ; i>0 ; i--) key1[i]=0;
+ for (i=4999 ; i>0 ; i--) key3[i]=0;
+
+ if (!silent)
+ printf("- Creating maria-file\n");
+ file= 0;
+ bzero((char*) &create_info,sizeof(create_info));
+ create_info.max_rows=(ha_rows) (rec_pointer_size ?
+ (1L << (rec_pointer_size*8))/
+ reclength : 0);
+ create_info.reloc_rows=(ha_rows) 100;
+ if (maria_create(filename, record_type, keys,&keyinfo[first_key],
+ use_blob ? 7 : 6, &recinfo[0],
+ 0,(MARIA_UNIQUEDEF*) 0,
+ &create_info,create_flag))
+ goto err;
+ if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED)))
+ goto err;
+ if (!silent)
+ printf("- Writing key:s\n");
+ if (key_cacheing)
+ init_key_cache(maria_key_cache,key_cache_block_size,key_cache_size,0,0);
+ if (locking)
+ maria_lock_database(file,F_WRLCK);
+ if (write_cacheing)
+ maria_extra(file,HA_EXTRA_WRITE_CACHE,0);
+ if (opt_quick_mode)
+ maria_extra(file,HA_EXTRA_QUICK,0);
+
+ for (i=0 ; i < recant ; i++)
+ {
+ ulong blob_length;
+ n1=rnd(1000); n2=rnd(100); n3=rnd(5000);
+ sprintf(record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count);
+ int4store(record+STANDARD_LENGTH-4,(long) i);
+ fix_length(record,(uint) STANDARD_LENGTH+rnd(60));
+ put_blob_in_record(record+blob_pos,&blob_buffer, &blob_length);
+ DBUG_PRINT("test",("record: %d blob_length: %lu", i, blob_length));
+
+ if (maria_write(file,record))
+ {
+ if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0)
+ {
+ printf("Error: %d in write at record: %d\n",my_errno,i);
+ goto err;
+ }
+ if (verbose) printf(" Double key: %d\n",n3);
+ }
+ else
+ {
+ if (key3[n3] == 1 && first_key <3 && first_key+keys >= 3)
+ {
+ printf("Error: Didn't get error when writing second key: '%8d'\n",n3);
+ goto err;
+ }
+ write_count++; key1[n1]++; key3[n3]=1;
+ }
+
+ /* Check if we can find key without flushing database */
+ if (i % 10 == 0)
+ {
+ for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ;
+ if (!j)
+ for (j=999 ; j>0 && key1[j] == 0 ; j--) ;
+ sprintf(key,"%6d",j);
+ if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT))
+ {
+ printf("Test in loop: Can't find key: \"%s\"\n",key);
+ goto err;
+ }
+ }
+ }
+ if (testflag == 1)
+ goto end;
+
+ if (write_cacheing)
+ {
+ if (maria_extra(file,HA_EXTRA_NO_CACHE,0))
+ {
+ puts("got error from maria_extra(HA_EXTRA_NO_CACHE)");
+ goto end;
+ }
+ }
+ if (key_cacheing)
+ resize_key_cache(maria_key_cache,key_cache_block_size,key_cache_size*2,0,0);
+
+ if (!silent)
+ printf("- Delete\n");
+ if (srand_arg)
+ srand(srand_arg);
+ for (i=0 ; i<recant/10 ; i++)
+ {
+ for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ;
+ if (j != 0)
+ {
+ sprintf(key,"%6d",j);
+ if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT))
+ {
+ printf("can't find key1: \"%s\"\n",key);
+ goto err;
+ }
+ if (bcmp(read_record+keyinfo[0].seg[0].start,
+ key, keyinfo[0].seg[0].length))
+ {
+ printf("Found wrong record when searching for key: \"%s\"\n",key);
+ goto err;
+ }
+ if (opt_delete == (uint) remove_count) /* While testing */
+ goto end;
+ if (maria_delete(file,read_record))
+ {
+ printf("error: %d; can't delete record: \"%s\"\n", my_errno,read_record);
+ goto err;
+ }
+ opt_delete++;
+ key1[atoi(read_record+keyinfo[0].seg[0].start)]--;
+ key3[atoi(read_record+keyinfo[2].seg[0].start)]=0;
+ }
+ else
+ puts("Warning: Skipping delete test because no dupplicate keys");
+ }
+ if (testflag == 2)
+ goto end;
+
+ if (!silent)
+ printf("- Update\n");
+ if (srand_arg)
+ srand(srand_arg);
+ for (i=0 ; i<recant/10 ; i++)
+ {
+ n1=rnd(1000); n2=rnd(100); n3=rnd(5000);
+ sprintf(record2,"%6d:%4d:%8d:XXX: %4d ",n1,n2,n3,update);
+ int4store(record2+STANDARD_LENGTH-4,(long) i);
+ fix_length(record2,(uint) STANDARD_LENGTH+rnd(60));
+
+ for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ;
+ if (j != 0)
+ {
+ sprintf(key,"%6d",j);
+ if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT))
+ {
+ printf("can't find key1: \"%s\"\n",key);
+ goto err;
+ }
+ if (bcmp(read_record+keyinfo[0].seg[0].start,
+ key, keyinfo[0].seg[0].length))
+ {
+ printf("Found wrong record when searching for key: \"%s\"; Found \"%.*s\"\n",
+ key, keyinfo[0].seg[0].length,
+ read_record+keyinfo[0].seg[0].start);
+ goto err;
+ }
+ if (use_blob)
+ {
+ ulong blob_length;
+ if (i & 1)
+ put_blob_in_record(record+blob_pos,&blob_buffer, &blob_length);
+ else
+ bmove(record+blob_pos,read_record+blob_pos,8);
+ }
+ if (maria_update(file,read_record,record2))
+ {
+ if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0)
+ {
+ printf("error: %d; can't update:\nFrom: \"%s\"\nTo: \"%s\"\n",
+ my_errno,read_record,record2);
+ goto err;
+ }
+ if (verbose)
+ printf("Double key when tried to update:\nFrom: \"%s\"\nTo: \"%s\"\n",record,record2);
+ }
+ else
+ {
+ key1[atoi(read_record+keyinfo[0].seg[0].start)]--;
+ key3[atoi(read_record+keyinfo[2].seg[0].start)]=0;
+ key1[n1]++; key3[n3]=1;
+ update++;
+ }
+ }
+ }
+ if (testflag == 3)
+ goto end;
+
+ for (i=999, dupp_keys=j=0 ; i>0 ; i--)
+ {
+ if (key1[i] > dupp_keys)
+ {
+ dupp_keys=key1[i]; j=i;
+ }
+ }
+ sprintf(key,"%6d",j);
+ start=keyinfo[0].seg[0].start;
+ length=keyinfo[0].seg[0].length;
+ if (dupp_keys)
+ {
+ if (!silent)
+ printf("- Same key: first - next -> last - prev -> first\n");
+ DBUG_PRINT("progpos",("first - next -> last - prev -> first"));
+ if (verbose) printf(" Using key: \"%s\" Keys: %d\n",key,dupp_keys);
+
+ if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT))
+ goto err;
+ if (maria_rsame(file,read_record2,-1))
+ goto err;
+ if (memcmp(read_record,read_record2,reclength) != 0)
+ {
+ printf("maria_rsame didn't find same record\n");
+ goto end;
+ }
+ info.recpos=maria_position(file);
+ if (maria_rfirst(file,read_record2,0) ||
+ maria_rsame_with_pos(file,read_record2,0,info.recpos) ||
+ memcmp(read_record,read_record2,reclength) != 0)
+ {
+ printf("maria_rsame_with_pos didn't find same record\n");
+ goto end;
+ }
+ {
+ info.recpos= maria_position(file);
+ int skr=maria_rnext(file,read_record2,0);
+ if ((skr && my_errno != HA_ERR_END_OF_FILE) ||
+ maria_rprev(file,read_record2,-1) ||
+ memcmp(read_record,read_record2,reclength) != 0 ||
+ info.recpos != maria_position(file))
+ {
+ printf("maria_rsame_with_pos lost position\n");
+ goto end;
+ }
+ }
+ ant=1;
+ while (maria_rnext(file,read_record2,0) == 0 &&
+ memcmp(read_record2+start,key,length) == 0) ant++;
+ if (ant != dupp_keys)
+ {
+ printf("next: Found: %d keys of %d\n",ant,dupp_keys);
+ goto end;
+ }
+ ant=0;
+ while (maria_rprev(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys)
+ {
+ printf("prev: Found: %d records of %d\n",ant,dupp_keys);
+ goto end;
+ }
+
+ /* Check of maria_rnext_same */
+ if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT))
+ goto err;
+ ant=1;
+ while (!maria_rnext_same(file,read_record3) && ant < dupp_keys+10)
+ ant++;
+ if (ant != dupp_keys || my_errno != HA_ERR_END_OF_FILE)
+ {
+ printf("maria_rnext_same: Found: %d records of %d\n",ant,dupp_keys);
+ goto end;
+ }
+ }
+
+ if (!silent)
+ printf("- All keys: first - next -> last - prev -> first\n");
+ DBUG_PRINT("progpos",("All keys: first - next -> last - prev -> first"));
+ ant=1;
+ if (maria_rfirst(file,read_record,0))
+ {
+ printf("Can't find first record\n");
+ goto end;
+ }
+ while ((error=maria_rnext(file,read_record3,0)) == 0 && ant < write_count+10)
+ ant++;
+ if (ant != write_count - opt_delete || error != HA_ERR_END_OF_FILE)
+ {
+ printf("next: I found: %d records of %d (error: %d)\n",
+ ant, write_count - opt_delete, error);
+ goto end;
+ }
+ if (maria_rlast(file,read_record2,0) ||
+ bcmp(read_record2,read_record3,reclength))
+ {
+ printf("Can't find last record\n");
+ DBUG_DUMP("record2",(byte*) read_record2,reclength);
+ DBUG_DUMP("record3",(byte*) read_record3,reclength);
+ goto end;
+ }
+ ant=1;
+ while (maria_rprev(file,read_record3,0) == 0 && ant < write_count+10)
+ ant++;
+ if (ant != write_count - opt_delete)
+ {
+ printf("prev: I found: %d records of %d\n",ant,write_count);
+ goto end;
+ }
+ if (bcmp(read_record,read_record3,reclength))
+ {
+ printf("Can't find first record\n");
+ goto end;
+ }
+
+ if (!silent)
+ printf("- Test if: Read first - next - prev - prev - next == first\n");
+ DBUG_PRINT("progpos",("- Read first - next - prev - prev - next == first"));
+ if (maria_rfirst(file,read_record,0) ||
+ maria_rnext(file,read_record3,0) ||
+ maria_rprev(file,read_record3,0) ||
+ maria_rprev(file,read_record3,0) == 0 ||
+ maria_rnext(file,read_record3,0))
+ goto err;
+ if (bcmp(read_record,read_record3,reclength) != 0)
+ printf("Can't find first record\n");
+
+ if (!silent)
+ printf("- Test if: Read last - prev - next - next - prev == last\n");
+ DBUG_PRINT("progpos",("Read last - prev - next - next - prev == last"));
+ if (maria_rlast(file,read_record2,0) ||
+ maria_rprev(file,read_record3,0) ||
+ maria_rnext(file,read_record3,0) ||
+ maria_rnext(file,read_record3,0) == 0 ||
+ maria_rprev(file,read_record3,0))
+ goto err;
+ if (bcmp(read_record2,read_record3,reclength))
+ printf("Can't find last record\n");
+
+ if (!silent)
+ puts("- Test read key-part");
+ strmov(key2,key);
+ for(i=strlen(key2) ; i-- > 1 ;)
+ {
+ key2[i]=0;
+
+ /* The following row is just to catch some bugs in the key code */
+ bzero((char*) file->lastkey,file->s->base.max_key_length*2);
+ if (maria_rkey(file,read_record,0,key2,(uint) i,HA_READ_PREFIX))
+ goto err;
+ if (bcmp(read_record+start,key,(uint) i))
+ {
+ puts("Didn't find right record");
+ goto end;
+ }
+ }
+ if (dupp_keys > 2)
+ {
+ if (!silent)
+ printf("- Read key (first) - next - delete - next -> last\n");
+ DBUG_PRINT("progpos",("first - next - delete - next -> last"));
+ if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) goto err;
+ if (maria_rnext(file,read_record3,0)) goto err;
+ if (maria_delete(file,read_record3)) goto err;
+ opt_delete++;
+ ant=1;
+ while (maria_rnext(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys-1)
+ {
+ printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-1);
+ goto end;
+ }
+ }
+ if (dupp_keys>4)
+ {
+ if (!silent)
+ printf("- Read last of key - prev - delete - prev -> first\n");
+ DBUG_PRINT("progpos",("last - prev - delete - prev -> first"));
+ if (maria_rprev(file,read_record3,0)) goto err;
+ if (maria_rprev(file,read_record3,0)) goto err;
+ if (maria_delete(file,read_record3)) goto err;
+ opt_delete++;
+ ant=1;
+ while (maria_rprev(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys-2)
+ {
+ printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-2);
+ goto end;
+ }
+ }
+ if (dupp_keys > 6)
+ {
+ if (!silent)
+ printf("- Read first - delete - next -> last\n");
+ DBUG_PRINT("progpos",("first - delete - next -> last"));
+ if (maria_rkey(file,read_record3,0,key,0,HA_READ_KEY_EXACT)) goto err;
+ if (maria_delete(file,read_record3)) goto err;
+ opt_delete++;
+ ant=1;
+ if (maria_rnext(file,read_record,0))
+ goto err; /* There should still be records */
+ while (maria_rnext(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys-3)
+ {
+ printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-3);
+ goto end;
+ }
+
+ if (!silent)
+ printf("- Read last - delete - prev -> first\n");
+ DBUG_PRINT("progpos",("last - delete - prev -> first"));
+ if (maria_rprev(file,read_record3,0)) goto err;
+ if (maria_delete(file,read_record3)) goto err;
+ opt_delete++;
+ ant=0;
+ while (maria_rprev(file,read_record3,0) == 0 &&
+ bcmp(read_record3+start,key,length) == 0) ant++;
+ if (ant != dupp_keys-4)
+ {
+ printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-4);
+ goto end;
+ }
+ }
+
+ if (!silent)
+ puts("- Test if: Read rrnd - same");
+ DBUG_PRINT("progpos",("Read rrnd - same"));
+ assert(maria_scan_init(file) == 0);
+ for (i=0 ; i < write_count ; i++)
+ {
+ int tmp;
+ if ((tmp= maria_scan(file,read_record)) &&
+ tmp != HA_ERR_END_OF_FILE &&
+ tmp != HA_ERR_RECORD_DELETED)
+ {
+ printf("Got error %d when scanning table\n", tmp);
+ break;
+ }
+ }
+ maria_scan_end(file);
+ if (i != write_count && i != write_count - opt_delete)
+ {
+ printf("Found wrong number of rows while scanning table\n");
+ goto err;
+ }
+
+ bmove(read_record2,read_record,reclength);
+ for (i=min(2,keys) ; i-- > 0 ;)
+ {
+ if (maria_rsame(file,read_record2,(int) i)) goto err;
+ if (bcmp(read_record,read_record2,reclength) != 0)
+ {
+ printf("maria_rsame didn't find same record\n");
+ goto end;
+ }
+ }
+ if (!silent)
+ puts("- Test maria_records_in_range");
+ maria_status(file,&info,HA_STATUS_VARIABLE);
+ for (i=0 ; i < info.keys ; i++)
+ {
+ key_range min_key, max_key;
+ if (maria_rfirst(file,read_record,(int) i) ||
+ maria_rlast(file,read_record2,(int) i))
+ goto err;
+ copy_key(file,(uint) i,(uchar*) read_record,(uchar*) key);
+ copy_key(file,(uint) i,(uchar*) read_record2,(uchar*) key2);
+ min_key.key= key;
+ min_key.length= USE_WHOLE_KEY;
+ min_key.flag= HA_READ_KEY_EXACT;
+ max_key.key= key2;
+ max_key.length= USE_WHOLE_KEY;
+ max_key.flag= HA_READ_AFTER_KEY;
+
+ range_records= maria_records_in_range(file,(int) i, &min_key, &max_key);
+ if (range_records < info.records*8/10 ||
+ range_records > info.records*12/10)
+ {
+ printf("maria_records_range returned %ld; Should be about %ld\n",
+ (long) range_records,(long) info.records);
+ goto end;
+ }
+ if (verbose)
+ {
+ printf("maria_records_range returned %ld; Exact is %ld (diff: %4.2g %%)\n",
+ (long) range_records, (long) info.records,
+ labs((long) range_records - (long) info.records)*100.0/
+ info.records);
+ }
+ }
+ for (i=0 ; i < 5 ; i++)
+ {
+ for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ;
+ for (k=rnd(1000)+1 ; k>0 && key1[k] == 0 ; k--) ;
+ if (j != 0 && k != 0)
+ {
+ key_range min_key, max_key;
+ if (j > k)
+ swap_variables(int, j, k);
+ sprintf(key,"%6d",j);
+ sprintf(key2,"%6d",k);
+
+ min_key.key= key;
+ min_key.length= USE_WHOLE_KEY;
+ min_key.flag= HA_READ_AFTER_KEY;
+ max_key.key= key2;
+ max_key.length= USE_WHOLE_KEY;
+ max_key.flag= HA_READ_BEFORE_KEY;
+ range_records= maria_records_in_range(file, 0, &min_key, &max_key);
+ records=0;
+ for (j++ ; j < k ; j++)
+ records+=key1[j];
+ if ((long) range_records < (long) records*7/10-2 ||
+ (long) range_records > (long) records*14/10+2)
+ {
+ printf("maria_records_range for key: %d returned %lu; Should be about %lu\n",
+ i, (ulong) range_records, (ulong) records);
+ goto end;
+ }
+ if (verbose && records)
+ {
+ printf("maria_records_range returned %lu; Exact is %lu (diff: %4.2g %%)\n",
+ (ulong) range_records, (ulong) records,
+ labs((long) range_records-(long) records)*100.0/records);
+
+ }
+ }
+ }
+
+ if (!silent)
+ printf("- maria_info\n");
+ maria_status(file,&info,HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ if (info.records != write_count-opt_delete || info.deleted > opt_delete + update
+ || info.keys != keys)
+ {
+ puts("Wrong info from maria_info");
+ printf("Got: records: %lu delete: %lu i_keys: %d\n",
+ (ulong) info.records, (ulong) info.deleted, info.keys);
+ }
+ if (verbose)
+ {
+ char buff[80];
+ get_date(buff,3,info.create_time);
+ printf("info: Created %s\n",buff);
+ get_date(buff,3,info.check_time);
+ printf("info: checked %s\n",buff);
+ get_date(buff,3,info.update_time);
+ printf("info: Modified %s\n",buff);
+ }
+
+ maria_panic(HA_PANIC_WRITE);
+ maria_panic(HA_PANIC_READ);
+ if (maria_is_changed(file))
+ puts("Warning: maria_is_changed reported that datafile was changed");
+
+ if (!silent)
+ printf("- maria_extra(CACHE) + maria_rrnd.... + maria_extra(NO_CACHE)\n");
+ if (maria_reset(file) || maria_extra(file,HA_EXTRA_CACHE,0))
+ {
+ if (locking || (!use_blob && !pack_fields))
+ {
+ puts("got error from maria_extra(HA_EXTRA_CACHE)");
+ goto end;
+ }
+ }
+ ant=0;
+ assert(maria_scan_init(file) == 0);
+ while ((error= maria_scan(file,record)) != HA_ERR_END_OF_FILE &&
+ ant < write_count + 10)
+ ant+= error ? 0 : 1;
+ maria_scan_end(file);
+ if (ant != write_count-opt_delete)
+ {
+ printf("scan with cache: I can only find: %d records of %d\n",
+ ant,write_count-opt_delete);
+ goto end;
+ }
+ if (maria_extra(file,HA_EXTRA_NO_CACHE,0))
+ {
+ puts("got error from maria_extra(HA_EXTRA_NO_CACHE)");
+ goto end;
+ }
+
+ ant=0;
+ maria_scan_init(file);
+ while ((error=maria_scan(file,record)) != HA_ERR_END_OF_FILE &&
+ ant < write_count + 10)
+ ant+= error ? 0 : 1;
+ if (ant != write_count-opt_delete)
+ {
+ printf("scan with cache: I can only find: %d records of %d\n",
+ ant,write_count-opt_delete);
+ goto end;
+ }
+
+ if (testflag == 4)
+ goto end;
+
+ if (!silent)
+ printf("- Removing keys\n");
+ DBUG_PRINT("progpos",("Removing keys"));
+ lastpos = HA_OFFSET_ERROR;
+ /* DBUG_POP(); */
+ maria_reset(file);
+ found_parts=0;
+ maria_scan_init(file);
+ while ((error= maria_scan(file,read_record)) != HA_ERR_END_OF_FILE)
+ {
+ info.recpos=maria_position(file);
+ if (lastpos >= info.recpos && lastpos != HA_OFFSET_ERROR)
+ {
+ printf("maria_rrnd didn't advance filepointer; old: %ld, new: %ld\n",
+ (long) lastpos, (long) info.recpos);
+ goto err;
+ }
+ lastpos=info.recpos;
+ if (error == 0)
+ {
+ if (opt_delete == (uint) remove_count) /* While testing */
+ goto end;
+ if (rnd(2) == 1 && maria_rsame(file,read_record,-1))
+ {
+ printf("can't find record %lx\n",(long) info.recpos);
+ goto err;
+ }
+ if (use_blob)
+ {
+ ulong blob_length,pos;
+ uchar *ptr;
+ longget(blob_length,read_record+blob_pos+4);
+ ptr=(uchar*) blob_length;
+ longget(blob_length,read_record+blob_pos);
+ for (pos=0 ; pos < blob_length ; pos++)
+ {
+ if (ptr[pos] != (uchar) (blob_length+pos))
+ {
+ printf("Found blob with wrong info at %ld\n",(long) lastpos);
+ maria_scan_end(file);
+ my_errno= 0;
+ goto err;
+ }
+ }
+ }
+ if (maria_delete(file,read_record))
+ {
+ printf("can't delete record: %6.6s, delete_count: %d\n",
+ read_record, opt_delete);
+ maria_scan_end(file);
+ goto err;
+ }
+ opt_delete++;
+ }
+ else
+ found_parts++;
+ }
+ maria_scan_end(file);
+ if (my_errno != HA_ERR_END_OF_FILE && my_errno != HA_ERR_RECORD_DELETED)
+ printf("error: %d from maria_rrnd\n",my_errno);
+ if (write_count != opt_delete)
+ {
+ printf("Deleted only %d of %d records (%d parts)\n",opt_delete,write_count,
+ found_parts);
+ goto err;
+ }
+end:
+ if (maria_close(file))
+ goto err;
+ maria_panic(HA_PANIC_CLOSE); /* Should close log */
+ if (!silent)
+ {
+ printf("\nFollowing test have been made:\n");
+ printf("Write records: %d\nUpdate records: %d\nSame-key-read: %d\nDelete records: %d\n", write_count,update,dupp_keys,opt_delete);
+ if (rec_pointer_size)
+ printf("Record pointer size: %d\n",rec_pointer_size);
+ printf("maria_block_size: %lu\n", maria_block_size);
+ if (key_cacheing)
+ {
+ puts("Key cache used");
+ printf("key_cache_block_size: %u\n", key_cache_block_size);
+ if (write_cacheing)
+ puts("Key cache resized");
+ }
+ if (write_cacheing)
+ puts("Write caching used");
+ if (opt_quick_mode)
+ puts("quick mode");
+ if (async_io && locking)
+ puts("Asyncron io with locking used");
+ else if (locking)
+ puts("Locking used");
+ if (use_blob)
+ puts("blobs used");
+ printf("key cache status: \n\
+blocks used:%10lu\n\
+not flushed:%10lu\n\
+w_requests: %10lu\n\
+writes: %10lu\n\
+r_requests: %10lu\n\
+reads: %10lu\n",
+ maria_key_cache->blocks_used,
+ maria_key_cache->global_blocks_changed,
+ (ulong) maria_key_cache->global_cache_w_requests,
+ (ulong) maria_key_cache->global_cache_write,
+ (ulong) maria_key_cache->global_cache_r_requests,
+ (ulong) maria_key_cache->global_cache_read);
+ }
+ end_key_cache(maria_key_cache,1);
+ my_free(blob_buffer, MYF(MY_ALLOW_ZERO_PTR));
+ my_end(silent ? MY_CHECK_ERROR : MY_CHECK_ERROR | MY_GIVE_INFO);
+ return(0);
+err:
+ printf("got error: %d when using MARIA-database\n",my_errno);
+ if (file)
+ VOID(maria_close(file));
+ maria_end();
+ return(1);
+} /* main */
+
+
+/* Read options */
+
+static void get_options(int argc, char **argv)
+{
+ char *pos,*progname;
+
+ progname= argv[0];
+
+ while (--argc >0 && *(pos = *(++argv)) == '-' ) {
+ switch(*++pos) {
+ case 'B':
+ pack_type= HA_BINARY_PACK_KEY;
+ break;
+ case 'b':
+ use_blob= 1;
+ if (*++pos)
+ use_blob= atol(pos);
+ break;
+ case 'K': /* Use key caching */
+ key_cacheing=1;
+ if (*++pos)
+ key_cache_size=atol(pos);
+ break;
+ case 'W': /* Use write caching */
+ write_cacheing=1;
+ if (*++pos)
+ my_default_record_cache_size=atoi(pos);
+ break;
+ case 'd':
+ remove_count= atoi(++pos);
+ break;
+ case 'i':
+ if (*++pos)
+ srand(srand_arg= atoi(pos));
+ break;
+ case 'L':
+ locking=1;
+ break;
+ case 'A': /* Use asynchronous io */
+ async_io=1;
+ if (*++pos)
+ my_default_record_cache_size=atoi(pos);
+ break;
+ case 'v': /* verbose */
+ verbose=1;
+ break;
+ case 'm': /* records */
+ if ((recant=atoi(++pos)) < 10 && testflag > 1)
+ {
+ fprintf(stderr,"record count must be >= 10 (if testflag != 1)\n");
+ exit(1);
+ }
+ break;
+ case 'e': /* maria_block_length */
+ if ((maria_block_size= atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH ||
+ maria_block_size > MARIA_MAX_KEY_BLOCK_LENGTH)
+ {
+ fprintf(stderr,"Wrong maria_block_length\n");
+ exit(1);
+ }
+ maria_block_size= my_round_up_to_next_power(maria_block_size);
+ break;
+ case 'E': /* key_cache_block_size */
+ if ((key_cache_block_size=atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH ||
+ key_cache_block_size > MARIA_MAX_KEY_BLOCK_LENGTH)
+ {
+ fprintf(stderr,"Wrong key_cache_block_size\n");
+ exit(1);
+ }
+ key_cache_block_size= my_round_up_to_next_power(key_cache_block_size);
+ break;
+ case 'f':
+ if ((first_key=atoi(++pos)) < 0 || first_key >= MARIA_KEYS)
+ first_key=0;
+ break;
+ case 'k':
+ if ((keys=(uint) atoi(++pos)) < 1 ||
+ keys > (uint) (MARIA_KEYS-first_key))
+ keys=MARIA_KEYS-first_key;
+ break;
+ case 'M':
+ record_type= BLOCK_RECORD;
+ break;
+ case 'P':
+ pack_type=0; /* Don't use DIFF_LENGTH */
+ pack_seg=0;
+ break;
+ case 'R': /* Length of record pointer */
+ rec_pointer_size=atoi(++pos);
+ if (rec_pointer_size > 7)
+ rec_pointer_size=0;
+ break;
+ case 'S':
+ pack_fields=0; /* Static-length-records */
+ record_type= STATIC_RECORD;
+ break;
+ case 's':
+ silent=1;
+ break;
+ case 't':
+ testflag=atoi(++pos); /* testmod */
+ break;
+ case 'q':
+ opt_quick_mode=1;
+ break;
+ case 'c':
+ create_flag|= HA_CREATE_CHECKSUM;
+ break;
+ case 'D':
+ create_flag|=HA_CREATE_DELAY_KEY_WRITE;
+ break;
+ case '?':
+ case 'I':
+ case 'V':
+ printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE);
+ puts("By Monty, for your professional use\n");
+ printf("Usage: %s [-?AbBcDIKLPRqSsVWltv] [-k#] [-f#] [-m#] [-e#] [-E#] [-t#]\n",
+ progname);
+ exit(0);
+ case '#':
+ DBUG_PUSH (++pos);
+ break;
+ default:
+ printf("Illegal option: '%c'\n",*pos);
+ break;
+ }
+ }
+ return;
+} /* get options */
+
+ /* Get a random value 0 <= x <= n */
+
+static uint rnd(uint max_value)
+{
+ return (uint) ((rand() & 32767)/32767.0*max_value);
+} /* rnd */
+
+
+ /* Create a variable length record */
+
+static void fix_length(byte *rec, uint length)
+{
+ bmove(rec+STANDARD_LENGTH,
+ "0123456789012345678901234567890123456789012345678901234567890",
+ length-STANDARD_LENGTH);
+ strfill(rec+length,STANDARD_LENGTH+60-length,' ');
+} /* fix_length */
+
+
+ /* Maybe put a blob in the record */
+
+static void put_blob_in_record(char *blob_pos, char **blob_buffer,
+ ulong *blob_length)
+{
+ ulong i,length;
+ if (use_blob)
+ {
+ if (rnd(10) == 0)
+ {
+ if (! *blob_buffer &&
+ !(*blob_buffer=my_malloc((uint) use_blob,MYF(MY_WME))))
+ {
+ use_blob=0;
+ return;
+ }
+ length=rnd(use_blob);
+ for (i=0 ; i < length ; i++)
+ (*blob_buffer)[i]=(char) (length+i);
+ int4store(blob_pos,length);
+ memcpy_fixed(blob_pos+4,(char*) blob_buffer,sizeof(char*));
+ *blob_length= length;
+ }
+ else
+ {
+ int4store(blob_pos,0);
+ *blob_length= 0;
+ }
+ }
+ return;
+}
+
+
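+ /* Copy the key value for key number 'inx' from a record into key_buff,
+    concatenating the key segments one after the other */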
+static void copy_key(MARIA_HA *info,uint inx,uchar *rec,uchar *key_buff)
+{
+ HA_KEYSEG *keyseg;
+
+ for (keyseg=info->s->keyinfo[inx].seg ; keyseg->type ; keyseg++)
+ {
+ memcpy(key_buff,rec+keyseg->start,(size_t) keyseg->length);
+ key_buff+=keyseg->length;
+ }
+ return;
+}
diff --git a/storage/maria/ma_test3.c b/storage/maria/ma_test3.c
new file mode 100644
index 00000000000..f6ed248ce16
--- /dev/null
+++ b/storage/maria/ma_test3.c
@@ -0,0 +1,501 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Test of locking */
+
+#ifndef __NETWARE__
+
+#include "maria.h"
+#include <sys/types.h>
+#ifdef HAVE_SYS_WAIT_H
+# include <sys/wait.h>
+#endif
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+#endif
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+
+#if defined(HAVE_LRAND48)
+#define rnd(X) (lrand48() % X)
+#define rnd_init(X) srand48(X)
+#else
+#define rnd(X) (random() % X)
+#define rnd_init(X) srandom(X)
+#endif
+
+
+const char *filename= "test3";
+uint tests=10,forks=10,key_cacheing=0;
+
+static void get_options(int argc, char *argv[]);
+void start_test(int id);
+int test_read(MARIA_HA *,int),test_write(MARIA_HA *,int,int),
+ test_update(MARIA_HA *,int,int),test_rrnd(MARIA_HA *,int);
+
+struct record {
+ char id[8];
+ char nr[4];
+ char text[10];
+} record;
+
+
+int main(int argc,char **argv)
+{
+ int status,wait_ret;
+ uint i=0;
+ MARIA_KEYDEF keyinfo[10];
+ MARIA_COLUMNDEF recinfo[10];
+ HA_KEYSEG keyseg[10][2];
+ MY_INIT(argv[0]);
+ get_options(argc,argv);
+
+ fprintf(stderr, "WARNING! this program is to test 'external locking'"
+ " (when several processes share a table through file locking)"
+ " which is not supported by Maria at all; expect errors."
+ " We may soon remove this program.\n");
+ maria_init();
+ bzero((char*) keyinfo,sizeof(keyinfo));
+ bzero((char*) recinfo,sizeof(recinfo));
+ bzero((char*) keyseg,sizeof(keyseg));
+ keyinfo[0].seg= &keyseg[0][0];
+ keyinfo[0].seg[0].start=0;
+ keyinfo[0].seg[0].length=8;
+ keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT;
+ keyinfo[0].seg[0].flag=HA_SPACE_PACK;
+ keyinfo[0].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[0].keysegs=1;
+ keyinfo[0].flag = (uint8) HA_PACK_KEY;
+ keyinfo[0].block_length= 0; /* Default block length */
+ keyinfo[1].seg= &keyseg[1][0];
+ keyinfo[1].seg[0].start=8;
+ keyinfo[1].seg[0].length=4; /* Long is always 4 in maria */
+ keyinfo[1].seg[0].type=HA_KEYTYPE_LONG_INT;
+ keyinfo[1].seg[0].flag=0;
+ keyinfo[1].key_alg=HA_KEY_ALG_BTREE;
+ keyinfo[1].keysegs=1;
+ keyinfo[1].flag =HA_NOSAME;
+ keyinfo[1].block_length= 0; /* Default block length */
+
+ recinfo[0].type=0;
+ recinfo[0].length=sizeof(record.id);
+ recinfo[1].type=0;
+ recinfo[1].length=sizeof(record.nr);
+ recinfo[2].type=0;
+ recinfo[2].length=sizeof(record.text);
+
+ puts("- Creating maria-file");
+ my_delete(filename,MYF(0)); /* Remove old locks under gdb */
+ if (maria_create(filename,BLOCK_RECORD, 2, &keyinfo[0],2,&recinfo[0],0,
+ (MARIA_UNIQUEDEF*) 0, (MARIA_CREATE_INFO*) 0,0))
+ exit(1);
+
+ rnd_init(0);
+ printf("- Starting %d processes\n",forks); fflush(stdout);
+ for (i=0 ; i < forks; i++)
+ {
+ if (!fork())
+ {
+ start_test(i+1);
+ sleep(1);
+ return 0;
+ }
+ VOID(rnd(1));
+ }
+
+ for (i=0 ; i < forks ; i++)
+ while ((wait_ret=wait(&status)) && wait_ret == -1);
+ maria_end();
+ return 0;
+}
+
+
+static void get_options(int argc, char **argv)
+{
+ char *pos,*progname;
+
+ progname= argv[0];
+
+ while (--argc >0 && *(pos = *(++argv)) == '-' ) {
+ switch(*++pos) {
+ case 'f':
+ forks=atoi(++pos);
+ break;
+ case 't':
+ tests=atoi(++pos);
+ break;
+ case 'K': /* Use key caching */
+ key_cacheing=1;
+ break;
+ case 'A': /* All flags */
+ key_cacheing=1;
+ break;
+ case '?':
+ case 'I':
+ case 'V':
+ printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE);
+ puts("By Monty, for your professional use\n");
+ puts("Test av locking with threads\n");
+ printf("Usage: %s [-?lKA] [-f#] [-t#]\n",progname);
+ exit(0);
+ case '#':
+ DBUG_PUSH (++pos);
+ break;
+ default:
+ printf("Illegal option: '%c'\n",*pos);
+ break;
+ }
+ }
+ return;
+}
+
+
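+/*
+  Work done by each forked child
+
+  Opens the table twice and runs 'tests' random operations (key read,
+  table scan, write or update), sometimes under an explicit table lock
+  on one of the two handles.
+*/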
+void start_test(int id)
+{
+ uint i;
+ int error,lock_type;
+ MARIA_INFO isam_info;
+ MARIA_HA *file,*file1,*file2=0,*lock;
+
+ if (!(file1=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED)) ||
+ !(file2=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED)))
+ {
+ fprintf(stderr,"Can't open isam-file: %s\n",filename);
+ exit(1);
+ }
+ if (key_cacheing && rnd(2) == 0)
+ init_key_cache(maria_key_cache, KEY_CACHE_BLOCK_SIZE, 65536L, 0, 0);
+ printf("Process %d, pid: %d\n",id,getpid()); fflush(stdout);
+
+ for (error=i=0 ; i < tests && !error; i++)
+ {
+ file= (rnd(2) == 1) ? file1 : file2;
+ lock=0 ; lock_type=0;
+ if (rnd(10) == 0)
+ {
+ if (maria_lock_database(lock=(rnd(2) ? file1 : file2),
+ lock_type=(rnd(2) == 0 ? F_RDLCK : F_WRLCK)))
+ {
+ fprintf(stderr,"%2d: start: Can't lock table %d\n",id,my_errno);
+ error=1;
+ break;
+ }
+ }
+ switch (rnd(4)) {
+ case 0: error=test_read(file,id); break;
+ case 1: error=test_rrnd(file,id); break;
+ case 2: error=test_write(file,id,lock_type); break;
+ case 3: error=test_update(file,id,lock_type); break;
+ }
+ if (lock)
+ maria_lock_database(lock,F_UNLCK);
+ }
+ if (!error)
+ {
+ maria_status(file1,&isam_info,HA_STATUS_VARIABLE);
+ printf("%2d: End of test. Records: %ld Deleted: %ld\n",
+ id,(long) isam_info.records, (long) isam_info.deleted);
+ fflush(stdout);
+ }
+
+ maria_close(file1);
+ maria_close(file2);
+ if (error)
+ {
+ printf("%2d: Aborted\n",id); fflush(stdout);
+ exit(1);
+ }
+}
+
+
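+/* Do 100 random key reads on key 1; when a key is not found, read the
+   next and then the previous row instead. Sometimes runs under a read lock */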
+int test_read(MARIA_HA *file,int id)
+{
+ uint i,lock,found,next,prev;
+ ulong find;
+
+ lock=0;
+ if (rnd(2) == 0)
+ {
+ lock=1;
+ if (maria_lock_database(file,F_RDLCK))
+ {
+ fprintf(stderr,"%2d: Can't lock table %d\n",id,my_errno);
+ return 1;
+ }
+ }
+
+ found=next=prev=0;
+ for (i=0 ; i < 100 ; i++)
+ {
+ find=rnd(100000);
+ if (!maria_rkey(file,record.id,1,(byte*) &find,
+ sizeof(find),HA_READ_KEY_EXACT))
+ found++;
+ else
+ {
+ if (my_errno != HA_ERR_KEY_NOT_FOUND)
+ {
+ fprintf(stderr,"%2d: Got error %d from read in read\n",id,my_errno);
+ return 1;
+ }
+ else if (!maria_rnext(file,record.id,1))
+ next++;
+ else
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rnext in read\n",id,my_errno);
+ return 1;
+ }
+ else if (!maria_rprev(file,record.id,1))
+ prev++;
+ else
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rnext in read\n",
+ id,my_errno);
+ return 1;
+ }
+ }
+ }
+ }
+ }
+ if (lock)
+ {
+ if (maria_lock_database(file,F_UNLCK))
+ {
+ fprintf(stderr,"%2d: Can't unlock table\n",id);
+ return 1;
+ }
+ }
+ printf("%2d: read: found: %5d next: %5d prev: %5d\n",
+ id,found,next,prev);
+ fflush(stdout);
+ return 0;
+}
+
+
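+/* Scan the whole table with maria_rrnd() and count the rows found.
+   Sometimes runs under a read lock with the record cache enabled */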
+int test_rrnd(MARIA_HA *file,int id)
+{
+ uint count,lock;
+
+ lock=0;
+ if (rnd(2) == 0)
+ {
+ lock=1;
+ if (maria_lock_database(file,F_RDLCK))
+ {
+ fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno);
+ maria_close(file);
+ return 1;
+ }
+ if (rnd(2) == 0)
+ maria_extra(file,HA_EXTRA_CACHE,0);
+ }
+
+ count=0;
+ if (maria_rrnd(file,record.id,0L))
+ {
+ if (my_errno == HA_ERR_END_OF_FILE)
+ goto end;
+ fprintf(stderr,"%2d: Can't read first record (%d)\n",id,my_errno);
+ return 1;
+ }
+ for (count=1 ; !maria_rrnd(file,record.id,HA_OFFSET_ERROR) ;count++) ;
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rrnd\n",id,my_errno);
+ return 1;
+ }
+
+end:
+ if (lock)
+ {
+ maria_extra(file,HA_EXTRA_NO_CACHE,0);
+ if (maria_lock_database(file,F_UNLCK))
+ {
+ fprintf(stderr,"%2d: Can't unlock table\n",id);
+ exit(0);
+ }
+ }
+ printf("%2d: rrnd: %5d\n",id,count); fflush(stdout);
+ return 0;
+}
+
+
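+/* Write between 10 and 109 rows with random key values; duplicate key
+   errors are expected and ignored. Sometimes runs under a write lock */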
+int test_write(MARIA_HA *file,int id,int lock_type)
+{
+ uint i,tries,count,lock;
+
+ lock=0;
+ if (rnd(2) == 0 || lock_type == F_RDLCK)
+ {
+ lock=1;
+ if (maria_lock_database(file,F_WRLCK))
+ {
+ if (lock_type == F_RDLCK && my_errno == EDEADLK)
+ {
+ printf("%2d: write: deadlock\n",id); fflush(stdout);
+ return 0;
+ }
+ fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno);
+ maria_close(file);
+ return 1;
+ }
+ if (rnd(2) == 0)
+ maria_extra(file,HA_EXTRA_WRITE_CACHE,0);
+ }
+
+ sprintf(record.id,"%7d",getpid());
+ strnmov(record.text,"Testing...", sizeof(record.text));
+
+ tries=(uint) rnd(100)+10;
+ for (i=count=0 ; i < tries ; i++)
+ {
+ uint32 tmp=rnd(80000)+20000;
+ int4store(record.nr,tmp);
+ if (!maria_write(file,record.id))
+ count++;
+ else
+ {
+ if (my_errno != HA_ERR_FOUND_DUPP_KEY)
+ {
+ fprintf(stderr,"%2d: Got error %d (errno %d) from write\n",id,my_errno,
+ errno);
+ return 1;
+ }
+ }
+ }
+ if (lock)
+ {
+ maria_extra(file,HA_EXTRA_NO_CACHE,0);
+ if (maria_lock_database(file,F_UNLCK))
+ {
+ fprintf(stderr,"%2d: Can't unlock table\n",id);
+ exit(0);
+ }
+ }
+ printf("%2d: write: %5d\n",id,count); fflush(stdout);
+ return 0;
+}
+
+
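+/* Look up rows as in test_read() and update the row found (or the nearest
+   one) with a new number and the text "Updated". Errors caused by other
+   processes (record changed/deleted, duplicate key) are ignored */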
+int test_update(MARIA_HA *file,int id,int lock_type)
+{
+ uint i,lock,found,next,prev,update;
+ uint32 tmp;
+ char find[4];
+ struct record new_record;
+
+ lock=0;
+ if (rnd(2) == 0 || lock_type == F_RDLCK)
+ {
+ lock=1;
+ if (maria_lock_database(file,F_WRLCK))
+ {
+ if (lock_type == F_RDLCK && my_errno == EDEADLK)
+ {
+ printf("%2d: write: deadlock\n",id); fflush(stdout);
+ return 0;
+ }
+ fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno);
+ return 1;
+ }
+ }
+ bzero((char*) &new_record,sizeof(new_record));
+ strmov(new_record.text,"Updated");
+
+ found=next=prev=update=0;
+ for (i=0 ; i < 100 ; i++)
+ {
+ tmp=rnd(100000);
+ int4store(find,tmp);
+ if (!maria_rkey(file,record.id,1,(byte*) find,
+ sizeof(find),HA_READ_KEY_EXACT))
+ found++;
+ else
+ {
+ if (my_errno != HA_ERR_KEY_NOT_FOUND)
+ {
+ fprintf(stderr,"%2d: Got error %d from read in update\n",id,my_errno);
+ return 1;
+ }
+ else if (!maria_rnext(file,record.id,1))
+ next++;
+ else
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rnext in update\n",
+ id,my_errno);
+ return 1;
+ }
+ else if (!maria_rprev(file,record.id,1))
+ prev++;
+ else
+ {
+ if (my_errno != HA_ERR_END_OF_FILE)
+ {
+ fprintf(stderr,"%2d: Got error %d from rnext in update\n",
+ id,my_errno);
+ return 1;
+ }
+ continue;
+ }
+ }
+ }
+ memcpy_fixed(new_record.id,record.id,sizeof(record.id));
+ tmp=rnd(20000)+40000;
+ int4store(new_record.nr,tmp);
+ if (!maria_update(file,record.id,new_record.id))
+ update++;
+ else
+ {
+ if (my_errno != HA_ERR_RECORD_CHANGED &&
+ my_errno != HA_ERR_RECORD_DELETED &&
+ my_errno != HA_ERR_FOUND_DUPP_KEY)
+ {
+ fprintf(stderr,"%2d: Got error %d from update\n",id,my_errno);
+ return 1;
+ }
+ }
+ }
+ if (lock)
+ {
+ if (maria_lock_database(file,F_UNLCK))
+ {
+ fprintf(stderr,"Can't unlock table,id, error%d\n",my_errno);
+ return 1;
+ }
+ }
+ printf("%2d: update: %5d\n",id,update); fflush(stdout);
+ return 0;
+}
+
+#else /* __NETWARE__ */
+
+#include <stdio.h>
+
+main()
+{
+ fprintf(stderr,"this test has not been ported to NetWare\n");
+ return 0;
+}
+
+#endif /* __NETWARE__ */
diff --git a/storage/maria/ma_test_all.res b/storage/maria/ma_test_all.res
new file mode 100644
index 00000000000..57b0feeeae8
--- /dev/null
+++ b/storage/maria/ma_test_all.res
@@ -0,0 +1,62 @@
+Running tests with dynamic row format
+Running tests with static row format
+Running tests with block row format
+ma_test2 -s -L -K -R1 -m2000 ; Should give error 135
+Error: 135 in write at record: 1099
+got error: 135 when using MARIA-database
+./maria_chk -sm test2 will warn that 'Datafile is almost full'
+maria_chk: MARIA file test2
+maria_chk: warning: Datafile is almost full, 65516 of 65534 used
+MARIA-table 'test2' is usable but should be fixed
+
+real 0m0.808s
+user 0m0.584s
+sys 0m0.212s
+
+real 0m0.780s
+user 0m0.584s
+sys 0m0.176s
+
+real 0m0.809s
+user 0m0.616s
+sys 0m0.180s
+
+real 0m1.356s
+user 0m1.140s
+sys 0m0.188s
+
+real 0m0.783s
+user 0m0.600s
+sys 0m0.176s
+
+real 0m1.390s
+user 0m1.184s
+sys 0m0.152s
+
+real 0m1.875s
+user 0m1.632s
+sys 0m0.244s
+
+real 0m1.313s
+user 0m1.148s
+sys 0m0.160s
+
+real 0m1.846s
+user 0m1.644s
+sys 0m0.188s
+
+real 0m1.875s
+user 0m1.632s
+sys 0m0.212s
+
+real 0m1.819s
+user 0m1.672s
+sys 0m0.124s
+
+real 0m2.117s
+user 0m1.816s
+sys 0m0.292s
+
+real 0m1.871s
+user 0m1.636s
+sys 0m0.196s
diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh
new file mode 100755
index 00000000000..17e654ac51f
--- /dev/null
+++ b/storage/maria/ma_test_all.sh
@@ -0,0 +1,192 @@
+#!/bin/sh
+#
+# Execute some simple basic tests on the Maria library to check if things
+# work at all.
+
+valgrind="valgrind --alignment=8 --leak-check=yes"
+silent="-s"
+suffix=""
+#set -x -v -e
+
+run_tests()
+{
+ row_type=$1
+ #
+ # First some simple tests
+ #
+ ./ma_test1$suffix $silent $row_type
+ ./maria_chk$suffix -se test1
+ ./ma_test1$suffix $silent -N $row_type
+ ./maria_chk$suffix -se test1
+ ./ma_test1$suffix $silent -P --checksum $row_type
+ ./maria_chk$suffix -se test1
+ ./ma_test1$suffix $silent -P -N $row_type
+ ./maria_chk$suffix -se test1
+ ./ma_test1$suffix $silent -B -N -R2 $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -k 480 --unique $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -N -R1 $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -p $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -p -N --unique $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -p -N --key_length=127 --checksum $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -p -N --key_length=128 $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -p --key_length=480 $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -B $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -B --key_length=64 --unique $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -B -k 480 --checksum $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -B -k 480 -N --unique --checksum $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -m $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -m -P --unique --checksum $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -m -P --key_length=480 --key_cache $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -m -p $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -w --unique $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -w --key_length=64 --checksum $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -w -N --key_length=480 $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -w --key_length=480 --checksum $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -b -N $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -a -b --key_length=480 $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent -p -B --key_length=480 $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent --checksum --unique $row_type
+ ./maria_chk$suffix -se test1
+ ./ma_test1$suffix $silent --unique $row_type
+ ./maria_chk$suffix -se test1
+
+ ./ma_test1$suffix $silent --key_multiple -N -S $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent --key_multiple -a -p --key_length=480 $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent --key_multiple -a -B --key_length=480 $row_type
+ ./maria_chk$suffix -sm test1
+ ./ma_test1$suffix $silent --key_multiple -P -S $row_type
+ ./maria_chk$suffix -sm test1
+
+ ./maria_pack$suffix --force -s test1
+ ./maria_chk$suffix -ess test1
+
+ ./ma_test2$suffix $silent -L -K -W -P $row_type
+ ./maria_chk$suffix -sm test2
+ ./ma_test2$suffix $silent -L -K -W -P -A $row_type
+ ./maria_chk$suffix -sm test2
+ ./ma_test2$suffix $silent -L -K -P -R3 -m50 -b1000000 $row_type
+ ./maria_chk$suffix -sm test2
+ ./ma_test2$suffix $silent -L -B $row_type
+ ./maria_chk$suffix -sm test2
+ ./ma_test2$suffix $silent -D -B -c $row_type
+ ./maria_chk$suffix -sm test2
+ ./ma_test2$suffix $silent -m10000 -e4096 -K $row_type
+ ./maria_chk$suffix -sm test2
+ ./ma_test2$suffix $silent -m10000 -e8192 -K $row_type
+ ./maria_chk$suffix -sm test2
+ ./ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L $row_type
+ ./maria_chk$suffix -sm test2
+}
+
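+# Create a table with a checksum and check that the different maria_chk
+# repair methods leave it in a usable state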
+run_repair_tests()
+{
+ row_type=$1
+ ./ma_test1$suffix $silent --checksum $row_type
+ ./maria_chk$suffix -se test1
+ ./maria_chk$suffix -rs test1
+ ./maria_chk$suffix -se test1
+ ./maria_chk$suffix -rqs test1
+ ./maria_chk$suffix -se test1
+ ./maria_chk$suffix -rs --correct-checksum test1
+ ./maria_chk$suffix -se test1
+ ./maria_chk$suffix -rqs --correct-checksum test1
+ ./maria_chk$suffix -se test1
+ ./maria_chk$suffix -ros --correct-checksum test1
+ ./maria_chk$suffix -se test1
+ ./maria_chk$suffix -rqos --correct-checksum test1
+ ./maria_chk$suffix -se test1
+}
+
+run_pack_tests()
+{
+ row_type=$1
+ # check of maria_pack / maria_chk
+ ./ma_test1$suffix $silent --checksum $row_type
+ ./maria_pack$suffix --force -s test1
+ ./maria_chk$suffix -ess test1
+ ./maria_chk$suffix -rqs test1
+ ./maria_chk$suffix -es test1
+ ./maria_chk$suffix -rs test1
+ ./maria_chk$suffix -es test1
+ ./maria_chk$suffix -rus test1
+ ./maria_chk$suffix -es test1
+
+ ./ma_test1$suffix $silent --checksum -S $row_type
+ ./maria_chk$suffix -se test1
+ ./maria_chk$suffix -ros test1
+ ./maria_chk$suffix -rqs test1
+ ./maria_chk$suffix -se test1
+
+ ./maria_pack$suffix --force -s test1
+ ./maria_chk$suffix -rqs test1
+ ./maria_chk$suffix -es test1
+ ./maria_chk$suffix -rus test1
+ ./maria_chk$suffix -es test1
+}
+
+echo "Running tests with dynamic row format"
+run_tests ""
+run_repair_tests ""
+run_pack_tests ""
+
+echo "Running tests with static row format"
+run_tests -S
+run_repair_tests -S
+run_pack_tests -S
+
+echo "Running tests with block row format"
+run_tests -M
+
+#
+# Tests that give warnings
+#
+
+./ma_test2$suffix $silent -L -K -W -P -S -R1 -m500
+./maria_chk$suffix -sm test2
+echo "ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135"
+./ma_test2$suffix $silent -L -K -R1 -m2000
+echo "./maria_chk$suffix -sm test2 will warn that 'Datafile is almost full'"
+./maria_chk$suffix -sm test2
+./maria_chk$suffix -ssm test2
+
+#
+# Some timing tests
+#
+time ./ma_test2$suffix $silent
+time ./ma_test2$suffix $silent -S
+time ./ma_test2$suffix $silent -M
+time ./ma_test2$suffix $silent -B
+time ./ma_test2$suffix $silent -L
+time ./ma_test2$suffix $silent -K
+time ./ma_test2$suffix $silent -K -B
+time ./ma_test2$suffix $silent -L -B
+time ./ma_test2$suffix $silent -L -K -B
+time ./ma_test2$suffix $silent -L -K -W -B
+time ./ma_test2$suffix $silent -L -K -W -B -S
+time ./ma_test2$suffix $silent -L -K -W -B -M
+time ./ma_test2$suffix $silent -D -K -W -B -S
diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c
new file mode 100644
index 00000000000..5d0133c8ac1
--- /dev/null
+++ b/storage/maria/ma_unique.c
@@ -0,0 +1,236 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Functions to check if a row is unique */
+
+#include "maria_def.h"
+#include <m_ctype.h>
+
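+/*
+  Check if 'record' violates the unique constraint 'def'
+
+  The precomputed hash of the unique columns is stored in the record,
+  a key is built from it and the unique index is searched; every row
+  with the same hash (except the one at 'disk_pos') is then compared
+  column by column against 'record'.
+
+  RETURN
+    0  no other row has the same unique value
+    1  a duplicate was found; my_errno is set to HA_ERR_FOUND_DUPP_UNIQUE
+*/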
+my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record,
+ ha_checksum unique_hash, my_off_t disk_pos)
+{
+ my_off_t lastpos=info->cur_row.lastpos;
+ MARIA_KEYDEF *key= &info->s->keyinfo[def->key];
+ byte *key_buff= info->lastkey2;
+ DBUG_ENTER("_ma_check_unique");
+ DBUG_PRINT("enter",("unique_hash: %lu", (ulong) unique_hash));
+
+ maria_unique_store(record+key->seg->start, unique_hash);
+ _ma_make_key(info,def->key,key_buff,record,0);
+
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+
+ if (_ma_search(info,info->s->keyinfo+def->key,key_buff,
+ MARIA_UNIQUE_HASH_LENGTH,
+ SEARCH_FIND,info->s->state.key_root[def->key]))
+ {
+ info->page_changed=1; /* Can't optimize read next */
+ info->cur_row.lastpos= lastpos;
+ DBUG_RETURN(0); /* No matching rows */
+ }
+
+ for (;;)
+ {
+ if (info->cur_row.lastpos != disk_pos &&
+ !(*info->s->compare_unique)(info,def,record,info->cur_row.lastpos))
+ {
+ my_errno=HA_ERR_FOUND_DUPP_UNIQUE;
+ info->errkey= (int) def->key;
+ info->dup_key_pos= info->cur_row.lastpos;
+ info->page_changed= 1; /* Can't optimize read next */
+ info->cur_row.lastpos= lastpos;
+ DBUG_PRINT("info",("Found duplicate"));
+ DBUG_RETURN(1); /* Found identical */
+ }
+ if (_ma_search_next(info,info->s->keyinfo+def->key, info->lastkey,
+ MARIA_UNIQUE_HASH_LENGTH, SEARCH_BIGGER,
+ info->s->state.key_root[def->key]) ||
+ bcmp((char*) info->lastkey, (char*) key_buff,
+ MARIA_UNIQUE_HASH_LENGTH))
+ {
+ info->page_changed= 1; /* Can't optimize read next */
+ info->cur_row.lastpos= lastpos;
+ DBUG_RETURN(0); /* end of tree */
+ }
+ }
+}
+
+
+/*
+ Calculate a hash for a row
+
+ TODO
+ Add support for bit fields
+*/
+
+ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *record)
+{
+ const byte *pos, *end;
+ ha_checksum crc= 0;
+ ulong seed1=0, seed2= 4;
+ HA_KEYSEG *keyseg;
+
+ for (keyseg=def->seg ; keyseg < def->end ; keyseg++)
+ {
+ enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type;
+ uint length=keyseg->length;
+
+ if (keyseg->null_bit)
+ {
+ if (record[keyseg->null_pos] & keyseg->null_bit)
+ {
+ /*
+ Change crc in a way different from an empty string or 0.
+ (This is an optimisation; The code will work even if this isn't
+ done)
+ */
+ crc=((crc << 8) + 511+
+ (crc >> (8*sizeof(ha_checksum)-8)));
+ continue;
+ }
+ }
+ pos= record+keyseg->start;
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint pack_length= keyseg->bit_start;
+ uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos :
+ uint2korr(pos));
+ pos+= pack_length; /* Skip VARCHAR length */
+ set_if_smaller(length,tmp_length);
+ }
+ else if (keyseg->flag & HA_BLOB_PART)
+ {
+ uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos);
+ memcpy_fixed((byte*) &pos,pos+keyseg->bit_start,sizeof(char*));
+ if (!length || length > tmp_length)
+ length=tmp_length; /* The whole blob */
+ }
+ end= pos+length;
+ if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
+ type == HA_KEYTYPE_VARTEXT2)
+ {
+ keyseg->charset->coll->hash_sort(keyseg->charset,
+ (const uchar*) pos, length, &seed1,
+ &seed2);
+ crc^= seed1;
+ }
+ else
+ while (pos != end)
+ crc=((crc << 8) +
+ (((uchar) *(uchar*) pos++))) +
+ (crc >> (8*sizeof(ha_checksum)-8));
+ }
+ return crc;
+}
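+
+/*
+  Illustrative note (added for clarity, not part of the original comments):
+  for a non-text segment the loop above rolls the checksum one byte at a
+  time.  Assuming a 32-bit ha_checksum and starting from crc == 0, the
+  bytes 0x01 0x02 give
+    crc = (0 << 8)    + 0x01 + (0 >> 24)    == 0x01
+    crc = (0x01 << 8) + 0x02 + (0x01 >> 24) == 0x0102
+  Text segments instead go through the collation's hash_sort(), so that
+  strings that compare equal under the collation hash to the same value.
+*/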
+
+
+/*
+  Compare the unique key for two rows
+
+ TODO
+ Add support for bit fields
+
+ RETURN
+ 0 if both rows have equal unique value
+ 1 Rows are different
+*/
+
+my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b,
+ my_bool null_are_equal)
+{
+ const byte *pos_a, *pos_b, *end;
+ HA_KEYSEG *keyseg;
+
+ for (keyseg=def->seg ; keyseg < def->end ; keyseg++)
+ {
+ enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type;
+ uint a_length, b_length;
+ a_length= b_length= keyseg->length;
+
+ /* If part is NULL it's regarded as different */
+ if (keyseg->null_bit)
+ {
+ uint tmp;
+ if ((tmp=(a[keyseg->null_pos] & keyseg->null_bit)) !=
+ (uint) (b[keyseg->null_pos] & keyseg->null_bit))
+ return 1;
+ if (tmp)
+ {
+ if (!null_are_equal)
+ return 1;
+ continue;
+ }
+ }
+ pos_a= a+keyseg->start;
+ pos_b= b+keyseg->start;
+ if (keyseg->flag & HA_VAR_LENGTH_PART)
+ {
+ uint pack_length= keyseg->bit_start;
+ if (pack_length == 1)
+ {
+ a_length= (uint) *(uchar*) pos_a++;
+ b_length= (uint) *(uchar*) pos_b++;
+ }
+ else
+ {
+ a_length= uint2korr(pos_a);
+ b_length= uint2korr(pos_b);
+ pos_a+= 2; /* Skip VARCHAR length */
+ pos_b+= 2;
+ }
+ set_if_smaller(a_length, keyseg->length); /* Safety */
+ set_if_smaller(b_length, keyseg->length); /* safety */
+ }
+ else if (keyseg->flag & HA_BLOB_PART)
+ {
+ /* Only compare 'length' characters if length != 0 */
+ a_length= _ma_calc_blob_length(keyseg->bit_start,pos_a);
+ b_length= _ma_calc_blob_length(keyseg->bit_start,pos_b);
+ /* Check that a and b are of equal length */
+ if (keyseg->length)
+ {
+ /*
+ This is used in some cases when we are not interested in comparing
+ the whole length of the blob.
+ */
+ set_if_smaller(a_length, keyseg->length);
+ set_if_smaller(b_length, keyseg->length);
+ }
+ memcpy_fixed((byte*) &pos_a,pos_a+keyseg->bit_start,sizeof(char*));
+ memcpy_fixed((byte*) &pos_b,pos_b+keyseg->bit_start,sizeof(char*));
+ }
+ if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
+ type == HA_KEYTYPE_VARTEXT2)
+ {
+ if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length,
+ (uchar *) pos_b, b_length, 0, 1))
+ return 1;
+ }
+ else
+ {
+ if (a_length != b_length)
+ return 1;
+ end= pos_a+a_length;
+ while (pos_a != end)
+ {
+ if (*pos_a++ != *pos_b++)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
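+
+/*
+  Usage sketch (illustrative; see maria_write() and maria_update() in this
+  patch): for every unique constraint the caller hashes the row and then
+  checks the unique hash key tree for a real duplicate, e.g.
+
+    if (_ma_check_unique(info, def, record,
+                         _ma_unique_hash(def, record), HA_OFFSET_ERROR))
+      goto err;                              <- duplicate row found
+
+  On update, _ma_unique_comp() is consulted first so that the check is
+  only done when the unique columns actually changed.
+*/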
diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c
new file mode 100644
index 00000000000..e3e391dcccc
--- /dev/null
+++ b/storage/maria/ma_update.c
@@ -0,0 +1,251 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Update an old row in a MARIA table */
+
+#include "ma_fulltext.h"
+#include "ma_rt_index.h"
+
+int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec)
+{
+ int flag,key_changed,save_errno;
+ reg3 my_off_t pos;
+ uint i;
+ byte old_key[HA_MAX_KEY_BUFF],*new_key;
+ bool auto_key_changed=0;
+ ulonglong changed;
+ MARIA_SHARE *share=info->s;
+ ha_checksum old_checksum;
+ DBUG_ENTER("maria_update");
+ LINT_INIT(new_key);
+ LINT_INIT(changed);
+ LINT_INIT(old_checksum);
+
+ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage",
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno= HA_ERR_CRASHED););
+ if (!(info->update & HA_STATE_AKTIV))
+ {
+ DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND);
+ }
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ DBUG_RETURN(my_errno=EACCES);
+ }
+ if (info->state->key_file_length >= share->base.margin_key_file_length)
+ {
+ DBUG_RETURN(my_errno=HA_ERR_INDEX_FILE_FULL);
+ }
+ pos= info->cur_row.lastpos;
+ if (_ma_readinfo(info,F_WRLCK,1))
+ DBUG_RETURN(my_errno);
+
+ if ((*share->compare_record)(info,oldrec))
+ {
+ save_errno= my_errno;
+ DBUG_PRINT("warning", ("Got error from compare record"));
+ goto err_end; /* Record has changed */
+ }
+
+ if (share->calc_checksum)
+ {
+ /*
+ We can't use the row based checksum as this doesn't have enough
+ precision.
+ */
+ if (info->s->calc_checksum)
+ old_checksum= (*info->s->calc_checksum)(info, oldrec);
+ }
+
+ /* Calculate and check all unique constraints */
+ key_changed=0;
+ for (i=0 ; i < share->state.header.uniques ; i++)
+ {
+ MARIA_UNIQUEDEF *def=share->uniqueinfo+i;
+ if (_ma_unique_comp(def, newrec, oldrec,1) &&
+ _ma_check_unique(info, def, newrec, _ma_unique_hash(def, newrec),
+ pos))
+ {
+ save_errno=my_errno;
+ goto err_end;
+ }
+ }
+ if (_ma_mark_file_changed(info))
+ {
+ save_errno=my_errno;
+ goto err_end;
+ }
+
+ /* Check which keys changed from the original row */
+
+ new_key= info->lastkey2;
+ changed=0;
+ for (i=0 ; i < share->base.keys ; i++)
+ {
+ if (maria_is_key_active(share->state.key_map, i))
+ {
+ if (share->keyinfo[i].flag & HA_FULLTEXT )
+ {
+ if (_ma_ft_cmp(info,i,oldrec, newrec))
+ {
+ if ((int) i == info->lastinx)
+ {
+ /*
+              We are changing the index we are reading on. Mark that
+ the index data has changed and we need to do a full search
+ when doing read-next
+ */
+ key_changed|=HA_STATE_WRITTEN;
+ }
+ changed|=((ulonglong) 1 << i);
+ if (_ma_ft_update(info,i,(char*) old_key,oldrec,newrec,pos))
+ goto err;
+ }
+ }
+ else
+ {
+ uint new_length= _ma_make_key(info,i,new_key,newrec,pos);
+ uint old_length= _ma_make_key(info,i,old_key,oldrec,pos);
+
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+
+ if (new_length != old_length ||
+ memcmp(old_key, new_key, new_length))
+ {
+ if ((int) i == info->lastinx)
+ key_changed|=HA_STATE_WRITTEN; /* Mark that keyfile changed */
+ changed|=((ulonglong) 1 << i);
+ share->keyinfo[i].version++;
+ if (share->keyinfo[i].ck_delete(info,i,old_key,old_length)) goto err;
+ if (share->keyinfo[i].ck_insert(info,i,new_key,new_length)) goto err;
+ if (share->base.auto_key == i+1)
+ auto_key_changed=1;
+ }
+ }
+ }
+ }
+ /*
+    If we are running with external locking, we must mark in the index file
+    that something has changed.
+ */
+ if (changed || !my_disable_locking)
+ key_changed|= HA_STATE_CHANGED;
+
+ if (share->calc_checksum)
+ {
+ info->cur_row.checksum= (*share->calc_checksum)(info,newrec);
+ /* Store new checksum in index file header */
+ key_changed|= HA_STATE_CHANGED;
+ }
+ {
+ /*
+ Don't update index file if data file is not extended and no status
+ information changed
+ */
+ MARIA_STATUS_INFO state;
+ ha_rows org_split;
+ my_off_t org_delete_link;
+
+ memcpy((char*) &state, (char*) info->state, sizeof(state));
+ org_split= share->state.split;
+ org_delete_link= share->state.dellink;
+ if ((*share->update_record)(info,pos,newrec))
+ goto err;
+ if (!key_changed &&
+ (memcmp((char*) &state, (char*) info->state, sizeof(state)) ||
+ org_split != share->state.split ||
+ org_delete_link != share->state.dellink))
+ key_changed|= HA_STATE_CHANGED; /* Must update index file */
+ }
+ if (auto_key_changed)
+ set_if_bigger(info->s->state.auto_increment,
+ ma_retrieve_auto_increment(info, newrec));
+ if (share->calc_checksum)
+ info->state->checksum+= (info->cur_row.checksum - old_checksum);
+
+ /*
+    We can't yet have HA_STATE_ACTIVE here, as block_record doesn't support
+    it
+ */
+ info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED | key_changed);
+
+ /*
+    Every Maria function that updates a Maria table must end with a
+    call to _ma_writeinfo(). If the operation (second parameter of
+    _ma_writeinfo()) is not 0, it sets share->changed to 1, flagging
+    that the data has changed. If the operation is 0, the call is a
+    no-op.
+
+    maria_update() must always pass a non-zero operation, since even if
+    there is no index change there can be a data change.
+ */
+ VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE));
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ if (info->invalidator != 0)
+ {
+ DBUG_PRINT("info", ("invalidator... '%s' (update)", info->filename));
+ (*info->invalidator)(info->filename);
+ info->invalidator=0;
+ }
+ DBUG_RETURN(0);
+
+err:
+ DBUG_PRINT("error",("key: %d errno: %d",i,my_errno));
+ save_errno=my_errno;
+ if (my_errno == HA_ERR_FOUND_DUPP_KEY || my_errno == HA_ERR_RECORD_FILE_FULL)
+ {
+ info->errkey= (int) i;
+ flag=0;
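+    /*
+      Roll back the key changes made so far: walk the changed keys
+      backwards, remove the new key value and re-insert the old one.
+      flag is 0 on the first round, so for the key where the failure
+      happened only the old value is put back.
+    */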
+ do
+ {
+ if (((ulonglong) 1 << i) & changed)
+ {
+ if (share->keyinfo[i].flag & HA_FULLTEXT)
+ {
+ if ((flag++ && _ma_ft_del(info,i,(char*) new_key,newrec,pos)) ||
+ _ma_ft_add(info,i,(char*) old_key,oldrec,pos))
+ break;
+ }
+ else
+ {
+ uint new_length= _ma_make_key(info,i,new_key,newrec,pos);
+ uint old_length= _ma_make_key(info,i,old_key,oldrec,pos);
+ if ((flag++ && _ma_ck_delete(info,i,new_key,new_length)) ||
+ _ma_ck_write(info,i,old_key,old_length))
+ break;
+ }
+ }
+ } while (i-- != 0);
+ }
+ else
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ }
+ info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED |
+ key_changed);
+
+ err_end:
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ if (save_errno == HA_ERR_KEY_NOT_FOUND)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ save_errno=HA_ERR_CRASHED;
+ }
+ DBUG_RETURN(my_errno=save_errno);
+} /* maria_update */
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
new file mode 100644
index 00000000000..3f1ca59a00a
--- /dev/null
+++ b/storage/maria/ma_write.c
@@ -0,0 +1,1081 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Write a row to a MARIA table */
+
+#include "ma_fulltext.h"
+#include "ma_rt_index.h"
+
+#define MAX_POINTER_LENGTH 8
+
+ /* Functions declared in this file */
+
+static int w_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,
+ uint comp_flag, byte *key,
+ uint key_length, my_off_t pos, byte *father_buff,
+ byte *father_keypos, my_off_t father_page,
+ my_bool insert_last);
+static int _ma_balance_page(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *key,
+ byte *curr_buff,byte *father_buff,
+ byte *father_keypos,my_off_t father_page);
+static byte *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, byte *page,
+ byte *key, uint *return_key_length,
+ byte **after_key);
+int _ma_ck_write_tree(register MARIA_HA *info, uint keynr,byte *key,
+ uint key_length);
+int _ma_ck_write_btree(register MARIA_HA *info, uint keynr,byte *key,
+ uint key_length);
+
+
+MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info,
+ const byte *record
+ __attribute__((unused)))
+{
+ return ((info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end) ?
+ info->s->state.dellink :
+ info->state->data_file_length);
+}
+
+my_bool _ma_write_abort_default(MARIA_HA *info __attribute__((unused)))
+{
+ return 0;
+}
+
+
+/* Write new record to a table */
+
+int maria_write(MARIA_HA *info, byte *record)
+{
+ MARIA_SHARE *share=info->s;
+ uint i;
+ int save_errno;
+ MARIA_RECORD_POS filepos;
+ byte *buff;
+ my_bool lock_tree= share->concurrent_insert;
+ my_bool fatal_error;
+ DBUG_ENTER("maria_write");
+ DBUG_PRINT("enter",("index_file: %d data_file: %d",
+ info->s->kfile,info->dfile));
+
+ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage",
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ DBUG_RETURN(my_errno= HA_ERR_CRASHED););
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ DBUG_RETURN(my_errno=EACCES);
+ }
+ if (_ma_readinfo(info,F_WRLCK,1))
+ DBUG_RETURN(my_errno);
+  dont_break();				/* Don't allow SIGHUP or SIGINT */
+
+ if (share->base.reloc == (ha_rows) 1 &&
+ share->base.records == (ha_rows) 1 &&
+ info->state->records == (ha_rows) 1)
+ { /* System file */
+ my_errno=HA_ERR_RECORD_FILE_FULL;
+ goto err2;
+ }
+ if (info->state->key_file_length >= share->base.margin_key_file_length)
+ {
+ my_errno=HA_ERR_INDEX_FILE_FULL;
+ goto err2;
+ }
+ if (_ma_mark_file_changed(info))
+ goto err2;
+
+ /* Calculate and check all unique constraints */
+ for (i=0 ; i < share->state.header.uniques ; i++)
+ {
+ if (_ma_check_unique(info,share->uniqueinfo+i,record,
+ _ma_unique_hash(share->uniqueinfo+i,record),
+ HA_OFFSET_ERROR))
+ goto err2;
+ }
+
+ if ((info->opt_flag & OPT_NO_ROWS))
+ filepos= HA_OFFSET_ERROR;
+ else
+ {
+ /*
+      This may either calculate a record position, or write the record and
+      return its record id
+ */
+ if ((filepos= (*share->write_record_init)(info, record)) ==
+ HA_OFFSET_ERROR)
+ goto err2;
+ }
+
+ /* Write all keys to indextree */
+ buff= info->lastkey2;
+ for (i=0 ; i < share->base.keys ; i++)
+ {
+ if (maria_is_key_active(share->state.key_map, i))
+ {
+ bool local_lock_tree= (lock_tree &&
+ !(info->bulk_insert &&
+ is_tree_inited(&info->bulk_insert[i])));
+ if (local_lock_tree)
+ {
+ rw_wrlock(&share->key_root_lock[i]);
+ share->keyinfo[i].version++;
+ }
+ if (share->keyinfo[i].flag & HA_FULLTEXT )
+ {
+ if (_ma_ft_add(info,i,(char*) buff,record,filepos))
+ {
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ DBUG_PRINT("error",("Got error: %d on write",my_errno));
+ goto err;
+ }
+ }
+ else
+ {
+ if (share->keyinfo[i].ck_insert(info,i,buff,
+ _ma_make_key(info,i,buff,record,
+ filepos)))
+ {
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ DBUG_PRINT("error",("Got error: %d on write",my_errno));
+ goto err;
+ }
+ }
+
+ /* The above changed info->lastkey2. Inform maria_rnext_same(). */
+ info->update&= ~HA_STATE_RNEXT_SAME;
+
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ }
+ }
+ if (share->calc_write_checksum)
+ info->cur_row.checksum= (*share->calc_write_checksum)(info,record);
+ if (filepos != HA_OFFSET_ERROR)
+ {
+ if ((*share->write_record)(info,record))
+ goto err;
+ info->state->checksum+= info->cur_row.checksum;
+ }
+ if (share->base.auto_key)
+ set_if_bigger(info->s->state.auto_increment,
+ ma_retrieve_auto_increment(info, record));
+ info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_WRITTEN |
+ HA_STATE_ROW_CHANGED);
+ info->state->records++;
+ info->cur_row.lastpos= filepos;
+ VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE));
+ if (info->invalidator != 0)
+ {
+ DBUG_PRINT("info", ("invalidator... '%s' (update)", info->filename));
+ (*info->invalidator)(info->filename);
+ info->invalidator=0;
+ }
+
+ /*
+ Update status of the table. We need to do so after each row write
+ for the log tables, as we want the new row to become visible to
+    other threads as soon as possible. We don't lock the mutex here
+    (even though pthread memory-visibility rules would require it) because
+    (1) it's not critical to use an outdated share->is_log_table value and
+    (2) locking the mutex here for every write is too expensive.
+ */
+ if (share->is_log_table)
+ _ma_update_status((void*) info);
+
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ DBUG_RETURN(0);
+
+err:
+ save_errno= my_errno;
+ fatal_error= 0;
+ if (my_errno == HA_ERR_FOUND_DUPP_KEY ||
+ my_errno == HA_ERR_RECORD_FILE_FULL ||
+ my_errno == HA_ERR_NULL_IN_SPATIAL)
+ {
+ if (info->bulk_insert)
+ {
+ uint j;
+ for (j=0 ; j < share->base.keys ; j++)
+ maria_flush_bulk_insert(info, j);
+ }
+ info->errkey= (int) i;
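+    /*
+      Remove the keys that were already inserted for this row, so the
+      indexes are left as they were before this maria_write() call.
+    */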
+ while ( i-- > 0)
+ {
+ if (maria_is_key_active(share->state.key_map, i))
+ {
+ bool local_lock_tree= (lock_tree &&
+ !(info->bulk_insert &&
+ is_tree_inited(&info->bulk_insert[i])));
+ if (local_lock_tree)
+ rw_wrlock(&share->key_root_lock[i]);
+ if (share->keyinfo[i].flag & HA_FULLTEXT)
+ {
+ if (_ma_ft_del(info,i,(char*) buff,record,filepos))
+ {
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ break;
+ }
+ }
+ else
+ {
+ uint key_length= _ma_make_key(info,i,buff,record,filepos);
+ if (_ma_ck_delete(info,i,buff,key_length))
+ {
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ break;
+ }
+ }
+ if (local_lock_tree)
+ rw_unlock(&share->key_root_lock[i]);
+ }
+ }
+ }
+ else
+ fatal_error= 1;
+
+ if ((*share->write_record_abort)(info))
+ fatal_error= 1;
+ if (fatal_error)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ maria_mark_crashed(info);
+ }
+
+ info->update= (HA_STATE_CHANGED | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED);
+ my_errno=save_errno;
+err2:
+ save_errno=my_errno;
+ DBUG_PRINT("error", ("got error: %d", save_errno));
+ VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
+ allow_break(); /* Allow SIGHUP & SIGINT */
+ DBUG_RETURN(my_errno=save_errno);
+} /* maria_write */
+
+
+ /* Write one key to btree */
+
+int _ma_ck_write(MARIA_HA *info, uint keynr, byte *key, uint key_length)
+{
+ DBUG_ENTER("_ma_ck_write");
+
+ if (info->bulk_insert && is_tree_inited(&info->bulk_insert[keynr]))
+ {
+ DBUG_RETURN(_ma_ck_write_tree(info, keynr, key, key_length));
+ }
+ else
+ {
+ DBUG_RETURN(_ma_ck_write_btree(info, keynr, key, key_length));
+ }
+} /* _ma_ck_write */
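+
+/*
+  Note (added for clarity): when bulk insert is active the key is only
+  buffered in an in-memory tree here; it reaches the index file later,
+  when the tree is flushed and keys_free() (see the bulk insert code at
+  the end of this file) calls _ma_ck_write_btree() for each buffered key.
+*/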
+
+
+/**********************************************************************
+ * Normal insert code *
+ **********************************************************************/
+
+int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, byte *key,
+ uint key_length)
+{
+ int error;
+ uint comp_flag;
+ MARIA_KEYDEF *keyinfo=info->s->keyinfo+keynr;
+ my_off_t *root=&info->s->state.key_root[keynr];
+ DBUG_ENTER("_ma_ck_write_btree");
+
+ if (keyinfo->flag & HA_SORT_ALLOWS_SAME)
+ comp_flag=SEARCH_BIGGER; /* Put after same key */
+ else if (keyinfo->flag & (HA_NOSAME|HA_FULLTEXT))
+ {
+ comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */
+ if (keyinfo->flag & HA_NULL_ARE_EQUAL)
+ comp_flag|= SEARCH_NULL_ARE_EQUAL;
+ }
+ else
+ comp_flag=SEARCH_SAME; /* Keys in rec-pos order */
+
+ error= _ma_ck_real_write_btree(info, keyinfo, key, key_length,
+ root, comp_flag);
+ if (info->ft1_to_ft2)
+ {
+ if (!error)
+ error= _ma_ft_convert_to_ft2(info, keynr, key);
+ delete_dynamic(info->ft1_to_ft2);
+ my_free((gptr)info->ft1_to_ft2, MYF(0));
+ info->ft1_to_ft2=0;
+ }
+ DBUG_RETURN(error);
+} /* _ma_ck_write_btree */
+
+
+int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key, uint key_length, my_off_t *root,
+ uint comp_flag)
+{
+ int error;
+ DBUG_ENTER("_ma_ck_real_write_btree");
+ /* key_length parameter is used only if comp_flag is SEARCH_FIND */
+ if (*root == HA_OFFSET_ERROR ||
+ (error=w_search(info, keyinfo, comp_flag, key, key_length,
+ *root, (byte*) 0, (byte*) 0,
+ (my_off_t) 0, 1)) > 0)
+ error= _ma_enlarge_root(info,keyinfo,key,root);
+ DBUG_RETURN(error);
+} /* _ma_ck_real_write_btree */
+
+
+ /* Make a new root with key as only pointer */
+
+int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key,
+ my_off_t *root)
+{
+ uint t_length,nod_flag;
+ MARIA_KEY_PARAM s_temp;
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("_ma_enlarge_root");
+
+ nod_flag= (*root != HA_OFFSET_ERROR) ? share->base.key_reflength : 0;
+ _ma_kpointer(info,info->buff+2,*root); /* if nod */
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(byte*) 0,
+ (byte*) 0, (byte*) 0, key,&s_temp);
+ maria_putint(info->buff,t_length+2+nod_flag,nod_flag);
+ (*keyinfo->store_key)(keyinfo,info->buff+2+nod_flag,&s_temp);
+ info->keybuff_used=info->page_changed=1; /* info->buff is used */
+ if ((*root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR ||
+ _ma_write_keypage(info,keyinfo,*root,DFLT_INIT_HITS,info->buff))
+ DBUG_RETURN(-1);
+ DBUG_RETURN(0);
+} /* _ma_enlarge_root */
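+
+/*
+  Note on the key page layout used above (added for clarity): the first
+  two bytes of a key page hold the used length of the page plus a flag
+  telling whether the page is a nod (written by maria_putint(), read back
+  by maria_getint() and _ma_test_if_nod()).  On a nod page this header is
+  followed by a child pointer of share->base.key_reflength bytes, and then
+  come the (possibly packed) keys.  _ma_enlarge_root() builds exactly such
+  a page holding a single key (plus the old root pointer when there is one).
+*/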
+
+
+ /*
+  Search for the right position for a key and store the key there
+  Returns -1 = error
+           0 = ok
+           1 = key should be stored in higher tree
+ */
+
+static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uint comp_flag, byte *key, uint key_length, my_off_t page,
+ byte *father_buff, byte *father_keypos,
+ my_off_t father_page, my_bool insert_last)
+{
+ int error,flag;
+ uint nod_flag, search_key_length;
+ byte *temp_buff,*keypos;
+ byte keybuff[HA_MAX_KEY_BUFF];
+ my_bool was_last_key;
+ my_off_t next_page, dup_key_pos;
+ DBUG_ENTER("w_search");
+ DBUG_PRINT("enter",("page: %ld", (long) page));
+
+ search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY;
+ if (!(temp_buff= (byte*) my_alloca((uint) keyinfo->block_length+
+ HA_MAX_KEY_BUFF*2)))
+ DBUG_RETURN(-1);
+ if (!_ma_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0))
+ goto err;
+
+ flag=(*keyinfo->bin_search)(info,keyinfo,temp_buff,key,search_key_length,
+ comp_flag, &keypos, keybuff, &was_last_key);
+ nod_flag= _ma_test_if_nod(temp_buff);
+ if (flag == 0)
+ {
+ uint tmp_key_length;
+ /* get position to record with duplicated key */
+ tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,keybuff);
+ if (tmp_key_length)
+ dup_key_pos= _ma_dpos(info,0,keybuff+tmp_key_length);
+ else
+ dup_key_pos= HA_OFFSET_ERROR;
+
+ if (keyinfo->flag & HA_FULLTEXT)
+ {
+ uint off;
+ int subkeys;
+
+ get_key_full_length_rdonly(off, keybuff);
+ subkeys=ft_sintXkorr(keybuff+off);
+ comp_flag=SEARCH_SAME;
+ if (subkeys >= 0)
+ {
+ /* normal word, one-level tree structure */
+ flag=(*keyinfo->bin_search)(info, keyinfo, temp_buff, key,
+ USE_WHOLE_KEY, comp_flag,
+ &keypos, keybuff, &was_last_key);
+ }
+ else
+ {
+ /* popular word. two-level tree. going down */
+ my_off_t root=dup_key_pos;
+ keyinfo=&info->s->ft2_keyinfo;
+ get_key_full_length_rdonly(off, key);
+ key+=off;
+ keypos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */
+ error= _ma_ck_real_write_btree(info, keyinfo, key, 0,
+ &root, comp_flag);
+ _ma_dpointer(info, keypos+HA_FT_WLEN, root);
+ subkeys--; /* should there be underflow protection ? */
+ DBUG_ASSERT(subkeys < 0);
+ ft_intXstore(keypos, subkeys);
+ if (!error)
+ error= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff);
+ my_afree((byte*) temp_buff);
+ DBUG_RETURN(error);
+ }
+ }
+ else /* not HA_FULLTEXT, normal HA_NOSAME key */
+ {
+ info->dup_key_pos= dup_key_pos;
+ my_afree((byte*) temp_buff);
+ my_errno=HA_ERR_FOUND_DUPP_KEY;
+ DBUG_RETURN(-1);
+ }
+ }
+ if (flag == MARIA_FOUND_WRONG_KEY)
+ DBUG_RETURN(-1);
+ if (!was_last_key)
+ insert_last=0;
+ next_page= _ma_kpos(nod_flag,keypos);
+ if (next_page == HA_OFFSET_ERROR ||
+ (error=w_search(info, keyinfo, comp_flag, key, key_length, next_page,
+ temp_buff, keypos, page, insert_last)) >0)
+ {
+ error= _ma_insert(info,keyinfo,key,temp_buff,keypos,keybuff,father_buff,
+ father_keypos,father_page, insert_last);
+ if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff))
+ goto err;
+ }
+ my_afree((byte*) temp_buff);
+ DBUG_RETURN(error);
+err:
+ my_afree((byte*) temp_buff);
+ DBUG_PRINT("exit",("Error: %d",my_errno));
+ DBUG_RETURN (-1);
+} /* w_search */
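+
+/*
+  Note on the return protocol (added for clarity): w_search() recurses
+  down towards the leaf and returns > 0 when the page it worked on was
+  split, which means the middle key must be inserted into the caller's
+  page (done with _ma_insert() on the way back up).  If the split
+  propagates all the way to the root, _ma_ck_real_write_btree() sees the
+  > 0 return value and grows the tree with _ma_enlarge_root().
+*/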
+
+
+/*
+ Insert new key.
+
+ SYNOPSIS
+ _ma_insert()
+ info Open table information.
+ keyinfo Key definition information.
+ key New key.
+ anc_buff Key page (beginning).
+ key_pos Position in key page where to insert.
+ key_buff Copy of previous key.
+ father_buff parent key page for balancing.
+ father_key_pos position in parent key page for balancing.
+ father_page position of parent key page in file.
+ insert_last If to append at end of page.
+
+ DESCRIPTION
+ Insert new key at right of key_pos.
+
+ RETURN
+ 2 if key contains key to upper level.
+ 0 OK.
+ < 0 Error.
+*/
+
+int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ byte *key, byte *anc_buff, byte *key_pos, byte *key_buff,
+ byte *father_buff, byte *father_key_pos, my_off_t father_page,
+ my_bool insert_last)
+{
+ uint a_length,nod_flag;
+ int t_length;
+ byte *endpos, *prev_key;
+ MARIA_KEY_PARAM s_temp;
+ DBUG_ENTER("_ma_insert");
+ DBUG_PRINT("enter",("key_pos: 0x%lx", (ulong) key_pos));
+ DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,
+ USE_WHOLE_KEY););
+
+ nod_flag=_ma_test_if_nod(anc_buff);
+ a_length=maria_getint(anc_buff);
+ endpos= anc_buff+ a_length;
+ prev_key=(key_pos == anc_buff+2+nod_flag ? (byte*) 0 : key_buff);
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,
+ (key_pos == endpos ? (byte*) 0 : key_pos),
+ prev_key, prev_key,
+ key,&s_temp);
+#ifndef DBUG_OFF
+ if (key_pos != anc_buff+2+nod_flag && (keyinfo->flag &
+ (HA_BINARY_PACK_KEY | HA_PACK_KEY)))
+ {
+ DBUG_DUMP("prev_key",(byte*) key_buff, _ma_keylength(keyinfo,key_buff));
+ }
+ if (keyinfo->flag & HA_PACK_KEY)
+ {
+ DBUG_PRINT("test",("t_length: %d ref_len: %d",
+ t_length,s_temp.ref_length));
+ DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: 0x%lx",
+ s_temp.n_ref_length, s_temp.n_length, (long) s_temp.key));
+ }
+#endif
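+  /*
+    t_length is the change in page length caused by storing the new key
+    (including the effect of re-packing the following key): if it is
+    positive we first make room by moving the rest of the page up, if it
+    is negative the following key now packs better and the rest of the
+    page is moved down instead.
+  */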
+ if (t_length > 0)
+ {
+ if (t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(-1);
+ }
+ bmove_upp((byte*) endpos+t_length,(byte*) endpos,(uint) (endpos-key_pos));
+ }
+ else
+ {
+ if (-t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH)
+ {
+ maria_print_error(info->s, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(-1);
+ }
+ bmove(key_pos,key_pos-t_length,(uint) (endpos-key_pos)+t_length);
+ }
+ (*keyinfo->store_key)(keyinfo,key_pos,&s_temp);
+ a_length+=t_length;
+ maria_putint(anc_buff,a_length,nod_flag);
+ if (a_length <= keyinfo->block_length)
+ {
+ if (keyinfo->block_length - a_length < 32 &&
+ keyinfo->flag & HA_FULLTEXT && key_pos == endpos &&
+ info->s->base.key_reflength <= info->s->base.rec_reflength &&
+ info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))
+ {
+ /*
+ Normal word. One-level tree. Page is almost full.
+ Let's consider converting.
+ We'll compare 'key' and the first key at anc_buff
+ */
+ byte *a=key, *b=anc_buff+2+nod_flag;
+ uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength;
+ /* the very first key on the page is always unpacked */
+ DBUG_ASSERT((*b & 128) == 0);
+#if HA_FT_MAXLEN >= 127
+ blen= mi_uint2korr(b); b+=2;
+#else
+ blen= *(uchar*) b++;
+#endif
+ get_key_length(alen,a);
+ DBUG_ASSERT(info->ft1_to_ft2==0);
+ if (alen == blen &&
+ ha_compare_text(keyinfo->seg->charset, (uchar*) a, alen,
+ (uchar*) b, blen, 0, 0) == 0)
+ {
+ /* yup. converting */
+ info->ft1_to_ft2=(DYNAMIC_ARRAY *)
+ my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME));
+ my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50);
+
+ /*
+          Now add all keys from the page to the dynarray,
+          if the page is a leaf (if not, the keys will be deleted later)
+ */
+ if (!nod_flag)
+ {
+ /* let's leave the first key on the page, though, because
+ we cannot easily dispatch an empty page here */
+ b+=blen+ft2len+2;
+ for (a=anc_buff+a_length ; b < a ; b+=ft2len+2)
+ insert_dynamic(info->ft1_to_ft2, (char*) b);
+
+ /* fixing the page's length - it contains only one key now */
+ maria_putint(anc_buff,2+blen+ft2len+2,0);
+ }
+ /* the rest will be done when we're back from recursion */
+ }
+ }
+ DBUG_RETURN(0); /* There is room on page */
+ }
+ /* Page is full */
+ if (nod_flag)
+ insert_last=0;
+ if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)) &&
+ father_buff && !insert_last)
+ DBUG_RETURN(_ma_balance_page(info,keyinfo,key,anc_buff,father_buff,
+ father_key_pos,father_page));
+ DBUG_RETURN(_ma_split_page(info,keyinfo,key,anc_buff,key_buff, insert_last));
+} /* _ma_insert */
+
+
+ /* Split a full page in two and return the emerging middle key in 'key' */
+
+int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ byte *key, byte *buff, byte *key_buff,
+ my_bool insert_last_key)
+{
+ uint length,a_length,key_ref_length,t_length,nod_flag,key_length;
+ byte *key_pos,*pos, *after_key;
+ my_off_t new_pos;
+ MARIA_KEY_PARAM s_temp;
+  DBUG_ENTER("_ma_split_page");
+ DBUG_DUMP("buff",(byte*) buff,maria_getint(buff));
+
+ if (info->s->keyinfo+info->lastinx == keyinfo)
+ info->page_changed=1; /* Info->buff is used */
+ info->keybuff_used=1;
+ nod_flag=_ma_test_if_nod(buff);
+ key_ref_length=2+nod_flag;
+ if (insert_last_key)
+ key_pos= _ma_find_last_pos(keyinfo,buff,key_buff, &key_length, &after_key);
+ else
+ key_pos= _ma_find_half_pos(nod_flag,keyinfo,buff,key_buff, &key_length,
+ &after_key);
+ if (!key_pos)
+ DBUG_RETURN(-1);
+
+ length=(uint) (key_pos-buff);
+ a_length=maria_getint(buff);
+ maria_putint(buff,length,nod_flag);
+
+ key_pos=after_key;
+ if (nod_flag)
+ {
+ DBUG_PRINT("test",("Splitting nod"));
+ pos=key_pos-nod_flag;
+ memcpy((byte*) info->buff+2,(byte*) pos,(size_t) nod_flag);
+ }
+
+ /* Move middle item to key and pointer to new page */
+ if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ DBUG_RETURN(-1);
+ _ma_kpointer(info, _ma_move_key(keyinfo,key,key_buff),new_pos);
+
+ /* Store new page */
+ if (!(*keyinfo->get_key)(keyinfo,nod_flag,&key_pos,key_buff))
+ DBUG_RETURN(-1);
+
+ t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(byte *) 0,
+ (byte*) 0, (byte*) 0,
+ key_buff, &s_temp);
+ length=(uint) ((buff+a_length)-key_pos);
+ memcpy((byte*) info->buff+key_ref_length+t_length,(byte*) key_pos,
+ (size_t) length);
+ (*keyinfo->store_key)(keyinfo,info->buff+key_ref_length,&s_temp);
+ maria_putint(info->buff,length+t_length+key_ref_length,nod_flag);
+
+ if (_ma_write_keypage(info,keyinfo,new_pos,DFLT_INIT_HITS,info->buff))
+ DBUG_RETURN(-1);
+ DBUG_DUMP("key",(byte*) key, _ma_keylength(keyinfo,key));
+ DBUG_RETURN(2); /* Middle key up */
+} /* _ma_split_page */
+
+
+ /*
+  Calculate how much to move to split a page in two
+  Returns a pointer to the start of the middle key.
+  key will contain the key.
+  return_key_length will contain the length of key.
+  after_key will contain the position where the next key starts.
+ */
+
+byte *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, byte *page,
+ byte *key, uint *return_key_length,
+ byte **after_key)
+{
+ uint keys,length,key_ref_length;
+ byte *end,*lastpos;
+ DBUG_ENTER("_ma_find_half_pos");
+
+ key_ref_length=2+nod_flag;
+ length=maria_getint(page)-key_ref_length;
+ page+=key_ref_length;
+ if (!(keyinfo->flag &
+ (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY |
+ HA_BINARY_PACK_KEY)))
+ {
+ key_ref_length=keyinfo->keylength+nod_flag;
+ keys=length/(key_ref_length*2);
+ *return_key_length=keyinfo->keylength;
+ end=page+keys*key_ref_length;
+ *after_key=end+key_ref_length;
+ memcpy(key,end,key_ref_length);
+ DBUG_RETURN(end);
+ }
+
+  end=page+length/2-key_ref_length;		/* This is approx. half */
+ *key='\0';
+ do
+ {
+ lastpos=page;
+ if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key)))
+ DBUG_RETURN(0);
+ } while (page < end);
+ *return_key_length=length;
+ *after_key=page;
+ DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx half: 0x%lx",
+ (long) lastpos, (long) page, (long) end));
+ DBUG_RETURN(lastpos);
+} /* _ma_find_half_pos */
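+
+/*
+  Example (illustrative) of the fixed-size shortcut above: with 10-byte
+  keys, nod_flag == 0 and 9 keys on the page (length == 90 after the
+  2-byte header), keys = 90/(10*2) = 4 and the function returns a pointer
+  to the 5th key: the first 4 keys stay on the page, the 5th is copied to
+  'key' and moved up, and *after_key points at the 6th key, which will
+  start the new right page.
+*/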
+
+
+/*
+ Split buffer at last key
+ Returns pointer to the start of the key before the last key
+ key will contain the last key
+*/
+
+static byte *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, byte *page,
+ byte *key, uint *return_key_length,
+ byte **after_key)
+{
+ uint keys,length,last_length,key_ref_length;
+ byte *end,*lastpos,*prevpos;
+ byte key_buff[HA_MAX_KEY_BUFF];
+ DBUG_ENTER("_ma_find_last_pos");
+
+ key_ref_length=2;
+ length=maria_getint(page)-key_ref_length;
+ page+=key_ref_length;
+ if (!(keyinfo->flag &
+ (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY |
+ HA_BINARY_PACK_KEY)))
+ {
+ keys=length/keyinfo->keylength-2;
+ *return_key_length=length=keyinfo->keylength;
+ end=page+keys*length;
+ *after_key=end+length;
+ memcpy(key,end,length);
+ DBUG_RETURN(end);
+ }
+
+ LINT_INIT(prevpos);
+ LINT_INIT(last_length);
+ end=page+length-key_ref_length;
+ *key='\0';
+ length=0;
+ lastpos=page;
+ while (page < end)
+ {
+ prevpos=lastpos; lastpos=page;
+ last_length=length;
+ memcpy(key, key_buff, length); /* previous key */
+ if (!(length=(*keyinfo->get_key)(keyinfo,0,&page,key_buff)))
+ {
+ maria_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno=HA_ERR_CRASHED;
+ DBUG_RETURN(0);
+ }
+ }
+ *return_key_length=last_length;
+ *after_key=lastpos;
+ DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx end: 0x%lx",
+ (long) prevpos,(long) page,(long) end));
+ DBUG_RETURN(prevpos);
+} /* _ma_find_last_pos */
+
+
+ /* Balance a page that has non-packed keys against the page on its right/left */
+ /* Returns 0 if the balance was done */
+
+static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key, byte *curr_buff, byte *father_buff,
+ byte *father_key_pos, my_off_t father_page)
+{
+ my_bool right;
+ uint k_length,father_length,father_keylength,nod_flag,curr_keylength,
+ right_length,left_length,new_right_length,new_left_length,extra_length,
+ length,keys;
+ byte *pos,*buff,*extra_buff;
+ my_off_t next_page,new_pos;
+ byte tmp_part_key[HA_MAX_KEY_BUFF];
+ DBUG_ENTER("_ma_balance_page");
+
+ k_length=keyinfo->keylength;
+ father_length=maria_getint(father_buff);
+ father_keylength=k_length+info->s->base.key_reflength;
+ nod_flag=_ma_test_if_nod(curr_buff);
+ curr_keylength=k_length+nod_flag;
+ info->page_changed=1;
+
+ if ((father_key_pos != father_buff+father_length &&
+ (info->state->records & 1)) ||
+ father_key_pos == father_buff+2+info->s->base.key_reflength)
+ {
+ right=1;
+ next_page= _ma_kpos(info->s->base.key_reflength,
+ father_key_pos+father_keylength);
+ buff=info->buff;
+ DBUG_PRINT("test",("use right page: %lu", (ulong) next_page));
+ }
+ else
+ {
+ right=0;
+ father_key_pos-=father_keylength;
+ next_page= _ma_kpos(info->s->base.key_reflength,father_key_pos);
+ /* Fix that curr_buff is to left */
+ buff=curr_buff; curr_buff=info->buff;
+ DBUG_PRINT("test",("use left page: %lu", (ulong) next_page));
+ } /* father_key_pos ptr to parting key */
+
+ if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff,0))
+ goto err;
+ DBUG_DUMP("next",(byte*) info->buff,maria_getint(info->buff));
+
+ /* Test if there is room to share keys */
+
+ left_length=maria_getint(curr_buff);
+ right_length=maria_getint(buff);
+ keys=(left_length+right_length-4-nod_flag*2)/curr_keylength;
+
+ if ((right ? right_length : left_length) + curr_keylength <=
+ keyinfo->block_length)
+ { /* Merge buffs */
+ new_left_length=2+nod_flag+(keys/2)*curr_keylength;
+ new_right_length=2+nod_flag+((keys+1)/2)*curr_keylength;
+ maria_putint(curr_buff,new_left_length,nod_flag);
+ maria_putint(buff,new_right_length,nod_flag);
+
+ if (left_length < new_left_length)
+ { /* Move keys buff -> leaf */
+ pos=curr_buff+left_length;
+ memcpy((byte*) pos,(byte*) father_key_pos, (size_t) k_length);
+ memcpy((byte*) pos+k_length, (byte*) buff+2,
+ (size_t) (length=new_left_length - left_length - k_length));
+ pos=buff+2+length;
+ memcpy((byte*) father_key_pos,(byte*) pos,(size_t) k_length);
+ bmove((byte*) buff+2,(byte*) pos+k_length,new_right_length);
+ }
+ else
+ { /* Move keys -> buff */
+
+ bmove_upp((byte*) buff+new_right_length,(byte*) buff+right_length,
+ right_length-2);
+ length=new_right_length-right_length-k_length;
+ memcpy((byte*) buff+2+length,father_key_pos,(size_t) k_length);
+ pos=curr_buff+new_left_length;
+ memcpy((byte*) father_key_pos,(byte*) pos,(size_t) k_length);
+ memcpy((byte*) buff+2,(byte*) pos+k_length,(size_t) length);
+ }
+
+ if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff) ||
+ _ma_write_keypage(info,keyinfo,father_page,DFLT_INIT_HITS,father_buff))
+ goto err;
+ DBUG_RETURN(0);
+ }
+
+  /* curr_buff[] and buff[] are full, let's split and make a new nod */
+
+ extra_buff=info->buff+info->s->base.max_key_block_length;
+ new_left_length=new_right_length=2+nod_flag+(keys+1)/3*curr_keylength;
+ if (keys == 5) /* Too few keys to balance */
+ new_left_length-=curr_keylength;
+ extra_length=nod_flag+left_length+right_length-
+ new_left_length-new_right_length-curr_keylength;
+ DBUG_PRINT("info",("left_length: %d right_length: %d new_left_length: %d new_right_length: %d extra_length: %d",
+ left_length, right_length,
+ new_left_length, new_right_length,
+ extra_length));
+ maria_putint(curr_buff,new_left_length,nod_flag);
+ maria_putint(buff,new_right_length,nod_flag);
+ maria_putint(extra_buff,extra_length+2,nod_flag);
+
+  /* Move the largest keys to the new (extra) page */
+ pos=buff+right_length-extra_length;
+ memcpy((byte*) extra_buff+2,pos,(size_t) extra_length);
+ /* Save new parting key */
+ memcpy(tmp_part_key, pos-k_length,k_length);
+ /* Make place for new keys */
+ bmove_upp((byte*) buff+new_right_length,(byte*) pos-k_length,
+ right_length-extra_length-k_length-2);
+ /* Copy keys from left page */
+ pos= curr_buff+new_left_length;
+ memcpy((byte*) buff+2,(byte*) pos+k_length,
+ (size_t) (length=left_length-new_left_length-k_length));
+ /* Copy old parting key */
+ memcpy((byte*) buff+2+length,father_key_pos,(size_t) k_length);
+
+ /* Move new parting keys up to caller */
+ memcpy((byte*) (right ? key : father_key_pos),pos,(size_t) k_length);
+ memcpy((byte*) (right ? father_key_pos : key),tmp_part_key, k_length);
+
+ if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ goto err;
+ _ma_kpointer(info,key+k_length,new_pos);
+ if (_ma_write_keypage(info,keyinfo,(right ? new_pos : next_page),
+ DFLT_INIT_HITS,info->buff) ||
+ _ma_write_keypage(info,keyinfo,(right ? next_page : new_pos),
+ DFLT_INIT_HITS,extra_buff))
+ goto err;
+
+ DBUG_RETURN(1); /* Middle key up */
+
+err:
+ DBUG_RETURN(-1);
+} /* _ma_balance_page */
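+
+/*
+  Example (illustrative) of the three-way split above: with 10-byte fixed
+  keys, nod_flag == 0 and two full pages of 10 keys each (left_length ==
+  right_length == 102), keys == 20 and new_left_length == new_right_length
+  == 2 + 7*10 == 72.  curr_buff and buff then keep 7 keys each, the new
+  extra_buff page gets 5, one key replaces the parting key in the father
+  page and one is passed up to the caller together with the pointer to the
+  new page: 21 keys in total, the same as before when counting the old
+  parting key.
+*/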
+
+/**********************************************************************
+ * Bulk insert code *
+ **********************************************************************/
+
+typedef struct {
+ MARIA_HA *info;
+ uint keynr;
+} bulk_insert_param;
+
+
+int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, byte *key,
+ uint key_length)
+{
+ int error;
+ DBUG_ENTER("_ma_ck_write_tree");
+
+ error= tree_insert(&info->bulk_insert[keynr], key,
+ key_length + info->s->rec_reflength,
+ info->bulk_insert[keynr].custom_arg) ? 0 : HA_ERR_OUT_OF_MEM ;
+
+ DBUG_RETURN(error);
+} /* _ma_ck_write_tree */
+
+
+/* typeof(keys_compare)=qsort_cmp2 */
+
+static int keys_compare(bulk_insert_param *param, byte *key1, byte *key2)
+{
+ uint not_used[2];
+ return ha_key_cmp(param->info->s->keyinfo[param->keynr].seg,
+ (uchar*) key1, (uchar*) key2, USE_WHOLE_KEY, SEARCH_SAME,
+ not_used);
+}
+
+
+static int keys_free(byte *key, TREE_FREE mode, bulk_insert_param *param)
+{
+ /*
+ Probably I can use info->lastkey here, but I'm not sure,
+ and to be safe I'd better use local lastkey.
+ */
+ byte lastkey[HA_MAX_KEY_BUFF];
+ uint keylen;
+ MARIA_KEYDEF *keyinfo;
+
+ switch (mode) {
+ case free_init:
+ if (param->info->s->concurrent_insert)
+ {
+ rw_wrlock(&param->info->s->key_root_lock[param->keynr]);
+ param->info->s->keyinfo[param->keynr].version++;
+ }
+ return 0;
+ case free_free:
+ keyinfo=param->info->s->keyinfo+param->keynr;
+ keylen= _ma_keylength(keyinfo, key);
+ memcpy(lastkey, key, keylen);
+ return _ma_ck_write_btree(param->info,param->keynr,lastkey,
+ keylen - param->info->s->rec_reflength);
+ case free_end:
+ if (param->info->s->concurrent_insert)
+ rw_unlock(&param->info->s->key_root_lock[param->keynr]);
+ return 0;
+ }
+ return -1;
+}
+
+
+int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows)
+{
+ MARIA_SHARE *share=info->s;
+ MARIA_KEYDEF *key=share->keyinfo;
+ bulk_insert_param *params;
+ uint i, num_keys, total_keylength;
+ ulonglong key_map;
+  DBUG_ENTER("maria_init_bulk_insert");
+ DBUG_PRINT("enter",("cache_size: %lu", cache_size));
+
+ DBUG_ASSERT(!info->bulk_insert &&
+ (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT));
+
+ maria_clear_all_keys_active(key_map);
+ for (i=total_keylength=num_keys=0 ; i < share->base.keys ; i++)
+ {
+ if (! (key[i].flag & HA_NOSAME) && (share->base.auto_key != i + 1) &&
+ maria_is_key_active(share->state.key_map, i))
+ {
+ num_keys++;
+ maria_set_key_active(key_map, i);
+ total_keylength+=key[i].maxlength+TREE_ELEMENT_EXTRA_SIZE;
+ }
+ }
+
+ if (num_keys==0 ||
+ num_keys * MARIA_MIN_SIZE_BULK_INSERT_TREE > cache_size)
+ DBUG_RETURN(0);
+
+ if (rows && rows*total_keylength < cache_size)
+ cache_size=rows;
+ else
+ cache_size/=total_keylength*16;
+
+ info->bulk_insert=(TREE *)
+ my_malloc((sizeof(TREE)*share->base.keys+
+ sizeof(bulk_insert_param)*num_keys),MYF(0));
+
+ if (!info->bulk_insert)
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+
+ params=(bulk_insert_param *)(info->bulk_insert+share->base.keys);
+ for (i=0 ; i < share->base.keys ; i++)
+ {
+ if (maria_is_key_active(key_map, i))
+ {
+ params->info=info;
+ params->keynr=i;
+ /* Only allocate a 16'th of the buffer at a time */
+ init_tree(&info->bulk_insert[i],
+ cache_size * key[i].maxlength,
+ cache_size * key[i].maxlength, 0,
+ (qsort_cmp2)keys_compare, 0,
+ (tree_element_free) keys_free, (void *)params++);
+ }
+ else
+ info->bulk_insert[i].root=0;
+ }
+
+ DBUG_RETURN(0);
+}
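+
+/*
+  Illustrative sizing example (hypothetical numbers): with cache_size ==
+  8MB, no row estimate and total_keylength == 80 (sum of maxlength +
+  TREE_ELEMENT_EXTRA_SIZE over the eligible keys), cache_size becomes
+  8388608 / (80*16) == 6553, roughly the number of keys each tree may
+  buffer, and every tree is then initialised with 6553 * key[i].maxlength
+  as its size limit.
+*/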
+
+void maria_flush_bulk_insert(MARIA_HA *info, uint inx)
+{
+ if (info->bulk_insert)
+ {
+ if (is_tree_inited(&info->bulk_insert[inx]))
+ reset_tree(&info->bulk_insert[inx]);
+ }
+}
+
+void maria_end_bulk_insert(MARIA_HA *info)
+{
+ if (info->bulk_insert)
+ {
+ uint i;
+ for (i=0 ; i < info->s->base.keys ; i++)
+ {
+ if (is_tree_inited(& info->bulk_insert[i]))
+ {
+ delete_tree(& info->bulk_insert[i]);
+ }
+ }
+ my_free((void *)info->bulk_insert, MYF(0));
+ info->bulk_insert=0;
+ }
+}
diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c
new file mode 100644
index 00000000000..8471d36bec9
--- /dev/null
+++ b/storage/maria/maria_chk.c
@@ -0,0 +1,1823 @@
+/* Copyright (C) 2003-2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Describe, check and repair of MARIA tables */
+
+#include "ma_fulltext.h"
+#include <myisamchk.h>
+#include <my_bit.h>
+#include <m_ctype.h>
+#include <stdarg.h>
+#include <my_getopt.h>
+#ifdef HAVE_SYS_VADVISE_H
+#include <sys/vadvise.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+SET_STACK_SIZE(9000) /* Minimum stack size for program */
+
+#ifndef USE_RAID
+#define my_raid_create(A,B,C,D,E,F,G) my_create(A,B,C,G)
+#define my_raid_delete(A,B,C) my_delete(A,B)
+#endif
+
+static uint decode_bits;
+static char **default_argv;
+static const char *load_default_groups[]= { "maria_chk", 0 };
+static const char *set_collation_name, *opt_tmpdir;
+static CHARSET_INFO *set_collation;
+static long opt_maria_block_size;
+static long opt_key_cache_block_size;
+static const char *my_progname_short;
+static int stopwords_inited= 0;
+static MY_TMPDIR maria_chk_tmpdir;
+
+static const char *type_names[]=
+{
+ "impossible","char","binary", "short", "long", "float",
+ "double","number","unsigned short",
+ "unsigned long","longlong","ulonglong","int24",
+ "uint24","int8","varchar", "varbin","?",
+ "?"
+};
+
+static const char *prefix_packed_txt="packed ",
+ *bin_packed_txt="prefix ",
+ *diff_txt="stripped ",
+ *null_txt="NULL",
+ *blob_txt="BLOB ";
+
+static const char *field_pack[]=
+{
+ "","no endspace", "no prespace",
+  "no zeros", "blob", "constant", "table-lookup",
+ "always zero","varchar","unique-hash","?","?"
+};
+
+static const char *record_formats[]=
+{
+ "Fixed length", "Packed", "Compressed", "Block", "?"
+};
+
+static const char *maria_stats_method_str="nulls_unequal";
+
+static void get_options(int *argc,char * * *argv);
+static void print_version(void);
+static void usage(void);
+static int maria_chk(HA_CHECK *param, char *filename);
+static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name);
+static int maria_sort_records(HA_CHECK *param, register MARIA_HA *info,
+ my_string name, uint sort_key,
+ my_bool write_info, my_bool update_index);
+static int sort_record_index(MARIA_SORT_PARAM *sort_param, MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t page, byte *buff,uint sortkey,
+ File new_file, my_bool update_index);
+
+HA_CHECK check_param;
+
+ /* Main program */
+
+int main(int argc, char **argv)
+{
+ int error;
+ MY_INIT(argv[0]);
+ my_progname_short= my_progname+dirname_length(my_progname);
+
+ maria_chk_init(&check_param);
+ check_param.opt_lock_memory= 1; /* Lock memory if possible */
+ check_param.using_global_keycache = 0;
+ get_options(&argc,(char***) &argv);
+ maria_quick_table_bits=decode_bits;
+ error=0;
+ maria_init();
+
+ while (--argc >= 0)
+ {
+ int new_error=maria_chk(&check_param, *(argv++));
+ if ((check_param.testflag & T_REP_ANY) != T_REP)
+ check_param.testflag&= ~T_REP;
+ VOID(fflush(stdout));
+ VOID(fflush(stderr));
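+    /*
+      If the check failed or printed warnings and --force was given,
+      rerun the same table with a repair-by-sort pass instead of only
+      reporting the problem.
+    */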
+ if ((check_param.error_printed | check_param.warning_printed) &&
+ (check_param.testflag & T_FORCE_CREATE) &&
+ (!(check_param.testflag & (T_REP | T_REP_BY_SORT | T_SORT_RECORDS |
+ T_SORT_INDEX))))
+ {
+ uint old_testflag=check_param.testflag;
+ if (!(check_param.testflag & T_REP))
+ check_param.testflag|= T_REP_BY_SORT;
+      check_param.testflag&= ~T_EXTEND;			/* Not needed */
+ error|=maria_chk(&check_param, argv[-1]);
+ check_param.testflag= old_testflag;
+ VOID(fflush(stdout));
+ VOID(fflush(stderr));
+ }
+ else
+ error|=new_error;
+ if (argc && (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO))
+ {
+ puts("\n---------\n");
+ VOID(fflush(stdout));
+ }
+ }
+ if (check_param.total_files > 1)
+ { /* Only if descript */
+ char buff[22],buff2[22];
+ if (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO)
+ puts("\n---------\n");
+ printf("\nTotal of all %d MARIA-files:\nData records: %9s Deleted blocks: %9s\n",check_param.total_files,llstr(check_param.total_records,buff),
+ llstr(check_param.total_deleted,buff2));
+ }
+ free_defaults(default_argv);
+ free_tmpdir(&maria_chk_tmpdir);
+ maria_end();
+ my_end(check_param.testflag & T_INFO ?
+ MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
+ exit(error);
+#ifndef _lint
+ return 0; /* No compiler warning */
+#endif
+} /* main */
+
+enum options_mc {
+ OPT_CHARSETS_DIR=256, OPT_SET_COLLATION,OPT_START_CHECK_POS,
+ OPT_CORRECT_CHECKSUM, OPT_KEY_BUFFER_SIZE,
+ OPT_KEY_CACHE_BLOCK_SIZE, OPT_MARIA_BLOCK_SIZE,
+ OPT_READ_BUFFER_SIZE, OPT_WRITE_BUFFER_SIZE, OPT_SORT_BUFFER_SIZE,
+ OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN,
+ OPT_FT_MAX_WORD_LEN, OPT_FT_STOPWORD_FILE,
+ OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE, OPT_STATS_METHOD
+};
+
+static struct my_option my_long_options[] =
+{
+ {"analyze", 'a',
+ "Analyze distribution of keys. Will make some joins in MySQL faster. You can check the calculated distribution.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifdef __NETWARE__
+ {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"block-search", 'b',
+ "No help available.",
+ 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"backup", 'B',
+ "Make a backup of the .MYD file as 'filename-time.BAK'.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"character-sets-dir", OPT_CHARSETS_DIR,
+ "Directory where character sets are.",
+ (gptr*) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"check", 'c',
+ "Check table for errors.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"check-only-changed", 'C',
+ "Check only tables that have changed since last check. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"correct-checksum", OPT_CORRECT_CHECKSUM,
+ "Correct checksum information for table.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifndef DBUG_OFF
+ {"debug", '#',
+ "Output debug log. Often this is 'd:t:o,filename'.",
+ 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"description", 'd',
+ "Prints some information about table.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"data-file-length", 'D',
+ "Max length of data file (when recreating data-file when it's full).",
+ (gptr*) &check_param.max_data_file_length,
+ (gptr*) &check_param.max_data_file_length,
+ 0, GET_LL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"extend-check", 'e',
+ "If used when checking a table, ensure that the table is 100 percent consistent, which will take a long time. If used when repairing a table, try to recover every possible row from the data file. Normally this will also find a lot of garbage rows; Don't use this option with repair if you are not totally desperate.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"fast", 'F',
+ "Check only tables that haven't been closed properly. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"force", 'f',
+ "Restart with -r if there are any errors in the table. States will be updated as with --update-state.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"HELP", 'H',
+ "Display this help and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"help", '?',
+ "Display this help and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"information", 'i',
+ "Print statistics information about table that is checked.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"keys-used", 'k',
+ "Tell MARIA to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts.",
+ (gptr*) &check_param.keys_in_use,
+ (gptr*) &check_param.keys_in_use,
+ 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0},
+ {"max-record-length", OPT_MAX_RECORD_LENGTH,
+ "Skip rows bigger than this if maria_chk can't allocate memory to hold it",
+ (gptr*) &check_param.max_record_length,
+ (gptr*) &check_param.max_record_length,
+ 0, GET_ULL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0},
+ {"medium-check", 'm',
+ "Faster than extend-check, but only finds 99.99% of all errors. Should be good enough for most cases.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"quick", 'q', "Faster repair by not modifying the data file.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"read-only", 'T',
+ "Don't mark table as checked.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"recover", 'r',
+ "Can fix almost anything except unique keys that aren't unique.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"parallel-recover", 'p',
+ "Same as '-r' but creates all the keys in parallel.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"safe-recover", 'o',
+ "Uses old recovery method; Slower than '-r' but can handle a couple of cases where '-r' reports that it can't fix the data file.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"sort-recover", 'n',
+ "Force recovering with sorting even if the temporary file was very big.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifdef DEBUG
+ {"start-check-pos", OPT_START_CHECK_POS,
+ "No help available.",
+ 0, 0, 0, GET_ULL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"set-auto-increment", 'A',
+ "Force auto_increment to start at this or higher value. If no value is given, then sets the next auto_increment value to the highest used value for the auto key + 1.",
+ (gptr*) &check_param.auto_increment_value,
+ (gptr*) &check_param.auto_increment_value,
+ 0, GET_ULL, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ {"set-collation", OPT_SET_COLLATION,
+ "Change the collation used by the index",
+ (gptr*) &set_collation_name, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"set-variable", 'O',
+ "Change the value of a variable. Please note that this option is deprecated; you can set variables directly with --variable-name=value.",
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"silent", 's',
+ "Only print errors. One can use two -s to make maria_chk very silent.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"sort-index", 'S',
+ "Sort index blocks. This speeds up 'read-next' in applications.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"sort-records", 'R',
+ "Sort records according to an index. This makes your data much more localized and may speed up things. (It may be VERY slow to do a sort the first time!)",
+ (gptr*) &check_param.opt_sort_key,
+ (gptr*) &check_param.opt_sort_key,
+ 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"tmpdir", 't',
+ "Path for temporary files.",
+ (gptr*) &opt_tmpdir,
+ 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"update-state", 'U',
+ "Mark tables as crashed if any errors were found.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"unpack", 'u',
+ "Unpack file packed with mariapack.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"verbose", 'v',
+ "Print more information. This can be used with --description and --check. Use many -v for more verbosity!",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"version", 'V',
+ "Print version and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"wait", 'w',
+ "Wait if table is locked.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { "key_buffer_size", OPT_KEY_BUFFER_SIZE, "",
+ (gptr*) &check_param.use_buffers, (gptr*) &check_param.use_buffers, 0,
+ GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, (long) MALLOC_OVERHEAD,
+ (long) ~0L, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0},
+ { "key_cache_block_size", OPT_KEY_CACHE_BLOCK_SIZE, "",
+ (gptr*) &opt_key_cache_block_size,
+ (gptr*) &opt_key_cache_block_size, 0,
+ GET_LONG, REQUIRED_ARG, MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
+ MARIA_MAX_KEY_BLOCK_LENGTH, 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0},
+ { "maria_block_size", OPT_MARIA_BLOCK_SIZE, "",
+ (gptr*) &opt_maria_block_size, (gptr*) &opt_maria_block_size, 0,
+ GET_LONG, REQUIRED_ARG, MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
+ MARIA_MAX_KEY_BLOCK_LENGTH, 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0},
+ { "read_buffer_size", OPT_READ_BUFFER_SIZE, "",
+ (gptr*) &check_param.read_buffer_length,
+ (gptr*) &check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG,
+ (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD,
+ (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0},
+ { "write_buffer_size", OPT_WRITE_BUFFER_SIZE, "",
+ (gptr*) &check_param.write_buffer_length,
+ (gptr*) &check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG,
+ (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD,
+ (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0},
+ { "sort_buffer_size", OPT_SORT_BUFFER_SIZE, "",
+ (gptr*) &check_param.sort_buffer_length,
+ (gptr*) &check_param.sort_buffer_length, 0, GET_ULONG, REQUIRED_ARG,
+ (long) SORT_BUFFER_INIT, (long) (MIN_SORT_BUFFER + MALLOC_OVERHEAD),
+ (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0},
+ { "sort_key_blocks", OPT_SORT_KEY_BLOCKS, "",
+ (gptr*) &check_param.sort_key_blocks,
+ (gptr*) &check_param.sort_key_blocks, 0, GET_ULONG, REQUIRED_ARG,
+ BUFFERS_WHEN_SORTING, 4L, 100L, 0L, 1L, 0},
+ { "decode_bits", OPT_DECODE_BITS, "", (gptr*) &decode_bits,
+ (gptr*) &decode_bits, 0, GET_UINT, REQUIRED_ARG, 9L, 4L, 17L, 0L, 1L, 0},
+ { "ft_min_word_len", OPT_FT_MIN_WORD_LEN, "", (gptr*) &ft_min_word_len,
+ (gptr*) &ft_min_word_len, 0, GET_ULONG, REQUIRED_ARG, 4, 1, HA_FT_MAXCHARLEN,
+ 0, 1, 0},
+ { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (gptr*) &ft_max_word_len,
+ (gptr*) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXCHARLEN, 10,
+ HA_FT_MAXCHARLEN, 0, 1, 0},
+ { "maria_ft_stopword_file", OPT_FT_STOPWORD_FILE,
+ "Use stopwords from this file instead of built-in list.",
+ (gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR,
+ REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"stats_method", OPT_STATS_METHOD,
+ "Specifies how index statistics collection code should threat NULLs. "
+ "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
+ "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
+ (gptr*) &maria_stats_method_str, (gptr*) &maria_stats_method_str, 0,
+ GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+#include <help_start.h>
+
+static void print_version(void)
+{
+ printf("%s Ver 1.0 for %s at %s\n", my_progname, SYSTEM_TYPE,
+ MACHINE_TYPE);
+ NETWARE_SET_SCREEN_MODE(1);
+}
+
+
+static void usage(void)
+{
+ print_version();
+ puts("By Monty, for your professional use");
+ puts("This software comes with NO WARRANTY: see the PUBLIC for details.\n");
+ puts("Description, check and repair of MARIA tables.");
+ puts("Used without options all tables on the command will be checked for errors");
+ printf("Usage: %s [OPTIONS] tables[.MYI]\n", my_progname_short);
+ printf("\nGlobal options:\n");
+#ifndef DBUG_OFF
+ printf("\
+ -#, --debug=... Output debug log. Often this is 'd:t:o,filename'.\n");
+#endif
+ printf("\
+ -?, --help Display this help and exit.\n\
+ -O, --set-variable var=option.\n\
+ Change the value of a variable. Please note that\n\
+ this option is deprecated; you can set variables\n\
+ directly with '--variable-name=value'.\n\
+ -t, --tmpdir=path Path for temporary files. Multiple paths can be\n\
+ specified, separated by ");
+#if defined( __WIN__) || defined(__NETWARE__)
+ printf("semicolon (;)");
+#else
+ printf("colon (:)");
+#endif
+ printf(", they will be used\n\
+ in a round-robin fashion.\n\
+ -s, --silent Only print errors. One can use two -s to make\n\
+ maria_chk very silent.\n\
+ -v, --verbose Print more information. This can be used with\n\
+ --description and --check. Use many -v for more verbosity.\n\
+ -V, --version Print version and exit.\n\
+ -w, --wait Wait if table is locked.\n\n");
+#ifdef DEBUG
+ puts(" --start-check-pos=# Start reading file at given offset.\n");
+#endif
+
+ puts("Check options (check is the default action for maria_chk):\n\
+ -c, --check Check table for errors.\n\
+ -e, --extend-check Check the table VERY thoroughly. Only use this in\n\
+ extreme cases as maria_chk should normally be able to\n\
+ find out if the table is ok even without this switch.\n\
+ -F, --fast Check only tables that haven't been closed properly.\n\
+ -C, --check-only-changed\n\
+ Check only tables that have changed since last check.\n\
+ -f, --force Restart with '-r' if there are any errors in the table.\n\
+ States will be updated as with '--update-state'.\n\
+ -i, --information Print statistics information about the table that is checked.\n\
+ -m, --medium-check Faster than extend-check, but only finds 99.99% of\n\
+ all errors. Should be good enough for most cases.\n\
+ -U, --update-state Mark tables as crashed if you find any errors.\n\
+ -T, --read-only Don't mark table as checked.\n");
+
+ puts("Repair options (When using '-r' or '-o'):\n\
+ -B, --backup Make a backup of the .MYD file as 'filename-time.BAK'.\n\
+ --correct-checksum Correct checksum information for table.\n\
+ -D, --data-file-length=# Max length of data file (when recreating data\n\
+ file when it's full).\n\
+ -e, --extend-check Try to recover every possible row from the data file.\n\
+ Normally this will also find a lot of garbage rows;\n\
+ Don't use this option if you are not totally desperate.\n\
+ -f, --force Overwrite old temporary files.\n\
+ -k, --keys-used=# Tell MARIA to update only some specific keys. # is a\n\
+ bit mask of which keys to use. This can be used to\n\
+ get faster inserts.\n\
+ --max-record-length=#\n\
+ Skip rows bigger than this if maria_chk can't allocate\n\
+ memory to hold them.\n\
+ -r, --recover Can fix almost anything except unique keys that aren't\n\
+ unique.\n\
+ -n, --sort-recover Forces recovering with sorting even if the temporary\n\
+ file would be very big.\n\
+ -p, --parallel-recover\n\
+ Uses the same technique as '-r' and '-n', but creates\n\
+ all the keys in parallel, in different threads.\n\
+ -o, --safe-recover Uses old recovery method; Slower than '-r' but can\n\
+ handle a couple of cases where '-r' reports that it\n\
+ can't fix the data file.\n\
+ --character-sets-dir=...\n\
+ Directory where character sets are.\n\
+ --set-collation=name\n\
+ Change the collation used by the index.\n\
+ -q, --quick Faster repair by not modifying the data file.\n\
+ One can give a second '-q' to force maria_chk to\n\
+ modify the original datafile in case of duplicate keys.\n\
+ NOTE: Tables where the data file is corrupted can't be\n\
+ fixed with this option.\n\
+ -u, --unpack Unpack file packed with mariapack.\n\
+");
+
+ puts("Other actions:\n\
+ -a, --analyze Analyze distribution of keys. Will make some joins in\n\
+ MySQL faster. You can check the calculated distribution\n\
+ by using '--description --verbose table_name'.\n\
+ --stats_method=name Specifies how index statistics collection code should\n\
+ treat NULLs. Possible values of name are \"nulls_unequal\"\n\
+ (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\
+ \"nulls_ignored\".\n\
+ -d, --description Prints some information about table.\n\
+ -A, --set-auto-increment[=value]\n\
+ Force auto_increment to start at this or higher value\n\
+ If no value is given, then sets the next auto_increment\n\
+ value to the highest used value for the auto key + 1.\n\
+ -S, --sort-index Sort index blocks. This speeds up 'read-next' in\n\
+ applications.\n\
+ -R, --sort-records=#\n\
+ Sort records according to an index. This makes your\n\
+ data much more localized and may speed up things\n\
+ (It may be VERY slow to do a sort the first time!).\n\
+ -b, --block-search=#\n\
+ Find the record that the block at the given offset belongs to.");
+
+ print_defaults("my", load_default_groups);
+ my_print_variables(my_long_options);
+}
+
+#include <help_end.h>
+
+const char *maria_stats_method_names[] = {"nulls_unequal", "nulls_equal",
+ "nulls_ignored", NullS};
+TYPELIB maria_stats_method_typelib= {
+ array_elements(maria_stats_method_names) - 1, "",
+ maria_stats_method_names, NULL};
+
+ /* Read options */
+
+static my_bool
+get_one_option(int optid,
+ const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ switch (optid) {
+#ifdef __NETWARE__
+ case OPT_AUTO_CLOSE:
+ setscreenmode(SCR_AUTOCLOSE_ON_EXIT);
+ break;
+#endif
+ case 'a':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_STATISTICS;
+ else
+ check_param.testflag|= T_STATISTICS;
+ break;
+ case 'A':
+ if (argument)
+ check_param.auto_increment_value= strtoull(argument, NULL, 0);
+ else
+ check_param.auto_increment_value= 0; /* Set to max used value */
+ check_param.testflag|= T_AUTO_INC;
+ break;
+ case 'b':
+ check_param.search_after_block= strtoul(argument, NULL, 10);
+ break;
+ case 'B':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_BACKUP_DATA;
+ else
+ check_param.testflag|= T_BACKUP_DATA;
+ break;
+ case 'c':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_CHECK;
+ else
+ check_param.testflag|= T_CHECK;
+ break;
+ case 'C':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~(T_CHECK | T_CHECK_ONLY_CHANGED);
+ else
+ check_param.testflag|= T_CHECK | T_CHECK_ONLY_CHANGED;
+ break;
+ case 'D':
+ check_param.max_data_file_length=strtoll(argument, NULL, 10);
+ break;
+ case 's': /* silent */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~(T_SILENT | T_VERY_SILENT);
+ else
+ {
+ if (check_param.testflag & T_SILENT)
+ check_param.testflag|= T_VERY_SILENT;
+ check_param.testflag|= T_SILENT;
+ check_param.testflag&= ~T_WRITE_LOOP;
+ }
+ break;
+ case 'w':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_WAIT_FOREVER;
+ else
+ check_param.testflag|= T_WAIT_FOREVER;
+ break;
+ case 'd': /* description if isam-file */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_DESCRIPT;
+ else
+ check_param.testflag|= T_DESCRIPT;
+ break;
+ case 'e': /* extend check */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_EXTEND;
+ else
+ check_param.testflag|= T_EXTEND;
+ break;
+ case 'i':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_INFO;
+ else
+ check_param.testflag|= T_INFO;
+ break;
+ case 'f':
+ if (argument == disabled_my_option)
+ {
+ check_param.tmpfile_createflag= O_RDWR | O_TRUNC | O_EXCL;
+ check_param.testflag&= ~(T_FORCE_CREATE | T_UPDATE_STATE);
+ }
+ else
+ {
+ check_param.tmpfile_createflag= O_RDWR | O_TRUNC;
+ check_param.testflag|= T_FORCE_CREATE | T_UPDATE_STATE;
+ }
+ break;
+ case 'F':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_FAST;
+ else
+ check_param.testflag|= T_FAST;
+ break;
+ case 'k':
+ check_param.keys_in_use= (ulonglong) strtoll(argument, NULL, 10);
+ break;
+ case 'm':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_MEDIUM;
+ else
+ check_param.testflag|= T_MEDIUM; /* Medium check */
+ break;
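+ /*
+ The repair methods (-r, -p, -o, -n) are mutually exclusive: each case
+ first clears any previously selected repair flags before setting its own.
+ */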
+ case 'r': /* Repair table */
+ check_param.testflag&= ~T_REP_ANY;
+ if (argument != disabled_my_option)
+ check_param.testflag|= T_REP_BY_SORT;
+ break;
+ case 'p':
+ check_param.testflag&= ~T_REP_ANY;
+ if (argument != disabled_my_option)
+ check_param.testflag|= T_REP_PARALLEL;
+ break;
+ case 'o':
+ check_param.testflag&= ~T_REP_ANY;
+ check_param.force_sort= 0;
+ if (argument != disabled_my_option)
+ {
+ check_param.testflag|= T_REP;
+ my_disable_async_io= 1; /* More safety */
+ }
+ break;
+ case 'n':
+ check_param.testflag&= ~T_REP_ANY;
+ if (argument == disabled_my_option)
+ check_param.force_sort= 0;
+ else
+ {
+ check_param.testflag|= T_REP_BY_SORT;
+ check_param.force_sort= 1;
+ }
+ break;
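+ /*
+ A single -q enables quick repair (T_QUICK); a second -q additionally sets
+ T_FORCE_UNIQUENESS so duplicate keys may be fixed in the data file.
+ */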
+ case 'q':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~(T_QUICK | T_FORCE_UNIQUENESS);
+ else
+ check_param.testflag|=
+ (check_param.testflag & T_QUICK) ? T_FORCE_UNIQUENESS : T_QUICK;
+ break;
+ case 'u':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~(T_UNPACK | T_REP_BY_SORT);
+ else
+ check_param.testflag|= T_UNPACK | T_REP_BY_SORT;
+ break;
+ case 'v': /* Verbose */
+ if (argument == disabled_my_option)
+ {
+ check_param.testflag&= ~T_VERBOSE;
+ check_param.verbose=0;
+ }
+ else
+ {
+ check_param.testflag|= T_VERBOSE;
+ check_param.verbose++;
+ }
+ break;
+ case 'R': /* Sort records */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_SORT_RECORDS;
+ else
+ {
+ check_param.testflag|= T_SORT_RECORDS;
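+ /* The user gives key numbers starting at 1; internally keys are 0-based */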
+ check_param.opt_sort_key= (uint) atoi(argument) - 1;
+ if (check_param.opt_sort_key >= MARIA_MAX_KEY)
+ {
+ fprintf(stderr,
+ "The value of the sort key is bigger than max key: %d.\n",
+ MARIA_MAX_KEY);
+ exit(1);
+ }
+ }
+ break;
+ case 'S': /* Sort index */
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_SORT_INDEX;
+ else
+ check_param.testflag|= T_SORT_INDEX;
+ break;
+ case 'T':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_READONLY;
+ else
+ check_param.testflag|= T_READONLY;
+ break;
+ case 'U':
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_UPDATE_STATE;
+ else
+ check_param.testflag|= T_UPDATE_STATE;
+ break;
+ case '#':
+ if (argument == disabled_my_option)
+ {
+ DBUG_POP();
+ }
+ else
+ {
+ DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_chk.trace");
+ }
+ break;
+ case 'V':
+ print_version();
+ exit(0);
+ case OPT_CORRECT_CHECKSUM:
+ if (argument == disabled_my_option)
+ check_param.testflag&= ~T_CALC_CHECKSUM;
+ else
+ check_param.testflag|= T_CALC_CHECKSUM;
+ break;
+ case OPT_STATS_METHOD:
+ {
+ int method;
+ enum_handler_stats_method method_conv;
+ LINT_INIT(method_conv);
+ maria_stats_method_str= argument;
+ if ((method=find_type(argument, &maria_stats_method_typelib, 2)) <= 0)
+ {
+ fprintf(stderr, "Invalid value of stats_method: %s.\n", argument);
+ exit(1);
+ }
+ switch (method-1) {
+ case 0:
+ method_conv= MI_STATS_METHOD_NULLS_EQUAL;
+ break;
+ case 1:
+ method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL;
+ break;
+ case 2:
+ method_conv= MI_STATS_METHOD_IGNORE_NULLS;
+ break;
+ }
+ check_param.stats_method= method_conv;
+ break;
+ }
+#ifdef DEBUG /* Only useful if debugging */
+ case OPT_START_CHECK_POS:
+ check_param.start_check_pos= strtoull(argument, NULL, 0);
+ break;
+#endif
+ case 'H':
+ my_print_help(my_long_options);
+ exit(0);
+ case '?':
+ usage();
+ exit(0);
+ }
+ return 0;
+}
+
+
+static void get_options(register int *argc,register char ***argv)
+{
+ int ho_error;
+
+ load_defaults("my", load_default_groups, argc, argv);
+ default_argv= *argv;
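+ /* Show progress output (write loop) only when stdout is a terminal */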
+ if (isatty(fileno(stdout)))
+ check_param.testflag|=T_WRITE_LOOP;
+
+ if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
+ exit(ho_error);
+
+ /* If using repair, then update checksum if one uses --update-state */
+ if ((check_param.testflag & T_UPDATE_STATE) &&
+ (check_param.testflag & T_REP_ANY))
+ check_param.testflag|= T_CALC_CHECKSUM;
+
+ if (*argc == 0)
+ {
+ usage();
+ exit(-1);
+ }
+
+ if ((check_param.testflag & T_UNPACK) &&
+ (check_param.testflag & (T_QUICK | T_SORT_RECORDS)))
+ {
+ VOID(fprintf(stderr,
+ "%s: --unpack can't be used with --quick or --sort-records\n",
+ my_progname_short));
+ exit(1);
+ }
+ if ((check_param.testflag & T_READONLY) &&
+ (check_param.testflag &
+ (T_REP_ANY | T_STATISTICS | T_AUTO_INC |
+ T_SORT_RECORDS | T_SORT_INDEX | T_FORCE_CREATE)))
+ {
+ VOID(fprintf(stderr,
+ "%s: Can't use --readonly when repairing or sorting\n",
+ my_progname_short));
+ exit(1);
+ }
+
+ if (init_tmpdir(&maria_chk_tmpdir, opt_tmpdir))
+ exit(1);
+
+ check_param.tmpdir=&maria_chk_tmpdir;
+ check_param.key_cache_block_size= opt_key_cache_block_size;
+
+ if (set_collation_name)
+ if (!(set_collation= get_charset_by_name(set_collation_name,
+ MYF(MY_WME))))
+ exit(1);
+
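+ /* Round the requested block size down to the nearest power of two */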
+ maria_block_size=(uint) 1 << my_bit_log2(opt_maria_block_size);
+ return;
+} /* get options */
+
+
+ /* Check table */
+
+static int maria_chk(HA_CHECK *param, my_string filename)
+{
+ int error,lock_type,recreate;
+ int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS);
+ MARIA_HA *info;
+ File datafile;
+ char llbuff[22],llbuff2[22];
+ my_bool state_updated=0;
+ MARIA_SHARE *share;
+ DBUG_ENTER("maria_chk");
+
+ param->out_flag=error=param->warning_printed=param->error_printed=
+ recreate=0;
+ datafile=0;
+ param->isam_file_name=filename; /* For error messages */
+ if (!(info=maria_open(filename,
+ (param->testflag & (T_DESCRIPT | T_READONLY)) ?
+ O_RDONLY : O_RDWR,
+ HA_OPEN_FOR_REPAIR |
+ ((param->testflag & T_WAIT_FOREVER) ?
+ HA_OPEN_WAIT_IF_LOCKED :
+ (param->testflag & T_DESCRIPT) ?
+ HA_OPEN_IGNORE_IF_LOCKED : HA_OPEN_ABORT_IF_LOCKED))))
+ {
+ /* Avoid printing the isam file name twice */
+ param->error_printed=1;
+ switch (my_errno) {
+ case HA_ERR_CRASHED:
+ _ma_check_print_error(param,"'%s' doesn't have a correct index definition. You need to recreate it before you can do a repair",filename);
+ break;
+ case HA_ERR_NOT_A_TABLE:
+ _ma_check_print_error(param,"'%s' is not a MARIA-table",filename);
+ break;
+ case HA_ERR_CRASHED_ON_USAGE:
+ _ma_check_print_error(param,"'%s' is marked as crashed",filename);
+ break;
+ case HA_ERR_CRASHED_ON_REPAIR:
+ _ma_check_print_error(param,"'%s' is marked as crashed after last repair",filename);
+ break;
+ case HA_ERR_OLD_FILE:
+ _ma_check_print_error(param,"'%s' is a old type of MARIA-table", filename);
+ break;
+ case HA_ERR_END_OF_FILE:
+ _ma_check_print_error(param,"Couldn't read complete header from '%s'", filename);
+ break;
+ case EAGAIN:
+ _ma_check_print_error(param,"'%s' is locked. Use -w to wait until unlocked",filename);
+ break;
+ case ENOENT:
+ _ma_check_print_error(param,"File '%s' doesn't exist",filename);
+ break;
+ case EACCES:
+ _ma_check_print_error(param,"You don't have permission to use '%s'",
+ filename);
+ break;
+ default:
+ _ma_check_print_error(param,"%d when opening MARIA-table '%s'",
+ my_errno,filename);
+ break;
+ }
+ DBUG_RETURN(1);
+ }
+ share=info->s;
+ share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifying it */
+ share->tot_locks-= share->r_locks;
+ share->r_locks=0;
+
+ if (share->data_file_type == BLOCK_RECORD &&
+ (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_FAST | T_STATISTICS |
+ T_CHECK | T_CHECK_ONLY_CHANGED)))
+ {
+ _ma_check_print_error(param,
+ "Record format used by '%s' is is not yet supported with repair/check",
+ filename);
+ param->error_printed= 0;
+ error= 1;
+ goto end2;
+ }
+
+ /*
+ Skip the checking of the file if:
+ We are using --fast and the table is closed properly
+ We are using --check-only-changed and the table hasn't changed
+ */
+ if (param->testflag & (T_FAST | T_CHECK_ONLY_CHANGED))
+ {
+ my_bool need_to_check= (maria_is_crashed(info) ||
+ share->state.open_count != 0);
+
+ if ((param->testflag & (T_REP_ANY | T_SORT_RECORDS)) &&
+ ((share->state.changed & (STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR) ||
+ !(param->testflag & T_CHECK_ONLY_CHANGED))))
+ need_to_check=1;
+
+ if (info->s->base.keys && info->state->records)
+ {
+ if ((param->testflag & T_STATISTICS) &&
+ (share->state.changed & STATE_NOT_ANALYZED))
+ need_to_check=1;
+ if ((param->testflag & T_SORT_INDEX) &&
+ (share->state.changed & STATE_NOT_SORTED_PAGES))
+ need_to_check=1;
+ if ((param->testflag & T_REP_BY_SORT) &&
+ (share->state.changed & STATE_NOT_OPTIMIZED_KEYS))
+ need_to_check=1;
+ }
+ if ((param->testflag & T_CHECK_ONLY_CHANGED) &&
+ (share->state.changed & (STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR)))
+ need_to_check=1;
+ if (!need_to_check)
+ {
+ if (!(param->testflag & T_SILENT) || param->testflag & T_INFO)
+ printf("MARIA file: %s is already checked\n",filename);
+ if (maria_close(info))
+ {
+ _ma_check_print_error(param,"%d when closing MARIA-table '%s'",
+ my_errno,filename);
+ DBUG_RETURN(1);
+ }
+ DBUG_RETURN(0);
+ }
+ }
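+ /*
+ Recreate the table before repair/sort if it needs to be upgraded:
+ old state or base info format, compressed data with --unpack, disabled
+ keys, (almost) full files, another file version, a new collation, or a
+ changed block size.
+ */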
+ if ((param->testflag & (T_REP_ANY | T_STATISTICS |
+ T_SORT_RECORDS | T_SORT_INDEX)) &&
+ (((param->testflag & T_UNPACK) &&
+ share->data_file_type == COMPRESSED_RECORD) ||
+ mi_uint2korr(share->state.header.state_info_length) !=
+ MARIA_STATE_INFO_SIZE ||
+ mi_uint2korr(share->state.header.base_info_length) !=
+ MARIA_BASE_INFO_SIZE ||
+ maria_is_any_intersect_keys_active(param->keys_in_use, share->base.keys,
+ ~share->state.key_map) ||
+ maria_test_if_almost_full(info) ||
+ info->s->state.header.file_version[3] != maria_file_magic[3] ||
+ (set_collation &&
+ set_collation->number != share->state.header.language) ||
+ maria_block_size != MARIA_KEY_BLOCK_LENGTH))
+ {
+ if (set_collation)
+ param->language= set_collation->number;
+ if (maria_recreate_table(param, &info,filename))
+ {
+ VOID(fprintf(stderr,
+ "MARIA-table '%s' is not fixed because of errors\n",
+ filename));
+ return(-1);
+ }
+ recreate=1;
+ if (!(param->testflag & T_REP_ANY))
+ {
+ param->testflag|=T_REP_BY_SORT; /* if only STATISTICS */
+ if (!(param->testflag & T_SILENT))
+ printf("- '%s' has old table-format. Recreating index\n",filename);
+ rep_quick|=T_QUICK;
+ }
+ share=info->s;
+ share->tot_locks-= share->r_locks;
+ share->r_locks=0;
+ }
+
+ if (param->testflag & T_DESCRIPT)
+ {
+ param->total_files++;
+ param->total_records+=info->state->records;
+ param->total_deleted+=info->state->del;
+ descript(param, info, filename);
+ }
+ else
+ {
+ if (!stopwords_inited++)
+ ft_init_stopwords();
+
+ if (!(param->testflag & T_READONLY))
+ lock_type = F_WRLCK; /* table is changed */
+ else
+ lock_type= F_RDLCK;
+ if (info->lock_type == F_RDLCK)
+ info->lock_type=F_UNLCK; /* Read only table */
+ if (_ma_readinfo(info,lock_type,0))
+ {
+ _ma_check_print_error(param,"Can't lock indexfile of '%s', error: %d",
+ filename,my_errno);
+ param->error_printed=0;
+ error= 1;
+ goto end2;
+ }
+ /*
+ _ma_readinfo() has locked the table.
+ We mark the table as locked (without doing file locks) to be able to
+ use functions that only work on locked tables (like row caching).
+ */
+ maria_lock_database(info, F_EXTRA_LCK);
+ datafile=info->dfile;
+
+ if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX))
+ {
+ if (param->testflag & T_REP_ANY)
+ {
+ ulonglong tmp=share->state.key_map;
+ maria_copy_keys_active(share->state.key_map, share->base.keys,
+ param->keys_in_use);
+ if (tmp != share->state.key_map)
+ info->update|=HA_STATE_CHANGED;
+ }
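+ /*
+ Quick repair needs an intact deleted-block chain; if maria_chk_del()
+ finds errors we fall back to creating a new data file with --force,
+ otherwise the quick recover is aborted.
+ */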
+ if (rep_quick &&
+ maria_chk_del(param, info, param->testflag & ~T_VERBOSE))
+ {
+ if (param->testflag & T_FORCE_CREATE)
+ {
+ rep_quick=0;
+ _ma_check_print_info(param,"Creating new data file\n");
+ }
+ else
+ {
+ error=1;
+ _ma_check_print_error(param,
+ "Quick-recover aborted; Run recovery without switch 'q'");
+ }
+ }
+ if (!error)
+ {
+ if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) &&
+ (maria_is_any_key_active(share->state.key_map) ||
+ (rep_quick && !param->keys_in_use && !recreate)) &&
+ maria_test_if_sort_rep(info, info->state->records,
+ info->s->state.key_map,
+ param->force_sort))
+ {
+ if (param->testflag & T_REP_BY_SORT)
+ error=maria_repair_by_sort(param,info,filename,rep_quick);
+ else
+ error=maria_repair_parallel(param,info,filename,rep_quick);
+ state_updated=1;
+ }
+ else if (param->testflag & T_REP_ANY)
+ error=maria_repair(param, info,filename,rep_quick);
+ }
+ if (!error && param->testflag & T_SORT_RECORDS)
+ {
+ /*
+ The data file is nowadays reopened in the repair code so we should
+ soon remove the following reopen-code
+ */
+#ifndef TO_BE_REMOVED
+ if (param->out_flag & O_NEW_DATA)
+ { /* Change temp file to org file */
+ VOID(my_close(info->dfile,MYF(MY_WME))); /* Close new file */
+ error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT,
+ MYF(0));
+ if (_ma_open_datafile(info,info->s, -1))
+ error=1;
+ param->out_flag&= ~O_NEW_DATA; /* We are using new datafile */
+ param->read_cache.file=info->dfile;
+ }
+#endif
+ if (! error)
+ {
+ uint key;
+ /*
+ We can't update the index in maria_sort_records if we have a
+ prefix compressed or fulltext index
+ */
+ my_bool update_index=1;
+ for (key=0 ; key < share->base.keys; key++)
+ if (share->keyinfo[key].flag & (HA_BINARY_PACK_KEY|HA_FULLTEXT))
+ update_index=0;
+
+ error=maria_sort_records(param,info,filename,param->opt_sort_key,
+ /* what is the following parameter for ? */
+ (my_bool) !(param->testflag & T_REP),
+ update_index);
+ datafile=info->dfile; /* This is now locked */
+ if (!error && !update_index)
+ {
+ if (param->verbose)
+ puts("Table had a compressed index; We must now recreate the index");
+ error=maria_repair_by_sort(param,info,filename,1);
+ }
+ }
+ }
+ if (!error && param->testflag & T_SORT_INDEX)
+ error=maria_sort_index(param,info,filename);
+ if (!error)
+ share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR);
+ else
+ maria_mark_crashed(info);
+ }
+ else if ((param->testflag & T_CHECK) || !(param->testflag & T_AUTO_INC))
+ {
+ if (!(param->testflag & T_SILENT) || param->testflag & T_INFO)
+ printf("Checking MARIA file: %s\n",filename);
+ if (!(param->testflag & T_SILENT))
+ printf("Data records: %7s Deleted blocks: %7s\n",
+ llstr(info->state->records,llbuff),
+ llstr(info->state->del,llbuff2));
+ error =maria_chk_status(param,info);
+ maria_intersect_keys_active(share->state.key_map, param->keys_in_use);
+ error =maria_chk_size(param,info);
+ if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE)))
+ error|=maria_chk_del(param, info,param->testflag);
+ if ((!error || (!(param->testflag & (T_FAST | T_FORCE_CREATE)) &&
+ !param->start_check_pos)))
+ {
+ error|=maria_chk_key(param, info);
+ if (!error && (param->testflag & (T_STATISTICS | T_AUTO_INC)))
+ error=maria_update_state_info(param, info,
+ ((param->testflag & T_STATISTICS) ?
+ UPDATE_STAT : 0) |
+ ((param->testflag & T_AUTO_INC) ?
+ UPDATE_AUTO_INC : 0));
+ }
+ if ((!rep_quick && !error) ||
+ !(param->testflag & (T_FAST | T_FORCE_CREATE)))
+ {
+ if (param->testflag & (T_EXTEND | T_MEDIUM))
+ VOID(init_key_cache(maria_key_cache,opt_key_cache_block_size,
+ param->use_buffers, 0, 0));
+ VOID(init_io_cache(&param->read_cache,datafile,
+ (uint) param->read_buffer_length,
+ READ_CACHE,
+ (param->start_check_pos ?
+ param->start_check_pos :
+ share->pack.header_length),
+ 1,
+ MYF(MY_WME)));
+ maria_lock_memory(param);
+ if ((info->s->data_file_type != STATIC_RECORD) ||
+ (param->testflag & (T_EXTEND | T_MEDIUM)))
+ error|=maria_chk_data_link(param, info, param->testflag & T_EXTEND);
+ error|=_ma_flush_blocks(param, share->key_cache, share->kfile);
+ VOID(end_io_cache(&param->read_cache));
+ }
+ if (!error)
+ {
+ if ((share->state.changed & STATE_CHANGED) &&
+ (param->testflag & T_UPDATE_STATE))
+ info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED |
+ STATE_CRASHED_ON_REPAIR);
+ }
+ else if (!maria_is_crashed(info) &&
+ (param->testflag & T_UPDATE_STATE))
+ { /* Mark crashed */
+ maria_mark_crashed(info);
+ info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ }
+ }
+ }
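+ /*
+ Update the auto_increment state if -A was given, or after repairing a
+ table that has an auto_increment key.
+ */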
+ if ((param->testflag & T_AUTO_INC) ||
+ ((param->testflag & T_REP_ANY) && info->s->base.auto_key))
+ _ma_update_auto_increment_key(param, info,
+ (my_bool) !test(param->testflag & T_AUTO_INC));
+
+ if (!(param->testflag & T_DESCRIPT))
+ {
+ if (info->update & HA_STATE_CHANGED && ! (param->testflag & T_READONLY))
+ error|=maria_update_state_info(param, info,
+ UPDATE_OPEN_COUNT |
+ (((param->testflag & T_REP_ANY) ?
+ UPDATE_TIME : 0) |
+ (state_updated ? UPDATE_STAT : 0) |
+ ((param->testflag & T_SORT_RECORDS) ?
+ UPDATE_SORT : 0)));
+ info->update&= ~HA_STATE_CHANGED;
+ }
+ maria_lock_database(info, F_UNLCK);
+end2:
+ if (maria_close(info))
+ {
+ _ma_check_print_error(param,"%d when closing MARIA-table '%s'",my_errno,filename);
+ DBUG_RETURN(1);
+ }
+ if (error == 0)
+ {
+ if (param->out_flag & O_NEW_DATA)
+ error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT,
+ ((param->testflag & T_BACKUP_DATA) ?
+ MYF(MY_REDEL_MAKE_BACKUP) : MYF(0)));
+ if (param->out_flag & O_NEW_INDEX)
+ error|=maria_change_to_newfile(filename,MARIA_NAME_IEXT,INDEX_TMP_EXT,
+ MYF(0));
+ }
+ VOID(fflush(stdout)); VOID(fflush(stderr));
+ if (param->error_printed)
+ {
+ if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX))
+ {
+ VOID(fprintf(stderr,
+ "MARIA-table '%s' is not fixed because of errors\n",
+ filename));
+ if (param->testflag & T_REP_ANY)
+ VOID(fprintf(stderr,
+ "Try fixing it by using the --safe-recover (-o), the --force (-f) option or by not using the --quick (-q) flag\n"));
+ }
+ else if (!(param->error_printed & 2) &&
+ !(param->testflag & T_FORCE_CREATE))
+ VOID(fprintf(stderr,
+ "MARIA-table '%s' is corrupted\nFix it using switch \"-r\" or \"-o\"\n",
+ filename));
+ }
+ else if (param->warning_printed &&
+ ! (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX |
+ T_FORCE_CREATE)))
+ VOID(fprintf(stderr, "MARIA-table '%s' is usable but should be fixed\n",
+ filename));
+ VOID(fflush(stderr));
+ DBUG_RETURN(error);
+} /* maria_chk */
+
+
+/* Write info about table */
+
+static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name)
+{
+ uint key,keyseg_nr,field,start;
+ reg3 MARIA_KEYDEF *keyinfo;
+ reg2 HA_KEYSEG *keyseg;
+ reg4 const char *text;
+ char buff[160],length[10],*pos,*end;
+ enum en_fieldtype type;
+ MARIA_SHARE *share=info->s;
+ char llbuff[22],llbuff2[22];
+ DBUG_ENTER("describe");
+
+ printf("\nMARIA file: %s\n",name);
+ printf("Record format: %s\n", record_formats[share->data_file_type]);
+ printf("Character set: %s (%d)\n",
+ get_charset_name(share->state.header.language),
+ share->state.header.language);
+
+ if (param->testflag & T_VERBOSE)
+ {
+ printf("File-version: %d\n",
+ (int) share->state.header.file_version[3]);
+ if (share->state.create_time)
+ {
+ get_date(buff,1,share->state.create_time);
+ printf("Creation time: %s\n",buff);
+ }
+ if (share->state.check_time)
+ {
+ get_date(buff,1,share->state.check_time);
+ printf("Recover time: %s\n",buff);
+ }
+ pos=buff;
+ if (share->state.changed & STATE_CRASHED)
+ strmov(buff,"crashed");
+ else
+ {
+ if (share->state.open_count)
+ pos=strmov(pos,"open,");
+ if (share->state.changed & STATE_CHANGED)
+ pos=strmov(pos,"changed,");
+ else
+ pos=strmov(pos,"checked,");
+ if (!(share->state.changed & STATE_NOT_ANALYZED))
+ pos=strmov(pos,"analyzed,");
+ if (!(share->state.changed & STATE_NOT_OPTIMIZED_KEYS))
+ pos=strmov(pos,"optimized keys,");
+ if (!(share->state.changed & STATE_NOT_SORTED_PAGES))
+ pos=strmov(pos,"sorted index pages,");
+ pos[-1]=0; /* Remove extra ',' */
+ }
+ printf("Status: %s\n",buff);
+ if (share->base.auto_key)
+ {
+ printf("Auto increment key: %16d Last value: %18s\n",
+ share->base.auto_key,
+ llstr(share->state.auto_increment,llbuff));
+ }
+ if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+ printf("Checksum: %26s\n",llstr(info->state->checksum,llbuff));
+;
+ if (share->options & HA_OPTION_DELAY_KEY_WRITE)
+ printf("Keys are only flushed at close\n");
+
+ }
+ printf("Data records: %16s Deleted blocks: %18s\n",
+ llstr(info->state->records,llbuff),llstr(info->state->del,llbuff2));
+ if (param->testflag & T_SILENT)
+ DBUG_VOID_RETURN; /* This is enough */
+
+ if (param->testflag & T_VERBOSE)
+ {
+#ifdef USE_RELOC
+ printf("Init-relocation: %16s\n",llstr(share->base.reloc,llbuff));
+#endif
+ printf("Datafile parts: %16s Deleted data: %18s\n",
+ llstr(share->state.split,llbuff),
+ llstr(info->state->empty,llbuff2));
+ printf("Datafile pointer (bytes): %11d Keyfile pointer (bytes): %13d\n",
+ share->rec_reflength,share->base.key_reflength);
+ printf("Datafile length: %16s Keyfile length: %18s\n",
+ llstr(info->state->data_file_length,llbuff),
+ llstr(info->state->key_file_length,llbuff2));
+
+ if (info->s->base.reloc == 1L && info->s->base.records == 1L)
+ puts("This is a one-record table");
+ else
+ {
+ if (share->base.max_data_file_length != HA_OFFSET_ERROR ||
+ share->base.max_key_file_length != HA_OFFSET_ERROR)
+ printf("Max datafile length: %16s Max keyfile length: %18s\n",
+ llstr(share->base.max_data_file_length-1,llbuff),
+ llstr(share->base.max_key_file_length-1,llbuff2));
+ }
+ }
+ printf("Block_size: %16d\n",(int) share->block_size);
+ printf("Recordlength: %16d\n",(int) share->base.pack_reclength);
+ if (! maria_is_all_keys_active(share->state.key_map, share->base.keys))
+ {
+ longlong2str(share->state.key_map,buff,2);
+ printf("Using only keys '%s' of %d possibly keys\n",
+ buff, share->base.keys);
+ }
+ puts("\ntable description:");
+ printf("Key Start Len Index Type");
+ if (param->testflag & T_VERBOSE)
+ printf(" Rec/key Root Blocksize");
+ VOID(putchar('\n'));
+
+ for (key=keyseg_nr=0, keyinfo= &share->keyinfo[0] ;
+ key < share->base.keys;
+ key++,keyinfo++)
+ {
+ keyseg=keyinfo->seg;
+ if (keyinfo->flag & HA_NOSAME) text="unique ";
+ else if (keyinfo->flag & HA_FULLTEXT) text="fulltext ";
+ else text="multip.";
+
+ pos=buff;
+ if (keyseg->flag & HA_REVERSE_SORT)
+ *pos++ = '-';
+ pos=strmov(pos,type_names[keyseg->type]);
+ *pos++ = ' ';
+ *pos=0;
+ if (keyinfo->flag & HA_PACK_KEY)
+ pos=strmov(pos,prefix_packed_txt);
+ if (keyinfo->flag & HA_BINARY_PACK_KEY)
+ pos=strmov(pos,bin_packed_txt);
+ if (keyseg->flag & HA_SPACE_PACK)
+ pos=strmov(pos,diff_txt);
+ if (keyseg->flag & HA_BLOB_PART)
+ pos=strmov(pos,blob_txt);
+ if (keyseg->flag & HA_NULL_PART)
+ pos=strmov(pos,null_txt);
+ *pos=0;
+
+ printf("%-4d%-6ld%-3d %-8s%-21s",
+ key+1,(long) keyseg->start+1,keyseg->length,text,buff);
+ if (share->state.key_root[key] != HA_OFFSET_ERROR)
+ llstr(share->state.key_root[key],buff);
+ else
+ buff[0]=0;
+ if (param->testflag & T_VERBOSE)
+ printf("%11lu %12s %10d",
+ share->state.rec_per_key_part[keyseg_nr++],
+ buff,keyinfo->block_length);
+ VOID(putchar('\n'));
+ while ((++keyseg)->type != HA_KEYTYPE_END)
+ {
+ pos=buff;
+ if (keyseg->flag & HA_REVERSE_SORT)
+ *pos++ = '-';
+ pos=strmov(pos,type_names[keyseg->type]);
+ *pos++= ' ';
+ if (keyseg->flag & HA_SPACE_PACK)
+ pos=strmov(pos,diff_txt);
+ if (keyseg->flag & HA_BLOB_PART)
+ pos=strmov(pos,blob_txt);
+ if (keyseg->flag & HA_NULL_PART)
+ pos=strmov(pos,null_txt);
+ *pos=0;
+ printf(" %-6ld%-3d %-21s",
+ (long) keyseg->start+1,keyseg->length,buff);
+ if (param->testflag & T_VERBOSE)
+ printf("%11lu", share->state.rec_per_key_part[keyseg_nr++]);
+ VOID(putchar('\n'));
+ }
+ keyseg++;
+ }
+ if (share->state.header.uniques)
+ {
+ MARIA_UNIQUEDEF *uniqueinfo;
+ puts("\nUnique Key Start Len Nullpos Nullbit Type");
+ for (key=0,uniqueinfo= &share->uniqueinfo[0] ;
+ key < share->state.header.uniques; key++, uniqueinfo++)
+ {
+ my_bool new_row=0;
+ char null_bit[8],null_pos[8];
+ printf("%-8d%-5d",key+1,uniqueinfo->key+1);
+ for (keyseg=uniqueinfo->seg ; keyseg->type != HA_KEYTYPE_END ; keyseg++)
+ {
+ if (new_row)
+ fputs(" ",stdout);
+ null_bit[0]=null_pos[0]=0;
+ if (keyseg->null_bit)
+ {
+ sprintf(null_bit,"%d",keyseg->null_bit);
+ sprintf(null_pos,"%ld",(long) keyseg->null_pos+1);
+ }
+ printf("%-7ld%-5d%-9s%-10s%-30s\n",
+ (long) keyseg->start+1,keyseg->length,
+ null_pos,null_bit,
+ type_names[keyseg->type]);
+ new_row=1;
+ }
+ }
+ }
+ if (param->verbose > 1)
+ {
+ char null_bit[8],null_pos[8];
+ printf("\nField Start Length Nullpos Nullbit Type");
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ printf(" Huff tree Bits");
+ VOID(putchar('\n'));
+
+ for (field=0 ; field < share->base.fields ; field++)
+ {
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ type=share->rec[field].base_type;
+ else
+ type=(enum en_fieldtype) share->rec[field].type;
+ end=strmov(buff,field_pack[type]);
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ {
+ if (share->rec[field].pack_type & PACK_TYPE_SELECTED)
+ end=strmov(end,", not_always");
+ if (share->rec[field].pack_type & PACK_TYPE_SPACE_FIELDS)
+ end=strmov(end,", no empty");
+ if (share->rec[field].pack_type & PACK_TYPE_ZERO_FILL)
+ {
+ sprintf(end,", zerofill(%d)",share->rec[field].space_length_bits);
+ end=strend(end);
+ }
+ }
+ if (buff[0] == ',')
+ strmov(buff,buff+2);
+ int10_to_str((long) share->rec[field].length,length,10);
+ null_bit[0]=null_pos[0]=0;
+ if (share->rec[field].null_bit)
+ {
+ sprintf(null_bit,"%d",share->rec[field].null_bit);
+ sprintf(null_pos,"%d",share->rec[field].null_pos+1);
+ }
+ printf("%-6d%-6u%-7s%-8s%-8s%-35s",field+1,
+ (uint) share->rec[field].offset+1,
+ length, null_pos, null_bit, buff);
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ {
+ if (share->rec[field].huff_tree)
+ printf("%3d %2d",
+ (uint) (share->rec[field].huff_tree-share->decode_trees)+1,
+ share->rec[field].huff_tree->quick_table_bits);
+ }
+ VOID(putchar('\n'));
+ start+=share->rec[field].length;
+ }
+ }
+ DBUG_VOID_RETURN;
+} /* describe */
+
+
+ /* Sort records according to one key */
+
+static int maria_sort_records(HA_CHECK *param,
+ register MARIA_HA *info, my_string name,
+ uint sort_key,
+ my_bool write_info,
+ my_bool update_index)
+{
+ int got_error;
+ uint key;
+ MARIA_KEYDEF *keyinfo;
+ File new_file;
+ byte *temp_buff;
+ ha_rows old_record_count;
+ MARIA_SHARE *share=info->s;
+ char llbuff[22],llbuff2[22];
+ MARIA_SORT_INFO sort_info;
+ MARIA_SORT_PARAM sort_param;
+ DBUG_ENTER("sort_records");
+
+ bzero((char*)&sort_info,sizeof(sort_info));
+ bzero((char*)&sort_param,sizeof(sort_param));
+ sort_param.sort_info=&sort_info;
+ sort_info.param=param;
+ keyinfo= &share->keyinfo[sort_key];
+ got_error=1;
+ temp_buff=0;
+ new_file= -1;
+
+ if (! maria_is_key_active(share->state.key_map, sort_key))
+ {
+ _ma_check_print_warning(param,
+ "Can't sort table '%s' on key %d; No such key",
+ name,sort_key+1);
+ param->error_printed=0;
+ DBUG_RETURN(0); /* Nothing to do */
+ }
+ if (keyinfo->flag & HA_FULLTEXT)
+ {
+ _ma_check_print_warning(param,"Can't sort table '%s' on FULLTEXT key %d",
+ name,sort_key+1);
+ param->error_printed=0;
+ DBUG_RETURN(0); /* Nothing to do */
+ }
+ if (share->data_file_type == COMPRESSED_RECORD)
+ {
+ _ma_check_print_warning(param,"Can't sort read-only table '%s'", name);
+ param->error_printed=0;
+ DBUG_RETURN(0); /* Nothing to do */
+ }
+ if (!(param->testflag & T_SILENT))
+ {
+ printf("- Sorting records for MARIA-table '%s'\n",name);
+ if (write_info)
+ printf("Data records: %9s Deleted: %9s\n",
+ llstr(info->state->records,llbuff),
+ llstr(info->state->del,llbuff2));
+ }
+ if (share->state.key_root[sort_key] == HA_OFFSET_ERROR)
+ DBUG_RETURN(0); /* Nothing to do */
+
+ init_key_cache(maria_key_cache, opt_key_cache_block_size, param->use_buffers,
+ 0, 0);
+ if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length,
+ WRITE_CACHE,share->pack.header_length,1,
+ MYF(MY_WME | MY_WAIT_IF_FULL)))
+ goto err;
+ info->opt_flag|=WRITE_CACHE_USED;
+
+ if (!(temp_buff=(byte*) my_alloca((uint) keyinfo->block_length)))
+ {
+ _ma_check_print_error(param,"Not enough memory for key block");
+ goto err;
+ }
+ if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength,
+ MYF(0))))
+ {
+ _ma_check_print_error(param,"Not enough memory for record");
+ goto err;
+ }
+ fn_format(param->temp_filename,name,"", MARIA_NAME_DEXT,2+4+32);
+ new_file= my_create(fn_format(param->temp_filename,
+ param->temp_filename,"",
+ DATA_TMP_EXT,2+4),
+ 0,param->tmpfile_createflag,
+ MYF(0));
+ if (new_file < 0)
+ {
+ _ma_check_print_error(param,"Can't create new tempfile: '%s'",
+ param->temp_filename);
+ goto err;
+ }
+ if (share->pack.header_length)
+ if (maria_filecopy(param,new_file,info->dfile,0L,share->pack.header_length,
+ "datafile-header"))
+ goto err;
+ info->rec_cache.file=new_file; /* Use this file for caching */
+
+ maria_lock_memory(param);
+ for (key=0 ; key < share->base.keys ; key++)
+ share->keyinfo[key].flag|= HA_SORT_ALLOWS_SAME;
+
+ if (my_pread(share->kfile, temp_buff,
+ (uint) keyinfo->block_length,
+ share->state.key_root[sort_key],
+ MYF(MY_NABP+MY_WME)))
+ {
+ _ma_check_print_error(param,"Can't read indexpage from filepos: %s",
+ (ulong) share->state.key_root[sort_key]);
+ goto err;
+ }
+
+ /* Setup param for _ma_sort_write_record */
+ sort_info.info=info;
+ sort_info.new_data_file_type=share->data_file_type;
+ sort_param.fix_datafile=1;
+ sort_param.master=1;
+ sort_param.filepos=share->pack.header_length;
+ old_record_count=info->state->records;
+ info->state->records=0;
+ if (sort_info.new_data_file_type != COMPRESSED_RECORD)
+ info->state->checksum=0;
+
+ if (sort_record_index(&sort_param,info,keyinfo,
+ share->state.key_root[sort_key],
+ temp_buff, sort_key,new_file,update_index) ||
+ maria_write_data_suffix(&sort_info,1) ||
+ flush_io_cache(&info->rec_cache))
+ goto err;
+
+ if (info->state->records != old_record_count)
+ {
+ _ma_check_print_error(param,"found %s of %s records",
+ llstr(info->state->records,llbuff),
+ llstr(old_record_count,llbuff2));
+ goto err;
+ }
+
+ VOID(my_close(info->dfile,MYF(MY_WME)));
+ param->out_flag|=O_NEW_DATA; /* Data in new file */
+ info->dfile=new_file; /* Use new datafile */
+ info->state->del=0;
+ info->state->empty=0;
+ share->state.dellink= HA_OFFSET_ERROR;
+ info->state->data_file_length=sort_param.filepos;
+ share->state.split=info->state->records; /* Only whole records */
+ share->state.version=(ulong) time((time_t*) 0);
+
+ info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
+
+ if (param->testflag & T_WRITE_LOOP)
+ {
+ VOID(fputs(" \r",stdout)); VOID(fflush(stdout));
+ }
+ got_error=0;
+
+err:
+ if (got_error && new_file >= 0)
+ {
+ VOID(end_io_cache(&info->rec_cache));
+ (void) my_close(new_file,MYF(MY_WME));
+ (void) my_delete(param->temp_filename, MYF(MY_WME));
+ }
+ if (temp_buff)
+ {
+ my_afree((gptr) temp_buff);
+ }
+ my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR));
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ VOID(end_io_cache(&info->rec_cache));
+ my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR));
+ sort_info.buff=0;
+ share->state.sortkey=sort_key;
+ DBUG_RETURN(_ma_flush_blocks(param, share->key_cache, share->kfile) |
+ got_error);
+} /* sort_records */
+
+
+/* Sort records recursive using one index */
+
+static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo,
+ my_off_t page, byte *buff, uint sort_key,
+ File new_file,my_bool update_index)
+{
+ uint nod_flag,used_length,key_length;
+ byte *temp_buff,*keypos,*endpos;
+ my_off_t next_page,rec_pos;
+ byte lastkey[HA_MAX_KEY_BUFF];
+ char llbuff[22];
+ MARIA_SORT_INFO *sort_info= sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
+ DBUG_ENTER("sort_record_index");
+
+ nod_flag=_ma_test_if_nod(buff);
+ temp_buff=0;
+
+ if (nod_flag)
+ {
+ if (!(temp_buff= (byte*) my_alloca((uint) keyinfo->block_length)))
+ {
+ _ma_check_print_error(param,"Not Enough memory");
+ DBUG_RETURN(-1);
+ }
+ }
+ used_length=maria_getint(buff);
+ keypos=buff+2+nod_flag;
+ endpos=buff+used_length;
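+ /*
+ Walk the keys on this page; on node pages, first recurse into the child
+ page stored before each key, then copy the key's row to the new file.
+ */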
+ for ( ;; )
+ {
+ _sanity(__FILE__,__LINE__);
+ if (nod_flag)
+ {
+ next_page= _ma_kpos(nod_flag, keypos);
+ if (my_pread(info->s->kfile,(byte*) temp_buff,
+ (uint) keyinfo->block_length, next_page,
+ MYF(MY_NABP+MY_WME)))
+ {
+ _ma_check_print_error(param,"Can't read keys from filepos: %s",
+ llstr(next_page,llbuff));
+ goto err;
+ }
+ if (sort_record_index(sort_param, info,keyinfo,next_page,temp_buff,
+ sort_key,
+ new_file, update_index))
+ goto err;
+ }
+ _sanity(__FILE__,__LINE__);
+ if (keypos >= endpos ||
+ (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey))
+ == 0)
+ break;
+ rec_pos= _ma_dpos(info,0,lastkey+key_length);
+
+ if ((*info->s->read_record)(info,sort_param->record,rec_pos))
+ {
+ _ma_check_print_error(param,"%d when reading datafile",my_errno);
+ goto err;
+ }
+ if (rec_pos != sort_param->filepos && update_index)
+ {
+ _ma_dpointer(info,keypos-nod_flag-info->s->rec_reflength,
+ sort_param->filepos);
+ if (maria_movepoint(info,sort_param->record,rec_pos,sort_param->filepos,
+ sort_key))
+ {
+ _ma_check_print_error(param,"%d when updating key-pointers",my_errno);
+ goto err;
+ }
+ }
+ if (_ma_sort_write_record(sort_param))
+ goto err;
+ }
+ /* Clear end of block to get better compression if the table is backed up */
+ bzero((byte*) buff+used_length,keyinfo->block_length-used_length);
+ if (my_pwrite(info->s->kfile,(byte*) buff,(uint) keyinfo->block_length,
+ page,param->myf_rw))
+ {
+ _ma_check_print_error(param,"%d when updating keyblock",my_errno);
+ goto err;
+ }
+ if (temp_buff)
+ my_afree((gptr) temp_buff);
+ DBUG_RETURN(0);
+err:
+ if (temp_buff)
+ my_afree((gptr) temp_buff);
+ DBUG_RETURN(1);
+} /* sort_record_index */
+
+
+
+/*
+ Check if maria_chk was killed by a signal
+ This is overloaded by other programs that want to be able to abort
+ sorting
+*/
+
+static int not_killed= 0;
+
+volatile int *_ma_killed_ptr(HA_CHECK *param __attribute__((unused)))
+{
+ return &not_killed; /* always zero; maria_chk itself is never killed */
+}
+
+ /* print warnings and errors */
+ /* VARARGS */
+
+void _ma_check_print_info(HA_CHECK *param __attribute__((unused)),
+ const char *fmt,...)
+{
+ va_list args;
+
+ va_start(args,fmt);
+ VOID(vfprintf(stdout, fmt, args));
+ VOID(fputc('\n',stdout));
+ va_end(args);
+}
+
+/* VARARGS */
+
+void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...)
+{
+ va_list args;
+ DBUG_ENTER("_ma_check_print_warning");
+
+ fflush(stdout);
+ if (!param->warning_printed && !param->error_printed)
+ {
+ if (param->testflag & T_SILENT)
+ fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,
+ param->isam_file_name);
+ param->out_flag|= O_DATA_LOST;
+ }
+ param->warning_printed=1;
+ va_start(args,fmt);
+ fprintf(stderr,"%s: warning: ",my_progname_short);
+ VOID(vfprintf(stderr, fmt, args));
+ VOID(fputc('\n',stderr));
+ fflush(stderr);
+ va_end(args);
+ DBUG_VOID_RETURN;
+}
+
+/* VARARGS */
+
+void _ma_check_print_error(HA_CHECK *param, const char *fmt,...)
+{
+ va_list args;
+ DBUG_ENTER("_ma_check_print_error");
+ DBUG_PRINT("enter",("format: %s",fmt));
+
+ fflush(stdout);
+ if (!param->warning_printed && !param->error_printed)
+ {
+ if (param->testflag & T_SILENT)
+ fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,param->isam_file_name);
+ param->out_flag|= O_DATA_LOST;
+ }
+ param->error_printed|=1;
+ va_start(args,fmt);
+ fprintf(stderr,"%s: error: ",my_progname_short);
+ VOID(vfprintf(stderr, fmt, args));
+ VOID(fputc('\n',stderr));
+ fflush(stderr);
+ va_end(args);
+ DBUG_VOID_RETURN;
+}
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
new file mode 100644
index 00000000000..b174df2fa3e
--- /dev/null
+++ b/storage/maria/maria_def.h
@@ -0,0 +1,853 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* This file is included by all internal maria files */
+
+#include "maria.h" /* Structs & some defines */
+#include "myisampack.h" /* packing of keys */
+#include <my_tree.h>
+#ifdef THREAD
+#include <my_pthread.h>
+#include <thr_lock.h>
+#else
+#include <my_no_pthread.h>
+#endif
+
+#include <pagecache.h>
+#include "ma_loghandler.h"
+#include "ma_control_file.h"
+
+/* undef map from my_nosys; We need test-if-disk full */
+#undef my_write
+
+typedef struct st_maria_status_info
+{
+ ha_rows records; /* Rows in table */
+ ha_rows del; /* Removed rows */
+ my_off_t empty; /* lost space in datafile */
+ my_off_t key_empty; /* lost space in indexfile */
+ my_off_t key_file_length;
+ my_off_t data_file_length;
+ ha_checksum checksum;
+} MARIA_STATUS_INFO;
+
+typedef struct st_maria_state_info
+{
+ struct
+ { /* Fileheader */
+ uchar file_version[4];
+ uchar options[2];
+ uchar header_length[2];
+ uchar state_info_length[2];
+ uchar base_info_length[2];
+ uchar base_pos[2];
+ uchar key_parts[2]; /* Key parts */
+ uchar unique_key_parts[2]; /* Key parts + unique parts */
+ uchar keys; /* number of keys in file */
+ uchar uniques; /* number of UNIQUE definitions */
+ uchar language; /* Language for indexes */
+ uchar fulltext_keys;
+ uchar data_file_type;
+ uchar org_data_file_type; /* Used by mariapack to store dft */
+ } header;
+
+ MARIA_STATUS_INFO state;
+ ha_rows split; /* number of split blocks */
+ my_off_t dellink; /* Link to next removed block */
+ ulonglong first_bitmap_with_space;
+ ulonglong auto_increment;
+ ulong process; /* process that updated table last */
+ ulong unique; /* Unique number for this process */
+ ulong update_count; /* Updated for each write lock */
+ ulong status;
+ ulong *rec_per_key_part;
+ ha_checksum checksum; /* Table checksum */
+ my_off_t *key_root; /* Start of key trees */
+ my_off_t key_del; /* delete links for trees */
+ my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */
+
+ ulong sec_index_changed; /* Updated when new sec_index */
+ ulong sec_index_used; /* which extra index are in use */
+ ulonglong key_map; /* Which keys are in use */
+ ulong version; /* timestamp of create */
+ time_t create_time; /* Time when created database */
+ time_t recover_time; /* Time for last recover */
+ time_t check_time; /* Time for last check */
+ uint sortkey; /* sorted by this key (not used) */
+ uint open_count;
+ uint8 changed; /* Changed since maria_chk */
+
+ /* the following isn't saved on disk */
+ uint state_diff_length; /* Should be 0 */
+ uint state_length; /* Length of state header in file */
+ ulong *key_info;
+} MARIA_STATE_INFO;
+
+
+#define MARIA_STATE_INFO_SIZE (24 + 4 + 11*8 + 4*4 + 8 + 3*4 + 5*8)
+#define MARIA_STATE_KEY_SIZE 8
+#define MARIA_STATE_KEYBLOCK_SIZE 8
+#define MARIA_STATE_KEYSEG_SIZE 4
+#define MARIA_STATE_EXTRA_SIZE (MARIA_MAX_KEY*MARIA_STATE_KEY_SIZE + MARIA_MAX_KEY*HA_MAX_KEY_SEG*MARIA_STATE_KEYSEG_SIZE)
+#define MARIA_KEYDEF_SIZE (2+ 5*2)
+#define MARIA_UNIQUEDEF_SIZE (2+1+1)
+#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2)
+#define MARIA_COLUMNDEF_SIZE (6+2+2+2+2+2+1+1)
+#define MARIA_BASE_INFO_SIZE (5*8 + 6*4 + 11*2 + 6 + 5*2 + 1 + 16)
+#define MARIA_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */
+
+typedef struct st_ma_base_info
+{
+ my_off_t keystart; /* Start of keys */
+ my_off_t max_data_file_length;
+ my_off_t max_key_file_length;
+ my_off_t margin_key_file_length;
+ ha_rows records, reloc; /* Create information */
+ ulong mean_row_length; /* Create information */
+ ulong reclength; /* length of unpacked record */
+ ulong pack_reclength; /* Length of full packed rec */
+ ulong min_pack_length;
+ ulong max_pack_length; /* Max possible length of packed rec */
+ ulong min_block_length;
+ uint fields; /* fields in table */
+ uint fixed_not_null_fields;
+ uint fixed_not_null_fields_length;
+ uint max_field_lengths;
+ uint pack_fields; /* packed fields in table */
+ uint varlength_fields; /* char/varchar/blobs */
+ uint rec_reflength; /* = 2-8 */
+ uint key_reflength; /* = 2-8 */
+ uint keys; /* same as in state.header */
+ uint auto_key; /* Which key-1 is an auto key */
+ uint blobs; /* Number of blobs */
+ /* Length of packed bits (when table was created first time) */
+ uint pack_bytes;
+ /* Length of null bits (when table was created first time) */
+ uint original_null_bytes;
+ uint null_bytes; /* Null bytes in record */
+ uint field_offsets; /* Number of field offsets */
+ uint max_key_block_length; /* Max block length */
+ uint max_key_length; /* Max key length */
+ /* Extra allocation when using dynamic record format */
+ uint extra_alloc_bytes;
+ uint extra_alloc_procent;
+ uint is_nulls_extended; /* 1 if new null bytes */
+ uint min_row_length;
+ uint default_row_flag; /* 0 or ROW_FLAG_NULLS_EXTENDED */
+ uint block_size;
+ uint default_rec_buff_size;
+ uint extra_rec_buff_size;
+
+ /* The following are from the header */
+ uint key_parts, all_key_parts;
+ my_bool transactional;
+} MARIA_BASE_INFO;
+
+
+/* Structs used intern in database */
+
+typedef struct st_maria_blob /* Info of record */
+{
+ ulong offset; /* Offset to blob in record */
+ uint pack_length; /* Type of packed length */
+ ulong length; /* Calc:ed for each record */
+} MARIA_BLOB;
+
+
+typedef struct st_maria_pack
+{
+ ulong header_length;
+ uint ref_length;
+ uchar version;
+} MARIA_PACK;
+
+typedef struct st_maria_file_bitmap
+{
+ uchar *map;
+ ulonglong page; /* Page number for current bitmap */
+ uint used_size; /* Size of bitmap that is not 0 */
+ File file;
+
+ my_bool changed;
+
+#ifdef THREAD
+ pthread_mutex_t bitmap_lock;
+#endif
+ /* Constants, allocated when initiating bitmaps */
+ uint sizes[8]; /* Size per bit combination */
+ uint total_size; /* Total usable size of bitmap page */
+ uint block_size; /* Block size of file */
+ ulong pages_covered; /* Pages covered by bitmap + 1 */
+} MARIA_FILE_BITMAP;
+
+
+#define MAX_NONMAPPED_INSERTS 1000
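+/*
+ Presumably: how many inserts may go past the mmap()ed area of the data
+ file before the file is remapped (see nonmmaped_inserts below).
+*/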
+
+typedef struct st_maria_share
+{ /* Shared between opens */
+ MARIA_STATE_INFO state;
+ MARIA_BASE_INFO base;
+ MARIA_KEYDEF ft2_keyinfo; /* Second-level ft-key
+ definition */
+ MARIA_KEYDEF *keyinfo; /* Key definitions */
+ MARIA_UNIQUEDEF *uniqueinfo; /* unique definitions */
+ HA_KEYSEG *keyparts; /* key part info */
+ MARIA_COLUMNDEF *rec; /* Pointer to field information
+ */
+ MARIA_PACK pack; /* Data about packed records */
+ MARIA_BLOB *blobs; /* Pointer to blobs */
+ char *unique_file_name; /* realpath() of index file */
+ char *data_file_name, /* Resolved path names from
+ symlinks */
+ *index_file_name;
+ byte *file_map; /* mem-map of file if possible */
+ KEY_CACHE *key_cache; /* ref to the current key cache */
+ MARIA_DECODE_TREE *decode_trees;
+ uint16 *decode_tables;
+ my_bool (*once_init)(struct st_maria_share *, File);
+ my_bool (*once_end)(struct st_maria_share *);
+ my_bool (*init)(struct st_maria_info *);
+ void (*end)(struct st_maria_info *);
+ int (*read_record)(struct st_maria_info *, byte *, MARIA_RECORD_POS);
+ my_bool (*scan_init)(struct st_maria_info *);
+ int (*scan)(struct st_maria_info *, byte *, MARIA_RECORD_POS, my_bool);
+ void (*scan_end)(struct st_maria_info *);
+ MARIA_RECORD_POS (*write_record_init)(struct st_maria_info *, const byte *);
+ my_bool (*write_record)(struct st_maria_info *, const byte *);
+ my_bool (*write_record_abort)(struct st_maria_info *);
+ my_bool (*update_record)(struct st_maria_info *, MARIA_RECORD_POS,
+ const byte *);
+ my_bool (*delete_record)(struct st_maria_info *);
+ my_bool (*compare_record)(struct st_maria_info *, const byte *);
+ ha_checksum(*calc_checksum) (struct st_maria_info *, const byte *);
+ ha_checksum(*calc_write_checksum) (struct st_maria_info *, const byte *);
+ my_bool (*compare_unique) (struct st_maria_info *, MARIA_UNIQUEDEF *,
+ const byte *record, MARIA_RECORD_POS pos);
+ uint (*file_read)(MARIA_HA *, byte *, uint, my_off_t, myf);
+ uint (*file_write)(MARIA_HA *, byte *, uint, my_off_t, myf);
+ invalidator_by_filename invalidator; /* query cache invalidator */
+ ulong this_process; /* processid */
+ ulong last_process; /* For table-change-check */
+ ulong last_version; /* Version on start */
+ ulong options; /* Options used */
+ ulong min_pack_length; /* These are used by packed data */
+ ulong max_pack_length;
+ ulong state_diff_length;
+ uint rec_reflength; /* rec_reflength in use now */
+ uint unique_name_length;
+ uint32 ftparsers; /* Number of distinct ftparsers
+ + 1 */
+ File kfile; /* Shared keyfile */
+ File data_file; /* Shared data file */
+ int mode; /* mode of file on open */
+ uint reopen; /* How many times reopened */
+ uint w_locks, r_locks, tot_locks; /* Number of read/write locks */
+ uint block_size; /* block_size of keyfile & data file*/
+ uint base_length;
+ myf write_flag;
+ enum data_file_type data_file_type;
+ my_bool temporary;
+ /* Below flag is needed to make log tables work with concurrent insert */
+ my_bool is_log_table;
+
+ my_bool changed, /* If changed since lock */
+ global_changed, /* If changed since open */
+ not_flushed, concurrent_insert;
+ my_bool delay_key_write;
+#ifdef THREAD
+ THR_LOCK lock;
+ pthread_mutex_t intern_lock; /* Locking for use with _locking */
+ rw_lock_t *key_root_lock;
+#endif
+ my_off_t mmaped_length;
+ uint nonmmaped_inserts; /* counter of writing in
+ non-mmaped area */
+ MARIA_FILE_BITMAP bitmap;
+ rw_lock_t mmap_lock;
+} MARIA_SHARE;
+
+
+typedef byte MARIA_BITMAP_BUFFER;
+
+typedef struct st_maria_bitmap_block
+{
+ ulonglong page; /* Page number */
+ /* Number of continuous pages. TAIL_BIT is set if this is a tail page */
+ uint page_count;
+ uint empty_space; /* Set for head and tail pages */
+ /*
+ Number of BLOCKS for block-region (holds all non-blob-fields or one blob)
+ */
+ uint sub_blocks;
+ /* set to <> 0 in write_record() if this block was actually used */
+ uint8 used;
+ uint8 org_bitmap_value;
+} MARIA_BITMAP_BLOCK;
+
+
+typedef struct st_maria_bitmap_blocks
+{
+ MARIA_BITMAP_BLOCK *block;
+ uint count;
+ my_bool tail_page_skipped; /* If some tail pages were not used */
+ my_bool page_skipped; /* If some full pages were not used */
+} MARIA_BITMAP_BLOCKS;
+
+
+/* Data about the currently read row */
+typedef struct st_maria_row
+{
+ MARIA_BITMAP_BLOCKS insert_blocks;
+ MARIA_BITMAP_BUFFER *extents;
+ MARIA_RECORD_POS lastpos, nextpos;
+ MARIA_RECORD_POS *tail_positions;
+ ha_checksum checksum;
+ byte *empty_bits, *field_lengths;
+ byte *empty_bits_buffer; /* For storing cur_row.empty_bits */
+ uint *null_field_lengths; /* All null field lengths */
+ ulong *blob_lengths; /* Length for each blob */
+ ulong base_length, normal_length, char_length, varchar_length, blob_length;
+ ulong head_length, total_length;
+ my_size_t extents_buffer_length; /* Size of 'extents' buffer */
+ uint field_lengths_length; /* Length of data in field_lengths */
+ uint extents_count; /* number of extents in 'extents' */
+ uint full_page_count, tail_count; /* For maria_chk */
+} MARIA_ROW;
+
+/* Data to scan row in blocked format */
+typedef struct st_maria_block_scan
+{
+ byte *bitmap_buff, *bitmap_pos, *bitmap_end, *page_buff;
+ byte *dir, *dir_end;
+ ulong bitmap_page;
+ ulonglong bits;
+ uint number_of_rows, bit_pos;
+ MARIA_RECORD_POS row_base_page;
+} MARIA_BLOCK_SCAN;
+
+
+struct st_maria_info
+{
+ MARIA_SHARE *s; /* Shared between open:s */
+ MARIA_STATUS_INFO *state, save_state;
+ MARIA_ROW cur_row, new_row;
+ MARIA_BLOCK_SCAN scan;
+ MARIA_BLOB *blobs; /* Pointer to blobs */
+ MARIA_BIT_BUFF bit_buff;
+ DYNAMIC_ARRAY bitmap_blocks;
+ /* accumulate indexfile changes between writes */
+ TREE *bulk_insert;
+ DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
+ MEM_ROOT ft_memroot; /* used by the parser */
+ MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
+ char *filename; /* parameter to open filename */
+ byte *buff; /* page buffer */
+ byte *keyread_buff; /* Buffer for last key read */
+ byte *lastkey, *lastkey2; /* Last used search key */
+ byte *first_mbr_key; /* Searched spatial key */
+ byte *rec_buff; /* Temp buffer for recordpack */
+ byte *int_keypos, /* Save position for next/previous */
+ *int_maxpos; /* -""- */
+ uint int_nod_flag; /* -""- */
+ uint32 int_keytree_version; /* -""- */
+ int (*read_record) (struct st_maria_info *, byte*, MARIA_RECORD_POS);
+ invalidator_by_filename invalidator; /* query cache invalidator */
+ ulong this_unique; /* unique filenumber or thread */
+ ulong last_unique; /* last unique number */
+ ulong this_loop; /* counter for this open */
+ ulong last_loop; /* last used counter */
+ MARIA_RECORD_POS save_lastpos;
+ MARIA_RECORD_POS dup_key_pos;
+ my_off_t pos; /* Intern variable */
+ my_off_t last_keypage; /* Last key page read */
+ my_off_t last_search_keypage; /* Last keypage when searching */
+
+ /*
+ QQ: the following two xxx_length fields should be removed,
+ as they are not compatible with parallel repair
+ */
+ ulong packed_length, blob_length; /* Length of found, packed record */
+ my_size_t rec_buff_size;
+ int dfile; /* The datafile */
+ uint opt_flag; /* Optim. for space/speed */
+ uint update; /* If file changed since open */
+ int lastinx; /* Last used index */
+ uint lastkey_length; /* Length of key in lastkey */
+ uint last_rkey_length; /* Last length in maria_rkey() */
+ enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */
+ uint save_lastkey_length;
+ uint pack_key_length; /* For MARIAMRG */
+ int errkey; /* Got last error on this key */
+ int lock_type; /* How database was locked */
+ int tmp_lock_type; /* When locked by readinfo */
+ uint data_changed; /* Somebody has changed data */
+ uint save_update; /* When using KEY_READ */
+ int save_lastinx;
+ LIST open_list;
+ IO_CACHE rec_cache; /* When caching records */
+ uint preload_buff_size; /* When preloading indexes */
+ myf lock_wait; /* is 0 or MY_DONT_WAIT */
+ my_bool was_locked; /* Was locked in panic */
+ my_bool append_insert_at_end; /* Set if concurrent insert */
+ my_bool quick_mode;
+ /* If info->keyread_buff can't be used for rnext */
+ my_bool page_changed;
+ /* If info->keyread_buff has to be reread for rnext */
+ my_bool keybuff_used;
+ my_bool once_flags; /* For MARIA_MRG */
+#ifdef __WIN__
+ my_bool owned_by_merge; /* This Maria table is part of a merge union */
+#endif
+#ifdef THREAD
+ THR_LOCK_DATA lock;
+#endif
+ uchar *maria_rtree_recursion_state; /* For RTREE */
+ int maria_rtree_recursion_depth;
+};
+
+/* Some defines used by maria-functions */
+
+#define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _search() */
+#define F_EXTRA_LCK -1
+
+/* bits in opt_flag */
+#define MEMMAP_USED 32
+#define REMEMBER_OLD_POS 64
+
+#define WRITEINFO_UPDATE_KEYFILE 1
+#define WRITEINFO_NO_UNLOCK 2
+
+/* once_flags */
+#define USE_PACKED_KEYS 1
+#define RRND_PRESERVE_LASTINX 2
+
+/* bits in state.changed */
+
+#define STATE_CHANGED 1
+#define STATE_CRASHED 2
+#define STATE_CRASHED_ON_REPAIR 4
+#define STATE_NOT_ANALYZED 8
+#define STATE_NOT_OPTIMIZED_KEYS 16
+#define STATE_NOT_SORTED_PAGES 32
+
+/* options to maria_read_cache */
+
+#define READING_NEXT 1
+#define READING_HEADER 2
+
+#define maria_getint(x) ((uint) mi_uint2korr(x) & 32767)
+#define maria_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\
+ mi_int2store(x,boh); }
+#define _ma_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0)
+#define maria_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \
+ DBUG_PRINT("error", ("Marked table crashed")); \
+ }while(0)
+#define maria_mark_crashed_on_repair(x) do{(x)->s->state.changed|= \
+ STATE_CRASHED|STATE_CRASHED_ON_REPAIR; \
+ (x)->update|= HA_STATE_CHANGED; \
+ DBUG_PRINT("error", \
+ ("Marked table crashed")); \
+ }while(0)
+#define maria_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED)
+#define maria_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR)
+#define maria_print_error(SHARE, ERRNO) \
+ _ma_report_error((ERRNO), (SHARE)->index_file_name)
+
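+/*
+  A minimal sketch of how maria_putint()/maria_getint() above are meant to
+  be used (illustration only; MARIA_HEADER_EXAMPLE is not defined anywhere,
+  so this is never compiled): the 2-byte key page header keeps the used page
+  length in the low 15 bits and the nod (non-leaf) flag in bit 15.
+*/
+#ifdef MARIA_HEADER_EXAMPLE
+static void _ma_example_page_header(byte *page)
+{
+  uint length;
+  maria_putint(page, 1024, 1);          /* store length 1024 with nod flag */
+  length= maria_getint(page);           /* 1024; the flag bit is masked off */
+  (void) length;
+}
+#endif
+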
+/* Functions to store length of space packed keys, VARCHAR or BLOB keys */
+
+#define store_key_length(key,length) \
+{ if ((length) < 255) \
+ { *(key)=(length); } \
+ else \
+ { *(key)=255; mi_int2store((key)+1,(length)); } \
+}
+
+#define get_key_full_length(length,key) \
+ { if (*(uchar*) (key) != 255) \
+ length= ((uint) *(uchar*) ((key)++))+1; \
+ else \
+ { length=mi_uint2korr((key)+1)+3; (key)+=3; } \
+}
+
+#define get_key_full_length_rdonly(length,key) \
+{ if (*(uchar*) (key) != 255) \
+ length= ((uint) *(uchar*) ((key)))+1; \
+ else \
+ { length=mi_uint2korr((key)+1)+3; } \
+}
+
+#define get_pack_length(length) ((length) >= 255 ? 3 : 1)
+
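+/*
+  Sketch of the length prefix handled by store_key_length() and
+  get_key_full_length() (illustration only; MARIA_HEADER_EXAMPLE is never
+  defined): lengths below 255 use one prefix byte, longer ones a 255 marker
+  followed by a 2-byte length, and the "full" length returned includes the
+  prefix itself (length+1 or length+3).
+*/
+#ifdef MARIA_HEADER_EXAMPLE
+static void _ma_example_key_length(void)
+{
+  byte buff[3], *pos= buff;
+  uint full_length;
+  store_key_length(buff, 300);            /* stores 255 followed by 300 */
+  get_key_full_length(full_length, pos);  /* full_length == 303, pos += 3 */
+  (void) full_length;
+}
+#endif
+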
+#define MARIA_MIN_BLOCK_LENGTH 20 /* Because of delete-link */
+/* Don't use too small record-blocks */
+#define MARIA_EXTEND_BLOCK_LENGTH 20
+#define MARIA_SPLIT_LENGTH ((MARIA_EXTEND_BLOCK_LENGTH+4)*2)
+ /* Max prefix of record-block */
+#define MARIA_MAX_DYN_BLOCK_HEADER 20
+#define MARIA_BLOCK_INFO_HEADER_LENGTH 20
+#define MARIA_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */
+#define MARIA_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L)
+#define MARIA_DYN_MAX_ROW_LENGTH (MARIA_DYN_MAX_BLOCK_LENGTH - MARIA_SPLIT_LENGTH)
+#define MARIA_DYN_ALIGN_SIZE 4 /* Align blocks on this */
+#define MARIA_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */
+#define MARIA_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1)))
+#define MARIA_REC_BUFF_OFFSET ALIGN_SIZE(MARIA_DYN_DELETE_BLOCK_HEADER+sizeof(uint32))
+
+#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */
+
+#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */
+#define PACK_TYPE_SPACE_FIELDS 2
+#define PACK_TYPE_ZERO_FILL 4
+#define MARIA_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */
+
+#define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size))
+#define MARIA_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */
+#define MARIA_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */
+
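+/*
+  Worked example of MARIA_BLOCK_SIZE() (the values are picked only for
+  illustration): with key_length=300, data_pointer=6, key_pointer=5 and
+  block_size=1024 the macro computes ((300+6+5)*4 + 5 + 2) = 1251 and
+  1251/1024 + 1 = 2, i.e. a key block length of 2048 bytes, always a whole
+  multiple of block_size.
+*/
+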
+#define MARIA_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */
+#define MARIA_MIN_ROWS_TO_USE_BULK_INSERT 100
+#define MARIA_MIN_ROWS_TO_DISABLE_INDEXES 100
+#define MARIA_MIN_ROWS_TO_USE_WRITE_CACHE 10
+
+/* The UNIQUE check is done with a hashed long key */
+
+#define MARIA_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT
+#define maria_unique_store(A,B) mi_int4store((A),(B))
+
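+/*
+  Sketch of how a UNIQUE constraint is checked (illustration only;
+  MARIA_HEADER_EXAMPLE is never defined, and the _ma_unique_hash() /
+  _ma_check_unique() prototypes appear further down in this header): the
+  index stores a 4-byte hash of the column values, and only rows whose
+  hashes collide are compared in full before a duplicate-key error is
+  returned.
+*/
+#ifdef MARIA_HEADER_EXAMPLE
+static my_bool _ma_example_unique_check(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+                                        byte *record)
+{
+  ha_checksum hash= _ma_unique_hash(def, record);
+  byte key[4];
+  maria_unique_store(key, hash);            /* the value put into the index */
+  /* Non-zero if a row with the same unique column values already exists */
+  return _ma_check_unique(info, def, record, hash, HA_OFFSET_ERROR);
+}
+#endif
+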
+#ifdef THREAD
+extern pthread_mutex_t THR_LOCK_maria;
+#endif
+#if !defined(THREAD) || defined(DONT_USE_RW_LOCKS)
+#define rw_wrlock(A) {}
+#define rw_rdlock(A) {}
+#define rw_unlock(A) {}
+#endif
+
+ /* Some extern variables */
+
+extern LIST *maria_open_list;
+extern uchar NEAR maria_file_magic[], NEAR maria_pack_file_magic[];
+extern uint NEAR maria_read_vec[], NEAR maria_readnext_vec[];
+extern uint maria_quick_table_bits;
+extern const char *maria_data_root;
+extern byte maria_zero_string[];
+extern my_bool maria_inited;
+
+ /* This is used by _ma_calc_xxx_key_length and _ma_store_key */
+
+typedef struct st_maria_s_param
+{
+ uint ref_length, key_length, n_ref_length;
+ uint n_length, totlength, part_of_prev_key, prev_length, pack_marker;
+ const byte *key;
+ byte *prev_key, *next_key_pos;
+ bool store_not_null;
+} MARIA_KEY_PARAM;
+
+ /* Prototypes for intern functions */
+
+extern int _ma_read_dynamic_record(MARIA_HA *, byte *, MARIA_RECORD_POS);
+extern int _ma_read_rnd_dynamic_record(MARIA_HA *, byte *, MARIA_RECORD_POS,
+ my_bool);
+extern my_bool _ma_write_dynamic_record(MARIA_HA *, const byte *);
+extern my_bool _ma_update_dynamic_record(MARIA_HA *, MARIA_RECORD_POS,
+ const byte *);
+extern my_bool _ma_delete_dynamic_record(MARIA_HA *info);
+extern my_bool _ma_cmp_dynamic_record(MARIA_HA *info, const byte *record);
+extern my_bool _ma_write_blob_record(MARIA_HA *, const byte *);
+extern my_bool _ma_update_blob_record(MARIA_HA *, MARIA_RECORD_POS,
+ const byte *);
+extern int _ma_read_static_record(MARIA_HA *info, byte *, MARIA_RECORD_POS);
+extern int _ma_read_rnd_static_record(MARIA_HA *, byte *, MARIA_RECORD_POS,
+ my_bool);
+extern my_bool _ma_write_static_record(MARIA_HA *, const byte *);
+extern my_bool _ma_update_static_record(MARIA_HA *, MARIA_RECORD_POS,
+ const byte *);
+extern my_bool _ma_delete_static_record(MARIA_HA *info);
+extern my_bool _ma_cmp_static_record(MARIA_HA *info, const byte *record);
+extern int _ma_ck_write(MARIA_HA *info, uint keynr, byte *key,
+ uint length);
+extern int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key, uint key_length,
+ MARIA_RECORD_POS *root, uint comp_flag);
+extern int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key, MARIA_RECORD_POS *root);
+extern int _ma_insert(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key,
+ byte *anc_buff, byte *key_pos, byte *key_buff,
+ byte *father_buff, byte *father_keypos,
+ my_off_t father_page, my_bool insert_last);
+extern int _ma_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key, byte *buff, byte *key_buff,
+ my_bool insert_last);
+extern byte *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo,
+ byte *page, byte *key,
+ uint *return_key_length,
+ byte ** after_key);
+extern int _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ byte *key_pos, byte *org_key,
+ byte *key_buff, const byte *key,
+ MARIA_KEY_PARAM *s_temp);
+extern int _ma_calc_var_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ byte *key_pos, byte *org_key,
+ byte *key_buff, const byte *key,
+ MARIA_KEY_PARAM *s_temp);
+extern int _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo,
+ uint nod_flag, byte *key_pos,
+ byte *org_key, byte *prev_key,
+ const byte *key,
+ MARIA_KEY_PARAM *s_temp);
+extern int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo,
+ uint nod_flag, byte *key_pos,
+ byte *org_key, byte *prev_key,
+ const byte *key,
+ MARIA_KEY_PARAM *s_temp);
+void _ma_store_static_key(MARIA_KEYDEF *keyinfo, byte *key_pos,
+ MARIA_KEY_PARAM *s_temp);
+void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos,
+ MARIA_KEY_PARAM *s_temp);
+#ifdef NOT_USED
+void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos,
+ MARIA_KEY_PARAM *s_temp);
+#endif
+void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos,
+ MARIA_KEY_PARAM *s_temp);
+
+extern int _ma_ck_delete(MARIA_HA *info, uint keynr, byte *key,
+ uint key_length);
+extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer);
+extern int _ma_writeinfo(MARIA_HA *info, uint options);
+extern int _ma_test_if_changed(MARIA_HA *info);
+extern int _ma_mark_file_changed(MARIA_HA *info);
+extern int _ma_decrement_open_count(MARIA_HA *info);
+extern int _ma_check_index(MARIA_HA *info, int inx);
+extern int _ma_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key,
+ uint key_len, uint nextflag, my_off_t pos);
+extern int _ma_bin_search(struct st_maria_info *info, MARIA_KEYDEF *keyinfo,
+ byte *page, byte *key, uint key_len,
+ uint comp_flag, byte **ret_pos, byte *buff,
+ my_bool *was_last_key);
+extern int _ma_seq_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *page, byte *key, uint key_len,
+ uint comp_flag, byte ** ret_pos, byte *buff,
+ my_bool *was_last_key);
+extern int _ma_prefix_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *page, byte *key, uint key_len,
+ uint comp_flag, byte ** ret_pos, byte *buff,
+ my_bool *was_last_key);
+extern my_off_t _ma_kpos(uint nod_flag, byte *after_key);
+extern void _ma_kpointer(MARIA_HA *info, byte *buff, my_off_t pos);
+extern MARIA_RECORD_POS _ma_dpos(MARIA_HA *info, uint nod_flag,
+ const byte *after_key);
+extern MARIA_RECORD_POS _ma_rec_pos(MARIA_SHARE *info, byte *ptr);
+extern void _ma_dpointer(MARIA_HA *info, byte *buff, MARIA_RECORD_POS pos);
+extern uint _ma_get_static_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ byte **page, byte *key);
+extern uint _ma_get_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ byte **page, byte *key);
+extern uint _ma_get_binary_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
+ byte ** page_pos, byte *key);
+extern byte *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *keypos, byte *lastkey,
+ byte *endpos, uint *return_key_length);
+extern byte *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *page, byte *key, byte *keypos,
+ uint *return_key_length);
+extern uint _ma_keylength(MARIA_KEYDEF *keyinfo, const byte *key);
+extern uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const byte *key,
+ HA_KEYSEG *end);
+extern byte *_ma_move_key(MARIA_KEYDEF *keyinfo, byte *to, const byte *from);
+extern int _ma_search_next(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ byte *key, uint key_length, uint nextflag,
+ my_off_t pos);
+extern int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t pos);
+extern int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t pos);
+extern byte *_ma_fetch_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t page, int level, byte *buff,
+ int return_buffer);
+extern int _ma_write_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ my_off_t page, int level, byte *buff);
+extern int _ma_dispose(MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos,
+ int level);
+extern my_off_t _ma_new(MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level);
+extern uint _ma_make_key(MARIA_HA *info, uint keynr, byte *key,
+ const byte *record, MARIA_RECORD_POS filepos);
+extern uint _ma_pack_key(MARIA_HA *info, uint keynr, byte *key,
+ const byte *old, uint key_length,
+ HA_KEYSEG ** last_used_keyseg);
+extern int _ma_read_key_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS);
+extern int _ma_read_cache(IO_CACHE *info, byte *buff, MARIA_RECORD_POS pos,
+ uint length, int re_read_if_possibly);
+extern ulonglong ma_retrieve_auto_increment(MARIA_HA *info, const byte *record);
+
+extern my_bool _ma_alloc_buffer(byte **old_addr, my_size_t *old_size,
+ my_size_t new_size);
+extern ulong _ma_rec_unpack(MARIA_HA *info, byte *to, byte *from,
+ ulong reclength);
+extern my_bool _ma_rec_check(MARIA_HA *info, const char *record,
+ byte *packpos, ulong packed_length,
+ my_bool with_checkum);
+extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos,
+ ulong length, my_off_t next_filepos,
+ byte ** record, ulong *reclength,
+ int *flag);
+extern void _ma_print_key(FILE *stream, HA_KEYSEG *keyseg,
+ const byte *key, uint length);
+extern my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile);
+extern my_bool _ma_once_end_pack_row(MARIA_SHARE *share);
+extern int _ma_read_pack_record(MARIA_HA *info, byte *buf,
+ MARIA_RECORD_POS filepos);
+extern int _ma_read_rnd_pack_record(MARIA_HA *, byte *, MARIA_RECORD_POS,
+ my_bool);
+extern int _ma_pack_rec_unpack(MARIA_HA *info, MARIA_BIT_BUFF *bit_buff,
+ byte *to, byte *from, ulong reclength);
+extern ulonglong _ma_safe_mul(ulonglong a, ulonglong b);
+extern int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf,
+ const byte *oldrec, const byte *newrec,
+ my_off_t pos);
+
+/* Parameter to _ma_get_block_info */
+
+typedef struct st_maria_block_info
+{
+ uchar header[MARIA_BLOCK_INFO_HEADER_LENGTH];
+ ulong rec_len;
+ ulong data_len;
+ ulong block_len;
+ ulong blob_len;
+ MARIA_RECORD_POS filepos;
+ MARIA_RECORD_POS next_filepos;
+ MARIA_RECORD_POS prev_filepos;
+ uint second_read;
+ uint offset;
+} MARIA_BLOCK_INFO;
+
+/* bits in return from _ma_get_block_info */
+
+#define BLOCK_FIRST 1
+#define BLOCK_LAST 2
+#define BLOCK_DELETED 4
+#define BLOCK_ERROR 8 /* Wrong data */
+#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */
+#define BLOCK_FATAL_ERROR 32 /* hardware-error */
+
+#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Needed for recursion */
+#define MAXERR 20
+#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */
+#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
+#define INDEX_TMP_EXT ".TMM"
+#define DATA_TMP_EXT ".TMD"
+
+#define UPDATE_TIME 1
+#define UPDATE_STAT 2
+#define UPDATE_SORT 4
+#define UPDATE_AUTO_INC 8
+#define UPDATE_OPEN_COUNT 16
+
+#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE)
+#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD)
+#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD)
+#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD)
+
+#define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0)
+#define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1)
+
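+/*
+  Sketch of the typical pattern around the two macros above (illustration
+  only; MARIA_HEADER_EXAMPLE is never defined): take the read lock and
+  refresh the shared state only when the handler holds no lock yet, and
+  release it again unless the table is locked externally.
+*/
+#ifdef MARIA_HEADER_EXAMPLE
+static int _ma_example_locked_read(MARIA_HA *info)
+{
+  if (fast_ma_readinfo(info))            /* lock + re-read state if needed */
+    return my_errno;
+  /* ... read from the index or data file here ... */
+  fast_ma_writeinfo(info);               /* unlock unless externally locked */
+  return 0;
+}
+#endif
+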
+extern uint _ma_get_block_info(MARIA_BLOCK_INFO *, File, my_off_t);
+extern uint _ma_rec_pack(MARIA_HA *info, byte *to, const byte *from);
+extern uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff,
+ MARIA_BLOCK_INFO *info, byte **rec_buff_p,
+ my_size_t *rec_buff_size,
+ File file, my_off_t filepos);
+extern void _ma_store_blob_length(byte *pos, uint pack_length, uint length);
+extern void _ma_report_error(int errcode, const char *file_name);
+extern my_bool _ma_memmap_file(MARIA_HA *info);
+extern void _ma_unmap_file(MARIA_HA *info);
+extern uint _ma_save_pack_length(uint version, byte * block_buff,
+ ulong length);
+extern uint _ma_calc_pack_length(uint version, ulong length);
+extern ulong _ma_calc_blob_length(uint length, const byte *pos);
+extern uint _ma_mmap_pread(MARIA_HA *info, byte *Buffer,
+ uint Count, my_off_t offset, myf MyFlags);
+extern uint _ma_mmap_pwrite(MARIA_HA *info, byte *Buffer,
+ uint Count, my_off_t offset, myf MyFlags);
+extern uint _ma_nommap_pread(MARIA_HA *info, byte *Buffer,
+ uint Count, my_off_t offset, myf MyFlags);
+extern uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer,
+ uint Count, my_off_t offset, myf MyFlags);
+
+uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite);
+byte *_ma_state_info_read(byte *ptr, MARIA_STATE_INFO *state);
+uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state,
+ my_bool pRead);
+uint _ma_base_info_write(File file, MARIA_BASE_INFO *base);
+int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg);
+char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg);
+uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef);
+char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef);
+uint _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *keydef);
+char *_ma_uniquedef_read(char *ptr, MARIA_UNIQUEDEF *keydef);
+uint _ma_recinfo_write(File file, MARIA_COLUMNDEF *recinfo);
+char *_ma_recinfo_read(char *ptr, MARIA_COLUMNDEF *recinfo);
+ulong _ma_calc_total_blob_length(MARIA_HA *info, const byte *record);
+ha_checksum _ma_checksum(MARIA_HA *info, const byte *buf);
+ha_checksum _ma_static_checksum(MARIA_HA *info, const byte *buf);
+my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ byte *record, ha_checksum unique_hash,
+ MARIA_RECORD_POS pos);
+ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *buf);
+my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const byte *record, MARIA_RECORD_POS pos);
+my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const byte *record, MARIA_RECORD_POS pos);
+my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b,
+ my_bool null_are_equal);
+void _ma_get_status(void *param, int concurrent_insert);
+void _ma_update_status(void *param);
+void _ma_copy_status(void *to, void *from);
+my_bool _ma_check_status(void *param);
+
+extern MARIA_HA *_ma_test_if_reopen(char *filename);
+my_bool _ma_check_table_is_closed(const char *name, const char *where);
+int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, File file_to_dup);
+int _ma_open_keyfile(MARIA_SHARE *share);
+void _ma_setup_functions(register MARIA_SHARE *share);
+my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size);
+void _ma_remap_file(MARIA_HA *info, my_off_t size);
+
+MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const byte *record);
+my_bool _ma_write_abort_default(MARIA_HA *info);
+
+/* Functions needed by _ma_check (are overridden in MySQL) */
+C_MODE_START
+volatile int *_ma_killed_ptr(HA_CHECK *param);
+void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...));
+void _ma_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...));
+void _ma_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...));
+C_MODE_END
+
+int _ma_flush_pending_blocks(MARIA_SORT_PARAM *param);
+int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param);
+int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param);
+#ifdef THREAD
+pthread_handler_t _ma_thr_find_all_keys(void *arg);
+#endif
+int _ma_flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file);
+
+int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param);
+int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
+ ulong);
+int _ma_sync_table_files(const MARIA_HA *info);
diff --git a/storage/maria/maria_ftdump.c b/storage/maria/maria_ftdump.c
new file mode 100644
index 00000000000..ef30540bfce
--- /dev/null
+++ b/storage/maria/maria_ftdump.c
@@ -0,0 +1,279 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code.
+   Support for long options (my_getopt) was added 22.5.2002 by Jani Tolonen */
+
+#include "ma_ftdefs.h"
+#include <my_getopt.h>
+
+static void usage();
+static void complain(int val);
+static my_bool get_one_option(int, const struct my_option *, char *);
+
+static int count=0, stats=0, dump=0, lstats=0;
+static my_bool verbose;
+static char *query=NULL;
+static uint lengths[256];
+
+#define MAX_LEN (HA_FT_MAXBYTELEN+10)
+#define HOW_OFTEN_TO_WRITE 10000
+
+static struct my_option my_long_options[] =
+{
+ {"help", 'h', "Display help and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"help", '?', "Synonym for -h.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"count", 'c', "Calculate per-word stats (counts and global weights).",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"dump", 'd', "Dump index (incl. data offsets and word weights).",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"length", 'l', "Report length distribution.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"stats", 's', "Report global stats.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"verbose", 'v', "Be verbose.",
+ (gptr*) &verbose, (gptr*) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+int main(int argc,char *argv[])
+{
+ int error=0, subkeys;
+ uint keylen, keylen2=0, inx, doc_cnt=0;
+ float weight= 1.0;
+ double gws, min_gws=0, avg_gws=0;
+ MARIA_HA *info;
+ char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
+ ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
+ struct { MARIA_HA *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
+
+ MY_INIT(argv[0]);
+ if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
+ exit(error);
+ maria_init();
+ if (count || dump)
+ verbose=0;
+ if (!count && !dump && !lstats && !query)
+ stats=1;
+
+ if (verbose)
+ setbuf(stdout,NULL);
+
+ if (argc < 2)
+ usage();
+
+ {
+ char *end;
+ inx= (uint) strtoll(argv[1], &end, 10);
+ if (*end)
+ usage();
+ }
+
+ init_key_cache(maria_key_cache,MARIA_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0);
+
+ if (!(info=maria_open(argv[0], O_RDONLY,
+ HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
+ {
+ error=my_errno;
+ goto err;
+ }
+
+ *buf2=0;
+ aio->info=info;
+
+ if ((inx >= info->s->base.keys) ||
+ !(info->s->keyinfo[inx].flag & HA_FULLTEXT))
+ {
+ printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename);
+ goto err;
+ }
+
+ maria_lock_database(info, F_EXTRA_LCK);
+
+ info->cur_row.lastpos= HA_OFFSET_ERROR;
+ info->update|= HA_STATE_PREV_FOUND;
+
+ while (!(error=maria_rnext(info,NULL,inx)))
+ {
+ keylen=*(info->lastkey);
+
+ subkeys=ft_sintXkorr(info->lastkey+keylen+1);
+ if (subkeys >= 0)
+ weight=*(float*)&subkeys;
+
+#ifdef HAVE_SNPRINTF
+ snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1);
+#else
+ sprintf(buf,"%.*s",(int) keylen,info->lastkey+1);
+#endif
+ my_casedn_str(default_charset_info,buf);
+ total++;
+ lengths[keylen]++;
+
+ if (count || stats)
+ {
+ if (strcmp(buf, buf2))
+ {
+ if (*buf2)
+ {
+ uniq++;
+ avg_gws+=gws=GWS_IN_USE;
+ if (count)
+ printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
+ if (maxlen<keylen2)
+ {
+ maxlen=keylen2;
+ strmov(buf_maxlen, buf2);
+ }
+ if (max_doc_cnt < doc_cnt)
+ {
+ max_doc_cnt=doc_cnt;
+ strmov(buf_min_gws, buf2);
+ min_gws=gws;
+ }
+ }
+ strmov(buf2, buf);
+ keylen2=keylen;
+ doc_cnt=0;
+ }
+ doc_cnt+= (subkeys >= 0 ? 1 : -subkeys);
+ }
+ if (dump)
+ {
+ if (subkeys>=0)
+ printf("%9lx %20.7f %s\n", (long) info->cur_row.lastpos,weight,buf);
+ else
+ printf("%9lx => %17d %s\n",(long) info->cur_row.lastpos,-subkeys,buf);
+ }
+ if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
+ printf("%10ld\r",total);
+ }
+ maria_lock_database(info, F_UNLCK);
+
+ if (count || stats)
+ {
+ if (*buf2)
+ {
+ uniq++;
+ avg_gws+=gws=GWS_IN_USE;
+ if (count)
+ printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
+ if (maxlen<keylen2)
+ {
+ maxlen=keylen2;
+ strmov(buf_maxlen, buf2);
+ }
+ if (max_doc_cnt < doc_cnt)
+ {
+ max_doc_cnt=doc_cnt;
+ strmov(buf_min_gws, buf2);
+ min_gws=gws;
+ }
+ }
+ }
+
+ if (stats)
+ {
+ count=0;
+ for (inx=0;inx<256;inx++)
+ {
+ count+=lengths[inx];
+ if ((ulong) count >= total/2)
+ break;
+ }
+ printf("Total rows: %lu\nTotal words: %lu\n"
+ "Unique words: %lu\nLongest word: %lu chars (%s)\n"
+ "Median length: %u\n"
+ "Average global weight: %f\n"
+ "Most common word: %lu times, weight: %f (%s)\n",
+ (long) info->state->records, total, uniq, maxlen, buf_maxlen,
+ inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
+ }
+ if (lstats)
+ {
+ count=0;
+ for (inx=0; inx<256; inx++)
+ {
+ count+=lengths[inx];
+ if (count && lengths[inx])
+ printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
+ (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
+ 100.0*count/total);
+ }
+ }
+
+err:
+ if (error && error != HA_ERR_END_OF_FILE)
+ printf("got error %d\n",my_errno);
+ if (info)
+ maria_close(info);
+ maria_end();
+ return 0;
+}
+
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument __attribute__((unused)))
+{
+ switch(optid) {
+ case 'd':
+ dump=1;
+ complain(count || query);
+ break;
+ case 's':
+ stats=1;
+ complain(query!=0);
+ break;
+ case 'c':
+ count= 1;
+ complain(dump || query);
+ break;
+ case 'l':
+ lstats=1;
+ complain(query!=0);
+ break;
+ case '?':
+ case 'h':
+ usage();
+ }
+ return 0;
+}
+
+#include <help_start.h>
+
+static void usage()
+{
+ printf("Use: maria_ftdump <table_name> <index_num>\n");
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+ NETWARE_SET_SCREEN_MODE(1);
+ exit(1);
+}
+
+#include <help_end.h>
+
+static void complain(int val) /* Kinda assert :-) */
+{
+ if (val)
+ {
+ printf("You cannot use these options together!\n");
+ exit(1);
+ }
+}
diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c
new file mode 100644
index 00000000000..41c519693d5
--- /dev/null
+++ b/storage/maria/maria_pack.c
@@ -0,0 +1,3212 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Pack MARIA file */
+
+#ifndef USE_MY_FUNC
+#define USE_MY_FUNC /* We need at least my_malloc */
+#endif
+
+#include "maria_def.h"
+#include <queues.h>
+#include <my_tree.h>
+#include "mysys_err.h"
+#ifdef MSDOS
+#include <io.h>
+#endif
+#ifndef __GNU_LIBRARY__
+#define __GNU_LIBRARY__ /* Skip warnings in getopt.h */
+#endif
+#include <my_getopt.h>
+#include <assert.h>
+
+#if SIZEOF_LONG_LONG > 4
+#define BITS_SAVED 64
+#else
+#define BITS_SAVED 32
+#endif
+
+#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */
+#define HEAD_LENGTH 32
+#define ALLOWED_JOIN_DIFF 256 /* Diff allowed to join trees */
+
+#define DATA_TMP_EXT ".TMD"
+#define OLD_EXT ".OLD"
+#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
+
+struct st_file_buffer {
+ File file;
+ uchar *buffer,*pos,*end;
+ my_off_t pos_in_file;
+ int bits;
+ ulonglong bitbucket;
+};
+
+struct st_huff_tree;
+struct st_huff_element;
+
+typedef struct st_huff_counts {
+ uint field_length,max_zero_fill;
+ uint pack_type;
+ uint max_end_space,max_pre_space,length_bits,min_space;
+ ulong max_length;
+ enum en_fieldtype field_type;
+ struct st_huff_tree *tree; /* Tree for field */
+ my_off_t counts[256];
+ my_off_t end_space[8];
+ my_off_t pre_space[8];
+ my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
+ TREE int_tree; /* Tree for detecting distinct column values. */
+ byte *tree_buff; /* Column values, 'field_length' each. */
+ byte *tree_pos; /* Points to end of column values in 'tree_buff'. */
+} HUFF_COUNTS;
+
+typedef struct st_huff_element HUFF_ELEMENT;
+
+/*
+ WARNING: It is crucial for the optimizations in calc_packed_length()
+ that 'count' is the first element of 'HUFF_ELEMENT'.
+*/
+struct st_huff_element {
+ my_off_t count;
+ union un_element {
+ struct st_nod {
+ HUFF_ELEMENT *left,*right;
+ } nod;
+ struct st_leaf {
+ HUFF_ELEMENT *null;
+ uint element_nr; /* Number of element */
+ } leaf;
+ } a;
+};
+
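+/*
+  Sketch of the property the warning above is about (illustration only;
+  MARIA_PACK_EXAMPLE is never defined): with 'count' as the first member, a
+  pointer to a HUFF_ELEMENT can be treated as a pointer to its count, which
+  is what the packed-length calculation depends on.
+*/
+#ifdef MARIA_PACK_EXAMPLE
+static my_off_t huff_element_count(const void *element)
+{
+  /* Valid only while 'count' stays the first member of HUFF_ELEMENT */
+  return *(const my_off_t*) element;
+}
+#endif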
+
+typedef struct st_huff_tree {
+ HUFF_ELEMENT *root,*element_buffer;
+ HUFF_COUNTS *counts;
+ uint tree_number;
+ uint elements;
+ my_off_t bytes_packed;
+ uint tree_pack_length;
+ uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
+ ulonglong *code;
+ uchar *code_len;
+} HUFF_TREE;
+
+
+typedef struct st_isam_mrg {
+ MARIA_HA **file,**current,**end;
+ uint free_file;
+ uint count;
+ uint min_pack_length; /* These are used by packed data */
+ uint max_pack_length;
+ uint ref_length;
+ uint max_blob_length;
+ my_off_t records;
+ /* true if at least one source file has at least one disabled index */
+ my_bool src_file_has_indexes_disabled;
+} PACK_MRG_INFO;
+
+
+extern int main(int argc,char * *argv);
+static void get_options(int *argc,char ***argv);
+static MARIA_HA *open_isam_file(char *name,int mode);
+static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count);
+static int compress(PACK_MRG_INFO *file,char *join_name);
+static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records);
+static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees,
+ uint trees,
+ HUFF_COUNTS *huff_counts,
+ uint fields);
+static int compare_tree(void* cmp_arg __attribute__((unused)),
+ const uchar *s,const uchar *t);
+static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts);
+static void check_counts(HUFF_COUNTS *huff_counts,uint trees,
+ my_off_t records);
+static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records,
+ uint max_space_length,my_off_t *space_counts,
+ my_off_t tot_space_count,
+ enum en_fieldtype field_type);
+static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees);
+static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts);
+static int compare_huff_elements(void *not_used, byte *a,byte *b);
+static int save_counts_in_queue(byte *key,element_count count,
+ HUFF_TREE *tree);
+static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag);
+static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
+static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
+static void make_traverse_code_tree(HUFF_TREE *huff_tree,
+ HUFF_ELEMENT *element,uint size,
+ ulonglong code);
+static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
+ my_off_t tot_elements,my_off_t filelength);
+static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
+static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees);
+static uint *make_offset_code_tree(HUFF_TREE *huff_tree,
+ HUFF_ELEMENT *element,
+ uint *offset);
+static uint max_bit(uint value);
+static int compress_isam_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts);
+static char *make_new_name(char *new_name,char *old_name);
+static char *make_old_name(char *new_name,char *old_name);
+static void init_file_buffer(File file,pbool read_buffer);
+static int flush_buffer(ulong neaded_length);
+static void end_file_buffer(void);
+static void write_bits(ulonglong value, uint bits);
+static void flush_bits(void);
+static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
+ ha_checksum crc);
+static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length,
+ ha_checksum crc);
+static int mrg_close(PACK_MRG_INFO *mrg);
+static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf);
+static void mrg_reset(PACK_MRG_INFO *mrg);
+#if !defined(DBUG_OFF)
+static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count);
+static int fakecmp(my_off_t **count1, my_off_t **count2);
+#endif
+
+
+static int error_on_write=0,test_only=0,verbose=0,silent=0,
+ write_loop=0,force_pack=0, isamchk_neaded=0;
+static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
+static my_bool backup, opt_wait;
+/*
+ tree_buff_length is somewhat arbitrary. The bigger it is the better
+ the chance to win in terms of compression factor. On the other hand,
+ this table becomes part of the compressed file header. And its length
+ is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
+*/
+static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
+static char tmp_dir[FN_REFLEN]={0},*join_table;
+static my_off_t intervall_length;
+static ha_checksum glob_crc;
+static struct st_file_buffer file_buffer;
+static QUEUE queue;
+static HUFF_COUNTS *global_count;
+static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static const char *load_default_groups[]= { "mariapack",0 };
+
+ /* The main program */
+
+int main(int argc, char **argv)
+{
+ int error,ok;
+ PACK_MRG_INFO merge;
+ char **default_argv;
+ MY_INIT(argv[0]);
+
+ load_defaults("my",load_default_groups,&argc,&argv);
+ default_argv= argv;
+ get_options(&argc,&argv);
+ maria_init();
+
+ error=ok=isamchk_neaded=0;
+ if (join_table)
+ { /* Join files into one */
+ if (open_isam_files(&merge,argv,(uint) argc) ||
+ compress(&merge,join_table))
+ error=1;
+ }
+ else while (argc--)
+ {
+ MARIA_HA *isam_file;
+ if (!(isam_file=open_isam_file(*argv++,O_RDWR)))
+ error=1;
+ else
+ {
+ merge.file= &isam_file;
+ merge.current=0;
+ merge.free_file=0;
+ merge.count=1;
+ if (compress(&merge,0))
+ error=1;
+ else
+ ok=1;
+ }
+ }
+ if (ok && isamchk_neaded && !silent)
+ puts("Remember to run maria_chk -rq on compressed tables");
+ VOID(fflush(stdout));
+ VOID(fflush(stderr));
+ free_defaults(default_argv);
+ maria_end();
+ my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
+ exit(error ? 2 : 0);
+#ifndef _lint
+ return 0; /* No compiler warning */
+#endif
+}
+
+enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE};
+
+static struct my_option my_long_options[] =
+{
+#ifdef __NETWARE__
+ {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"backup", 'b', "Make a backup of the table as table_name.OLD.",
+ (gptr*) &backup, (gptr*) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"character-sets-dir", OPT_CHARSETS_DIR_MP,
+ "Directory where character sets are.", (gptr*) &charsets_dir,
+ (gptr*) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.",
+ 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ {"force", 'f',
+ "Force packing of table even if it gets bigger or if tempfile exists.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"join", 'j',
+ "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.",
+ (gptr*) &join_table, (gptr*) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
+ 0, 0, 0},
+ {"help", '?', "Display this help and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"silent", 's', "Be more silent.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"tmpdir", 'T', "Use temporary directory to store temporary table.",
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"test", 't', "Don't pack table, only test packing it.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"version", 'V', "Output version information and exit.",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"wait", 'w', "Wait and retry if table is in use.", (gptr*) &opt_wait,
+ (gptr*) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+#include <help_start.h>
+
+static void print_version(void)
+{
+ VOID(printf("%s Ver 1.0 for %s on %s\n",
+ my_progname, SYSTEM_TYPE, MACHINE_TYPE));
+ NETWARE_SET_SCREEN_MODE(1);
+}
+
+
+static void usage(void)
+{
+ print_version();
+ puts("Copyright (C) 2002 MySQL AB");
+ puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
+ puts("and you are welcome to modify and redistribute it under the GPL license\n");
+
+ puts("Pack a MARIA-table to take much less space.");
+ puts("Keys are not updated; you must run maria_chk -rq on the datafile");
+ puts("afterwards to update the keys.");
+ puts("You should give the .MAI file as the filename argument.");
+
+ VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname));
+ my_print_help(my_long_options);
+ print_defaults("my", load_default_groups);
+ my_print_variables(my_long_options);
+}
+
+#include <help_end.h>
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ uint length;
+
+ switch(optid) {
+#ifdef __NETWARE__
+ case OPT_AUTO_CLOSE:
+ setscreenmode(SCR_AUTOCLOSE_ON_EXIT);
+ break;
+#endif
+ case 'f':
+ force_pack= 1;
+ tmpfile_createflag= O_RDWR | O_TRUNC;
+ break;
+ case 's':
+ write_loop= verbose= 0;
+ silent= 1;
+ break;
+ case 't':
+ test_only= 1;
+ /* Avoid resetting 'verbose' if it was already set > 1. */
+ if (! verbose)
+ verbose= 1;
+ break;
+ case 'T':
+ length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
+ if (length != dirname_length(tmp_dir))
+ {
+ tmp_dir[length]=FN_LIBCHAR;
+ tmp_dir[length+1]=0;
+ }
+ break;
+ case 'v':
+ verbose++; /* Allow for selecting the level of verbosity. */
+ silent= 0;
+ break;
+ case '#':
+ DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_pack.trace");
+ break;
+ case 'V':
+ print_version();
+ exit(0);
+ case 'I':
+ case '?':
+ usage();
+ exit(0);
+ }
+ return 0;
+}
+
+ /* reads options */
+ /* Initiates DEBUG - but no debugging here ! */
+
+static void get_options(int *argc,char ***argv)
+{
+ int ho_error;
+
+ my_progname= argv[0][0];
+ if (isatty(fileno(stdout)))
+ write_loop=1;
+
+ if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
+ exit(ho_error);
+
+ if (!*argc)
+ {
+ usage();
+ exit(1);
+ }
+ if (join_table)
+ {
+ backup=0; /* Not needed */
+ tmp_dir[0]=0;
+ }
+ return;
+}
+
+
+static MARIA_HA *open_isam_file(char *name,int mode)
+{
+ MARIA_HA *isam_file;
+ MARIA_SHARE *share;
+ DBUG_ENTER("open_isam_file");
+
+ if (!(isam_file=maria_open(name,mode,
+ (opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
+ HA_OPEN_ABORT_IF_LOCKED))))
+ {
+ VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno));
+ DBUG_RETURN(0);
+ }
+ share=isam_file->s;
+ if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table)
+ {
+ if (!force_pack)
+ {
+ VOID(fprintf(stderr, "%s is already compressed\n", name));
+ VOID(maria_close(isam_file));
+ DBUG_RETURN(0);
+ }
+ if (verbose)
+ puts("Recompressing already compressed table");
+ share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifying it */
+ }
+ if (! force_pack && share->state.state.records != 0 &&
+ (share->state.state.records <= 1 ||
+ share->state.state.data_file_length < 1024))
+ {
+ VOID(fprintf(stderr, "%s is too small to compress\n", name));
+ VOID(maria_close(isam_file));
+ DBUG_RETURN(0);
+ }
+ VOID(maria_lock_database(isam_file,F_WRLCK));
+ DBUG_RETURN(isam_file);
+}
+
+
+static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count)
+{
+ uint i,j;
+ mrg->count=0;
+ mrg->current=0;
+ mrg->file=(MARIA_HA**) my_malloc(sizeof(MARIA_HA*)*count,MYF(MY_FAE));
+ mrg->free_file=1;
+ mrg->src_file_has_indexes_disabled= 0;
+ for (i=0; i < count ; i++)
+ {
+ if (!(mrg->file[i]=open_isam_file(names[i],O_RDONLY)))
+ goto error;
+
+ mrg->src_file_has_indexes_disabled|=
+ ! maria_is_all_keys_active(mrg->file[i]->s->state.key_map,
+ mrg->file[i]->s->base.keys);
+ }
+ /* Check that files are identical */
+ for (j=0 ; j < count-1 ; j++)
+ {
+ MARIA_COLUMNDEF *m1,*m2,*end;
+ if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength ||
+ mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields)
+ goto diff_file;
+ m1=mrg->file[j]->s->rec;
+ end=m1+mrg->file[j]->s->base.fields;
+ m2=mrg->file[j+1]->s->rec;
+ for ( ; m1 != end ; m1++,m2++)
+ {
+ if (m1->type != m2->type || m1->length != m2->length)
+ goto diff_file;
+ }
+ }
+ mrg->count=count;
+ return 0;
+
+ diff_file:
+ VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
+ my_progname, names[j], names[j+1]));
+ error:
+ while (i--)
+ maria_close(mrg->file[i]);
+ my_free((gptr) mrg->file,MYF(0));
+ return 1;
+}
+
+
+static int compress(PACK_MRG_INFO *mrg,char *result_table)
+{
+ int error;
+ File new_file,join_isam_file;
+ MARIA_HA *isam_file;
+ MARIA_SHARE *share;
+ char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN];
+ uint i,header_length,fields,trees,used_trees;
+ my_off_t old_length,new_length,tot_elements;
+ HUFF_COUNTS *huff_counts;
+ HUFF_TREE *huff_trees;
+ DBUG_ENTER("compress");
+
+ isam_file=mrg->file[0]; /* Take this as an example */
+ share=isam_file->s;
+ new_file=join_isam_file= -1;
+ trees=fields=0;
+ huff_trees=0;
+ huff_counts=0;
+
+ /* Create temporary or join file */
+
+ if (backup)
+ VOID(fn_format(org_name,isam_file->filename,"",MARIA_NAME_DEXT,2));
+ else
+ VOID(fn_format(org_name,isam_file->filename,"",MARIA_NAME_DEXT,2+4+16));
+ if (!test_only && result_table)
+ {
+ /* Make a new indexfile based on first file in list */
+ uint length;
+ char *buff;
+ strmov(org_name,result_table); /* Fix error messages */
+ VOID(fn_format(new_name,result_table,"",MARIA_NAME_IEXT,2));
+ if ((join_isam_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME)))
+ < 0)
+ goto err;
+ length=(uint) share->base.keystart;
+ if (!(buff=my_malloc(length,MYF(MY_WME))))
+ goto err;
+ if (my_pread(share->kfile,buff,length,0L,MYF(MY_WME | MY_NABP)) ||
+ my_write(join_isam_file,buff,length,
+ MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
+ {
+ my_free(buff,MYF(0));
+ goto err;
+ }
+ my_free(buff,MYF(0));
+ VOID(fn_format(new_name,result_table,"",MARIA_NAME_DEXT,2));
+ }
+ else if (!tmp_dir[0])
+ VOID(make_new_name(new_name,org_name));
+ else
+ VOID(fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4));
+ if (!test_only &&
+ (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0)
+ goto err;
+
+ /* Start calculating statistics */
+
+ mrg->records=0;
+ for (i=0 ; i < mrg->count ; i++)
+ mrg->records+=mrg->file[i]->s->state.state.records;
+
+ DBUG_PRINT("info", ("Compressing %s: (%lu records)",
+ result_table ? new_name : org_name,
+ (ulong) mrg->records));
+ if (write_loop || verbose)
+ {
+ VOID(printf("Compressing %s: (%lu records)\n",
+ result_table ? new_name : org_name, (ulong) mrg->records));
+ }
+ trees=fields=share->base.fields;
+ huff_counts=init_huff_count(isam_file,mrg->records);
+ QUICK_SAFEMALLOC;
+
+ /*
+ Read the whole data file(s) for statistics.
+ */
+ DBUG_PRINT("info", ("- Calculating statistics"));
+ if (write_loop || verbose)
+ VOID(printf("- Calculating statistics\n"));
+ if (get_statistic(mrg,huff_counts))
+ goto err;
+ NORMAL_SAFEMALLOC;
+ old_length=0;
+ for (i=0; i < mrg->count ; i++)
+ old_length+= (mrg->file[i]->s->state.state.data_file_length -
+ mrg->file[i]->s->state.state.empty);
+
+ /*
+ Create a global priority queue in preparation for making
+ temporary Huffman trees.
+ */
+ if (init_queue(&queue,256,0,0,compare_huff_elements,0))
+ goto err;
+
+ /*
+ Check for each column whether we should use pre-space-compress, end-space-
+ compress, empty-field-compress or zero-field-compress.
+ */
+ check_counts(huff_counts,fields,mrg->records);
+
+ /*
+ Build a Huffman tree for each column.
+ */
+ huff_trees=make_huff_trees(huff_counts,trees);
+
+ /*
+ If the packed length of the combined columns is less than the sum of
+ the non-combined columns, then create common Huffman trees for them.
+ We do this only for byte compressed columns, not for distinct values
+ compressed columns.
+ */
+ if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
+ goto err;
+
+ /*
+ Assign codes to all byte or column values.
+ */
+ if (make_huff_decode_table(huff_trees,fields))
+ goto err;
+
+ /* Prepare a file buffer. */
+ init_file_buffer(new_file,0);
+
+ /*
+ Reserve space in the target file for the fixed compressed file header.
+ */
+ file_buffer.pos_in_file=HEAD_LENGTH;
+ if (! test_only)
+ VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0)));
+
+ /*
+ Write field infos: field type, pack type, length bits, tree number.
+ */
+ write_field_info(huff_counts,fields,used_trees);
+
+ /*
+ Write decode trees.
+ */
+ if (!(tot_elements=write_huff_tree(huff_trees,trees)))
+ goto err;
+
+ /*
+ Calculate the total length of the compression info header.
+ This includes the fixed compressed file header, the column compression
+ type descriptions, and the decode trees.
+ */
+ header_length=(uint) file_buffer.pos_in_file+
+ (uint) (file_buffer.pos-file_buffer.buffer);
+
+ /*
+ Compress the source file into the target file.
+ */
+ DBUG_PRINT("info", ("- Compressing file"));
+ if (write_loop || verbose)
+ VOID(printf("- Compressing file\n"));
+ error=compress_isam_file(mrg,huff_counts);
+ new_length=file_buffer.pos_in_file;
+ if (!error && !test_only)
+ {
+ char buff[MEMMAP_EXTRA_MARGIN]; /* End margin for memmap */
+ bzero(buff,sizeof(buff));
+ error=my_write(file_buffer.file,buff,sizeof(buff),
+ MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
+ }
+
+ /*
+ Write the fixed compressed file header.
+ */
+ if (!error)
+ error=write_header(mrg,header_length,used_trees,tot_elements,
+ new_length);
+
+ /* Flush the file buffer. */
+ end_file_buffer();
+
+ /* Display statistics. */
+ DBUG_PRINT("info", ("Min record length: %6d Max length: %6d "
+ "Mean total length: %6ld",
+ mrg->min_pack_length, mrg->max_pack_length,
+ (ulong) (mrg->records ? (new_length/mrg->records) : 0)));
+ if (verbose && mrg->records)
+ VOID(printf("Min record length: %6d Max length: %6d "
+ "Mean total length: %6ld\n", mrg->min_pack_length,
+ mrg->max_pack_length, (ulong) (new_length/mrg->records)));
+
+ /* Close source and target file. */
+ if (!test_only)
+ {
+ error|=my_close(new_file,MYF(MY_WME));
+ if (!result_table)
+ {
+ error|=my_close(isam_file->dfile,MYF(MY_WME));
+ isam_file->dfile= -1; /* Tell maria_close file is closed */
+ isam_file->s->bitmap.file= -1;
+ }
+ }
+
+ /* Cleanup. */
+ free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
+ if (! test_only && ! error)
+ {
+ if (result_table)
+ {
+ error=save_state_mrg(join_isam_file,mrg,new_length,glob_crc);
+ }
+ else
+ {
+ if (backup)
+ {
+ if (my_rename(org_name,make_old_name(temp_name,isam_file->filename),
+ MYF(MY_WME)))
+ error=1;
+ else
+ {
+ if (tmp_dir[0])
+ error=my_copy(new_name,org_name,MYF(MY_WME));
+ else
+ error=my_rename(new_name,org_name,MYF(MY_WME));
+ if (!error)
+ {
+ VOID(my_copystat(temp_name,org_name,MYF(MY_COPYTIME)));
+ if (tmp_dir[0])
+ VOID(my_delete(new_name,MYF(MY_WME)));
+ }
+ }
+ }
+ else
+ {
+ if (tmp_dir[0])
+ {
+ error=my_copy(new_name,org_name,
+ MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME));
+ if (!error)
+ VOID(my_delete(new_name,MYF(MY_WME)));
+ }
+ else
+ error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME));
+ }
+ if (! error)
+ error=save_state(isam_file,mrg,new_length,glob_crc);
+ }
+ }
+ error|=mrg_close(mrg);
+ if (join_isam_file >= 0)
+ error|=my_close(join_isam_file,MYF(MY_WME));
+ if (error)
+ {
+ VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name));
+ VOID(my_delete(new_name,MYF(MY_WME)));
+ DBUG_RETURN(-1);
+ }
+ if (write_loop || verbose)
+ {
+ if (old_length)
+ VOID(printf("%.4g%% \n",
+ (((longlong) (old_length - new_length)) * 100.0 /
+ (longlong) old_length)));
+ else
+ puts("Empty file saved in compressed format");
+ }
+ DBUG_RETURN(0);
+
+ err:
+ free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
+ if (new_file >= 0)
+ VOID(my_close(new_file,MYF(0)));
+ if (join_isam_file >= 0)
+ VOID(my_close(join_isam_file,MYF(0)));
+ mrg_close(mrg);
+ VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name));
+ DBUG_RETURN(-1);
+}
+
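+/*
+  A minimal, self-contained sketch of Huffman code construction, to make the
+  statistics and tree-building phases above easier to follow (simplified on
+  purpose: one tree, a quadratic search for the two lightest roots, code
+  lengths only -- the real work is done per column by make_huff_tree() and
+  make_huff_decode_table() using the priority queue).  Illustration only;
+  MARIA_PACK_EXAMPLE is never defined, and 'symbols' is at most 256.
+*/
+#ifdef MARIA_PACK_EXAMPLE
+static void example_huffman_lengths(my_off_t *counts, uint symbols,
+                                    uchar *code_len)
+{
+  my_off_t weight[2*256];
+  int parent[2*256];
+  uint i, total= symbols, roots= symbols;
+
+  for (i= 0; i < symbols; i++)
+  {
+    weight[i]= counts[i] ? counts[i] : 1;   /* unseen values get weight 1 */
+    parent[i]= -1;
+  }
+  while (roots > 1)
+  {
+    uint a= total, b= total;                /* the two lightest roots */
+    for (i= 0; i < total; i++)
+    {
+      if (parent[i] != -1)
+        continue;
+      if (a == total || weight[i] < weight[a])
+      {
+        b= a;
+        a= i;
+      }
+      else if (b == total || weight[i] < weight[b])
+        b= i;
+    }
+    weight[total]= weight[a] + weight[b];   /* merge them under a new root */
+    parent[total]= -1;
+    parent[a]= parent[b]= (int) total;
+    total++;
+    roots--;
+  }
+  for (i= 0; i < symbols; i++)              /* code length == leaf depth */
+  {
+    uint depth= 0, node= i;
+    while (parent[node] != -1)
+    {
+      node= (uint) parent[node];
+      depth++;
+    }
+    code_len[i]= (uchar) depth;
+  }
+}
+#endif
+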
+ /* Allocate and initialize a huff_count struct for each field */
+
+static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records)
+{
+ reg2 uint i;
+ reg1 HUFF_COUNTS *count;
+ if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields*
+ sizeof(HUFF_COUNTS),
+ MYF(MY_ZEROFILL | MY_WME))))
+ {
+ for (i=0 ; i < info->s->base.fields ; i++)
+ {
+ enum en_fieldtype type;
+ count[i].field_length=info->s->rec[i].length;
+ type= count[i].field_type= (enum en_fieldtype) info->s->rec[i].type;
+ if (type == FIELD_INTERVALL ||
+ type == FIELD_CONSTANT ||
+ type == FIELD_ZERO)
+ type = FIELD_NORMAL;
+ if (count[i].field_length <= 8 &&
+ (type == FIELD_NORMAL ||
+ type == FIELD_SKIP_ZERO))
+ count[i].max_zero_fill= count[i].field_length;
+ /*
+ For every column initialize a tree, which is used to detect distinct
+ column values. 'int_tree' works together with 'tree_buff' and
+ 'tree_pos'. Its keys are implemented by pointers into 'tree_buff'.
+ This is accomplished by '-1' as the element size.
+ */
+ init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL,
+ NULL);
+ if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
+ count[i].tree_pos=count[i].tree_buff =
+ my_malloc(count[i].field_length > 1 ? tree_buff_length : 2,
+ MYF(MY_WME));
+ }
+ }
+ return count;
+}
+
+
+ /* Free memory used by counts and trees */
+
+static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees,
+ HUFF_COUNTS *huff_counts,
+ uint fields)
+{
+ register uint i;
+
+ if (huff_trees)
+ {
+ for (i=0 ; i < trees ; i++)
+ {
+ if (huff_trees[i].element_buffer)
+ my_free((gptr) huff_trees[i].element_buffer,MYF(0));
+ if (huff_trees[i].code)
+ my_free((gptr) huff_trees[i].code,MYF(0));
+ }
+ my_free((gptr) huff_trees,MYF(0));
+ }
+ if (huff_counts)
+ {
+ for (i=0 ; i < fields ; i++)
+ {
+ if (huff_counts[i].tree_buff)
+ {
+ my_free((gptr) huff_counts[i].tree_buff,MYF(0));
+ delete_tree(&huff_counts[i].int_tree);
+ }
+ }
+ my_free((gptr) huff_counts,MYF(0));
+ }
+ delete_queue(&queue); /* This is safe to free */
+ return;
+}
+
+ /* Read through old file and gather some statistics */
+
+static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
+{
+ int error;
+ uint length, null_bytes;
+ ulong reclength,max_blob_length;
+ byte *record,*pos,*next_pos,*end_pos,*start_pos;
+ ha_rows record_count;
+ HUFF_COUNTS *count,*end_count;
+ TREE_ELEMENT *element;
+ ha_checksum(*calc_checksum) (struct st_maria_info *, const byte *);
+ DBUG_ENTER("get_statistic");
+
+ reclength= mrg->file[0]->s->base.reclength;
+ null_bytes= mrg->file[0]->s->base.null_bytes;
+ record=(byte*) my_alloca(reclength);
+ end_count=huff_counts+mrg->file[0]->s->base.fields;
+ record_count=0; glob_crc=0;
+ max_blob_length=0;
+
+ /* Check how to calculate checksum */
+ if (mrg->file[0]->s->data_file_type == STATIC_RECORD)
+ calc_checksum= _ma_static_checksum;
+ else
+ calc_checksum= _ma_checksum;
+
+ mrg_reset(mrg);
+ while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
+ {
+ ulong tot_blob_length=0;
+ if (! error)
+ {
+ /* glob_crc is a checksum over all bytes of all records. */
+ glob_crc+= (*calc_checksum)(mrg->file[0],record);
+
+ /* Count the incidence of values separately for every column. */
+ for (pos=record + null_bytes, count=huff_counts ;
+ count < end_count ;
+ count++,
+ pos=next_pos)
+ {
+ next_pos=end_pos=(start_pos=pos)+count->field_length;
+
+ /*
+ Put the whole column value in a tree if there is room for it.
+ 'int_tree' is used to quickly check for duplicate values.
+ 'tree_buff' collects as many distinct column values as
+ possible. If the field length is > 1, it is tree_buff_length,
+ else 2 bytes. Each value is 'field_length' bytes big. If there
+ are more distinct column values than fit into the buffer, we
+ give up with this tree. BLOBs and VARCHARs do not have a
+ tree_buff as it can only be used with fixed length columns.
+ For the special case of field length == 1, we handle only the
+ case that there is only one distinct value in the table(s).
+ Otherwise, we can have a maximum of 256 distinct values. This
+ is then handled by the normal Huffman tree build.
+
+ Another limit for collecting distinct column values is the
+ number of values itself. Since we would need to build a
+ Huffman tree for the values, we are limited by the 'IS_OFFSET'
+ constant. This constant expresses a bit which is used to
+ determine if a tree element holds a final value or an offset
+ to a child element. Hence, all values and offsets need to be
+ smaller than 'IS_OFFSET'. A tree element is implemented with
+ two integer values, one for the left branch and one for the
+ right branch. For the extreme case that the first element
+ points to the last element, the number of integers in the tree
+ must be less or equal to IS_OFFSET. So the number of elements
+ must be less or equal to IS_OFFSET / 2.
+
+ WARNING: At first, we insert a pointer into the record buffer
+ as the key for the tree. If we got a new distinct value, which
+ is really inserted into the tree, instead of being counted
+ only, we will copy the column value from the record buffer to
+ 'tree_buff' and adjust the key pointer of the tree accordingly.
+ */
+ if (count->tree_buff)
+ {
+ global_count=count;
+ if (!(element=tree_insert(&count->int_tree,pos, 0,
+ count->int_tree.custom_arg)) ||
+ (element->count == 1 &&
+ (count->tree_buff + tree_buff_length <
+ count->tree_pos + count->field_length)) ||
+ (count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
+ (count->field_length == 1 &&
+ count->int_tree.elements_in_tree > 1))
+ {
+ delete_tree(&count->int_tree);
+ my_free(count->tree_buff,MYF(0));
+ count->tree_buff=0;
+ }
+ else
+ {
+ /*
+ If tree_insert() succeeds, it either creates a new element
+ or increments the counter of an existing element.
+ */
+ if (element->count == 1)
+ {
+ /* Copy the new column value into 'tree_buff'. */
+ memcpy(count->tree_pos,pos,(size_t) count->field_length);
+ /* Adjust the key pointer in the tree. */
+ tree_set_pointer(element,count->tree_pos);
+ /* Point behind the last column value so far. */
+ count->tree_pos+=count->field_length;
+ }
+ }
+ }
+
+ /* Save character counters and space-counts and zero-field-counts */
+ if (count->field_type == FIELD_NORMAL ||
+ count->field_type == FIELD_SKIP_ENDSPACE)
+ {
+ /* Ignore trailing space. */
+ for ( ; end_pos > pos ; end_pos--)
+ if (end_pos[-1] != ' ')
+ break;
+ /* Empty fields are just counted. Go to the next record. */
+ if (end_pos == pos)
+ {
+ count->empty_fields++;
+ count->max_zero_fill=0;
+ continue;
+ }
+ /*
+ Count the total of all trailing spaces and the number of
+ short trailing spaces. Remember the longest trailing space.
+ */
+ length= (uint) (next_pos-end_pos);
+ count->tot_end_space+=length;
+ if (length < 8)
+ count->end_space[length]++;
+ if (count->max_end_space < length)
+ count->max_end_space = length;
+ }
+
+ if (count->field_type == FIELD_NORMAL ||
+ count->field_type == FIELD_SKIP_PRESPACE)
+ {
+ /* Ignore leading space. */
+ for (pos=start_pos; pos < end_pos ; pos++)
+ if (pos[0] != ' ')
+ break;
+ /* Empty fields are just counted. Go to the next record. */
+ if (end_pos == pos)
+ {
+ count->empty_fields++;
+ count->max_zero_fill=0;
+ continue;
+ }
+ /*
+ Count the total of all leading spaces and the number of
+ short leading spaces. Remember the longest leading space.
+ */
+ length= (uint) (pos-start_pos);
+ count->tot_pre_space+=length;
+ if (length < 8)
+ count->pre_space[length]++;
+ if (count->max_pre_space < length)
+ count->max_pre_space = length;
+ }
+
+ /* Calculate pos, end_pos, and max_length for variable length fields. */
+ if (count->field_type == FIELD_BLOB)
+ {
+ uint field_length=count->field_length -maria_portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(field_length, start_pos);
+ memcpy_fixed((char*) &pos, start_pos+field_length,sizeof(char*));
+ end_pos=pos+blob_length;
+ tot_blob_length+=blob_length;
+ set_if_bigger(count->max_length,blob_length);
+ }
+ else if (count->field_type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
+ length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
+ uint2korr(start_pos));
+ pos= start_pos+pack_length;
+ end_pos= pos+length;
+ set_if_bigger(count->max_length,length);
+ }
+
+ /* Evaluate 'max_zero_fill' for short fields. */
+ if (count->field_length <= 8 &&
+ (count->field_type == FIELD_NORMAL ||
+ count->field_type == FIELD_SKIP_ZERO))
+ {
+ uint i;
+ /* Zero fields are just counted. Go to the next record. */
+ if (!memcmp((byte*) start_pos,zero_string,count->field_length))
+ {
+ count->zero_fields++;
+ continue;
+ }
+ /*
+ max_zero_fill starts with field_length. It is decreased every
+ time a shorter "zero trailer" is found. It is set to zero when
+ an empty field is found (see above). This suggests that the
+ variable should be called 'min_zero_fill'.
+ */
+ for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
+ i++) ;
+ if (i < count->max_zero_fill)
+ count->max_zero_fill=i;
+ }
+
+ /* Ignore zero fields and check fields. */
+ if (count->field_type == FIELD_ZERO ||
+ count->field_type == FIELD_CHECK)
+ continue;
+
+ /*
+ Count the incidence of every byte value in the
+ significant field value.
+ */
+ for ( ; pos < end_pos ; pos++)
+ count->counts[(uchar) *pos]++;
+
+ /* Step to next field. */
+ }
+
+ if (tot_blob_length > max_blob_length)
+ max_blob_length=tot_blob_length;
+ record_count++;
+ if (write_loop && record_count % WRITE_COUNT == 0)
+ {
+ VOID(printf("%lu\r", (ulong) record_count));
+ VOID(fflush(stdout));
+ }
+ }
+ else if (error != HA_ERR_RECORD_DELETED)
+ {
+ VOID(fprintf(stderr, "Got error %d while reading rows", error));
+ break;
+ }
+
+ /* Step to next record. */
+ }
+ if (write_loop)
+ {
+ VOID(printf(" \r"));
+ VOID(fflush(stdout));
+ }
+
+ /*
+ If --debug=d,fakebigcodes is set, fake the counts to get big Huffman
+ codes.
+ */
+ DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count););
+
+ DBUG_PRINT("info", ("Found the following number of incidents "
+ "of the byte codes:"));
+ if (verbose >= 2)
+ VOID(printf("Found the following number of incidents "
+ "of the byte codes:\n"));
+ for (count= huff_counts ; count < end_count; count++)
+ {
+ uint idx;
+ my_off_t total_count;
+ char llbuf[32];
+
+ DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1)));
+ if (verbose >= 2)
+ VOID(printf("column: %3u\n", (uint) (count - huff_counts + 1)));
+ if (count->tree_buff)
+ {
+ DBUG_PRINT("info", ("number of distinct values: %u",
+ (uint) ((count->tree_pos - count->tree_buff) /
+ count->field_length)));
+ if (verbose >= 2)
+ VOID(printf("number of distinct values: %u\n",
+ (uint) ((count->tree_pos - count->tree_buff) /
+ count->field_length)));
+ }
+ total_count= 0;
+ for (idx= 0; idx < 256; idx++)
+ {
+ if (count->counts[idx])
+ {
+ total_count+= count->counts[idx];
+ DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx,
+ llstr((longlong) count->counts[idx], llbuf)));
+ if (verbose >= 2)
+ VOID(printf("counts[0x%02x]: %12s\n", idx,
+ llstr((longlong) count->counts[idx], llbuf)));
+ }
+ }
+ DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count,
+ llbuf)));
+ if ((verbose >= 2) && total_count)
+ {
+ VOID(printf("total: %12s\n",
+ llstr((longlong) total_count, llbuf)));
+ }
+ }
+
+ mrg->records=record_count;
+ mrg->max_blob_length=max_blob_length;
+ my_afree((gptr) record);
+ DBUG_RETURN(error != HA_ERR_END_OF_FILE);
+}
+
+static int compare_huff_elements(void *not_used __attribute__((unused)),
+ byte *a, byte *b)
+{
+ return *((my_off_t*) a) < *((my_off_t*) b) ? -1 :
+ (*((my_off_t*) a) == *((my_off_t*) b) ? 0 : 1);
+}
+
+ /* For each tree, check whether we should use pre-space-compress,
+ end-space-compress, empty-field-compress or zero-field-compress */
+
+static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
+ my_off_t records)
+{
+ uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count];
+ my_off_t old_length,new_length,length;
+ DBUG_ENTER("check_counts");
+
+ bzero((gptr) field_count,sizeof(field_count));
+ space_fields=fill_zero_fields=0;
+
+ for (; trees-- ; huff_counts++)
+ {
+ if (huff_counts->field_type == FIELD_BLOB)
+ {
+ huff_counts->length_bits=max_bit(huff_counts->max_length);
+ goto found_pack;
+ }
+ else if (huff_counts->field_type == FIELD_VARCHAR)
+ {
+ huff_counts->length_bits=max_bit(huff_counts->max_length);
+ goto found_pack;
+ }
+ else if (huff_counts->field_type == FIELD_CHECK)
+ {
+ huff_counts->bytes_packed=0;
+ huff_counts->counts[0]=0;
+ goto found_pack;
+ }
+
+ huff_counts->field_type=FIELD_NORMAL;
+ huff_counts->pack_type=0;
+
+ /* Check for zero-filled records (in this column), or zero records. */
+ if (huff_counts->zero_fields || ! records)
+ {
+ my_off_t old_space_count;
+ /*
+ If there are only zero filled records (in this column),
+ or no records at all, we are done.
+ */
+ if (huff_counts->zero_fields == records)
+ {
+ huff_counts->field_type= FIELD_ZERO;
+ huff_counts->bytes_packed=0;
+ huff_counts->counts[0]=0;
+ goto found_pack;
+ }
+ /* Remember the number of significant spaces. */
+ old_space_count=huff_counts->counts[' '];
+ /* Add all leading and trailing spaces. */
+ huff_counts->counts[' ']+= (huff_counts->tot_end_space +
+ huff_counts->tot_pre_space +
+ huff_counts->empty_fields *
+ huff_counts->field_length);
+ /* Check, what the compressed length of this would be. */
+ old_length=calc_packed_length(huff_counts,0)+records/8;
+ /* Get the number of zero bytes. */
+ length=huff_counts->zero_fields*huff_counts->field_length;
+ /* Add it to the counts. */
+ huff_counts->counts[0]+=length;
+ /* Check, what the compressed length of this would be. */
+ new_length=calc_packed_length(huff_counts,0);
+ /* If the compression without the zeroes would be shorter, we are done. */
+ if (old_length < new_length && huff_counts->field_length > 1)
+ {
+ huff_counts->field_type=FIELD_SKIP_ZERO;
+ huff_counts->counts[0]-=length;
+ huff_counts->bytes_packed=old_length- records/8;
+ goto found_pack;
+ }
+ /* Remove the insignificant spaces, but keep the zeroes. */
+ huff_counts->counts[' ']=old_space_count;
+ }
+ /* Check, what the compressed length of this column would be. */
+ huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
+
+ /*
+ If there are enough empty records (in this column),
+ treating them specially may pay off.
+ */
+ if (huff_counts->empty_fields)
+ {
+ if (huff_counts->field_length > 2 &&
+ huff_counts->empty_fields + (records - huff_counts->empty_fields)*
+ (1+max_bit(max(huff_counts->max_pre_space,
+ huff_counts->max_end_space))) <
+ records * max_bit(huff_counts->field_length))
+ {
+ huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS;
+ }
+ else
+ {
+ length=huff_counts->empty_fields*huff_counts->field_length;
+ if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space)
+ {
+ huff_counts->tot_end_space+=length;
+ huff_counts->max_end_space=huff_counts->field_length;
+ if (huff_counts->field_length < 8)
+ huff_counts->end_space[huff_counts->field_length]+=
+ huff_counts->empty_fields;
+ }
+ if (huff_counts->tot_pre_space)
+ {
+ huff_counts->tot_pre_space+=length;
+ huff_counts->max_pre_space=huff_counts->field_length;
+ if (huff_counts->field_length < 8)
+ huff_counts->pre_space[huff_counts->field_length]+=
+ huff_counts->empty_fields;
+ }
+ }
+ }
+
+ /*
+ If there are enough trailing spaces (in this column),
+ treating them specially may pay off.
+ */
+ if (huff_counts->tot_end_space)
+ {
+ huff_counts->counts[' ']+=huff_counts->tot_pre_space;
+ if (test_space_compress(huff_counts,records,huff_counts->max_end_space,
+ huff_counts->end_space,
+ huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE))
+ goto found_pack;
+ huff_counts->counts[' ']-=huff_counts->tot_pre_space;
+ }
+
+ /*
+ If there are enough leading spaces (in this column),
+ treating them specially may pay off.
+ */
+ if (huff_counts->tot_pre_space)
+ {
+ if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
+ huff_counts->pre_space,
+ huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE))
+ goto found_pack;
+ }
+
+ found_pack: /* Found field-packing */
+
+ /* Test if we can use zero-fill */
+
+ if (huff_counts->max_zero_fill &&
+ (huff_counts->field_type == FIELD_NORMAL ||
+ huff_counts->field_type == FIELD_SKIP_ZERO))
+ {
+ huff_counts->counts[0]-=huff_counts->max_zero_fill*
+ (huff_counts->field_type == FIELD_SKIP_ZERO ?
+ records - huff_counts->zero_fields : records);
+ huff_counts->pack_type|=PACK_TYPE_ZERO_FILL;
+ huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
+ }
+
+ /* Test if intervall-field is better */
+
+ if (huff_counts->tree_buff)
+ {
+ HUFF_TREE tree;
+
+ DBUG_EXECUTE_IF("forceintervall",
+ huff_counts->bytes_packed= ~ (my_off_t) 0;);
+ tree.element_buffer=0;
+ if (!make_huff_tree(&tree,huff_counts) &&
+ tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
+ {
+ if (tree.elements == 1)
+ huff_counts->field_type=FIELD_CONSTANT;
+ else
+ huff_counts->field_type=FIELD_INTERVALL;
+ huff_counts->pack_type=0;
+ }
+ else
+ {
+ my_free((gptr) huff_counts->tree_buff,MYF(0));
+ delete_tree(&huff_counts->int_tree);
+ huff_counts->tree_buff=0;
+ }
+ if (tree.element_buffer)
+ my_free((gptr) tree.element_buffer,MYF(0));
+ }
+ if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS)
+ space_fields++;
+ if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL)
+ fill_zero_fields++;
+ field_count[huff_counts->field_type]++;
+ }
+ DBUG_PRINT("info", ("normal: %3d empty-space: %3d "
+ "empty-zero: %3d empty-fill: %3d",
+ field_count[FIELD_NORMAL],space_fields,
+ field_count[FIELD_SKIP_ZERO],fill_zero_fields));
+ DBUG_PRINT("info", ("pre-space: %3d end-space: %3d "
+ "intervall-fields: %3d zero: %3d",
+ field_count[FIELD_SKIP_PRESPACE],
+ field_count[FIELD_SKIP_ENDSPACE],
+ field_count[FIELD_INTERVALL],
+ field_count[FIELD_ZERO]));
+ if (verbose)
+ VOID(printf("\nnormal: %3d empty-space: %3d "
+ "empty-zero: %3d empty-fill: %3d\n"
+ "pre-space: %3d end-space: %3d "
+ "intervall-fields: %3d zero: %3d\n",
+ field_count[FIELD_NORMAL],space_fields,
+ field_count[FIELD_SKIP_ZERO],fill_zero_fields,
+ field_count[FIELD_SKIP_PRESPACE],
+ field_count[FIELD_SKIP_ENDSPACE],
+ field_count[FIELD_INTERVALL],
+ field_count[FIELD_ZERO]));
+ DBUG_VOID_RETURN;
+}
+
+
+/* Test if we can use space-compression and empty-field-compression */
+
+static int
+test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records,
+ uint max_space_length, my_off_t *space_counts,
+ my_off_t tot_space_count, enum en_fieldtype field_type)
+{
+ int min_pos;
+ uint length_bits,i;
+ my_off_t space_count,min_space_count,min_pack,new_length,skip;
+
+ length_bits=max_bit(max_space_length);
+
+ /* Default no end_space-packing */
+ space_count=huff_counts->counts[(uint) ' '];
+ min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count);
+ min_pack=calc_packed_length(huff_counts,0);
+ min_pos= -2;
+ huff_counts->counts[(uint) ' ']=space_count;
+
+ /* Test the variant where the space count is always stored */
+ new_length=huff_counts->bytes_packed+length_bits*records/8;
+ if (new_length+1 < min_pack)
+ {
+ min_pos= -1;
+ min_pack=new_length;
+ min_space_count=space_count;
+ }
+ /* Test with length-flag */
+ for (skip=0L, i=0 ; i < 8 ; i++)
+ {
+ if (space_counts[i])
+ {
+ if (i)
+ huff_counts->counts[(uint) ' ']+=space_counts[i];
+ skip+=huff_counts->pre_space[i];
+ new_length=calc_packed_length(huff_counts,0)+
+ (records+(records-skip)*(1+length_bits))/8;
+ if (new_length < min_pack)
+ {
+ min_pos=(int) i;
+ min_pack=new_length;
+ min_space_count=huff_counts->counts[(uint) ' '];
+ }
+ }
+ }
+
+ huff_counts->counts[(uint) ' ']=min_space_count;
+ huff_counts->bytes_packed=min_pack;
+ switch (min_pos) {
+ case -2:
+ return(0); /* No space-compress */
+ case -1: /* Always space-count */
+ huff_counts->field_type=field_type;
+ huff_counts->min_space=0;
+ huff_counts->length_bits=max_bit(max_space_length);
+ break;
+ default:
+ huff_counts->field_type=field_type;
+ huff_counts->min_space=(uint) min_pos;
+ huff_counts->pack_type|=PACK_TYPE_SELECTED;
+ huff_counts->length_bits=max_bit(max_space_length);
+ break;
+ }
+ return(1); /* Using space-compress */
+}
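+
+/*
+ How the min_pos result above is interpreted: -2 means no
+ space-compression is used at all; -1 means the space count is always
+ stored with 'length_bits' bits (no PACK_TYPE_SELECTED flag); 0..7 sets
+ PACK_TYPE_SELECTED with min_space = min_pos, so the writer emits a
+ single flag bit and stores the count only when it exceeds min_space
+ (see the FIELD_SKIP_ENDSPACE / FIELD_SKIP_PRESPACE cases in
+ compress_isam_file()).
+*/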
+
+
+ /* Make a huff_tree of each huff_count */
+
+static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees)
+{
+ uint tree;
+ HUFF_TREE *huff_tree;
+ DBUG_ENTER("make_huff_trees");
+
+ if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE),
+ MYF(MY_WME | MY_ZEROFILL))))
+ DBUG_RETURN(0);
+
+ for (tree=0 ; tree < trees ; tree++)
+ {
+ if (make_huff_tree(huff_tree+tree,huff_counts+tree))
+ {
+ while (tree--)
+ my_free((gptr) huff_tree[tree].element_buffer,MYF(0));
+ my_free((gptr) huff_tree,MYF(0));
+ DBUG_RETURN(0);
+ }
+ }
+ DBUG_RETURN(huff_tree);
+}
+
+/*
+ Build a Huffman tree.
+
+ SYNOPSIS
+ make_huff_tree()
+ huff_tree The Huffman tree.
+ huff_counts The counts.
+
+ DESCRIPTION
+ Build a Huffman tree according to huff_counts->counts or
+ huff_counts->tree_buff. tree_buff, if non-NULL contains up to
+ tree_buff_length of distinct column values. In that case, whole
+ values can be Huffman encoded instead of single bytes.
+
+ RETURN
+ 0 OK
+ != 0 Error
+*/
+
+static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
+{
+ uint i,found,bits_packed,first,last;
+ my_off_t bytes_packed;
+ HUFF_ELEMENT *a,*b,*new_huff_el;
+
+ first=last=0;
+ if (huff_counts->tree_buff)
+ {
+ /* Calculate the number of distinct values in tree_buff. */
+ found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) /
+ huff_counts->field_length;
+ first=0; last=found-1;
+ }
+ else
+ {
+ /* Count the number of byte codes found in the column. */
+ for (i=found=0 ; i < 256 ; i++)
+ {
+ if (huff_counts->counts[i])
+ {
+ if (! found++)
+ first=i;
+ last=i;
+ }
+ }
+ if (found < 2)
+ found=2;
+ }
+
+ /* When using 'tree_buff' we can have more than 256 values. */
+ if (queue.max_elements < found)
+ {
+ delete_queue(&queue);
+ if (init_queue(&queue,found,0,0,compare_huff_elements,0))
+ return -1;
+ }
+
+ /* Allocate or reallocate an element buffer for the Huffman tree. */
+ if (!huff_tree->element_buffer)
+ {
+ if (!(huff_tree->element_buffer=
+ (HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME))))
+ return 1;
+ }
+ else
+ {
+ HUFF_ELEMENT *temp;
+ if (!(temp=
+ (HUFF_ELEMENT*) my_realloc((gptr) huff_tree->element_buffer,
+ found*2*sizeof(HUFF_ELEMENT),
+ MYF(MY_WME))))
+ return 1;
+ huff_tree->element_buffer=temp;
+ }
+
+ huff_counts->tree=huff_tree;
+ huff_tree->counts=huff_counts;
+ huff_tree->min_chr=first;
+ huff_tree->max_chr=last;
+ huff_tree->char_bits=max_bit(last-first);
+ huff_tree->offset_bits=max_bit(found-1)+1;
+
+ if (huff_counts->tree_buff)
+ {
+ huff_tree->elements=0;
+ huff_tree->tree_pack_length=(1+15+16+5+5+
+ (huff_tree->char_bits+1)*found+
+ (huff_tree->offset_bits+1)*
+ (found-2)+7)/8 +
+ (uint) (huff_tree->counts->tree_pos-
+ huff_tree->counts->tree_buff);
+ /*
+ Put a HUFF_ELEMENT into the queue for every distinct column value.
+
+ tree_walk() calls save_counts_in_queue() for every element in
+ 'int_tree'. This takes elements from the target trees element
+ buffer and places references to them into the buffer of the
+ priority queue. We insert in column value order, but the order is
+ in fact irrelevant here. We will establish the correct order
+ later.
+ */
+ tree_walk(&huff_counts->int_tree,
+ (int (*)(void*, element_count,void*)) save_counts_in_queue,
+ (gptr) huff_tree, left_root_right);
+ }
+ else
+ {
+ huff_tree->elements=found;
+ huff_tree->tree_pack_length=(9+9+5+5+
+ (huff_tree->char_bits+1)*found+
+ (huff_tree->offset_bits+1)*
+ (found-2)+7)/8;
+ /*
+ Put a HUFF_ELEMENT into the queue for every byte code found in the column.
+
+ The elements are taken from the target trees element buffer.
+ Instead of using queue_insert(), we just place references to the
+ elements into the buffer of the priority queue. We insert in byte
+ value order, but the order is in fact irrelevant here. We will
+ establish the correct order later.
+ */
+ for (i=first, found=0 ; i <= last ; i++)
+ {
+ if (huff_counts->counts[i])
+ {
+ new_huff_el=huff_tree->element_buffer+(found++);
+ new_huff_el->count=huff_counts->counts[i];
+ new_huff_el->a.leaf.null=0;
+ new_huff_el->a.leaf.element_nr=i;
+ queue.root[found]=(byte*) new_huff_el;
+ }
+ }
+ /*
+ If there is only a single byte value in this field in all records,
+ add a second element with zero incidence. This is required to enter
+ the loop, which builds the Huffman tree.
+ */
+ while (found < 2)
+ {
+ new_huff_el=huff_tree->element_buffer+(found++);
+ new_huff_el->count=0;
+ new_huff_el->a.leaf.null=0;
+ if (last)
+ new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1;
+ else
+ new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1;
+ queue.root[found]=(byte*) new_huff_el;
+ }
+ }
+
+ /* Make a queue from the queue buffer. */
+ queue.elements=found;
+
+ /*
+ Make a priority queue from the queue. Construct its index so that we
+ have a partially ordered tree.
+ */
+ for (i=found/2 ; i > 0 ; i--)
+ _downheap(&queue,i);
+
+ /* The Huffman algorithm. */
+ bytes_packed=0; bits_packed=0;
+ for (i=1 ; i < found ; i++)
+ {
+ /*
+ Pop the top element from the queue (the one with the least incidence).
+ Popping from a priority queue includes a re-ordering of the queue,
+ to get the next least incidence element to the top.
+ */
+ a=(HUFF_ELEMENT*) queue_remove(&queue,0);
+ /*
+ Copy the next least incidence element. The queue implementation
+ reserves root[0] for temporary purposes. root[1] is the top.
+ */
+ b=(HUFF_ELEMENT*) queue.root[1];
+ /* Get a new element from the element buffer. */
+ new_huff_el=huff_tree->element_buffer+found+i;
+ /* The new element gets the sum of the two least incidence elements. */
+ new_huff_el->count=a->count+b->count;
+ /*
+ The Huffman algorithm assigns another bit to the code for a byte
+ every time that byte's incidence is combined (directly or indirectly)
+ into a new element as one of the two least-incidence elements.
+ This means that one more bit per incidence of that byte is required
+ in the resulting file. So we add the new combined incidence as the
+ number of bits by which the result grows.
+ */
+ bits_packed+=(uint) (new_huff_el->count & 7);
+ bytes_packed+=new_huff_el->count/8;
+ /* The new element points to its children, lesser in left. */
+ new_huff_el->a.nod.left=a;
+ new_huff_el->a.nod.right=b;
+ /*
+ Replace the copied top element by the new element and re-order the
+ queue.
+ */
+ queue.root[1]=(byte*) new_huff_el;
+ queue_replaced(&queue);
+ }
+ huff_tree->root=(HUFF_ELEMENT*) queue.root[1];
+ huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8;
+ return 0;
+}
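+
+/*
+ Worked example for the loop above (illustrative, not from the original
+ source): with three leaves of counts {5,2,1}, the first merge combines
+ 1+2 into a node of count 3 (bits_packed grows by 3), the second merge
+ combines 3+5 into count 8 (bytes_packed grows by 1). The total,
+ 1 byte + 3 bits = 11 bits, equals the encoded size: the count-5 byte
+ gets a 1-bit code and the other two get 2-bit codes,
+ 5*1 + 2*2 + 1*2 = 11.
+*/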
+
+static int compare_tree(void* cmp_arg __attribute__((unused)),
+ register const uchar *s, register const uchar *t)
+{
+ uint length;
+ for (length=global_count->field_length; length-- ;)
+ if (*s++ != *t++)
+ return (int) s[-1] - (int) t[-1];
+ return 0;
+}
+
+/*
+ Organize distinct column values and their incidences into a priority queue.
+
+ SYNOPSIS
+ save_counts_in_queue()
+ key The column value.
+ count The incidence of this value.
+ tree The Huffman tree to be built later.
+
+ DESCRIPTION
+ We use the element buffer of the targeted tree. The distinct column
+ values are organized in a priority queue first. The Huffman
+ algorithm will later organize the elements into a Huffman tree. For
+ the time being, we just place references to the elements into the
+ queue buffer. The buffer will later be organized into a priority
+ queue.
+
+ RETURN
+ 0
+ */
+
+static int save_counts_in_queue(byte *key, element_count count,
+ HUFF_TREE *tree)
+{
+ HUFF_ELEMENT *new_huff_el;
+
+ new_huff_el=tree->element_buffer+(tree->elements++);
+ new_huff_el->count=count;
+ new_huff_el->a.leaf.null=0;
+ new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) /
+ tree->counts->field_length;
+ queue.root[tree->elements]=(byte*) new_huff_el;
+ return 0;
+}
+
+
+/*
+ Calculate length of file if given counts should be used.
+
+ SYNOPSIS
+ calc_packed_length()
+ huff_counts The counts for a column of the table(s).
+ add_tree_lenght If the decode tree length should be added.
+
+ DESCRIPTION
+ We need to follow the Huffman algorithm until we know how many bits
+ are required for each byte code. But we do not need the resulting
+ Huffman tree. Hence, we can leave out some steps which are essential
+ in make_huff_tree().
+
+ RETURN
+ Number of bytes required to compress this table column.
+*/
+
+static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
+ uint add_tree_lenght)
+{
+ uint i,found,bits_packed,first,last;
+ my_off_t bytes_packed;
+ HUFF_ELEMENT element_buffer[256];
+ DBUG_ENTER("calc_packed_length");
+
+ /*
+ WARNING: We use a small hack for efficiency: Instead of placing
+ references to HUFF_ELEMENTs into the queue, we just insert
+ references to the counts of the byte codes which appeared in this
+ table column. During the Huffman algorithm they are successively
+ replaced by references to HUFF_ELEMENTs. This works, because
+ HUFF_ELEMENTs have the incidence count at their beginning.
+ Regardless of whether the queue array contains references to counts
+ of type my_off_t or references to HUFF_ELEMENTs (which have the count
+ of type my_off_t at their beginning), each entry always points to a
+ count of the same type.
+
+ Instead of using queue_insert(), we just copy the references into
+ the buffer of the priority queue. We insert in byte value order, but
+ the order is in fact irrelevant here. We will establish the correct
+ order later.
+ */
+ first=last=0;
+ for (i=found=0 ; i < 256 ; i++)
+ {
+ if (huff_counts->counts[i])
+ {
+ if (! found++)
+ first=i;
+ last=i;
+ /* We start with root[1], which is the queues top element. */
+ queue.root[found]=(byte*) &huff_counts->counts[i];
+ }
+ }
+ if (!found)
+ DBUG_RETURN(0); /* Empty tree */
+ /*
+ If there is only a single byte value in this field in all records,
+ add a second element with zero incidence. This is required to enter
+ the loop, which follows the Huffman algorithm.
+ */
+ if (found < 2)
+ queue.root[++found]=(byte*) &huff_counts->counts[last ? 0 : 1];
+
+ /* Make a queue from the queue buffer. */
+ queue.elements=found;
+
+ bytes_packed=0; bits_packed=0;
+ /* Add the length of the coding table, which would become part of the file. */
+ if (add_tree_lenght)
+ bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+
+ (max_bit(found-1)+1+1)*(found-2) +7)/8;
+
+ /*
+ Make a priority queue from the queue. Construct its index so that we
+ have a partially ordered tree.
+ */
+ for (i=(found+1)/2 ; i > 0 ; i--)
+ _downheap(&queue,i);
+
+ /* The Huffman algorithm. */
+ for (i=0 ; i < found-1 ; i++)
+ {
+ my_off_t *a;
+ my_off_t *b;
+ HUFF_ELEMENT *new_huff_el;
+
+ /*
+ Pop the top element from the queue (the one with the least
+ incidence). Popping from a priority queue includes a re-ordering
+ of the queue, to get the next least incidence element to the top.
+ */
+ a= (my_off_t*) queue_remove(&queue, 0);
+ /*
+ Copy the next least incidence element. The queue implementation
+ reserves root[0] for temporary purposes. root[1] is the top.
+ */
+ b= (my_off_t*) queue.root[1];
+ /* Create a new element in a local (automatic) buffer. */
+ new_huff_el= element_buffer + i;
+ /* The new element gets the sum of the two least incidence elements. */
+ new_huff_el->count= *a + *b;
+ /*
+ The Huffman algorithm assigns another bit to the code for a byte
+ every time that byte's incidence is combined (directly or indirectly)
+ into a new element as one of the two least-incidence elements.
+ This means that one more bit per incidence of that byte is required
+ in the resulting file. So we add the new combined incidence as the
+ number of bits by which the result grows.
+ */
+ bits_packed+=(uint) (new_huff_el->count & 7);
+ bytes_packed+=new_huff_el->count/8;
+ /*
+ Replace the copied top element by the new element and re-order the
+ queue. This successively replaces the references to counts by
+ references to HUFF_ELEMENTs.
+ */
+ queue.root[1]=(byte*) new_huff_el;
+ queue_replaced(&queue);
+ }
+ DBUG_RETURN(bytes_packed+(bits_packed+7)/8);
+}
+
+
+ /* Remove trees that don't give any compression */
+
+static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees)
+{
+ uint k,tree_number;
+ HUFF_COUNTS count,*i,*j,*last_count;
+
+ last_count=huff_counts+trees;
+ for (tree_number=0, i=huff_counts ; i < last_count ; i++)
+ {
+ if (!i->tree->tree_number)
+ {
+ i->tree->tree_number= ++tree_number;
+ if (i->tree_buff)
+ continue; /* Don't join intervall */
+ for (j=i+1 ; j < last_count ; j++)
+ {
+ if (! j->tree->tree_number && ! j->tree_buff)
+ {
+ for (k=0 ; k < 256 ; k++)
+ count.counts[k]=i->counts[k]+j->counts[k];
+ if (calc_packed_length(&count,1) <=
+ i->tree->bytes_packed + j->tree->bytes_packed+
+ i->tree->tree_pack_length+j->tree->tree_pack_length+
+ ALLOWED_JOIN_DIFF)
+ {
+ memcpy_fixed((byte*) i->counts,(byte*) count.counts,
+ sizeof(count.counts[0])*256);
+ my_free((gptr) j->tree->element_buffer,MYF(0));
+ j->tree->element_buffer=0;
+ j->tree=i->tree;
+ bmove((byte*) i->counts,(byte*) count.counts,
+ sizeof(count.counts[0])*256);
+ if (make_huff_tree(i->tree,i))
+ return (uint) -1;
+ }
+ }
+ }
+ }
+ }
+ DBUG_PRINT("info", ("Original trees: %d After join: %d",
+ trees, tree_number));
+ if (verbose)
+ VOID(printf("Original trees: %d After join: %d\n", trees, tree_number));
+ return tree_number; /* Return trees left */
+}
+
+
+/*
+ Fill in huff_tree encode tables.
+
+ SYNOPSIS
+ make_huff_decode_table()
+ huff_tree An array of HUFF_TREE which are to be encoded.
+ trees The number of HUFF_TREE in the array.
+
+ RETURN
+ 0 success
+ != 0 error
+*/
+
+static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
+{
+ uint elements;
+ for ( ; trees-- ; huff_tree++)
+ {
+ if (huff_tree->tree_number > 0)
+ {
+ elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256;
+ if (!(huff_tree->code =
+ (ulonglong*) my_malloc(elements*
+ (sizeof(ulonglong) + sizeof(uchar)),
+ MYF(MY_WME | MY_ZEROFILL))))
+ return 1;
+ huff_tree->code_len=(uchar*) (huff_tree->code+elements);
+ make_traverse_code_tree(huff_tree, huff_tree->root,
+ 8 * sizeof(ulonglong), LL(0));
+ }
+ }
+ return 0;
+}
+
+
+static void make_traverse_code_tree(HUFF_TREE *huff_tree,
+ HUFF_ELEMENT *element,
+ uint size, ulonglong code)
+{
+ uint chr;
+ if (!element->a.leaf.null)
+ {
+ chr=element->a.leaf.element_nr;
+ huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
+ huff_tree->code[chr]= (code >> size);
+ if (huff_tree->height < 8 * sizeof(ulonglong) - size)
+ huff_tree->height= 8 * sizeof(ulonglong) - size;
+ }
+ else
+ {
+ size--;
+ make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
+ make_traverse_code_tree(huff_tree, element->a.nod.right, size,
+ code + (((ulonglong) 1) << size));
+ }
+ return;
+}
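+
+/*
+ Example of the codes produced above (illustrative): for a tree whose
+ root has leaf A on the left and a node (B left, C right) on the right,
+ the left branch contributes a 0 bit and the right branch a 1 bit, so
+ A gets code 0 (1 bit), B gets 10 and C gets 11 (2 bits each).
+*/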
+
+
+/*
+ Convert a value into binary digits.
+
+ SYNOPSIS
+ bindigits()
+ value The value.
+ length The number of low order bits to convert.
+
+ NOTE
+ The result string is in static storage. It is reused on every call.
+ So you cannot use it twice in one expression.
+
+ RETURN
+ A pointer to a static NUL-terminated string.
+ */
+
+static char *bindigits(ulonglong value, uint bits)
+{
+ static char digits[72];
+ char *ptr= digits;
+ uint idx= bits;
+
+ DBUG_ASSERT(idx < sizeof(digits));
+ while (idx)
+ *(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1);
+ *ptr= '\0';
+ return digits;
+}
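+
+/*
+ Usage sketch: bindigits(0x5, 4) returns "0101"; the most significant
+ of the requested bits comes first, and the static buffer is
+ overwritten on the next call.
+*/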
+
+
+/*
+ Convert a value into hexadecimal digits.
+
+ SYNOPSIS
+ hexdigits()
+ value The value.
+
+ NOTE
+ The result string is in static storage. It is reused on every call.
+ So you cannot use it twice in one expression.
+
+ RETURN
+ A pointer to a static NUL-terminated string.
+ */
+
+static char *hexdigits(ulonglong value)
+{
+ static char digits[20];
+ char *ptr= digits;
+ uint idx= 2 * sizeof(value); /* Two hex digits per byte. */
+
+ DBUG_ASSERT(idx < sizeof(digits));
+ while (idx)
+ {
+ if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9')
+ *(ptr - 1)+= 'a' - '9' - 1;
+ }
+ *ptr= '\0';
+ return digits;
+}
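+
+/*
+ Usage sketch: hexdigits() always emits 2*sizeof(ulonglong) digits, so
+ with a 64-bit ulonglong hexdigits(255) returns "00000000000000ff"; as
+ with bindigits(), the static buffer is overwritten on the next call.
+*/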
+
+
+ /* Write header to new packed data file */
+
+static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees,
+ my_off_t tot_elements,my_off_t filelength)
+{
+ byte *buff= (byte*) file_buffer.pos;
+
+ bzero(buff,HEAD_LENGTH);
+ memcpy_fixed(buff,maria_pack_file_magic,4);
+ int4store(buff+4,head_length);
+ int4store(buff+8, mrg->min_pack_length);
+ int4store(buff+12,mrg->max_pack_length);
+ int4store(buff+16,tot_elements);
+ int4store(buff+20,intervall_length);
+ int2store(buff+24,trees);
+ buff[26]=(char) mrg->ref_length;
+ /* Save record pointer length */
+ buff[27]= (uchar) maria_get_pointer_length((ulonglong) filelength,2);
+ if (test_only)
+ return 0;
+ VOID(my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0)));
+ return my_write(file_buffer.file,(const byte *) file_buffer.pos,HEAD_LENGTH,
+ MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
+}
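+
+/*
+ Resulting header layout (as stored above): bytes 0-3 magic, 4-7
+ head_length, 8-11 min_pack_length, 12-15 max_pack_length, 16-19
+ tot_elements, 20-23 intervall_length, 24-25 number of trees, byte 26
+ ref_length, byte 27 record-pointer length; the rest of HEAD_LENGTH is
+ zero-filled.
+*/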
+
+ /* Write field info to the new packed file */
+
+static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
+{
+ reg1 uint i;
+ uint huff_tree_bits;
+ huff_tree_bits=max_bit(trees ? trees-1 : 0);
+
+ DBUG_PRINT("info", (" "));
+ DBUG_PRINT("info", ("column types:"));
+ DBUG_PRINT("info", ("FIELD_NORMAL 0"));
+ DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1"));
+ DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2"));
+ DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3"));
+ DBUG_PRINT("info", ("FIELD_BLOB 4"));
+ DBUG_PRINT("info", ("FIELD_CONSTANT 5"));
+ DBUG_PRINT("info", ("FIELD_INTERVALL 6"));
+ DBUG_PRINT("info", ("FIELD_ZERO 7"));
+ DBUG_PRINT("info", ("FIELD_VARCHAR 8"));
+ DBUG_PRINT("info", ("FIELD_CHECK 9"));
+ DBUG_PRINT("info", (" "));
+ DBUG_PRINT("info", ("pack type as a set of flags:"));
+ DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1"));
+ DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2"));
+ DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4"));
+ DBUG_PRINT("info", (" "));
+ if (verbose >= 2)
+ {
+ VOID(printf("\n"));
+ VOID(printf("column types:\n"));
+ VOID(printf("FIELD_NORMAL 0\n"));
+ VOID(printf("FIELD_SKIP_ENDSPACE 1\n"));
+ VOID(printf("FIELD_SKIP_PRESPACE 2\n"));
+ VOID(printf("FIELD_SKIP_ZERO 3\n"));
+ VOID(printf("FIELD_BLOB 4\n"));
+ VOID(printf("FIELD_CONSTANT 5\n"));
+ VOID(printf("FIELD_INTERVALL 6\n"));
+ VOID(printf("FIELD_ZERO 7\n"));
+ VOID(printf("FIELD_VARCHAR 8\n"));
+ VOID(printf("FIELD_CHECK 9\n"));
+ VOID(printf("\n"));
+ VOID(printf("pack type as a set of flags:\n"));
+ VOID(printf("PACK_TYPE_SELECTED 1\n"));
+ VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n"));
+ VOID(printf("PACK_TYPE_ZERO_FILL 4\n"));
+ VOID(printf("\n"));
+ }
+ for (i=0 ; i++ < fields ; counts++)
+ {
+ write_bits((ulonglong) (int) counts->field_type, 5);
+ write_bits(counts->pack_type,6);
+ if (counts->pack_type & PACK_TYPE_ZERO_FILL)
+ write_bits(counts->max_zero_fill,5);
+ else
+ write_bits(counts->length_bits,5);
+ write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
+ DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u "
+ "lbits: %2u tree: %2u length: %4u",
+ i , counts->field_type, counts->pack_type,
+ counts->max_zero_fill, counts->length_bits,
+ counts->tree->tree_number, counts->field_length));
+ if (verbose >= 2)
+ VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u "
+ "tree: %2u length: %4u\n", i , counts->field_type,
+ counts->pack_type, counts->max_zero_fill, counts->length_bits,
+ counts->tree->tree_number, counts->field_length));
+ }
+ flush_bits();
+ return;
+}
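+
+/*
+ Per-column layout written above: 5 bits field type, 6 bits pack type,
+ 5 bits max_zero_fill (if PACK_TYPE_ZERO_FILL) or length_bits
+ otherwise, followed by max_bit(trees-1) bits for the 0-based tree
+ number.
+*/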
+
+ /* Write all huff_trees to the new datafile. Return the total count of
+ elements in all trees.
+ Returns 0 on error */
+
+static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
+{
+ uint i,int_length;
+ uint tree_no;
+ uint codes;
+ uint errors= 0;
+ uint *packed_tree,*offset,length;
+ my_off_t elements;
+
+ /* Find the highest number of elements in the trees. */
+ for (i=length=0 ; i < trees ; i++)
+ if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length)
+ length=huff_tree[i].elements;
+ /*
+ Allocate a buffer for packing a decode tree. Two numbers per element
+ (left child and right child).
+ */
+ if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2)))
+ {
+ my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2);
+ return 0;
+ }
+
+ DBUG_PRINT("info", (" "));
+ if (verbose >= 2)
+ VOID(printf("\n"));
+ tree_no= 0;
+ intervall_length=0;
+ for (elements=0; trees-- ; huff_tree++)
+ {
+ /* Skip columns that have been joined with other columns. */
+ if (huff_tree->tree_number == 0)
+ continue; /* Deleted tree */
+ tree_no++;
+ DBUG_PRINT("info", (" "));
+ if (verbose >= 3)
+ VOID(printf("\n"));
+ /* Count the total number of elements (byte codes or column values). */
+ elements+=huff_tree->elements;
+ huff_tree->max_offset=2;
+ /* Build a tree of offsets and codes for decoding in 'packed_tree'. */
+ if (huff_tree->elements <= 1)
+ offset=packed_tree;
+ else
+ offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree);
+
+ /* This should be the same as 'length' above. */
+ huff_tree->offset_bits=max_bit(huff_tree->max_offset);
+
+ /*
+ Since we check this during collecting the distinct column values,
+ this should never happen.
+ */
+ if (huff_tree->max_offset >= IS_OFFSET)
+ { /* This should be impossible */
+ VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n",
+ huff_tree->max_offset));
+ my_afree((gptr) packed_tree);
+ return 0;
+ }
+
+ DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu "
+ "char_bits: %u\n",
+ (ulong) (file_buffer.pos - file_buffer.buffer),
+ huff_tree->elements, (ulong) (offset - packed_tree),
+ huff_tree->char_bits));
+ if (!huff_tree->counts->tree_buff)
+ {
+ /* We do a byte compression on this column. Mark with bit 0. */
+ write_bits(0,1);
+ write_bits(huff_tree->min_chr,8);
+ write_bits(huff_tree->elements,9);
+ write_bits(huff_tree->char_bits,5);
+ write_bits(huff_tree->offset_bits,5);
+ int_length=0;
+ }
+ else
+ {
+ int_length=(uint) (huff_tree->counts->tree_pos -
+ huff_tree->counts->tree_buff);
+ /* We have distinct column values for this column. Mark with bit 1. */
+ write_bits(1,1);
+ write_bits(huff_tree->elements,15);
+ write_bits(int_length,16);
+ write_bits(huff_tree->char_bits,5);
+ write_bits(huff_tree->offset_bits,5);
+ intervall_length+=int_length;
+ }
+ DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u "
+ "offset_bits: %2u %s: %5u codelen: %2u",
+ tree_no, huff_tree->elements, huff_tree->char_bits,
+ huff_tree->offset_bits, huff_tree->counts->tree_buff ?
+ "bufflen" : "min_chr", huff_tree->counts->tree_buff ?
+ int_length : huff_tree->min_chr, huff_tree->height));
+ if (verbose >= 2)
+ VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u "
+ "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements,
+ huff_tree->char_bits, huff_tree->offset_bits,
+ huff_tree->counts->tree_buff ? "bufflen" : "min_chr",
+ huff_tree->counts->tree_buff ? int_length :
+ huff_tree->min_chr, huff_tree->height));
+
+ /* Check that the code tree length matches the element count. */
+ length=(uint) (offset-packed_tree);
+ if (length != huff_tree->elements*2-2)
+ {
+ VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n",
+ length, huff_tree->elements * 2 - 2));
+ errors++;
+ break;
+ }
+
+ for (i=0 ; i < length ; i++)
+ {
+ if (packed_tree[i] & IS_OFFSET)
+ write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits),
+ huff_tree->offset_bits+1);
+ else
+ write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1);
+ DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x",
+ i, (packed_tree[i] & IS_OFFSET) ?
+ " -> " : "", (packed_tree[i] & IS_OFFSET) ?
+ packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
+ if (verbose >= 3)
+ VOID(printf("tree[0x%04x]: %s0x%04x\n",
+ i, (packed_tree[i] & IS_OFFSET) ? " -> " : "",
+ (packed_tree[i] & IS_OFFSET) ?
+ packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
+ }
+ flush_bits();
+
+ /*
+ Display coding tables and check their correctness.
+ */
+ codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
+ for (i= 0; i < codes; i++)
+ {
+ ulonglong code;
+ uint bits;
+ uint len;
+ uint idx;
+
+ if (! (len= huff_tree->code_len[i]))
+ continue;
+ DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i,
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
+ bindigits(huff_tree->code[i],
+ huff_tree->code_len[i])));
+ if (verbose >= 3)
+ VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i,
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
+ bindigits(huff_tree->code[i], huff_tree->code_len[i])));
+
+ /* Check that the encode table decodes correctly. */
+ code= 0;
+ bits= 0;
+ idx= 0;
+ DBUG_EXECUTE_IF("forcechkerr1", len--;);
+ DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code););
+ DBUG_EXECUTE_IF("forcechkerr3", idx= length;);
+ for (;;)
+ {
+ if (! len)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n",
+ hexdigits(huff_tree->code[i]), huff_tree->code_len[i]));
+ errors++;
+ break;
+ }
+ code<<= 1;
+ code|= (huff_tree->code[i] >> (--len)) & 1;
+ bits++;
+ if (bits > 8 * sizeof(code))
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n",
+ bits, (uint) (8 * sizeof(code))));
+ errors++;
+ break;
+ }
+ idx+= (uint) code & 1;
+ if (idx >= length)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n",
+ idx, length));
+ errors++;
+ break;
+ }
+ if (packed_tree[idx] & IS_OFFSET)
+ idx+= packed_tree[idx] & ~IS_OFFSET;
+ else
+ break; /* Hit a leaf. This contains the result value. */
+ }
+ if (errors)
+ break;
+
+ DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;);
+ if (packed_tree[idx] != i)
+ {
+ VOID(fflush(stdout));
+ VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n",
+ packed_tree[idx], i));
+ errors++;
+ break;
+ }
+ } /*end for (codes)*/
+ if (errors)
+ break;
+
+ /* Write column values in case of distinct column value compression. */
+ if (huff_tree->counts->tree_buff)
+ {
+ for (i=0 ; i < int_length ; i++)
+ {
+ write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
+ DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x",
+ i, (uchar) huff_tree->counts->tree_buff[i]));
+ if (verbose >= 3)
+ VOID(printf("column_values[0x%04x]: 0x%02x\n",
+ i, (uchar) huff_tree->counts->tree_buff[i]));
+ }
+ }
+ flush_bits();
+ }
+ DBUG_PRINT("info", (" "));
+ if (verbose >= 2)
+ VOID(printf("\n"));
+ my_afree((gptr) packed_tree);
+ if (errors)
+ {
+ VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n"));
+ return 0;
+ }
+ return elements;
+}
+
+
+static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,
+ uint *offset)
+{
+ uint *prev_offset;
+
+ prev_offset= offset;
+ /*
+ 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
+ then there is no left child and hence no right child either. This
+ is a property of a binary tree: an element is either a node with two
+ children, or a leaf without children.
+
+ The current element is always a node with two children. Go left first.
+ */
+ if (!element->a.nod.left->a.leaf.null)
+ {
+ /* Store the byte code or the index of the column value. */
+ prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
+ offset+=2;
+ }
+ else
+ {
+ /*
+ Recursively traverse the tree to the left. Mark it as an offset to
+ another tree node (in contrast to a byte code or column value index).
+ */
+ prev_offset[0]= IS_OFFSET+2;
+ offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2);
+ }
+
+ /* Now, check the right child. */
+ if (!element->a.nod.right->a.leaf.null)
+ {
+ /* Store the byte code or the index of the column value. */
+ prev_offset[1]=element->a.nod.right->a.leaf.element_nr;
+ return offset;
+ }
+ else
+ {
+ /*
+ Recursively traverse the tree to the right. Mark it as an offset to
+ another tree node (in contrast to a byte code or column value index).
+ */
+ uint temp=(uint) (offset-prev_offset-1);
+ prev_offset[1]= IS_OFFSET+ temp;
+ if (huff_tree->max_offset < temp)
+ huff_tree->max_offset = temp;
+ return make_offset_code_tree(huff_tree,element->a.nod.right,offset);
+ }
+}
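+
+/*
+ Illustrative layout: for a three-leaf tree whose root has leaf A on
+ the left and a node (B left, C right) on the right, the packed array
+ becomes { A, IS_OFFSET+1, B, C }: entry 1 says "the right subtree
+ starts 1 position further on". Decoding bit '1' at index 0 moves to
+ index 1, follows the offset to index 2, and the next bit selects B (0)
+ or C (1), matching the codes 0, 10 and 11.
+*/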
+
+ /* Get number of bits needed to represent value */
+
+static uint max_bit(register uint value)
+{
+ reg2 uint power=1;
+
+ while ((value>>=1))
+ power++;
+ return (power);
+}
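+
+/*
+ Examples: max_bit(1) == 1, max_bit(5) == 3 (101 needs three bits) and,
+ by construction, max_bit(0) == 1, so at least one bit is always used.
+*/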
+
+
+static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
+{
+ int error;
+ uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length;
+ uint intervall,field_length,max_pack_length,pack_blob_length, null_bytes;
+ my_off_t record_count;
+ char llbuf[32];
+ ulong length,pack_length;
+ byte *record,*pos,*end_pos,*record_pos,*start_pos;
+ HUFF_COUNTS *count,*end_count;
+ HUFF_TREE *tree;
+ MARIA_HA *isam_file=mrg->file[0];
+ uint pack_version= (uint) isam_file->s->pack.version;
+ DBUG_ENTER("compress_isam_file");
+
+ /* Allocate a buffer for the records (excluding blobs). */
+ if (!(record=(byte*) my_alloca(isam_file->s->base.reclength)))
+ return -1;
+
+ end_count=huff_counts+isam_file->s->base.fields;
+ min_record_length= (uint) ~0;
+ max_record_length=0;
+ null_bytes= isam_file->s->base.null_bytes;
+
+ /*
+ Calculate the maximum number of bits required to pack the records.
+ Remember to understand 'max_zero_fill' as 'min_zero_fill'.
+ The tree height determines the maximum number of bits per value.
+ Some fields skip leading or trailing spaces or zeroes. The skipped
+ number of bytes is encoded by 'length_bits' bits.
+ Empty blobs and varchar are encoded with a single 1 bit. Other blobs
+ and varchar get a leading 0 bit.
+ */
+ max_calc_length= null_bytes;
+ for (i= 0 ; i < isam_file->s->base.fields ; i++)
+ {
+ if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL))
+ huff_counts[i].max_zero_fill=0;
+ if (huff_counts[i].field_type == FIELD_CONSTANT ||
+ huff_counts[i].field_type == FIELD_ZERO ||
+ huff_counts[i].field_type == FIELD_CHECK)
+ continue;
+ if (huff_counts[i].field_type == FIELD_INTERVALL)
+ max_calc_length+=huff_counts[i].tree->height;
+ else if (huff_counts[i].field_type == FIELD_BLOB ||
+ huff_counts[i].field_type == FIELD_VARCHAR)
+ max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1;
+ else
+ max_calc_length+=
+ (huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
+ huff_counts[i].tree->height+huff_counts[i].length_bits;
+ }
+ max_calc_length= (max_calc_length + 7) / 8;
+ pack_ref_length= _ma_calc_pack_length(pack_version, max_calc_length);
+ record_count=0;
+ /* 'max_blob_length' is the max length of all blobs of a record. */
+ pack_blob_length= isam_file->s->base.blobs ?
+ _ma_calc_pack_length(pack_version, mrg->max_blob_length) : 0;
+ max_pack_length=pack_ref_length+pack_blob_length;
+
+ DBUG_PRINT("fields", ("==="));
+ mrg_reset(mrg);
+ while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
+ {
+ ulong tot_blob_length=0;
+ if (! error)
+ {
+ if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length))
+ break;
+ record_pos= (byte*) file_buffer.pos;
+ file_buffer.pos+= max_pack_length;
+ if (null_bytes)
+ {
+ /* Copy null bits 'as is' */
+ memcpy(file_buffer.pos, record, null_bytes);
+ file_buffer.pos+= null_bytes;
+ }
+ for (start_pos=record+null_bytes, count= huff_counts;
+ count < end_count ;
+ count++)
+ {
+ end_pos=start_pos+(field_length=count->field_length);
+ tree=count->tree;
+
+ DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u "
+ "lbits: %2u tree: %2u length: %4u",
+ (ulong) (count - huff_counts + 1),
+ count->field_type,
+ count->pack_type, count->max_zero_fill,
+ count->length_bits, count->tree->tree_number,
+ count->field_length));
+
+ /* Check if the column contains spaces only. */
+ if (count->pack_type & PACK_TYPE_SPACE_FIELDS)
+ {
+ for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ;
+ if (pos == end_pos)
+ {
+ DBUG_PRINT("fields",
+ ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1"));
+ DBUG_PRINT("fields", ("---"));
+ write_bits(1,1);
+ start_pos=end_pos;
+ continue;
+ }
+ DBUG_PRINT("fields",
+ ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1"));
+ write_bits(0,1);
+ }
+ end_pos-=count->max_zero_fill;
+ field_length-=count->max_zero_fill;
+
+ switch (count->field_type) {
+ case FIELD_SKIP_ZERO:
+ if (!memcmp((byte*) start_pos,zero_string,field_length))
+ {
+ DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1"));
+ write_bits(1,1);
+ start_pos=end_pos;
+ break;
+ }
+ DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1"));
+ write_bits(0,1);
+ /* Fall through */
+ case FIELD_NORMAL:
+ DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes",
+ (ulong) (end_pos - start_pos)));
+ for ( ; start_pos < end_pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos]);
+ }
+ break;
+ case FIELD_SKIP_ENDSPACE:
+ for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
+ length= (ulong) (end_pos - pos);
+ if (count->pack_type & PACK_TYPE_SELECTED)
+ {
+ if (length > count->min_space)
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1"));
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
+ write_bits(1,1);
+ write_bits(length,count->length_bits);
+ }
+ else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE not more than min_space, "
+ "bits: 1"));
+ write_bits(0,1);
+ pos=end_pos;
+ }
+ }
+ else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
+ write_bits(length,count->length_bits);
+ }
+ /* Encode all significant bytes. */
+ DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes",
+ (ulong) (pos - start_pos)));
+ for ( ; start_pos < pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos]);
+ }
+ start_pos=end_pos;
+ break;
+ case FIELD_SKIP_PRESPACE:
+ for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
+ length= (ulong) (pos - start_pos);
+ if (count->pack_type & PACK_TYPE_SELECTED)
+ {
+ if (length > count->min_space)
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE more than min_space, bits: 1"));
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
+ write_bits(1,1);
+ write_bits(length,count->length_bits);
+ }
+ else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE not more than min_space, "
+ "bits: 1"));
+ pos=start_pos;
+ write_bits(0,1);
+ }
+ }
+ else
+ {
+ DBUG_PRINT("fields",
+ ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
+ length, field_length, count->length_bits));
+ write_bits(length,count->length_bits);
+ }
+ /* Encode all significant bytes. */
+ DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes",
+ (ulong) (end_pos - start_pos)));
+ for (start_pos=pos ; start_pos < end_pos ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos]);
+ }
+ break;
+ case FIELD_CONSTANT:
+ case FIELD_ZERO:
+ case FIELD_CHECK:
+ DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK"));
+ start_pos=end_pos;
+ break;
+ case FIELD_INTERVALL:
+ global_count=count;
+ pos=(byte*) tree_search(&count->int_tree, start_pos,
+ count->int_tree.custom_arg);
+ intervall=(uint) (pos - count->tree_buff)/field_length;
+ DBUG_PRINT("fields", ("FIELD_INTERVALL"));
+ DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u",
+ intervall, hexdigits(tree->code[intervall]),
+ (uint) tree->code_len[intervall]));
+ write_bits(tree->code[intervall],(uint) tree->code_len[intervall]);
+ start_pos=end_pos;
+ break;
+ case FIELD_BLOB:
+ {
+ ulong blob_length= _ma_calc_blob_length(field_length-
+ maria_portable_sizeof_char_ptr,
+ start_pos);
+ /* Empty blobs are encoded with a single 1 bit. */
+ if (!blob_length)
+ {
+ DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1"));
+ write_bits(1,1);
+ }
+ else
+ {
+ byte *blob,*blob_end;
+ DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1"));
+ write_bits(0,1);
+ /* Write the blob length. */
+ DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
+ blob_length, count->length_bits));
+ write_bits(blob_length,count->length_bits);
+ memcpy_fixed(&blob,end_pos-maria_portable_sizeof_char_ptr,
+ sizeof(char*));
+ blob_end=blob+blob_length;
+ /* Encode the blob bytes. */
+ for ( ; blob < blob_end ; blob++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *blob, hexdigits(tree->code[(uchar) *blob]),
+ (uint) tree->code_len[(uchar) *blob],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint)tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *blob],
+ (uint) tree->code_len[(uchar) *blob]);
+ }
+ tot_blob_length+=blob_length;
+ }
+ start_pos= end_pos;
+ break;
+ }
+ case FIELD_VARCHAR:
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
+ ulong col_length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
+ uint2korr(start_pos));
+ /* Empty varchar are encoded with a single 1 bit. */
+ if (!col_length)
+ {
+ DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1"));
+ write_bits(1,1); /* Empty varchar */
+ }
+ else
+ {
+ byte *end=start_pos+pack_length+col_length;
+ DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1"));
+ write_bits(0,1);
+ /* Write the varchar length. */
+ DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u",
+ col_length, count->length_bits));
+ write_bits(col_length,count->length_bits);
+ /* Encode the varchar bytes. */
+ for (start_pos+=pack_length ; start_pos < end ; start_pos++)
+ {
+ DBUG_PRINT("fields",
+ ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
+ (uchar) *start_pos,
+ hexdigits(tree->code[(uchar) *start_pos]),
+ (uint) tree->code_len[(uchar) *start_pos],
+ bindigits(tree->code[(uchar) *start_pos],
+ (uint)tree->code_len[(uchar) *start_pos])));
+ write_bits(tree->code[(uchar) *start_pos],
+ (uint) tree->code_len[(uchar) *start_pos]);
+ }
+ }
+ start_pos= end_pos;
+ break;
+ }
+ case FIELD_LAST:
+ case FIELD_enum_val_count:
+ abort(); /* Impossible */
+ }
+ start_pos+=count->max_zero_fill;
+ DBUG_PRINT("fields", ("---"));
+ }
+ flush_bits();
+ length=(ulong) ((byte*) file_buffer.pos - record_pos) - max_pack_length;
+ pack_length= _ma_save_pack_length(pack_version, record_pos, length);
+ if (pack_blob_length)
+ pack_length+= _ma_save_pack_length(pack_version,
+ record_pos + pack_length,
+ tot_blob_length);
+ DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu "
+ "length-bytes: %lu", (ulong) record_count, length,
+ tot_blob_length, pack_length));
+ DBUG_PRINT("fields", ("==="));
+
+ /* Correct file buffer if the header was smaller */
+ if (pack_length != max_pack_length)
+ {
+ bmove(record_pos+pack_length,record_pos+max_pack_length,length);
+ file_buffer.pos-= (max_pack_length-pack_length);
+ }
+ if (length < (ulong) min_record_length)
+ min_record_length=(uint) length;
+ if (length > (ulong) max_record_length)
+ max_record_length=(uint) length;
+ record_count++;
+ if (write_loop && record_count % WRITE_COUNT == 0)
+ {
+ VOID(printf("%lu\r", (ulong) record_count));
+ VOID(fflush(stdout));
+ }
+ }
+ else if (error != HA_ERR_RECORD_DELETED)
+ break;
+ }
+ if (error == HA_ERR_END_OF_FILE)
+ error=0;
+ else
+ {
+ VOID(fprintf(stderr, "%s: Got error %d reading records\n",
+ my_progname, error));
+ }
+ if (verbose >= 2)
+ VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf)));
+
+ my_afree((gptr) record);
+ mrg->ref_length=max_pack_length;
+ mrg->min_pack_length=max_record_length ? min_record_length : 0;
+ mrg->max_pack_length=max_record_length;
+ DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0));
+}
+
+
+static char *make_new_name(char *new_name, char *old_name)
+{
+ return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4);
+}
+
+static char *make_old_name(char *new_name, char *old_name)
+{
+ return fn_format(new_name,old_name,"",OLD_EXT,2+4);
+}
+
+ /* Routines for the bit writing buffer */
+
+static void init_file_buffer(File file, pbool read_buffer)
+{
+ file_buffer.file=file;
+ file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE),
+ MYF(MY_WME));
+ file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8;
+ file_buffer.pos_in_file=0;
+ error_on_write=0;
+ if (read_buffer)
+ {
+
+ file_buffer.pos=file_buffer.end;
+ file_buffer.bits=0;
+ }
+ else
+ {
+ file_buffer.pos=file_buffer.buffer;
+ file_buffer.bits=BITS_SAVED;
+ }
+ file_buffer.bitbucket= 0;
+}
+
+
+static int flush_buffer(ulong neaded_length)
+{
+ ulong length;
+
+ /*
+ file_buffer.end is 8 bytes lower than the real end of the buffer.
+ This is done so that the end-of-buffer condition does not need to be
+ checked for every byte (see write_bits()). Consequently,
+ file_buffer.pos can become greater than file_buffer.end. The
+ algorithms in the other functions ensure that there will never be
+ more than 8 bytes written to the buffer without an end-of-buffer
+ check. So the buffer cannot be overrun. But we need to check for the
+ near-to-buffer-end condition to avoid a negative result, which is
+ casted to unsigned and thus becomes giant.
+ */
+ if ((file_buffer.pos < file_buffer.end) &&
+ ((ulong) (file_buffer.end - file_buffer.pos) > neaded_length))
+ return 0;
+ length=(ulong) (file_buffer.pos-file_buffer.buffer);
+ file_buffer.pos=file_buffer.buffer;
+ file_buffer.pos_in_file+=length;
+ if (test_only)
+ return 0;
+ if (error_on_write|| my_write(file_buffer.file,
+ (const byte*) file_buffer.buffer,
+ length,
+ MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
+ {
+ error_on_write=1;
+ return 1;
+ }
+
+ if (neaded_length != ~(ulong) 0 &&
+ (ulong) (file_buffer.end-file_buffer.buffer) < neaded_length)
+ {
+ char *tmp;
+ neaded_length+=256; /* some margin */
+ tmp= my_realloc((char*) file_buffer.buffer, neaded_length,MYF(MY_WME));
+ if (!tmp)
+ return 1;
+ file_buffer.pos= ((uchar*) tmp +
+ (ulong) (file_buffer.pos - file_buffer.buffer));
+ file_buffer.buffer= (uchar*) tmp;
+ file_buffer.end= (uchar*) (tmp+neaded_length-8);
+ }
+ return 0;
+}
+
+
+static void end_file_buffer(void)
+{
+ my_free((gptr) file_buffer.buffer,MYF(0));
+}
+
+ /* output `bits` low bits of `value' */
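+/*
+  For example: write_bits(0x5, 3) appends the three bits 101 at the current
+  bit position. Bits are collected MSB-first in file_buffer.bitbucket and
+  spilled into the file buffer in BITS_SAVED-bit chunks; flush_bits() pads
+  the pending bits to a byte boundary and writes them out.
+*/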
+
+static void write_bits(register ulonglong value, register uint bits)
+{
+ DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
+ (bits == 8 * sizeof(value)));
+
+ if ((file_buffer.bits-= (int) bits) >= 0)
+ {
+ file_buffer.bitbucket|= value << file_buffer.bits;
+ }
+ else
+ {
+ reg3 ulonglong bit_buffer;
+ bits= (uint) -file_buffer.bits;
+ bit_buffer= (file_buffer.bitbucket |
+ ((bits != 8 * sizeof(value)) ? (value >> bits) : 0));
+#if BITS_SAVED == 64
+ *file_buffer.pos++= (uchar) (bit_buffer >> 56);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 48);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 40);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 32);
+#endif
+ *file_buffer.pos++= (uchar) (bit_buffer >> 24);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 16);
+ *file_buffer.pos++= (uchar) (bit_buffer >> 8);
+ *file_buffer.pos++= (uchar) (bit_buffer);
+
+ if (bits != 8 * sizeof(value))
+ value&= (((ulonglong) 1) << bits) - 1;
+ if (file_buffer.pos >= file_buffer.end)
+ VOID(flush_buffer(~ (ulong) 0));
+ file_buffer.bits=(int) (BITS_SAVED - bits);
+ file_buffer.bitbucket= value << (BITS_SAVED - bits);
+ }
+ return;
+}
+
+ /* Flush bits in bit_buffer to buffer */
+
+static void flush_bits(void)
+{
+ int bits;
+ ulonglong bit_buffer;
+
+ bits= file_buffer.bits & ~7;
+ bit_buffer= file_buffer.bitbucket >> bits;
+ bits= BITS_SAVED - bits;
+ while (bits > 0)
+ {
+ bits-= 8;
+ *file_buffer.pos++= (uchar) (bit_buffer >> bits);
+ }
+ if (file_buffer.pos >= file_buffer.end)
+ VOID(flush_buffer(~ (ulong) 0));
+ file_buffer.bits= BITS_SAVED;
+ file_buffer.bitbucket= 0;
+}
+
+
+/****************************************************************************
+** functions to handle the joined files
+****************************************************************************/
+
+static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
+ my_off_t new_length,
+ ha_checksum crc)
+{
+ MARIA_SHARE *share=isam_file->s;
+ uint options=mi_uint2korr(share->state.header.options);
+ uint key;
+ DBUG_ENTER("save_state");
+
+ options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA;
+ mi_int2store(share->state.header.options,options);
+ share->state.header.org_data_file_type= share->state.header.data_file_type;
+ share->state.header.data_file_type= COMPRESSED_RECORD;
+
+ share->state.state.data_file_length=new_length;
+ share->state.state.del=0;
+ share->state.state.empty=0;
+ share->state.dellink= HA_OFFSET_ERROR;
+ share->state.split=(ha_rows) mrg->records;
+ share->state.version=(ulong) time((time_t*) 0);
+ if (! maria_is_all_keys_active(share->state.key_map, share->base.keys))
+ {
+ /*
+ Some indexes are disabled, cannot use current key_file_length value
+ as an estimate of upper bound of index file size. Use packed data file
+ size instead.
+ */
+ share->state.state.key_file_length= new_length;
+ }
+ /*
+ If there are no disabled indexes, keep key_file_length value from
+ original file so "maria_chk -rq" can use this value (this is necessary
+ because index size cannot be easily calculated for fulltext keys)
+ */
+ maria_clear_all_keys_active(share->state.key_map);
+ for (key=0 ; key < share->base.keys ; key++)
+ share->state.key_root[key]= HA_OFFSET_ERROR;
+ share->state.key_del= HA_OFFSET_ERROR;
+ isam_file->state->checksum=crc; /* Save crc here */
+ share->changed=1; /* Force write of header */
+ share->state.open_count=0;
+ share->global_changed=0;
+ VOID(my_chsize(share->kfile, share->base.keystart, 0, MYF(0)));
+ if (share->base.keys)
+ isamchk_neaded=1;
+ DBUG_RETURN(_ma_state_info_write(share->kfile,&share->state,1+2));
+}
+
+
+static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
+ ha_checksum crc)
+{
+ MARIA_STATE_INFO state;
+ MARIA_HA *isam_file=mrg->file[0];
+ uint options;
+ DBUG_ENTER("save_state_mrg");
+
+ state= isam_file->s->state;
+ options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD |
+ HA_OPTION_READ_ONLY_DATA);
+ mi_int2store(state.header.options,options);
+ state.state.data_file_length=new_length;
+ state.state.del=0;
+ state.state.empty=0;
+ state.state.records=state.split=(ha_rows) mrg->records;
+ /* See comment above in save_state about key_file_length handling. */
+ if (mrg->src_file_has_indexes_disabled)
+ {
+ isam_file->s->state.state.key_file_length=
+ max(isam_file->s->state.state.key_file_length, new_length);
+ }
+ state.dellink= HA_OFFSET_ERROR;
+ state.version=(ulong) time((time_t*) 0);
+ maria_clear_all_keys_active(state.key_map);
+ state.state.checksum=crc;
+ if (isam_file->s->base.keys)
+ isamchk_neaded=1;
+ state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */
+ DBUG_RETURN (_ma_state_info_write(file,&state,1+2));
+}
+
+
+/* reset for mrg_rrnd */
+
+static void mrg_reset(PACK_MRG_INFO *mrg)
+{
+ if (mrg->current)
+ {
+ maria_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0);
+ mrg->current=0;
+ }
+}
+
+static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf)
+{
+ int error;
+ MARIA_HA *isam_info;
+ my_off_t filepos;
+
+ if (!info->current)
+ {
+ isam_info= *(info->current=info->file);
+ info->end=info->current+info->count;
+ maria_reset(isam_info);
+ maria_extra(isam_info, HA_EXTRA_CACHE, 0);
+ if ((error= maria_scan_init(isam_info)))
+ return(error);
+ }
+ else
+ isam_info= *info->current;
+
+ for (;;)
+ {
+ if (!(error= maria_scan(isam_info, buf)) ||
+ error != HA_ERR_END_OF_FILE)
+ return (error);
+ maria_scan_end(isam_info);
+ maria_extra(isam_info,HA_EXTRA_NO_CACHE, 0);
+ if (info->current+1 == info->end)
+ return(HA_ERR_END_OF_FILE);
+ info->current++;
+ isam_info= *info->current;
+ filepos=isam_info->s->pack.header_length;
+ maria_reset(isam_info);
+ maria_extra(isam_info,HA_EXTRA_CACHE, 0);
+ if ((error= maria_scan_init(isam_info)))
+ return(error);
+ }
+}
+
+
+static int mrg_close(PACK_MRG_INFO *mrg)
+{
+ uint i;
+ int error=0;
+ DBUG_ENTER("mrg_close");
+
+ for (i=0 ; i < mrg->count ; i++)
+ error|=maria_close(mrg->file[i]);
+ if (mrg->free_file)
+ my_free((gptr) mrg->file,MYF(0));
+ DBUG_RETURN(error);
+}
+
+
+#if !defined(DBUG_OFF)
+/*
+ Fake the counts to get big Huffman codes.
+
+ SYNOPSIS
+ fakebigcodes()
+ huff_counts A pointer to the counts array.
+ end_count A pointer past the counts array.
+
+ DESCRIPTION
+
+ Huffman coding works by removing the two least frequent values from
+ the list of values and adding a new value with the sum of their
+ incidences in a loop until only one value is left. Every time a
+ value is reused for a new value, it gets one more bit for its
+ encoding. Hence, the least frequent values get the longest codes.
+
+ To get a maximum code length for a value, two of the values must
+ have an incidence of 1. As their sum is 2, the next infrequent value
+ must have at least an incidence of 2, then 4, 8, 16 and so on. This
+ means that one needs 2**n bytes (values) for a code length of n
+ bits. However, using more distinct values forces the use of longer
+ codes, or reaching the code length with fewer total bytes (values).
+
+ To get 64(32)-bit codes, I sort the counts by decreasing incidence.
+ I assign counts of 1 to the two most frequent values, a count of 2
+ for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All
+ the remaining values get 1. That way every possible byte has an
+ assigned code, though not all codes are used if not all byte values
+ are present in the column.
+
+ This strategy would work with distinct column values too, but
+ requires that at least 64(32) values are present. To make things
+ easier here, I cancel all distinct column values and force byte
+ compression for all columns.
+
+ RETURN
+ void
+*/
+
+static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count)
+{
+ HUFF_COUNTS *count;
+ my_off_t *cur_count_p;
+ my_off_t *end_count_p;
+ my_off_t **cur_sort_p;
+ my_off_t **end_sort_p;
+ my_off_t *sort_counts[256];
+ my_off_t total;
+ DBUG_ENTER("fakebigcodes");
+
+ for (count= huff_counts; count < end_count; count++)
+ {
+ /*
+ Remove distinct column values.
+ */
+ if (huff_counts->tree_buff)
+ {
+ my_free((gptr) huff_counts->tree_buff, MYF(0));
+ delete_tree(&huff_counts->int_tree);
+ huff_counts->tree_buff= NULL;
+ DBUG_PRINT("fakebigcodes", ("freed distinct column values"));
+ }
+
+ /*
+ Sort counts by decreasing incidence.
+ */
+ cur_count_p= count->counts;
+ end_count_p= cur_count_p + 256;
+ cur_sort_p= sort_counts;
+ while (cur_count_p < end_count_p)
+ *(cur_sort_p++)= cur_count_p++;
+ (void) qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp);
+
+ /*
+ Assign faked counts.
+ */
+ cur_sort_p= sort_counts;
+#if SIZEOF_LONG_LONG > 4
+ end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1;
+#else
+ end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2;
+#endif
+ /* Most frequent value gets a faked count of 1. */
+ **(cur_sort_p++)= 1;
+ total= 1;
+ while (cur_sort_p < end_sort_p)
+ {
+ **(cur_sort_p++)= total;
+ total<<= 1;
+ }
+ /* Set the last value. */
+ **(cur_sort_p++)= --total;
+ /*
+ Set the remaining counts.
+ */
+ end_sort_p= sort_counts + 256;
+ while (cur_sort_p < end_sort_p)
+ **(cur_sort_p++)= 1;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Compare two counts for reverse sorting.
+
+ SYNOPSIS
+ fakecmp()
+ count1 One count.
+ count2 Another count.
+
+ RETURN
+ 1 count1 < count2
+ 0 count1 == count2
+ -1 count1 > count2
+*/
+
+static int fakecmp(my_off_t **count1, my_off_t **count2)
+{
+ return ((**count1 < **count2) ? 1 :
+ (**count1 > **count2) ? -1 : 0);
+}
+#endif
diff --git a/storage/maria/maria_rename.sh b/storage/maria/maria_rename.sh
new file mode 100755
index 00000000000..fb20e47e635
--- /dev/null
+++ b/storage/maria/maria_rename.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+replace myisam maria MYISAM MARIA MyISAM MARIA -- mysql-test/t/*maria*test mysql-test/r/*maria*result
+
+FILES=`echo sql/ha_maria.{cc,h} include/maria*h storage/maria/*.{c,h}`
+
+replace myisam maria MYISAM MARIA MyISAM MARIA myisam.h maria.h myisamdef.h maria_def.h mi_ maria_ ft_ maria_ft_ "Copyright (C) 2000" "Copyright (C) 2006" MI_ISAMINFO MARIA_INFO MI_CREATE_INFO MARIA_CREATE_INFO maria_isam_ maria_ MI_INFO MARIA_HA MI_ MARIA_ MARIACHK MARIA_CHK rt_index.h ma_rt_index.h rtree_ maria_rtree rt_key.h ma_rt_key.h rt_mbr.h ma_rt_mbr.h -- $FILES
+
+replace check_table_is_closed _ma_check_table_is_closed test_if_reopen _ma_test_if_reopen my_n_base_info_read maria_n_base_info_read update_auto_increment _ma_update_auto_increment save_pack_length _ma_save_packlength calc_pack_length _ma_calc_pack_length -- $FILES
+
+replace mi_ ma_ ft_ ma_ft_ rt_ ma_rt_ myisam maria myisamchk maria_chk myisampack maria_pack myisamlog maria_log -- storage/maria/Makefile.am
+
+#
+# Restore wrong replaces
+#
+
+replace maria_sint1korr mi_sint1korr maria_uint1korr mi_uint1korr maria_sint2korr mi_sint2korr maria_sint3korr mi_sint3korr maria_sint4korr mi_sint4korr maria_sint8korr mi_sint8korr maria_uint2korr mi_uint2korr maria_uint3korr mi_uint3korr maria_uint4korr mi_uint4korr maria_uint5korr mi_uint5korr maria_uint6korr mi_uint6korr maria_uint7korr mi_uint7korr maria_uint8korr mi_uint8korr maria_int1store mi_int1store maria_int2store mi_int2store maria_int3store mi_int3store maria_int4store mi_int4store maria_int5store mi_int5store maria_int6store mi_int6store maria_int7store mi_int7store maria_int8store mi_int8store maria_float4store mi_float4store maria_float4get mi_float4get maria_float8store mi_float8store maria_float8get mi_float8get maria_rowstore mi_rowstore maria_rowkorr mi_rowkorr maria_sizestore mi_sizestore maria_sizekorr mi_sizekorr _maria_maria_ _maria MARIA_MAX_POSSIBLE_KEY HA_MAX_POSSIBLE_KEY MARIA_MAX_KEY_BUFF HA_MAX_KEY_BUFF MARIA_MAX_KEY_SEG HA_MAX_KEY_SEG maria_ft_sintXkorr ft_sintXkorr maria_ft_intXstore ft_intXstore maria_ft_boolean_syntax ft_boolean_syntax maria_ft_min_word_len ft_min_word_len maria_ft_max_word_len ft_max_word_len -- $FILES
diff --git a/storage/maria/plug.in b/storage/maria/plug.in
new file mode 100644
index 00000000000..198f5d8c289
--- /dev/null
+++ b/storage/maria/plug.in
@@ -0,0 +1,8 @@
+MYSQL_STORAGE_ENGINE(maria, no, [Maria Storage Engine],
+ [Traditional transactional MySQL tables], [max,max-no-ndb])
+MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria])
+MYSQL_PLUGIN_ACTIONS(maria, [AC_CONFIG_FILES(storage/maria/unittest/Makefile)])
+MYSQL_PLUGIN_STATIC(maria, [libmaria.a])
+# Maria will probably go first into max builds, not all builds,
+# so we don't declare it mandatory.
+MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(maria, [ha_maria.cc])
diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c
new file mode 100644
index 00000000000..810c6c12ea4
--- /dev/null
+++ b/storage/maria/tablockman.c
@@ -0,0 +1,677 @@
+/* QQ: TODO - allocate everything from dynarrays !!! (benchmark) */
+/* QQ: automatically place S instead of LS if possible */
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <my_base.h>
+#include <hash.h>
+#include "tablockman.h"
+
+/*
+ Lock Manager for Table Locks
+
+ The code below handles locks on resources - but it is optimized for a
+ case when the number of resources is not very large, and there are many
+ locks per resource - that is, a resource is likely to be a table or a
+ database, but hardly a row in a table.
+
+ Locks belong to "lock owners". A Lock Owner is uniquely identified by a
+ 16-bit number - loid (lock owner identifier). A function loid_to_tlo must
+ be provided by the application that takes such a number as an argument
+ and returns a TABLE_LOCK_OWNER structure.
+
+ Lock levels are completely defined by three tables. Lock compatibility
+ matrix specifies which locks can be held at the same time on a resource.
+ Lock combining matrix specifies what lock level has the same behaviour as
+ a pair of two locks of given levels. getlock_result matrix simplifies
+ intention locking and lock escalation for an application, basically it
+ defines which locks are intention locks and which locks are "loose"
+ locks. It is only used to provide better diagnostics for the
+ application, lock manager itself does not differentiate between normal,
+ intention, and loose locks.
+
+ The assumptions are: few distinct resources, many locks are held at the
+ same time on one resource. Thus: a lock structure _per resource_ can be
+ rather large; a lock structure _per lock_ does not need to be very small
+ either; we need to optimize for _speed_. Operations we need are: place a
+ lock, check if a particular transaction already has a lock on this
+ resource, check if a conflicting lock exists, if yes - find who owns it.
+
+ Solution: every resource has a structure with
+ 1. Hash of latest (see the lock upgrade section below) granted locks with
+ loid as a key. Thus, checking if a given transaction has a lock on
+ this resource is O(1) operation.
+ 2. Doubly-linked lists of all granted locks - one list for every lock
+ type. Thus, checking if a conflicting lock exists is a check whether
+ an appropriate list head pointer is not null, also O(1).
+ 3. Every lock has a loid of the owner, thus checking who owns a
+ conflicting lock is also O(1).
+ 4. Deque of waiting locks. It's a deque (double-ended queue) not a fifo,
+ because for lock upgrades requests are added to the queue head, not
+ tail. This is the single place where it gets O(N) in the number
+ of locks - when a transaction wakes up from waiting on a condition,
+ it may need to scan the queue backward to the beginning to find
+ a conflicting lock. It is guaranteed though that "all transactions
+ before it" received the same - or earlier - signal. In other words a
+ transaction needs to scan all transactions before it that received the
+ signal but didn't have a chance to resume the execution yet, so
+ in practice the OS scheduler won't let the scan be O(N).
+
+ Waiting: if there is a conflicting lock or if wait queue is not empty, a
+ requested lock cannot be granted at once. It is added to the end of the
+ wait queue. If a queue was empty and there is a conflicting lock - the
+ "blocker" transaction is the owner of this lock. If a queue is not empty,
+ an owner of the previous lock in the queue is the "blocker". But if the
+ previous lock is compatible with the request, then the "blocker" is the
+ transaction that the owner of the lock at the end of the queue is waiting
+ for (in other words, our lock is added to the end of the wait queue, and
+ our blocker is the same as of the lock right before us).
+
+ Lock upgrades: when a thread that has a lock on a given resource,
+ requests a new lock on the same resource and the old lock is not enough
+ to satisfy new lock requirements (which is defined by
+ lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock
+ (defined by lock_combining_matrix as above) is placed. Depending on
+ other granted locks it is immediately granted or it has to wait. Here the
+ lock is added to the start of the waiting queue, not to the end. The old
+ lock is removed from the hash, but not from the doubly-linked lists.
+ (indeed, a transaction checks "do I have a lock on this resource ?" by
+ looking in a hash, and it should find the latest lock, so old locks must be
+ removed; but a transaction checks "are there conflicting locks ?" by
+ checking doubly-linked lists, it doesn't matter if it will find an old
+ lock - even if it were removed, the new lock would still be a conflict).
+ So, a hash contains only "latest" locks - there can be only one latest
+ lock per resource per transaction. But doubly-linked lists contain all
+ locks, even "obsolete" ones, because it doesnt't hurt. Note that old
+ locks can not be freed early, in particular they stay in the
+ 'active_locks' list of a lock owner, because they may be "re-enabled"
+ on a savepoint rollback.
+
+ To better support table-row relations where one needs to lock the table
+ with an intention lock before locking the row, extended diagnostics is
+ provided. When an intention lock (presumably on a table) is granted,
+ lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row,
+ perhaps the thread already has a normal lock on this table),
+ GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual),
+ GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check
+ whether it's possible to lock the row, but no need to lock it - perhaps
+ the thread has a loose lock on this table). This is defined by
+ getlock_result[] table.
+
+ Instant duration locks are not supported. Though they're trivial to add,
+ they are normally only used on rows, not on tables. So, presumably,
+ they are not needed here.
+
+ Mutexes: there're table mutexes (LOCKED_TABLE::mutex), lock owner mutexes
+ (TABLE_LOCK_OWNER::mutex), and a pool mutex (TABLOCKMAN::pool_mutex).
+ table mutex protects operations on the table lock structures, and lock
+ owner pointers waiting_for and waiting_for_loid.
+ lock owner mutex is only used to wait on lock owner condition
+ (TABLE_LOCK_OWNER::cond), there's no need to protect owner's lock
+ structures, and only lock owner itself may access them.
+ The pool mutex protects a pool of unused locks. Note the locking order:
+ first the table mutex, then the owner mutex or a pool mutex.
+ That is, no other mutex lock may be attempted while an owner or pool
+ mutex is held.
+*/
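+
+/*
+  A minimal usage sketch (illustrative only; my_loid_to_tlo() and my_tlo
+  stand for the application-provided loid mapping function and a fully
+  initialized lock owner):
+
+    TABLOCKMAN lm;
+    LOCKED_TABLE tbl;
+
+    tablockman_init(&lm, &my_loid_to_tlo, 1000);        // timeout in ms
+    tablockman_init_locked_table(&tbl, 16);
+
+    switch (tablockman_getlock(&lm, my_tlo, &tbl, IX))
+    {
+    case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE:
+      // lock the row, then modify it
+      break;
+    ...
+    }
+
+    tablockman_release_locks(&lm, my_tlo);               // at commit/rollback
+*/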
+
+/*
+ Lock compatibility matrix.
+
+ It's asymmetric. Read it as "Somebody has the lock <value in the row
+ label>, can I set the lock <value in the column label> ?"
+
+ ') Though you can take LS lock while somebody has S lock, it makes no
+ sense - it's simpler to take S lock too.
+
+ 1 - compatible
+ 0 - incompatible
+ -1 - "impossible", so that we can assert the impossibility.
+*/
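+/*
+  For example: lock_compatibility_matrix[IX][S] == 0 - if somebody holds an
+  IX lock, a new S request is incompatible and has to wait, while
+  lock_compatibility_matrix[IX][IS] == 1 - an IS request can be granted.
+*/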
+static const int lock_compatibility_matrix[10][10]=
+{ /* N S X IS IX SIX LS LX SLX LSIX */
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, /* N */
+ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */
+ { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */
+ { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */
+ { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */
+ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */
+ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */
+ { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */
+};
+
+/*
+ Lock combining matrix.
+
+ It's symmetric. Read it as "what lock level L is identical to the
+ set of two locks A and B"
+
+ One should never get N from it, we assert the impossibility
+*/
+static const enum lock_type lock_combining_matrix[10][10]=
+{/* N S X IS IX SIX LS LX SLX LSIX */
+ { N, N, N, N, N, N, N, N, N, N}, /* N */
+ { N, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */
+ { N, X, X, X, X, X, X, X, X, X}, /* X */
+ { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */
+ { N, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */
+ { N, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */
+ { N, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */
+ { N, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */
+ { N, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */
+ { N, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */
+};
+
+/*
+ the return codes for lockman_getlock
+
+ It's asymmetric. Read it as "I have the lock <value in the row label>,
+ what value should be returned for <value in the column label> ?"
+
+ 0 means impossible combination (assert!)
+
+ Defines below help to preserve the table structure.
+ I/L/A values are self explanatory
+ x means the combination is possible (assert should not crash)
+ but it cannot happen in row locks, only in table locks (S,X),
+ or lock escalations (LS,LX)
+*/
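+/*
+  For example: getlock_result[LS][IS] == L - the owner already has a loose
+  shared lock on the table, so for an IS request it only needs to check
+  that the row could be locked, without actually locking it.
+*/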
+#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE
+#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
+#define A GOT_THE_LOCK
+#define x GOT_THE_LOCK
+static const enum lockman_getlock_result getlock_result[10][10]=
+{/* N S X IS IX SIX LS LX SLX LSIX */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */
+ { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */
+ { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */
+ { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */
+ { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */
+ { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */
+ { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */
+ { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */
+ { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */
+ { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */
+};
+#undef I
+#undef L
+#undef A
+#undef x
+
+/*
+ this structure is optimized for a case when there're many locks
+ on the same resource - e.g. a table
+*/
+
+struct st_table_lock {
+ /* QQ: do we need upgraded_from ? */
+ struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev;
+ struct st_locked_table *table;
+ uint16 loid;
+ uchar lock_type;
+};
+
+#define hash_insert my_hash_insert /* for consistency :) */
+
+static inline
+TABLE_LOCK *find_by_loid(LOCKED_TABLE *table, uint16 loid)
+{
+ return (TABLE_LOCK *)hash_search(& table->latest_locks,
+ (byte *)& loid, sizeof(loid));
+}
+
+static inline
+void remove_from_wait_queue(TABLE_LOCK *lock, LOCKED_TABLE *table)
+{
+ DBUG_ASSERT(table == lock->table);
+ if (lock->prev)
+ {
+ DBUG_ASSERT(table->wait_queue_out != lock);
+ lock->prev->next= lock->next;
+ }
+ else
+ {
+ DBUG_ASSERT(table->wait_queue_out == lock);
+ table->wait_queue_out= lock->next;
+ }
+ if (lock->next)
+ {
+ DBUG_ASSERT(table->wait_queue_in != lock);
+ lock->next->prev= lock->prev;
+ }
+ else
+ {
+ DBUG_ASSERT(table->wait_queue_in == lock);
+ table->wait_queue_in= lock->prev;
+ }
+}
+
+/*
+ DESCRIPTION
+ tries to lock a resource 'table' with a lock level 'lock'.
+
+ RETURN
+ see enum lockman_getlock_result
+*/
+enum lockman_getlock_result
+tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
+ LOCKED_TABLE *table, enum lock_type lock)
+{
+ TABLE_LOCK *old, *new, *blocker, *blocker2;
+ TABLE_LOCK_OWNER *wait_for;
+ ulonglong deadline;
+ struct timespec timeout;
+ enum lock_type new_lock;
+ enum lockman_getlock_result res;
+ int i;
+
+ DBUG_ASSERT(lo->waiting_lock == 0);
+ DBUG_ASSERT(lo->waiting_for == 0);
+ DBUG_ASSERT(lo->waiting_for_loid == 0);
+
+ pthread_mutex_lock(& table->mutex);
+ /* do we already have a lock on this resource ? */
+ old= find_by_loid(table, lo->loid);
+
+ /* calculate the level of the upgraded lock, if yes */
+ new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock;
+
+ /* and check if old lock is enough to satisfy the new request */
+ if (old && new_lock == old->lock_type)
+ {
+ /* yes */
+ res= getlock_result[old->lock_type][lock];
+ goto ret;
+ }
+
+ /* no, placing a new lock. first - take a free lock structure from the pool */
+ pthread_mutex_lock(& lm->pool_mutex);
+ new= lm->pool;
+ if (new)
+ {
+ lm->pool= new->next;
+ pthread_mutex_unlock(& lm->pool_mutex);
+ }
+ else
+ {
+ pthread_mutex_unlock(& lm->pool_mutex);
+ new= (TABLE_LOCK *)my_malloc(sizeof(*new), MYF(MY_WME));
+ if (unlikely(!new))
+ {
+ res= NO_MEMORY_FOR_LOCK;
+ goto ret;
+ }
+ }
+
+ new->loid= lo->loid;
+ new->lock_type= new_lock;
+ new->table= table;
+
+ /* and try to place it */
+ for (new->prev= table->wait_queue_in;;)
+ {
+ wait_for= 0;
+ if (!old)
+ {
+ /* not upgrading - a lock must be added to the _end_ of the wait queue */
+ for (blocker= new->prev; blocker && !wait_for; blocker= blocker->prev)
+ {
+ TABLE_LOCK_OWNER *tmp= lm->loid_to_tlo(blocker->loid);
+
+ /* find a blocking lock */
+ DBUG_ASSERT(table->wait_queue_out);
+ DBUG_ASSERT(table->wait_queue_in);
+ if (!lock_compatibility_matrix[blocker->lock_type][lock])
+ {
+ /* found! */
+ wait_for= tmp;
+ break;
+ }
+
+ /*
+ hmm, the lock before doesn't block us, let's look one step further.
+ the condition below means:
+
+ if we never waited on a condition yet
+ OR
+ the lock before ours (blocker) waits on a lock (blocker2) that is
+ present in the hash AND conflicts with 'blocker'
+
+ the condition after OR may fail if 'blocker2' was removed from
+ the hash, its signal woke us up, but 'blocker' itself didn't see
+ the signal yet.
+ */
+ if (!lo->waiting_lock ||
+ ((blocker2= find_by_loid(table, tmp->waiting_for_loid)) &&
+ !lock_compatibility_matrix[blocker2->lock_type]
+ [blocker->lock_type]))
+ {
+ /* but it's waiting for a real lock. we'll wait for the same lock */
+ wait_for= tmp->waiting_for;
+ /*
+ We don't really need tmp->waiting_for, as tmp->waiting_for_loid
+ is enough. waiting_for is just a local cache to avoid calling
+ loid_to_tlo().
+ But it's essential that the tmp->waiting_for pointer can ONLY
+ be dereferenced if find_by_loid() above returns a non-null
+ pointer, because a TABLE_LOCK_OWNER object that it points to
+ may've been freed when we come here after a signal.
+ In particular tmp->waiting_for_loid cannot be replaced
+ with tmp->waiting_for->loid.
+ */
+ DBUG_ASSERT(wait_for == lm->loid_to_tlo(tmp->waiting_for_loid));
+ break;
+ }
+
+ /*
+ otherwise - the lock it's waiting for doesn't exist.
+ We've no choice but to scan the wait queue backwards, looking
+ for a conflicting lock or a lock waiting for a real lock.
+ QQ is there a way to avoid this scanning ?
+ */
+ }
+ }
+
+ if (wait_for == 0)
+ {
+ /* checking for compatibility with existing locks */
+ for (blocker= 0, i= 0; i < LOCK_TYPES; i++)
+ {
+ if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock])
+ {
+ blocker= table->active_locks[i];
+ /* if the first lock in the list is our own - skip it */
+ if (blocker->loid == lo->loid)
+ blocker= blocker->next;
+ if (blocker) /* found a conflicting lock, need to wait */
+ break;
+ }
+ }
+ if (!blocker) /* free to go */
+ break;
+ wait_for= lm->loid_to_tlo(blocker->loid);
+ }
+
+ /* ok, we're here - the wait is inevitable */
+ lo->waiting_for= wait_for;
+ lo->waiting_for_loid= wait_for->loid;
+ if (!lo->waiting_lock) /* first iteration of the for() loop */
+ {
+ /* lock upgrade or new lock request ? */
+ if (old)
+ {
+ /* upgrade - add the lock to the _start_ of the wait queue */
+ new->prev= 0;
+ if ((new->next= table->wait_queue_out))
+ new->next->prev= new;
+ table->wait_queue_out= new;
+ if (!table->wait_queue_in)
+ table->wait_queue_in= table->wait_queue_out;
+ }
+ else
+ {
+ /* new lock - add the lock to the _end_ of the wait queue */
+ new->next= 0;
+ if ((new->prev= table->wait_queue_in))
+ new->prev->next= new;
+ table->wait_queue_in= new;
+ if (!table->wait_queue_out)
+ table->wait_queue_out= table->wait_queue_in;
+ }
+ lo->waiting_lock= new;
+
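+ /*
+ my_getsystime() returns the time in 100-nanosecond units, so the
+ millisecond lock_timeout is converted to the same units (* 10000) and
+ split into tv_sec / tv_nsec for pthread_cond_timedwait().
+ */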
+ deadline= my_getsystime() + lm->lock_timeout * 10000;
+ timeout.tv_sec= deadline/10000000;
+ timeout.tv_nsec= (deadline % 10000000) * 100;
+ }
+
+ /*
+ prepare to wait.
+ we must lock blocker's mutex to wait on blocker's cond.
+ and we must release table's mutex.
+ note that blocker's mutex is locked _before_ table's mutex is released
+ */
+ pthread_mutex_lock(wait_for->mutex);
+ pthread_mutex_unlock(& table->mutex);
+
+ /* now really wait */
+ i= pthread_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout);
+
+ pthread_mutex_unlock(wait_for->mutex);
+
+ if (i == ETIMEDOUT || i == ETIME)
+ {
+ /* we rely on the caller to rollback and release all locks */
+ res= LOCK_TIMEOUT;
+ goto ret2;
+ }
+
+ pthread_mutex_lock(& table->mutex);
+
+ /* ... and repeat from the beginning */
+ }
+ /* yeah! we can place the lock now */
+
+ /* remove the lock from the wait queue, if it was there */
+ if (lo->waiting_lock)
+ {
+ remove_from_wait_queue(new, table);
+ lo->waiting_lock= 0;
+ lo->waiting_for= 0;
+ lo->waiting_for_loid= 0;
+ }
+
+ /* add it to the list of all locks of this lock owner */
+ new->next_in_lo= lo->active_locks;
+ lo->active_locks= new;
+
+ /* and to the list of active locks of this lock type */
+ new->prev= 0;
+ if ((new->next= table->active_locks[new_lock-1]))
+ new->next->prev= new;
+ table->active_locks[new_lock-1]= new;
+
+ /* update the latest_locks hash */
+ if (old)
+ hash_delete(& table->latest_locks, (byte *)old);
+ hash_insert(& table->latest_locks, (byte *)new);
+
+ new->upgraded_from= old;
+
+ res= getlock_result[lock][lock];
+
+ret:
+ pthread_mutex_unlock(& table->mutex);
+ret2:
+ DBUG_ASSERT(res);
+ return res;
+}
+
+/*
+ DESCRIPTION
+ release all locks belonging to a transaction.
+ signal waiters to continue
+*/
+void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
+{
+ TABLE_LOCK *lock, *local_pool= 0, *local_pool_end;
+
+ /*
+ instead of adding released locks to a pool one by one, we'll link
+ them in a list and add to a pool in one short action (under a mutex)
+ */
+ local_pool_end= lo->waiting_lock ? lo->waiting_lock : lo->active_locks;
+ if (!local_pool_end)
+ return;
+
+ /* release a waiting lock, if any */
+ if ((lock= lo->waiting_lock))
+ {
+ DBUG_ASSERT(lock->loid == lo->loid);
+ pthread_mutex_lock(& lock->table->mutex);
+ remove_from_wait_queue(lock, lock->table);
+
+ /*
+ a special case: if this lock was not the last in the wait queue
+ and it's compatible with the next lock, then the next lock
+ is waiting for our blocker, though really it waits for us, indirectly.
+ Signal our blocker to release this next lock (after we removed our
+ lock from the wait queue, of course).
+ */
+ /*
+ An example to clarify the above:
+ trn1> S-lock the table. Granted.
+ trn2> IX-lock the table. Added to the wait queue. trn2 waits on trn1
+ trn3> IS-lock the table. The queue is not empty, so IS-lock is added
+ to the queue. It's compatible with the waiting IX-lock, so trn3
+ waits for trn2->waiting_for, that is trn1.
+ if trn1 releases the lock it signals trn1->cond and both waiting
+ transactions are awakened. But if trn2 times out, trn3 must be notified
+ too (as IS and S locks are compatible). So trn2 must signal trn1->cond.
+ */
+ if (lock->next &&
+ lock_compatibility_matrix[lock->next->lock_type][lock->lock_type])
+ {
+ pthread_mutex_lock(lo->waiting_for->mutex);
+ pthread_cond_broadcast(lo->waiting_for->cond);
+ pthread_mutex_unlock(lo->waiting_for->mutex);
+ }
+ lo->waiting_for= 0;
+ lo->waiting_for_loid= 0;
+ pthread_mutex_unlock(& lock->table->mutex);
+
+ lock->next= local_pool;
+ local_pool= lock;
+ }
+
+ /* now release granted locks */
+ lock= lo->active_locks;
+ while (lock)
+ {
+ TABLE_LOCK *cur= lock;
+ pthread_mutex_t *mutex= & lock->table->mutex;
+ DBUG_ASSERT(cur->loid == lo->loid);
+
+ DBUG_ASSERT(lock != lock->next_in_lo);
+ lock= lock->next_in_lo;
+
+ /* TODO ? group locks by table to reduce the number of mutex locks */
+ pthread_mutex_lock(mutex);
+ hash_delete(& cur->table->latest_locks, (byte *)cur);
+
+ if (cur->prev)
+ cur->prev->next= cur->next;
+ if (cur->next)
+ cur->next->prev= cur->prev;
+ if (cur->table->active_locks[cur->lock_type-1] == cur)
+ cur->table->active_locks[cur->lock_type-1]= cur->next;
+
+ cur->next= local_pool;
+ local_pool= cur;
+
+ pthread_mutex_unlock(mutex);
+ }
+
+ lo->waiting_lock= lo->active_locks= 0;
+
+ /*
+ okay, all locks released. now signal that we're leaving,
+ in case somebody's waiting for it
+ */
+ pthread_mutex_lock(lo->mutex);
+ pthread_cond_broadcast(lo->cond);
+ pthread_mutex_unlock(lo->mutex);
+
+ /* and push all freed locks to the lockman's pool */
+ pthread_mutex_lock(& lm->pool_mutex);
+ local_pool_end->next= lm->pool;
+ lm->pool= local_pool;
+ pthread_mutex_unlock(& lm->pool_mutex);
+}
+
+void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout)
+{
+ lm->pool= 0;
+ lm->loid_to_tlo= func;
+ lm->lock_timeout= timeout;
+ pthread_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST);
+ my_getsystime(); /* ensure that my_getsystime() is initialized */
+}
+
+void tablockman_destroy(TABLOCKMAN *lm)
+{
+ while (lm->pool)
+ {
+ TABLE_LOCK *tmp= lm->pool;
+ lm->pool= tmp->next;
+ my_free((void *)tmp, MYF(0));
+ }
+ pthread_mutex_destroy(& lm->pool_mutex);
+}
+
+/*
+ initialize a LOCKED_TABLE structure
+
+ SYNOPSIS
+ lt a LOCKED_TABLE to initialize
+ initial_hash_size initial size for 'latest_locks' hash
+*/
+void tablockman_init_locked_table(LOCKED_TABLE *lt, int initial_hash_size)
+{
+ bzero(lt, sizeof(*lt));
+ pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST);
+ hash_init(& lt->latest_locks, & my_charset_bin, initial_hash_size,
+ offsetof(TABLE_LOCK, loid),
+ sizeof(((TABLE_LOCK*)0)->loid), 0, 0, 0);
+}
+
+void tablockman_destroy_locked_table(LOCKED_TABLE *lt)
+{
+ int i;
+
+ DBUG_ASSERT(lt->wait_queue_out == 0);
+ DBUG_ASSERT(lt->wait_queue_in == 0);
+ DBUG_ASSERT(lt->latest_locks.records == 0);
+ for (i= 0; i<LOCK_TYPES; i++)
+ DBUG_ASSERT(lt->active_locks[i] == 0);
+
+ hash_free(& lt->latest_locks);
+ pthread_mutex_destroy(& lt->mutex);
+}
+
+#ifdef EXTRA_DEBUG
+static const char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX",
+ "LS", "LX", "SLX", "LSIX"};
+
+void tablockman_print_tlo(TABLE_LOCK_OWNER *lo)
+{
+ TABLE_LOCK *lock;
+
+ printf("lo%d>", lo->loid);
+ if ((lock= lo->waiting_lock))
+ printf(" (%s.0x%lx)", lock2str[lock->lock_type], (intptr)lock->table);
+ for (lock= lo->active_locks;
+ lock && lock != lock->next_in_lo;
+ lock= lock->next_in_lo)
+ printf(" %s.0x%lx", lock2str[lock->lock_type], (intptr)lock->table);
+ if (lock && lock == lock->next_in_lo)
+ printf("!");
+ printf("\n");
+}
+#endif
+
diff --git a/storage/maria/tablockman.h b/storage/maria/tablockman.h
new file mode 100644
index 00000000000..2c6fb6996a3
--- /dev/null
+++ b/storage/maria/tablockman.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _tablockman_h
+#define _tablockman_h
+
+/*
+ Lock levels:
+ ^^^^^^^^^^^
+
+ N - "no lock", not a lock, used sometimes internally to simplify the code
+ S - Shared
+ X - eXclusive
+ IS - Intention Shared
+ IX - Intention eXclusive
+ SIX - Shared + Intention eXclusive
+ LS - Loose Shared
+ LX - Loose eXclusive
+ SLX - Shared + Loose eXclusive
+ LSIX - Loose Shared + Intention eXclusive
+*/
+#ifndef _lockman_h
+/* QQ: TODO remove N-locks */
+enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST };
+enum lockman_getlock_result {
+ NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT,
+ GOT_THE_LOCK,
+ GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE,
+ GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
+};
+#endif
+
+#define LOCK_TYPES (LOCK_TYPE_LAST-1)
+
+typedef struct st_table_lock TABLE_LOCK;
+
+typedef struct st_table_lock_owner {
+ TABLE_LOCK *active_locks; /* list of active locks */
+ TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */
+ struct st_table_lock_owner *waiting_for; /* transaction we're waiting for */
+ pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */
+ pthread_mutex_t *mutex; /* mutex is required to use 'cond' */
+ uint16 loid, waiting_for_loid; /* Lock Owner IDentifier */
+} TABLE_LOCK_OWNER;
+
+typedef struct st_locked_table {
+ pthread_mutex_t mutex; /* mutex for everything below */
+ HASH latest_locks; /* latest locks in a hash */
+ TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */
+ TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque (double-ended queue) */
+} LOCKED_TABLE;
+
+typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16);
+
+typedef struct {
+ pthread_mutex_t pool_mutex;
+ TABLE_LOCK *pool; /* lifo pool of free locks */
+ uint lock_timeout; /* lock timeout in milliseconds */
+ loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */
+} TABLOCKMAN;
+
+void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint);
+void tablockman_destroy(TABLOCKMAN *);
+enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *,
+ LOCKED_TABLE *, enum lock_type);
+void tablockman_release_locks(TABLOCKMAN *, TABLE_LOCK_OWNER *);
+void tablockman_init_locked_table(LOCKED_TABLE *, int);
+void tablockman_destroy_locked_table(LOCKED_TABLE *);
+
+#ifdef EXTRA_DEBUG
+void tablockman_print_tlo(TABLE_LOCK_OWNER *);
+#endif
+
+#endif
+
diff --git a/storage/maria/test_pack b/storage/maria/test_pack
new file mode 100755
index 00000000000..689645b1661
--- /dev/null
+++ b/storage/maria/test_pack
@@ -0,0 +1,10 @@
+silent="-s"
+suffix=""
+
+ma_test1$suffix -s ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -us test1 ; maria_chk$suffix -es test1
+ma_test1$suffix -s -S ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ;maria_chk$suffix -us test1 ; maria_chk$suffix -es test1
+ma_test1$suffix -s -b ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1
+ma_test1$suffix -s -w ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -ros test1 ; maria_chk$suffix -es test1
+
+ma_test2$suffix -s -t4 ; maria_pack$suffix --force -s test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2
+ma_test2$suffix -s -t4 -b ; maria_pack$suffix --force -s test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2
diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c
new file mode 100644
index 00000000000..37978e4ff76
--- /dev/null
+++ b/storage/maria/trnman.c
@@ -0,0 +1,519 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <lf.h>
+#include <m_string.h>
+#include "trnman.h"
+
+/*
+ status variables:
+ how many trns in the active list currently,
+ in the committed list currently, allocated since startup.
+*/
+uint trnman_active_transactions, trnman_committed_transactions,
+ trnman_allocated_transactions;
+
+/* list of active transactions in the trid order */
+static TRN active_list_min, active_list_max;
+/* list of committed transactions in the trid order */
+static TRN committed_list_min, committed_list_max;
+
+/* a counter, used to generate transaction ids */
+static TrID global_trid_generator;
+
+/* the mutex for everything above */
+static pthread_mutex_t LOCK_trn_list;
+
+/* LIFO pool of unused TRN structures for reuse */
+static TRN *pool;
+
+/* a hash for committed transactions that maps trid to a TRN structure */
+static LF_HASH trid_to_committed_trn;
+
+/* an array that maps short_trid of an active transaction to a TRN structure */
+static TRN **short_trid_to_active_trn;
+
+/* locks for short_trid_to_active_trn and pool */
+static my_atomic_rwlock_t LOCK_short_trid_to_trn, LOCK_pool;
+
+static LOCKMAN maria_lockman;
+
+/*
+ short transaction id is at the same time its identifier
+ for a lock manager - its lock owner identifier (loid)
+*/
+#define short_id locks.loid
+
+/*
+ NOTE
+ Just as short_id doubles as loid, this function doubles as
+ short_trid_to_LOCK_OWNER. See the compile-time assert below.
+*/
+static TRN *short_trid_to_TRN(uint16 short_trid)
+{
+ TRN *trn;
+ compile_time_assert(offsetof(TRN, locks) == 0);
+ my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
+ trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]);
+ my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
+ return (TRN *)trn;
+}
+
+static byte *trn_get_hash_key(const byte *trn, uint* len,
+ my_bool unused __attribute__ ((unused)))
+{
+ *len= sizeof(TrID);
+ return (byte *) & ((*((TRN **)trn))->trid);
+}
+
+int trnman_init()
+{
+ /*
+ Initialize lists.
+ active_list_max.min_read_from must be larger than any trid,
+ so that when the active list is empty we can free
+ the whole committed list.
+ And committed_list_max itself cannot be freed, so
+ committed_list_max.commit_trid must not be smaller than
+ active_list_max.min_read_from
+ */
+
+ active_list_max.trid= active_list_min.trid= 0;
+ active_list_max.min_read_from= ~0;
+ active_list_max.next= active_list_min.prev= 0;
+ active_list_max.prev= &active_list_min;
+ active_list_min.next= &active_list_max;
+
+ committed_list_max.commit_trid= ~0;
+ committed_list_max.next= committed_list_min.prev= 0;
+ committed_list_max.prev= &committed_list_min;
+ committed_list_min.next= &committed_list_max;
+
+ trnman_active_transactions= 0;
+ trnman_committed_transactions= 0;
+ trnman_allocated_transactions= 0;
+
+ pool= 0;
+ global_trid_generator= 0; /* set later by the recovery code */
+ lf_hash_init(&trid_to_committed_trn, sizeof(TRN*), LF_HASH_UNIQUE,
+ 0, 0, trn_get_hash_key, 0);
+ pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST);
+ my_atomic_rwlock_init(&LOCK_short_trid_to_trn);
+ my_atomic_rwlock_init(&LOCK_pool);
+ short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*),
+ MYF(MY_WME|MY_ZEROFILL));
+ if (unlikely(!short_trid_to_active_trn))
+ return 1;
+ short_trid_to_active_trn--; /* min short_trid is 1 */
+
+ lockman_init(&maria_lockman, (loid_to_lo_func *)&short_trid_to_TRN, 10000);
+
+ return 0;
+}
+
+/*
+ NOTE
+ this could only be called in the "idle" state - no transaction can be
+ running. See asserts below.
+*/
+void trnman_destroy()
+{
+ DBUG_ASSERT(trid_to_committed_trn.count == 0);
+ DBUG_ASSERT(trnman_active_transactions == 0);
+ DBUG_ASSERT(trnman_committed_transactions == 0);
+ DBUG_ASSERT(active_list_max.prev == &active_list_min);
+ DBUG_ASSERT(active_list_min.next == &active_list_max);
+ DBUG_ASSERT(committed_list_max.prev == &committed_list_min);
+ DBUG_ASSERT(committed_list_min.next == &committed_list_max);
+ while (pool)
+ {
+ TRN *trn= pool;
+ pool= pool->next;
+ DBUG_ASSERT(trn->locks.mutex == 0);
+ DBUG_ASSERT(trn->locks.cond == 0);
+ my_free((void *)trn, MYF(0));
+ }
+ lf_hash_destroy(&trid_to_committed_trn);
+ pthread_mutex_destroy(&LOCK_trn_list);
+ my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn);
+ my_atomic_rwlock_destroy(&LOCK_pool);
+ my_free((void *)(short_trid_to_active_trn+1), MYF(0));
+ lockman_destroy(&maria_lockman);
+}
+
+/*
+ NOTE
+ TrID is limited to 6 bytes. Initial value of the generator
+ is set by the recovery code - being read from the last checkpoint
+ (or 1 on a first run).
+*/
+static TrID new_trid()
+{
+ DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL);
+ safe_mutex_assert_owner(&LOCK_trn_list);
+ return ++global_trid_generator;
+}
+
+static void set_short_trid(TRN *trn)
+{
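+ /*
+ Pick a quasi-random starting slot in [1..SHORT_TRID_MAX] and scan
+ circularly for a free one; the constant multiplier below appears to
+ serve only to spread the starting points.
+ */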
+ int i= (global_trid_generator + (intptr)trn) * 312089 % SHORT_TRID_MAX + 1;
+ my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn);
+ for ( ; ; i= i % SHORT_TRID_MAX + 1) /* the range is [1..SHORT_TRID_MAX] */
+ {
+ void *tmp= NULL;
+ if (short_trid_to_active_trn[i] == NULL &&
+ my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn))
+ break;
+ }
+ my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn);
+ trn->short_id= i;
+}
+
+/*
+ DESCRIPTION
+ start a new transaction, allocate and initialize transaction object
+ mutex and cond will be used for lock waits
+*/
+TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond)
+{
+ TRN *trn;
+
+ /*
+ we have a mutex, to do simple things under it - allocate a TRN,
+ increment trnman_active_transactions, set trn->min_read_from.
+
+ Note that all the above is fast. Generating a short_trid may be slow,
+ as it involves scanning a large array - so it's done outside of the
+ mutex.
+ */
+
+ pthread_mutex_lock(&LOCK_trn_list);
+
+ /* Allocating a new TRN structure */
+ trn= pool;
+ /*
+ Popping an unused TRN from the pool
+ (ABA isn't possible, we're behind a mutex)
+ */
+ my_atomic_rwlock_wrlock(&LOCK_pool);
+ while (trn && !my_atomic_casptr((void **)&pool, (void **)&trn,
+ (void *)trn->next))
+ /* no-op */;
+ my_atomic_rwlock_wrunlock(&LOCK_pool);
+
+ /* Nothing in the pool ? Allocate a new one */
+ if (!trn)
+ {
+ trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME));
+ if (unlikely(!trn))
+ {
+ pthread_mutex_unlock(&LOCK_trn_list);
+ return 0;
+ }
+ trnman_allocated_transactions++;
+ }
+ trnman_active_transactions++;
+
+ trn->min_read_from= active_list_min.next->trid;
+
+ trn->trid= new_trid();
+ trn->short_id= 0;
+
+ trn->next= &active_list_max;
+ trn->prev= active_list_max.prev;
+ active_list_max.prev= trn->prev->next= trn;
+ pthread_mutex_unlock(&LOCK_trn_list);
+
+ trn->pins= lf_hash_get_pins(&trid_to_committed_trn);
+
+ if (unlikely(!trn->min_read_from))
+ trn->min_read_from= trn->trid;
+
+ trn->commit_trid= 0;
+
+ trn->locks.mutex= mutex;
+ trn->locks.cond= cond;
+ trn->locks.waiting_for= 0;
+ trn->locks.all_locks= 0;
+ trn->locks.pins= lf_alloc_get_pins(&maria_lockman.alloc);
+
+ /*
+ Only after the following call is the TRN considered initialized,
+ so it must be done last
+ */
+ set_short_trid(trn);
+
+ return trn;
+}
+
+/*
+ remove a trn from the active list.
+ if necessary - move to committed list and set commit_trid
+
+ NOTE
+ Locks are released at the end. In particular, after placing the
+ transaction in commit list, and after setting commit_trid. It's
+ important, as commit_trid affects visibility. Locks don't affect
+ anything; they simply delay execution of other threads - they could be
+ released arbitrarily late. In other words, releasing the locks acts as
+ a starting signal for other threads: they begin to run. So
+ everything they may need must be ready at that point.
+*/
+void trnman_end_trn(TRN *trn, my_bool commit)
+{
+ TRN *free_me= 0;
+ LF_PINS *pins= trn->pins;
+
+ pthread_mutex_lock(&LOCK_trn_list);
+
+ /* remove from active list */
+ trn->next->prev= trn->prev;
+ trn->prev->next= trn->next;
+
+ /*
+ if trn was the oldest active transaction, now that it goes away there
+ may be committed transactions in the list which no active transaction
+ needs to bother about - clean up the committed list
+ */
+ if (trn->prev == &active_list_min)
+ {
+ uint free_me_count;
+ TRN *t;
+ for (t= committed_list_min.next, free_me_count= 0;
+ t->commit_trid < active_list_min.next->min_read_from;
+ t= t->next, free_me_count++) /* no-op */;
+
+ DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) ||
+ (t == committed_list_min.next && free_me_count == 0));
+ /* found transactions committed before the oldest active one */
+ if (t != committed_list_min.next)
+ {
+ free_me= committed_list_min.next;
+ committed_list_min.next= t;
+ t->prev->next= 0;
+ t->prev= &committed_list_min;
+ trnman_committed_transactions-= free_me_count;
+ }
+ }
+
+ /*
+ if transaction is committed and it was not the only active transaction -
+ add it to the committed list (which is used for read-from relation)
+ */
+ if (commit && active_list_min.next != &active_list_max)
+ {
+ int res;
+
+ trn->commit_trid= global_trid_generator;
+ trn->next= &committed_list_max;
+ trn->prev= committed_list_max.prev;
+ committed_list_max.prev= trn->prev->next= trn;
+ trnman_committed_transactions++;
+
+ res= lf_hash_insert(&trid_to_committed_trn, pins, &trn);
+ DBUG_ASSERT(res == 0);
+ }
+ else /* otherwise free it right away */
+ {
+ trn->next= free_me;
+ free_me= trn;
+ }
+ trnman_active_transactions--;
+ pthread_mutex_unlock(&LOCK_trn_list);
+
+ /* the rest is done outside of a critical section */
+ lockman_release_locks(&maria_lockman, &trn->locks);
+ trn->locks.mutex= 0;
+ trn->locks.cond= 0;
+ my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
+ my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->short_id], 0);
+ my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
+
+ /*
+ While holding the mutex we removed the transactions that go into free_me
+ from the active and committed lists, so nobody else can see them when
+ scanning those lists, and thus nobody will try to free them. Now we
+ don't need a mutex to access the free_me list.
+ */
+ /* QQ: send them to the purge thread */
+ while (free_me)
+ {
+ TRN *t= free_me;
+ free_me= free_me->next;
+
+ lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID));
+
+ trnman_free_trn(t);
+ }
+
+ lf_hash_put_pins(pins);
+ lf_pinbox_put_pins(trn->locks.pins);
+}
+
+/*
+ free a trn (add to the pool, that is)
+ note - we can never really free() a TRN if there's at least one other
+ running transaction - see, e.g., how lock waits are implemented in
+ lockman.c
+ The same is true for other lock-free data structures too. We may need some
+ kind of FLUSH command to reset them all - ensuring that no transactions are
+ running. It may even be called automatically on checkpoints if no
+ transactions are running.
+*/
+void trnman_free_trn(TRN *trn)
+{
+ TRN *tmp= pool;
+
+ my_atomic_rwlock_wrlock(&LOCK_pool);
+ do
+ {
+ /*
+ without this volatile cast gcc-3.4.4 moved the assignment
+ down after the loop at -O2
+ */
+ *(TRN * volatile *)&(trn->next)= tmp;
+ } while (!my_atomic_casptr((void **)&pool, (void **)&tmp, trn));
+ my_atomic_rwlock_wrunlock(&LOCK_pool);
+}
+
+/*
+ NOTE
+ here we access the hash in a lock-free manner.
+ It's safe, a 'found' TRN can never be freed/reused before we access it.
+ In fact, it cannot be freed before 'trn' ends, because a 'found' TRN
+ can only be removed from the hash when:
+ found->commit_trid < ALL (trn->min_read_from)
+ that is, at least
+ found->commit_trid < trn->min_read_from
+ but
+ found->trid >= trn->min_read_from
+ and
+ found->commit_trid > found->trid
+*/
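+/*
+  For example: if trn->min_read_from is 100 and trn->trid is 120, then a
+  row version with trid 95 is always visible, one with trid 130 never is,
+  and one with trid 110 is visible only if that transaction is found in the
+  committed hash with commit_trid < 120.
+*/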
+my_bool trnman_can_read_from(TRN *trn, TrID trid)
+{
+ TRN **found;
+ my_bool can;
+ LF_REQUIRE_PINS(3);
+
+ if (trid < trn->min_read_from)
+ return TRUE; /* can read */
+ if (trid > trn->trid)
+ return FALSE; /* cannot read */
+
+ found= lf_hash_search(&trid_to_committed_trn, trn->pins, &trid, sizeof(trid));
+ if (!found)
+ return FALSE; /* not in the hash of committed transactions = cannot read */
+
+ can= (*found)->commit_trid < trn->trid;
+ lf_unpin(trn->pins, 2);
+ return can;
+}
+
+
+/*
+ Allocates two buffers and stores in them some information about transactions
+ of the active list (into the first buffer) and of the committed list (into
+ the second buffer).
+
+ SYNOPSIS
+ trnman_collect_transactions()
+ str_act (OUT) pointer to a LEX_STRING where the allocated buffer, and
+ its size, will be put
+ str_com (OUT) pointer to a LEX_STRING where the allocated buffer, and
+ its size, will be put
+
+
+ DESCRIPTION
+ Does the allocation because the caller cannot know the size itself.
+ Memory freeing is to be done by the caller (if the "str" member of the
+ LEX_STRING is not NULL).
+ The caller has the intention of doing checkpoints.
+
+ RETURN
+ 0 on success
+ 1 on error
+*/
+my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com)
+{
+ my_bool error;
+ TRN *trn;
+ char *ptr;
+ DBUG_ENTER("trnman_collect_transactions");
+
+ DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str));
+
+ pthread_mutex_lock(&LOCK_trn_list);
+ str_act->length= 8+(6+2+7+7+7)*trnman_active_transactions;
+ str_com->length= 8+(6+7+7)*trnman_committed_transactions;
+ if ((NULL == (str_act->str= my_malloc(str_act->length, MYF(MY_WME)))) ||
+ (NULL == (str_com->str= my_malloc(str_com->length, MYF(MY_WME)))))
+ goto err;
+ /* First, the active transactions */
+ ptr= str_act->str;
+ int8store(ptr, (ulonglong)trnman_active_transactions);
+ ptr+= 8;
+ for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
+ {
+ /*
+ trns with a short trid of 0 are not initialized; Recovery will recognize
+ this and ignore them.
+ State is not needed for now (only once we support prepared trns).
+ For LSNs, Sanja will soon push lsn7store.
+ */
+ int6store(ptr, trn->trid);
+ ptr+= 6;
+ int2store(ptr, trn->short_id);
+ ptr+= 2;
+ /* needed for rollback */
+ /* lsn7store(ptr, trn->undo_lsn); */
+ ptr+= 7;
+ /* needed for purge */
+ /* lsn7store(ptr, trn->undo_purge_lsn); */
+ ptr+= 7;
+ /* needed for low-water mark calculation */
+ /* lsn7store(ptr, read_non_atomic(&trn->first_undo_lsn)); */
+ ptr+= 7;
+ }
+ /* do the same for committed ones */
+ ptr= str_com->str;
+ int8store(ptr, (ulonglong)trnman_committed_transactions);
+ ptr+= 8;
+ for (trn= committed_list_min.next; trn != &committed_list_max;
+ trn= trn->next)
+ {
+ int6store(ptr, trn->trid);
+ ptr+= 6;
+ /* mi_int7store(ptr, trn->undo_purge_lsn); */
+ ptr+= 7;
+ /* mi_int7store(ptr, read_non_atomic(&trn->first_undo_lsn)); */
+ ptr+= 7;
+ }
+ /*
+ TODO: if we see there exists no transaction (active and committed) we can
+ tell the lock-free structures to do some freeing (my_free()).
+ */
+ error= 0;
+ goto end;
+err:
+ error= 1;
+end:
+ pthread_mutex_unlock(&LOCK_trn_list);
+ DBUG_RETURN(error);
+}
diff --git a/storage/maria/trnman.h b/storage/maria/trnman.h
new file mode 100644
index 00000000000..267e3cabd7a
--- /dev/null
+++ b/storage/maria/trnman.h
@@ -0,0 +1,57 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _trnman_h
+#define _trnman_h
+
+#include "lockman.h"
+
+typedef uint64 TrID; /* our TrID is 6 bytes */
+typedef struct st_transaction TRN;
+
+/*
+  trid - 6-byte transaction identifier. Assigned when a transaction
+         is created. A transaction can always be identified by its trid,
+         even after the transaction has ended.
+
+  short_trid - 2-byte transaction identifier that identifies a running
+               transaction; it is reassigned when the transaction ends.
+*/
+struct st_transaction
+{
+ LOCK_OWNER locks; /* must be the first! see short_trid_to_TRN() */
+ LF_PINS *pins;
+ TrID trid, min_read_from, commit_trid;
+ TRN *next, *prev;
+ /* Note! if locks.loid is 0, trn is NOT initialized */
+};
+
+#define SHORT_TRID_MAX 65535
+
+extern uint trnman_active_transactions, trnman_allocated_transactions;
+
+int trnman_init(void);
+void trnman_destroy(void);
+TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond);
+void trnman_end_trn(TRN *trn, my_bool commit);
+#define trnman_commit_trn(T) trnman_end_trn(T, TRUE)
+#define trnman_abort_trn(T) trnman_end_trn(T, FALSE)
+void trnman_free_trn(TRN *trn);
+my_bool trnman_can_read_from(TRN *trn, TrID trid);
+my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com);
+
+#endif
+
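+/*
+  Usage sketch (added for illustration only): the expected life cycle of a
+  transaction through the API declared above. The function name, the
+  other_trid parameter and the TRNMAN_USAGE_SKETCH guard are made up for
+  this example; the guard is never defined, so the block is not compiled.
+*/
+#ifdef TRNMAN_USAGE_SKETCH
+static void trnman_life_cycle_example(pthread_mutex_t *mutex,
+                                      pthread_cond_t *cond, TrID other_trid)
+{
+  TRN *trn;
+  my_bool visible;
+
+  trn= trnman_new_trn(mutex, cond);              /* start a transaction */
+  if (!trn)
+    return;                                      /* out of memory */
+  visible= trnman_can_read_from(trn, other_trid);
+  (void) visible;   /* TRUE: other_trid committed before trn was created */
+  trnman_commit_trn(trn);                        /* or trnman_abort_trn(trn) */
+}
+#endif
+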
diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am
new file mode 100644
index 00000000000..33f1bfed560
--- /dev/null
+++ b/storage/maria/unittest/Makefile.am
@@ -0,0 +1,87 @@
+# Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+ -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+ -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+
+# The only reason to link with libmyisam.a here is that it's where some
+# fulltext pieces live (fulltext dependencies will soon be removed from Maria).
+LDADD= $(top_builddir)/unittest/mytap/libmytap.a \
+ $(top_builddir)/storage/maria/libmaria.a \
+ $(top_builddir)/storage/myisam/libmyisam.a \
+ $(top_builddir)/mysys/libmysys.a \
+ $(top_builddir)/dbug/libdbug.a \
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ \
+ $(top_builddir)/storage/maria/ma_loghandler.o
+noinst_PROGRAMS = ma_control_file-t trnman-t lockman2-t \
+ mf_pagecache_single_1k-t mf_pagecache_single_8k-t \
+ mf_pagecache_single_64k-t-big \
+ mf_pagecache_consist_1k-t-big \
+ mf_pagecache_consist_64k-t-big \
+ mf_pagecache_consist_1kHC-t-big \
+ mf_pagecache_consist_64kHC-t-big \
+ mf_pagecache_consist_1kRD-t-big \
+ mf_pagecache_consist_64kRD-t-big \
+ mf_pagecache_consist_1kWR-t-big \
+ mf_pagecache_consist_64kWR-t-big \
+ ma_test_loghandler-t \
+ ma_test_loghandler_multigroup-t \
+ ma_test_loghandler_multithread-t \
+ ma_test_loghandler_pagecache-t
+
+ma_test_loghandler_t_SOURCES= ma_test_loghandler-t.c ma_maria_log_cleanup.c
+ma_test_loghandler_multigroup_t_SOURCES= ma_test_loghandler_multigroup-t.c ma_maria_log_cleanup.c
+ma_test_loghandler_multithread_t_SOURCES= ma_test_loghandler_multithread-t.c ma_maria_log_cleanup.c
+ma_test_loghandler_pagecache_t_SOURCES= ma_test_loghandler_pagecache-t.c ma_maria_log_cleanup.c
+
+mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c
+mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c
+mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN
+
+mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src)
+mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src)
+mf_pagecache_single_64k_t_big_SOURCES = $(mf_pagecache_single_src)
+mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024
+mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192
+mf_pagecache_single_64k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536
+
+mf_pagecache_consist_1k_t_big_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_1k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024
+mf_pagecache_consist_64k_t_big_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_64k_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536
+
+mf_pagecache_consist_1kHC_t_big_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_1kHC_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY
+mf_pagecache_consist_64kHC_t_big_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_64kHC_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY
+
+mf_pagecache_consist_1kRD_t_big_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_1kRD_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS
+mf_pagecache_consist_64kRD_t_big_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_64kRD_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS
+
+mf_pagecache_consist_1kWR_t_big_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_1kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS
+mf_pagecache_consist_64kWR_t_big_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_64kWR_t_big_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS
+
+# The generic lock manager may not be used in the end, and lockman1-t crashes,
+# so we don't build lockman-t or lockman1-t.
+CLEANFILES = maria_control page_cache_test_file_1 \
+ maria_log.????????
+
diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c
new file mode 100644
index 00000000000..8c0f71175e7
--- /dev/null
+++ b/storage/maria/unittest/lockman-t.c
@@ -0,0 +1,309 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ lockman for row and table locks
+*/
+
+/* #define EXTRA_VERBOSE */
+
+#include <tap.h>
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <lf.h>
+#include "../lockman.h"
+
+#define Nlos 100
+LOCK_OWNER loarray[Nlos];
+pthread_mutex_t mutexes[Nlos];
+pthread_cond_t conds[Nlos];
+LOCKMAN lockman;
+
+#ifndef EXTRA_VERBOSE
+#define print_lockhash(X) /* no-op */
+#define DIAG(X) /* no-op */
+#else
+#define DIAG(X) diag X
+#endif
+
+LOCK_OWNER *loid2lo(uint16 loid)
+{
+ return loarray+loid-1;
+}
+
+#define unlock_all(O) diag("lo" #O "> release all locks"); \
+ lockman_release_locks(&lockman, loid2lo(O));print_lockhash(&lockman)
+#define test_lock(O, R, L, S, RES) \
+ ok(lockman_getlock(&lockman, loid2lo(O), R, L) == RES, \
+ "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \
+ print_lockhash(&lockman)
+#define lock_ok_a(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK)
+#define lock_ok_i(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE)
+#define lock_ok_l(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
+#define lock_conflict(O, R, L) \
+ test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK);
+
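+/*
+  Added note: lock_ok_a(1, 1, S) expands (roughly) to
+    ok(lockman_getlock(&lockman, loid2lo(1), 1, S) == GOT_THE_LOCK,
+       "lo1> lock resource 1 with S-lock");
+  so every test_lock-based line below both performs the lock request and
+  reports one TAP result.
+*/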
+void test_lockman_simple()
+{
+ /* simple */
+ lock_ok_a(1, 1, S);
+ lock_ok_i(2, 2, IS);
+ lock_ok_i(1, 2, IX);
+ /* lock escalation */
+ lock_ok_a(1, 1, X);
+ lock_ok_i(2, 2, IX);
+ /* failures */
+ lock_conflict(2, 1, X);
+ unlock_all(2);
+ lock_ok_a(1, 2, S);
+ lock_ok_a(1, 2, IS);
+ lock_ok_a(1, 2, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_a(2, 3, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_l(2, 3, IS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_conflict(2, 1, S);
+ lock_ok_a(1, 1, LS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_a(2, 1, LS);
+ lock_ok_a(1, 1, LS);
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+
+ lock_ok_i(1, 4, IS);
+ lock_ok_i(2, 4, IS);
+ lock_ok_i(3, 4, IS);
+ lock_ok_a(3, 4, LS);
+ lock_ok_i(4, 4, IS);
+ lock_conflict(4, 4, IX);
+ lock_conflict(2, 4, IX);
+ lock_ok_a(1, 4, LS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+ unlock_all(4);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(2, 1, IX);
+ lock_conflict(1, 1, S);
+ lock_conflict(2, 1, X);
+ unlock_all(1);
+ unlock_all(2);
+}
+
+int rt_num_threads;
+int litmus;
+int thread_number= 0, timeouts= 0;
+void run_test(const char *test, pthread_handler handler, int n, int m)
+{
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
+
+ thread_number= timeouts= 0;
+ litmus= 0;
+
+  threads= (pthread_t *)my_malloc(sizeof(pthread_t)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
+ diag("Running %s with %d threads, %d iterations... ", test, n, m);
+ rt_num_threads= n;
+ for (i= 0; i < n ; i++)
+ if (pthread_create(threads+i, 0, handler, &m))
+ {
+ diag("Could not create thread");
+ abort();
+ }
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ my_free((void*)threads, MYF(0));
+}
+
+pthread_mutex_t rt_mutex;
+int Nrows= 100;
+int Ntables= 10;
+int table_lock_ratio= 10;
+enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
+char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
+char *res2str[4]= {
+ "DIDN'T GET THE LOCK",
+ "GOT THE LOCK",
+ "GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
+ "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
+pthread_handler_t test_lockman(void *arg)
+{
+ int m= (*(int *)arg);
+ uint x, loid, row, table, res, locklevel, timeout= 0;
+ LOCK_OWNER *lo;
+
+ pthread_mutex_lock(&rt_mutex);
+ loid= ++thread_number;
+ pthread_mutex_unlock(&rt_mutex);
+ lo= loid2lo(loid);
+
+ for (x= ((int)(intptr)(&m)); m > 0; m--)
+ {
+    /* three prime numbers */
+    x= (uint) ((x*LL(3628273133) + LL(1500450271)) % LL(9576890767));
+ row= x % Nrows + Ntables;
+ table= row % Ntables;
+ locklevel= (x/Nrows) & 3;
+ if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0)
+ { /* table lock */
+ res= lockman_getlock(&lockman, lo, table, lock_array[locklevel]);
+ DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
+ lock2str[locklevel], res2str[res]));
+ if (res == DIDNT_GET_THE_LOCK)
+ {
+ lockman_release_locks(&lockman, lo);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ }
+ else
+ { /* row lock */
+ locklevel&= 1;
+ res= lockman_getlock(&lockman, lo, table, lock_array[locklevel + 4]);
+ DIAG(("loid %2d, row %d, lock %s, res %s", loid, row,
+ lock2str[locklevel+4], res2str[res]));
+ switch (res)
+ {
+ case DIDNT_GET_THE_LOCK:
+ lockman_release_locks(&lockman, lo);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ case GOT_THE_LOCK:
+ continue;
+ case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
+ /* not implemented, so take a regular lock */
+ case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE:
+ res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]);
+ DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row,
+ lock2str[locklevel], res2str[res]));
+ if (res == DIDNT_GET_THE_LOCK)
+ {
+ lockman_release_locks(&lockman, lo);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ continue;
+ default:
+ DBUG_ASSERT(0);
+ }
+ }
+ }
+
+ lockman_release_locks(&lockman, lo);
+
+ pthread_mutex_lock(&rt_mutex);
+ rt_num_threads--;
+ timeouts+= timeout;
+ if (!rt_num_threads)
+ diag("number of timeouts: %d", timeouts);
+ pthread_mutex_unlock(&rt_mutex);
+
+ return 0;
+}
+
+int main()
+{
+ int i;
+
+ my_init();
+ pthread_mutex_init(&rt_mutex, 0);
+
+ plan(35);
+
+ if (my_atomic_initialize())
+ return exit_status();
+
+
+ lockman_init(&lockman, &loid2lo, 50);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ loarray[i].pins= lf_alloc_get_pins(&lockman.alloc);
+ loarray[i].all_locks= 0;
+ loarray[i].waiting_for= 0;
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init (&conds[i], 0);
+ loarray[i].mutex= &mutexes[i];
+ loarray[i].cond= &conds[i];
+ loarray[i].loid= i+1;
+ }
+
+ test_lockman_simple();
+
+#define CYCLES 10000
+#define THREADS Nlos /* don't change this line */
+
+ /* mixed load, stress-test with random locks */
+ Nrows= 100;
+ Ntables= 10;
+ table_lock_ratio= 10;
+ run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES);
+
+ /* "real-life" simulation - many rows, no table locks */
+ Nrows= 1000000;
+ Ntables= 10;
+ table_lock_ratio= 0;
+ run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ lockman_release_locks(&lockman, &loarray[i]);
+ pthread_mutex_destroy(loarray[i].mutex);
+ pthread_cond_destroy(loarray[i].cond);
+ lf_pinbox_put_pins(loarray[i].pins);
+ }
+
+ {
+ ulonglong now= my_getsystime();
+ lockman_destroy(&lockman);
+ now= my_getsystime()-now;
+ diag("lockman_destroy: %g secs", ((double)now)/1e7);
+ }
+
+ pthread_mutex_destroy(&rt_mutex);
+ my_end(0);
+ return exit_status();
+}
+
diff --git a/storage/maria/unittest/lockman1-t.c b/storage/maria/unittest/lockman1-t.c
new file mode 100644
index 00000000000..41a1f0fd2f4
--- /dev/null
+++ b/storage/maria/unittest/lockman1-t.c
@@ -0,0 +1,335 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ lockman for row locks, tablockman for table locks
+*/
+
+/* #define EXTRA_VERBOSE */
+
+#include <tap.h>
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <lf.h>
+#include "../lockman.h"
+#include "../tablockman.h"
+
+#define Nlos 100
+#define Ntbls 10
+LOCK_OWNER loarray[Nlos];
+TABLE_LOCK_OWNER loarray1[Nlos];
+pthread_mutex_t mutexes[Nlos];
+pthread_cond_t conds[Nlos];
+LOCKED_TABLE ltarray[Ntbls];
+LOCKMAN lockman;
+TABLOCKMAN tablockman;
+
+#ifndef EXTRA_VERBOSE
+#define print_lo1(X) /* no-op */
+#define DIAG(X) /* no-op */
+#else
+#define DIAG(X) diag X
+#endif
+
+LOCK_OWNER *loid2lo(uint16 loid)
+{
+ return loarray+loid-1;
+}
+TABLE_LOCK_OWNER *loid2lo1(uint16 loid)
+{
+ return loarray1+loid-1;
+}
+
+#define unlock_all(O) diag("lo" #O "> release all locks"); \
+ tablockman_release_locks(&tablockman, loid2lo1(O));
+#define test_lock(O, R, L, S, RES) \
+ ok(tablockman_getlock(&tablockman, loid2lo1(O), &ltarray[R], L) == RES, \
+ "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \
+ print_lo1(loid2lo1(O));
+#define lock_ok_a(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK)
+#define lock_ok_i(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE)
+#define lock_ok_l(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
+#define lock_conflict(O, R, L) \
+ test_lock(O, R, L, "cannot ", LOCK_TIMEOUT);
+
+void test_tablockman_simple()
+{
+ /* simple */
+ lock_ok_a(1, 1, S);
+ lock_ok_i(2, 2, IS);
+ lock_ok_i(1, 2, IX);
+ /* lock escalation */
+ lock_ok_a(1, 1, X);
+ lock_ok_i(2, 2, IX);
+ /* failures */
+ lock_conflict(2, 1, X);
+ unlock_all(2);
+ lock_ok_a(1, 2, S);
+ lock_ok_a(1, 2, IS);
+ lock_ok_a(1, 2, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_a(2, 3, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_l(2, 3, IS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_conflict(2, 1, S);
+ lock_ok_a(1, 1, LS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_a(2, 1, LS);
+ lock_ok_a(1, 1, LS);
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+
+ lock_ok_i(1, 4, IS);
+ lock_ok_i(2, 4, IS);
+ lock_ok_i(3, 4, IS);
+ lock_ok_a(3, 4, LS);
+ lock_ok_i(4, 4, IS);
+ lock_conflict(4, 4, IX);
+ lock_conflict(2, 4, IX);
+ lock_ok_a(1, 4, LS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+ unlock_all(4);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(2, 1, IX);
+ lock_conflict(1, 1, S);
+ lock_conflict(2, 1, X);
+ unlock_all(1);
+ unlock_all(2);
+}
+
+int rt_num_threads;
+int litmus;
+int thread_number= 0, timeouts= 0;
+void run_test(const char *test, pthread_handler handler, int n, int m)
+{
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
+
+ thread_number= timeouts= 0;
+ litmus= 0;
+
+  threads= (pthread_t *)my_malloc(sizeof(pthread_t)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
+ diag("Running %s with %d threads, %d iterations... ", test, n, m);
+ rt_num_threads= n;
+ for (i= 0; i < n ; i++)
+ if (pthread_create(threads+i, 0, handler, &m))
+ {
+ diag("Could not create thread");
+ abort();
+ }
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ my_free((void*)threads, MYF(0));
+}
+
+pthread_mutex_t rt_mutex;
+int Nrows= 100;
+int Ntables= 10;
+int table_lock_ratio= 10;
+enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
+char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
+char *res2str[]= {
+ "DIDN'T GET THE LOCK",
+ "OUT OF MEMORY",
+ "DEADLOCK",
+ "LOCK TIMEOUT",
+ "GOT THE LOCK",
+ "GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
+ "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
+pthread_handler_t test_lockman(void *arg)
+{
+ int m= (*(int *)arg);
+ uint x, loid, row, table, res, locklevel, timeout= 0;
+  LOCK_OWNER *lo;
+  TABLE_LOCK_OWNER *lo1;
+  DBUG_ASSERT(Ntables <= Ntbls);
+
+ pthread_mutex_lock(&rt_mutex);
+ loid= ++thread_number;
+ pthread_mutex_unlock(&rt_mutex);
+  lo= loid2lo(loid);
+  lo1= loid2lo1(loid);
+
+ for (x= ((int)(intptr)(&m)); m > 0; m--)
+ {
+    /* three prime numbers */
+    x= (uint) ((x*LL(3628273133) + LL(1500450271)) % LL(9576890767));
+ row= x % Nrows + Ntables;
+ table= row % Ntables;
+ locklevel= (x/Nrows) & 3;
+ if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0)
+ { /* table lock */
+ res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]);
+ DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
+ lock2str[locklevel], res2str[res]));
+ if (res < GOT_THE_LOCK)
+ {
+        lockman_release_locks(&lockman, lo);
+        tablockman_release_locks(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ }
+ else
+ { /* row lock */
+ locklevel&= 1;
+ res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]);
+ DIAG(("loid %2d, row %d, lock %s, res %s", loid, row,
+ lock2str[locklevel+4], res2str[res]));
+ switch (res)
+ {
+ case GOT_THE_LOCK:
+ continue;
+ case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
+ /* not implemented, so take a regular lock */
+ case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE:
+ res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]);
+ DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row,
+ lock2str[locklevel], res2str[res]));
+ if (res == DIDNT_GET_THE_LOCK)
+ {
+ lockman_release_locks(&lockman, lo);
+ tablockman_release_locks(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ continue;
+ default:
+        lockman_release_locks(&lockman, lo);
+        tablockman_release_locks(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ }
+ }
+
+ lockman_release_locks(&lockman, lo);
+ tablockman_release_locks(&tablockman, lo1);
+
+ pthread_mutex_lock(&rt_mutex);
+ rt_num_threads--;
+ timeouts+= timeout;
+ if (!rt_num_threads)
+ diag("number of timeouts: %d", timeouts);
+ pthread_mutex_unlock(&rt_mutex);
+
+ return 0;
+}
+
+int main()
+{
+ int i;
+
+ my_init();
+ pthread_mutex_init(&rt_mutex, 0);
+
+ plan(35);
+
+ if (my_atomic_initialize())
+ return exit_status();
+
+
+ lockman_init(&lockman, &loid2lo, 50);
+ tablockman_init(&tablockman, &loid2lo1, 50);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init (&conds[i], 0);
+
+ loarray[i].pins= lf_alloc_get_pins(&lockman.alloc);
+ loarray[i].all_locks= 0;
+ loarray[i].waiting_for= 0;
+ loarray[i].mutex= &mutexes[i];
+ loarray[i].cond= &conds[i];
+ loarray[i].loid= i+1;
+
+ loarray1[i].active_locks= 0;
+ loarray1[i].waiting_lock= 0;
+ loarray1[i].waiting_for= 0;
+ loarray1[i].mutex= &mutexes[i];
+ loarray1[i].cond= &conds[i];
+ loarray1[i].loid= i+1;
+ }
+
+ for (i= 0; i < Ntbls; i++)
+ {
+ tablockman_init_locked_table(ltarray+i, Nlos);
+ }
+
+ test_tablockman_simple();
+
+#define CYCLES 10000
+#define THREADS Nlos /* don't change this line */
+
+ /* mixed load, stress-test with random locks */
+ Nrows= 100;
+ Ntables= 10;
+ table_lock_ratio= 10;
+ run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES);
+
+ /* "real-life" simulation - many rows, no table locks */
+ Nrows= 1000000;
+ Ntables= 10;
+ table_lock_ratio= 0;
+ run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ lockman_release_locks(&lockman, &loarray[i]);
+ pthread_mutex_destroy(loarray[i].mutex);
+ pthread_cond_destroy(loarray[i].cond);
+ lf_pinbox_put_pins(loarray[i].pins);
+ }
+
+ {
+ ulonglong now= my_getsystime();
+ lockman_destroy(&lockman);
+ now= my_getsystime()-now;
+ diag("lockman_destroy: %g secs", ((double)now)/1e7);
+ }
+
+ pthread_mutex_destroy(&rt_mutex);
+ my_end(0);
+ return exit_status();
+}
+
diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c
new file mode 100644
index 00000000000..01af1a03d22
--- /dev/null
+++ b/storage/maria/unittest/lockman2-t.c
@@ -0,0 +1,361 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ tablockman for row and table locks
+*/
+
+/* #define EXTRA_VERBOSE */
+
+#include <tap.h>
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <lf.h>
+#include "../tablockman.h"
+
+#define Nlos 100
+#define Ntbls 110
+TABLE_LOCK_OWNER loarray1[Nlos];
+pthread_mutex_t mutexes[Nlos];
+pthread_cond_t conds[Nlos];
+LOCKED_TABLE ltarray[Ntbls];
+TABLOCKMAN tablockman;
+
+#ifndef EXTRA_VERBOSE
+#define print_lo1(X) /* no-op */
+#define DIAG(X) /* no-op */
+#else
+#define DIAG(X) diag X
+#endif
+
+TABLE_LOCK_OWNER *loid2lo1(uint16 loid)
+{
+ return loarray1+loid-1;
+}
+
+#define unlock_all(O) diag("lo" #O "> release all locks"); \
+ tablockman_release_locks(&tablockman, loid2lo1(O));
+#define test_lock(O, R, L, S, RES) \
+ ok(tablockman_getlock(&tablockman, loid2lo1(O), &ltarray[R], L) == RES, \
+ "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \
+ print_lo1(loid2lo1(O));
+#define lock_ok_a(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK)
+#define lock_ok_i(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE)
+#define lock_ok_l(O, R, L) \
+ test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
+#define lock_conflict(O, R, L) \
+ test_lock(O, R, L, "cannot ", LOCK_TIMEOUT);
+
+void test_tablockman_simple()
+{
+ /* simple */
+ lock_ok_a(1, 1, S);
+ lock_ok_i(2, 2, IS);
+ lock_ok_i(1, 2, IX);
+ /* lock escalation */
+ lock_ok_a(1, 1, X);
+ lock_ok_i(2, 2, IX);
+ /* failures */
+ lock_conflict(2, 1, X);
+ unlock_all(2);
+ lock_ok_a(1, 2, S);
+ lock_ok_a(1, 2, IS);
+ lock_ok_a(1, 2, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_a(2, 3, LS);
+ lock_ok_i(1, 3, IX);
+ lock_ok_l(2, 3, IS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_conflict(2, 1, S);
+ lock_ok_a(1, 1, LS);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_a(2, 1, LS);
+ lock_ok_a(1, 1, LS);
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+
+ lock_ok_i(1, 4, IS);
+ lock_ok_i(2, 4, IS);
+ lock_ok_i(3, 4, IS);
+ lock_ok_a(3, 4, LS);
+ lock_ok_i(4, 4, IS);
+ lock_conflict(4, 4, IX);
+ lock_conflict(2, 4, IX);
+ lock_ok_a(1, 4, LS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+ unlock_all(4);
+
+ lock_ok_i(1, 1, IX);
+ lock_ok_i(2, 1, IX);
+ lock_conflict(1, 1, S);
+ lock_conflict(2, 1, X);
+ unlock_all(1);
+ unlock_all(2);
+
+ lock_ok_i(1, 1, IS);
+ lock_conflict(2, 1, X);
+ lock_conflict(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+
+ lock_ok_a(1, 1, S);
+ lock_conflict(2, 1, IX);
+ lock_conflict(3, 1, IS);
+ unlock_all(1);
+ unlock_all(2);
+ unlock_all(3);
+}
+
+int rt_num_threads;
+int litmus;
+int thread_number= 0, timeouts= 0;
+void run_test(const char *test, pthread_handler handler, int n, int m)
+{
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
+
+ thread_number= timeouts= 0;
+ litmus= 0;
+
+  threads= (pthread_t *)my_malloc(sizeof(pthread_t)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
+ diag("Running %s with %d threads, %d iterations... ", test, n, m);
+ rt_num_threads= n;
+ for (i= 0; i < n ; i++)
+ if (pthread_create(threads+i, 0, handler, &m))
+ {
+ diag("Could not create thread");
+ abort();
+ }
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ my_free((void*)threads, MYF(0));
+}
+
+static void reinit_tlo(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
+{
+#ifdef NOT_USED_YET
+ TABLE_LOCK_OWNER backup= *lo;
+#endif
+
+ tablockman_release_locks(lm, lo);
+#ifdef NOT_USED_YET
+ pthread_mutex_destroy(lo->mutex);
+ pthread_cond_destroy(lo->cond);
+ bzero(lo, sizeof(*lo));
+
+ lo->mutex= backup.mutex;
+ lo->cond= backup.cond;
+ lo->loid= backup.loid;
+ pthread_mutex_init(lo->mutex, MY_MUTEX_INIT_FAST);
+ pthread_cond_init(lo->cond, 0);
+#endif
+}
+
+pthread_mutex_t rt_mutex;
+int Nrows= 100;
+int Ntables= 10;
+int table_lock_ratio= 10;
+enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
+const char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
+const char *res2str[]= {
+ 0,
+ "OUT OF MEMORY",
+ "DEADLOCK",
+ "LOCK TIMEOUT",
+ "GOT THE LOCK",
+ "GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
+ "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
+
+pthread_handler_t test_lockman(void *arg)
+{
+ int m= (*(int *)arg);
+ uint x, loid, row, table, res, locklevel, timeout= 0;
+ TABLE_LOCK_OWNER *lo1;
+ DBUG_ASSERT(Ntables <= Ntbls);
+ DBUG_ASSERT(Nrows + Ntables <= Ntbls);
+
+ pthread_mutex_lock(&rt_mutex);
+ loid= ++thread_number;
+ pthread_mutex_unlock(&rt_mutex);
+ lo1= loid2lo1(loid);
+
+ for (x= ((int)(intptr)(&m)); m > 0; m--)
+ {
+ /* three prime numbers */
+ x= (uint) ((x*LL(3628273133) + LL(1500450271)) % LL(9576890767));
+ row= x % Nrows + Ntables;
+ table= row % Ntables;
+ locklevel= (x/Nrows) & 3;
+ if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0)
+ {
+ /* table lock */
+ res= tablockman_getlock(&tablockman, lo1, ltarray+table,
+ lock_array[locklevel]);
+ DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
+ lock2str[locklevel], res2str[res]));
+ if (res < GOT_THE_LOCK)
+ {
+ reinit_tlo(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ }
+ else
+ { /* row lock */
+ locklevel&= 1;
+ res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]);
+ DIAG(("loid %2d, row %d, lock %s, res %s", loid, row,
+ lock2str[locklevel+4], res2str[res]));
+ switch (res)
+ {
+ case GOT_THE_LOCK:
+ continue;
+ case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
+ /* not implemented, so take a regular lock */
+ case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE:
+ res= tablockman_getlock(&tablockman, lo1, ltarray+row, lock_array[locklevel]);
+ DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row,
+ lock2str[locklevel], res2str[res]));
+ if (res < GOT_THE_LOCK)
+ {
+ reinit_tlo(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ DBUG_ASSERT(res == GOT_THE_LOCK);
+ continue;
+ default:
+ reinit_tlo(&tablockman, lo1);
+ DIAG(("loid %2d, release all locks", loid));
+ timeout++;
+ continue;
+ }
+ }
+ }
+
+ reinit_tlo(&tablockman, lo1);
+
+ pthread_mutex_lock(&rt_mutex);
+ rt_num_threads--;
+ timeouts+= timeout;
+ if (!rt_num_threads)
+ diag("number of timeouts: %d", timeouts);
+ pthread_mutex_unlock(&rt_mutex);
+
+ return 0;
+}
+
+int main()
+{
+ int i;
+
+ my_init();
+ pthread_mutex_init(&rt_mutex, 0);
+
+ plan(40);
+
+ if (my_atomic_initialize())
+ return exit_status();
+
+
+ tablockman_init(&tablockman, &loid2lo1, 50);
+
+ for (i= 0; i < Nlos; i++)
+ {
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init (&conds[i], 0);
+
+ loarray1[i].active_locks= 0;
+ loarray1[i].waiting_lock= 0;
+ loarray1[i].waiting_for= 0;
+ loarray1[i].mutex= &mutexes[i];
+ loarray1[i].cond= &conds[i];
+ loarray1[i].loid= i+1;
+ }
+
+ for (i= 0; i < Ntbls; i++)
+ {
+ tablockman_init_locked_table(ltarray+i, Nlos);
+ }
+
+ test_tablockman_simple();
+
+#define CYCLES 10000
+#define THREADS Nlos /* don't change this line */
+
+ /* mixed load, stress-test with random locks */
+ Nrows= 100;
+ Ntables= 10;
+ table_lock_ratio= 10;
+ run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES);
+#if 0
+ /* "real-life" simulation - many rows, no table locks */
+ Nrows= 1000000;
+ Ntables= 10;
+ table_lock_ratio= 0;
+ run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10);
+#endif
+ for (i= 0; i < Nlos; i++)
+ {
+ tablockman_release_locks(&tablockman, &loarray1[i]);
+ pthread_mutex_destroy(loarray1[i].mutex);
+ pthread_cond_destroy(loarray1[i].cond);
+ }
+
+ {
+ ulonglong now= my_getsystime();
+ for (i= 0; i < Ntbls; i++)
+ {
+ tablockman_destroy_locked_table(ltarray+i);
+ }
+ tablockman_destroy(&tablockman);
+ now= my_getsystime()-now;
+ diag("lockman_destroy: %g secs", ((double)now)/1e7);
+ }
+
+ pthread_mutex_destroy(&rt_mutex);
+ my_end(0);
+ return exit_status();
+}
+
diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c
new file mode 100644
index 00000000000..1b37ee3f53c
--- /dev/null
+++ b/storage/maria/unittest/ma_control_file-t.c
@@ -0,0 +1,446 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Unit test of the control file module of the Maria engine WL#3234 */
+
+/*
+ Note that it is not possible to test the durability of the write (can't
+ pull the plug programmatically :)
+*/
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <tap.h>
+
+#ifndef WITH_MARIA_STORAGE_ENGINE
+/*
+ If Maria is not compiled in, normally we don't come to building this test.
+*/
+#error "Maria engine is not compiled in, test cannot be built"
+#endif
+
+#include "maria.h"
+#include "../../../storage/maria/maria_def.h"
+#include <my_getopt.h>
+
+char file_name[FN_REFLEN];
+
+/* The values we'll set and expect the control file module to return */
+LSN expect_checkpoint_lsn;
+uint32 expect_logno;
+
+static int delete_file(myf my_flags);
+/*
+  These are test-specific wrappers around the module's API functions: after
+  calling them, they perform checks on the result.
+*/
+static int close_file(); /* wraps ma_control_file_end */
+static int create_or_open_file(); /* wraps ma_control_file_open_or_create */
+static int write_file(); /* wraps ma_control_file_write_and_force */
+
+/* Tests */
+static int test_one_log();
+static int test_five_logs();
+static int test_3_checkpoints_and_2_logs();
+static int test_binary_content();
+static int test_start_stop();
+static int test_2_open_and_2_close();
+static int test_bad_magic_string();
+static int test_bad_checksum();
+static int test_bad_size();
+
+/* Utility */
+static int verify_module_values_match_expected();
+static int verify_module_values_are_impossible();
+static void usage();
+static void get_options(int argc, char *argv[]);
+
+/*
+ If "expr" is FALSE, this macro will make the function print a diagnostic
+ message and immediately return 1.
+  This is inspired by assert() but does not crash the binary (sometimes we
+ may want to see how other tests go even if one fails).
+ RET_ERR means "return error".
+*/
+
+#define RET_ERR_UNLESS(expr) \
+ {if (!(expr)) {diag("line %d: failure: '%s'", __LINE__, #expr); return 1;}}
+
+
+int main(int argc,char *argv[])
+{
+ MY_INIT(argv[0]);
+ maria_data_root= ".";
+
+ plan(9);
+
+ diag("Unit tests for control file");
+
+ get_options(argc,argv);
+
+ diag("Deleting control file at startup, if there is an old one");
+ RET_ERR_UNLESS(0 == delete_file(0)); /* if fails, can't continue */
+
+ diag("Tests of normal conditions");
+ ok(0 == test_one_log(), "test of creating one log");
+ ok(0 == test_five_logs(), "test of creating five logs");
+ ok(0 == test_3_checkpoints_and_2_logs(),
+ "test of creating three checkpoints and two logs");
+ ok(0 == test_binary_content(), "test of the binary content of the file");
+ ok(0 == test_start_stop(), "test of multiple starts and stops");
+ diag("Tests of abnormal conditions");
+ ok(0 == test_2_open_and_2_close(),
+ "test of two open and two close (strange call sequence)");
+ ok(0 == test_bad_magic_string(), "test of bad magic string");
+ ok(0 == test_bad_checksum(), "test of bad checksum");
+ ok(0 == test_bad_size(), "test of too small/big file");
+
+ return exit_status();
+}
+
+
+static int delete_file(myf my_flags)
+{
+ RET_ERR_UNLESS(fn_format(file_name, CONTROL_FILE_BASE_NAME,
+ maria_data_root, "", MYF(MY_WME)) != NullS);
+ /*
+    The file may not exist; ignore the error.
+    The error will, however, still be printed on stderr.
+ */
+ my_delete(file_name, my_flags);
+ expect_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN;
+ expect_logno= CONTROL_FILE_IMPOSSIBLE_FILENO;
+
+ return 0;
+}
+
+/*
+ Verifies that global values last_checkpoint_lsn and last_logno (belonging
+ to the module) match what we expect.
+*/
+static int verify_module_values_match_expected()
+{
+ RET_ERR_UNLESS(last_logno == expect_logno);
+ RET_ERR_UNLESS(last_checkpoint_lsn ==
+ expect_checkpoint_lsn);
+ return 0;
+}
+
+
+/*
+ Verifies that global values last_checkpoint_lsn and last_logno (belonging
+ to the module) are impossible (this is used when the file has been closed).
+*/
+static int verify_module_values_are_impossible()
+{
+ RET_ERR_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO);
+ RET_ERR_UNLESS(last_checkpoint_lsn ==
+ CONTROL_FILE_IMPOSSIBLE_LSN);
+ return 0;
+}
+
+
+static int close_file()
+{
+ /* Simulate shutdown */
+ ma_control_file_end();
+ /* Verify amnesia */
+ RET_ERR_UNLESS(verify_module_values_are_impossible() == 0);
+ return 0;
+}
+
+static int create_or_open_file()
+{
+ RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_OK);
+ /* Check that the module reports expected information */
+ RET_ERR_UNLESS(verify_module_values_match_expected() == 0);
+ return 0;
+}
+
+static int write_file(const LSN checkpoint_lsn,
+ uint32 logno,
+ uint objs_to_write)
+{
+ RET_ERR_UNLESS(ma_control_file_write_and_force(checkpoint_lsn, logno,
+ objs_to_write) == 0);
+ /* Check that the module reports expected information */
+ RET_ERR_UNLESS(verify_module_values_match_expected() == 0);
+ return 0;
+}
+
+static int test_one_log()
+{
+ uint objs_to_write;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
+ expect_logno= 123;
+ RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN,
+ expect_logno,
+ objs_to_write) == 0);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_five_logs()
+{
+ uint objs_to_write;
+ uint i;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
+ expect_logno= 100;
+ for (i= 0; i<5; i++)
+ {
+ expect_logno*= 3;
+ RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, expect_logno,
+ objs_to_write) == 0);
+ }
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_3_checkpoints_and_2_logs()
+{
+ uint objs_to_write;
+ /*
+ Simulate one checkpoint, one log creation, two checkpoints, one
+ log creation.
+ */
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN;
+ expect_checkpoint_lsn= MAKE_LSN(5, 10000);
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
+ expect_logno= 17;
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN;
+ expect_checkpoint_lsn= MAKE_LSN(17, 20000);
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN;
+ expect_checkpoint_lsn= MAKE_LSN(17, 45000);
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+
+ objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO;
+ expect_logno= 19;
+ RET_ERR_UNLESS(write_file(expect_checkpoint_lsn,
+ expect_logno, objs_to_write) == 0);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_binary_content()
+{
+ uint i;
+ int fd;
+
+ /*
+    TEST4: check the content of the file ourselves.
+ Note that constants (offsets) are hard-coded here, precisely to prevent
+ someone from changing them in the control file module and breaking
+ backward-compatibility.
+    TODO: when we reach the format-freeze state, we may even just do a
+    comparison with a raw binary string, so as not to depend on any future
+    uint4korr change/breakage.
+ */
+
+ char buffer[20];
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_read(fd, buffer, 20, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ i= uint3korr(buffer+9);
+ RET_ERR_UNLESS(i == LSN_FILE_NO(last_checkpoint_lsn));
+ i= uint4korr(buffer+12);
+ RET_ERR_UNLESS(i == LSN_OFFSET(last_checkpoint_lsn));
+ i= uint4korr(buffer+16);
+ RET_ERR_UNLESS(i == last_logno);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
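+
+/*
+  Added note: the offsets read and corrupted by these tests imply the
+  following control-file layout (inferred from this test only, not
+  authoritative):
+    [0..3]   start of the magic string (corrupted in test_bad_magic_string)
+    [8]      part of the checksum (corrupted in test_bad_checksum)
+    [9..11]  last checkpoint LSN, file number (uint3korr)
+    [12..15] last checkpoint LSN, offset within the file (uint4korr)
+    [16..19] last log file number (uint4korr)
+*/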
+
+static int test_start_stop()
+{
+ /* TEST5: Simulate start/nothing/stop/start/nothing/stop/start */
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_2_open_and_2_close()
+{
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+
+static int test_bad_magic_string()
+{
+ char buffer[4];
+ int fd;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+
+ /* Corrupt magic string */
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_pread(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_pwrite(fd, "papa", 4, 0, MYF(MY_FNABP | MY_WME)) == 0);
+
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(ma_control_file_create_or_open() ==
+ CONTROL_FILE_BAD_MAGIC_STRING);
+ /* Restore magic string */
+ RET_ERR_UNLESS(my_pwrite(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+ return 0;
+}
+
+static int test_bad_checksum()
+{
+ char buffer[4];
+ int fd;
+
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+
+ /* Corrupt checksum */
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_pread(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0);
+ buffer[0]+= 3; /* mangle checksum */
+ RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0);
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(ma_control_file_create_or_open() ==
+ CONTROL_FILE_BAD_CHECKSUM);
+ /* Restore checksum */
+ buffer[0]-= 3;
+  RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+
+ return 0;
+}
+
+
+static int test_bad_size()
+{
+ char buffer[]="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+ int fd;
+
+ /* A too short file */
+ RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0);
+ RET_ERR_UNLESS((fd= my_open(file_name,
+ O_BINARY | O_RDWR | O_CREAT,
+ MYF(MY_WME))) >= 0);
+ RET_ERR_UNLESS(my_write(fd, buffer, 10, MYF(MY_FNABP | MY_WME)) == 0);
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_SMALL);
+ RET_ERR_UNLESS(my_write(fd, buffer, 30, MYF(MY_FNABP | MY_WME)) == 0);
+ /* Check that control file module sees the problem */
+ RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_BIG);
+ RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
+
+ /* Leave a correct control file */
+ RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0);
+ RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
+ RET_ERR_UNLESS(close_file() == 0);
+
+ return 0;
+}
+
+
+static struct my_option my_long_options[] =
+{
+#ifndef DBUG_OFF
+ {"debug", '#', "Debug log.",
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#endif
+ {"help", '?', "Display help and exit",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"version", 'V', "Print version number and exit",
+ 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
+ { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
+};
+
+
+static void version()
+{
+ printf("ma_control_file_test: unit test for the control file "
+ "module of the Maria storage engine. Ver 1.0 \n");
+}
+
+static my_bool
+get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
+ char *argument)
+{
+ switch(optid) {
+ case 'V':
+ version();
+ exit(0);
+ case '#':
+ DBUG_PUSH (argument);
+ break;
+ case '?':
+ version();
+ usage();
+ exit(0);
+ }
+ return 0;
+}
+
+
+/* Read options */
+
+static void get_options(int argc, char *argv[])
+{
+ int ho_error;
+
+ if ((ho_error=handle_options(&argc, &argv, my_long_options,
+ get_one_option)))
+ exit(ho_error);
+
+ return;
+} /* get options */
+
+
+static void usage()
+{
+ printf("Usage: %s [options]\n\n", my_progname);
+ my_print_help(my_long_options);
+ my_print_variables(my_long_options);
+}
diff --git a/storage/maria/unittest/ma_maria_log_cleanup.c b/storage/maria/unittest/ma_maria_log_cleanup.c
new file mode 100644
index 00000000000..c5917764b9b
--- /dev/null
+++ b/storage/maria/unittest/ma_maria_log_cleanup.c
@@ -0,0 +1,45 @@
+#include "../maria_def.h"
+#include <my_dir.h>
+
+my_bool maria_log_remove()
+{
+ MY_DIR *dirp;
+ uint i;
+ MY_STAT stat_buff;
+ char file_name[FN_REFLEN];
+
+ /* Removes control file */
+ if (fn_format(file_name, CONTROL_FILE_BASE_NAME,
+ maria_data_root, "", MYF(MY_WME)) == NullS)
+ return 1;
+ if (my_stat(file_name, &stat_buff, MYF(0)) &&
+ my_delete(file_name, MYF(MY_WME)) != 0)
+ return 1;
+
+ /* Finds and removes transaction log files */
+ if (!(dirp = my_dir(maria_data_root, MYF(MY_DONT_SORT))))
+ return 1;
+
+ for (i= 0; i < dirp->number_off_files; i++)
+ {
+ char *file= dirp->dir_entry[i].name;
+ if (strncmp(file, "maria_log.", 10) == 0 &&
+ file[10] >= '0' && file[10] <= '9' &&
+ file[11] >= '0' && file[11] <= '9' &&
+ file[12] >= '0' && file[12] <= '9' &&
+ file[13] >= '0' && file[13] <= '9' &&
+ file[14] >= '0' && file[14] <= '9' &&
+ file[15] >= '0' && file[15] <= '9' &&
+ file[16] >= '0' && file[16] <= '9' &&
+ file[17] >= '0' && file[17] <= '9' &&
+ file[18] == '\0')
+ {
+ if (fn_format(file_name, file,
+ maria_data_root, "", MYF(MY_WME)) == NullS ||
+ my_delete(file_name, MYF(MY_WME)) != 0)
+ return 1;
+ }
+ }
+ return 0;
+}
+
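+/*
+  Sketch (added for illustration; not used by the function above): the eight
+  explicit digit comparisons could equivalently be factored into a small
+  helper. The name and the MARIA_LOG_CLEANUP_SKETCH guard are made up; the
+  guard is never defined, so this block is not compiled.
+*/
+#ifdef MARIA_LOG_CLEANUP_SKETCH
+static my_bool is_maria_log_name(const char *file)
+{
+  uint i;
+  if (strncmp(file, "maria_log.", 10) != 0)
+    return 0;
+  for (i= 10; i < 18; i++)                       /* exactly eight digits */
+  {
+    if (file[i] < '0' || file[i] > '9')
+      return 0;
+  }
+  return file[18] == '\0';
+}
+#endif
+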
diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c
new file mode 100644
index 00000000000..757520322c8
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler-t.c
@@ -0,0 +1,578 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+
+extern my_bool maria_log_remove();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+
+#define LONG_BUFFER_SIZE (100 * 1024)
+
+
+#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC
+#define LOG_FILE_SIZE 1024L*1024L*3L
+#define ITERATIONS 1600
+
+/*
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE 1024L*1024L*1024L
+#define ITERATIONS 181000
+*/
+
+/*
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE 1024L*1024L*3L
+#define ITERATIONS 1600
+*/
+
+/*
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE 1024L*1024L*100L
+#define ITERATIONS 65000
+*/
+
+/*
+  Check that the buffer was filled correctly
+
+ SYNOPSIS
+ check_content()
+ ptr Pointer to the buffer
+ length length of the buffer
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool check_content(byte *ptr, ulong length)
+{
+ ulong i;
+ byte buff[2];
+ for (i= 0; i < length; i++)
+ {
+ if (i % 2 == 0)
+ int2store(buff, i >> 1);
+ if (ptr[i] != buff[i % 2])
+ {
+ fprintf(stderr, "Byte # %lu is %x instead of %x",
+ i, (uint) ptr[i], (uint) buff[i % 2]);
+ return 1;
+ }
+ }
+ return 0;
+}
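+
+/*
+  Added note: main() fills the buffer with int2store(long_buffer + i, i >> 1)
+  at every even offset, and int2store always stores the low byte first, so
+  the expected byte stream is 00 00 01 00 02 00 03 00 ... regardless of host
+  byte order; check_content() verifies exactly that pattern.
+*/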
+
+
+/*
+  Read the whole record content and check it (it was stored with an offset)
+
+ SYNOPSIS
+ read_and_check_content()
+ rec The record header buffer
+ buffer The buffer to read the record in
+      skip               Skip this number of bytes of the record content
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec,
+ byte *buffer, uint skip)
+{
+ DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE * 2 + 7 * 2 + 2);
+ if (translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL) !=
+ rec->record_length)
+ return 1;
+ return check_content(buffer + skip, rec->record_length - skip);
+}
+
+int main(int argc, char *argv[])
+{
+ uint32 i;
+ uint32 rec_len;
+ uint pagen;
+ byte long_tr_id[6];
+ byte lsn_buff[23]=
+ {
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55
+ };
+ byte long_buffer[LONG_BUFFER_SIZE * 2 + LSN_STORE_SIZE * 2 + 2];
+ PAGECACHE pagecache;
+ LSN lsn, lsn_base, first_lsn;
+ TRANSLOG_HEADER_BUFFER rec;
+ struct st_translog_scanner_data scanner;
+ int rc;
+
+ MY_INIT(argv[0]);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+
+ for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i+= 2)
+ {
+ int2store(long_buffer + i, (i >> 1));
+ /* long_buffer[i]= (i & 0xFF); */
+ }
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ plan(((ITERATIONS - 1) * 4 + 1)*2 + ITERATIONS - 1);
+
+ srandom(122334817L);
+
+ long_tr_id[5]= 0xff;
+
+ int4store(long_tr_id, 0);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ 0, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ ok(0, "write LOGREC_LONG_TRANSACTION_ID");
+ exit(1);
+ }
+ ok(1, "write LOGREC_LONG_TRANSACTION_ID");
+ lsn_base= first_lsn= lsn;
+
+ for (i= 1; i < ITERATIONS; i++)
+ {
+ if (i % 2)
+ {
+ lsn_store(lsn_buff, lsn_base);
+ if (translog_write_record(&lsn,
+ LOGREC_CLR_END,
+ (i % 0xFFFF), NULL, 7, lsn_buff, 0))
+ {
+ fprintf(stderr, "1 Can't write reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_CLR_END");
+ exit(1);
+ }
+ ok(1, "write LOGREC_CLR_END");
+ lsn_store(lsn_buff, lsn_base);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12)
+ rec_len= 12;
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_KEY_INSERT,
+ (i % 0xFFFF),
+ NULL, 7, lsn_buff, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "1 Can't write var reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_UNDO_KEY_INSERT");
+ exit(1);
+ }
+ ok(1, "write LOGREC_UNDO_KEY_INSERT");
+ }
+ else
+ {
+ lsn_store(lsn_buff, lsn_base);
+ lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_ROW_DELETE,
+ (i % 0xFFFF), NULL, 23, lsn_buff, 0))
+ {
+ fprintf(stderr, "0 Can't write reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_UNDO_ROW_DELETE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_UNDO_ROW_DELETE");
+ lsn_store(lsn_buff, lsn_base);
+ lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19)
+ rec_len= 19;
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_KEY_DELETE,
+ (i % 0xFFFF),
+ NULL, 14, lsn_buff, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "0 Can't write var reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_UNDO_KEY_DELETE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_UNDO_KEY_DELETE");
+ }
+ int4store(long_tr_id, i);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ (i % 0xFFFF), NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_LONG_TRANSACTION_ID");
+ exit(1);
+ }
+ ok(1, "write LOGREC_LONG_TRANSACTION_ID");
+
+ lsn_base= lsn;
+
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9)
+ rec_len= 9;
+ if (translog_write_record(&lsn,
+ LOGREC_REDO_INSERT_ROW_HEAD,
+ (i % 0xFFFF), NULL, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_REDO_INSERT_ROW_HEAD");
+ exit(1);
+ }
+ ok(1, "write LOGREC_REDO_INSERT_ROW_HEAD");
+ if (translog_flush(lsn))
+ {
+ fprintf(stderr, "Can't flush #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "flush");
+ exit(1);
+ }
+ ok(1, "flush");
+ }
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "pass2: Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS))
+ {
+ fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+ srandom(122334817L);
+
+
+ rc= 1;
+
+ {
+ translog_size_t len= translog_read_record_header(first_lsn, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "translog_read_record_header failed (%d)\n", errno);
+ goto err;
+ }
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 ||
+ rec.record_length != 6 || uint4korr(rec.header) != 0 ||
+ ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF ||
+ first_lsn != rec.lsn)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, "
+ "lsn(%lu,0x%lx)\n",
+ (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length,
+ (uint) uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ goto err;
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+ lsn= first_lsn;
+ if (translog_init_scanner(first_lsn, 1, &scanner))
+ {
+ fprintf(stderr, "scanner init failed\n");
+ goto err;
+ }
+ for (i= 1;; i++)
+ {
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ goto err;
+ }
+ if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN)
+ {
+ if (i != ITERATIONS)
+ {
+ fprintf(stderr, "EOL met at iteration %u instead of %u\n",
+ i, ITERATIONS);
+ goto err;
+ }
+ break;
+ }
+ if (i % 2)
+ {
+ LSN ref;
+ ref= lsn_korr(rec.header);
+ if (rec.type !=LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 7 || ref != lsn)
+ {
+ fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d) "
+ "type: %u strid: %u len: %u"
+ "ref: (%lu,0x%lx) (%lu,0x%lx) "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref),
+ (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn),
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ ref1= lsn_korr(rec.header);
+ ref2= lsn_korr(rec.header + LSN_STORE_SIZE);
+ if (rec.type != LOGREC_UNDO_ROW_DELETE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 23 ||
+ ref1 != lsn ||
+ ref2 != first_lsn ||
+ ((uchar)rec.header[22]) != 0x55 ||
+ ((uchar)rec.header[21]) != 0xAA ||
+ ((uchar)rec.header[20]) != 0x55 ||
+ ((uchar)rec.header[19]) != 0xAA ||
+ ((uchar)rec.header[18]) != 0x55 ||
+ ((uchar)rec.header[17]) != 0xAA ||
+ ((uchar)rec.header[16]) != 0x55 ||
+ ((uchar)rec.header[15]) != 0xAA ||
+ ((uchar)rec.header[14]) != 0x55)
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)"
+ "type %u, strid %u, len %u, ref1(%lu,0x%lx), "
+ "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1),
+ (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2),
+ (uint) rec.header[14], (uint) rec.header[15],
+ (uint) rec.header[16], (uint) rec.header[17],
+ (uint) rec.header[18], (uint) rec.header[19],
+ (uint) rec.header[20], (uint) rec.header[21],
+ (uint) rec.header[22],
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ goto err;
+ }
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header (var) "
+ "failed (%d)\n", i, errno);
+ goto err;
+ }
+ if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration (first var) %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ goto err;
+ }
+ if (i % 2)
+ {
+ LSN ref;
+ ref= lsn_korr(rec.header);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12)
+ rec_len= 12;
+ if (rec.type !=LOGREC_UNDO_KEY_INSERT ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + LSN_STORE_SIZE ||
+ len != 12 || ref != lsn ||
+ check_content(rec.header + LSN_STORE_SIZE, len - LSN_STORE_SIZE))
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)"
+ "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), "
+ "hdr len: %u (%d), "
+ "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n",
+ i, (uint) rec.type,
+ rec.type !=LOGREC_UNDO_KEY_INSERT,
+ (uint) rec.short_trid,
+ rec.short_trid != (i % 0xFFFF),
+ (ulong) rec.record_length, (ulong) rec_len,
+ rec.record_length != rec_len + LSN_STORE_SIZE,
+ (uint) len,
+ len != 12,
+ (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref),
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn),
+ (len != 12 || ref != lsn),
+ check_content(rec.header + LSN_STORE_SIZE,
+ len - LSN_STORE_SIZE));
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read "
+ "lsn(%lu,0x%lx)\n",
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ ref1= lsn_korr(rec.header);
+ ref2= lsn_korr(rec.header + LSN_STORE_SIZE);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19)
+ rec_len= 19;
+ if (rec.type !=LOGREC_UNDO_KEY_DELETE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + LSN_STORE_SIZE * 2 ||
+ len != 19 ||
+ ref1 != lsn ||
+ ref2 != first_lsn ||
+ check_content(rec.header + LSN_STORE_SIZE * 2,
+ len - LSN_STORE_SIZE * 2))
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)"
+ "type %u, strid %u, len %lu != %lu + 14, hdr len: %u, "
+ "ref1(%lu,0x%lx), ref2(%lu,0x%lx), "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ (uint) len,
+ (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1),
+ (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2),
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read "
+ "lsn(%lu,0x%lx)\n",
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ goto err;
+ }
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ goto err;
+ }
+ if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ goto err;
+ }
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 6 || uint4korr(rec.header) != i ||
+ ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (uint) uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ goto err;
+ }
+ lsn= rec.lsn;
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9)
+ rec_len= 9;
+ if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len ||
+ len != 9 || check_content(rec.header, len))
+ {
+ fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d)"
+ "type %u, strid %u, len %lu != %lu, hdr len: %u, "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ (uint) len,
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 0))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read "
+ "lsn(%lu,0x%lx)\n",
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ goto err;
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+ }
+ }
+
+ rc= 0;
+err:
+ if (rc)
+ ok(0, "read record");
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+ if (maria_log_remove())
+ exit(1);
+ return(test(exit_status()));
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c
new file mode 100644
index 00000000000..4aaf30bd9a3
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c
@@ -0,0 +1,600 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+
+extern my_bool maria_log_remove();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+
+#define LONG_BUFFER_SIZE ((1024L*1024L*1024L) + (1024L*1024L*512))
+
+#define MIN_REC_LENGTH (1024L*1024L + 1024L*512L + 1)
+
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define ITERATIONS 2
+/*#define ITERATIONS 63 */
+
+/*
+#define LOG_FILE_SIZE 1024L*1024L*3L
+#define ITERATIONS 1600
+*/
+/*
+#define LOG_FILE_SIZE 1024L*1024L*100L
+#define ITERATIONS 65000
+*/
+
+
+/*
+ Check that the buffer filled correctly
+
+ SYNOPSIS
+ check_content()
+ ptr Pointer to the buffer
+ length length of the buffer
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool check_content(byte *ptr, ulong length)
+{
+ ulong i;
+ byte buff[4];
+ DBUG_ENTER("check_content");
+ for (i= 0; i < length; i++)
+ {
+ if (i % 4 == 0)
+ int4store(buff, (i >> 2));
+ if (ptr[i] != buff[i % 4])
+ {
+ fprintf(stderr, "Byte # %lu is %x instead of %x",
+ i, (uint) ptr[i], (uint) buff[i % 4]);
+ DBUG_DUMP("mem", ptr +(ulong) (i > 16 ? i - 16 : 0),
+ (i > 16 ? 16 : i) + (i + 16 < length ? 16 : length - i));
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Read whole record content, and check content (put with offset)
+
+ SYNOPSIS
+ read_and_check_content()
+ rec The record header buffer
+ buffer The buffer to read the record in
+      skip              Skip this number of bytes of the record content
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec,
+ byte *buffer, uint skip)
+{
+ int res= 0;
+ translog_size_t len;
+ DBUG_ENTER("read_and_check_content");
+ DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2);
+ if ((len= translog_read_record(rec->lsn, 0, rec->record_length,
+ buffer, NULL)) != rec->record_length)
+ {
+ fprintf(stderr, "Requested %lu byte, read %lu\n",
+ (ulong) rec->record_length, (ulong) len);
+ res= 1;
+ }
+ res|= check_content(buffer + skip, rec->record_length - skip);
+ DBUG_RETURN(res);
+}
+
+
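+/*
+  Get pseudo-random length of the field in
+  limits [MIN_REC_LENGTH..LONG_BUFFER_SIZE-1]
+
+  SYNOPSIS
+    get_len()
+
+  RETURN
+    length  where MIN_REC_LENGTH <= length < LONG_BUFFER_SIZE
+*/
+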
+static uint32 get_len()
+{
+ uint32 rec_len;
+ do
+ {
+ rec_len= random() /
+ (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH;
+ } while (rec_len >= LONG_BUFFER_SIZE);
+ return rec_len;
+}
+
+int main(int argc, char *argv[])
+{
+ uint32 i;
+ uint32 rec_len;
+ uint pagen;
+ byte long_tr_id[6];
+ byte lsn_buff[23]=
+ {
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55
+ };
+ byte *long_buffer= malloc(LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2);
+ PAGECACHE pagecache;
+ LSN lsn, lsn_base, first_lsn;
+ TRANSLOG_HEADER_BUFFER rec;
+ struct st_translog_scanner_data scanner;
+ int rc;
+
+ MY_INIT(argv[0]);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+
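+  /* Fill long_buffer with the 32-bit counter pattern that check_content() verifies */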
+ {
+ byte buff[4];
+ for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i++)
+ {
+ if (i % 4 == 0)
+ int4store(buff, (i >> 2));
+ long_buffer[i]= buff[i % 4];
+ }
+ }
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ plan(((ITERATIONS - 1) * 4 + 1) * 2);
+
+ srandom(122334817L);
+
+ long_tr_id[5]= 0xff;
+
+ int4store(long_tr_id, 0);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ 0, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ ok(0, "write LOGREC_LONG_TRANSACTION_ID");
+ exit(1);
+ }
+ ok(1, "write LOGREC_LONG_TRANSACTION_ID");
+ lsn_base= first_lsn= lsn;
+
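+  /*
+    First pass: for each iteration write a fixed-size reference record
+    (LOGREC_CLR_END on odd iterations, LOGREC_UNDO_ROW_DELETE on even ones),
+    a variable-length record (LOGREC_UNDO_KEY_INSERT / LOGREC_UNDO_KEY_DELETE),
+    a LOGREC_LONG_TRANSACTION_ID record and a variable-length
+    LOGREC_REDO_INSERT_ROW_HEAD record.
+  */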
+ for (i= 1; i < ITERATIONS; i++)
+ {
+ if (i % 2)
+ {
+ lsn_store(lsn_buff, lsn_base);
+ if (translog_write_record(&lsn,
+ LOGREC_CLR_END,
+ (i % 0xFFFF), NULL,
+ LSN_STORE_SIZE, lsn_buff, 0))
+ {
+ fprintf(stderr, "1 Can't write reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_CLR_END");
+ exit(1);
+ }
+ ok(1, "write LOGREC_CLR_END");
+ lsn_store(lsn_buff, lsn_base);
+ rec_len= get_len();
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_KEY_INSERT,
+ (i % 0xFFFF),
+ NULL, LSN_STORE_SIZE, lsn_buff,
+ rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "1 Can't write var reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_UNDO_KEY_INSERT");
+ exit(1);
+ }
+ ok(1, "write LOGREC_UNDO_KEY_INSERT");
+ }
+ else
+ {
+ lsn_store(lsn_buff, lsn_base);
+ lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_ROW_DELETE,
+ (i % 0xFFFF), NULL, 23, lsn_buff, 0))
+ {
+ fprintf(stderr, "0 Can't write reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_UNDO_ROW_DELETE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_UNDO_ROW_DELETE");
+ lsn_store(lsn_buff, lsn_base);
+ lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
+ rec_len= get_len();
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_KEY_DELETE,
+ (i % 0xFFFF),
+ NULL, LSN_STORE_SIZE * 2, lsn_buff,
+ rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "0 Can't write var reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_UNDO_KEY_DELETE");
+ exit(1);
+ }
+ ok(1, "write LOGREC_UNDO_KEY_DELETE");
+ }
+ int4store(long_tr_id, i);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ (i % 0xFFFF), NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_LONG_TRANSACTION_ID");
+ exit(1);
+ }
+ ok(1, "write LOGREC_LONG_TRANSACTION_ID");
+
+ lsn_base= lsn;
+
+ rec_len= get_len();
+ if (translog_write_record(&lsn,
+ LOGREC_REDO_INSERT_ROW_HEAD,
+ (i % 0xFFFF), NULL, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ translog_destroy();
+ ok(0, "write LOGREC_REDO_INSERT_ROW_HEAD");
+ exit(1);
+ }
+ ok(1, "write LOGREC_REDO_INSERT_ROW_HEAD");
+ }
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
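+  /*
+    Second pass: re-open the control file, pagecache and log handler and
+    check that every record written above can be read back.
+  */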
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "pass2: Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0))
+ {
+ fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ srandom(122334817L);
+
+ rc= 1;
+
+ {
+ translog_size_t len= translog_read_record_header(first_lsn, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "translog_read_record_header failed (%d)\n", errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 ||
+ rec.record_length != 6 || uint4korr(rec.header) != 0 ||
+ ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF ||
+ first_lsn != rec.lsn)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, "
+              "lsn(%lu,0x%lx)\n",
+ (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length,
+ (uint)uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+ lsn= first_lsn;
+ if (translog_init_scanner(first_lsn, 1, &scanner))
+ {
+ fprintf(stderr, "scanner init failed\n");
+ goto err;
+ }
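+  /* Scan the log from first_lsn and verify each record in write order */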
+ for (i= 1;; i++)
+ {
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN)
+ {
+ if (i != ITERATIONS)
+ {
+ fprintf(stderr, "EOL met at iteration %u instead of %u\n",
+ i, ITERATIONS);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ break;
+ }
+
+ if (i % 2)
+ {
+ LSN ref;
+ ref= lsn_korr(rec.header);
+ if (rec.type != LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != LSN_STORE_SIZE || ref != lsn)
+ {
+ fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d)"
+ "type %u, strid %u, len %u, ref(%lu,0x%lx), lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref),
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ ref1= lsn_korr(rec.header);
+ ref2= lsn_korr(rec.header + LSN_STORE_SIZE);
+ if (rec.type !=LOGREC_UNDO_ROW_DELETE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 23 ||
+ ref1 != lsn ||
+ ref2 != first_lsn ||
+ ((uchar)rec.header[22]) != 0x55 ||
+ ((uchar)rec.header[21]) != 0xAA ||
+ ((uchar)rec.header[20]) != 0x55 ||
+ ((uchar)rec.header[19]) != 0xAA ||
+ ((uchar)rec.header[18]) != 0x55 ||
+ ((uchar)rec.header[17]) != 0xAA ||
+ ((uchar)rec.header[16]) != 0x55 ||
+ ((uchar)rec.header[15]) != 0xAA ||
+ ((uchar)rec.header[14]) != 0x55)
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)"
+ "type %u, strid %u, len %u, ref1(%lu,0x%lx), "
+ "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1),
+ (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2),
+ (uint) rec.header[14], (uint) rec.header[15],
+ (uint) rec.header[16], (uint) rec.header[17],
+ (uint) rec.header[18], (uint) rec.header[19],
+ (uint) rec.header[20], (uint) rec.header[21],
+ (uint) rec.header[22],
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header (var) "
+ "failed (%d)\n", i, errno);
+ goto err;
+ }
+ if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN)
+ {
+        fprintf(stderr, "EOL met in the middle of iteration (first var) %u "
+                "instead of at the beginning of iteration %u\n", i, ITERATIONS);
+ goto err;
+ }
+ if (i % 2)
+ {
+ LSN ref;
+ ref= lsn_korr(rec.header);
+ rec_len= get_len();
+ if (rec.type !=LOGREC_UNDO_KEY_INSERT ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + LSN_STORE_SIZE ||
+ len != 12 || ref != lsn ||
+ check_content(rec.header + LSN_STORE_SIZE, len - LSN_STORE_SIZE))
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)"
+ "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), "
+ "hdr len: %u (%d), "
+ "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n",
+ i, (uint) rec.type,
+ rec.type !=LOGREC_UNDO_KEY_INSERT,
+ (uint) rec.short_trid,
+ rec.short_trid != (i % 0xFFFF),
+ (ulong) rec.record_length, (ulong) rec_len,
+ rec.record_length != rec_len + LSN_STORE_SIZE,
+ (uint) len,
+ len != 12,
+ (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref),
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn),
+ (ref != lsn),
+ check_content(rec.header + LSN_STORE_SIZE,
+ len - LSN_STORE_SIZE));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read "
+ "lsn(%lu,0x%lx)\n",
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ ref1= lsn_korr(rec.header);
+ ref2= lsn_korr(rec.header + LSN_STORE_SIZE);
+ rec_len= get_len();
+ if (rec.type !=LOGREC_UNDO_KEY_DELETE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + LSN_STORE_SIZE * 2 ||
+ len != 19 ||
+ ref1 != lsn ||
+ ref2 != first_lsn ||
+ check_content(rec.header + LSN_STORE_SIZE * 2,
+ len - LSN_STORE_SIZE * 2))
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)"
+ "type %u, strid %u, len %lu != %lu + 14, hdr len: %u, "
+ "ref1(%lu,0x%lx), ref2(%lu,0x%lx), "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ (uint) len,
+ (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1),
+ (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2),
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read "
+ "lsn(%lu,0x%lx)\n",
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN)
+ {
+        fprintf(stderr, "EOL met in the middle of iteration %u "
+                "instead of at the beginning of iteration %u\n", i, ITERATIONS);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 6 || uint4korr(rec.header) != i ||
+ ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (uint)uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+
+ lsn= rec.lsn;
+
+ len= translog_read_next_record_header(&scanner, &rec);
+ rec_len= get_len();
+ if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len ||
+ len != 9 || check_content(rec.header, len))
+ {
+ fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d)"
+ "type %u, strid %u, len %lu != %lu, hdr len: %u, "
+ "lsn(%lu,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ (uint) len,
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 0))
+ {
+ fprintf(stderr,
+                "Incorrect LOGREC_REDO_INSERT_ROW_HEAD in whole rec read "
+ "lsn(%lu,0x%lx)\n",
+ (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ ok(1, "read record");
+ translog_free_record_header(&rec);
+ }
+ }
+
+ rc= 0;
+err:
+ if (rc)
+ ok(0, "read record");
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ if (maria_log_remove())
+ exit(1);
+
+ return (test(exit_status()));
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c
new file mode 100644
index 00000000000..1834e720328
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c
@@ -0,0 +1,457 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+extern my_bool maria_log_remove();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+
+/*#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC */
+#define LOG_FLAGS 0
+/*#define LONG_BUFFER_SIZE (1024L*1024L*1024L + 1024L*1024L*512)*/
+#define LONG_BUFFER_SIZE (1024L*1024L*1024L)
+#define MIN_REC_LENGTH 30
+#define SHOW_DIVIDER 10
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define ITERATIONS 3
+#define WRITERS 3
+static uint number_of_writers= WRITERS;
+
+static pthread_cond_t COND_thread_count;
+static pthread_mutex_t LOCK_thread_count;
+static uint thread_count;
+
+static ulong lens[WRITERS][ITERATIONS];
+static LSN lsns1[WRITERS][ITERATIONS];
+static LSN lsns2[WRITERS][ITERATIONS];
+static byte *long_buffer;
+
+/*
+ Get pseudo-random length of the field in
+ limits [MIN_REC_LENGTH..LONG_BUFFER_SIZE]
+
+ SYNOPSIS
+ get_len()
+
+ RETURN
+    length  where MIN_REC_LENGTH <= length < LONG_BUFFER_SIZE
+*/
+
+static uint32 get_len()
+{
+ uint32 rec_len;
+ do
+ {
+ rec_len= random() /
+ (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH;
+ } while (rec_len >= LONG_BUFFER_SIZE);
+ return rec_len;
+}
+
+
+/*
+ Check that the buffer filled correctly
+
+ SYNOPSIS
+ check_content()
+ ptr Pointer to the buffer
+ length length of the buffer
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool check_content(byte *ptr, ulong length)
+{
+ ulong i;
+ for (i= 0; i < length; i++)
+ {
+ if (((uchar)ptr[i]) != (i & 0xFF))
+ {
+ fprintf(stderr, "Byte # %lu is %x instead of %x",
+ i, (uint) ptr[i], (uint) (i & 0xFF));
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ Read whole record content, and check content (put with offset)
+
+ SYNOPSIS
+ read_and_check_content()
+ rec The record header buffer
+ buffer The buffer to read the record in
+      skip              Skip this number of bytes of the record content
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+
+static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec,
+ byte *buffer, uint skip)
+{
+ int res= 0;
+ translog_size_t len;
+
+ if ((len= translog_read_record(rec->lsn, 0, rec->record_length,
+ buffer, NULL)) != rec->record_length)
+ {
+ fprintf(stderr, "Requested %lu byte, read %lu\n",
+ (ulong) rec->record_length, (ulong) len);
+ res= 1;
+ }
+ res|= check_content(buffer + skip, rec->record_length - skip);
+ return(res);
+}
+
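+/*
+  Writer thread body
+
+  Writes ITERATIONS pairs of records (a LOGREC_LONG_TRANSACTION_ID record
+  followed by a variable-length LOGREC_REDO_INSERT_ROW_HEAD record) and
+  stores their lengths and LSNs in lens[], lsns1[] and lsns2[] so that the
+  main thread can verify them after all writers have finished.
+
+  SYNOPSIS
+    writer()
+    num       Writer number, used as short_trid and as index into the arrays
+*/
+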
+void writer(int num)
+{
+ LSN lsn;
+ byte long_tr_id[6];
+ uint i;
+
+ for (i= 0; i < ITERATIONS; i++)
+ {
+ uint len= get_len();
+ lens[num][i]= len;
+
+ int2store(long_tr_id, num);
+ int4store(long_tr_id + 2, i);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ num, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write LOGREC_LONG_TRANSACTION_ID record #%lu "
+ "thread %i\n", (ulong) i, num);
+ translog_destroy();
+ pthread_mutex_lock(&LOCK_thread_count);
+ ok(0, "write records");
+ pthread_mutex_unlock(&LOCK_thread_count);
+ return;
+ }
+ lsns1[num][i]= lsn;
+ if (translog_write_record(&lsn,
+ LOGREC_REDO_INSERT_ROW_HEAD,
+ num, NULL, len, long_buffer, 0))
+ {
+ fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ translog_destroy();
+ pthread_mutex_lock(&LOCK_thread_count);
+ ok(0, "write records");
+ pthread_mutex_unlock(&LOCK_thread_count);
+ return;
+ }
+ lsns2[num][i]= lsn;
+ pthread_mutex_lock(&LOCK_thread_count);
+ ok(1, "write records");
+ pthread_mutex_unlock(&LOCK_thread_count);
+ }
+ return;
+}
+
+
+static void *test_thread_writer(void *arg)
+{
+ int param= *((int*) arg);
+
+ my_thread_init();
+
+ writer(param);
+
+ pthread_mutex_lock(&LOCK_thread_count);
+ thread_count--;
+ ok(1, "writer finished"); /* just to show progress */
+ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are
+ ready */
+ pthread_mutex_unlock(&LOCK_thread_count);
+ free((gptr) arg);
+ my_thread_end();
+ return(0);
+}
+
+
+int main(int argc, char **argv __attribute__ ((unused)))
+{
+ uint32 i;
+ uint pagen;
+ PAGECACHE pagecache;
+ LSN first_lsn;
+ TRANSLOG_HEADER_BUFFER rec;
+ struct st_translog_scanner_data scanner;
+ pthread_t tid;
+ pthread_attr_t thr_attr;
+ int *param, error;
+ int rc;
+
+ plan(WRITERS + ITERATIONS * WRITERS * 3);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2);
+ if (long_buffer == 0)
+ {
+ fprintf(stderr, "End of memory\n");
+ exit(1);
+ }
+ for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++)
+ long_buffer[i]= (i & 0xFF);
+
+ MY_INIT(argv[0]);
+ if (maria_log_remove())
+ exit(1);
+
+
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+
+ if ((error= pthread_cond_init(&COND_thread_count, NULL)))
+ {
+ fprintf(stderr, "COND_thread_count: %d from pthread_cond_init "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+ if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST)))
+ {
+    fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_init "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_init(&thr_attr)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_attr_init "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED)))
+ {
+ fprintf(stderr,
+ "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+
+#ifdef HAVE_THR_SETCONCURRENCY
+ VOID(thr_setconcurrency(2));
+#endif
+
+ my_thread_global_init();
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ srandom(122334817L);
+ {
+ byte long_tr_id[6]=
+ {
+ 0x11, 0x22, 0x33, 0x44, 0x55, 0x66
+ };
+
+ if (translog_write_record(&first_lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ 0, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write the first record\n");
+ translog_destroy();
+ exit(1);
+ }
+ }
+
+
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ {
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+
+ while (number_of_writers != 0)
+ {
+ param= (int*) malloc(sizeof(int));
+ *param= number_of_writers - 1;
+ if ((error= pthread_create(&tid, &thr_attr, test_thread_writer,
+ (void*) param)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_create (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+ thread_count++;
+ number_of_writers--;
+ }
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ pthread_attr_destroy(&thr_attr);
+
+  /* wait for all writer threads to finish */
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock\n", error);
+ while (thread_count)
+ {
+ if ((error= pthread_cond_wait(&COND_thread_count, &LOCK_thread_count)))
+ fprintf(stderr, "COND_thread_count: %d from pthread_cond_wait\n", error);
+ }
+ if ((error= pthread_mutex_unlock(&LOCK_thread_count)))
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_unlock\n", error);
+
+ /* Find last LSN and flush up to it (all our log) */
+ {
+ LSN max= 0;
+ for (i= 0; i < WRITERS; i++)
+ {
+ if (cmp_translog_addr(lsns2[i][ITERATIONS - 1], max) > 0)
+ max= lsns2[i][ITERATIONS - 1];
+ }
+ translog_flush(max);
+ }
+
+ rc= 1;
+
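+  /*
+    Scan the whole log and check that every record written by every writer
+    is found with the expected type, header content, length and LSN;
+    indeces[] tracks how many records of each writer have been seen.
+  */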
+ {
+ uint indeces[WRITERS];
+ uint index, len, stage;
+    bzero(indeces, sizeof(indeces));
+
+ if (translog_init_scanner(first_lsn, 1, &scanner))
+ {
+ fprintf(stderr, "scanner init failed\n");
+ goto err;
+ }
+ for (i= 0;; i++)
+ {
+ len= translog_read_next_record_header(&scanner, &rec);
+
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN)
+ {
+ if (i != WRITERS * ITERATIONS * 2)
+ {
+ fprintf(stderr, "EOL met at iteration %u instead of %u\n",
+ i, ITERATIONS * WRITERS * 2);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ break;
+ }
+ index= indeces[rec.short_trid] / 2;
+ stage= indeces[rec.short_trid] % 2;
+ if (stage == 0)
+ {
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID ||
+ rec.record_length != 6 ||
+ uint2korr(rec.header) != rec.short_trid ||
+ index != uint4korr(rec.header + 2) ||
+ cmp_translog_addr(lsns1[rec.short_trid][index], rec.lsn) != 0)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n"
+ "type %u, strid %u %u, len %u, i: %u %u, "
+ "lsn(%lu,0x%lx) (%lu,0x%lx)\n",
+ i, (uint) rec.type,
+ (uint) rec.short_trid, (uint) uint2korr(rec.header),
+ (uint) rec.record_length,
+ (uint) index, (uint) uint4korr(rec.header + 2),
+ (ulong) LSN_FILE_NO(rec.lsn),
+ (ulong) LSN_OFFSET(rec.lsn),
+ (ulong) LSN_FILE_NO(lsns1[rec.short_trid][index]),
+ (ulong) LSN_OFFSET(lsns1[rec.short_trid][index]));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ else
+ {
+ if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD ||
+ len != 9 ||
+ rec.record_length != lens[rec.short_trid][index] ||
+ cmp_translog_addr(lsns2[rec.short_trid][index], rec.lsn) != 0 ||
+ check_content(rec.header, len))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d) "
+ " thread: %d, iteration %d, stage %d\n"
+ "type %u (%d), len %u, length %lu %lu (%d) "
+ "lsn(%lu,0x%lx) (%lu,0x%lx)\n",
+ i, (uint) rec.short_trid, index, stage,
+ (uint) rec.type, (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD),
+ (uint) len,
+ (ulong) rec.record_length, lens[rec.short_trid][index],
+ (rec.record_length != lens[rec.short_trid][index]),
+ (ulong) LSN_FILE_NO(rec.lsn),
+ (ulong) LSN_OFFSET(rec.lsn),
+ (ulong) LSN_FILE_NO(lsns2[rec.short_trid][index]),
+ (ulong) LSN_OFFSET(lsns2[rec.short_trid][index]));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 0))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_REDO_INSERT_ROW_HEAD in whole rec read "
+ "lsn(%lu,0x%lx)\n",
+ (ulong) LSN_FILE_NO(rec.lsn),
+ (ulong) LSN_OFFSET(rec.lsn));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ ok(1, "record read");
+ translog_free_record_header(&rec);
+ indeces[rec.short_trid]++;
+ }
+ }
+
+ rc= 0;
+err:
+ if (rc)
+ ok(0, "record read");
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ if (maria_log_remove())
+ exit(1);
+
+ return(exit_status());
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c
new file mode 100644
index 00000000000..211eb6dea9e
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c
@@ -0,0 +1,149 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+#include <tap.h>
+
+extern my_bool maria_log_remove();
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+#define PCACHE_PAGE TRANSLOG_PAGE_SIZE
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define LOG_FLAGS 0
+
+static char *first_translog_file= (char*)"maria_log.00000001";
+static char *file1_name= (char*)"page_cache_test_file_1";
+static PAGECACHE_FILE file1;
+
+int main(int argc, char *argv[])
+{
+ uint pagen;
+ uchar long_tr_id[6];
+ PAGECACHE pagecache;
+ LSN lsn;
+ MY_STAT st, *stat;
+
+ MY_INIT(argv[0]);
+
+ plan(1);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ if (maria_log_remove())
+ exit(1);
+  /* be sure that we have no logs in the directory */
+ if (my_stat(CONTROL_FILE_BASE_NAME, &st, MYF(0)))
+ my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
+ if (my_stat(first_translog_file, &st, MYF(0)))
+ my_delete(first_translog_file, MYF(0));
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler_pagecache.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler_pagecache.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PCACHE_PAGE)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0)
+ {
+ fprintf(stderr, "There is no %s (%d)\n", first_translog_file, errno);
+ exit(1);
+ }
+ if (st.st_size != TRANSLOG_PAGE_SIZE)
+ {
+ fprintf(stderr,
+ "incorrect initial size of %s: %ld instead of %ld\n",
+ first_translog_file, (long)st.st_size, (long)TRANSLOG_PAGE_SIZE);
+ exit(1);
+ }
+ int4store(long_tr_id, 0);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ 0, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+
+ if ((file1.file= my_open(file1_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0)
+ {
+ fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+
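+  /*
+    Write a page that carries the LSN of the record above; flushing it
+    should force the log to be flushed up to that LSN, growing the log
+    file to a second page (checked below).
+  */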
+ {
+ uchar page[PCACHE_PAGE];
+
+ bzero(page, PCACHE_PAGE);
+#define PAGE_LSN_OFFSET 0
+ lsn_store(page + PAGE_LSN_OFFSET, lsn);
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)page,
+ PAGECACHE_LSN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ }
+ if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0)
+ {
+ fprintf(stderr, "can't stat %s (%d)\n", first_translog_file, errno);
+ exit(1);
+ }
+ if (st.st_size != TRANSLOG_PAGE_SIZE * 2)
+ {
+ fprintf(stderr,
+            "incorrect size of %s after flush: %ld instead of %ld\n",
+ first_translog_file,
+ (long)st.st_size, (long)(TRANSLOG_PAGE_SIZE * 2));
+ ok(0, "log triggered");
+ exit(1);
+ }
+ ok(1, "log triggered");
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
+ my_delete(first_translog_file, MYF(0));
+ my_delete(file1_name, MYF(0));
+
+ exit(0);
+}
diff --git a/storage/maria/unittest/mf_pagecache_consist.c b/storage/maria/unittest/mf_pagecache_consist.c
new file mode 100755
index 00000000000..8ea0094762c
--- /dev/null
+++ b/storage/maria/unittest/mf_pagecache_consist.c
@@ -0,0 +1,458 @@
+/*
+ TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in
+ my_atomic-t.c (see BUG#22320).
+ Use diag() instead of fprintf(stderr). Use ok() and plan().
+*/
+
+#include <tap.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include "test_file.h"
+
+#define PCACHE_SIZE (PAGE_SIZE*1024*8)
+
+#ifndef DBUG_OFF
+static const char* default_dbug_option;
+#endif
+
+static char *file1_name= (char*)"page_cache_test_file_1";
+static PAGECACHE_FILE file1;
+static pthread_cond_t COND_thread_count;
+static pthread_mutex_t LOCK_thread_count;
+static uint thread_count;
+static PAGECACHE pagecache;
+
+#ifdef TEST_HIGH_CONCURENCY
+static uint number_of_readers= 10;
+static uint number_of_writers= 20;
+static uint number_of_tests= 30000;
+static uint record_length_limit= PAGE_SIZE/200;
+static uint number_of_pages= 20;
+static uint flush_divider= 1000;
+#else /*TEST_HIGH_CONCURENCY*/
+#ifdef TEST_READERS
+static uint number_of_readers= 10;
+static uint number_of_writers= 1;
+static uint number_of_tests= 30000;
+static uint record_length_limit= PAGE_SIZE/200;
+static uint number_of_pages= 20;
+static uint flush_divider= 1000;
+#else /*TEST_READERS*/
+#ifdef TEST_WRITERS
+static uint number_of_readers= 0;
+static uint number_of_writers= 10;
+static uint number_of_tests= 30000;
+static uint record_length_limit= PAGE_SIZE/200;
+static uint number_of_pages= 20;
+static uint flush_divider= 1000;
+#else /*TEST_WRITERS*/
+static uint number_of_readers= 10;
+static uint number_of_writers= 10;
+static uint number_of_tests= 50000;
+static uint record_length_limit= PAGE_SIZE/200;
+static uint number_of_pages= 20000;
+static uint flush_divider= 1000;
+#endif /*TEST_WRITERS*/
+#endif /*TEST_READERS*/
+#endif /*TEST_HIGH_CONCURENCY*/
+
+
+/*
+ Get pseudo-random length of the field in (0;limit)
+
+  SYNOPSIS
+ get_len()
+ limit limit for generated value
+
+ RETURN
+    length where 0 < length < limit
+*/
+
+static uint get_len(uint limit)
+{
+ uint32 rec_len;
+ do
+ {
+ rec_len= random() /
+ (RAND_MAX / limit);
+ } while (rec_len >= limit || rec_len == 0);
+ return rec_len;
+}
+
+
+/* check page consistency */
+uint check_page(uchar *buff, ulong offset, int page_locked, int page_no,
+ int tag)
+{
+ uint end= sizeof(uint);
+ uint num= *((uint *)buff);
+ uint i;
+ DBUG_ENTER("check_page");
+
+ for (i= 0; i < num; i++)
+ {
+ uint len= *((uint *)(buff + end));
+ uint j;
+ end+= sizeof(uint) + sizeof(uint);
+ if (len + end > PAGE_SIZE)
+ {
+      diag("incorrect field header #%u by offset %lu\n", i, offset + end);
+ goto err;
+ }
+ for(j= 0; j < len; j++)
+ {
+ if (buff[end + j] != (uchar)((i+1) % 256))
+ {
+ diag("incorrect %lu byte\n", offset + end + j);
+ goto err;
+ }
+ }
+ end+= len;
+ }
+ for(i= end; i < PAGE_SIZE; i++)
+ {
+ if (buff[i] != 0)
+ {
+ int h;
+ DBUG_PRINT("err",
+ ("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n",
+ offset + i, offset, i, page_no,
+ (page_locked ? "locked" : "unlocked"),
+ end, num, tag));
+ diag("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n",
+ offset + i, offset, i, page_no,
+ (page_locked ? "locked" : "unlocked"),
+ end, num, tag);
+ h= my_open("wrong_page", O_CREAT | O_TRUNC | O_RDWR, MYF(0));
+ my_pwrite(h, (byte*) buff, PAGE_SIZE, 0, MYF(0));
+ my_close(h, MYF(0));
+ goto err;
+ }
+ }
+ DBUG_RETURN(end);
+err:
+ DBUG_PRINT("err", ("try to flush"));
+ if (page_locked)
+ {
+ pagecache_delete_page(&pagecache, &file1, page_no,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, 1);
+ }
+ else
+ {
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE);
+ }
+ exit(1);
+}
+
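+/*
+  Append one record to the page image in buff: the page starts with a
+  record count, each record is stored as a length and a tag followed by
+  'len' bytes equal to the new record count modulo 256; a record that
+  would not fit into PAGE_SIZE is silently ignored.
+*/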
+void put_rec(uchar *buff, uint end, uint len, uint tag)
+{
+ uint i;
+ uint num= *((uint *)buff);
+ if (!len)
+ len= 1;
+ if (end + sizeof(uint)*2 + len > PAGE_SIZE)
+ return;
+ *((uint *)(buff + end))= len;
+ end+= sizeof(uint);
+ *((uint *)(buff + end))= tag;
+ end+= sizeof(uint);
+ num++;
+ *((uint *)buff)= num;
+ *((uint*)(buff + end))= len;
+ for (i= end; i < (len + end); i++)
+ {
+ buff[i]= (uchar) num % 256;
+ }
+}
+
+/*
+ Recreate and reopen a file for test
+
+ SYNOPSIS
+ reset_file()
+ file File to reset
+ file_name Path (and name) of file which should be reset
+*/
+
+void reset_file(PAGECACHE_FILE file, char *file_name)
+{
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE);
+ if (my_close(file1.file, MYF(0)) != 0)
+ {
+ diag("Got error during %s closing from close() (errno: %d)\n",
+ file_name, errno);
+ exit(1);
+ }
+ my_delete(file_name, MYF(0));
+ if ((file.file= my_open(file_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ diag("Got error during %s creation from open() (errno: %d)\n",
+ file_name, errno);
+ exit(1);
+ }
+}
+
+
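+/*
+  Reader thread body: repeatedly read a random page without locking and
+  check its consistency with check_page()
+*/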
+void reader(int num)
+{
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ uint i;
+
+ for (i= 0; i < number_of_tests; i++)
+ {
+ uint page= get_len(number_of_pages);
+ pagecache_read(&pagecache, &file1, page, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ check_page(buffr, page * PAGE_SIZE, 0, page, -num);
+ if (i % 500 == 0)
+ printf("reader%d: %d\n", num, i);
+
+ }
+ printf("reader%d: done\n", num);
+ free(buffr);
+}
+
+
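+/*
+  Writer thread body: read a random page under a write lock, check it,
+  append one record with put_rec() and write the page back, flushing the
+  whole file every flush_divider iterations
+*/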
+void writer(int num)
+{
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ uint i;
+
+ for (i= 0; i < number_of_tests; i++)
+ {
+ uint end;
+ uint page= get_len(number_of_pages);
+ pagecache_read(&pagecache, &file1, page, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE,
+ 0);
+ end= check_page(buffr, page * PAGE_SIZE, 1, page, num);
+ put_rec(buffr, end, get_len(record_length_limit), num);
+ pagecache_write(&pagecache, &file1, page, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+
+ if (i % flush_divider == 0)
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ if (i % 500 == 0)
+ printf("writer%d: %d\n", num, i);
+ }
+ printf("writer%d: done\n", num);
+ free(buffr);
+}
+
+
+static void *test_thread_reader(void *arg)
+{
+ int param=*((int*) arg);
+
+ my_thread_init();
+ DBUG_ENTER("test_reader");
+ DBUG_PRINT("enter", ("param: %d", param));
+
+ reader(param);
+
+ DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name()));
+ pthread_mutex_lock(&LOCK_thread_count);
+ thread_count--;
+ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */
+ pthread_mutex_unlock(&LOCK_thread_count);
+ free((gptr) arg);
+ my_thread_end();
+ DBUG_RETURN(0);
+}
+
+static void *test_thread_writer(void *arg)
+{
+ int param=*((int*) arg);
+
+ my_thread_init();
+ DBUG_ENTER("test_writer");
+ DBUG_PRINT("enter", ("param: %d", param));
+
+ writer(param);
+
+ DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name()));
+ pthread_mutex_lock(&LOCK_thread_count);
+ thread_count--;
+ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */
+ pthread_mutex_unlock(&LOCK_thread_count);
+ free((gptr) arg);
+ my_thread_end();
+ DBUG_RETURN(0);
+}
+
+int main(int argc, char **argv __attribute__((unused)))
+{
+ pthread_t tid;
+ pthread_attr_t thr_attr;
+ int *param, error, pagen;
+
+ MY_INIT(argv[0]);
+
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\test_pagecache_consist.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/test_pagecache_consist.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+
+ DBUG_ENTER("main");
+ DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name()));
+ if ((file1.file= my_open(file1_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ DBUG_PRINT("info", ("file1: %d", file1.file));
+ if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0)
+ {
+ fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ my_pwrite(file1.file, "test file", 9, 0, MYF(0));
+
+ if ((error= pthread_cond_init(&COND_thread_count, NULL)))
+ {
+ fprintf(stderr, "COND_thread_count: %d from pthread_cond_init (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+ if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST)))
+ {
+    fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_init (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+
+ if ((error= pthread_attr_init(&thr_attr)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED)))
+ {
+ fprintf(stderr,
+ "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+
+#ifdef HAVE_THR_SETCONCURRENCY
+ VOID(thr_setconcurrency(2));
+#endif
+
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ DBUG_PRINT("info", ("Page cache %d pages", pagen));
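+  /* Pre-fill the test file with number_of_pages zeroed pages */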
+ {
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ uint i;
+ memset(buffr, '\0', PAGE_SIZE);
+ for (i= 0; i < number_of_pages; i++)
+ {
+ pagecache_write(&pagecache, &file1, i, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ }
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ free(buffr);
+ }
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ {
+ fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ while (number_of_readers != 0 || number_of_writers != 0)
+ {
+ if (number_of_readers != 0)
+ {
+ param=(int*) malloc(sizeof(int));
+ *param= number_of_readers;
+ if ((error= pthread_create(&tid, &thr_attr, test_thread_reader,
+ (void*) param)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ thread_count++;
+ number_of_readers--;
+ }
+ if (number_of_writers != 0)
+ {
+ param=(int*) malloc(sizeof(int));
+ *param= number_of_writers;
+ if ((error= pthread_create(&tid, &thr_attr, test_thread_writer,
+ (void*) param)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ thread_count++;
+ number_of_writers--;
+ }
+ }
+ DBUG_PRINT("info", ("Thread started"));
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ pthread_attr_destroy(&thr_attr);
+
+  /* wait for all reader and writer threads to finish */
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock\n",error);
+ while (thread_count)
+ {
+ if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count)))
+ fprintf(stderr,"COND_thread_count: %d from pthread_cond_wait\n",error);
+ }
+ if ((error= pthread_mutex_unlock(&LOCK_thread_count)))
+ fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_unlock\n",error);
+ DBUG_PRINT("info", ("thread ended"));
+
+ end_pagecache(&pagecache, 1);
+ DBUG_PRINT("info", ("Page cache ended"));
+
+ if (my_close(file1.file, MYF(0)) != 0)
+ {
+ fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ /*my_delete(file1_name, MYF(0));*/
+ my_end(0);
+
+ DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
+
+ DBUG_PRINT("info", ("Program end"));
+
+ DBUG_RETURN(exit_status());
+}
diff --git a/storage/maria/unittest/mf_pagecache_single.c b/storage/maria/unittest/mf_pagecache_single.c
new file mode 100644
index 00000000000..91cceee618d
--- /dev/null
+++ b/storage/maria/unittest/mf_pagecache_single.c
@@ -0,0 +1,580 @@
+/*
+ TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in
+ my_atomic-t.c (see BUG#22320).
+ Use diag() instead of fprintf(stderr).
+*/
+#include <tap.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include "test_file.h"
+
+#define PCACHE_SIZE (PAGE_SIZE*1024*10)
+
+#ifndef DBUG_OFF
+static const char* default_dbug_option;
+#endif
+
+static char *file1_name= (char*)"page_cache_test_file_1";
+static PAGECACHE_FILE file1;
+static pthread_cond_t COND_thread_count;
+static pthread_mutex_t LOCK_thread_count;
+static uint thread_count;
+static PAGECACHE pagecache;
+
+/*
+  File content descriptors: the expected on-disk content of the test file
+  after each test, verified with test_file()
+*/
+static struct file_desc simple_read_write_test_file[]=
+{
+ {PAGE_SIZE, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_read_change_write_read_test_file[]=
+{
+ {PAGE_SIZE/2, '\65'},
+ {PAGE_SIZE/2, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_pin_test_file1[]=
+{
+ {PAGE_SIZE*2, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_pin_test_file2[]=
+{
+ {PAGE_SIZE/2, '\1'},
+ {PAGE_SIZE/2, (unsigned char)129},
+ {PAGE_SIZE, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_delete_forget_test_file[]=
+{
+ {PAGE_SIZE, '\1'},
+ { 0, 0}
+};
+static struct file_desc simple_delete_flush_test_file[]=
+{
+ {PAGE_SIZE, '\2'},
+ { 0, 0}
+};
+
+
+/*
+ Recreate and reopen a file for test
+
+ SYNOPSIS
+ reset_file()
+ file File to reset
+ file_name Path (and name) of file which should be reset
+*/
+
+void reset_file(PAGECACHE_FILE file, char *file_name)
+{
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE);
+ if (my_close(file1.file, MYF(0)) != 0)
+ {
+ diag("Got error during %s closing from close() (errno: %d)\n",
+ file_name, errno);
+ exit(1);
+ }
+ my_delete(file_name, MYF(0));
+ if ((file.file= my_open(file_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ diag("Got error during %s creation from open() (errno: %d)\n",
+ file_name, errno);
+ exit(1);
+ }
+}
+
+/*
+ Write then read page, check file on disk
+*/
+
+int simple_read_write_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res;
+ DBUG_ENTER("simple_read_write_test");
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)),
+ "Simple write-read page ");
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE,
+ simple_read_write_test_file))),
+ "Simple write-read page file");
+ if (res)
+ reset_file(file1, file1_name);
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Prepare page, then read (and lock), change (write new value and unlock),
+ then check the page in the cache and on the disk
+*/
+int simple_read_change_write_read_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res;
+ DBUG_ENTER("simple_read_change_write_read_test");
+ /* prepare the file */
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ /* test */
+ pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE,
+ 0);
+ bfill(buffw, PAGE_SIZE/2, '\65');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+
+ pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)),
+ "Simple read-change-write-read page ");
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE,
+ simple_read_change_write_read_test_file))),
+ "Simple read-change-write-read page file");
+ if (res)
+ reset_file(file1, file1_name);
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Prepare page, read page 0 (and pin) then write page 1 and page 0.
+  Flush the file (this should flush only page 1 and return an error because
+  page 0 is still pinned).
+ Check file on the disk.
+ Unpin and flush.
+ Check file on the disk.
+*/
+int simple_pin_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res;
+ DBUG_ENTER("simple_pin_test");
+ /* prepare the file */
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ /* test */
+ if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
+ {
+ diag("error in flush_pagecache_blocks\n");
+ exit(1);
+ }
+ pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE,
+ 0);
+ pagecache_write(&pagecache, &file1, 1, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ bfill(buffw + PAGE_SIZE/2, PAGE_SIZE/2, ((unsigned char) 129));
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE_TO_READ,
+ PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ /*
+    We should get an error here because one page of the file is still
+    pinned; the other page should have been flushed
+ */
+ if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
+ {
+ diag("Did not get error in flush_pagecache_blocks\n");
+ res= 0;
+ goto err;
+ }
+ ok((res= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE*2,
+ simple_pin_test_file1))),
+ "Simple pin page file with pin");
+ pagecache_unlock_page(&pagecache,
+ &file1,
+ 0,
+ PAGECACHE_LOCK_READ_UNLOCK,
+ PAGECACHE_UNPIN,
+ 0);
+ if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
+ {
+ diag("Got error in flush_pagecache_blocks\n");
+ res= 0;
+ goto err;
+ }
+ ok((res&= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE,
+ simple_pin_test_file2))),
+ "Simple pin page result file");
+ if (res)
+ reset_file(file1, file1_name);
+err:
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+
+/*
+ Prepare page, write new value, then delete page from cache without flush,
+ on the disk should be page with old content written during preparation
+*/
+
+int simple_delete_forget_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res;
+ DBUG_ENTER("simple_delete_forget_test");
+ /* prepare the file */
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ /* test */
+ bfill(buffw, PAGE_SIZE, '\2');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ pagecache_delete_page(&pagecache, &file1, 0,
+ PAGECACHE_LOCK_WRITE, 0);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE,
+ simple_delete_forget_test_file))),
+ "Simple delete-forget page file");
+ if (res)
+ reset_file(file1, file1_name);
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+
+/*
+ Prepare a page with locking, write new content to the page,
+ delete the page with flush while the lock is still held,
+ and check that the page on disk contains the new value.
+*/
+
+int simple_delete_flush_test()
+{
+ unsigned char *buffw= malloc(PAGE_SIZE);
+ unsigned char *buffr= malloc(PAGE_SIZE);
+ int res;
+ DBUG_ENTER("simple_delete_flush_test");
+ /* prepare the file */
+ bfill(buffw, PAGE_SIZE, '\1');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE,
+ PAGECACHE_PIN,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ /* test */
+ bfill(buffw, PAGE_SIZE, '\2');
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ PAGECACHE_PIN_LEFT_PINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ pagecache_delete_page(&pagecache, &file1, 0,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, 1);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE,
+ simple_delete_flush_test_file))),
+ "Simple delete-forget page file");
+ if (res)
+ reset_file(file1, file1_name);
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Write then read a file bigger than the cache
+*/
+
+int simple_big_test()
+{
+ unsigned char *buffw= (unsigned char *)malloc(PAGE_SIZE);
+ unsigned char *buffr= (unsigned char *)malloc(PAGE_SIZE);
+ struct file_desc *desc=
+ (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2) + 1) *
+ sizeof(struct file_desc));
+ int res, i;
+ DBUG_ENTER("simple_big_test");
+ /* prepare a file twice as large as the cache */
+ for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++)
+ {
+ bfill(buffw, PAGE_SIZE, (unsigned char) (i & 0xff));
+ desc[i].length= PAGE_SIZE;
+ desc[i].content= (i & 0xff);
+ pagecache_write(&pagecache, &file1, i, 3, (char*)buffw,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ }
+ desc[i].length= 0;
+ desc[i].content= '\0';
+ ok(1, "Simple big file write");
+ /* check sequential reads of the written pages */
+ for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++)
+ {
+ int j;
+ pagecache_read(&pagecache, &file1, i, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ for(j= 0; j < PAGE_SIZE; j++)
+ {
+ if (buffr[j] != (i & 0xff))
+ {
+ diag("simple_big_test seq: page %u byte %u mismatch\n", i, j);
+ return 0;
+ }
+ }
+ }
+ ok(1, "simple big file sequentally read");
+ /* chack random reads */
+ for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE); i++)
+ {
+ int j, page;
+ page= rand() % (PCACHE_SIZE/(PAGE_SIZE/2));
+ pagecache_read(&pagecache, &file1, page, 3, (char*)buffr,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ 0);
+ for(j= 0; j < PAGE_SIZE; j++)
+ {
+ if (buffr[j] != (page & 0xff))
+ {
+ diag("simple_big_test rnd: page %u byte %u mismatch\n", page, j);
+ return 0;
+ }
+ }
+ }
+ ok(1, "simple big file random read");
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+
+ ok((res= test(test_file(file1, file1_name, PCACHE_SIZE*2, PAGE_SIZE,
+ desc))),
+ "Simple big file");
+ if (res)
+ reset_file(file1, file1_name);
+ free(buffw);
+ free(buffr);
+ DBUG_RETURN(res);
+}
+/*
+ Thread function
+*/
+
+static void *test_thread(void *arg)
+{
+ int param=*((int*) arg);
+
+ my_thread_init();
+ DBUG_ENTER("test_thread");
+
+ DBUG_PRINT("enter", ("param: %d", param));
+
+ if (!simple_read_write_test() ||
+ !simple_read_change_write_read_test() ||
+ !simple_pin_test() ||
+ !simple_delete_forget_test() ||
+ !simple_delete_flush_test() ||
+ !simple_big_test())
+ exit(1);
+
+ DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name()));
+ pthread_mutex_lock(&LOCK_thread_count);
+ thread_count--;
+ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */
+ pthread_mutex_unlock(&LOCK_thread_count);
+ free((gptr) arg);
+ my_thread_end();
+ DBUG_RETURN(0);
+}
+
+
+int main(int argc, char **argv __attribute__((unused)))
+{
+ pthread_t tid;
+ pthread_attr_t thr_attr;
+ int *param, error, pagen;
+
+ MY_INIT(argv[0]);
+
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\test_pagecache_single.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/test_pagecache_single.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+
+ DBUG_ENTER("main");
+ DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name()));
+ if ((file1.file= my_open(file1_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ DBUG_PRINT("info", ("file1: %d", file1.file));
+ if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0)
+ {
+ fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ my_pwrite(file1.file, "test file", 9, 0, MYF(0));
+
+ if ((error= pthread_cond_init(&COND_thread_count, NULL)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+ if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+
+ if ((error= pthread_attr_init(&thr_attr)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED)))
+ {
+ fprintf(stderr,
+ "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+
+#ifdef HAVE_THR_SETCONCURRENCY
+ VOID(thr_setconcurrency(2));
+#endif
+
+ plan(12);
+
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ DBUG_PRINT("info", ("Page cache %d pages", pagen));
+
+ if ((error=pthread_mutex_lock(&LOCK_thread_count)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_mutex_lock (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ param=(int*) malloc(sizeof(int));
+ *param= 1;
+ if ((error= pthread_create(&tid, &thr_attr, test_thread, (void*) param)))
+ {
+ fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n",
+ error,errno);
+ exit(1);
+ }
+ thread_count++;
+ DBUG_PRINT("info", ("Thread started"));
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ pthread_attr_destroy(&thr_attr);
+
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ fprintf(stderr,"Got error: %d from pthread_mutex_lock\n",error);
+ while (thread_count)
+ {
+ if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count)))
+ fprintf(stderr,"Got error: %d from pthread_cond_wait\n",error);
+ }
+ if ((error= pthread_mutex_unlock(&LOCK_thread_count)))
+ fprintf(stderr,"Got error: %d from pthread_mutex_unlock\n",error);
+ DBUG_PRINT("info", ("thread ended"));
+
+ end_pagecache(&pagecache, 1);
+ DBUG_PRINT("info", ("Page cache ended"));
+
+ if (my_close(file1.file, MYF(0)) != 0)
+ {
+ fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ /*my_delete(file1_name, MYF(0));*/
+ my_end(0);
+
+ DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
+
+ DBUG_PRINT("info", ("Program end"));
+
+ DBUG_RETURN(exit_status());
+}
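The tests above repeat one small pattern against the page cache API. The condensed sketch below (not a standalone program; names and constants are taken from the test code itself, and buffer handling is simplified) shows the write-with-delay, forced-flush, read-back sequence they all build on:

  unsigned char buff[PAGE_SIZE];

  bfill(buff, PAGE_SIZE, '\1');                     /* build the page image */
  pagecache_write(&pagecache, &file1, 0, 3, (char*) buff,
                  PAGECACHE_PLAIN_PAGE,
                  PAGECACHE_LOCK_LEFT_UNLOCKED,     /* keep no lock */
                  PAGECACHE_PIN_LEFT_UNPINNED,      /* keep no pin */
                  PAGECACHE_WRITE_DELAY,            /* page stays dirty in the cache */
                  0);
  if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
    diag("flush_pagecache_blocks failed");          /* non-zero return means error */
  pagecache_read(&pagecache, &file1, 0, 3, (char*) buff,
                 PAGECACHE_PLAIN_PAGE,
                 PAGECACHE_LOCK_LEFT_UNLOCKED,
                 0);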
diff --git a/storage/maria/unittest/test_file.c b/storage/maria/unittest/test_file.c
new file mode 100644
index 00000000000..758d0bfa81b
--- /dev/null
+++ b/storage/maria/unittest/test_file.c
@@ -0,0 +1,68 @@
+#include <tap.h>
+#include <my_sys.h>
+#include <my_dir.h>
+#include "test_file.h"
+
+
+/*
+ Check that the file contents correspond to the descriptor
+
+ SYNOPSIS
+ test_file()
+ file File to test
+ file_name Path (and name) of file which is tested
+ size size of the file
+ buff_size size of a buffer big enough to check the file
+ desc content descriptor to check the file against
+
+ RETURN
+ 1 file is OK
+ 0 error
+*/
+
+int test_file(PAGECACHE_FILE file, char *file_name,
+ off_t size, size_t buff_size, struct file_desc *desc)
+{
+ MY_STAT stat_buff, *stat;
+ unsigned char *buffr= malloc(buff_size);
+ off_t pos= 0;
+ size_t byte;
+ int step= 0;
+
+ if ((stat= my_stat(file_name, &stat_buff, MYF(0))) == NULL)
+ {
+ diag("Can't stat() %s (errno: %d)\n", file_name, errno);
+ return 0;
+ }
+ if (stat->st_size != size)
+ {
+ diag("file %s size is %lu (should be %lu)\n",
+ file_name, (ulong) stat->st_size, (ulong) size);
+ return 0;
+ }
+ /* check content */
+ my_seek(file.file, 0, SEEK_SET, MYF(0));
+ while (desc[step].length != 0)
+ {
+ if (my_read(file.file, (char*)buffr, desc[step].length, MYF(0)) !=
+ desc[step].length)
+ {
+ diag("Can't read %u bytes from %s (errno: %d)\n",
+ (uint)desc[step].length, file_name, errno);
+ return 0;
+ }
+ for (byte= 0; byte < desc[step].length; byte++)
+ {
+ if (buffr[byte] != desc[step].content)
+ {
+ diag("content of %s mismatch 0x%x in position %lu instead of 0x%x\n",
+ file_name, (uint) buffr[byte], (ulong) (pos + byte),
+ desc[step].content);
+ return 0;
+ }
+ }
+ pos+= desc[step].length;
+ step++;
+ }
+ return 1;
+}
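As a usage illustration (hypothetical values; PAGE_SIZE, file1 and file1_name are assumed to come from the calling test), a descriptor array for test_file() lists runs of identical bytes and ends with a zero-length entry, matching the while (desc[step].length != 0) loop above:

  /* two-page file: first page all '\1', second page all '\2' */
  static struct file_desc two_page_desc[]=
  {
    { PAGE_SIZE, '\1' },
    { PAGE_SIZE, '\2' },
    { 0, 0 }                       /* terminator: length 0 stops the scan */
  };

  ok(test_file(file1, file1_name, PAGE_SIZE * 2, PAGE_SIZE, two_page_desc),
     "two-page content check");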
diff --git a/storage/maria/unittest/test_file.h b/storage/maria/unittest/test_file.h
new file mode 100644
index 00000000000..bfc660b13d0
--- /dev/null
+++ b/storage/maria/unittest/test_file.h
@@ -0,0 +1,14 @@
+#include <m_string.h>
+#include <pagecache.h>
+
+/*
+ File content descriptor
+*/
+struct file_desc
+{
+ unsigned int length;
+ unsigned char content;
+};
+
+int test_file(PAGECACHE_FILE file, char *file_name,
+ off_t size, size_t buff_size, struct file_desc *desc);
diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c
new file mode 100644
index 00000000000..7d97794b685
--- /dev/null
+++ b/storage/maria/unittest/trnman-t.c
@@ -0,0 +1,185 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <tap.h>
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <lf.h>
+#include <m_string.h>
+#include "../trnman.h"
+
+pthread_mutex_t rt_mutex;
+int rt_num_threads;
+
+int litmus;
+
+/*
+ create and end (commit or rollback) transactions randomly
+*/
+#define MAX_ITER 100
+pthread_handler_t test_trnman(void *arg)
+{
+ int m= (*(int *)arg);
+ uint x, y, i, n;
+ TRN *trn[MAX_ITER];
+ pthread_mutex_t mutexes[MAX_ITER];
+ pthread_cond_t conds[MAX_ITER];
+
+ for (i= 0; i < MAX_ITER; i++)
+ {
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&conds[i], 0);
+ }
+
+ for (x= ((int)(intptr)(&m)); m > 0; )
+ {
+ y= x= (x*LL(3628273133) + LL(1500450271)) % LL(9576890767); /* three prime numbers */
+ m-= n= x % MAX_ITER;
+ for (i= 0; i < n; i++)
+ {
+ trn[i]= trnman_new_trn(&mutexes[i], &conds[i]);
+ if (!trn[i])
+ {
+ diag("trnman_new_trn() failed");
+ litmus++;
+ }
+ }
+ for (i= 0; i < n; i++)
+ {
+ y= (y*19 + 7) % 31;
+ trnman_end_trn(trn[i], y & 1);
+ }
+ }
+ for (i= 0; i < MAX_ITER; i++)
+ {
+ pthread_mutex_destroy(&mutexes[i]);
+ pthread_cond_destroy(&conds[i]);
+ }
+ pthread_mutex_lock(&rt_mutex);
+ rt_num_threads--;
+ pthread_mutex_unlock(&rt_mutex);
+
+ return 0;
+}
+#undef MAX_ITER
+
+void run_test(const char *test, pthread_handler handler, int n, int m)
+{
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
+
+ litmus= 0;
+
+ threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
+ diag("Testing %s with %d threads, %d iterations... ", test, n, m);
+ rt_num_threads= n;
+ for (i= 0; i < n ; i++)
+ if (pthread_create(threads+i, 0, handler, &m))
+ {
+ diag("Could not create thread");
+ abort();
+ }
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus);
+ my_free((void*)threads, MYF(0));
+}
+
+#define ok_read_from(T1, T2, RES) \
+ i= trnman_can_read_from(trn[T1], trn[T2]->trid); \
+ ok(i == RES, "trn" #T1 " %s read from trn" #T2, i ? "can" : "cannot")
+#define start_transaction(T) \
+ trn[T]= trnman_new_trn(&mutexes[T], &conds[T])
+#define commit(T) trnman_commit_trn(trn[T])
+#define abort(T) trnman_abort_trn(trn[T])
+
+#define Ntrns 4
+void test_trnman_read_from()
+{
+ TRN *trn[Ntrns];
+ pthread_mutex_t mutexes[Ntrns];
+ pthread_cond_t conds[Ntrns];
+ int i;
+
+ for (i= 0; i < Ntrns; i++)
+ {
+ pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&conds[i], 0);
+ }
+
+ start_transaction(0); /* start trn1 */
+ start_transaction(1); /* start trn2 */
+ ok_read_from(1, 0, 0);
+ commit(0); /* commit trn1 */
+ start_transaction(2); /* start trn4 */
+ abort(2); /* abort trn4 */
+ start_transaction(3); /* start trn5 */
+ ok_read_from(3, 0, 1);
+ ok_read_from(3, 1, 0);
+ ok_read_from(3, 2, 0);
+ commit(1); /* commit trn2 */
+ ok_read_from(3, 1, 0);
+ commit(3); /* commit trn5 */
+
+ for (i= 0; i < Ntrns; i++)
+ {
+ pthread_mutex_destroy(&mutexes[i]);
+ pthread_cond_destroy(&conds[i]);
+ }
+}
+
+int main()
+{
+ my_init();
+
+ plan(6);
+
+ if (my_atomic_initialize())
+ return exit_status();
+
+ pthread_mutex_init(&rt_mutex, 0);
+
+#define CYCLES 10000
+#define THREADS 10
+
+ trnman_init();
+
+ test_trnman_read_from();
+ run_test("trnman", test_trnman, THREADS, CYCLES);
+
+ diag("mallocs: %d", trnman_allocated_transactions);
+ {
+ ulonglong now= my_getsystime();
+ trnman_destroy();
+ now= my_getsystime()-now;
+ diag("trnman_destroy: %g", ((double)now)/1e7);
+ }
+
+ pthread_mutex_destroy(&rt_mutex);
+ my_end(0);
+ return exit_status();
+}
+
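For orientation, plan(6) in main() above covers the five ok_read_from() checks in test_trnman_read_from() plus the single ok() emitted by run_test(). A minimal lifecycle sketch, using only calls that appear in this test (the second argument to trnman_end_trn() is the commit/rollback flag the stress loop derives from y & 1):

  pthread_mutex_t mtx;
  pthread_cond_t  cond;
  TRN *trn;

  pthread_mutex_init(&mtx, MY_MUTEX_INIT_FAST);
  pthread_cond_init(&cond, 0);
  if ((trn= trnman_new_trn(&mtx, &cond)))   /* NULL means failure */
    trnman_end_trn(trn, 1);                 /* commit/rollback flag, as passed in the test */
  pthread_cond_destroy(&cond);
  pthread_mutex_destroy(&mtx);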
diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am
index f50c312b8e4..4bd0b177daa 100644
--- a/storage/myisam/Makefile.am
+++ b/storage/myisam/Makefile.am
@@ -97,8 +97,8 @@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \
mi_delete_table.c mi_rename.c mi_check.c \
mi_keycache.c mi_preload.c \
ft_parser.c ft_stopwords.c ft_static.c \
- ft_update.c ft_boolean_search.c ft_nlq_search.c sort.c \
- ha_myisam.cc \
+ ft_update.c ft_boolean_search.c ft_nlq_search.c \
+ sort.c ha_myisam.cc ft_myisam.c \
rt_index.c rt_key.c rt_mbr.c rt_split.c sp_key.c
CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all rt_test.MY? sp_test.MY?
diff --git a/storage/myisam/ft_boolean_search.c b/storage/myisam/ft_boolean_search.c
index 68076d7e401..b1b67099b22 100644
--- a/storage/myisam/ft_boolean_search.c
+++ b/storage/myisam/ft_boolean_search.c
@@ -149,7 +149,7 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{
/* ORDER BY word DESC, ndepth DESC */
- int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
+ int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
(uchar*) (*a)->word+1,(*a)->len-1,0,0);
if (!i)
i=CMP_NUM((*b)->ndepth,(*a)->ndepth);
@@ -183,7 +183,7 @@ static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
case FT_TOKEN_WORD:
ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
sizeof(FTB_WORD) +
- (info->trunc ? MI_MAX_KEY_BUFF :
+ (info->trunc ? HA_MAX_KEY_BUFF :
word_len * ftb_param->ftb->charset->mbmaxlen +
HA_FT_WLEN +
ftb_param->ftb->info->s->rec_reflength));
@@ -333,7 +333,6 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
uint off, extra=HA_FT_WLEN+info->s->base.rec_reflength;
byte *lastkey_buf=ftbw->word+ftbw->off;
- LINT_INIT(off);
if (ftbw->flags & FTB_FLAG_TRUNC)
lastkey_buf+=ftbw->len;
@@ -377,7 +376,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
if (!r && !ftbw->off)
{
- r= mi_compare_text(ftb->charset,
+ r= ha_compare_text(ftb->charset,
info->lastkey+1,
info->lastkey_length-extra-1,
(uchar*) ftbw->word+1,
@@ -837,7 +836,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
{
ftbw= ftb->list[c];
- if (mi_compare_text(ftb->charset, (uchar*)word, len,
+ if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word+1, ftbw->len-1,
(my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0)
b= c;
@@ -847,7 +846,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
for (; c >= 0; c--)
{
ftbw= ftb->list[c];
- if (mi_compare_text(ftb->charset, (uchar*)word, len,
+ if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word + 1,ftbw->len - 1,
(my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0))
break;
diff --git a/storage/myisam/ft_myisam.c b/storage/myisam/ft_myisam.c
new file mode 100644
index 00000000000..76c04ba4c0b
--- /dev/null
+++ b/storage/myisam/ft_myisam.c
@@ -0,0 +1,36 @@
+/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+/*
+ Interface functions between the fulltext code and MyISAM
+*/
+
+#include "ftdefs.h"
+
+FT_INFO *ft_init_search(uint flags, void *info, uint keynr,
+ byte *query, uint query_len, CHARSET_INFO *cs,
+ byte *record)
+{
+ FT_INFO *res;
+ if (flags & FT_BOOL)
+ res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs);
+ else
+ res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags,
+ record);
+ return res;
+}
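A hypothetical caller sketch for the relocated ft_init_search() (mi_info, keynr and record_buf are made-up names; default_charset_info is the same charset global used in ft_stopwords.c below): flags containing FT_BOOL select the boolean parser, anything else falls through to the natural-language (NLQ) search.

  FT_INFO *ft= ft_init_search(FT_BOOL, (void*) mi_info, keynr,
                              (byte*) "+maria -isam", 12,
                              default_charset_info, record_buf);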
diff --git a/storage/myisam/ft_nlq_search.c b/storage/myisam/ft_nlq_search.c
index 5c6f66897ee..58d88731201 100644
--- a/storage/myisam/ft_nlq_search.c
+++ b/storage/myisam/ft_nlq_search.c
@@ -103,7 +103,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
{
if (keylen &&
- mi_compare_text(aio->charset,info->lastkey+1,
+ ha_compare_text(aio->charset,info->lastkey+1,
info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0))
break;
diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c
index 5992d9c118e..59fa25469f1 100644
--- a/storage/myisam/ft_parser.c
+++ b/storage/myisam/ft_parser.c
@@ -31,7 +31,7 @@ typedef struct st_my_ft_parser_param
static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{
- return mi_compare_text(cs, (uchar*) w1->pos, w1->len,
+ return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
(uchar*) w2->pos, w2->len, 0, 0);
}
diff --git a/storage/myisam/ft_static.c b/storage/myisam/ft_static.c
index 34608be1721..ea3c336cc7a 100644
--- a/storage/myisam/ft_static.c
+++ b/storage/myisam/ft_static.c
@@ -55,19 +55,6 @@ const struct _ft_vft _ft_vft_boolean = {
};
-FT_INFO *ft_init_search(uint flags, void *info, uint keynr,
- byte *query, uint query_len, CHARSET_INFO *cs,
- byte *record)
-{
- FT_INFO *res;
- if (flags & FT_BOOL)
- res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs);
- else
- res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags,
- record);
- return res;
-}
-
const char *ft_stopword_file = 0;
const char *ft_precompiled_stopwords[] = {
diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c
index 63732ebadc9..1b6cff5e903 100644
--- a/storage/myisam/ft_stopwords.c
+++ b/storage/myisam/ft_stopwords.c
@@ -29,7 +29,7 @@ static TREE *stopwords3=NULL;
static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
FT_STOPWORD *w1, FT_STOPWORD *w2)
{
- return mi_compare_text(default_charset_info,
+ return ha_compare_text(default_charset_info,
(uchar *)w1->pos,w1->len,
(uchar *)w2->pos,w2->len,0,0);
}
diff --git a/storage/myisam/ft_update.c b/storage/myisam/ft_update.c
index e176d550b1d..a5a17ff4bd2 100644
--- a/storage/myisam/ft_update.c
+++ b/storage/myisam/ft_update.c
@@ -180,7 +180,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2)
{
if ((ftsi1.pos != ftsi2.pos) &&
(!ftsi1.pos || !ftsi2.pos ||
- mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
+ ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
(uchar*) ftsi2.pos,ftsi2.len,0,0)))
DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
}
@@ -209,7 +209,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf,
error=0;
while(old_word->pos && new_word->pos)
{
- cmp= mi_compare_text(cs, (uchar*) old_word->pos,old_word->len,
+ cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len,
(uchar*) new_word->pos,new_word->len,0,0);
cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
diff --git a/storage/myisam/fulltext.h b/storage/myisam/fulltext.h
index bea2fa96969..56cca3257a2 100644
--- a/storage/myisam/fulltext.h
+++ b/storage/myisam/fulltext.h
@@ -20,18 +20,8 @@
#include "myisamdef.h"
#include "ft_global.h"
-#define HA_FT_WTYPE HA_KEYTYPE_FLOAT
-#define HA_FT_WLEN 4
-#define FT_SEGS 2
-
-#define ft_sintXkorr(A) mi_sint4korr(A)
-#define ft_intXstore(T,A) mi_int4store(T,A)
-
-extern const HA_KEYSEG ft_keysegs[FT_SEGS];
-
int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *);
int _mi_ft_add(MI_INFO *, uint, byte *, const byte *, my_off_t);
int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t);
uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *);
-
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index 06ec5c4b44e..0113fe80a08 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -22,6 +22,7 @@
#include "mysql_priv.h"
#include <mysql/plugin.h>
#include <m_ctype.h>
+#include <my_bit.h>
#include <myisampack.h>
#include "ha_myisam.h"
#include <stdarg.h>
@@ -56,7 +57,7 @@ static handler *myisam_create_handler(handlerton *hton,
// collect errors printed by mi_check routines
-static void mi_check_print_msg(MI_CHECK *param, const char* msg_type,
+static void mi_check_print_msg(HA_CHECK *param, const char* msg_type,
const char *fmt, va_list args)
{
THD* thd = (THD*)param->thd;
@@ -399,13 +400,13 @@ int check_definition(MI_KEYDEF *t1_keyinfo, MI_COLUMNDEF *t1_recinfo,
extern "C" {
-volatile int *killed_ptr(MI_CHECK *param)
+volatile int *killed_ptr(HA_CHECK *param)
{
/* In theory Unsafe conversion, but should be ok for now */
return (int*) &(((THD *)(param->thd))->killed);
}
-void mi_check_print_error(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_error(HA_CHECK *param, const char *fmt,...)
{
param->error_printed|=1;
param->out_flag|= O_DATA_LOST;
@@ -415,7 +416,7 @@ void mi_check_print_error(MI_CHECK *param, const char *fmt,...)
va_end(args);
}
-void mi_check_print_info(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_info(HA_CHECK *param, const char *fmt,...)
{
va_list args;
va_start(args, fmt);
@@ -423,7 +424,7 @@ void mi_check_print_info(MI_CHECK *param, const char *fmt,...)
va_end(args);
}
-void mi_check_print_warning(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_warning(HA_CHECK *param, const char *fmt,...)
{
param->warning_printed=1;
param->out_flag|= O_DATA_LOST;
@@ -674,7 +675,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
{
if (!file) return HA_ADMIN_INTERNAL_ERROR;
int error;
- MI_CHECK param;
+ HA_CHECK param;
MYISAM_SHARE* share = file->s;
const char *old_proc_info=thd->proc_info;
@@ -685,7 +686,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
param.db_name= table->s->db.str;
param.table_name= table->alias;
param.testflag = check_opt->flags | T_CHECK | T_SILENT;
- param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method;
+ param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method;
if (!(table->db_stat & HA_READ_ONLY))
param.testflag|= T_STATISTICS;
@@ -766,7 +767,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt)
{
int error=0;
- MI_CHECK param;
+ HA_CHECK param;
MYISAM_SHARE* share = file->s;
myisamchk_init(&param);
@@ -777,7 +778,7 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt)
param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
T_DONT_CHECK_CHECKSUM);
param.using_global_keycache = 1;
- param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method;
+ param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method;
if (!(share->state.changed & STATE_NOT_ANALYZED))
return HA_ADMIN_ALREADY_DONE;
@@ -826,7 +827,7 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt)
err:
{
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd= thd;
param.op_name= "restore";
@@ -889,7 +890,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt)
err:
{
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd= thd;
param.op_name= "backup";
@@ -905,7 +906,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt)
int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt)
{
int error;
- MI_CHECK param;
+ HA_CHECK param;
ha_rows start_records;
if (!file) return HA_ADMIN_INTERNAL_ERROR;
@@ -955,7 +956,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt)
{
int error;
if (!file) return HA_ADMIN_INTERNAL_ERROR;
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd = thd;
@@ -974,7 +975,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt)
}
-int ha_myisam::repair(THD *thd, MI_CHECK &param, bool do_optimize)
+int ha_myisam::repair(THD *thd, HA_CHECK &param, bool do_optimize)
{
int error=0;
uint local_testflag=param.testflag;
@@ -1152,7 +1153,7 @@ int ha_myisam::assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt)
if (error != HA_ADMIN_OK)
{
/* Send error to user */
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd= thd;
param.op_name= "assign_to_keycache";
@@ -1220,7 +1221,7 @@ int ha_myisam::preload_keys(THD* thd, HA_CHECK_OPT *check_opt)
err:
{
- MI_CHECK param;
+ HA_CHECK param;
myisamchk_init(&param);
param.thd= thd;
param.op_name= "preload_keys";
@@ -1327,7 +1328,7 @@ int ha_myisam::enable_indexes(uint mode)
else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
{
THD *thd=current_thd;
- MI_CHECK param;
+ HA_CHECK param;
const char *save_proc_info=thd->proc_info;
thd->proc_info="Creating index";
myisamchk_init(&param);
@@ -1336,7 +1337,7 @@ int ha_myisam::enable_indexes(uint mode)
T_CREATE_MISSING_KEYS);
param.myf_rw&= ~MY_WAIT_IF_FULL;
param.sort_buffer_length= thd->variables.myisam_sort_buff_size;
- param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method;
+ param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method;
param.tmpdir=&mysql_tmpdir_list;
if ((error= (repair(thd,param,0) != HA_ADMIN_OK)) && param.retry_repair)
{
@@ -1839,7 +1840,7 @@ void ha_myisam::get_auto_increment(ulonglong offset, ulonglong increment,
{
ulonglong nr;
int error;
- byte key[MI_MAX_KEY_LENGTH];
+ byte key[HA_MAX_KEY_LENGTH];
if (!table->s->next_number_key_offset)
{ // Autoincrement at key-start
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index 882900bd35f..4eaeb11fece 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -21,6 +21,7 @@
/* class for the the myisam handler */
#include <myisam.h>
+#include <myisamchk.h>
#include <ft_global.h>
#define HA_RECOVER_NONE 0 /* No automatic recover */
@@ -39,7 +40,7 @@ class ha_myisam: public handler
ulonglong int_table_flags;
char *data_file_name, *index_file_name;
bool can_enable_indexes;
- int repair(THD *thd, MI_CHECK &param, bool optimize);
+ int repair(THD *thd, HA_CHECK &param, bool optimize);
public:
ha_myisam(handlerton *hton, TABLE_SHARE *table_arg);
@@ -56,8 +57,8 @@ class ha_myisam: public handler
HA_READ_ORDER | HA_KEYREAD_ONLY);
}
uint max_supported_keys() const { return MI_MAX_KEY; }
- uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; }
- uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; }
+ uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; }
+ uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; }
uint checksum() const;
virtual bool check_if_locking_is_allowed(uint sql_command,
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 7bcb8041fe0..8c955acf011 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -59,14 +59,14 @@
/* Functions defined in this file */
-static int check_k_link(MI_CHECK *param, MI_INFO *info,uint nr);
-static int chk_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
+static int check_k_link(HA_CHECK *param, MI_INFO *info,uint nr);
+static int chk_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
my_off_t page, uchar *buff, ha_rows *keys,
ha_checksum *key_checksum, uint level);
static uint isam_key_length(MI_INFO *info,MI_KEYDEF *keyinfo);
static ha_checksum calc_checksum(ha_rows count);
static int writekeys(MI_SORT_PARAM *sort_param);
-static int sort_one_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
+static int sort_one_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo,
my_off_t pagepos, File new_file);
static int sort_key_read(MI_SORT_PARAM *sort_param,void *key);
static int sort_ft_key_read(MI_SORT_PARAM *sort_param,void *key);
@@ -80,13 +80,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
reg1 SORT_KEY_BLOCKS *key_block,
uchar *key, my_off_t prev_block);
static int sort_delete_record(MI_SORT_PARAM *sort_param);
-/*static int flush_pending_blocks(MI_CHECK *param);*/
-static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks,
+/*static int flush_pending_blocks(HA_CHECK *param);*/
+static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
uint buffer_length);
static ha_checksum mi_byte_checksum(const byte *buf, uint length);
-static void set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share);
+static void set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share);
-void myisamchk_init(MI_CHECK *param)
+void myisamchk_init(HA_CHECK *param)
{
bzero((gptr) param,sizeof(*param));
param->opt_follow_links=1;
@@ -108,7 +108,7 @@ void myisamchk_init(MI_CHECK *param)
/* Check the status flags for the table */
-int chk_status(MI_CHECK *param, register MI_INFO *info)
+int chk_status(HA_CHECK *param, register MI_INFO *info)
{
MYISAM_SHARE *share=info->s;
@@ -136,7 +136,7 @@ int chk_status(MI_CHECK *param, register MI_INFO *info)
/* Check delete links */
-int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag)
+int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag)
{
reg2 ha_rows i;
uint delete_link_length;
@@ -245,7 +245,7 @@ wrong:
/* Check delete links in index file */
-static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr)
+static int check_k_link(HA_CHECK *param, register MI_INFO *info, uint nr)
{
my_off_t next_link;
uint block_size=(nr+1)*MI_MIN_KEY_BLOCK_LENGTH;
@@ -322,7 +322,7 @@ static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr)
/* Check sizes of files */
-int chk_size(MI_CHECK *param, register MI_INFO *info)
+int chk_size(HA_CHECK *param, register MI_INFO *info)
{
int error=0;
register my_off_t skr,size;
@@ -398,7 +398,7 @@ int chk_size(MI_CHECK *param, register MI_INFO *info)
/* Check keys */
-int chk_key(MI_CHECK *param, register MI_INFO *info)
+int chk_key(HA_CHECK *param, register MI_INFO *info)
{
uint key,found_keys=0,full_text_keys=0,result=0;
ha_rows keys;
@@ -583,7 +583,7 @@ do_stat:
} /* chk_key */
-static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
+static int chk_index_down(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
my_off_t page, uchar *buff, ha_rows *keys,
ha_checksum *key_checksum, uint level)
{
@@ -729,13 +729,13 @@ int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
/* Check if index is ok */
-static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
+static int chk_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
my_off_t page, uchar *buff, ha_rows *keys,
ha_checksum *key_checksum, uint level)
{
int flag;
uint used_length,comp_flag,nod_flag,key_length=0;
- uchar key[MI_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos;
+ uchar key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos;
my_off_t next_page,record;
char llbuff[22];
uint diff_pos[2];
@@ -932,7 +932,7 @@ static uint isam_key_length(MI_INFO *info, register MI_KEYDEF *keyinfo)
/* Check that record-link is ok */
-int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
+int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend)
{
int error,got_error,flag;
uint key,left_length,b_type,field;
@@ -942,7 +942,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
byte *record,*to;
char llbuff[22],llbuff2[22],llbuff3[22];
ha_checksum intern_record_checksum;
- ha_checksum key_checksum[MI_MAX_POSSIBLE_KEY];
+ ha_checksum key_checksum[HA_MAX_POSSIBLE_KEY];
my_bool static_row_size;
MI_KEYDEF *keyinfo;
MI_BLOCK_INFO block_info;
@@ -990,6 +990,9 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
if (*killed_ptr(param))
goto err2;
switch (info->s->data_file_type) {
+ case BLOCK_RECORD:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
case STATIC_RECORD:
if (my_b_read(&param->read_cache,(byte*) record,
info->s->base.pack_reclength))
@@ -1377,7 +1380,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend)
/* Recover old table by reading each record and writing all keys */
/* Save new datafile-name in temp_filename */
-int mi_repair(MI_CHECK *param, register MI_INFO *info,
+int mi_repair(HA_CHECK *param, register MI_INFO *info,
my_string name, int rep_quick)
{
int error,got_error;
@@ -1387,7 +1390,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info,
File new_file;
MYISAM_SHARE *share=info->s;
char llbuff[22],llbuff2[22];
- SORT_INFO sort_info;
+ MI_SORT_INFO sort_info;
MI_SORT_PARAM sort_param;
DBUG_ENTER("mi_repair");
@@ -1770,7 +1773,7 @@ int movepoint(register MI_INFO *info, byte *record, my_off_t oldpos,
/* Tell system that we want all memory for our cache */
-void lock_memory(MI_CHECK *param __attribute__((unused)))
+void lock_memory(HA_CHECK *param __attribute__((unused)))
{
#ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */
if (param->opt_lock_memory)
@@ -1786,7 +1789,7 @@ void lock_memory(MI_CHECK *param __attribute__((unused)))
/* Flush all changed blocks to disk */
-int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file)
+int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file)
{
if (flush_key_blocks(key_cache, file, FLUSH_RELEASE))
{
@@ -1801,12 +1804,12 @@ int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file)
/* Sort index for more efficent reads */
-int mi_sort_index(MI_CHECK *param, register MI_INFO *info, my_string name)
+int mi_sort_index(HA_CHECK *param, register MI_INFO *info, my_string name)
{
reg2 uint key;
reg1 MI_KEYDEF *keyinfo;
File new_file;
- my_off_t index_pos[MI_MAX_POSSIBLE_KEY];
+ my_off_t index_pos[HA_MAX_POSSIBLE_KEY];
uint r_locks,w_locks;
int old_lock;
MYISAM_SHARE *share=info->s;
@@ -1901,12 +1904,12 @@ err2:
/* Sort records recursive using one index */
-static int sort_one_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
+static int sort_one_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
my_off_t pagepos, File new_file)
{
uint length,nod_flag,used_length, key_length;
uchar *buff,*keypos,*endpos;
- uchar key[MI_MAX_POSSIBLE_KEY_BUFF];
+ uchar key[HA_MAX_POSSIBLE_KEY_BUFF];
my_off_t new_page_pos,next_page;
char llbuff[22];
DBUG_ENTER("sort_one_index");
@@ -2021,7 +2024,7 @@ int change_to_newfile(const char * filename, const char * old_ext,
/* Locks a whole file */
/* Gives an error-message if file can't be locked */
-int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type,
+int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type,
const char *filetype, const char *filename)
{
if (my_lock(file,lock_type,start,F_TO_EOF,
@@ -2038,7 +2041,7 @@ int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type,
/* Copy a block between two files */
-int filecopy(MI_CHECK *param, File to,File from,my_off_t start,
+int filecopy(HA_CHECK *param, File to,File from,my_off_t start,
my_off_t length, const char *type)
{
char tmp_buff[IO_SIZE],*buff;
@@ -2089,7 +2092,7 @@ err:
<>0 Error
*/
-int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
+int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
const char * name, int rep_quick)
{
int got_error;
@@ -2103,7 +2106,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
HA_KEYSEG *keyseg;
ulong *rec_per_key_part;
char llbuff[22];
- SORT_INFO sort_info;
+ MI_SORT_INFO sort_info;
ulonglong key_map=share->state.key_map;
DBUG_ENTER("mi_repair_by_sort");
@@ -2509,7 +2512,7 @@ err:
<>0 Error
*/
-int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
+int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info,
const char * name, int rep_quick)
{
#ifndef THREAD
@@ -2528,7 +2531,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
char llbuff[22];
IO_CACHE new_data_cache; /* For non-quick repair. */
IO_CACHE_SHARE io_share;
- SORT_INFO sort_info;
+ MI_SORT_INFO sort_info;
ulonglong key_map=share->state.key_map;
pthread_attr_t thr_attr;
DBUG_ENTER("mi_repair_parallel");
@@ -3007,7 +3010,7 @@ err:
static int sort_key_read(MI_SORT_PARAM *sort_param, void *key)
{
int error;
- SORT_INFO *sort_info=sort_param->sort_info;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
MI_INFO *info=sort_info->info;
DBUG_ENTER("sort_key_read");
@@ -3034,7 +3037,7 @@ static int sort_key_read(MI_SORT_PARAM *sort_param, void *key)
static int sort_ft_key_read(MI_SORT_PARAM *sort_param, void *key)
{
int error;
- SORT_INFO *sort_info=sort_param->sort_info;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
MI_INFO *info=sort_info->info;
FT_WORD *wptr=0;
DBUG_ENTER("sort_ft_key_read");
@@ -3121,8 +3124,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
my_off_t pos;
byte *to;
MI_BLOCK_INFO block_info;
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
MI_INFO *info=sort_info->info;
MYISAM_SHARE *share=info->s;
char llbuff[22],llbuff2[22];
@@ -3132,6 +3135,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
DBUG_RETURN(1);
switch (share->data_file_type) {
+ case BLOCK_RECORD:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
case STATIC_RECORD:
for (;;)
{
@@ -3547,8 +3553,8 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
ulong block_length,reclength;
byte *from;
byte block_buff[8];
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
MI_INFO *info=sort_info->info;
MYISAM_SHARE *share=info->s;
DBUG_ENTER("sort_write_record");
@@ -3556,6 +3562,9 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
if (sort_param->fix_datafile)
{
switch (sort_info->new_data_file_type) {
+ case BLOCK_RECORD:
+ DBUG_ASSERT(0); /* Impossible */
+ break;
case STATIC_RECORD:
if (my_b_write(&info->rec_cache,sort_param->record,
share->base.pack_reclength))
@@ -3574,7 +3583,7 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
{
/* must be sure that local buffer is big enough */
reclength=info->s->base.pack_reclength+
- _my_calc_total_blob_length(info,sort_param->record)+
+ _mi_calc_total_blob_length(info,sort_param->record)+
ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+
MI_DYN_DELETE_BLOCK_HEADER;
if (sort_info->buff_length < reclength)
@@ -3663,24 +3672,25 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
{
uint diff_pos[2];
char llbuff[22],llbuff2[22];
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param= sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param= sort_info->param;
int cmp;
if (sort_info->key_block->inited)
{
- cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
+ cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE,
diff_pos);
if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
- ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
+ ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
{
diff_pos[0]= mi_collect_stats_nonulls_next(sort_param->seg,
sort_param->notnull,
- sort_info->key_block->lastkey,
+ (uchar*) sort_info->
+ key_block->lastkey,
(uchar*)a);
}
sort_param->unique[diff_pos[0]-1]++;
@@ -3703,8 +3713,8 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
llstr(sort_info->info->lastpos,llbuff),
llstr(get_record_for_key(sort_info->info,
sort_param->keyinfo,
- sort_info->key_block->
- lastkey),
+ (uchar*) sort_info->
+ key_block->lastkey),
llbuff2));
param->testflag|=T_RETRY_WITHOUT_QUICK;
if (sort_info->param->testflag & T_VERBOSE)
@@ -3725,7 +3735,7 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
int sort_ft_buf_flush(MI_SORT_PARAM *sort_param)
{
- SORT_INFO *sort_info=sort_param->sort_info;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
SORT_KEY_BLOCKS *key_block=sort_info->key_block;
MYISAM_SHARE *share=sort_info->info->s;
uint val_off, val_len;
@@ -3735,19 +3745,19 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param)
val_len=share->ft2_keyinfo.keylength;
get_key_full_length_rdonly(val_off, ft_buf->lastkey);
- to=ft_buf->lastkey+val_off;
+ to= (uchar*) ft_buf->lastkey+val_off;
if (ft_buf->buf)
{
/* flushing first-level tree */
- error=sort_insert_key(sort_param,key_block,ft_buf->lastkey,
+ error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey,
HA_OFFSET_ERROR);
for (from=to+val_len;
- !error && from < ft_buf->buf;
+ !error && from < (uchar*) ft_buf->buf;
from+= val_len)
{
memcpy(to, from, val_len);
- error=sort_insert_key(sort_param,key_block,ft_buf->lastkey,
+ error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey,
HA_OFFSET_ERROR);
}
return error;
@@ -3756,8 +3766,8 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param)
error=flush_pending_blocks(sort_param);
/* updating lastkey with second-level tree info */
ft_intXstore(ft_buf->lastkey+val_off, -ft_buf->count);
- _mi_dpointer(sort_info->info, ft_buf->lastkey+val_off+HA_FT_WLEN,
- share->state.key_root[sort_param->key]);
+ _mi_dpointer(sort_info->info, (uchar*) ft_buf->lastkey+val_off+HA_FT_WLEN,
+ share->state.key_root[sort_param->key]);
/* restoring first level tree data in sort_info/sort_param */
sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks;
sort_param->keyinfo=share->keyinfo+sort_param->key;
@@ -3765,14 +3775,14 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param)
/* writing lastkey in first-level tree */
return error ? error :
sort_insert_key(sort_param,sort_info->key_block,
- ft_buf->lastkey,HA_OFFSET_ERROR);
+ (uchar*) ft_buf->lastkey,HA_OFFSET_ERROR);
}
static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
{
uint a_len, val_off, val_len, error;
uchar *p;
- SORT_INFO *sort_info=sort_param->sort_info;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
SORT_FT_BUF *ft_buf=sort_info->ft_buf;
SORT_KEY_BLOCKS *key_block=sort_info->key_block;
@@ -3802,9 +3812,9 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
}
get_key_full_length_rdonly(val_off, ft_buf->lastkey);
- if (mi_compare_text(sort_param->seg->charset,
+ if (ha_compare_text(sort_param->seg->charset,
((uchar *)a)+1,a_len-1,
- ft_buf->lastkey+1,val_off-1, 0, 0)==0)
+ (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0)
{
if (!ft_buf->buf) /* store in second-level tree */
{
@@ -3820,16 +3830,16 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
return 0;
/* converting to two-level tree */
- p=ft_buf->lastkey+val_off;
+ p= (uchar*) ft_buf->lastkey+val_off;
while (key_block->inited)
key_block++;
sort_info->key_block=key_block;
sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo;
- ft_buf->count=(ft_buf->buf - p)/val_len;
+ ft_buf->count=((uchar*) ft_buf->buf - p)/val_len;
/* flushing buffer to second-level tree */
- for (error=0; !error && p < ft_buf->buf; p+= val_len)
+ for (error=0; !error && p < (uchar*) ft_buf->buf; p+= val_len)
error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR);
ft_buf->buf=0;
return error;
@@ -3877,13 +3887,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
MI_KEY_PARAM s_temp;
MI_INFO *info;
MI_KEYDEF *keyinfo=sort_param->keyinfo;
- SORT_INFO *sort_info= sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info= sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
DBUG_ENTER("sort_insert_key");
- anc_buff=key_block->buff;
+ anc_buff= (uchar*) key_block->buff;
info=sort_info->info;
- lastkey=key_block->lastkey;
+ lastkey= (uchar*) key_block->lastkey;
nod_flag= (key_block == sort_info->key_block ? 0 :
info->s->base.key_reflength);
@@ -3896,7 +3906,7 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
DBUG_RETURN(1);
}
a_length=2+nod_flag;
- key_block->end_pos=anc_buff+2;
+ key_block->end_pos= (char*) anc_buff+2;
lastkey=0; /* No previous key in block */
}
else
@@ -3904,18 +3914,18 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
/* Save pointer to previous block */
if (nod_flag)
- _mi_kpointer(info,key_block->end_pos,prev_block);
+ _mi_kpointer(info,(uchar*) key_block->end_pos,prev_block);
t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,
(uchar*) 0,lastkey,lastkey,key,
&s_temp);
- (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
+ (*keyinfo->store_key)(keyinfo, (uchar*) key_block->end_pos+nod_flag,&s_temp);
a_length+=t_length;
mi_putint(anc_buff,a_length,nod_flag);
key_block->end_pos+=t_length;
if (a_length <= keyinfo->block_length)
{
- VOID(_mi_move_key(keyinfo,key_block->lastkey,key));
+ VOID(_mi_move_key(keyinfo,(uchar*) key_block->lastkey,key));
key_block->last_length=a_length-t_length;
DBUG_RETURN(0);
}
@@ -3940,7 +3950,8 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param,
DBUG_DUMP("buff",(byte*) anc_buff,mi_getint(anc_buff));
/* Write separator-key to block in next level */
- if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
+ if (sort_insert_key(sort_param,key_block+1,(uchar*) key_block->lastkey,
+ filepos))
DBUG_RETURN(1);
/* clear old block and write new key in it */
@@ -3956,8 +3967,8 @@ static int sort_delete_record(MI_SORT_PARAM *sort_param)
uint i;
int old_file,error;
uchar *key;
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
MI_INFO *info=sort_info->info;
DBUG_ENTER("sort_delete_record");
@@ -4013,7 +4024,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param)
uint nod_flag,length;
my_off_t filepos,key_file_length;
SORT_KEY_BLOCKS *key_block;
- SORT_INFO *sort_info= sort_param->sort_info;
+ MI_SORT_INFO *sort_info= sort_param->sort_info;
myf myf_rw=sort_info->param->myf_rw;
MI_INFO *info=sort_info->info;
MI_KEYDEF *keyinfo=sort_param->keyinfo;
@@ -4026,7 +4037,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param)
key_block->inited=0;
length=mi_getint(key_block->buff);
if (nod_flag)
- _mi_kpointer(info,key_block->end_pos,filepos);
+ _mi_kpointer(info,(uchar*) key_block->end_pos,filepos);
key_file_length=info->state->key_file_length;
bzero((byte*) key_block->buff+length, keyinfo->block_length-length);
if ((filepos=_mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
@@ -4036,7 +4047,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param)
if (key_file_length == info->state->key_file_length)
{
if (_mi_write_keypage(info, keyinfo, filepos,
- DFLT_INIT_HITS, key_block->buff))
+ DFLT_INIT_HITS, (uchar*) key_block->buff))
DBUG_RETURN(1);
}
else if (my_pwrite(info->s->kfile,(byte*) key_block->buff,
@@ -4051,7 +4062,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param)
/* alloc space and pointers for key_blocks */
-static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks,
+static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
uint buffer_length)
{
reg1 uint i;
@@ -4068,7 +4079,7 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks,
for (i=0 ; i < blocks ; i++)
{
block[i].inited=0;
- block[i].buff=(uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i;
+ block[i].buff=(byte*) (block+blocks)+(buffer_length+IO_SIZE)*i;
}
DBUG_RETURN(block);
} /* alloc_key_blocks */
@@ -4088,7 +4099,7 @@ int test_if_almost_full(MI_INFO *info)
/* Recreate table with bigger more alloced record-data */
-int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename)
+int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename)
{
int error;
MI_INFO info;
@@ -4261,7 +4272,7 @@ end:
/* write suffix to data file if neaded */
-int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile)
+int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile)
{
MI_INFO *info=sort_info->info;
@@ -4282,7 +4293,7 @@ int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile)
/* Update state and myisamchk_time of indexfile */
-int update_state_info(MI_CHECK *param, MI_INFO *info,uint update)
+int update_state_info(HA_CHECK *param, MI_INFO *info,uint update)
{
MYISAM_SHARE *share=info->s;
@@ -4354,7 +4365,7 @@ err:
param->auto_increment is bigger than the biggest key.
*/
-void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
+void update_auto_increment_key(HA_CHECK *param, MI_INFO *info,
my_bool repair_only)
{
byte *record;
@@ -4587,7 +4598,7 @@ my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows,
static void
-set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share)
+set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share)
{
if ((sort_info->new_data_file_type=share->data_file_type) ==
COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK)
diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c
index 2b8cbcc7da5..e9f5095bdc4 100644
--- a/storage/myisam/mi_create.c
+++ b/storage/myisam/mi_create.c
@@ -17,6 +17,7 @@
#include "ftdefs.h"
#include "sp_defs.h"
+#include <my_bit.h>
#if defined(MSDOS) || defined(__WIN__)
#ifdef __WIN__
@@ -40,7 +41,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
File dfile,file;
int errpos,save_errno, create_mode= O_RDWR | O_TRUNC;
myf create_flag;
- uint fields,length,max_key_length,packed,pointer,real_length_diff,
+ uint fields,length,max_key_length,packed,pack_bytes,pointer,real_length_diff,
key_length,info_length,key_segs,options,min_key_length_skip,
base_pos,long_varchar_count,varchar_length,
max_key_block_length,unique_key_parts,fulltext_keys,offset;
@@ -56,7 +57,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
HA_KEYSEG *keyseg,tmp_keyseg;
MI_COLUMNDEF *rec;
ulong *rec_per_key_part;
- my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE];
+ my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE];
MI_CREATE_INFO tmp_create_info;
DBUG_ENTER("mi_create");
DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u",
@@ -94,7 +95,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */
if (!(rec_per_key_part=
- (ulong*) my_malloc((keys + uniques)*MI_MAX_KEY_SEG*sizeof(long),
+ (ulong*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(long),
MYF(MY_WME | MY_ZEROFILL))))
DBUG_RETURN(my_errno);
@@ -188,11 +189,11 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
if (flags & HA_CREATE_RELIES_ON_SQL_LAYER)
options|= HA_OPTION_RELIES_ON_SQL_LAYER;
- packed=(packed+7)/8;
+ pack_bytes= (packed+7)/8;
if (pack_reclength != INT_MAX32)
pack_reclength+= reclength+packed +
test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD));
- min_pack_length+=packed;
+ min_pack_length+= pack_bytes;
if (!ci->data_file_length && ci->max_rows)
{
@@ -416,7 +417,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
}
} /* if HA_FULLTEXT */
key_segs+=keydef->keysegs;
- if (keydef->keysegs > MI_MAX_KEY_SEG)
+ if (keydef->keysegs > HA_MAX_KEY_SEG)
{
my_errno=HA_WRONG_CREATE_OPTION;
goto err;
@@ -431,7 +432,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
share.state.rec_per_key_part[key_segs-1]=1L;
length+=key_length;
/* Get block length for key, if defined by user */
- block_length= (keydef->block_length ?
+ block_length= (keydef->block_length ?
my_round_up_to_next_power(keydef->block_length) :
myisam_block_size);
block_length= max(block_length, MI_MIN_KEY_BLOCK_LENGTH);
@@ -441,7 +442,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
pointer,MI_MAX_KEYPTR_SIZE,
block_length);
if (keydef->block_length > MI_MAX_KEY_BLOCK_LENGTH ||
- length >= MI_MAX_KEY_BUFF)
+ length >= HA_MAX_KEY_BUFF)
{
my_errno=HA_WRONG_CREATE_OPTION;
goto err;
@@ -546,9 +547,9 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM);
share.base.max_pack_length=pack_reclength;
share.base.min_pack_length=min_pack_length;
- share.base.pack_bits=packed;
+ share.base.pack_bits= pack_bytes;
share.base.fields=fields;
- share.base.pack_fields=packed;
+ share.base.pack_fields= packed;
#ifdef USE_RAID
share.base.raid_type=ci->raid_type;
share.base.raid_chunks=ci->raid_chunks;
diff --git a/storage/myisam/mi_delete.c b/storage/myisam/mi_delete.c
index 409930ff7fb..cec483f8056 100644
--- a/storage/myisam/mi_delete.c
+++ b/storage/myisam/mi_delete.c
@@ -159,7 +159,7 @@ static int _mi_ck_real_delete(register MI_INFO *info, MI_KEYDEF *keyinfo,
DBUG_RETURN(my_errno=HA_ERR_CRASHED);
}
if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
- MI_MAX_KEY_BUFF*2)))
+ HA_MAX_KEY_BUFF*2)))
{
DBUG_PRINT("error",("Couldn't allocate memory"));
DBUG_RETURN(my_errno=ENOMEM);
@@ -221,7 +221,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
my_bool last_key;
uchar *leaf_buff,*keypos;
my_off_t leaf_page,next_block;
- uchar lastkey[MI_MAX_KEY_BUFF];
+ uchar lastkey[HA_MAX_KEY_BUFF];
DBUG_ENTER("d_search");
DBUG_DUMP("page",(byte*) anc_buff,mi_getint(anc_buff));
@@ -306,7 +306,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{
leaf_page=_mi_kpos(nod_flag,keypos);
if (!(leaf_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
- MI_MAX_KEY_BUFF*2)))
+ HA_MAX_KEY_BUFF*2)))
{
DBUG_PRINT("error",("Couldn't allocate memory"));
my_errno=ENOMEM;
@@ -365,9 +365,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{ /* This happens only with packed keys */
DBUG_PRINT("test",("Enlarging of key when deleting"));
if (!_mi_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length))
- {
goto err;
- }
ret_value=_mi_insert(info,keyinfo,key,anc_buff,keypos,lastkey,
(uchar*) 0,(uchar*) 0,(my_off_t) 0,(my_bool) 0);
}
@@ -405,7 +403,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key,
int ret_value,length;
uint a_length,nod_flag,tmp;
my_off_t next_page;
- uchar keybuff[MI_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key;
+ uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key;
MYISAM_SHARE *share=info->s;
MI_KEY_PARAM s_temp;
DBUG_ENTER("del");
@@ -422,7 +420,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key,
{
next_page= _mi_kpos(nod_flag,endpos);
if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
- MI_MAX_KEY_BUFF*2)))
+ HA_MAX_KEY_BUFF*2)))
DBUG_RETURN(-1);
if (!_mi_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0))
ret_value= -1;
@@ -509,7 +507,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo,
uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag,
key_reflength,key_length;
my_off_t next_page;
- uchar anc_key[MI_MAX_KEY_BUFF],leaf_key[MI_MAX_KEY_BUFF],
+ uchar anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF],
*buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key,
*after_key;
MI_KEY_PARAM s_temp;
diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c
index 5342619c79b..d56df7b269b 100644
--- a/storage/myisam/mi_dynrec.c
+++ b/storage/myisam/mi_dynrec.c
@@ -252,7 +252,7 @@ int _mi_write_blob_record(MI_INFO *info, const byte *record)
extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+
MI_DYN_DELETE_BLOCK_HEADER+1);
reclength= (info->s->base.pack_reclength +
- _my_calc_total_blob_length(info,record)+ extra);
+ _mi_calc_total_blob_length(info,record)+ extra);
#ifdef NOT_USED /* We now support big rows */
if (reclength > MI_DYN_MAX_ROW_LENGTH)
{
@@ -286,7 +286,7 @@ int _mi_update_blob_record(MI_INFO *info, my_off_t pos, const byte *record)
extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+
MI_DYN_DELETE_BLOCK_HEADER);
reclength= (info->s->base.pack_reclength+
- _my_calc_total_blob_length(info,record)+ extra);
+ _mi_calc_total_blob_length(info,record)+ extra);
#ifdef NOT_USED /* We now support big rows */
if (reclength > MI_DYN_MAX_ROW_LENGTH)
{
@@ -1258,7 +1258,7 @@ err:
/* Calc length of blob. Update info in blobs->length */
-ulong _my_calc_total_blob_length(MI_INFO *info, const byte *record)
+ulong _mi_calc_total_blob_length(MI_INFO *info, const byte *record)
{
ulong length;
MI_BLOB *blob,*end;
@@ -1292,7 +1292,7 @@ ulong _mi_calc_blob_length(uint length, const byte *pos)
}
-void _my_store_blob_length(byte *pos,uint pack_length,uint length)
+void _mi_store_blob_length(byte *pos,uint pack_length,uint length)
{
switch (pack_length) {
case 1:
@@ -1505,7 +1505,7 @@ int _mi_cmp_dynamic_record(register MI_INFO *info, register const byte *record)
if (info->s->base.blobs)
{
if (!(buffer=(byte*) my_alloca(info->s->base.pack_reclength+
- _my_calc_total_blob_length(info,record))))
+ _mi_calc_total_blob_length(info,record))))
DBUG_RETURN(-1);
}
reclength=_mi_rec_pack(info,buffer,record);
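
The mi_dynrec.c hunks rename the blob helpers to _mi_calc_total_blob_length() and _mi_store_blob_length(); the latter dispatches on pack_length to write a blob's length into 1 to 4 bytes. The standalone sketch below shows that kind of width-dependent length store; the low-byte-first order is an assumption for illustration and need not match the int2store/int3store macros the real function uses.

#include <stdio.h>
#include <string.h>

typedef unsigned char uchar;
typedef unsigned int uint;
typedef unsigned long ulong;

/*
  Illustrative only: store 'length' into 'pos' using pack_length bytes
  (1..4), low byte first.  The byte order in MyISAM depends on the
  int2store/int3store/int4store macros, so treat this as a sketch of
  the idea, not the on-disk layout.
*/
static void store_blob_length(uchar *pos, uint pack_length, ulong length)
{
  uint i;
  for (i= 0; i < pack_length; i++)
  {
    pos[i]= (uchar) (length & 0xFF);
    length>>= 8;
  }
}

int main(void)
{
  uchar buf[4];
  memset(buf, 0, sizeof(buf));
  store_blob_length(buf, 3, 70000UL);            /* fits in 3 bytes */
  printf("%02x %02x %02x\n", buf[0], buf[1], buf[2]);
  return 0;
}
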
diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c
index b203286d544..03ac511df61 100644
--- a/storage/myisam/mi_key.c
+++ b/storage/myisam/mi_key.c
@@ -448,7 +448,7 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr,
/* The above changed info->lastkey2. Inform mi_rnext_same(). */
info->update&= ~HA_STATE_RNEXT_SAME;
- _my_store_blob_length(record+keyseg->start,
+ _mi_store_blob_length(record+keyseg->start,
(uint) keyseg->bit_start,length);
key+=length;
}
diff --git a/storage/myisam/mi_log.c b/storage/myisam/mi_log.c
index f720f752a06..e09899cd1bf 100644
--- a/storage/myisam/mi_log.c
+++ b/storage/myisam/mi_log.c
@@ -133,7 +133,7 @@ void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info,
if (!info->s->base.blobs)
length=info->s->base.reclength;
else
- length=info->s->base.reclength+ _my_calc_total_blob_length(info,record);
+ length=info->s->base.reclength+ _mi_calc_total_blob_length(info,record);
buff[0]=(char) command;
mi_int2store(buff+1,info->dfile);
mi_int4store(buff+3,pid);
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index 830332fe0c1..6fd7d7a571f 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -82,8 +82,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
char *disk_cache, *disk_pos, *end_pos;
MI_INFO info,*m_info,*old_info;
MYISAM_SHARE share_buff,*share;
- ulong rec_per_key_part[MI_MAX_POSSIBLE_KEY*MI_MAX_KEY_SEG];
- my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE];
+ ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG];
+ my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE];
ulonglong max_key_file_length, max_data_file_length;
DBUG_ENTER("mi_open");
@@ -104,7 +104,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
share_buff.state.rec_per_key_part=rec_per_key_part;
share_buff.state.key_root=key_root;
share_buff.state.key_del=key_del;
- share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff));
+ share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff),
+ dflt_key_cache);
DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_open",
if (strstr(name, "/t1"))
@@ -210,7 +211,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
len,MI_BASE_INFO_SIZE));
}
disk_pos= (char*)
- my_n_base_info_read((uchar*) disk_cache + base_pos, &share->base);
+ mi_n_base_info_read((uchar*) disk_cache + base_pos, &share->base);
share->state.state_length=base_pos;
if (!(open_flags & HA_OPEN_FOR_REPAIR) &&
@@ -235,8 +236,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
}
key_parts+=fulltext_keys*FT_SEGS;
- if (share->base.max_key_length > MI_MAX_KEY_BUFF || keys > MI_MAX_KEY ||
- key_parts >= MI_MAX_KEY * MI_MAX_KEY_SEG)
+ if (share->base.max_key_length > HA_MAX_KEY_BUFF || keys > MI_MAX_KEY ||
+ key_parts >= MI_MAX_KEY * HA_MAX_KEY_SEG)
{
DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts));
my_errno=HA_ERR_UNSUPPORTED;
@@ -1025,7 +1026,7 @@ uint mi_base_info_write(File file, MI_BASE_INFO *base)
}
-uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base)
+uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base)
{
base->keystart = mi_sizekorr(ptr); ptr +=8;
base->max_data_file_length = mi_sizekorr(ptr); ptr +=8;
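
mi_n_base_info_read() above decodes MI_BASE_INFO from the raw .MYI header by reading one fixed-width field at a time and advancing a cursor (mi_sizekorr(ptr); ptr += 8). A minimal sketch of that cursor style follows; the high-byte-first order is my assumption about the mi_*korr macros, so treat it as illustrative rather than authoritative.

#include <stdio.h>

typedef unsigned char uchar;
typedef unsigned long long ulonglong;

/*
  Sketch: read a fixed-width integer from *ptr (high byte first, by
  assumption) and advance the cursor, as mi_n_base_info_read() does
  field by field.
*/
static ulonglong read_u64(const uchar **ptr)
{
  ulonglong v= 0;
  int i;
  for (i= 0; i < 8; i++)
    v= (v << 8) | (*ptr)[i];
  *ptr+= 8;
  return v;
}

int main(void)
{
  uchar header[16]= {0,0,0,0,0,0,1,0,   /* first field  = 256 */
                     0,0,0,0,0,0,0,42}; /* second field = 42  */
  const uchar *p= header;
  printf("keystart=%llu\n", read_u64(&p));
  printf("max_data_file_length=%llu\n", read_u64(&p));
  return 0;
}
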
diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c
index a5a9aaededd..c74bfb5af41 100644
--- a/storage/myisam/mi_packrec.c
+++ b/storage/myisam/mi_packrec.c
@@ -105,6 +105,7 @@ static void init_bit_buffer(MI_BIT_BUFF *bit_buff,uchar *buffer,uint length);
static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count);
static void fill_buffer(MI_BIT_BUFF *bit_buff);
static uint max_bit(uint value);
+static uint read_pack_length(uint version, const uchar *buf, ulong *length);
#ifdef HAVE_MMAP
static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
MI_BLOCK_INFO *info, byte **rec_buff_p,
@@ -1043,7 +1044,7 @@ static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff,
return;
}
decode_bytes(rec,bit_buff,bit_buff->blob_pos,bit_buff->blob_pos+length);
- _my_store_blob_length((byte*) to,pack_length,length);
+ _mi_store_blob_length((byte*) to,pack_length,length);
memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos,
sizeof(char*));
bit_buff->blob_pos+=length;
@@ -1624,7 +1625,7 @@ uint save_pack_length(uint version, byte *block_buff, ulong length)
}
-uint read_pack_length(uint version, const uchar *buf, ulong *length)
+static uint read_pack_length(uint version, const uchar *buf, ulong *length)
{
if (buf[0] < 254)
{
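
The mi_packrec.c change makes read_pack_length() static and forward-declares it; the function pairs with save_pack_length() to handle a length prefix whose first byte is either the length itself (when below 254) or an escape byte announcing a wider field. The exact widths and thresholds depend on the pack-file version, so the sketch below is a hedged two-tier illustration of the idea, not the on-disk format.

#include <stdio.h>

typedef unsigned char uchar;
typedef unsigned int uint;
typedef unsigned long ulong;

/* Assumed scheme: lengths < 254 take one byte; otherwise an escape
   byte 254 is followed by a 4-byte big-endian length. */
static uint sketch_save_pack_length(uchar *buf, ulong length)
{
  if (length < 254)
  {
    buf[0]= (uchar) length;
    return 1;
  }
  buf[0]= 254;
  buf[1]= (uchar) (length >> 24);
  buf[2]= (uchar) (length >> 16);
  buf[3]= (uchar) (length >> 8);
  buf[4]= (uchar) length;
  return 5;
}

static uint sketch_read_pack_length(const uchar *buf, ulong *length)
{
  if (buf[0] < 254)
  {
    *length= buf[0];
    return 1;
  }
  *length= ((ulong) buf[1] << 24) | ((ulong) buf[2] << 16) |
           ((ulong) buf[3] << 8)  |  (ulong) buf[4];
  return 5;
}

int main(void)
{
  uchar buf[5];
  ulong len;
  uint n= sketch_save_pack_length(buf, 300000UL);
  sketch_read_pack_length(buf, &len);
  printf("stored in %u bytes, read back %lu\n", n, len);
  return 0;
}
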
diff --git a/storage/myisam/mi_range.c b/storage/myisam/mi_range.c
index 6655f5a7de6..5c1278637b5 100644
--- a/storage/myisam/mi_range.c
+++ b/storage/myisam/mi_range.c
@@ -232,7 +232,7 @@ static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
uchar *keypos, uint *ret_max_key)
{
uint nod_flag,keynr,max_key;
- uchar t_buff[MI_MAX_KEY_BUFF],*end;
+ uchar t_buff[HA_MAX_KEY_BUFF],*end;
end= page+mi_getint(page);
nod_flag=mi_test_if_nod(page);
diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c
index 8d2b68a97f0..735bfd3fd80 100644
--- a/storage/myisam/mi_search.c
+++ b/storage/myisam/mi_search.c
@@ -60,7 +60,7 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
int error,flag;
uint nod_flag;
uchar *keypos,*maxpos;
- uchar lastkey[MI_MAX_KEY_BUFF],*buff;
+ uchar lastkey[HA_MAX_KEY_BUFF],*buff;
DBUG_ENTER("_mi_search");
DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu",
(ulong) pos, nextflag, (ulong) info->lastpos));
@@ -242,7 +242,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
{
int flag;
uint nod_flag,length,not_used[2];
- uchar t_buff[MI_MAX_KEY_BUFF],*end;
+ uchar t_buff[HA_MAX_KEY_BUFF],*end;
DBUG_ENTER("_mi_seq_search");
LINT_INIT(flag); LINT_INIT(length);
@@ -296,7 +296,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
int key_len_skip, seg_len_pack, key_len_left;
uchar *end, *kseg, *vseg;
uchar *sort_order=keyinfo->seg->charset->sort_order;
- uchar tt_buff[MI_MAX_KEY_BUFF+2], *t_buff=tt_buff+2;
+ uchar tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2;
uchar *saved_from, *saved_to, *saved_vseg;
uint saved_length=0, saved_prefix_len=0;
uint length_pack;
@@ -920,7 +920,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
DBUG_ENTER("_mi_get_binary_pack_key");
page= *page_pos;
- page_end=page+MI_MAX_KEY_BUFF+1;
+ page_end=page+HA_MAX_KEY_BUFF+1;
start_key=key;
/*
@@ -1210,7 +1210,7 @@ int _mi_search_next(register MI_INFO *info, register MI_KEYDEF *keyinfo,
{
int error;
uint nod_flag;
- uchar lastkey[MI_MAX_KEY_BUFF];
+ uchar lastkey[HA_MAX_KEY_BUFF];
DBUG_ENTER("_mi_search_next");
DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu",
nextflag, (ulong) info->lastpos,
diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c
index ebb9cdcb2f7..c5a1ffcd5d7 100644
--- a/storage/myisam/mi_test1.c
+++ b/storage/myisam/mi_test1.c
@@ -628,7 +628,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
key_type= HA_KEYTYPE_VARTEXT1;
break;
case 'k':
- if (key_length < 4 || key_length > MI_MAX_KEY_LENGTH)
+ if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH)
{
fprintf(stderr,"Wrong key length\n");
exit(1);
diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c
index 878bba31ea8..894e48d863c 100644
--- a/storage/myisam/mi_test2.c
+++ b/storage/myisam/mi_test2.c
@@ -26,6 +26,7 @@
#endif
#include "myisamdef.h"
#include <m_ctype.h>
+#include <my_bit.h>
#define STANDARD_LENGTH 37
#define MYISAM_KEYS 6
@@ -602,7 +603,7 @@ int main(int argc, char *argv[])
if (mi_rsame(file,read_record2,(int) i)) goto err;
if (bcmp(read_record,read_record2,reclength) != 0)
{
- printf("is_rsame didn't find same record\n");
+ printf("mi_rsame didn't find same record\n");
goto end;
}
}
diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c
index 635f6c18247..342a9c910ca 100644
--- a/storage/myisam/mi_unique.c
+++ b/storage/myisam/mi_unique.c
@@ -212,7 +212,7 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b,
if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
type == HA_KEYTYPE_VARTEXT2)
{
- if (mi_compare_text(keyseg->charset, (uchar *) pos_a, a_length,
+ if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length,
(uchar *) pos_b, b_length, 0, 1))
return 1;
}
diff --git a/storage/myisam/mi_update.c b/storage/myisam/mi_update.c
index bea457d2e9a..2ef4df71fc7 100644
--- a/storage/myisam/mi_update.c
+++ b/storage/myisam/mi_update.c
@@ -23,7 +23,7 @@ int mi_update(register MI_INFO *info, const byte *oldrec, byte *newrec)
int flag,key_changed,save_errno;
reg3 my_off_t pos;
uint i;
- uchar old_key[MI_MAX_KEY_BUFF],*new_key;
+ uchar old_key[HA_MAX_KEY_BUFF],*new_key;
bool auto_key_changed=0;
ulonglong changed;
MYISAM_SHARE *share=info->s;
diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c
index 57c054f2de8..1ae7e179b29 100644
--- a/storage/myisam/mi_write.c
+++ b/storage/myisam/mi_write.c
@@ -346,7 +346,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
int error,flag;
uint nod_flag, search_key_length;
uchar *temp_buff,*keypos;
- uchar keybuff[MI_MAX_KEY_BUFF];
+ uchar keybuff[HA_MAX_KEY_BUFF];
my_bool was_last_key;
my_off_t next_page, dupp_key_pos;
DBUG_ENTER("w_search");
@@ -354,7 +354,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY;
if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
- MI_MAX_KEY_BUFF*2)))
+ HA_MAX_KEY_BUFF*2)))
DBUG_RETURN(-1);
if (!_mi_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0))
goto err;
@@ -545,7 +545,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
get_key_length(alen,a);
DBUG_ASSERT(info->ft1_to_ft2==0);
if (alen == blen &&
- mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0)
+ ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0)
{
/* yup. converting */
info->ft1_to_ft2=(DYNAMIC_ARRAY *)
@@ -707,7 +707,7 @@ static uchar *_mi_find_last_pos(MI_KEYDEF *keyinfo, uchar *page,
{
uint keys,length,last_length,key_ref_length;
uchar *end,*lastpos,*prevpos;
- uchar key_buff[MI_MAX_KEY_BUFF];
+ uchar key_buff[HA_MAX_KEY_BUFF];
DBUG_ENTER("_mi_find_last_pos");
key_ref_length=2;
@@ -764,7 +764,7 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo,
length,keys;
uchar *pos,*buff,*extra_buff;
my_off_t next_page,new_pos;
- byte tmp_part_key[MI_MAX_KEY_BUFF];
+ byte tmp_part_key[HA_MAX_KEY_BUFF];
DBUG_ENTER("_mi_balance_page");
k_length=keyinfo->keylength;
@@ -930,7 +930,7 @@ static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param)
Probably I can use info->lastkey here, but I'm not sure,
and to be safe I'd better use local lastkey.
*/
- uchar lastkey[MI_MAX_KEY_BUFF];
+ uchar lastkey[HA_MAX_KEY_BUFF];
uint keylen;
MI_KEYDEF *keyinfo;
diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c
index 066e6cdb81b..ebe46e7c6e5 100644
--- a/storage/myisam/myisamchk.c
+++ b/storage/myisam/myisamchk.c
@@ -16,10 +16,10 @@
/* Describe, check and repair of MyISAM tables */
#include "fulltext.h"
-
#include <m_ctype.h>
#include <stdarg.h>
#include <my_getopt.h>
+#include <my_bit.h>
#ifdef HAVE_SYS_VADVICE_H
#include <sys/vadvise.h>
#endif
@@ -67,9 +67,9 @@ static const char *myisam_stats_method_str="nulls_unequal";
static void get_options(int *argc,char * * *argv);
static void print_version(void);
static void usage(void);
-static int myisamchk(MI_CHECK *param, char *filename);
-static void descript(MI_CHECK *param, register MI_INFO *info, my_string name);
-static int mi_sort_records(MI_CHECK *param, register MI_INFO *info,
+static int myisamchk(HA_CHECK *param, char *filename);
+static void descript(HA_CHECK *param, register MI_INFO *info, my_string name);
+static int mi_sort_records(HA_CHECK *param, register MI_INFO *info,
my_string name, uint sort_key,
my_bool write_info, my_bool update_index);
static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info,
@@ -77,7 +77,7 @@ static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info,
my_off_t page,uchar *buff,uint sortkey,
File new_file, my_bool update_index);
-MI_CHECK check_param;
+HA_CHECK check_param;
/* Main program */
@@ -695,7 +695,7 @@ get_one_option(int optid,
case OPT_STATS_METHOD:
{
int method;
- enum_mi_stats_method method_conv;
+ enum_handler_stats_method method_conv;
LINT_INIT(method_conv);
myisam_stats_method_str= argument;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
@@ -794,7 +794,7 @@ static void get_options(register int *argc,register char ***argv)
/* Check table */
-static int myisamchk(MI_CHECK *param, my_string filename)
+static int myisamchk(HA_CHECK *param, my_string filename)
{
int error,lock_type,recreate;
int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS);
@@ -1199,7 +1199,7 @@ end2:
/* Write info about table */
-static void descript(MI_CHECK *param, register MI_INFO *info, my_string name)
+static void descript(HA_CHECK *param, register MI_INFO *info, my_string name)
{
uint key,keyseg_nr,field,start;
reg3 MI_KEYDEF *keyinfo;
@@ -1464,7 +1464,7 @@ static void descript(MI_CHECK *param, register MI_INFO *info, my_string name)
/* Sort records according to one key */
-static int mi_sort_records(MI_CHECK *param,
+static int mi_sort_records(HA_CHECK *param,
register MI_INFO *info, my_string name,
uint sort_key,
my_bool write_info,
@@ -1478,7 +1478,7 @@ static int mi_sort_records(MI_CHECK *param,
ha_rows old_record_count;
MYISAM_SHARE *share=info->s;
char llbuff[22],llbuff2[22];
- SORT_INFO sort_info;
+ MI_SORT_INFO sort_info;
MI_SORT_PARAM sort_param;
DBUG_ENTER("sort_records");
@@ -1653,10 +1653,10 @@ static int sort_record_index(MI_SORT_PARAM *sort_param,MI_INFO *info,
uint nod_flag,used_length,key_length;
uchar *temp_buff,*keypos,*endpos;
my_off_t next_page,rec_pos;
- uchar lastkey[MI_MAX_KEY_BUFF];
+ uchar lastkey[HA_MAX_KEY_BUFF];
char llbuff[22];
- SORT_INFO *sort_info= sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info= sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
DBUG_ENTER("sort_record_index");
nod_flag=mi_test_if_nod(buff);
@@ -1744,7 +1744,7 @@ err:
static int not_killed= 0;
-volatile int *killed_ptr(MI_CHECK *param __attribute__((unused)))
+volatile int *killed_ptr(HA_CHECK *param __attribute__((unused)))
{
return &not_killed; /* always NULL */
}
@@ -1752,7 +1752,7 @@ volatile int *killed_ptr(MI_CHECK *param __attribute__((unused)))
/* print warnings and errors */
/* VARARGS */
-void mi_check_print_info(MI_CHECK *param __attribute__((unused)),
+void mi_check_print_info(HA_CHECK *param __attribute__((unused)),
const char *fmt,...)
{
va_list args;
@@ -1765,7 +1765,7 @@ void mi_check_print_info(MI_CHECK *param __attribute__((unused)),
/* VARARGS */
-void mi_check_print_warning(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_warning(HA_CHECK *param, const char *fmt,...)
{
va_list args;
DBUG_ENTER("mi_check_print_warning");
@@ -1790,7 +1790,7 @@ void mi_check_print_warning(MI_CHECK *param, const char *fmt,...)
/* VARARGS */
-void mi_check_print_error(MI_CHECK *param, const char *fmt,...)
+void mi_check_print_error(HA_CHECK *param, const char *fmt,...)
{
va_list args;
DBUG_ENTER("mi_check_print_error");
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index dceccd10ae2..9686af00bc6 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -15,8 +15,8 @@
/* This file is included by all internal myisam files */
-#include "myisam.h" /* Structs & some defines */
-#include "myisampack.h" /* packing of keys */
+#include "myisam.h" /* Structs & some defines */
+#include "myisampack.h" /* packing of keys */
#include <my_tree.h>
#ifdef THREAD
#include <my_pthread.h>
@@ -26,15 +26,16 @@
#endif
#if defined(my_write) && !defined(MAP_TO_USE_RAID)
-#undef my_write /* undef map from my_nosys; We need test-if-disk full */
+/* undef map from my_nosys; We need test-if-disk full */
+#undef my_write
#endif
typedef struct st_mi_status_info
{
- ha_rows records; /* Rows in table */
- ha_rows del; /* Removed rows */
- my_off_t empty; /* lost space in datafile */
- my_off_t key_empty; /* lost space in indexfile */
+ ha_rows records; /* Rows in table */
+ ha_rows del; /* Removed rows */
+ my_off_t empty; /* lost space in datafile */
+ my_off_t key_empty; /* lost space in indexfile */
my_off_t key_file_length;
my_off_t data_file_length;
ha_checksum checksum;
@@ -42,345 +43,291 @@ typedef struct st_mi_status_info
typedef struct st_mi_state_info
{
- struct { /* Fileheader */
+ struct
+ { /* Fileheader */
uchar file_version[4];
uchar options[2];
uchar header_length[2];
uchar state_info_length[2];
uchar base_info_length[2];
uchar base_pos[2];
- uchar key_parts[2]; /* Key parts */
- uchar unique_key_parts[2]; /* Key parts + unique parts */
- uchar keys; /* number of keys in file */
- uchar uniques; /* number of UNIQUE definitions */
- uchar language; /* Language for indexes */
- uchar max_block_size_index; /* max keyblock size */
+ uchar key_parts[2]; /* Key parts */
+ uchar unique_key_parts[2]; /* Key parts + unique parts */
+ uchar keys; /* number of keys in file */
+ uchar uniques; /* number of UNIQUE definitions */
+ uchar language; /* Language for indexes */
+ uchar max_block_size_index; /* max keyblock size */
uchar fulltext_keys;
uchar not_used; /* To align to 8 */
} header;
MI_STATUS_INFO state;
- ha_rows split; /* number of split blocks */
- my_off_t dellink; /* Link to next removed block */
+ ha_rows split; /* number of split blocks */
+ my_off_t dellink; /* Link to next removed block */
ulonglong auto_increment;
- ulong process; /* process that updated table last */
- ulong unique; /* Unique number for this process */
- ulong update_count; /* Updated for each write lock */
+ ulong process; /* process that updated table last */
+ ulong unique; /* Unique number for this process */
+ ulong update_count; /* Updated for each write lock */
ulong status;
ulong *rec_per_key_part;
- my_off_t *key_root; /* Start of key trees */
- my_off_t *key_del; /* delete links for trees */
- my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */
-
- ulong sec_index_changed; /* Updated when new sec_index */
- ulong sec_index_used; /* which extra index are in use */
- ulonglong key_map; /* Which keys are in use */
ha_checksum checksum; /* Table checksum */
- ulong version; /* timestamp of create */
- time_t create_time; /* Time when created database */
- time_t recover_time; /* Time for last recover */
- time_t check_time; /* Time for last check */
- uint sortkey; /* sorted by this key (not used) */
+ my_off_t *key_root; /* Start of key trees */
+ my_off_t *key_del; /* delete links for trees */
+ my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */
+
+ ulong sec_index_changed; /* Updated when new sec_index */
+ ulong sec_index_used; /* which extra index are in use */
+ ulonglong key_map; /* Which keys are in use */
+ ulong version; /* timestamp of create */
+ time_t create_time; /* Time when created database */
+ time_t recover_time; /* Time for last recover */
+ time_t check_time; /* Time for last check */
+ uint sortkey; /* sorted by this key (not used) */
uint open_count;
- uint8 changed; /* Changed since myisamchk */
+ uint8 changed; /* Changed since myisamchk */
/* the following isn't saved on disk */
- uint state_diff_length; /* Should be 0 */
- uint state_length; /* Length of state header in file */
+ uint state_diff_length; /* Should be 0 */
+ uint state_length; /* Length of state header in file */
ulong *key_info;
} MI_STATE_INFO;
-#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8)
-#define MI_STATE_KEY_SIZE 8
+#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8)
+#define MI_STATE_KEY_SIZE 8
#define MI_STATE_KEYBLOCK_SIZE 8
-#define MI_STATE_KEYSEG_SIZE 4
-#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*MI_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE)
-#define MI_KEYDEF_SIZE (2+ 5*2)
-#define MI_UNIQUEDEF_SIZE (2+1+1)
-#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2)
-#define MI_COLUMNDEF_SIZE (2*3+1)
-#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16)
-#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */
+#define MI_STATE_KEYSEG_SIZE 4
+#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*HA_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE)
+#define MI_KEYDEF_SIZE (2+ 5*2)
+#define MI_UNIQUEDEF_SIZE (2+1+1)
+#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2)
+#define MI_COLUMNDEF_SIZE (2*3+1)
+#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16)
+#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */
typedef struct st_mi_base_info
{
- my_off_t keystart; /* Start of keys */
+ my_off_t keystart; /* Start of keys */
my_off_t max_data_file_length;
my_off_t max_key_file_length;
my_off_t margin_key_file_length;
- ha_rows records,reloc; /* Create information */
- ulong mean_row_length; /* Create information */
- ulong reclength; /* length of unpacked record */
- ulong pack_reclength; /* Length of full packed rec. */
+ ha_rows records, reloc; /* Create information */
+ ulong mean_row_length; /* Create information */
+ ulong reclength; /* length of unpacked record */
+ ulong pack_reclength; /* Length of full packed rec. */
ulong min_pack_length;
- ulong max_pack_length; /* Max possibly length of packed rec.*/
+ ulong max_pack_length; /* Max possibly length of packed rec.*/
ulong min_block_length;
- ulong fields, /* fields in table */
- pack_fields; /* packed fields in table */
- uint rec_reflength; /* = 2-8 */
- uint key_reflength; /* = 2-8 */
- uint keys; /* same as in state.header */
- uint auto_key; /* Which key-1 is a auto key */
- uint blobs; /* Number of blobs */
- uint pack_bits; /* Length of packed bits */
- uint max_key_block_length; /* Max block length */
- uint max_key_length; /* Max key length */
+ ulong fields, /* fields in table */
+ pack_fields; /* packed fields in table */
+ uint rec_reflength; /* = 2-8 */
+ uint key_reflength; /* = 2-8 */
+ uint keys; /* same as in state.header */
+ uint auto_key; /* Which key-1 is a auto key */
+ uint blobs; /* Number of blobs */
+ uint pack_bits; /* Length of packed bits */
+ uint max_key_block_length; /* Max block length */
+ uint max_key_length; /* Max key length */
/* Extra allocation when using dynamic record format */
uint extra_alloc_bytes;
uint extra_alloc_procent;
/* Info about raid */
- uint raid_type,raid_chunks;
+ uint raid_type, raid_chunks;
ulong raid_chunksize;
/* The following are from the header */
- uint key_parts,all_key_parts;
+ uint key_parts, all_key_parts;
} MI_BASE_INFO;
- /* Structs used intern in database */
+ /* Structs used intern in database */
-typedef struct st_mi_blob /* Info of record */
+typedef struct st_mi_blob /* Info of record */
{
- ulong offset; /* Offset to blob in record */
- uint pack_length; /* Type of packed length */
- ulong length; /* Calc:ed for each record */
+ ulong offset; /* Offset to blob in record */
+ uint pack_length; /* Type of packed length */
+ ulong length; /* Calc:ed for each record */
} MI_BLOB;
-typedef struct st_mi_isam_pack {
+typedef struct st_mi_isam_pack
+{
ulong header_length;
uint ref_length;
uchar version;
} MI_PACK;
-#define MAX_NONMAPPED_INSERTS 1000
+#define MAX_NONMAPPED_INSERTS 1000
-typedef struct st_mi_isam_share { /* Shared between opens */
+typedef struct st_mi_isam_share
+{ /* Shared between opens */
MI_STATE_INFO state;
MI_BASE_INFO base;
- MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */
- MI_KEYDEF *keyinfo; /* Key definitions */
- MI_UNIQUEDEF *uniqueinfo; /* unique definitions */
- HA_KEYSEG *keyparts; /* key part info */
- MI_COLUMNDEF *rec; /* Pointer to field information */
- MI_PACK pack; /* Data about packed records */
- MI_BLOB *blobs; /* Pointer to blobs */
- char *unique_file_name; /* realpath() of index file */
- char *data_file_name, /* Resolved path names from symlinks */
- *index_file_name;
- byte *file_map; /* mem-map of file if possible */
- KEY_CACHE *key_cache; /* ref to the current key cache */
+ MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */
+ MI_KEYDEF *keyinfo; /* Key definitions */
+ MI_UNIQUEDEF *uniqueinfo; /* unique definitions */
+ HA_KEYSEG *keyparts; /* key part info */
+ MI_COLUMNDEF *rec; /* Pointer to field information */
+ MI_PACK pack; /* Data about packed records */
+ MI_BLOB *blobs; /* Pointer to blobs */
+ char *unique_file_name; /* realpath() of index file */
+ char *data_file_name, /* Resolved path names from symlinks */
+ *index_file_name;
+ byte *file_map; /* mem-map of file if possible */
+ KEY_CACHE *key_cache; /* ref to the current key cache */
MI_DECODE_TREE *decode_trees;
uint16 *decode_tables;
- int (*read_record)(struct st_myisam_info*, my_off_t, byte*);
- int (*write_record)(struct st_myisam_info*, const byte*);
- int (*update_record)(struct st_myisam_info*, my_off_t, const byte*);
- int (*delete_record)(struct st_myisam_info*);
- int (*read_rnd)(struct st_myisam_info*, byte*, my_off_t, my_bool);
- int (*compare_record)(struct st_myisam_info*, const byte *);
/* Function to use for a row checksum. */
- ha_checksum (*calc_checksum)(struct st_myisam_info*, const byte *);
- int (*compare_unique)(struct st_myisam_info*, MI_UNIQUEDEF *,
- const byte *record, my_off_t pos);
- uint (*file_read)(MI_INFO *, byte *, uint, my_off_t, myf);
- uint (*file_write)(MI_INFO *, byte *, uint, my_off_t, myf);
+ int(*read_record) (struct st_myisam_info *, my_off_t, byte *);
+ int(*write_record) (struct st_myisam_info *, const byte *);
+ int(*update_record) (struct st_myisam_info *, my_off_t, const byte *);
+ int(*delete_record) (struct st_myisam_info *);
+ int(*read_rnd) (struct st_myisam_info *, byte *, my_off_t, my_bool);
+ int(*compare_record) (struct st_myisam_info *, const byte *);
+ ha_checksum(*calc_checksum) (struct st_myisam_info *, const byte *);
+ int(*compare_unique) (struct st_myisam_info *, MI_UNIQUEDEF *,
+ const byte * record, my_off_t pos);
+ uint(*file_read) (MI_INFO *, byte *, uint, my_off_t, myf);
+ uint(*file_write) (MI_INFO *, byte *, uint, my_off_t, myf);
invalidator_by_filename invalidator; /* query cache invalidator */
- ulong this_process; /* processid */
- ulong last_process; /* For table-change-check */
- ulong last_version; /* Version on start */
- ulong options; /* Options used */
- ulong min_pack_length; /* Theese are used by packed data */
+ ulong this_process; /* processid */
+ ulong last_process; /* For table-change-check */
+ ulong last_version; /* Version on start */
+ ulong options; /* Options used */
+ ulong min_pack_length; /* Theese are used by packed data */
ulong max_pack_length;
ulong state_diff_length;
- uint rec_reflength; /* rec_reflength in use now */
- uint unique_name_length;
+ uint rec_reflength; /* rec_reflength in use now */
+ uint unique_name_length;
uint32 ftparsers; /* Number of distinct ftparsers + 1 */
- File kfile; /* Shared keyfile */
- File data_file; /* Shared data file */
- int mode; /* mode of file on open */
- uint reopen; /* How many times reopened */
- uint w_locks,r_locks,tot_locks; /* Number of read/write locks */
- uint blocksize; /* blocksize of keyfile */
+ File kfile; /* Shared keyfile */
+ File data_file; /* Shared data file */
+ int mode; /* mode of file on open */
+ uint reopen; /* How many times reopened */
+ uint w_locks, r_locks, tot_locks; /* Number of read/write locks */
+ uint blocksize; /* blocksize of keyfile */
myf write_flag;
enum data_file_type data_file_type;
/* Below flag is needed to make log tables work with concurrent insert */
my_bool is_log_table;
- my_bool changed, /* If changed since lock */
- global_changed, /* If changed since open */
- not_flushed,
- temporary,delay_key_write,
- concurrent_insert;
+ my_bool changed, /* If changed since lock */
+ global_changed, /* If changed since open */
+ not_flushed, temporary, delay_key_write, concurrent_insert;
#ifdef THREAD
THR_LOCK lock;
- pthread_mutex_t intern_lock; /* Locking for use with _locking */
+ pthread_mutex_t intern_lock; /* Locking for use with _locking */
rw_lock_t *key_root_lock;
#endif
my_off_t mmaped_length;
- uint nonmmaped_inserts; /* counter of writing in non-mmaped
- area */
+ /* counter of writing in non-mmaped area */
+ uint nonmmaped_inserts;
rw_lock_t mmap_lock;
} MYISAM_SHARE;
-typedef uint mi_bit_type;
-
-typedef struct st_mi_bit_buff { /* Used for packing of record */
- mi_bit_type current_byte;
- uint bits;
- uchar *pos,*end,*blob_pos,*blob_end;
- uint error;
-} MI_BIT_BUFF;
-
-struct st_myisam_info {
- MYISAM_SHARE *s; /* Shared between open:s */
- MI_STATUS_INFO *state,save_state;
- MI_BLOB *blobs; /* Pointer to blobs */
- MI_BIT_BUFF bit_buff;
+struct st_myisam_info
+{
+ MYISAM_SHARE *s; /* Shared between open:s */
+ MI_STATUS_INFO *state, save_state;
+ MI_BLOB *blobs; /* Pointer to blobs */
+ MI_BIT_BUFF bit_buff;
/* accumulate indexfile changes between write's */
- TREE *bulk_insert;
+ TREE *bulk_insert;
DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
MEM_ROOT ft_memroot; /* used by the parser */
- MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
- char *filename; /* parameter to open filename */
- uchar *buff, /* Temp area for key */
- *lastkey,*lastkey2; /* Last used search key */
- uchar *first_mbr_key; /* Searhed spatial key */
- byte *rec_buff; /* Tempbuff for recordpack */
- uchar *int_keypos, /* Save position for next/previous */
- *int_maxpos; /* -""- */
- uint int_nod_flag; /* -""- */
- uint32 int_keytree_version; /* -""- */
- int (*read_record)(struct st_myisam_info*, my_off_t, byte*);
+ MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
+ char *filename; /* parameter to open filename */
+ uchar *buff, /* Temp area for key */
+ *lastkey, *lastkey2; /* Last used search key */
+ uchar *first_mbr_key; /* Searhed spatial key */
+ byte *rec_buff; /* Tempbuff for recordpack */
+ uchar *int_keypos, /* Save position for next/previous */
+ *int_maxpos; /* -""- */
+ uint int_nod_flag; /* -""- */
+ uint32 int_keytree_version; /* -""- */
+ int(*read_record) (struct st_myisam_info *, my_off_t, byte *);
invalidator_by_filename invalidator; /* query cache invalidator */
- ulong this_unique; /* uniq filenumber or thread */
- ulong last_unique; /* last unique number */
- ulong this_loop; /* counter for this open */
- ulong last_loop; /* last used counter */
- my_off_t lastpos, /* Last record position */
- nextpos; /* Position to next record */
+ ulong this_unique; /* uniq filenumber or thread */
+ ulong last_unique; /* last unique number */
+ ulong this_loop; /* counter for this open */
+ ulong last_loop; /* last used counter */
+ my_off_t lastpos, /* Last record position */
+ nextpos; /* Position to next record */
my_off_t save_lastpos;
- my_off_t pos; /* Intern variable */
- my_off_t last_keypage; /* Last key page read */
- my_off_t last_search_keypage; /* Last keypage when searching */
+ my_off_t pos; /* Intern variable */
+ my_off_t last_keypage; /* Last key page read */
+ my_off_t last_search_keypage; /* Last keypage when searching */
my_off_t dupp_key_pos;
ha_checksum checksum; /* Temp storage for row checksum */
- /* QQ: the folloing two xxx_length fields should be removed,
- as they are not compatible with parallel repair */
- ulong packed_length,blob_length; /* Length of found, packed record */
- int dfile; /* The datafile */
- uint opt_flag; /* Optim. for space/speed */
- uint update; /* If file changed since open */
- int lastinx; /* Last used index */
- uint lastkey_length; /* Length of key in lastkey */
- uint last_rkey_length; /* Last length in mi_rkey() */
+ /*
+ QQ: the folloing two xxx_length fields should be removed,
+ as they are not compatible with parallel repair
+ */
+ ulong packed_length, blob_length; /* Length of found, packed record */
+ int dfile; /* The datafile */
+ uint opt_flag; /* Optim. for space/speed */
+ uint update; /* If file changed since open */
+ int lastinx; /* Last used index */
+ uint lastkey_length; /* Length of key in lastkey */
+ uint last_rkey_length; /* Last length in mi_rkey() */
enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */
- uint save_lastkey_length;
- uint pack_key_length; /* For MYISAMMRG */
- int errkey; /* Got last error on this key */
- int lock_type; /* How database was locked */
- int tmp_lock_type; /* When locked by readinfo */
- uint data_changed; /* Somebody has changed data */
- uint save_update; /* When using KEY_READ */
- int save_lastinx;
- LIST open_list;
- IO_CACHE rec_cache; /* When cacheing records */
- uint preload_buff_size; /* When preloading indexes */
- myf lock_wait; /* is 0 or MY_DONT_WAIT */
- my_bool was_locked; /* Was locked in panic */
- my_bool append_insert_at_end; /* Set if concurrent insert */
+ uint save_lastkey_length;
+ uint pack_key_length; /* For MYISAMMRG */
+ int errkey; /* Got last error on this key */
+ int lock_type; /* How database was locked */
+ int tmp_lock_type; /* When locked by readinfo */
+ uint data_changed; /* Somebody has changed data */
+ uint save_update; /* When using KEY_READ */
+ int save_lastinx;
+ LIST open_list;
+ IO_CACHE rec_cache; /* When cacheing records */
+ uint preload_buff_size; /* When preloading indexes */
+ myf lock_wait; /* is 0 or MY_DONT_WAIT */
+ my_bool was_locked; /* Was locked in panic */
+ my_bool append_insert_at_end; /* Set if concurrent insert */
my_bool quick_mode;
- my_bool page_changed; /* If info->buff can't be used for rnext */
- my_bool buff_used; /* If info->buff has to be reread for rnext */
- my_bool once_flags; /* For MYISAMMRG */
+ /* If info->buff can't be used for rnext */
+ my_bool page_changed;
+ /* If info->buff has to be reread for rnext */
+ my_bool buff_used;
+ my_bool once_flags; /* For MYISAMMRG */
#ifdef __WIN__
my_bool owned_by_merge; /* This MyISAM table is part of a merge union */
#endif
#ifdef THREAD
THR_LOCK_DATA lock;
#endif
- uchar *rtree_recursion_state; /* For RTREE */
- int rtree_recursion_depth;
+ uchar *rtree_recursion_state; /* For RTREE */
+ int rtree_recursion_depth;
};
-typedef struct st_buffpek {
- my_off_t file_pos; /* Where we are in the sort file */
- uchar *base,*key; /* Key pointers */
- ha_rows count; /* Number of rows in table */
- ulong mem_count; /* numbers of keys in memory */
- ulong max_keys; /* Max keys in buffert */
-} BUFFPEK;
-
-typedef struct st_mi_sort_param
-{
- pthread_t thr;
- IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
- DYNAMIC_ARRAY buffpek;
- MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */
-
- /*
- The next two are used to collect statistics, see update_key_parts for
- description.
- */
- ulonglong unique[MI_MAX_KEY_SEG+1];
- ulonglong notnull[MI_MAX_KEY_SEG+1];
-
- my_off_t pos,max_pos,filepos,start_recpos;
- uint key, key_length,real_key_length,sortbuff_size;
- uint maxbuffers, keys, find_length, sort_keys_length;
- my_bool fix_datafile, master;
- my_bool calc_checksum; /* calculate table checksum */
- MI_KEYDEF *keyinfo;
- HA_KEYSEG *seg;
- SORT_INFO *sort_info;
- uchar **sort_keys;
- byte *rec_buff;
- void *wordlist, *wordptr;
- MEM_ROOT wordroot;
- char *record;
- MY_TMPDIR *tmpdir;
- int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *);
- int (*key_read)(struct st_mi_sort_param *,void *);
- int (*key_write)(struct st_mi_sort_param *, const void *);
- void (*lock_in_memory)(MI_CHECK *);
- NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **,
- uint , struct st_buffpek *, IO_CACHE *);
- NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint);
- NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,char *,
- uint, uint);
-} MI_SORT_PARAM;
- /* Some defines used by isam-funktions */
-
-#define USE_WHOLE_KEY MI_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */
-#define F_EXTRA_LCK -1
-
- /* bits in opt_flag */
-#define MEMMAP_USED 32
+#define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */
+#define F_EXTRA_LCK -1
+/* bits in opt_flag */
+#define MEMMAP_USED 32
#define REMEMBER_OLD_POS 64
-#define WRITEINFO_UPDATE_KEYFILE 1
-#define WRITEINFO_NO_UNLOCK 2
+#define WRITEINFO_UPDATE_KEYFILE 1
+#define WRITEINFO_NO_UNLOCK 2
- /* once_flags */
+/* once_flags */
#define USE_PACKED_KEYS 1
#define RRND_PRESERVE_LASTINX 2
- /* bits in state.changed */
-
-#define STATE_CHANGED 1
-#define STATE_CRASHED 2
+/* bits in state.changed */
+#define STATE_CHANGED 1
+#define STATE_CRASHED 2
#define STATE_CRASHED_ON_REPAIR 4
-#define STATE_NOT_ANALYZED 8
+#define STATE_NOT_ANALYZED 8
#define STATE_NOT_OPTIMIZED_KEYS 16
-#define STATE_NOT_SORTED_PAGES 32
-
- /* options to mi_read_cache */
+#define STATE_NOT_SORTED_PAGES 32
-#define READING_NEXT 1
-#define READING_HEADER 2
+/* options to mi_read_cache */
+#define READING_NEXT 1
+#define READING_HEADER 2
-#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767)
+#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767)
#define mi_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\
- mi_int2store(x,boh); }
+ mi_int2store(x,boh); }
#define mi_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0)
#define mi_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \
DBUG_PRINT("error", ("Marked table crashed")); \
@@ -398,13 +345,6 @@ typedef struct st_mi_sort_param
/* Functions to store length of space packed keys, VARCHAR or BLOB keys */
-#define store_key_length_inc(key,length) \
-{ if ((length) < 255) \
- { *(key)++=(length); } \
- else \
- { *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \
-}
-
#define store_key_length(key,length) \
{ if ((length) < 255) \
{ *(key)=(length); } \
@@ -428,39 +368,39 @@ typedef struct st_mi_sort_param
#define get_pack_length(length) ((length) >= 255 ? 3 : 1)
-#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */
-#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */
-#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2)
-#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */
+#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */
+#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */
+#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2)
+#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */
#define MI_BLOCK_INFO_HEADER_LENGTH 20
-#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */
-#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L)
-#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH)
-#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */
-#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */
-#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1)))
+#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */
+#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L)
+#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH)
+#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */
+#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */
+#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1)))
#define MI_REC_BUFF_OFFSET ALIGN_SIZE(MI_DYN_DELETE_BLOCK_HEADER+sizeof(uint32))
-#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */
+#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */
-#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */
-#define PACK_TYPE_SPACE_FIELDS 2
-#define PACK_TYPE_ZERO_FILL 4
-#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */
+#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */
+#define PACK_TYPE_SPACE_FIELDS 2
+#define PACK_TYPE_ZERO_FILL 4
+#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */
-#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH)
+#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH)
#define MI_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size))
-#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */
-#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */
+#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */
+#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */
-#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */
+#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */
#define MI_MIN_ROWS_TO_USE_BULK_INSERT 100
#define MI_MIN_ROWS_TO_DISABLE_INDEXES 100
#define MI_MIN_ROWS_TO_USE_WRITE_CACHE 10
/* The UNIQUE check is done with a hashed long key */
-#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT
+#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT
#define mi_unique_store(A,B) mi_int4store((A),(B))
#ifdef THREAD
@@ -472,174 +412,181 @@ extern pthread_mutex_t THR_LOCK_myisam;
#define rw_unlock(A) {}
#endif
- /* Some extern variables */
+/* Some extern variables */
extern LIST *myisam_open_list;
-extern uchar NEAR myisam_file_magic[],NEAR myisam_pack_file_magic[];
-extern uint NEAR myisam_read_vec[],NEAR myisam_readnext_vec[];
+extern uchar NEAR myisam_file_magic[], NEAR myisam_pack_file_magic[];
+extern uint NEAR myisam_read_vec[], NEAR myisam_readnext_vec[];
extern uint myisam_quick_table_bits;
extern File myisam_log_file;
extern ulong myisam_pid;
- /* This is used by _mi_calc_xxx_key_length och _mi_store_key */
+/* This is used by _mi_calc_xxx_key_length och _mi_store_key */
typedef struct st_mi_s_param
{
- uint ref_length,key_length,
- n_ref_length,
- n_length,
- totlength,
- part_of_prev_key,prev_length,pack_marker;
- uchar *key, *prev_key,*next_key_pos;
- bool store_not_null;
+ uint ref_length, key_length,
+ n_ref_length,
+ n_length, totlength, part_of_prev_key, prev_length, pack_marker;
+ uchar *key, *prev_key, *next_key_pos;
+ bool store_not_null;
} MI_KEY_PARAM;
- /* Prototypes for intern functions */
+/* Prototypes for intern functions */
-extern int _mi_read_dynamic_record(MI_INFO *info,my_off_t filepos,byte *buf);
-extern int _mi_write_dynamic_record(MI_INFO*, const byte*);
-extern int _mi_update_dynamic_record(MI_INFO*, my_off_t, const byte*);
+extern int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf);
+extern int _mi_write_dynamic_record(MI_INFO *, const byte *);
+extern int _mi_update_dynamic_record(MI_INFO *, my_off_t, const byte *);
extern int _mi_delete_dynamic_record(MI_INFO *info);
-extern int _mi_cmp_dynamic_record(MI_INFO *info,const byte *record);
-extern int _mi_read_rnd_dynamic_record(MI_INFO *, byte *,my_off_t, my_bool);
-extern int _mi_write_blob_record(MI_INFO*, const byte*);
-extern int _mi_update_blob_record(MI_INFO*, my_off_t, const byte*);
-extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos,byte *buf);
-extern int _mi_write_static_record(MI_INFO*, const byte*);
-extern int _mi_update_static_record(MI_INFO*, my_off_t, const byte*);
+extern int _mi_cmp_dynamic_record(MI_INFO *info, const byte *record);
+extern int _mi_read_rnd_dynamic_record(MI_INFO *, byte *, my_off_t, my_bool);
+extern int _mi_write_blob_record(MI_INFO *, const byte *);
+extern int _mi_update_blob_record(MI_INFO *, my_off_t, const byte *);
+extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos, byte *buf);
+extern int _mi_write_static_record(MI_INFO *, const byte *);
+extern int _mi_update_static_record(MI_INFO *, my_off_t, const byte *);
extern int _mi_delete_static_record(MI_INFO *info);
-extern int _mi_cmp_static_record(MI_INFO *info,const byte *record);
-extern int _mi_read_rnd_static_record(MI_INFO*, byte *,my_off_t, my_bool);
-extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length);
+extern int _mi_cmp_static_record(MI_INFO *info, const byte *record);
+extern int _mi_read_rnd_static_record(MI_INFO *, byte *, my_off_t, my_bool);
+extern int _mi_ck_write(MI_INFO *info, uint keynr, uchar *key, uint length);
extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo,
uchar *key, uint key_length,
my_off_t *root, uint comp_flag);
-extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root);
-extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,
- uchar *anc_buff,uchar *key_pos,uchar *key_buff,
- uchar *father_buff, uchar *father_keypos,
- my_off_t father_page, my_bool insert_last);
-extern int _mi_split_page(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,
- uchar *buff,uchar *key_buff, my_bool insert_last);
-extern uchar *_mi_find_half_pos(uint nod_flag,MI_KEYDEF *keyinfo,uchar *page,
- uchar *key,uint *return_key_length,
- uchar **after_key);
-extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo,uint nod_flag,
- uchar *key_pos, uchar *org_key,
- uchar *key_buff,
- uchar *key, MI_KEY_PARAM *s_temp);
-extern int _mi_calc_var_key_length(MI_KEYDEF *keyinfo,uint nod_flag,
- uchar *key_pos, uchar *org_key,
- uchar *key_buff,
- uchar *key, MI_KEY_PARAM *s_temp);
-extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,
- uchar *key_pos, uchar *org_key,
- uchar *prev_key,
- uchar *key, MI_KEY_PARAM *s_temp);
-extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag,
- uchar *key_pos,uchar *org_key,
- uchar *prev_key,
- uchar *key, MI_KEY_PARAM *s_temp);
-void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos,
- MI_KEY_PARAM *s_temp);
-void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
- MI_KEY_PARAM *s_temp);
+extern int _mi_enlarge_root(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ my_off_t *root);
+extern int _mi_insert(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ uchar *anc_buff, uchar *key_pos, uchar *key_buff,
+ uchar *father_buff, uchar *father_keypos,
+ my_off_t father_page, my_bool insert_last);
+extern int _mi_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ uchar *buff, uchar *key_buff, my_bool insert_last);
+extern uchar *_mi_find_half_pos(uint nod_flag, MI_KEYDEF *keyinfo,
+ uchar *page, uchar *key,
+ uint *return_key_length, uchar ** after_key);
+extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *key_buff, uchar *key,
+ MI_KEY_PARAM *s_temp);
+extern int _mi_calc_var_key_length(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *key_buff, uchar *key,
+ MI_KEY_PARAM *s_temp);
+extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *prev_key, uchar *key,
+ MI_KEY_PARAM *s_temp);
+extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar *key_pos, uchar *org_key,
+ uchar *prev_key, uchar *key,
+ MI_KEY_PARAM *s_temp);
+void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos,
+ MI_KEY_PARAM *s_temp);
+void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
+ MI_KEY_PARAM *s_temp);
#ifdef NOT_USED
-void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
- MI_KEY_PARAM *s_temp);
+void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
+ MI_KEY_PARAM *s_temp);
#endif
-void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
- MI_KEY_PARAM *s_temp);
+void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos,
+ MI_KEY_PARAM *s_temp);
-extern int _mi_ck_delete(MI_INFO *info,uint keynr,uchar *key,uint key_length);
-extern int _mi_readinfo(MI_INFO *info,int lock_flag,int check_keybuffer);
-extern int _mi_writeinfo(MI_INFO *info,uint options);
+extern int _mi_ck_delete(MI_INFO *info, uint keynr, uchar *key,
+ uint key_length);
+extern int _mi_readinfo(MI_INFO *info, int lock_flag, int check_keybuffer);
+extern int _mi_writeinfo(MI_INFO *info, uint options);
extern int _mi_test_if_changed(MI_INFO *info);
extern int _mi_mark_file_changed(MI_INFO *info);
extern int _mi_decrement_open_count(MI_INFO *info);
-extern int _mi_check_index(MI_INFO *info,int inx);
-extern int _mi_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,uint key_len,
- uint nextflag,my_off_t pos);
-extern int _mi_bin_search(struct st_myisam_info *info,MI_KEYDEF *keyinfo,
- uchar *page,uchar *key,uint key_len,uint comp_flag,
- uchar * *ret_pos,uchar *buff, my_bool *was_last_key);
-extern int _mi_seq_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page,
- uchar *key,uint key_len,uint comp_flag,
- uchar **ret_pos,uchar *buff, my_bool *was_last_key);
-extern int _mi_prefix_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page,
- uchar *key,uint key_len,uint comp_flag,
- uchar **ret_pos,uchar *buff, my_bool *was_last_key);
-extern my_off_t _mi_kpos(uint nod_flag,uchar *after_key);
-extern void _mi_kpointer(MI_INFO *info,uchar *buff,my_off_t pos);
-extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag,uchar *after_key);
+extern int _mi_check_index(MI_INFO *info, int inx);
+extern int _mi_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ uint key_len, uint nextflag, my_off_t pos);
+extern int _mi_bin_search(struct st_myisam_info *info, MI_KEYDEF *keyinfo,
+ uchar *page, uchar *key, uint key_len,
+ uint comp_flag, uchar **ret_pos, uchar *buff,
+ my_bool *was_last_key);
+extern int _mi_seq_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
+ uchar *key, uint key_len, uint comp_flag,
+ uchar ** ret_pos, uchar *buff,
+ my_bool *was_last_key);
+extern int _mi_prefix_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
+ uchar *key, uint key_len, uint comp_flag,
+ uchar ** ret_pos, uchar *buff,
+ my_bool *was_last_key);
+extern my_off_t _mi_kpos(uint nod_flag, uchar *after_key);
+extern void _mi_kpointer(MI_INFO *info, uchar *buff, my_off_t pos);
+extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag, uchar *after_key);
extern my_off_t _mi_rec_pos(MYISAM_SHARE *info, uchar *ptr);
-extern void _mi_dpointer(MI_INFO *info, uchar *buff,my_off_t pos);
-extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a,uchar *b,
- uint key_length,uint nextflag,uint *diff_length);
-extern uint _mi_get_static_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page,
- uchar *key);
-extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page,
- uchar *key);
+extern void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos);
+extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a, uchar *b,
+ uint key_length, uint nextflag, uint *diff_length);
+extern uint _mi_get_static_key(MI_KEYDEF *keyinfo, uint nod_flag,
+ uchar **page, uchar *key);
+extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo, uint nod_flag, uchar **page,
+ uchar *key);
extern uint _mi_get_binary_pack_key(MI_KEYDEF *keyinfo, uint nod_flag,
- uchar **page_pos, uchar *key);
-extern uchar *_mi_get_last_key(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *keypos,
- uchar *lastkey,uchar *endpos,
- uint *return_key_length);
+ uchar ** page_pos, uchar *key);
+extern uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo,
+ uchar *keypos, uchar *lastkey, uchar *endpos,
+ uint *return_key_length);
extern uchar *_mi_get_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page,
- uchar *key, uchar *keypos, uint *return_key_length);
-extern uint _mi_keylength(MI_KEYDEF *keyinfo,uchar *key);
+ uchar *key, uchar *keypos,
+ uint *return_key_length);
+extern uint _mi_keylength(MI_KEYDEF *keyinfo, uchar *key);
extern uint _mi_keylength_part(MI_KEYDEF *keyinfo, register uchar *key,
- HA_KEYSEG *end);
-extern uchar *_mi_move_key(MI_KEYDEF *keyinfo,uchar *to,uchar *from);
-extern int _mi_search_next(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,
- uint key_length,uint nextflag,my_off_t pos);
-extern int _mi_search_first(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos);
-extern int _mi_search_last(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos);
-extern uchar *_mi_fetch_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page,
- int level,uchar *buff,int return_buffer);
-extern int _mi_write_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page,
- int level, uchar *buff);
-extern int _mi_dispose(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos,
- int level);
-extern my_off_t _mi_new(MI_INFO *info,MI_KEYDEF *keyinfo,int level);
-extern uint _mi_make_key(MI_INFO *info,uint keynr,uchar *key,
- const byte *record,my_off_t filepos);
-extern uint _mi_pack_key(MI_INFO *info,uint keynr,uchar *key,uchar *old,
- uint key_length, HA_KEYSEG **last_used_keyseg);
-extern int _mi_read_key_record(MI_INFO *info,my_off_t filepos,byte *buf);
-extern int _mi_read_cache(IO_CACHE *info,byte *buff,my_off_t pos,
- uint length,int re_read_if_possibly);
-extern ulonglong retrieve_auto_increment(MI_INFO *info,const byte *record);
-
-extern byte *mi_alloc_rec_buff(MI_INFO *,ulong, byte**);
+ HA_KEYSEG *end);
+extern uchar *_mi_move_key(MI_KEYDEF *keyinfo, uchar *to, uchar *from);
+extern int _mi_search_next(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
+ uint key_length, uint nextflag, my_off_t pos);
+extern int _mi_search_first(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos);
+extern int _mi_search_last(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos);
+extern uchar *_mi_fetch_keypage(MI_INFO *info, MI_KEYDEF *keyinfo,
+ my_off_t page, int level, uchar *buff,
+ int return_buffer);
+extern int _mi_write_keypage(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page,
+ int level, uchar *buff);
+extern int _mi_dispose(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos,
+ int level);
+extern my_off_t _mi_new(MI_INFO *info, MI_KEYDEF *keyinfo, int level);
+extern uint _mi_make_key(MI_INFO *info, uint keynr, uchar *key,
+ const byte *record, my_off_t filepos);
+extern uint _mi_pack_key(MI_INFO *info, uint keynr, uchar *key, uchar *old,
+ uint key_length, HA_KEYSEG ** last_used_keyseg);
+extern int _mi_read_key_record(MI_INFO *info, my_off_t filepos, byte *buf);
+extern int _mi_read_cache(IO_CACHE *info, byte *buff, my_off_t pos,
+ uint length, int re_read_if_possibly);
+extern ulonglong retrieve_auto_increment(MI_INFO *info, const byte *record);
+
+extern byte *mi_alloc_rec_buff(MI_INFO *, ulong, byte **);
#define mi_get_rec_buff_ptr(info,buf) \
((((info)->s->options & HA_OPTION_PACK_RECORD) && (buf)) ? \
(buf) - MI_REC_BUFF_OFFSET : (buf))
#define mi_get_rec_buff_len(info,buf) \
(*((uint32 *)(mi_get_rec_buff_ptr(info,buf))))
-extern ulong _mi_rec_unpack(MI_INFO *info,byte *to,byte *from,
- ulong reclength);
-extern my_bool _mi_rec_check(MI_INFO *info,const char *record, byte *packpos,
+extern ulong _mi_rec_unpack(MI_INFO *info, byte *to, byte *from,
+ ulong reclength);
+extern my_bool _mi_rec_check(MI_INFO *info, const char *record, byte *packpos,
ulong packed_length, my_bool with_checkum);
-extern int _mi_write_part_record(MI_INFO *info,my_off_t filepos,ulong length,
- my_off_t next_filepos,byte **record,
- ulong *reclength,int *flag);
-extern void _mi_print_key(FILE *stream,HA_KEYSEG *keyseg,const uchar *key,
- uint length);
-extern my_bool _mi_read_pack_info(MI_INFO *info,pbool fix_keys);
-extern int _mi_read_pack_record(MI_INFO *info,my_off_t filepos,byte *buf);
-extern int _mi_read_rnd_pack_record(MI_INFO*, byte *,my_off_t, my_bool);
+extern int _mi_write_part_record(MI_INFO *info, my_off_t filepos, ulong length,
+ my_off_t next_filepos, byte ** record,
+ ulong *reclength, int *flag);
+extern void _mi_print_key(FILE *stream, HA_KEYSEG *keyseg, const uchar *key,
+ uint length);
+extern my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys);
+extern int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf);
+extern int _mi_read_rnd_pack_record(MI_INFO *, byte *, my_off_t, my_bool);
extern int _mi_pack_rec_unpack(MI_INFO *info, MI_BIT_BUFF *bit_buff,
byte *to, byte *from, ulong reclength);
-extern ulonglong mi_safe_mul(ulonglong a,ulonglong b);
+extern ulonglong mi_safe_mul(ulonglong a, ulonglong b);
extern int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf,
- const byte *oldrec, const byte *newrec, my_off_t pos);
+ const byte *oldrec, const byte *newrec,
+ my_off_t pos);
struct st_sort_info;
-typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */
+typedef struct st_mi_block_info /* Parameter to _mi_get_block_info */
+{
uchar header[MI_BLOCK_INFO_HEADER_LENGTH];
ulong rec_len;
ulong data_len;
@@ -652,35 +599,37 @@ typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */
uint offset;
} MI_BLOCK_INFO;
- /* bits in return from _mi_get_block_info */
-
-#define BLOCK_FIRST 1
-#define BLOCK_LAST 2
-#define BLOCK_DELETED 4
-#define BLOCK_ERROR 8 /* Wrong data */
-#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */
-#define BLOCK_FATAL_ERROR 32 /* hardware-error */
-
-#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */
-#define MAXERR 20
-#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */
-#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
-#define INDEX_TMP_EXT ".TMM"
-#define DATA_TMP_EXT ".TMD"
-
-#define UPDATE_TIME 1
-#define UPDATE_STAT 2
-#define UPDATE_SORT 4
-#define UPDATE_AUTO_INC 8
-#define UPDATE_OPEN_COUNT 16
-
-#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE)
-#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD)
-#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD)
-#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD)
-
-enum myisam_log_commands {
- MI_LOG_OPEN,MI_LOG_WRITE,MI_LOG_UPDATE,MI_LOG_DELETE,MI_LOG_CLOSE,MI_LOG_EXTRA,MI_LOG_LOCK,MI_LOG_DELETE_ALL
+ /* bits in return from _mi_get_block_info */
+
+#define BLOCK_FIRST 1
+#define BLOCK_LAST 2
+#define BLOCK_DELETED 4
+#define BLOCK_ERROR 8 /* Wrong data */
+#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */
+#define BLOCK_FATAL_ERROR 32 /* hardware-error */
+
+#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Needed for recursion */
+#define MAXERR 20
+#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */
+#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
+#define INDEX_TMP_EXT ".TMM"
+#define DATA_TMP_EXT ".TMD"
+
+#define UPDATE_TIME 1
+#define UPDATE_STAT 2
+#define UPDATE_SORT 4
+#define UPDATE_AUTO_INC 8
+#define UPDATE_OPEN_COUNT 16
+
+#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE)
+#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD)
+#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD)
+#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD)
+
+enum myisam_log_commands
+{
+ MI_LOG_OPEN, MI_LOG_WRITE, MI_LOG_UPDATE, MI_LOG_DELETE, MI_LOG_CLOSE,
+ MI_LOG_EXTRA, MI_LOG_LOCK, MI_LOG_DELETE_ALL
};
#define myisam_log(a,b,c,d) if (myisam_log_file >= 0) _myisam_log(a,b,c,d)
@@ -690,36 +639,34 @@ enum myisam_log_commands {
#define fast_mi_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _mi_writeinfo((INFO),0)
#define fast_mi_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _mi_readinfo((INFO),F_RDLCK,1)
-#ifdef __cplusplus
+#ifdef __cplusplus
extern "C" {
#endif
-
-extern uint _mi_get_block_info(MI_BLOCK_INFO *,File, my_off_t);
-extern uint _mi_rec_pack(MI_INFO *info,byte *to,const byte *from);
+ extern uint _mi_get_block_info(MI_BLOCK_INFO *, File, my_off_t);
+extern uint _mi_rec_pack(MI_INFO *info, byte *to, const byte *from);
extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
MI_BLOCK_INFO *info, byte **rec_buff_p,
File file, my_off_t filepos);
-extern void _my_store_blob_length(byte *pos,uint pack_length,uint length);
-extern void _myisam_log(enum myisam_log_commands command,MI_INFO *info,
- const byte *buffert,uint length);
+extern void _mi_store_blob_length(byte *pos, uint pack_length, uint length);
+extern void _myisam_log(enum myisam_log_commands command, MI_INFO *info,
+ const byte *buffert, uint length);
extern void _myisam_log_command(enum myisam_log_commands command,
- MI_INFO *info, const byte *buffert,
- uint length, int result);
-extern void _myisam_log_record(enum myisam_log_commands command,MI_INFO *info,
- const byte *record,my_off_t filepos,
- int result);
+ MI_INFO *info, const byte *buffert,
+ uint length, int result);
+extern void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info,
+ const byte *record, my_off_t filepos,
+ int result);
extern void mi_report_error(int errcode, const char *file_name);
extern my_bool _mi_memmap_file(MI_INFO *info);
extern void _mi_unmap_file(MI_INFO *info);
extern uint save_pack_length(uint version, byte *block_buff, ulong length);
-extern uint read_pack_length(uint version, const uchar *buf, ulong *length);
extern uint calc_pack_length(uint version, ulong length);
extern uint mi_mmap_pread(MI_INFO *info, byte *Buffer,
- uint Count, my_off_t offset, myf MyFlags);
+ uint Count, my_off_t offset, myf MyFlags);
extern uint mi_mmap_pwrite(MI_INFO *info, byte *Buffer,
- uint Count, my_off_t offset, myf MyFlags);
+ uint Count, my_off_t offset, myf MyFlags);
extern uint mi_nommap_pread(MI_INFO *info, byte *Buffer,
- uint Count, my_off_t offset, myf MyFlags);
+ uint Count, my_off_t offset, myf MyFlags);
extern uint mi_nommap_pwrite(MI_INFO *info, byte *Buffer,
uint Count, my_off_t offset, myf MyFlags);
@@ -727,7 +674,7 @@ uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite);
uchar *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state);
uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead);
uint mi_base_info_write(File file, MI_BASE_INFO *base);
-uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base);
+uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base);
int mi_keyseg_write(File file, const HA_KEYSEG *keyseg);
char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg);
uint mi_keydef_write(File file, MI_KEYDEF *keydef);
@@ -739,23 +686,23 @@ char *mi_recinfo_read(char *ptr, MI_COLUMNDEF *recinfo);
extern int mi_disable_indexes(MI_INFO *info);
extern int mi_enable_indexes(MI_INFO *info);
extern int mi_indexes_are_disabled(MI_INFO *info);
-ulong _my_calc_total_blob_length(MI_INFO *info, const byte *record);
+ulong _mi_calc_total_blob_length(MI_INFO *info, const byte *record);
ha_checksum mi_checksum(MI_INFO *info, const byte *buf);
ha_checksum mi_static_checksum(MI_INFO *info, const byte *buf);
my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, byte *record,
- ha_checksum unique_hash, my_off_t pos);
+ ha_checksum unique_hash, my_off_t pos);
ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const byte *buf);
int _mi_cmp_static_unique(MI_INFO *info, MI_UNIQUEDEF *def,
- const byte *record, my_off_t pos);
+ const byte *record, my_off_t pos);
int _mi_cmp_dynamic_unique(MI_INFO *info, MI_UNIQUEDEF *def,
- const byte *record, my_off_t pos);
+ const byte *record, my_off_t pos);
int mi_unique_comp(MI_UNIQUEDEF *def, const byte *a, const byte *b,
- my_bool null_are_equal);
-void mi_get_status(void* param, int concurrent_insert);
-void mi_update_status(void* param);
-void mi_restore_status(void* param);
-void mi_copy_status(void* to,void *from);
-my_bool mi_check_status(void* param);
+ my_bool null_are_equal);
+void mi_get_status(void *param, int concurrent_insert);
+void mi_update_status(void *param);
+void mi_restore_status(void *param);
+void mi_copy_status(void *to, void *from);
+my_bool mi_check_status(void *param);
void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows);
extern MI_INFO *test_if_reopen(char *filename);
@@ -767,22 +714,14 @@ my_bool mi_dynmap_file(MI_INFO *info, my_off_t size);
void mi_remap_file(MI_INFO *info, my_off_t size);
/* Functions needed by mi_check */
-volatile int *killed_ptr(MI_CHECK *param);
-void mi_check_print_error _VARARGS((MI_CHECK *param, const char *fmt,...));
-void mi_check_print_warning _VARARGS((MI_CHECK *param, const char *fmt,...));
-void mi_check_print_info _VARARGS((MI_CHECK *param, const char *fmt,...));
-int flush_pending_blocks(MI_SORT_PARAM *param);
-int sort_ft_buf_flush(MI_SORT_PARAM *sort_param);
-int thr_write_keys(MI_SORT_PARAM *sort_param);
+volatile int *killed_ptr(HA_CHECK *param);
+void mi_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...));
+void mi_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...));
+void mi_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...));
#ifdef THREAD
pthread_handler_t thr_find_all_keys(void *arg);
#endif
-int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file);
-
-int sort_write_record(MI_SORT_PARAM *sort_param);
-int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong);
-
+int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file);
#ifdef __cplusplus
}
#endif
-
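
The BLOCK_* values restored above are bit flags OR-ed into the return value of
_mi_get_block_info(), so callers test them with bitwise AND rather than
equality. A minimal standalone sketch of how such a mask is interpreted (the
flag values are copied from the header; the surrounding program is illustrative
only, not code from this patch):

  #include <stdio.h>

  #define BLOCK_FIRST        1
  #define BLOCK_LAST         2
  #define BLOCK_DELETED      4
  #define BLOCK_ERROR        8   /* Wrong data */
  #define BLOCK_SYNC_ERROR  16   /* Right data at wrong place */
  #define BLOCK_FATAL_ERROR 32   /* hardware error */

  int main(void)
  {
    unsigned int b= BLOCK_FIRST | BLOCK_LAST;   /* e.g. record fits in one block */
    if (b & (BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR))
      puts("damaged or misplaced block");
    else if (b & BLOCK_DELETED)
      puts("deleted block, skip it");
    else if ((b & (BLOCK_FIRST | BLOCK_LAST)) == (BLOCK_FIRST | BLOCK_LAST))
      puts("whole record in a single block");
    return 0;
  }
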
diff --git a/storage/myisam/myisamlog.c b/storage/myisam/myisamlog.c
index 0bcf74d87a4..9e039058097 100644
--- a/storage/myisam/myisamlog.c
+++ b/storage/myisam/myisamlog.c
@@ -805,7 +805,7 @@ static int find_record_with_key(struct file_info *file_info, byte *record)
{
uint key;
MI_INFO *info=file_info->isam;
- uchar tmp_key[MI_MAX_KEY_BUFF];
+ uchar tmp_key[HA_MAX_KEY_BUFF];
for (key=0 ; key < info->s->base.keys ; key++)
{
diff --git a/storage/myisam/rt_index.c b/storage/myisam/rt_index.c
index edb33ec10b9..c1f59e2f65d 100644
--- a/storage/myisam/rt_index.c
+++ b/storage/myisam/rt_index.c
@@ -540,7 +540,7 @@ static int rtree_insert_req(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key,
int res;
if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length +
- MI_MAX_KEY_BUFF)))
+ HA_MAX_KEY_BUFF)))
{
my_errno = HA_ERR_OUT_OF_MEM;
return -1;
@@ -652,7 +652,7 @@ static int rtree_insert_level(MI_INFO *info, uint keynr, uchar *key,
uint nod_flag = info->s->base.key_reflength;
if (!(new_root_buf = (uchar*)my_alloca((uint)keyinfo->block_length +
- MI_MAX_KEY_BUFF)))
+ HA_MAX_KEY_BUFF)))
{
my_errno = HA_ERR_OUT_OF_MEM;
return -1;
diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c
index 53eb6b2e310..23395fd9558 100644
--- a/storage/myisam/sort.c
+++ b/storage/myisam/sort.c
@@ -15,7 +15,7 @@
/*
  Creates an index for a database by reading keys, sorting them and outputting
- them in sorted order through SORT_INFO functions.
+ them in sorted order through MI_SORT_INFO functions.
*/
#include "fulltext.h"
@@ -486,8 +486,8 @@ ok:
int thr_write_keys(MI_SORT_PARAM *sort_param)
{
- SORT_INFO *sort_info=sort_param->sort_info;
- MI_CHECK *param=sort_info->param;
+ MI_SORT_INFO *sort_info=sort_param->sort_info;
+ HA_CHECK *param=sort_info->param;
ulong length, keys;
ulong *rec_per_key_part=param->rec_per_key_part;
int got_error=sort_info->got_error;
@@ -829,7 +829,7 @@ static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek,
register uint count;
uint16 length_of_key = 0;
uint idx;
- uchar *buffp;
+ byte *buffp;
if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
{
@@ -916,7 +916,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
{
count+= buffpek->count;
- buffpek->base= strpos;
+ buffpek->base= (byte*) strpos;
buffpek->max_keys=maxcount;
strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek,
sort_length));
@@ -954,7 +954,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
{
if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length)))
{
- uchar *base=buffpek->base;
+ byte *base= buffpek->base;
uint max_keys=buffpek->max_keys;
VOID(queue_remove(&queue,0));
@@ -986,7 +986,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
}
}
buffpek=(BUFFPEK*) queue_top(&queue);
- buffpek->base=(uchar *) sort_keys;
+ buffpek->base= (byte*) sort_keys;
buffpek->max_keys=keys;
do
{
@@ -1001,7 +1001,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file,
else
{
register uchar *end;
- strpos= buffpek->key;
+ strpos= (uchar*) buffpek->key;
for (end=strpos+buffpek->mem_count*sort_length;
strpos != end ;
strpos+=sort_length)
diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h
index ffa55673ad1..99e7b72e5fe 100644
--- a/storage/myisammrg/ha_myisammrg.h
+++ b/storage/myisammrg/ha_myisammrg.h
@@ -46,8 +46,8 @@ class ha_myisammrg: public handler
HA_READ_ORDER | HA_KEYREAD_ONLY);
}
uint max_supported_keys() const { return MI_MAX_KEY; }
- uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; }
- uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; }
+ uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; }
+ uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; }
double scan_time()
{ return ulonglong2double(stats.data_file_length) / IO_SIZE + file->tables; }
diff --git a/support-files/magic b/support-files/magic
index 9844142ba93..e2bd4d70fb5 100644
--- a/support-files/magic
+++ b/support-files/magic
@@ -4,12 +4,23 @@
#
0 beshort 0xfe01 MySQL table definition file
>2 byte x Version %d
-0 belong&0xffffff00 0xfefe0300 MySQL MISAM index file
+0 belong&0xffffff00 0xfefe0700 MySQL MISAM index file
>3 byte x Version %d
-0 belong&0xffffff00 0xfefe0700 MySQL MISAM compressed data file
+0 belong&0xffffff00 0xfefe0800 MySQL MISAM compressed data file
+>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0900 MySQL Maria index file
+>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0A00 MySQL Maria compressed data file
>3 byte x Version %d
0 belong&0xffffff00 0xfefe0500 MySQL ISAM index file
>3 byte x Version %d
0 belong&0xffffff00 0xfefe0600 MySQL ISAM compressed data file
>3 byte x Version %d
0 string \376bin MySQL replication log
+0 belong&0xffffff00 0xfefe0b00
+>4 string MARIALOG MySQL Maria transaction log file
+>>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0c00
+>4 string MACF MySQL Maria control file
+>>3 byte x Version %d
+
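Each magic entry above reads a big-endian 32-bit word at offset 0, masks off the
low byte with 0xffffff00 and compares the remainder, while the ">3 byte x
Version %d" continuation prints the version byte at offset 3. A hypothetical
standalone checker performing the same test for the two new Maria signatures
(illustrative only, not part of the patch):

  #include <stdio.h>

  int main(int argc, char **argv)
  {
    unsigned char h[4];
    unsigned long belong;
    FILE *f;
    if (argc < 2 || !(f= fopen(argv[1], "rb")) || fread(h, 1, 4, f) != 4)
      return 1;                                   /* no file or too short */
    fclose(f);
    /* assemble the first four bytes big-endian, as "belong" does */
    belong= ((unsigned long) h[0] << 24) | ((unsigned long) h[1] << 16) |
            ((unsigned long) h[2] << 8)  |  (unsigned long) h[3];
    if ((belong & 0xffffff00UL) == 0xfefe0900UL)
      printf("MySQL Maria index file, Version %d\n", h[3]);
    else if ((belong & 0xffffff00UL) == 0xfefe0A00UL)
      printf("MySQL Maria compressed data file, Version %d\n", h[3]);
    return 0;
  }
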
diff --git a/unittest/Makefile.am b/unittest/Makefile.am
index 6197586b008..cc617292941 100644
--- a/unittest/Makefile.am
+++ b/unittest/Makefile.am
@@ -13,7 +13,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-SUBDIRS = mytap . mysys examples
+SUBDIRS = mytap mysys examples
EXTRA_DIST = unit.pl
CLEANFILES = unit
diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am
index 54b3d203e10..229e8f0339b 100644
--- a/unittest/mysys/Makefile.am
+++ b/unittest/mysys/Makefile.am
@@ -13,13 +13,11 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include
-AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+ -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+
LDADD = $(top_builddir)/unittest/mytap/libmytap.a \
$(top_builddir)/mysys/libmysys.a \
$(top_builddir)/dbug/libdbug.a \
$(top_builddir)/strings/libmystrings.a
-
-noinst_PROGRAMS = bitmap-t base64-t my_atomic-t
-
diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c
index c4ba7850ae1..d3be33f4163 100644
--- a/unittest/mysys/my_atomic-t.c
+++ b/unittest/mysys/my_atomic-t.c
@@ -13,27 +13,27 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-#include <my_global.h>
#include <tap.h>
+
+#include <my_global.h>
#include <my_sys.h>
#include <my_atomic.h>
+#include <lf.h>
-int32 a32,b32,c32;
+volatile uint32 a32,b32;
+volatile int32 c32, N;
my_atomic_rwlock_t rwl;
-
-pthread_attr_t thr_attr;
-pthread_mutex_t mutex;
-pthread_cond_t cond;
-int N;
+LF_ALLOCATOR lf_allocator;
+LF_HASH lf_hash;
/* add and sub a random number in a loop. Must get 0 at the end */
pthread_handler_t test_atomic_add_handler(void *arg)
{
- int m=*(int *)arg;
+ int m= (*(int *)arg)/2;
int32 x;
- for (x=((int)((long)(&m))); m ; m--)
+ for (x= ((int)(intptr)(&m)); m ; m--)
{
- x=x*m+0x87654321;
+ x= (x*m+0x87654321) & INT_MAX32;
my_atomic_rwlock_wrlock(&rwl);
my_atomic_add32(&a32, x);
my_atomic_rwlock_wrunlock(&rwl);
@@ -42,10 +42,6 @@ pthread_handler_t test_atomic_add_handler(void *arg)
my_atomic_add32(&a32, -x);
my_atomic_rwlock_wrunlock(&rwl);
}
- pthread_mutex_lock(&mutex);
- N--;
- if (!N) pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
return 0;
}
@@ -57,30 +53,24 @@ pthread_handler_t test_atomic_add_handler(void *arg)
5. subtract result from a32
must get 0 in a32 at the end
*/
-pthread_handler_t test_atomic_swap_handler(void *arg)
+pthread_handler_t test_atomic_fas_handler(void *arg)
{
- int m=*(int *)arg;
- int32 x;
-
- my_atomic_rwlock_wrlock(&rwl);
- x=my_atomic_add32(&b32, 1);
- my_atomic_rwlock_wrunlock(&rwl);
+ int m= *(int *)arg;
+ uint32 x= my_atomic_add32(&b32, 1);
- my_atomic_rwlock_wrlock(&rwl);
my_atomic_add32(&a32, x);
- my_atomic_rwlock_wrunlock(&rwl);
for (; m ; m--)
{
my_atomic_rwlock_wrlock(&rwl);
- x=my_atomic_swap32(&c32, x);
+ x= my_atomic_fas32(&c32, x);
my_atomic_rwlock_wrunlock(&rwl);
}
if (!x)
{
my_atomic_rwlock_wrlock(&rwl);
- x=my_atomic_swap32(&c32, x);
+ x= my_atomic_fas32(&c32, x);
my_atomic_rwlock_wrunlock(&rwl);
}
@@ -88,113 +78,222 @@ pthread_handler_t test_atomic_swap_handler(void *arg)
my_atomic_add32(&a32, -x);
my_atomic_rwlock_wrunlock(&rwl);
- pthread_mutex_lock(&mutex);
- N--;
- if (!N) pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
return 0;
}
/*
same as test_atomic_add_handler, but my_atomic_add32 is emulated with
- (slower) my_atomic_cas32
+ my_atomic_cas32 - notice that the slowdown is proportional to the
+ number of CPUs
*/
pthread_handler_t test_atomic_cas_handler(void *arg)
{
- int m=*(int *)arg, ok;
- int32 x,y;
- for (x=((int)((long)(&m))); m ; m--)
+ int m= (*(int *)arg)/2, ok= 0;
+ int32 x, y;
+ for (x= ((int)(intptr)(&m)); m ; m--)
{
my_atomic_rwlock_wrlock(&rwl);
- y=my_atomic_load32(&a32);
+ y= my_atomic_load32(&a32);
my_atomic_rwlock_wrunlock(&rwl);
-
- x=x*m+0x87654321;
+ x= (x*m+0x87654321) & INT_MAX32;
do {
my_atomic_rwlock_wrlock(&rwl);
- ok=my_atomic_cas32(&a32, &y, y+x);
+ ok= my_atomic_cas32(&a32, &y, (uint32)y+x);
my_atomic_rwlock_wrunlock(&rwl);
- } while (!ok);
+ } while (!ok) ;
do {
my_atomic_rwlock_wrlock(&rwl);
- ok=my_atomic_cas32(&a32, &y, y-x);
+ ok= my_atomic_cas32(&a32, &y, y-x);
my_atomic_rwlock_wrunlock(&rwl);
- } while (!ok);
+ } while (!ok) ;
}
- pthread_mutex_lock(&mutex);
- N--;
- if (!N) pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
+ return 0;
+}
+
+/*
+ pin allocator - alloc and release an element in a loop
+*/
+pthread_handler_t test_lf_pinbox(void *arg)
+{
+ int m= *(int *)arg;
+ int32 x= 0;
+ LF_PINS *pins;
+
+ pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+
+ for (x= ((int)(intptr)(&m)); m ; m--)
+ {
+ lf_pinbox_put_pins(pins);
+ pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+ }
+ lf_pinbox_put_pins(pins);
+ return 0;
+}
+
+typedef union {
+ int32 data;
+ void *not_used;
+} TLA;
+
+pthread_handler_t test_lf_alloc(void *arg)
+{
+ int m= (*(int *)arg)/2;
+ int32 x,y= 0;
+ LF_PINS *pins;
+
+ pins= lf_alloc_get_pins(&lf_allocator);
+
+ for (x= ((int)(intptr)(&m)); m ; m--)
+ {
+ TLA *node1, *node2;
+ x= (x*m+0x87654321) & INT_MAX32;
+ node1= (TLA *)lf_alloc_new(pins);
+ node1->data= x;
+ y+= node1->data;
+ node1->data= 0;
+ node2= (TLA *)lf_alloc_new(pins);
+ node2->data= x;
+ y-= node2->data;
+ node2->data= 0;
+ lf_alloc_free(pins, node1);
+ lf_alloc_free(pins, node2);
+ }
+ lf_alloc_put_pins(pins);
+ my_atomic_rwlock_wrlock(&rwl);
+ my_atomic_add32(&a32, y);
+
+ if (my_atomic_add32(&N, -1) == 1)
+ {
+ diag("%d mallocs, %d pins in stack",
+ lf_allocator.mallocs, lf_allocator.pinbox.pins_in_stack);
+#ifdef MY_LF_EXTRA_DEBUG
+ a32|= lf_allocator.mallocs - lf_alloc_in_pool(&lf_allocator);
+#endif
+ }
+ my_atomic_rwlock_wrunlock(&rwl);
+ return 0;
+}
+
+#define N_TLH 1000
+pthread_handler_t test_lf_hash(void *arg)
+{
+ int m= (*(int *)arg)/(2*N_TLH);
+ int32 x,y,z,sum= 0, ins= 0;
+ LF_PINS *pins;
+
+ pins= lf_hash_get_pins(&lf_hash);
+
+ for (x= ((int)(intptr)(&m)); m ; m--)
+ {
+ int i;
+ y= x;
+ for (i= 0; i < N_TLH; i++)
+ {
+ x= (x*(m+i)+0x87654321) & INT_MAX32;
+ z= (x<0) ? -x : x;
+ if (lf_hash_insert(&lf_hash, pins, &z))
+ {
+ sum+= z;
+ ins++;
+ }
+ }
+ for (i= 0; i < N_TLH; i++)
+ {
+ y= (y*(m+i)+0x87654321) & INT_MAX32;
+ z= (y<0) ? -y : y;
+ if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z)))
+ sum-= z;
+ }
+ }
+ lf_hash_put_pins(pins);
+ my_atomic_rwlock_wrlock(&rwl);
+ my_atomic_add32(&a32, sum);
+ my_atomic_add32(&b32, ins);
+
+ if (my_atomic_add32(&N, -1) == 1)
+ {
+ diag("%d mallocs, %d pins in stack, %d hash size, %d inserts",
+ lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_stack,
+ lf_hash.size, b32);
+ a32|= lf_hash.count;
+ }
+ my_atomic_rwlock_wrunlock(&rwl);
return 0;
}
void test_atomic(const char *test, pthread_handler handler, int n, int m)
{
- pthread_t t;
- ulonglong now=my_getsystime();
+ pthread_t *threads;
+ ulonglong now= my_getsystime();
+ int i;
a32= 0;
b32= 0;
c32= 0;
+ threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0));
+ if (!threads)
+ {
+ diag("Out of memory");
+ abort();
+ }
+
diag("Testing %s with %d threads, %d iterations... ", test, n, m);
- for (N=n ; n ; n--)
+ N= n;
+ for (i= 0 ; i < n ; i++)
{
- if (pthread_create(&t, &thr_attr, handler, &m) != 0)
+ if (pthread_create(threads+i, 0, handler, &m) != 0)
{
diag("Could not create thread");
- a32= 1;
- goto err;
+ abort();
}
}
-
- pthread_mutex_lock(&mutex);
- while (N)
- pthread_cond_wait(&cond, &mutex);
- pthread_mutex_unlock(&mutex);
- now=my_getsystime()-now;
-err:
- ok(a32 == 0, "tested %s in %g secs", test, ((double)now)/1e7);
+ for (i= 0 ; i < n ; i++)
+ pthread_join(threads[i], 0);
+ now= my_getsystime()-now;
+ ok(a32 == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, a32);
+ my_free((void *)threads, MYF(0));
}
+
int main()
{
int err;
- diag("N CPUs: %d", my_getncpus());
+ my_init();
+
+ diag("N CPUs: %d, atomic ops: %s", my_getncpus(), MY_ATOMIC_MODE);
err= my_atomic_initialize();
- plan(4);
+ plan(7);
ok(err == 0, "my_atomic_initialize() returned %d", err);
- pthread_attr_init(&thr_attr);
- pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
- pthread_mutex_init(&mutex, 0);
- pthread_cond_init(&cond, 0);
my_atomic_rwlock_init(&rwl);
+ lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used));
+ lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0,
+ &my_charset_bin);
-#ifdef HPUX11
-#define CYCLES 1000
+#ifdef MY_ATOMIC_MODE_RWLOCKS
+#define CYCLES 3000
#else
-#define CYCLES 10000
+#define CYCLES 300000
#endif
#define THREADS 100
- test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS, CYCLES);
- test_atomic("my_atomic_swap32", test_atomic_swap_handler, THREADS, CYCLES);
- test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS, CYCLES);
+
+ test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS,CYCLES);
+ test_atomic("my_atomic_fas32", test_atomic_fas_handler, THREADS,CYCLES);
+ test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS,CYCLES);
+ test_atomic("lf_pinbox", test_lf_pinbox, THREADS,CYCLES);
+ test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES);
+ test_atomic("lf_hash", test_lf_hash, THREADS,CYCLES/10);
+
+ lf_hash_destroy(&lf_hash);
+ lf_alloc_destroy(&lf_allocator);
/* workaround until we know why this includes dbug but not safemalloc */
if(err) { my_thread_global_init(); my_free(my_malloc(0, MYF(0)), MYF(0)); }
- /*
- workaround until we know why it crashes randomly on some machine
- (BUG#22320).
- */
- sleep(2);
-
- pthread_mutex_destroy(&mutex);
- pthread_cond_destroy(&cond);
- pthread_attr_destroy(&thr_attr);
my_atomic_rwlock_destroy(&rwl);
+ my_end(0);
return exit_status();
}
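test_atomic_cas_handler above emulates my_atomic_add32() with a compare-and-swap
retry loop: load the current value, compute the new one, and repeat the CAS
until no other thread has changed the variable in between. A self-contained
analogue using C11 atomics rather than the my_atomic wrappers (a sketch for
illustration, not the test's actual API):

  #include <stdatomic.h>
  #include <stdio.h>

  static _Atomic int a32;                     /* starts at 0 */

  static void cas_add(int x)
  {
    int y= atomic_load(&a32);
    /* on failure y is refreshed with the current value, so y + x is recomputed */
    while (!atomic_compare_exchange_weak(&a32, &y, y + x))
      ;
  }

  int main(void)
  {
    cas_add(7);
    cas_add(-7);
    printf("a32 = %d\n", atomic_load(&a32));  /* prints 0, as the test expects */
    return 0;
  }
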
diff --git a/unittest/mytap/tap.c b/unittest/mytap/tap.c
index 4e053e3e745..a99da3fd975 100644
--- a/unittest/mytap/tap.c
+++ b/unittest/mytap/tap.c
@@ -169,6 +169,8 @@ static signal_entry install_signal[]= {
void
plan(int const count)
{
+
+ setvbuf(tapout, 0, _IONBF, 0); /* provide output at once */
/*
Install signal handler
*/