57 files changed, 6135 insertions, 1683 deletions
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index 49a2e4f6728..c3c4d6b5246 100644
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -33,7 +33,8 @@ SET(MYSYS_SOURCES  array.c charset-def.c charset.c checksum.c default.c
 				rijndael.c sha1.c string.c thr_alarm.c thr_lock.c thr_mutex.c
 				thr_rwlock.c tree.c typelib.c base64.c my_memmem.c my_getpagesize.c
 				lf_alloc-pin.c lf_dynarray.c lf_hash.c
-				my_atomic.c my_getncpus.c
+				my_atomic.c my_getncpus.c my_safehash.c my_chmod.c my_rnd.c
+                                my_uuid.c wqueue.c waiting_threads.c
 				my_rdtsc.c)
 
 IF (WIN32)
@@ -77,3 +78,4 @@ DTRACE_INSTRUMENT(mysys)
 ADD_EXECUTABLE(thr_lock thr_lock.c)
 TARGET_LINK_LIBRARIES(thr_lock mysys)
 SET_TARGET_PROPERTIES(thr_lock PROPERTIES COMPILE_FLAGS "-DMAIN")
+
diff --git a/mysys/Makefile.am b/mysys/Makefile.am
index 357fa321da6..0962cfc9636 100644
--- a/mysys/Makefile.am
+++ b/mysys/Makefile.am
@@ -21,12 +21,13 @@ INCLUDES =		@ZLIB_INCLUDES@ @RDTSC_SPARC_ASSEMBLY@ \
 			-I$(top_srcdir)/include -I$(srcdir)
 pkglib_LIBRARIES =	libmysys.a
 LDADD =			libmysys.a $(top_builddir)/strings/libmystrings.a $(top_builddir)/dbug/libdbug.a
-noinst_HEADERS =	mysys_priv.h my_static.h my_handler_errors.h
+noinst_HEADERS =	mysys_priv.h my_static.h my_handler_errors.h \
+			my_safehash.h
 libmysys_a_SOURCES =    my_init.c my_getwd.c mf_getdate.c my_mmap.c \
 			mf_path.c mf_loadpath.c my_file.c \
 			my_open.c my_create.c my_seek.c my_read.c \
 			my_pread.c my_write.c my_getpagesize.c \
-			mf_keycaches.c my_crc32.c \
+			my_crc32.c \
 			mf_iocache.c mf_iocache2.c mf_cache.c mf_tempfile.c \
 			mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \
 			my_malloc.c my_once.c mulalloc.c \
@@ -48,6 +49,7 @@ libmysys_a_SOURCES =    my_init.c my_getwd.c mf_getdate.c my_mmap.c \
 			default.c my_compress.c checksum.c \
 			my_port.c my_sleep.c \
 			charset.c charset-def.c my_bitmap.c my_bit.c md5.c \
+			my_safehash.c my_rnd.c my_uuid.c my_chmod.c wqueue.c \
 			my_gethostbyname.c rijndael.c my_aes.c sha1.c \
 			my_handler.c my_largepage.c \
 			my_memmem.c stacktrace.c \
@@ -58,11 +60,11 @@ if NEED_THREAD
 # mf_keycache is used only in the server, so it is safe to leave the file
 # out of the non-threaded library.
 # In fact, it will currently not compile without thread support.
-libmysys_a_SOURCES +=   mf_keycache.c
+libmysys_a_SOURCES +=   mf_keycache.c mf_keycaches.c
 endif
 
 EXTRA_DIST =		thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
-			thr_mutex.c thr_rwlock.c \
+			thr_mutex.c thr_rwlock.c waiting_threads.c \
 			CMakeLists.txt mf_soundex.c \
 			my_conio.c my_wincond.c my_winthread.c my_winerr.c \
 			my_winfile.c \
@@ -72,13 +74,13 @@ libmysys_a_LIBADD =	@THREAD_LOBJECTS@
 # testhash_DEPENDENCIES=	$(LIBRARIES)
 # test_charset_DEPENDENCIES=	$(LIBRARIES)
 # charset2html_DEPENDENCIES=	$(LIBRARIES)
-DEFS =			-DDEFAULT_BASEDIR=\"$(prefix)\" \
-			-DMYSQL_DATADIR="\"$(MYSQLDATAdir)\"" \
-			-DDEFAULT_CHARSET_HOME="\"$(MYSQLBASEdir)\"" \
-			-DSHAREDIR="\"$(MYSQLSHAREdir)\"" \
+DEFS =			-DDEFAULT_BASEDIR='"$(prefix)"' \
+			-DMYSQL_DATADIR='"$(MYSQLDATAdir)"' \
+			-DDEFAULT_CHARSET_HOME='"$(MYSQLBASEdir)"' \
+			-DSHAREDIR='"$(MYSQLSHAREdir)"' \
 			-DDEFAULT_HOME_ENV=MYSQL_HOME \
 			-DDEFAULT_GROUP_SUFFIX_ENV=MYSQL_GROUP_SUFFIX \
-			-DDEFAULT_SYSCONFDIR="\"$(sysconfdir)\"" \
+			-DDEFAULT_SYSCONFDIR='"$(sysconfdir)"' \
                         @DEFS@
 
 libmysys_a_DEPENDENCIES= @THREAD_LOBJECTS@
@@ -86,6 +88,13 @@ libmysys_a_DEPENDENCIES= @THREAD_LOBJECTS@
 # I hope this always does the right thing. Otherwise this is only test programs
 FLAGS=$(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) @NOINST_LDFLAGS@
 
+CLEANFILES =		test_bitmap$(EXEEXT) test_priority_queue$(EXEEXT) \
+			test_thr_alarm$(EXEEXT) test_thr_lock$(EXEEXT) \
+			test_vsnprintf$(EXEEXT) test_io_cache$(EXEEXT) \
+			test_dir$(EXEEXT) test_charset$(EXEEXT) \
+			testhash$(EXEEXT) test_gethwaddr$(EXEEXT) \
+			test_base64$(EXEEXT) test_thr_mutex$(EXEEXT)
+
 #
 # The CP .. RM stuff is to avoid problems with some compilers (like alpha ccc)
 # which automaticly removes the object files you use to compile a final program
@@ -136,6 +145,9 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES)
 	$(LINK) $(FLAGS) -DMAIN  ./test_base64.c $(LDADD) $(LIBS)
 	$(RM) -f ./test_base64.c
 
+test_thr_mutex$(EXEEXT): test_thr_mutex.c $(LIBRARIES)
+	$(LINK) $(FLAGS) $(srcdir)/test_thr_mutex.c $(LDADD) $(LIBS)
+
 if HAVE_DTRACE_DASH_G
 libmysys_a_LIBADD += probes_mysql.o
 libmysys_a_DEPENDENCIES += probes_mysql.o dtrace_files dtrace_providers
diff --git a/mysys/array.c b/mysys/array.c
index 70c7a59aa3a..164e62bd9f9 100644
--- a/mysys/array.c
+++ b/mysys/array.c
@@ -30,8 +30,8 @@
       alloc_increment	Increment for adding new elements
 
   DESCRIPTION
-    init_dynamic_array() initiates array and allocate space for 
-    init_alloc eilements. 
+    init_dynamic_array() initializes the array and allocates space for
+    init_alloc elements.
     Array is usable even if space allocation failed, hence, the
     function never returns TRUE.
     Static buffers must begin immediately after the array structure.
@@ -51,12 +51,6 @@ my_bool init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size,
     if (init_alloc > 8 && alloc_increment > init_alloc * 2)
       alloc_increment=init_alloc*2;
   }
-
-  if (!init_alloc)
-  {
-    init_alloc=alloc_increment;
-    init_buffer= 0;
-  }
   array->elements=0;
   array->max_element=init_alloc;
   array->alloc_increment=alloc_increment;
@@ -70,13 +64,13 @@ my_bool init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size,
   if (!(array->buffer= (uchar*) my_malloc(element_size*init_alloc, MYF(0))))
     array->max_element=0;
   DBUG_RETURN(FALSE);
-} 
+}
 
 my_bool init_dynamic_array(DYNAMIC_ARRAY *array, uint element_size,
                            uint init_alloc, uint alloc_increment)
 {
   /* placeholder to preserve ABI */
-  return my_init_dynamic_array_ci(array, element_size, init_alloc, 
+  return my_init_dynamic_array_ci(array, element_size, init_alloc,
                                   alloc_increment);
 }
 /*
@@ -92,7 +86,7 @@ my_bool init_dynamic_array(DYNAMIC_ARRAY *array, uint element_size,
     FALSE	Ok
 */
 
-my_bool insert_dynamic(DYNAMIC_ARRAY *array, uchar* element)
+my_bool insert_dynamic(DYNAMIC_ARRAY *array, const uchar* element)
 {
   uchar* buffer;
   if (array->elements == array->max_element)
@@ -111,7 +105,7 @@ my_bool insert_dynamic(DYNAMIC_ARRAY *array, uchar* element)
 
 
 /*
-  Alloc space for next element(s) 
+  Alloc space for next element(s)
 
   SYNOPSIS
     alloc_dynamic()
@@ -129,6 +123,7 @@ my_bool insert_dynamic(DYNAMIC_ARRAY *array, uchar* element)
 
 uchar *alloc_dynamic(DYNAMIC_ARRAY *array)
 {
+  DBUG_ENTER("alloc_dynamic");
   if (array->elements == array->max_element)
   {
     char *new_ptr;
@@ -142,20 +137,20 @@ uchar *alloc_dynamic(DYNAMIC_ARRAY *array)
                                          array->alloc_increment) *
                                         array->size_of_element,
                                         MYF(MY_WME))))
-        return 0;
-      memcpy(new_ptr, array->buffer, 
+        DBUG_RETURN(0);
+      memcpy(new_ptr, array->buffer,
              array->elements * array->size_of_element);
     }
-    else
-    if (!(new_ptr=(char*) my_realloc(array->buffer,(array->max_element+
-                                     array->alloc_increment)*
-                                     array->size_of_element,
-                                     MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
-      return 0;
+    else if (!(new_ptr=(char*)
+               my_realloc(array->buffer,(array->max_element+
+                                         array->alloc_increment)*
+                          array->size_of_element,
+                          MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
+      DBUG_RETURN(0);
     array->buffer= (uchar*) new_ptr;
     array->max_element+=array->alloc_increment;
   }
-  return array->buffer+(array->elements++ * array->size_of_element);
+  DBUG_RETURN(array->buffer+(array->elements++ * array->size_of_element));
 }
 
 
@@ -165,8 +160,8 @@ uchar *alloc_dynamic(DYNAMIC_ARRAY *array)
   SYNOPSIS
     pop_dynamic()
       array
-  
-  RETURN VALUE    
+
+  RETURN VALUE
     pointer	Ok
     0		Array is empty
 */
@@ -188,9 +183,9 @@ uchar *pop_dynamic(DYNAMIC_ARRAY *array)
       idx	Index where element is to be inserted
 
   DESCRIPTION
-    set_dynamic() replaces element in array. 
-    If idx > max_element insert new element. Allocate memory if needed. 
- 
+    set_dynamic() replaces element in array.
+    If idx > max_element insert new element. Allocate memory if needed.
+
   RETURN VALUE
     TRUE	Idx was out of range and allocation of new memory failed
     FALSE	Ok
@@ -230,6 +225,8 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, uchar* element, uint idx)
 
 my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements)
 {
+  DBUG_ENTER("allocate_dynamic");
+
   if (max_elements >= array->max_element)
   {
     uint size;
@@ -243,23 +240,20 @@ my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements)
          so we have to create an all-new malloc since we overflowed
        */
        if (!(new_ptr= (uchar *) my_malloc(size *
-                                         array->size_of_element,
-                                         MYF(MY_WME))))
-         return 0;
-       memcpy(new_ptr, array->buffer, 
+                                          array->size_of_element,
+                                          MYF(MY_WME))))
+         DBUG_RETURN(TRUE);                    /* Allocation failed */
+       memcpy(new_ptr, array->buffer,
               array->elements * array->size_of_element);
      }
-     else
-
-
-    if (!(new_ptr= (uchar*) my_realloc(array->buffer,size*
-                                       array->size_of_element,
-                                       MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
-      return TRUE;
+    else if (!(new_ptr= (uchar*) my_realloc(array->buffer,size*
+                                            array->size_of_element,
+                                            MYF(MY_WME | MY_ALLOW_ZERO_PTR))))
+      DBUG_RETURN(TRUE);
     array->buffer= new_ptr;
     array->max_element= size;
   }
-  return FALSE;
+  DBUG_RETURN(FALSE);
 }
 
 
@@ -268,9 +262,9 @@ my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements)
 
   SYNOPSIS
     get_dynamic()
-      array	
+      array
       uchar*	Element to be returned. If idx > elements contain zeroes.
-      idx	Index of element wanted. 
+      idx	Index of element wanted.
 */
 
 void get_dynamic(DYNAMIC_ARRAY *array, uchar* element, uint idx)
@@ -347,7 +341,7 @@ void freeze_size(DYNAMIC_ARRAY *array)
   */
   if (array->buffer == (uchar *)(array + 1))
     return;
-    
+
   if (array->buffer && array->max_element != elements)
   {
     array->buffer=(uchar*) my_realloc(array->buffer,
@@ -364,7 +358,7 @@ void freeze_size(DYNAMIC_ARRAY *array)
   SYNOPSIS
     get_index_dynamic()
      array	Array
-     element Whose element index 
+     element Whose element index
 
 */
 
diff --git a/mysys/charset-def.c b/mysys/charset-def.c
index 9089347f002..82b3db32c43 100644
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@@ -24,119 +24,124 @@
 #ifdef HAVE_UCA_COLLATIONS
 
 #ifdef HAVE_CHARSET_ucs2
-extern CHARSET_INFO my_charset_ucs2_icelandic_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_latvian_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_romanian_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_slovenian_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_polish_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_estonian_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_spanish_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_swedish_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_turkish_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_czech_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_danish_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_slovak_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_spanish2_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_roman_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_persian_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_esperanto_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
-extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci;
+extern struct charset_info_st my_charset_ucs2_icelandic_uca_ci;
+extern struct charset_info_st my_charset_ucs2_latvian_uca_ci;
+extern struct charset_info_st my_charset_ucs2_romanian_uca_ci;
+extern struct charset_info_st my_charset_ucs2_slovenian_uca_ci;
+extern struct charset_info_st my_charset_ucs2_polish_uca_ci;
+extern struct charset_info_st my_charset_ucs2_estonian_uca_ci;
+extern struct charset_info_st my_charset_ucs2_spanish_uca_ci;
+extern struct charset_info_st my_charset_ucs2_swedish_uca_ci;
+extern struct charset_info_st my_charset_ucs2_turkish_uca_ci;
+extern struct charset_info_st my_charset_ucs2_czech_uca_ci;
+extern struct charset_info_st my_charset_ucs2_danish_uca_ci;
+extern struct charset_info_st my_charset_ucs2_lithuanian_uca_ci;
+extern struct charset_info_st my_charset_ucs2_slovak_uca_ci;
+extern struct charset_info_st my_charset_ucs2_spanish2_uca_ci;
+extern struct charset_info_st my_charset_ucs2_roman_uca_ci;
+extern struct charset_info_st my_charset_ucs2_persian_uca_ci;
+extern struct charset_info_st my_charset_ucs2_esperanto_uca_ci;
+extern struct charset_info_st my_charset_ucs2_hungarian_uca_ci;
+extern struct charset_info_st my_charset_ucs2_sinhala_uca_ci;
+extern struct charset_info_st my_charset_ucs2_croatian_uca_ci;
 #endif
 
 
 #ifdef HAVE_CHARSET_utf32
-extern CHARSET_INFO my_charset_utf32_icelandic_uca_ci;
-extern CHARSET_INFO my_charset_utf32_latvian_uca_ci;
-extern CHARSET_INFO my_charset_utf32_romanian_uca_ci;
-extern CHARSET_INFO my_charset_utf32_slovenian_uca_ci;
-extern CHARSET_INFO my_charset_utf32_polish_uca_ci;
-extern CHARSET_INFO my_charset_utf32_estonian_uca_ci;
-extern CHARSET_INFO my_charset_utf32_spanish_uca_ci;
-extern CHARSET_INFO my_charset_utf32_swedish_uca_ci;
-extern CHARSET_INFO my_charset_utf32_turkish_uca_ci;
-extern CHARSET_INFO my_charset_utf32_czech_uca_ci;
-extern CHARSET_INFO my_charset_utf32_danish_uca_ci;
-extern CHARSET_INFO my_charset_utf32_lithuanian_uca_ci;
-extern CHARSET_INFO my_charset_utf32_slovak_uca_ci;
-extern CHARSET_INFO my_charset_utf32_spanish2_uca_ci;
-extern CHARSET_INFO my_charset_utf32_roman_uca_ci;
-extern CHARSET_INFO my_charset_utf32_persian_uca_ci;
-extern CHARSET_INFO my_charset_utf32_esperanto_uca_ci;
-extern CHARSET_INFO my_charset_utf32_hungarian_uca_ci;
-extern CHARSET_INFO my_charset_utf32_sinhala_uca_ci;
+extern struct charset_info_st my_charset_utf32_icelandic_uca_ci;
+extern struct charset_info_st my_charset_utf32_latvian_uca_ci;
+extern struct charset_info_st my_charset_utf32_romanian_uca_ci;
+extern struct charset_info_st my_charset_utf32_slovenian_uca_ci;
+extern struct charset_info_st my_charset_utf32_polish_uca_ci;
+extern struct charset_info_st my_charset_utf32_estonian_uca_ci;
+extern struct charset_info_st my_charset_utf32_spanish_uca_ci;
+extern struct charset_info_st my_charset_utf32_swedish_uca_ci;
+extern struct charset_info_st my_charset_utf32_turkish_uca_ci;
+extern struct charset_info_st my_charset_utf32_czech_uca_ci;
+extern struct charset_info_st my_charset_utf32_danish_uca_ci;
+extern struct charset_info_st my_charset_utf32_lithuanian_uca_ci;
+extern struct charset_info_st my_charset_utf32_slovak_uca_ci;
+extern struct charset_info_st my_charset_utf32_spanish2_uca_ci;
+extern struct charset_info_st my_charset_utf32_roman_uca_ci;
+extern struct charset_info_st my_charset_utf32_persian_uca_ci;
+extern struct charset_info_st my_charset_utf32_esperanto_uca_ci;
+extern struct charset_info_st my_charset_utf32_hungarian_uca_ci;
+extern struct charset_info_st my_charset_utf32_sinhala_uca_ci;
+extern struct charset_info_st my_charset_utf32_croatian_uca_ci;
 #endif /* HAVE_CHARSET_utf32 */
 
 
 #ifdef HAVE_CHARSET_utf16
-extern CHARSET_INFO my_charset_utf16_icelandic_uca_ci;
-extern CHARSET_INFO my_charset_utf16_latvian_uca_ci;
-extern CHARSET_INFO my_charset_utf16_romanian_uca_ci;
-extern CHARSET_INFO my_charset_utf16_slovenian_uca_ci;
-extern CHARSET_INFO my_charset_utf16_polish_uca_ci;
-extern CHARSET_INFO my_charset_utf16_estonian_uca_ci;
-extern CHARSET_INFO my_charset_utf16_spanish_uca_ci;
-extern CHARSET_INFO my_charset_utf16_swedish_uca_ci;
-extern CHARSET_INFO my_charset_utf16_turkish_uca_ci;
-extern CHARSET_INFO my_charset_utf16_czech_uca_ci;
-extern CHARSET_INFO my_charset_utf16_danish_uca_ci;
-extern CHARSET_INFO my_charset_utf16_lithuanian_uca_ci;
-extern CHARSET_INFO my_charset_utf16_slovak_uca_ci;
-extern CHARSET_INFO my_charset_utf16_spanish2_uca_ci;
-extern CHARSET_INFO my_charset_utf16_roman_uca_ci;
-extern CHARSET_INFO my_charset_utf16_persian_uca_ci;
-extern CHARSET_INFO my_charset_utf16_esperanto_uca_ci;
-extern CHARSET_INFO my_charset_utf16_hungarian_uca_ci;
-extern CHARSET_INFO my_charset_utf16_sinhala_uca_ci;
+extern struct charset_info_st my_charset_utf16_icelandic_uca_ci;
+extern struct charset_info_st my_charset_utf16_latvian_uca_ci;
+extern struct charset_info_st my_charset_utf16_romanian_uca_ci;
+extern struct charset_info_st my_charset_utf16_slovenian_uca_ci;
+extern struct charset_info_st my_charset_utf16_polish_uca_ci;
+extern struct charset_info_st my_charset_utf16_estonian_uca_ci;
+extern struct charset_info_st my_charset_utf16_spanish_uca_ci;
+extern struct charset_info_st my_charset_utf16_swedish_uca_ci;
+extern struct charset_info_st my_charset_utf16_turkish_uca_ci;
+extern struct charset_info_st my_charset_utf16_czech_uca_ci;
+extern struct charset_info_st my_charset_utf16_danish_uca_ci;
+extern struct charset_info_st my_charset_utf16_lithuanian_uca_ci;
+extern struct charset_info_st my_charset_utf16_slovak_uca_ci;
+extern struct charset_info_st my_charset_utf16_spanish2_uca_ci;
+extern struct charset_info_st my_charset_utf16_roman_uca_ci;
+extern struct charset_info_st my_charset_utf16_persian_uca_ci;
+extern struct charset_info_st my_charset_utf16_esperanto_uca_ci;
+extern struct charset_info_st my_charset_utf16_hungarian_uca_ci;
+extern struct charset_info_st my_charset_utf16_sinhala_uca_ci;
+extern struct charset_info_st my_charset_utf16_croatian_uca_ci;
 #endif  /* HAVE_CHARSET_utf16 */
 
 
 #ifdef HAVE_CHARSET_utf8
-extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci;
-extern CHARSET_INFO my_charset_utf8_latvian_uca_ci;
-extern CHARSET_INFO my_charset_utf8_romanian_uca_ci;
-extern CHARSET_INFO my_charset_utf8_slovenian_uca_ci;
-extern CHARSET_INFO my_charset_utf8_polish_uca_ci;
-extern CHARSET_INFO my_charset_utf8_estonian_uca_ci;
-extern CHARSET_INFO my_charset_utf8_spanish_uca_ci;
-extern CHARSET_INFO my_charset_utf8_swedish_uca_ci;
-extern CHARSET_INFO my_charset_utf8_turkish_uca_ci;
-extern CHARSET_INFO my_charset_utf8_czech_uca_ci;
-extern CHARSET_INFO my_charset_utf8_danish_uca_ci;
-extern CHARSET_INFO my_charset_utf8_lithuanian_uca_ci;
-extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
-extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci;
-extern CHARSET_INFO my_charset_utf8_roman_uca_ci;
-extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
-extern CHARSET_INFO my_charset_utf8_esperanto_uca_ci;
-extern CHARSET_INFO my_charset_utf8_hungarian_uca_ci;
-extern CHARSET_INFO my_charset_utf8_sinhala_uca_ci;
+extern struct charset_info_st my_charset_utf8_icelandic_uca_ci;
+extern struct charset_info_st my_charset_utf8_latvian_uca_ci;
+extern struct charset_info_st my_charset_utf8_romanian_uca_ci;
+extern struct charset_info_st my_charset_utf8_slovenian_uca_ci;
+extern struct charset_info_st my_charset_utf8_polish_uca_ci;
+extern struct charset_info_st my_charset_utf8_estonian_uca_ci;
+extern struct charset_info_st my_charset_utf8_spanish_uca_ci;
+extern struct charset_info_st my_charset_utf8_swedish_uca_ci;
+extern struct charset_info_st my_charset_utf8_turkish_uca_ci;
+extern struct charset_info_st my_charset_utf8_czech_uca_ci;
+extern struct charset_info_st my_charset_utf8_danish_uca_ci;
+extern struct charset_info_st my_charset_utf8_lithuanian_uca_ci;
+extern struct charset_info_st my_charset_utf8_slovak_uca_ci;
+extern struct charset_info_st my_charset_utf8_spanish2_uca_ci;
+extern struct charset_info_st my_charset_utf8_roman_uca_ci;
+extern struct charset_info_st my_charset_utf8_persian_uca_ci;
+extern struct charset_info_st my_charset_utf8_esperanto_uca_ci;
+extern struct charset_info_st my_charset_utf8_hungarian_uca_ci;
+extern struct charset_info_st my_charset_utf8_sinhala_uca_ci;
+extern struct charset_info_st my_charset_utf8_croatian_uca_ci;
 #ifdef HAVE_UTF8_GENERAL_CS
-extern CHARSET_INFO my_charset_utf8_general_cs;
+extern struct charset_info_st my_charset_utf8_general_cs;
 #endif
 #endif
 
 #ifdef HAVE_CHARSET_utf8mb4
-extern CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_polish_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_czech_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_danish_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_roman_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_persian_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci;
-extern CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_latvian_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_romanian_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_polish_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_estonian_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_spanish_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_swedish_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_turkish_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_czech_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_danish_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_slovak_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_roman_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_persian_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci;
+extern struct charset_info_st my_charset_utf8mb4_croatian_uca_ci;
 #endif /* HAVE_CHARSET_utf8mb4 */
 
 #endif /* HAVE_UCA_COLLATIONS */
@@ -224,6 +229,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
   add_compiled_collation(&my_charset_ucs2_esperanto_uca_ci);
   add_compiled_collation(&my_charset_ucs2_hungarian_uca_ci);
   add_compiled_collation(&my_charset_ucs2_sinhala_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_croatian_uca_ci);
 #endif
 #endif
 
@@ -259,6 +265,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
   add_compiled_collation(&my_charset_utf8_esperanto_uca_ci);
   add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
   add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
+  add_compiled_collation(&my_charset_utf8_croatian_uca_ci);
 #endif
 #endif /* HAVE_CHARSET_utf8 */
 
@@ -287,6 +294,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
   add_compiled_collation(&my_charset_utf8mb4_esperanto_uca_ci);
   add_compiled_collation(&my_charset_utf8mb4_hungarian_uca_ci);
   add_compiled_collation(&my_charset_utf8mb4_sinhala_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_croatian_uca_ci);
 #endif /* HAVE_UCA_COLLATIONS  */
 #endif /* HAVE_CHARSET_utf8mb4 */
 
@@ -315,6 +323,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
   add_compiled_collation(&my_charset_utf16_esperanto_uca_ci);
   add_compiled_collation(&my_charset_utf16_hungarian_uca_ci);
   add_compiled_collation(&my_charset_utf16_sinhala_uca_ci);
+  add_compiled_collation(&my_charset_utf16_croatian_uca_ci);
 #endif /* HAVE_UCA_COLLATIOINS */
 #endif /* HAVE_CHARSET_utf16 */
 
@@ -343,12 +352,13 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
   add_compiled_collation(&my_charset_utf32_esperanto_uca_ci);
   add_compiled_collation(&my_charset_utf32_hungarian_uca_ci);
   add_compiled_collation(&my_charset_utf32_sinhala_uca_ci);
+  add_compiled_collation(&my_charset_utf32_croatian_uca_ci);
 #endif /* HAVE_UCA_COLLATIONS */
 #endif /* HAVE_CHARSET_utf32 */
 
   /* Copy compiled charsets */
   for (cs=compiled_charsets; cs->name; cs++)
-    add_compiled_collation(cs);
+    add_compiled_collation((struct charset_info_st *) cs);
   
   return FALSE;
 }
diff --git a/mysys/charset.c b/mysys/charset.c
index 167d6b8ff6e..7f414fbad97 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -53,21 +53,18 @@ get_collation_number_internal(const char *name)
 }
 
 
-static my_bool init_state_maps(CHARSET_INFO *cs)
+static my_bool init_state_maps(struct charset_info_st *cs)
 {
   uint i;
   uchar *state_map;
   uchar *ident_map;
 
-  if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
+  if (!(cs->state_map= state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
     return 1;
     
-  if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
+  if (!(cs->ident_map= ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
     return 1;
 
-  state_map= cs->state_map;
-  ident_map= cs->ident_map;
-  
   /* Fill state_map with states to get a faster parser */
   for (i=0; i < 256 ; i++)
   {
@@ -118,7 +115,7 @@ static my_bool init_state_maps(CHARSET_INFO *cs)
 }
 
 
-static void simple_cs_init_functions(CHARSET_INFO *cs)
+static void simple_cs_init_functions(struct charset_info_st *cs)
 {
   if (cs->state & MY_CS_BINSORT)
     cs->coll= &my_collation_8bit_bin_handler;
@@ -130,7 +127,7 @@ static void simple_cs_init_functions(CHARSET_INFO *cs)
 
 
 
-static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
+static int cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from)
 {
   to->number= from->number ? from->number : to->number;
 
@@ -203,7 +200,7 @@ static my_bool simple_cs_is_full(CHARSET_INFO *cs)
 
 
 static void
-copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
+copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from)
 {
   to->cset= from->cset;
   to->coll= from->coll;
@@ -217,18 +214,19 @@ copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
 }
 
 
-static int add_collation(CHARSET_INFO *cs)
+static int add_collation(struct charset_info_st *cs)
 {
   if (cs->name && (cs->number ||
                    (cs->number=get_collation_number_internal(cs->name))) &&
       cs->number < array_elements(all_charsets))
   {
-    if (!all_charsets[cs->number])
+    struct charset_info_st *newcs;
+    if (!(newcs= (struct charset_info_st*) all_charsets[cs->number]))
     {
-      if (!(all_charsets[cs->number]=
-         (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
+      if (!(all_charsets[cs->number]= newcs=
+         (struct charset_info_st*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
         return MY_XML_ERROR;
-      bzero((void*)all_charsets[cs->number],sizeof(CHARSET_INFO));
+      bzero(newcs,sizeof(CHARSET_INFO));
     }
     
     if (cs->primary_number == cs->number)
@@ -237,12 +235,11 @@ static int add_collation(CHARSET_INFO *cs)
     if (cs->binary_number == cs->number)
       cs->state |= MY_CS_BINSORT;
     
-    all_charsets[cs->number]->state|= cs->state;
+    newcs->state|= cs->state;
     
-    if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
+    if (!(newcs->state & MY_CS_COMPILED))
     {
-      CHARSET_INFO *newcs= all_charsets[cs->number];
-      if (cs_copy_data(all_charsets[cs->number],cs))
+      if (cs_copy_data(newcs,cs))
         return MY_XML_ERROR;
 
       newcs->caseup_multiply= newcs->casedn_multiply= 1;
@@ -287,15 +284,15 @@ static int add_collation(CHARSET_INFO *cs)
       }
       else
       {
-        uchar *sort_order= all_charsets[cs->number]->sort_order;
-        simple_cs_init_functions(all_charsets[cs->number]);
+        const uchar *sort_order= newcs->sort_order;
+        simple_cs_init_functions(newcs);
         newcs->mbminlen= 1;
         newcs->mbmaxlen= 1;
-        if (simple_cs_is_full(all_charsets[cs->number]))
+        if (simple_cs_is_full(newcs))
         {
-          all_charsets[cs->number]->state |= MY_CS_LOADED;
+          newcs->state |= MY_CS_LOADED;
         }
-        all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
+        newcs->state|= MY_CS_AVAILABLE;
         
         /*
           Check if case sensitive sort order: A < a < B.
@@ -305,12 +302,12 @@ static int add_collation(CHARSET_INFO *cs)
         */
         if (sort_order && sort_order['A'] < sort_order['a'] &&
                           sort_order['a'] < sort_order['B'])
-          all_charsets[cs->number]->state|= MY_CS_CSSORT; 
+          newcs->state|= MY_CS_CSSORT; 
 
-        if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
-          all_charsets[cs->number]->state|= MY_CS_PUREASCII;
+        if (my_charset_is_8bit_pure_ascii(newcs))
+          newcs->state|= MY_CS_PUREASCII;
         if (!my_charset_is_ascii_compatible(cs))
-          all_charsets[cs->number]->state|= MY_CS_NONASCII;
+	  newcs->state|= MY_CS_NONASCII;
       }
     }
     else
@@ -324,16 +321,15 @@ static int add_collation(CHARSET_INFO *cs)
         If a character set was compiled, this information
         will get lost and overwritten in add_compiled_collation().
       */
-      CHARSET_INFO *dst= all_charsets[cs->number];
-      dst->number= cs->number;
+      newcs->number= cs->number;
       if (cs->comment)
-	if (!(dst->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
+	if (!(newcs->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
 	  return MY_XML_ERROR;
       if (cs->csname)
-        if (!(dst->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
+        if (!(newcs->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
 	  return MY_XML_ERROR;
       if (cs->name)
-	if (!(dst->name= my_once_strdup(cs->name,MYF(MY_WME))))
+	if (!(newcs->name= my_once_strdup(cs->name,MYF(MY_WME))))
 	  return MY_XML_ERROR;
     }
     cs->number= 0;
@@ -417,7 +413,7 @@ char *get_charsets_dir(char *buf)
 CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL};
 CHARSET_INFO *default_charset_info = &my_charset_latin1;
 
-void add_compiled_collation(CHARSET_INFO *cs)
+void add_compiled_collation(struct charset_info_st *cs)
 {
   all_charsets[cs->number]= cs;
   cs->state|= MY_CS_AVAILABLE;
@@ -435,14 +431,15 @@ static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
 static void init_available_charsets(void)
 {
   char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
-  CHARSET_INFO **cs;
+  struct charset_info_st **cs;
 
-  bzero(&all_charsets,sizeof(all_charsets));
+  bzero((char*) &all_charsets,sizeof(all_charsets));
   init_compiled_charsets(MYF(0));
 
   /* Copy compiled charsets */
-  for (cs=all_charsets;
-       cs < all_charsets+array_elements(all_charsets)-1 ;
+  for (cs= (struct charset_info_st**) all_charsets;
+       cs < (struct charset_info_st**) all_charsets +
+            array_elements(all_charsets)-1 ;
        cs++)
   {
     if (*cs)
@@ -543,9 +540,9 @@ const char *get_charset_name(uint charset_number)
 static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
 {
   char  buf[FN_REFLEN];
-  CHARSET_INFO *cs;
+  struct charset_info_st *cs;
 
-  if ((cs= all_charsets[cs_number]))
+  if ((cs= (struct charset_info_st*) all_charsets[cs_number]))
   {
     if (cs->state & MY_CS_READY)  /* if CS is already initialized */
         return cs;
diff --git a/mysys/checksum.c b/mysys/checksum.c
index a96ea31ea0e..b2579351134 100644
--- a/mysys/checksum.c
+++ b/mysys/checksum.c
@@ -18,6 +18,8 @@
 #include <my_sys.h>
 #include <zlib.h>
 
+ha_checksum my_crc_dbug_check= 1;               /* Unlikely number */
+
 /*
   Calculate a long checksum for a memoryblock.
 
@@ -30,6 +32,9 @@
 
 ha_checksum my_checksum(ha_checksum crc, const uchar *pos, size_t length)
 {
-  return (ha_checksum)crc32((uint)crc, pos, (uint)length);
+  crc= (ha_checksum) crc32((uint)crc, pos, (uint) length);
+  DBUG_PRINT("info", ("crc: %lu", (ulong) crc));
+  if (crc == my_crc_dbug_check)
+    my_debug_put_break_here();
+  return crc;
 }
-
diff --git a/mysys/errors.c b/mysys/errors.c
index 8bc310652f1..5c8afff6395 100644
--- a/mysys/errors.c
+++ b/mysys/errors.c
@@ -52,6 +52,9 @@ const char *globerrs[GLOBERRS]=
   "File '%s' (fileno: %d) was not closed",
   "Can't change ownership of the file '%s' (Errcode: %d)",
   "Can't change permissions of the file '%s' (Errcode: %d)",
+  "Can't change mode for file '%s' to 0x%lx (Error: %d)",
+  "Can't do seek on file '%s' (Errcode: %d)",
+  "Warning: Can't copy ownership for file '%s' (Error: %d)"
 };
 
 void init_glob_errs(void)
@@ -94,6 +97,9 @@ void init_glob_errs()
   EE(EE_FILE_NOT_CLOSED) = "File '%s' (fileno: %d) was not closed";
   EE(EE_CHANGE_OWNERSHIP)   = "Can't change ownership of the file '%s' (Errcode: %d)";
   EE(EE_CHANGE_PERMISSIONS) = "Can't change permissions of the file '%s' (Errcode: %d)";
+  EE(EE_CANT_CHMOD)    = "Can't change mode for file '%s' to 0x%lx (Error: %d)";
+  EE(EE_CANT_SEEK)     = "Can't do seek on file '%s' (Errcode: %d)";
+  EE(EE_CANT_COPY_OWNERSHIP)= "Warning: Can't copy ownership for file '%s' (Error: %d)";
 }
 #endif
 
diff --git a/mysys/hash.c b/mysys/hash.c
index f54ac1a4abb..392d4f4fe15 100644
--- a/mysys/hash.c
+++ b/mysys/hash.c
@@ -361,7 +361,13 @@ static int hashcmp(const HASH *hash, HASH_LINK *pos, const uchar *key,
 }
 
 
-	/* Write a hash-key to the hash-index */
+/**
+   Write a hash-key to the hash-index
+
+   @return
+   @retval  0  ok
+   @retval  1  Duplicate key or out of memory
+*/
 
 my_bool my_hash_insert(HASH *info, const uchar *record)
 {
@@ -371,7 +377,7 @@ my_bool my_hash_insert(HASH *info, const uchar *record)
   uchar *UNINIT_VAR(ptr_to_rec),*UNINIT_VAR(ptr_to_rec2);
   HASH_LINK *data,*empty,*UNINIT_VAR(gpos),*UNINIT_VAR(gpos2),*pos;
 
-  if (HASH_UNIQUE & info->flags)
+  if (info->flags & HASH_UNIQUE)
   {
     uchar *key= (uchar*) my_hash_key(info, record, &idx, 1);
     if (my_hash_search(info, key, idx))
@@ -495,11 +501,21 @@ my_bool my_hash_insert(HASH *info, const uchar *record)
 }
 
 
-/******************************************************************************
-** Remove one record from hash-table. The record with the same record
-** ptr is removed.
-** if there is a free-function it's called for record if found
-******************************************************************************/
+/**
+   Remove one record from hash-table.
+
+   @fn    my_hash_delete()
+   @param hash		Hash tree
+   @param record	Row to be deleted
+
+   @notes
+   The record with the same record ptr is removed.
+   If there is a free-function it's called if record was found.
+
+   @return
+   @retval  0  ok
+   @retval  1 Record not found
+*/
 
 my_bool my_hash_delete(HASH *hash, uchar *record)
 {
@@ -584,10 +600,11 @@ exit:
   DBUG_RETURN(0);
 }
 
-	/*
-	  Update keys when record has changed.
-	  This is much more efficent than using a delete & insert.
-	  */
+
+/**
+   Update keys when record has changed.
+   This is much more efficient than using a delete & insert.
+*/
 
 my_bool my_hash_update(HASH *hash, uchar *record, uchar *old_key,
                        size_t old_key_length)
@@ -710,6 +727,37 @@ void my_hash_replace(HASH *hash, HASH_SEARCH_STATE *current_record,
 }
 
 
+/**
+   Iterate over all elements in hash and call function with the element
+
+   @param hash     hash array
+   @param action   function to call for each element
+   @param argument second argument for call to action
+
+   @notes
+   If one of the calls to action returns non-zero, the iteration aborts
+
+   @retval 0  ok
+   @retval 1  iteration aborted because action returned non-zero
+*/
+
+my_bool my_hash_iterate(HASH *hash, my_hash_walk_action action, void *argument)
+{
+  uint records, i;
+  HASH_LINK *data;
+
+  records= hash->records;
+  data= dynamic_element(&hash->array,0,HASH_LINK*);
+
+  for (i= 0 ; i < records ; i++)
+  {
+    if ((*action)(data[i].data, argument))
+      return 1;
+  }
+  return 0;
+}
+
+
 #ifndef DBUG_OFF
 
 my_bool my_hash_check(HASH *hash)
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
index 4ed01ac8083..3eec91236c5 100644
--- a/mysys/lf_alloc-pin.c
+++ b/mysys/lf_alloc-pin.c
@@ -115,7 +115,7 @@ void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
                     lf_pinbox_free_func *free_func, void *free_func_arg)
 {
   DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
-  compile_time_assert(sizeof(LF_PINS) == 64);
+  compile_time_assert(sizeof(LF_PINS) == 128);
   lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS));
   pinbox->pinstack_top_ver= 0;
   pinbox->pins_in_array= 0;
@@ -448,6 +448,8 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset)
   allocator->top= 0;
   allocator->mallocs= 0;
   allocator->element_size= size;
+  allocator->constructor= 0;
+  allocator->destructor= 0;
   DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset);
 }
 
@@ -468,6 +470,8 @@ void lf_alloc_destroy(LF_ALLOCATOR *allocator)
   while (node)
   {
     uchar *tmp= anext_node(node);
+    if (allocator->destructor)
+      allocator->destructor(node);
     my_free(node);
     node= tmp;
   }
@@ -496,6 +500,8 @@ void *_lf_alloc_new(LF_PINS *pins)
     if (!node)
     {
       node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
+      if (allocator->constructor)
+        allocator->constructor(node);
 #ifdef MY_LF_EXTRA_DEBUG
       if (likely(node != 0))
         my_atomic_add32(&allocator->mallocs, 1);
diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c
index e7bf82fc6ca..2c89f5ca7ca 100644
--- a/mysys/lf_hash.c
+++ b/mysys/lf_hash.c
@@ -41,6 +41,8 @@ typedef struct {
   */
 } LF_SLIST;
 
+const int LF_HASH_OVERHEAD= sizeof(LF_SLIST);
+
 /*
   a structure to pass the context (pointers two the three successive elements
   in a list) from lfind to linsert/ldelete
@@ -121,8 +123,8 @@ retry:
         we found a deleted node - be nice, help the other thread
         and remove this deleted node
       */
-      if (my_atomic_casptr((void **)cursor->prev,
-                           (void **)&cursor->curr, cursor->next))
+      if (my_atomic_casptr((void **) cursor->prev,
+                           (void **)(char*) &cursor->curr, cursor->next))
         _lf_alloc_free(pins, cursor->curr);
       else
       {
@@ -168,7 +170,8 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
       node->link= (intptr)cursor.curr;
       DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
       DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
-      if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node))
+      if (my_atomic_casptr((void **) cursor.prev,
+                           (void **)(char*) &cursor.curr, node))
       {
         res= 1; /* inserted ok */
         break;
@@ -215,13 +218,13 @@ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
     else
     {
       /* mark the node deleted */
-      if (my_atomic_casptr((void **)&(cursor.curr->link),
-                           (void **)&cursor.next,
+      if (my_atomic_casptr((void **) (char*) &(cursor.curr->link),
+                           (void **) (char*) &cursor.next,
                            (void *)(((intptr)cursor.next) | 1)))
       {
         /* and remove it from the list */
         if (my_atomic_casptr((void **)cursor.prev,
-                             (void **)&cursor.curr, cursor.next))
+                             (void **)(char*)&cursor.curr, cursor.next))
           _lf_alloc_free(pins, cursor.curr);
         else
         {
@@ -490,7 +493,7 @@ static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node,
     my_free(dummy);
     dummy= cur;
   }
-  my_atomic_casptr((void **)node, (void **)&tmp, dummy);
+  my_atomic_casptr((void **)node, (void **)(char*) &tmp, dummy);
   /*
     note that if the CAS above failed (after linsert() succeeded),
     it would mean that some other thread has executed linsert() for
diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c
index 173b678cdd1..aa7e4be03eb 100644
--- a/mysys/mf_iocache.c
+++ b/mysys/mf_iocache.c
@@ -567,7 +567,7 @@ int _my_b_read(register IO_CACHE *info, uchar *Buffer, size_t Count)
     if (Count)
     {
       /* We couldn't fulfil the request. Return, how much we got. */
-      info->error= left_length;
+      info->error= (int) left_length;
       DBUG_RETURN(1);
     }
     length=0;				/* Didn't read any chars */
@@ -1306,7 +1306,7 @@ read_append_buffer:
     info->append_read_pos += copy_len;
     Count -= copy_len;
     if (Count)
-      info->error = save_count - Count;
+      info->error= (int) (save_count - Count);
 
     /* Fill read buffer with data from write buffer */
     memcpy(info->buffer, info->append_read_pos,
@@ -1695,8 +1695,8 @@ int my_block_write(register IO_CACHE *info, const uchar *Buffer, size_t Count,
   {
     /* Of no overlap, write everything without buffering */
     if (pos + Count <= info->pos_in_file)
-      return my_pwrite(info->file, Buffer, Count, pos,
-		       info->myflags | MY_NABP);
+      return (int) my_pwrite(info->file, Buffer, Count, pos,
+                             info->myflags | MY_NABP);
     /* Write the part of the block that is before buffer */
     length= (uint) (info->pos_in_file - pos);
     if (my_pwrite(info->file, Buffer, length, pos, info->myflags | MY_NABP))
@@ -1888,6 +1888,9 @@ int end_io_cache(IO_CACHE *info)
     mysql_mutex_destroy(&info->append_buffer_lock);
 #endif
   }
+#ifdef THREAD
+  info->share= 0;
+#endif
   DBUG_RETURN(error);
 } /* end_io_cache */
 
diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c
index 7a40ea8a86f..04a5214e2d4 100644
--- a/mysys/mf_iocache2.c
+++ b/mysys/mf_iocache2.c
@@ -429,9 +429,9 @@ process_flags:
       /* minimum width padding */
       if (minimum_width > length2) 
       {
-        char *buffz;
+        uchar *buffz;
                     
-        buffz= my_alloca(minimum_width - length2);
+        buffz= (uchar*) my_alloca(minimum_width - length2);
         if (is_zero_padded)
           memset(buffz, '0', minimum_width - length2);
         else
@@ -473,3 +473,53 @@ process_flags:
 err:
   return (size_t) -1;
 }
+
+
+int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
+                          const char *default_val)
+{
+  uint length;
+  DBUG_ENTER("init_strvar_from_file");
+
+  if ((length=my_b_gets(f,var, max_size)))
+  {
+    char* last_p = var + length -1;
+    if (*last_p == '\n')
+      *last_p = 0; /* if we stopped on newline, kill it */
+    else
+    {
+      /*
+        If we truncated a line or stopped on last char, remove all chars
+        up to and including newline.
+      */
+      int c;
+      while (((c=my_b_get(f)) != '\n' && c != my_b_EOF))
+        ;
+    }
+    DBUG_RETURN(0);
+  }
+  else if (default_val)
+  {
+    strmake(var,  default_val, max_size-1);
+    DBUG_RETURN(0);
+  }
+  DBUG_RETURN(1);
+}
+
+int init_intvar_from_file(int* var, IO_CACHE* f, int default_val)
+{
+  char buf[32];
+  DBUG_ENTER("init_intvar_from_file");
+
+  if (my_b_gets(f, buf, sizeof(buf)))
+  {
+    *var = atoi(buf);
+    DBUG_RETURN(0);
+  }
+  else if (default_val)
+  {
+    *var = default_val;
+    DBUG_RETURN(0);
+  }
+  DBUG_RETURN(1);
+}
diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c
index fc62d3d8a8e..6a283383e24 100644
--- a/mysys/mf_keycache.c
+++ b/mysys/mf_keycache.c
@@ -13,12 +13,44 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
+
+
 /**
-  @file
+  @file 
+  The file contains the following modules:
+
+    Simple Key Cache Module
+
+    Partitioned Key Cache Module
+
+    Key Cache Interface Module
+     
+*/
+
+#include "mysys_priv.h"
+#include "mysys_err.h"
+#include <keycache.h>
+#include "my_static.h"
+#include <m_string.h>
+#include <my_bit.h>
+#include <errno.h>
+#include <stdarg.h>
+#include "probes_mysql.h"
+
+/****************************************************************************** 
+  Simple Key Cache Module
+
+  The module contains implementations of all key cache interface functions
+  employed by partitioned key caches. 
+     
+******************************************************************************/
+
+/*
   These functions handle keyblock cacheing for ISAM and MyISAM tables.
 
   One cache can handle many files.
   It must contain buffers of the same blocksize.
+
   init_key_cache() should be used to init cache handler.
 
   The free list (free_block_list) is a stack like structure.
@@ -37,9 +69,7 @@
   blocks_unused is the sum of never used blocks in the pool and of currently
   free blocks. blocks_used is the number of blocks fetched from the pool and
   as such gives the maximum number of in-use blocks at any time.
-*/
 
-/*
   Key Cache Locking
   =================
 
@@ -104,15 +134,77 @@
   I/O finished.
 */
 
-#include "mysys_priv.h"
-#include "mysys_err.h"
-#include <keycache.h>
-#include "my_static.h"
-#include <m_string.h>
-#include <my_bit.h>
-#include <errno.h>
-#include <stdarg.h>
-#include "probes_mysql.h"
+/* declare structures that are used by st_key_cache */
+
+struct st_block_link;
+typedef struct st_block_link BLOCK_LINK;
+struct st_keycache_page;
+typedef struct st_keycache_page KEYCACHE_PAGE;
+struct st_hash_link;
+typedef struct st_hash_link HASH_LINK;
+
+/* info about requests in a waiting queue */
+typedef struct st_keycache_wqueue
+{
+  struct st_my_thread_var *last_thread;  /* circular list of waiting threads */
+} KEYCACHE_WQUEUE;
+
+#define CHANGED_BLOCKS_HASH 128             /* must be power of 2 */
+
+/* Control block for a simple (non-partitioned) key cache */
+
+typedef struct st_simple_key_cache_cb
+{
+  my_bool key_cache_inited;      /* <=> control block is allocated           */
+  my_bool in_resize;             /* true during resize operation             */
+  my_bool resize_in_flush;       /* true during flush of resize operation    */
+  my_bool can_be_used;           /* usage of cache for read/write is allowed */
+  size_t key_cache_mem_size;     /* specified size of the cache memory       */
+  uint key_cache_block_size;     /* size of the page buffer of a cache block */
+  ulong min_warm_blocks;         /* min number of warm blocks;               */
+  ulong age_threshold;           /* age threshold for hot blocks             */
+  ulonglong keycache_time;       /* total number of block link operations    */
+  uint hash_entries;             /* max number of entries in the hash table  */
+  int hash_links;                /* max number of hash links                 */
+  int hash_links_used;           /* number of hash links currently used      */
+  int disk_blocks;               /* max number of blocks in the cache        */
+  ulong blocks_used;           /* maximum number of concurrently used blocks */
+  ulong blocks_unused;           /* number of currently unused blocks        */
+  ulong blocks_changed;          /* number of currently dirty blocks         */
+  ulong warm_blocks;             /* number of blocks in warm sub-chain       */
+  ulong cnt_for_resize_op;       /* counter to block resize operation        */
+  long blocks_available;      /* number of blocks available in the LRU chain */
+  HASH_LINK **hash_root;         /* arr. of entries into hash table buckets  */
+  HASH_LINK *hash_link_root;     /* memory for hash table links              */
+  HASH_LINK *free_hash_list;     /* list of free hash links                  */
+  BLOCK_LINK *free_block_list;   /* list of free blocks                      */
+  BLOCK_LINK *block_root;        /* memory for block links                   */
+  uchar HUGE_PTR *block_mem;     /* memory for block buffers                 */
+  BLOCK_LINK *used_last;         /* ptr to the last block of the LRU chain   */
+  BLOCK_LINK *used_ins;          /* ptr to the insertion block in LRU chain  */
+  pthread_mutex_t cache_lock;    /* to lock access to the cache structure    */
+  KEYCACHE_WQUEUE resize_queue;  /* threads waiting during resize operation  */
+  /*
+    Waiting for a zero resize count. Using a queue for symmetry though
+    only one thread can wait here.
+  */
+  KEYCACHE_WQUEUE waiting_for_resize_cnt;
+  KEYCACHE_WQUEUE waiting_for_hash_link; /* waiting for a free hash link     */
+  KEYCACHE_WQUEUE waiting_for_block;    /* requests waiting for a free block */
+  BLOCK_LINK *changed_blocks[CHANGED_BLOCKS_HASH]; /* hash for dirty file bl.*/
+  BLOCK_LINK *file_blocks[CHANGED_BLOCKS_HASH];    /* hash for other file bl.*/
+
+  /* Statistics variables. These are reset in reset_key_cache_counters(). */
+  ulong global_blocks_changed;      /* number of currently dirty blocks      */
+  ulonglong global_cache_w_requests;/* number of write requests (write hits) */
+  ulonglong global_cache_write;     /* number of writes from cache to files  */
+  ulonglong global_cache_r_requests;/* number of read requests (read hits)   */
+  ulonglong global_cache_read;      /* number of reads from files to cache   */
+
+  int blocks;                   /* max number of blocks in the cache        */
+  uint hash_factor;             /* factor used to calculate hash function   */
+  my_bool in_init;		/* Set to 1 in MySQL during init/resize     */
+} SIMPLE_KEY_CACHE_CB;
 
 /*
   Some compilation flags have been added specifically for this module
@@ -224,7 +316,8 @@ KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;
 
 #define FLUSH_CACHE         2000            /* sort this many blocks at once */
 
-static int flush_all_key_blocks(KEY_CACHE *keycache);
+static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache);
+static void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup);
 #ifdef THREAD
 static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
                           mysql_mutex_t *mutex);
@@ -233,15 +326,16 @@ static void release_whole_queue(KEYCACHE_WQUEUE *wqueue);
 #define wait_on_queue(wqueue, mutex)    do {} while (0)
 #define release_whole_queue(wqueue)     do {} while (0)
 #endif
-static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block);
+static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block);
 #if !defined(DBUG_OFF)
-static void test_key_cache(KEY_CACHE *keycache,
+static void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
                            const char *where, my_bool lock);
 #endif
-
+#define KEYCACHE_BASE_EXPR(f, pos)                                            \
+  ((ulong) ((pos) / keycache->key_cache_block_size) +	 (ulong) (f))
 #define KEYCACHE_HASH(f, pos)                                                 \
-(((ulong) ((pos) / keycache->key_cache_block_size) +                          \
-                                     (ulong) (f)) & (keycache->hash_entries-1))
+  ((KEYCACHE_BASE_EXPR(f, pos) / keycache->hash_factor) &                     \
+      (keycache->hash_entries-1))
 #define FILE_HASH(f)                 ((uint) (f) & (CHANGED_BLOCKS_HASH-1))
 
 #define DEFAULT_KEYCACHE_DEBUG_LOG  "keycache_debug.log"
@@ -337,9 +431,10 @@ static int keycache_pthread_cond_signal(mysql_cond_t *cond);
 #define inline  /* disabled inline for easier debugging */
 static int fail_block(BLOCK_LINK *block);
 static int fail_hlink(HASH_LINK *hlink);
-static int cache_empty(KEY_CACHE *keycache);
+static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache);
 #endif
 
+
 static inline uint next_power(uint value)
 {
   return (uint) my_round_up_to_next_power((uint32) value) << 1;
@@ -347,19 +442,32 @@ static inline uint next_power(uint value)
 
 
 /*
-  Initialize a key cache
+  Initialize a simple key cache
 
   SYNOPSIS
-    init_key_cache()
-    keycache			pointer to a key cache data structure
-    key_cache_block_size	size of blocks to keep cached data
-    use_mem                 	total memory to use for the key cache
-    division_limit		division limit (may be zero)
-    age_threshold		age threshold (may be zero)
+    init_simple_key_cache()
+    keycache                pointer to the control block of a simple key cache 
+    key_cache_block_size    size of blocks to keep cached data
+    use_mem                 memory to use for the key cache buffers/structures
+    division_limit          division limit (may be zero)
+    age_threshold           age threshold (may be zero)
+
+  DESCRIPTION
+    This function is the implementation of the init_key_cache interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function builds a simple key cache and initializes the control block
+    structure of the type SIMPLE_KEY_CACHE_CB that is used for this key cache. 
+    The parameter keycache is supposed to point to this structure. 
+    The parameter key_cache_block_size specifies the size of the blocks in
+    the key cache to be built. The parameters division_limit and age_threshold
+    determine the initial values of those characteristics of the key cache
+    that are used for midpoint insertion strategy. The parameter use_mem
+    specifies the total amount of memory to be allocated for key cache blocks
+    and auxiliary structures.       
 
   RETURN VALUE
     number of blocks in the key cache, if successful,
-    0 - otherwise.
+    <= 0 - otherwise.
 
   NOTES.
     if keycache->key_cache_inited != 0 we assume that the key cache
@@ -368,17 +476,17 @@ static inline uint next_power(uint value)
 
     It's assumed that no two threads call this function simultaneously
     referring to the same key cache handle.
-
 */
 
-int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
-                   size_t use_mem, uint division_limit,
-                   uint age_threshold)
+static
+int init_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, uint key_cache_block_size,
+		          size_t use_mem, uint division_limit,
+		          uint age_threshold)
 {
   ulong blocks, hash_links;
   size_t length;
   int error;
-  DBUG_ENTER("init_key_cache");
+  DBUG_ENTER("init_simple_key_cache");
   DBUG_ASSERT(key_cache_block_size >= 512);
 
   KEYCACHE_DEBUG_OPEN;
@@ -388,12 +496,15 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
     DBUG_RETURN(0);
   }
 
+  keycache->blocks_used= keycache->blocks_unused= 0;
+  keycache->global_blocks_changed= 0;
   keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
   keycache->global_cache_read= keycache->global_cache_write= 0;
   keycache->disk_blocks= -1;
   if (! keycache->key_cache_inited)
   {
     keycache->key_cache_inited= 1;
+    keycache->hash_factor= 1;
     /*
       Initialize these variables once only.
       Their value must survive re-initialization during resizing.
@@ -536,51 +647,42 @@ err:
 
 
 /*
-  Resize a key cache
+  Prepare for resizing a simple key cache
 
   SYNOPSIS
-    resize_key_cache()
-    keycache     	        pointer to a key cache data structure
-    key_cache_block_size        size of blocks to keep cached data
-    use_mem			total memory to use for the new key cache
-    division_limit		new division limit (if not zero)
-    age_threshold		new age threshold (if not zero)
-
-  RETURN VALUE
-    number of blocks in the key cache, if successful,
-    0 - otherwise.
+    prepare_resize_simple_key_cache()
+    keycache                pointer to the control block of a simple key cache
+    with_resize_queue       <=> resize queue is used		
+    release_lock            <=> release the key cache lock before return
 
-  NOTES.
-    The function first compares the memory size and the block size parameters
-    with the key cache values.
+  DESCRIPTION
+    This function flushes all dirty pages from a simple key cache and after
+    this it destroys the key cache calling end_simple_key_cache. The function 
+    takes the parameter keycache as a pointer to the control block 
+    structure of the type SIMPLE_KEY_CACHE_CB for this key cache.
+    The parameter with_resize_queue determines whether the resize queue is
+    involved (MySQL server never uses this queue). The parameter release_lock
+    says whether the key cache lock must be released before return from
+    the function.
 
-    If they differ the function free the the memory allocated for the
-    old key cache blocks by calling the end_key_cache function and
-    then rebuilds the key cache with new blocks by calling
-    init_key_cache.
+  RETURN VALUE
+    0 - on success,
+    1 - otherwise.
 
-    The function starts the operation only when all other threads
-    performing operations with the key cache let her to proceed
-    (when cnt_for_resize=0).
+  NOTES
+    This function is called by resize_simple_key_cache and
+    resize_partitioned_key_cache that resize simple and partitioned key caches
+    respectively. 
 */
 
-int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
-                     size_t use_mem, uint division_limit,
-                     uint age_threshold)
+static 
+int prepare_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
+                                    my_bool with_resize_queue,
+                                    my_bool release_lock)
 {
-  int blocks;
-  DBUG_ENTER("resize_key_cache");
-
-  if (!keycache->key_cache_inited)
-    DBUG_RETURN(keycache->disk_blocks);
-
-  if(key_cache_block_size == keycache->key_cache_block_size &&
-     use_mem == keycache->key_cache_mem_size)
-  {
-    change_key_cache_param(keycache, division_limit, age_threshold);
-    DBUG_RETURN(keycache->disk_blocks);
-  }
-
+  int res= 0;
+  DBUG_ENTER("prepare_resize_simple_key_cache"); 
+ 
   keycache_pthread_mutex_lock(&keycache->cache_lock);
 
 #ifdef THREAD
@@ -590,7 +692,7 @@ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
     one resizer only. In set_var.cc keycache->in_init is used to block
     multiple attempts.
   */
-  while (keycache->in_resize)
+  while (with_resize_queue && keycache->in_resize)
   {
     /* purecov: begin inspected */
     wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
@@ -615,8 +717,8 @@ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
     {
       /* TODO: if this happens, we should write a warning in the log file ! */
       keycache->resize_in_flush= 0;
-      blocks= 0;
       keycache->can_be_used= 0;
+      res= 1;
       goto finish;
     }
     DBUG_ASSERT(cache_empty(keycache));
@@ -642,29 +744,144 @@ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
 #else
   KEYCACHE_DBUG_ASSERT(keycache->cnt_for_resize_op == 0);
 #endif
-
-  /*
-    Free old cache structures, allocate new structures, and initialize
-    them. Note that the cache_lock mutex and the resize_queue are left
-    untouched. We do not lose the cache_lock and will release it only at
-    the end of this function.
-  */
-  end_key_cache(keycache, 0);			/* Don't free mutex */
-  /* The following will work even if use_mem is 0 */
-  blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
-			 division_limit, age_threshold);
+  
+  end_simple_key_cache(keycache, 0);
 
 finish:
+  if (release_lock)
+    keycache_pthread_mutex_unlock(&keycache->cache_lock);     
+  DBUG_RETURN(res);
+}
+
+
+/*
+  Finalize resizing a simple key cache
+
+  SYNOPSIS
+    finish_resize_simple_key_cache()
+    keycache                pointer to the control block of a simple key cache
+    with_resize_queue       <=> resize queue is used		
+    acquire_lock            <=> acquire the key cache lock at start
+
+  DESCRIPTION
+    This function performs finalizing actions for the operation of 
+    resizing a simple key cache. The function takes the parameter
+    keycache as a pointer to the control block structure of the type
+    SIMPLE_KEY_CACHE_CB for this key cache. The function sets the flag
+    in_resize in this structure to FALSE.
+    The parameter with_resize_queue determines whether the resize queue
+    is involved (MySQL server never uses this queue).
+    The parameter acquire_lock says whether the key cache lock must be
+    acquired at the start of the function.
+
+  RETURN VALUE
+    none
+
+  NOTES
+    This function is called by resize_simple_key_cache and
+    resize_partitioned_key_cache that resize simple and partitioned key caches
+    respectively. 
+*/
+
+static 
+void finish_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
+                                    my_bool with_resize_queue,
+                                    my_bool acquire_lock)
+{
+  DBUG_ENTER("finish_resize_simple_key_cache");
+
+  if (acquire_lock)
+    keycache_pthread_mutex_lock(&keycache->cache_lock); 
+  
+  safe_mutex_assert_owner(&keycache->cache_lock);
+			   
   /*
     Mark the resize finished. This allows other threads to start a
     resize or to request new cache blocks.
   */
   keycache->in_resize= 0;
-
-  /* Signal waiting threads. */
-  release_whole_queue(&keycache->resize_queue);
+  
+  if (with_resize_queue)
+  {
+    /* Signal waiting threads. */
+    release_whole_queue(&keycache->resize_queue);
+  }
 
   keycache_pthread_mutex_unlock(&keycache->cache_lock);
+
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Resize a simple key cache
+
+  SYNOPSIS
+    resize_simple_key_cache()
+    keycache                pointer to the control block of a simple key cache
+    key_cache_block_size    size of blocks to keep cached data
+    use_mem                 memory to use for the key cache buffers/structures
+    division_limit          new division limit (if not zero)
+    age_threshold           new age threshold (if not zero)
+
+  DESCRIPTION
+    This function is the implementation of the resize_key_cache interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
+    cache to be resized. 
+    The parameter key_cache_block_size specifies the new size of the blocks in
+    the key cache. The parameters division_limit and age_threshold
+    determine the new initial values of those characteristics of the key cache
+    that are used for midpoint insertion strategy. The parameter use_mem
+    specifies the total amount of memory to be allocated for key cache blocks
+    and auxiliary structures in the new key cache.           
+
+  RETURN VALUE
+    number of blocks in the key cache, if successful,
+    0 - otherwise.
+
+  NOTES.
+    The function first calls the function prepare_resize_simple_key_cache
+    to flush all dirty blocks from key cache, to free memory used
+    for key cache blocks and auxiliary structures. After this the
+    function builds a new key cache with new parameters.
+
+    This implementation doesn't block the calls and executions of other
+    functions from the key cache interface. However it assumes that the
+    calls of resize_simple_key_cache itself are serialized.
+
+    The function starts the operation only when all other threads
+    performing operations with the key cache let it proceed
+    (when cnt_for_resize=0).
+*/
+
+static
+int resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, uint key_cache_block_size,
+		            size_t use_mem, uint division_limit,
+		            uint age_threshold)
+{
+  int blocks= 0;
+  DBUG_ENTER("resize_simple_key_cache");
+
+  if (!keycache->key_cache_inited)
+    DBUG_RETURN(blocks);
+
+  /*
+    Note that the cache_lock mutex and the resize_queue are left untouched.
+    We do not lose the cache_lock and will release it only at the end of 
+    this function.
+  */
+  if (prepare_resize_simple_key_cache(keycache, 1, 0))
+    goto finish;
+
+  /* The following will work even if use_mem is 0 */ 
+  blocks= init_simple_key_cache(keycache, key_cache_block_size, use_mem,
+			        division_limit, age_threshold);
+
+finish:
+  finish_resize_simple_key_cache(keycache, 1, 0);
+
   DBUG_RETURN(blocks);
 }
 
@@ -672,7 +889,7 @@ finish:
 /*
   Increment counter blocking resize key cache operation
 */
-static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
+static inline void inc_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache)
 {
   keycache->cnt_for_resize_op++;
 }
@@ -682,35 +899,47 @@ static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
   Decrement counter blocking resize key cache operation;
   Signal the operation to proceed when counter becomes equal zero
 */
-static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
+static inline void dec_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache)
 {
   if (!--keycache->cnt_for_resize_op)
     release_whole_queue(&keycache->waiting_for_resize_cnt);
 }
 
+
 /*
-  Change the key cache parameters
+  Change key cache parameters of a simple key cache
 
   SYNOPSIS
-    change_key_cache_param()
-    keycache			pointer to a key cache data structure
-    division_limit		new division limit (if not zero)
-    age_threshold		new age threshold (if not zero)
+    change_simple_key_cache_param()
+    keycache                pointer to the control block of a simple key cache	
+    division_limit          new division limit (if not zero)
+    age_threshold           new age threshold (if not zero)
+
+  DESCRIPTION
+    This function is the implementation of the change_key_cache_param interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
+    cache where new values of the division limit and the age threshold used
+    for midpoint insertion strategy are to be set.  The parameters
+    division_limit and age_threshold provide these new values.
 
   RETURN VALUE
     none
 
   NOTES.
-    Presently the function resets the key cache parameters
-    concerning midpoint insertion strategy - division_limit and
-    age_threshold.
+    Presently the function resets the key cache parameters concerning
+    midpoint insertion strategy - division_limit and age_threshold.
+    This function changes some parameters of a given key cache without
+    reformatting it. The function does not touch the contents of the key
+    cache blocks.
 */
 
-void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
-			    uint age_threshold)
+static
+void change_simple_key_cache_param(SIMPLE_KEY_CACHE_CB *keycache, uint division_limit,
+			           uint age_threshold)
 {
-  DBUG_ENTER("change_key_cache_param");
-
+  DBUG_ENTER("change_simple_key_cache_param");
   keycache_pthread_mutex_lock(&keycache->cache_lock);
   if (division_limit)
     keycache->min_warm_blocks= (keycache->disk_blocks *
@@ -724,20 +953,31 @@ void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
 
 
 /*
-  Remove key_cache from memory
+  Destroy a simple key cache 
 
   SYNOPSIS
-    end_key_cache()
-    keycache		key cache handle
-    cleanup		Complete free (Free also mutex for key cache)
+    end_simple_key_cache()
+    keycache                pointer to the control block of a simple key cache
+    cleanup                 <=> complete free (free also mutex for key cache)
+
+  DESCRIPTION
+    This function is the implementation of the end_key_cache interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
+    cache to be destroyed.
+    The function frees the memory allocated for the key cache blocks and
+    auxiliary structures. If the value of the parameter cleanup is TRUE 
+    then even the key cache mutex is freed.
 
   RETURN VALUE
     none
 */
 
-void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
+static
+void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup)
 {
-  DBUG_ENTER("end_key_cache");
+  DBUG_ENTER("end_simple_key_cache");
   DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache));
 
   if (!keycache->key_cache_inited)
@@ -1028,7 +1268,7 @@ static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
     void
 */
 
-static void link_to_file_list(KEY_CACHE *keycache,
+static void link_to_file_list(SIMPLE_KEY_CACHE_CB *keycache,
                               BLOCK_LINK *block, int file,
                               my_bool unlink_block)
 {
@@ -1069,7 +1309,7 @@ static void link_to_file_list(KEY_CACHE *keycache,
     void
 */
 
-static void link_to_changed_list(KEY_CACHE *keycache,
+static void link_to_changed_list(SIMPLE_KEY_CACHE_CB *keycache,
                                  BLOCK_LINK *block)
 {
   DBUG_ASSERT(block->status & BLOCK_IN_USE);
@@ -1124,8 +1364,8 @@ static void link_to_changed_list(KEY_CACHE *keycache,
     not linked in the LRU ring.
 */
 
-static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot,
-                       my_bool at_end)
+static void link_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block,
+                       my_bool hot, my_bool at_end)
 {
   BLOCK_LINK *ins;
   BLOCK_LINK **pins;
@@ -1245,7 +1485,7 @@ static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot,
     See NOTES for link_block
 */
 
-static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
+static void unlink_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block)
 {
   DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
   DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
@@ -1303,7 +1543,8 @@ static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
   RETURN
     void
 */
-static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
+static void reg_requests(SIMPLE_KEY_CACHE_CB *keycache,
+                         BLOCK_LINK *block, int count)
 {
   DBUG_ASSERT(block->status & BLOCK_IN_USE);
   DBUG_ASSERT(block->hash_link);
@@ -1346,7 +1587,7 @@ static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
     not linked in the LRU ring.
 */
 
-static void unreg_request(KEY_CACHE *keycache,
+static void unreg_request(SIMPLE_KEY_CACHE_CB *keycache,
                           BLOCK_LINK *block, int at_end)
 {
   DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
@@ -1435,7 +1676,7 @@ static void remove_reader(BLOCK_LINK *block)
   signals on its termination
 */
 
-static void wait_for_readers(KEY_CACHE *keycache,
+static void wait_for_readers(SIMPLE_KEY_CACHE_CB *keycache,
                              BLOCK_LINK *block)
 {
 #ifdef THREAD
@@ -1484,7 +1725,7 @@ static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
   Remove a hash link from the hash table
 */
 
-static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
+static void unlink_hash(SIMPLE_KEY_CACHE_CB *keycache, HASH_LINK *hash_link)
 {
   KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u  pos_ %lu  #requests=%u",
       (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests));
@@ -1540,7 +1781,7 @@ static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
   Get the hash link for a page
 */
 
-static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
+static HASH_LINK *get_hash_link(SIMPLE_KEY_CACHE_CB *keycache,
                                 int file, my_off_t filepos)
 {
   reg1 HASH_LINK *hash_link, **start;
@@ -1661,7 +1902,7 @@ restart:
     waits until first of this operations links any block back.
 */
 
-static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
+static BLOCK_LINK *find_key_block(SIMPLE_KEY_CACHE_CB *keycache,
                                   File file, my_off_t filepos,
                                   int init_hits_left,
                                   int wrmode, int *page_st)
@@ -2421,7 +2662,7 @@ restart:
     portion is less than read_length, but not less than min_length.
 */
 
-static void read_block(KEY_CACHE *keycache,
+static void read_block(SIMPLE_KEY_CACHE_CB *keycache,
                        BLOCK_LINK *block, uint read_length,
                        uint min_length, my_bool primary)
 {
@@ -2509,43 +2750,60 @@ static void read_block(KEY_CACHE *keycache,
 
 
 /*
-  Read a block of data from a cached file into a buffer;
+  Read a block of data from a simple key cache into a buffer
 
   SYNOPSIS
 
-    key_cache_read()
-      keycache            pointer to a key cache data structure
-      file                handler for the file for the block of data to be read
-      filepos             position of the block of data in the file
-      level               determines the weight of the data
-      buff                buffer to where the data must be placed
-      length              length of the buffer
-      block_length        length of the block in the key cache buffer
-      return_buffer       return pointer to the key cache buffer with the data
+    simple_key_cache_read()
+    keycache            pointer to the control block of a simple key cache
+    file                handler for the file for the block of data to be read
+    filepos             position of the block of data in the file
+    level               determines the weight of the data
+    buff                buffer to where the data must be placed
+    length              length of the buffer
+    block_length        length of the read data from a key cache block 
+    return_buffer       return pointer to the key cache buffer with the data
 
+  DESCRIPTION
+    This function is the implementation of the key_cache_read interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
+    cache.
+    In a general case the function reads a block of data from the key cache
+    into the buffer buff of the size specified by the parameter length. The
+    beginning of the  block of data to be read is specified by the parameters
+    file and filepos. The length of the read data is the same as the length
+    of the buffer. The data is read into the buffer in key_cache_block_size
+    increments. If the next portion of the data is not found in any key cache
+    block, first it is read from file into the key cache.
+    If the parameter return_buffer is not ignored and its value is TRUE, and 
+    the data to be read of the specified size block_length can be read from one
+    key cache buffer, then the function returns a pointer to the data in the
+    key cache buffer.
+    The function takes into account parameters block_length and return_buffer
+    only in a single-threaded environment.
+    The parameter 'level' is used only by the midpoint insertion strategy 
+    when the data or its portion cannot be found in the key cache. 
+   
   RETURN VALUE
-    Returns address from where the data is placed if sucessful, 0 - otherwise.
+    Returns address from where the data is placed if successful, 0 - otherwise.
 
-  NOTES.
-    The function ensures that a block of data of size length from file
-    positioned at filepos is in the buffers for some key cache blocks.
-    Then the function either copies the data into the buffer buff, or,
-    if return_buffer is TRUE, it just returns the pointer to the key cache
-    buffer with the data.
+  NOTES
     Filepos must be a multiple of 'block_length', but it doesn't
     have to be a multiple of key_cache_block_size;
 */
 
-uchar *key_cache_read(KEY_CACHE *keycache,
-                      File file, my_off_t filepos, int level,
-                      uchar *buff, uint length,
-                      uint block_length __attribute__((unused)),
-                      int return_buffer __attribute__((unused)))
+uchar *simple_key_cache_read(SIMPLE_KEY_CACHE_CB *keycache,
+                             File file, my_off_t filepos, int level,
+                             uchar *buff, uint length,
+                             uint block_length __attribute__((unused)),
+                             int return_buffer __attribute__((unused)))
 {
   my_bool locked_and_incremented= FALSE;
   int error=0;
   uchar *start= buff;
-  DBUG_ENTER("key_cache_read");
+  DBUG_ENTER("simple_key_cache_read");
   DBUG_PRINT("enter", ("fd: %u  pos: %lu  length: %u",
                (uint) file, (ulong) filepos, length));
 
@@ -2770,28 +3028,47 @@ end:
 
 
 /*
-  Insert a block of file data from a buffer into key cache
+  Insert a block of file data from a buffer into a simple key cache
 
   SYNOPSIS
-    key_cache_insert()
-    keycache            pointer to a key cache data structure
+    simple_key_cache_insert()
+    keycache            pointer to the control block of a simple key cache 
     file                handler for the file to insert data from
     filepos             position of the block of data in the file to insert
     level               determines the weight of the data
     buff                buffer to read data from
     length              length of the data in the buffer
 
-  NOTES
-    This is used by MyISAM to move all blocks from a index file to the key
-    cache
-
+  DESCRIPTION
+    This function is the implementation of the key_cache_insert interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
+    cache.
+    The function writes a block of file data from a buffer into the key cache.
+    The buffer is specified with the parameters buff and length - the pointer
+    to the beginning of the buffer and its size respectively. It's assumed
+    the buffer contains the data from 'file' starting at the position
+    filepos. The data is copied from the buffer in key_cache_block_size
+    increments.
+    The parameter level is used to set one characteristic for the key buffers
+    loaded with the data from buff. The characteristic is used only by the
+    midpoint insertion strategy.  
+   
   RETURN VALUE
     0 if a success, 1 - otherwise.
+
+  NOTES
+    The function is used by MyISAM to move all blocks from an index file to
+    the key cache. It can be performed in parallel with reading the file data
+    from the key buffers by other threads.
+
 */
 
-int key_cache_insert(KEY_CACHE *keycache,
-                     File file, my_off_t filepos, int level,
-                     uchar *buff, uint length)
+static
+int simple_key_cache_insert(SIMPLE_KEY_CACHE_CB *keycache,
+                            File file, my_off_t filepos, int level,
+                            uchar *buff, uint length)
 {
   int error= 0;
   DBUG_ENTER("key_cache_insert");
@@ -3008,43 +3285,64 @@ int key_cache_insert(KEY_CACHE *keycache,
 
 
 /*
-  Write a buffer into a cached file.
+  Write a buffer into a simple key cache
 
   SYNOPSIS
 
-    key_cache_write()
-      keycache            pointer to a key cache data structure
-      file                handler for the file to write data to
-      filepos             position in the file to write data to
-      level               determines the weight of the data
-      buff                buffer with the data
-      length              length of the buffer
-      dont_write          if is 0 then all dirty pages involved in writing
-                          should have been flushed from key cache
+    simple_key_cache_write()
+    keycache            pointer to the control block of a simple key cache
+    file                handler for the file to write data to
+    file_extra          maps of key cache partitions containing 
+                        dirty pages from file 
+    filepos             position in the file to write data to
+    level               determines the weight of the data
+    buff                buffer with the data
+    length              length of the buffer
+    dont_write          if is 0 then all dirty pages involved in writing
+                        should have been flushed from key cache
 
+  DESCRIPTION
+    This function is the implementation of the key_cache_write interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
+    cache.
+    In a general case the function copies data from a buffer into the key
+    cache. The buffer is specified with the parameters buff and length -
+    the pointer to the beginning of the buffer and its size respectively.
+    It's assumed the buffer contains the data to be written into 'file'
+    starting from the position filepos. The data is copied from the buffer
+    in key_cache_block_size increments.
+    If the value of the parameter dont_write is FALSE then the function
+    also writes the data into file.
+    The parameter level is used to set one characteristic for the key buffers
+    filled with the data from buff. The characteristic is employed only by
+    the midpoint insertion strategy.
+    The parameter file_extra currently makes sense only for simple key caches
+    that are elements of a partitioned key cache. It provides a pointer to the
+    shared bitmap of the partitions that may contain dirty pages for the file.
+    This bitmap is used to optimize the function 
+    flush_partitioned_key_cache_blocks. 
+      
   RETURN VALUE
     0 if a success, 1 - otherwise.
 
-  NOTES.
-    The function copies the data of size length from buff into buffers
-    for key cache blocks that are  assigned to contain the portion of
-    the file starting with position filepos.
-    It ensures that this data is flushed to the file if dont_write is FALSE.
-    Filepos must be a multiple of 'block_length', but it doesn't
-    have to be a multiple of key_cache_block_size;
-
-    dont_write is always TRUE in the server (info->lock_type is never F_UNLCK).
+  NOTES
+    This implementation exploits the fact that the function is called only
+    when a thread has got an exclusive lock for the key file.
 */
 
-int key_cache_write(KEY_CACHE *keycache,
-                    File file, my_off_t filepos, int level,
-                    uchar *buff, uint length,
-                    uint block_length  __attribute__((unused)),
-                    int dont_write)
+static
+int simple_key_cache_write(SIMPLE_KEY_CACHE_CB *keycache,
+                           File file, void *file_extra __attribute__((unused)),                       
+                           my_off_t filepos, int level,
+                           uchar *buff, uint length,
+                           uint block_length  __attribute__((unused)),
+                           int dont_write)
 {
   my_bool locked_and_incremented= FALSE;
   int error=0;
-  DBUG_ENTER("key_cache_write");
+  DBUG_ENTER("simple_key_cache_write");
   DBUG_PRINT("enter",
              ("fd: %u  pos: %lu  length: %u  block_length: %u"
               "  key_block_length: %u",
@@ -3376,7 +3674,7 @@ end:
     Block must have a request registered on it.
 */
 
-static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
+static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block)
 {
   KEYCACHE_THREAD_TRACE("free block");
   KEYCACHE_DBUG_PRINT("free_block",
@@ -3516,7 +3814,7 @@ static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
   free used blocks if requested
 */
 
-static int flush_cached_blocks(KEY_CACHE *keycache,
+static int flush_cached_blocks(SIMPLE_KEY_CACHE_CB *keycache,
                                File file, BLOCK_LINK **cache,
                                BLOCK_LINK **end,
                                enum flush_type type)
@@ -3560,9 +3858,9 @@ static int flush_cached_blocks(KEY_CACHE *keycache,
                   (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
       block->status|= BLOCK_IN_FLUSHWRITE;
       keycache_pthread_mutex_unlock(&keycache->cache_lock);
-      error= my_pwrite(file, block->buffer+block->offset,
+      error= my_pwrite(file, block->buffer + block->offset,
                        block->length - block->offset,
-                       block->hash_link->diskpos+ block->offset,
+                       block->hash_link->diskpos + block->offset,
                        MYF(MY_NABP | MY_WAIT_IF_FULL));
       keycache_pthread_mutex_lock(&keycache->cache_lock);
       keycache->global_cache_write++;
@@ -3622,7 +3920,7 @@ static int flush_cached_blocks(KEY_CACHE *keycache,
 
 
 /*
-  Flush all key blocks for a file to disk, but don't do any mutex locks.
+  Flush all key blocks for a file to disk, but don't do any mutex locks
 
   SYNOPSIS
     flush_key_blocks_int()
@@ -3644,7 +3942,7 @@ static int flush_cached_blocks(KEY_CACHE *keycache,
     1  error
 */
 
-static int flush_key_blocks_int(KEY_CACHE *keycache,
+static int flush_key_blocks_int(SIMPLE_KEY_CACHE_CB *keycache,
 				File file, enum flush_type type)
 {
   BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
@@ -3659,6 +3957,7 @@ static int flush_key_blocks_int(KEY_CACHE *keycache,
                test_key_cache(keycache, "start of flush_key_blocks", 0););
 #endif
 
+  DBUG_ASSERT(type != FLUSH_KEEP_LAZY);
   cache= cache_buff;
   if (keycache->disk_blocks > 0 &&
       (!my_disable_flush_key_blocks || type != FLUSH_KEEP))
@@ -3969,6 +4268,12 @@ restart:
               uint                next_status;
               uint                hash_requests;
 
+              LINT_INIT(next_hash_link);
+              LINT_INIT(next_diskpos);
+              LINT_INIT(next_file);
+              LINT_INIT(next_status);
+              LINT_INIT(hash_requests);
+
               total_found++;
               found++;
               KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used);
@@ -4079,22 +4384,46 @@ err:
 
 
 /*
-  Flush all blocks for a file to disk
+  Flush all blocks for a file from key buffers of a simple key cache 
 
   SYNOPSIS
 
-    flush_key_blocks()
-      keycache            pointer to a key cache data structure
-      file                handler for the file to flush to
-      flush_type          type of the flush
+    flush_simple_key_cache_blocks()
+    keycache            pointer to the control block of a simple key cache
+    file                handler for the file to flush to
+    file_extra          maps of key cache partitions containing
+                        dirty pages from file (not used)
+    type                type of the flush operation
 
+  DESCRIPTION
+    This function is the implementation of the flush_key_blocks interface
+    function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
+    cache.
+    In a general case the function flushes the data from all dirty key
+    buffers related to the file 'file' into this file. The function does
+    exactly this if the value of the parameter type is FLUSH_KEEP. If the
+    value of this parameter is FLUSH_RELEASE, the function additionally 
+    releases the key buffers containing data from 'file' for new usage.
+    If the value of the parameter type is FLUSH_IGNORE_CHANGED the function
+    just releases the key buffers containing data from 'file'.  
+    The parameter file_extra currently is not used by this function.
+      
   RETURN
     0   ok
     1  error
+
+  NOTES
+    This implementation exploits the fact that the function is called only
+    when a thread has got an exclusive lock for the key file.
 */
 
-int flush_key_blocks(KEY_CACHE *keycache,
-                     File file, enum flush_type type)
+static
+int flush_simple_key_cache_blocks(SIMPLE_KEY_CACHE_CB *keycache,
+                                  File file,
+                                  void *file_extra __attribute__((unused)),
+                                  enum flush_type type)
 {
   int res= 0;
   DBUG_ENTER("flush_key_blocks");
@@ -4148,7 +4477,7 @@ int flush_key_blocks(KEY_CACHE *keycache,
     != 0        Error
 */
 
-static int flush_all_key_blocks(KEY_CACHE *keycache)
+static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache)
 {
   BLOCK_LINK    *block;
   uint          total_found;
@@ -4251,37 +4580,43 @@ static int flush_all_key_blocks(KEY_CACHE *keycache)
 
 
 /*
-  Reset the counters of a key cache.
+  Reset the counters of a simple key cache
 
   SYNOPSIS
-    reset_key_cache_counters()
-    name       the name of a key cache
-    key_cache  pointer to the key kache to be reset
+    reset_simple_key_cache_counters()
+    name                the name of a key cache
+    keycache            pointer to the control block of a simple key cache
 
   DESCRIPTION
-   This procedure is used by process_key_caches() to reset the counters of all
-   currently used key caches, both the default one and the named ones.
+    This function is the implementation of the reset_key_cache_counters
+    interface function that is employed by simple (non-partitioned) key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
+    cache. This function resets the values of all statistical counters for
+    the key cache to 0.
+    The parameter name is used only in the debug trace output.
 
   RETURN
     0 on success (always because it can't fail)
 */
 
-int reset_key_cache_counters(const char *name __attribute__((unused)),
-                             KEY_CACHE *key_cache)
+static
+int reset_simple_key_cache_counters(const char *name __attribute__((unused)),
+                                    SIMPLE_KEY_CACHE_CB *keycache)
 {
-  DBUG_ENTER("reset_key_cache_counters");
-  if (!key_cache->key_cache_inited)
+  DBUG_ENTER("reset_simple_key_cache_counters");
+  if (!keycache->key_cache_inited)
   {
     DBUG_PRINT("info", ("Key cache %s not initialized.", name));
     DBUG_RETURN(0);
   }
   DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
 
-  key_cache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
-  key_cache->global_cache_r_requests= 0; /* Key_read_requests */
-  key_cache->global_cache_read= 0;       /* Key_reads */
-  key_cache->global_cache_w_requests= 0; /* Key_write_requests */
-  key_cache->global_cache_write= 0;      /* Key_writes */
+  keycache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
+  keycache->global_cache_r_requests= 0; /* Key_read_requests */
+  keycache->global_cache_read= 0;       /* Key_reads */
+  keycache->global_cache_w_requests= 0; /* Key_write_requests */
+  keycache->global_cache_write= 0;      /* Key_writes */
   DBUG_RETURN(0);
 }
 
@@ -4290,9 +4625,10 @@ int reset_key_cache_counters(const char *name __attribute__((unused)),
 /*
   Test if disk-cache is ok
 */
-static void test_key_cache(KEY_CACHE *keycache __attribute__((unused)),
-                           const char *where __attribute__((unused)),
-                           my_bool lock __attribute__((unused)))
+static
+void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache __attribute__((unused)),
+                    const char *where __attribute__((unused)),
+                    my_bool lock __attribute__((unused)))
 {
   /* TODO */
 }
@@ -4304,7 +4640,7 @@ static void test_key_cache(KEY_CACHE *keycache __attribute__((unused)),
 #define MAX_QUEUE_LEN  100
 
 
-static void keycache_dump(KEY_CACHE *keycache)
+static void keycache_dump(SIMPLE_KEY_CACHE_CB *keycache)
 {
   FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
   struct st_my_thread_var *last;
@@ -4544,7 +4880,7 @@ static int fail_hlink(HASH_LINK *hlink)
   return 0; /* Let the assert fail. */
 }
 
-static int cache_empty(KEY_CACHE *keycache)
+static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache)
 {
   int errcnt= 0;
   int idx;
@@ -4582,3 +4918,1545 @@ static int cache_empty(KEY_CACHE *keycache)
 }
 #endif
 
+
+/*
+  Copy the statistics of a simple key cache into a caller-supplied structure
+
+  SYNOPSIS
+    get_simple_key_cache_statistics()
+    keycache            pointer to the control block of a simple key cache
+    partition_no        partition number (not used)
+    keycache_stats  OUT pointer to the structure that receives the statistics
+
+  DESCRIPTION
+    This is the implementation of the get_key_cache_statistics interface
+    function for simple (non-partitioned) key caches. It copies the memory
+    size, the block size, the block usage counters and the read/write
+    counters from the control block of the type SIMPLE_KEY_CACHE_CB into
+    *keycache_stats. The control block is only read by the function.
+    The parameter partition_no is ignored.
+
+  RETURN
+    none
+*/
+
+static
+void get_simple_key_cache_statistics(SIMPLE_KEY_CACHE_CB *keycache,
+                                     uint partition_no __attribute__((unused)),
+                                     KEY_CACHE_STATISTICS *keycache_stats)
+{
+  DBUG_ENTER("simple_get_key_cache_statistics");
+  /* Request and disk I/O counters first, then block usage and sizes */
+  keycache_stats->read_requests= keycache->global_cache_r_requests;
+  keycache_stats->reads= keycache->global_cache_read;
+  keycache_stats->write_requests= keycache->global_cache_w_requests;
+  keycache_stats->writes= keycache->global_cache_write;
+  keycache_stats->blocks_used= keycache->blocks_used;
+  keycache_stats->blocks_unused= keycache->blocks_unused;
+  keycache_stats->blocks_changed= keycache->global_blocks_changed;
+  keycache_stats->blocks_warm= keycache->warm_blocks;
+  keycache_stats->block_size= (longlong) keycache->key_cache_block_size;
+  keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size;
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  The table of pointers to the key cache interface functions used for simple
+  key caches. Any simple key cache object, including those incorporated into
+  partitioned key caches, uses this table.
+
+  The current implementation of these functions allows calling them from
+  the MySQL server code directly. We don't do it though.
+*/
+   
+static KEY_CACHE_FUNCS simple_key_cache_funcs =
+{
+  (INIT_KEY_CACHE) init_simple_key_cache,
+  (RESIZE_KEY_CACHE) resize_simple_key_cache,
+  (CHANGE_KEY_CACHE_PARAM) change_simple_key_cache_param,      
+  (KEY_CACHE_READ) simple_key_cache_read,
+  (KEY_CACHE_INSERT) simple_key_cache_insert,
+  (KEY_CACHE_WRITE) simple_key_cache_write,
+  (FLUSH_KEY_BLOCKS) flush_simple_key_cache_blocks, 
+  (RESET_KEY_CACHE_COUNTERS) reset_simple_key_cache_counters, 
+  (END_KEY_CACHE) end_simple_key_cache, 
+  (GET_KEY_CACHE_STATISTICS) get_simple_key_cache_statistics,
+};
+
+
+/****************************************************************************** 
+  Partitioned Key Cache Module
+
+  The module contains implementations of all key cache interface functions
+  employed by partitioned key caches. 
+
+  A partitioned key cache is a collection of structures for simple key caches
+  called key cache partitions. Any page from a file can be placed into a buffer
+  of only one partition. The number of the partition is calculated from
+  the file number and the position of the page in the file, and it's always the
+  same for the page. The function that maps pages into partitions takes care
+  of even distribution of pages among partitions.
+
+  A partitioned key cache mitigates one of the major problems of a simple key
+  cache: thread contention for the key cache lock (mutex). Every call of a key
+  cache interface function must acquire this lock. So threads compete for it
+  even in the case when they have acquired shared locks for the file and the
+  pages they want to read from are in the key cache buffers.
+  When working with a partitioned key cache any key cache interface function
+  that needs only one page has to acquire the key cache lock only for the
+  partition the page is ascribed to. This improves the chances that threads do
+  not compete for the same key cache lock. Unfortunately if we use a
+  partitioned key cache with N partitions for B-tree indexes we can't say
+  that the contention becomes N times less. The fact is that any index lookup
+  operation requires reading from the root page that, for any index, is always
+  ascribed to the same partition. To resolve this problem we would have to
+  employ more sophisticated mechanisms of working with root pages.
+
+  Currently the number of partitions in a partitioned key cache is limited
+  to 64. We could increase this limit, but then we would also have to increase
+  accordingly the size of the bitmap dirty_part_map from the MYISAM_SHARE
+  structure.
+     
+******************************************************************************/
+
+/* Control block of a partitioned key cache: a set of simple key caches */
+
+typedef struct st_partitioned_key_cache_cb
+{
+  my_bool key_cache_inited;     /*<=> partition_array has been allocated    */
+  SIMPLE_KEY_CACHE_CB **partition_array; /* control blocks of partitions    */
+  size_t key_cache_mem_size;    /* memory per partition * partitions        */
+  uint key_cache_block_size;    /* size of the page buffer of a cache block */
+  uint partitions;              /* number of partitions in the key cache    */
+} PARTITIONED_KEY_CACHE_CB;
+
+static
+void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
+                               my_bool cleanup);
+
+static int
+reset_partitioned_key_cache_counters(const char *name,
+                                     PARTITIONED_KEY_CACHE_CB *keycache);
+
+/*
+  Find the partition that caches a given file block when it is read
+
+  SYNOPSIS
+    get_key_cache_partition()
+    keycache            pointer to the control block of a partitioned key cache
+    file                handler for the file the block of data belongs to
+    filepos             position of the block of data in the file
+
+  DESCRIPTION
+    The partition number is the value of KEYCACHE_BASE_EXPR(file, filepos)
+    taken modulo the number of partitions in the key cache.
+    The function does not change anything in the key cache, it only
+    looks up the control block of the partition with this number.
+
+  RETURN VALUE
+    The pointer to the control block of the simple key cache that serves as
+    the partition the specified file block is ascribed to.
+*/
+
+static
+SIMPLE_KEY_CACHE_CB *
+get_key_cache_partition(PARTITIONED_KEY_CACHE_CB *keycache,
+                        File file, my_off_t filepos)
+{
+  uint part_no= KEYCACHE_BASE_EXPR(file, filepos) % keycache->partitions;
+  return keycache->partition_array[part_no];
+}
+
+
+/*
+  Find the partition that caches a given file block when it is written
+
+  SYNOPSIS
+    get_key_cache_partition_for_write()
+    keycache            pointer to the control block of a partitioned key cache
+    file                handler for the file the block of data belongs to
+    filepos             position of the block of data in the file
+    dirty_part_map      pointer to the bitmap of dirty partitions (updated)
+
+  DESCRIPTION
+    The partition number is the value of KEYCACHE_BASE_EXPR(file, filepos)
+    taken modulo the number of partitions in the key cache.
+    In addition the bit with this number is set in *dirty_part_map to mark
+    the partition as possibly containing dirty pages of the file. Bits that
+    are already set in the bitmap are left intact.
+
+  RETURN VALUE
+    The pointer to the control block of the simple key cache that serves as
+    the partition the specified file block is ascribed to.
+*/
+
+static SIMPLE_KEY_CACHE_CB
+*get_key_cache_partition_for_write(PARTITIONED_KEY_CACHE_CB *keycache,
+                                   File file, my_off_t filepos,
+                                   ulonglong* dirty_part_map)
+{
+  uint part_no= KEYCACHE_BASE_EXPR(file, filepos) % keycache->partitions;
+  *dirty_part_map|= ((ulonglong) 1) << part_no;
+  return keycache->partition_array[part_no];
+}
+
+
+/*
+  Initialize a partitioned key cache
+
+  SYNOPSIS
+    init_partitioned_key_cache()
+    keycache            pointer to the control block of a partitioned key cache
+    key_cache_block_size    size of blocks to keep cached data
+    use_mem             total memory to use for all key cache partitions
+    division_limit      division limit (may be zero)
+    age_threshold       age threshold (may be zero)
+
+  DESCRIPTION
+    This function is the implementation of the init_key_cache interface function
+    that is employed by partitioned key caches.
+    The function builds and initializes an array of simple key caches, and then
+    initializes the control block structure of the type PARTITIONED_KEY_CACHE_CB
+    that is used for a partitioned key cache. The parameter keycache is
+    supposed to point to this structure. The number of partitions in the
+    partitioned key cache to be built must be passed through the field
+    'partitions' of this structure. The parameter key_cache_block_size specifies
+    the size of the blocks in the simple key caches to be built.
+    The parameters division_limit and age_threshold determine the initial
+    values of those characteristics of the simple key caches that are used for
+    midpoint insertion strategy. The parameter use_mem specifies the total
+    amount of memory to be allocated for the key cache blocks in all simple key
+    caches and for all auxiliary structures.
+
+  RETURN VALUE
+    total number of blocks in key cache partitions, if successful,
+    <= 0 - otherwise.
+
+  NOTES
+    If keycache->key_cache_inited != 0 then we assume that the memory for
+    the array of partitions has been already allocated.
+    On return keycache->partitions is the number of partitions actually
+    initialized, which may be less than requested. It's assumed that no two
+    threads call this function simultaneously for the same key cache handle.
+*/
+
+static
+int init_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
+                               uint key_cache_block_size,
+                               size_t use_mem, uint division_limit,
+                               uint age_threshold)
+{
+  int i;
+  size_t mem_per_cache;
+  size_t mem_decr;
+  int cnt;
+  SIMPLE_KEY_CACHE_CB *partition;
+  SIMPLE_KEY_CACHE_CB **partition_ptr;
+  uint partitions= keycache->partitions;
+  int blocks= 0;
+  DBUG_ENTER("partitioned_init_key_cache");
+
+  keycache->key_cache_block_size = key_cache_block_size;
+
+  if (keycache->key_cache_inited)
+    partition_ptr= keycache->partition_array;
+  else
+  {
+    if(!(partition_ptr=
+       (SIMPLE_KEY_CACHE_CB **) my_malloc(sizeof(SIMPLE_KEY_CACHE_CB *) *
+                                          partitions, MYF(MY_WME))))
+      DBUG_RETURN(-1);
+    bzero(partition_ptr, sizeof(SIMPLE_KEY_CACHE_CB *) * partitions);
+    keycache->partition_array= partition_ptr;
+  }
+
+  mem_per_cache = use_mem / partitions;
+  mem_decr= mem_per_cache / 5;
+
+  for (i= 0; i < (int) partitions; i++)
+  {
+    my_bool key_cache_inited= keycache->key_cache_inited;
+    if (key_cache_inited)
+      partition= *partition_ptr;
+    else
+    {
+      if (!(partition=
+              (SIMPLE_KEY_CACHE_CB *)  my_malloc(sizeof(SIMPLE_KEY_CACHE_CB),
+                                                 MYF(MY_WME))))
+        continue;
+      partition->key_cache_inited= 0;
+    }
+
+    cnt= init_simple_key_cache(partition, key_cache_block_size, mem_per_cache,
+                               division_limit, age_threshold);
+    if (cnt <= 0)
+    {
+      end_simple_key_cache(partition, 1);
+      if (!key_cache_inited)
+      {
+        my_free(partition,  MYF(0));
+        partition= 0;
+      }
+      if ((i == 0 && cnt < 0) || i > 0)
+      {
+        /*
+          Here we have two cases:
+            1. i == 0 and cnt < 0
+            cnt < 0 => mem_per_cache is not big enough to allocate minimal
+            number of key blocks in the key cache of the partition.
+            Decrease the number of the partitions by 1 and start again.
+            2. i > 0
+            There is not enough memory for one of the succeeding partitions.
+            Just skip this partition decreasing the number of partitions by 1.
+          use_mem is not changed in both cases; mem_per_cache is recomputed
+          below for the decreased number of partitions.
+        */
+        if (key_cache_inited)
+        {
+          my_free(partition,  MYF(0));
+          partition= 0;
+          /* Close the gap left in the array by the dropped partition */
+          memmove(partition_ptr, partition_ptr+1,
+                  sizeof(*partition_ptr)*(partitions-i-1));
+        }
+        if (!--partitions)
+          break;
+      }
+      else
+      {
+        /*
+          We come here when i == 0 && cnt == 0.
+          cnt == 0 => the memory allocator fails to allocate a block of
+          memory of the size mem_per_cache. Decrease use_mem by mem_decr
+          (1/5 of the initial mem_per_cache) without changing the current
+          number of partitions and start again. Retries stop as soon as
+          use_mem is not greater than mem_decr.
+        */
+        if (use_mem <= mem_decr)
+          break;
+        use_mem-= mem_decr;
+      }
+      i--;
+      mem_per_cache= use_mem/partitions;
+      continue;
+    }
+    else
+    {
+      blocks+= cnt;
+      *partition_ptr++= partition;
+    }
+  }
+
+  keycache->partitions= partitions= partition_ptr-keycache->partition_array;
+  keycache->key_cache_mem_size= mem_per_cache * partitions;
+  for (i= 0; i < (int) partitions; i++)
+    keycache->partition_array[i]->hash_factor= partitions;
+
+  keycache->key_cache_inited= 1;
+
+  if (!partitions)
+    blocks= -1;
+
+  DBUG_RETURN(blocks);
+}
+
+
+/*
+  Resize a partitioned key cache
+
+  SYNOPSIS
+    resize_partitioned_key_cache()
+    keycache            pointer to the control block of a partitioned key cache
+    key_cache_block_size    size of blocks to keep cached data
+    use_mem             total memory to use for the new key cache
+    division_limit      new division limit (if not zero)
+    age_threshold       new age threshold (if not zero)
+
+  DESCRIPTION
+    This function is the implementation of the resize_key_cache interface
+    function that is employed by partitioned key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type PARTITIONED_KEY_CACHE_CB for the
+    partitioned key cache to be resized.
+    The parameter key_cache_block_size specifies the new size of the blocks in
+    the simple key caches that comprise the partitioned key cache.
+    The parameters division_limit and age_threshold determine the new initial
+    values of those characteristics of the simple key cache that are used for
+    midpoint insertion strategy. The parameter use_mem specifies the total
+    amount of memory for the key cache blocks and auxiliary structures of all
+    new simple key caches; if it is 0 the key cache is only shut down.
+
+  RETURN VALUE
+    number of blocks in the key cache, if successful,
+    <= 0 - otherwise (-1 if use_mem is 0 or a partition can't be prepared).
+
+  NOTES
+    The function first calls prepare_resize_simple_key_cache for each simple
+    key cache effectively flushing all dirty pages from it and destroying
+    the key cache. Then init_partitioned_key_cache is called. This call may
+    leave fewer partitions in the array than there were before if some of
+    them cannot be initialized. After this finish_resize_simple_key_cache is
+    called for each simple key cache that remains in the array.
+
+    This implementation doesn't block the calls and executions of other
+    functions from the key cache interface. However it assumes that the
+    calls of resize_partitioned_key_cache itself are serialized.
+*/
+
+static
+int resize_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
+                                 uint key_cache_block_size,
+                                 size_t use_mem, uint division_limit,
+                                 uint age_threshold)
+{
+  uint i;
+  uint partitions= keycache->partitions;
+  my_bool cleanup= use_mem == 0;
+  int blocks= -1;
+  int err= 0;
+  DBUG_ENTER("partitioned_resize_key_cache");
+  if (cleanup)
+  {
+    end_partitioned_key_cache(keycache, 0);
+    DBUG_RETURN(-1);
+  }
+  for (i= 0; i < partitions; i++)
+    err|= prepare_resize_simple_key_cache(keycache->partition_array[i], 0, 1);
+  if (!err)
+    blocks= init_partitioned_key_cache(keycache, key_cache_block_size,
+                                       use_mem, division_limit, age_threshold);
+  if (blocks > 0)
+  {
+    /* init_partitioned_key_cache may have freed some partitions: use the
+       current count so that no freed or stale array entry is touched */
+    for (i= 0; i < keycache->partitions; i++)
+    {
+      finish_resize_simple_key_cache(keycache->partition_array[i], 0, 1);
+    }
+  }
+  DBUG_RETURN(blocks);
+}
+
+
+/*
+  Change key cache parameters of a partitioned key cache
+
+  SYNOPSIS
+    change_partitioned_key_cache_param()
+    keycache            pointer to the control block of a partitioned key cache
+    division_limit      new division limit (if not zero)
+    age_threshold       new age threshold (if not zero)
+
+  DESCRIPTION
+    This function is the implementation of the change_key_cache_param
+    interface function that is employed by partitioned key caches.
+    The parameter keycache points to the control block structure of the
+    type PARTITIONED_KEY_CACHE_CB of the partitioned key cache for which
+    the division limit and the age threshold used by the midpoint insertion
+    strategy are to be changed. The parameters division_limit and
+    age_threshold provide the new values.
+
+  RETURN VALUE
+    none
+
+  NOTES
+    The new values are passed as they are to change_simple_key_cache_param,
+    which is called for every partition of the key cache in array order.
+*/
+
+static
+void change_partitioned_key_cache_param(PARTITIONED_KEY_CACHE_CB *keycache,
+                                        uint division_limit,
+                                        uint age_threshold)
+{
+  uint part_no= 0;
+  const uint part_count= keycache->partitions;
+  DBUG_ENTER("partitioned_change_key_cache_param");
+  while (part_no < part_count)
+  {
+    SIMPLE_KEY_CACHE_CB *partition= keycache->partition_array[part_no++];
+    change_simple_key_cache_param(partition, division_limit, age_threshold);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Destroy a partitioned key cache
+
+  SYNOPSIS
+    end_partitioned_key_cache()
+    keycache            pointer to the control block of a partitioned key cache
+    cleanup             <=> complete free (free also control block structures
+                            for all simple key caches)
+
+  DESCRIPTION
+    This function is the implementation of the end_key_cache interface
+    function that is employed by partitioned key caches.
+    The parameter keycache points to the control block structure of the
+    type PARTITIONED_KEY_CACHE_CB of the partitioned key cache to be
+    destroyed.
+    First end_simple_key_cache is called with the same value of cleanup for
+    every simple key cache that comprises the partitioned key cache.
+    If the value of the parameter cleanup is TRUE then the control blocks
+    of the simple key caches and the array of pointers to them are freed
+    as well, and the key cache is marked as not initialized.
+
+  RETURN VALUE
+    none
+*/
+
+static
+void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
+                               my_bool cleanup)
+{
+  uint part_no;
+  const uint part_count= keycache->partitions;
+  DBUG_ENTER("partitioned_end_key_cache");
+  DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache));
+
+  /* Shut down the simple key cache of every partition */
+  for (part_no= 0; part_no < part_count; part_no++)
+    end_simple_key_cache(keycache->partition_array[part_no], cleanup);
+  if (!cleanup)
+    DBUG_VOID_RETURN;
+
+  /* Complete destruction: free the control blocks and the pointer array */
+  for (part_no= 0; part_no < part_count; part_no++)
+    my_free((uchar*) keycache->partition_array[part_no], MYF(0));
+  my_free((uchar*) keycache->partition_array, MYF(0));
+  keycache->key_cache_inited= 0;
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Read a block of data from a partitioned key cache into a buffer
+
+  SYNOPSIS
+
+    partitioned_key_cache_read()
+    keycache            pointer to the control block of a partitioned key cache
+    file                handler for the file for the block of data to be read
+    filepos             position of the block of data in the file
+    level               determines the weight of the data
+    buff                buffer to where the data must be placed
+    length              length of the buffer
+    block_length        length of the read data from a key cache block
+    return_buffer       return pointer to the key cache buffer with the data
+
+  DESCRIPTION
+    This function is the implementation of the key_cache_read interface
+    function that is employed by partitioned key caches.
+    The parameter keycache points to the control block structure of the
+    type PARTITIONED_KEY_CACHE_CB of a partitioned key cache.
+    In a general case the function reads 'length' bytes of data that start
+    at the position filepos of the file 'file' from the key cache into the
+    buffer buff. The data is read in portions that never cross a boundary
+    of key_cache_block_size: the first portion ends at the end of the page
+    that contains filepos, every following portion starts at the beginning
+    of a page. For each portion the function first finds out with
+    get_key_cache_partition in what partition of the key cache the page is
+    to be kept, and then calls simple_key_cache_read with the pointer to the
+    corresponding simple key cache as its first parameter. If one of these
+    calls returns 0 the function stops and returns 0 as well.
+    If the code is compiled without THREAD, the value of return_buffer is
+    TRUE, filepos is aligned to key_cache_block_size and block_length is
+    not greater than key_cache_block_size, then the function returns the
+    pointer returned by the first call of simple_key_cache_read.
+    When the code is compiled with THREAD the values of block_length and
+    return_buffer are just passed through to simple_key_cache_read.
+    The parameter 'level' is used only by the midpoint insertion strategy
+    when the data or its portion cannot be found in the key cache.
+
+  RETURN VALUE
+    Returns address from where the data is placed if successful, 0 - otherwise.
+*/
+
+static
+uchar *partitioned_key_cache_read(PARTITIONED_KEY_CACHE_CB *keycache,
+                                  File file, my_off_t filepos, int level,
+                                  uchar *buff, uint length,
+                                  uint block_length __attribute__((unused)),
+                                  int return_buffer __attribute__((unused)))
+{
+  uint chunk_len;
+  uint page_offset= (uint) (filepos % keycache->key_cache_block_size);
+  uchar *const dest_start= buff;
+  DBUG_ENTER("partitioned_key_cache_read");
+  DBUG_PRINT("enter", ("fd: %u  pos: %lu  length: %u",
+               (uint) file, (ulong) filepos, length));
+
+#ifndef THREAD
+  if (page_offset || block_length > keycache->key_cache_block_size)
+    return_buffer=0;
+#endif
+
+  /* Serve the request page by page, each page from its own partition */
+  do
+  {
+    uchar *page_data;
+    uint page_room= keycache->key_cache_block_size - page_offset;
+    SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache,
+                                                            file, filepos);
+    chunk_len= length < page_room ? length : page_room;
+    page_data= simple_key_cache_read((void *) partition,
+                                     file, filepos, level,
+                                     buff, chunk_len,
+                                     block_length, return_buffer);
+    if (!page_data)
+      DBUG_RETURN(0);
+#ifndef THREAD
+    /* return_buffer is still set only if everything fits in one block */
+    if (return_buffer)
+      DBUG_RETURN(page_data);
+#endif
+    buff+= chunk_len;
+    filepos+= chunk_len;
+    page_offset= 0;
+  } while ((length-= chunk_len));
+
+  DBUG_RETURN(dest_start);
+}
+
+
+/*
+  Insert a block of file data from a buffer into a partitioned key cache
+
+  SYNOPSIS
+    partitioned_key_cache_insert()
+    keycache            pointer to the control block of a partitioned key cache
+    file                handler for the file to insert data from
+    filepos             position of the block of data in the file to insert
+    level               determines the weight of the data
+    buff                buffer to read data from
+    length              length of the data in the buffer
+
+  DESCRIPTION
+    This function is the implementation of the key_cache_insert interface
+    function that is employed by partitioned key caches.
+    The parameter keycache points to the control block structure of the
+    type PARTITIONED_KEY_CACHE_CB of a partitioned key cache.
+    The function loads a block of file data from a buffer into the key
+    cache. The buffer is specified with the parameters buff and length -
+    the pointer to the beginning of the buffer and its size respectively.
+    It's assumed that the buffer contains the data of 'file' that starts at
+    the position filepos. The data is taken from the buffer in portions
+    that never cross a boundary of key_cache_block_size. For every portion
+    the function finds out with get_key_cache_partition in what simple key
+    cache from the array of partitions the data must be stored, and after
+    this calls simple_key_cache_insert to copy the data into a key buffer of
+    this simple key cache. The first call that fails stops the loading.
+    The parameter level is used to set one characteristic for the key buffers
+    loaded with the data from buff. The characteristic is used only by the
+    midpoint insertion strategy.
+
+  RETURN VALUE
+    0 if a success, 1 - otherwise.
+
+  NOTES
+    The function is used by MyISAM to move all blocks from an index file to
+    the key cache. It can be performed in parallel with reading the file data
+    from the key buffers by other threads.
+*/
+
+static
+int partitioned_key_cache_insert(PARTITIONED_KEY_CACHE_CB *keycache,
+                                 File file, my_off_t filepos, int level,
+                                 uchar *buff, uint length)
+{
+  uint chunk_len;
+  uint page_offset= (uint) (filepos % keycache->key_cache_block_size);
+  DBUG_ENTER("partitioned_key_cache_insert");
+  DBUG_PRINT("enter", ("fd: %u  pos: %lu  length: %u",
+               (uint) file,(ulong) filepos, length));
+
+  /* Load the data page by page, each page into the partition it is
+     ascribed to */
+  do
+  {
+    uint page_room= keycache->key_cache_block_size - page_offset;
+    SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache,
+                                                            file, filepos);
+    chunk_len= length < page_room ? length : page_room;
+    if (simple_key_cache_insert((void *) partition,
+                                file, filepos, level,
+                                buff, chunk_len))
+      DBUG_RETURN(1);
+
+    buff+= chunk_len;
+    filepos+= chunk_len;
+    page_offset= 0;
+  } while ((length-= chunk_len));
+
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Write data from a buffer into a partitioned key cache
+
+  SYNOPSIS
+
+    partitioned_key_cache_write()
+    keycache            pointer to the control block of a partitioned key cache
+    file                handler for the file to write data to
+    file_extra          maps of key cache partitions containing
+                        dirty pages from file
+    filepos             position in the file to write data to
+    level               determines the weight of the data
+    buff                buffer with the data
+    length              length of the buffer
+    block_length        passed through to simple_key_cache_write
+    dont_write          if FALSE the data is also written into the file
+
+  DESCRIPTION
+    This function is the implementation of the key_cache_write interface
+    function that is employed by partitioned key caches.
+    The parameter keycache points to the control block structure of the
+    type PARTITIONED_KEY_CACHE_CB of a partitioned key cache.
+    In a general case the function copies data from a buffer into the key
+    cache. The buffer is specified with the parameters buff and length -
+    the pointer to the beginning of the buffer and its size respectively.
+    It's assumed the buffer contains the data to be written into 'file'
+    starting from the position filepos. The data is taken from the buffer
+    in portions that never cross a boundary of key_cache_block_size.
+    For every portion the function finds out with
+    get_key_cache_partition_for_write in what simple key cache from the
+    array of partitions the data must be stored, and after this calls
+    simple_key_cache_write to copy the data into a key buffer of this
+    simple key cache. The first call that fails stops the writing.
+    If the value of the parameter dont_write is FALSE then the function
+    also writes the data into file.
+    The parameter level is used to set one characteristic for the key buffers
+    filled with the data from buff. The characteristic is employed only by
+    the midpoint insertion strategy.
+    The parameter file_extra points to the bitmap of the partitions that may
+    contain dirty pages of the file; the function sets bits in this bitmap.
+
+  RETURN VALUE
+    0 if a success, 1 - otherwise.
+
+  NOTES
+    This implementation exploits the fact that the function is called only
+    when a thread has got an exclusive lock for the key file.
+*/
+
+static
+int partitioned_key_cache_write(PARTITIONED_KEY_CACHE_CB *keycache,
+                                File file, void *file_extra,
+                                my_off_t filepos, int level,
+                                uchar *buff, uint length,
+                                uint block_length  __attribute__((unused)),
+                                int dont_write)
+{
+  uint chunk_len;
+  uint page_offset= (uint) (filepos % keycache->key_cache_block_size);
+  ulonglong *dirty_part_map= (ulonglong *) file_extra;
+  DBUG_ENTER("partitioned_key_cache_write");
+  DBUG_PRINT("enter",
+             ("fd: %u  pos: %lu  length: %u  block_length: %u"
+              "  key_block_length: %u",
+              (uint) file, (ulong) filepos, length, block_length,
+              keycache ? keycache->key_cache_block_size : 0));
+
+  /* Store the data page by page, each page in the partition it is
+     ascribed to */
+  do
+  {
+    uint page_room= keycache->key_cache_block_size - page_offset;
+    /* The lookup also marks the partition as dirty in the bitmap */
+    SIMPLE_KEY_CACHE_CB *partition=
+      get_key_cache_partition_for_write(keycache, file, filepos,
+                                        dirty_part_map);
+    chunk_len= length < page_room ? length : page_room;
+    if (simple_key_cache_write(partition,
+                               file, 0, filepos, level,
+                               buff, chunk_len, block_length,
+                               dont_write))
+      DBUG_RETURN(1);
+
+    buff+= chunk_len;
+    filepos+= chunk_len;
+    page_offset= 0;
+  } while ((length-= chunk_len));
+
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Flush all blocks for a file from key buffers of a partitioned key cache 
+
+  SYNOPSIS
+
+    flush_partitioned_key_cache_blocks()
+    keycache            pointer to the control block of a partitioned key cache
+    file                handler for the file to flush to
+    file_extra          maps of key cache partitions containing 
+                        dirty pages from file (not used)         
+    flush_type          type of the flush operation
+
+  DESCRIPTION
+    This function is the implementation of the flush_key_blocks interface
+    function that is employed by partitioned key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type PARTITIONED_KEY_CACHE_CB for a
+    partitioned key cache.
+    In a general case the function flushes the data from all dirty key
+    buffers related to the file 'file' into this file. The function does
+    exactly this if the value of the parameter type is FLUSH_KEEP. If the
+    value of this parameter is FLUSH_RELEASE, the function additionally 
+    releases the key buffers containing data from 'file' for new usage.
+    If the value of the parameter type is FLUSH_IGNORE_CHANGED the function
+    just releases the key buffers containing data from 'file'.
+    The function performs the operation by calling the function 
+    flush_simple_key_cache_blocks for the elements of the array of the
+    simple key caches that comprise the partitioned key cache. If the value
+    of the parameter type is FLUSH_KEEP or FLUSH_FORCE_WRITE,
+    flush_simple_key_cache_blocks is called only for the partitions with
+    possibly dirty pages marked in the bitmap pointed to by the parameter
+    file_extra. The bitmap is reset to 0 before the function returns.
+      
+  RETURN
+    0   ok
+    1  error
+
+  NOTES
+    This implementation exploits the fact that the function is called only
+    when a thread has got an exclusive lock for the key file.
+*/
+
+static
+int flush_partitioned_key_cache_blocks(PARTITIONED_KEY_CACHE_CB *keycache,
+                                       File file, void *file_extra,
+                                       enum flush_type type)
+{
+  uint idx;
+  int failed= 0;
+  uint n_partitions= keycache->partitions;
+  ulonglong *dirty_map= (ulonglong *) file_extra;
+  /*
+    Only for these two flush types may a partition that is not marked in
+    the dirty map be skipped; any other type visits every partition.
+  */
+  int dirty_only= (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE);
+  DBUG_ENTER("partitioned_flush_key_blocks");
+  DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache));
+
+  for (idx= 0; idx < n_partitions; idx++)
+  {
+    if (dirty_only && !(*dirty_map & ((ulonglong) 1 << idx)))
+      continue;
+    /* A failure is remembered, but the remaining partitions are still done */
+    if (flush_simple_key_cache_blocks(keycache->partition_array[idx],
+                                      file, 0, type))
+      failed= 1;
+  }
+  /* The map is cleared unconditionally, whatever the flush type or result */
+  *dirty_map= 0;
+
+  DBUG_RETURN(failed);
+}
+
+
+/*
+  Reset the counters of a partitioned key cache
+
+  SYNOPSIS
+    reset_partitioned_key_cache_counters()
+    name                the name of a key cache
+    keycache            pointer to the control block of a partitioned key cache
+
+  DESCRIPTION
+    This function is the implementation of the reset_key_cache_counters
+    interface function that is employed by partitioned key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type PARTITIONED_KEY_CACHE_CB for a partitioned
+    key cache.
+    This function resets the values of the statistical counters of the simple
+    key caches comprising partitioned key cache to 0. It does it by calling 
+    reset_simple_key_cache_counters for each key  cache partition. 
+    The parameter name is currently not used.
+
+  RETURN
+    0 on success (always because it can't fail)
+*/
+
+static int
+reset_partitioned_key_cache_counters(const char *name __attribute__((unused)),
+                                     PARTITIONED_KEY_CACHE_CB *keycache)
+{
+  uint idx;
+  uint n_partitions= keycache->partitions;
+  DBUG_ENTER("partitioned_reset_key_cache_counters");
+
+  /* Delegate to each simple key cache; 'name' is only handed through */
+  for (idx= 0; idx < n_partitions; idx++)
+    reset_simple_key_cache_counters(name, keycache->partition_array[idx]);
+
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Get statistics for a partitioned key cache
+
+  SYNOPSIS
+    get_partitioned_key_cache_statistics()
+    keycache            pointer to the control block of a partitioned key cache
+    partition_no        partition number to get statistics for
+    keycache_stats  OUT pointer to the structure for the returned statistics
+
+  DESCRIPTION
+    This function is the implementation of the get_key_cache_statistics
+    interface function that is employed by partitioned key caches.
+    The function takes the parameter keycache as a pointer to the
+    control block structure of the type PARTITIONED_KEY_CACHE_CB for
+    a partitioned key cache.
+    If the value of the parameter partition_no is equal to 0 then aggregated
+    statistics for all partitions is returned in the fields of the
+    structure keycache_stats of the type KEY_CACHE_STATISTICS. Otherwise
+    the function returns data for the partition number partition_no of the
+    key cache in the structure keycache_stats. (Here partitions are numbered
+    starting from 1.)
+    If partition_no is greater than the number of partitions of the key
+    cache, the structure keycache_stats is filled with zeros.
+
+  RETURN
+    none
+*/
+
+static
+void
+get_partitioned_key_cache_statistics(PARTITIONED_KEY_CACHE_CB *keycache,
+                                     uint partition_no,
+                                     KEY_CACHE_STATISTICS *keycache_stats)
+{
+  uint i;
+  SIMPLE_KEY_CACHE_CB *partition;
+  uint partitions= keycache->partitions;
+  DBUG_ENTER("get_partitioned_key_cache_statistics");
+
+  if (partition_no != 0)
+  {
+    if (partition_no > partitions)
+    {
+      /* No such partition: never index partition_array out of range */
+      bzero(keycache_stats, sizeof(KEY_CACHE_STATISTICS));
+      DBUG_VOID_RETURN;
+    }
+    /* Partition numbers are 1-based, the array of partitions is 0-based */
+    partition= keycache->partition_array[partition_no-1];
+    get_simple_key_cache_statistics((void *) partition, 0, keycache_stats);
+    DBUG_VOID_RETURN;
+  }
+
+  /*
+    Aggregated statistics: the sizes are taken from the partitioned control
+    block itself, all other values are summed up over the partitions.
+  */
+  bzero(keycache_stats, sizeof(KEY_CACHE_STATISTICS));
+  keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size;
+  keycache_stats->block_size= (longlong) keycache->key_cache_block_size;
+  for (i = 0; i < partitions; i++)
+  {
+    partition= keycache->partition_array[i];
+    keycache_stats->blocks_used+= partition->blocks_used;
+    keycache_stats->blocks_unused+= partition->blocks_unused;
+    keycache_stats->blocks_changed+= partition->global_blocks_changed;
+    keycache_stats->blocks_warm+= partition->warm_blocks;
+    keycache_stats->read_requests+= partition->global_cache_r_requests;
+    keycache_stats->reads+= partition->global_cache_read;
+    keycache_stats->write_requests+= partition->global_cache_w_requests;
+    keycache_stats->writes+= partition->global_cache_write;
+  }
+  DBUG_VOID_RETURN;
+}
+
+/* 
+  The table of pointers to the key cache interface functions used by
+  partitioned key caches. init_key_cache() stores the address of this
+  table in the interface_funcs field of every key cache object it creates
+  as a partitioned key cache.
+
+  Each implementation function is cast to the function pointer type of
+  its slot in KEY_CACHE_FUNCS. The entries are positional, so their order
+  must follow the order of the fields of KEY_CACHE_FUNCS.
+ 
+  The current implementation of these functions does not allow to call
+  them from the MySQL server code directly. The key cache interface
+  wrappers must be used for this purpose. 
+*/
+
+static KEY_CACHE_FUNCS partitioned_key_cache_funcs =
+{
+  (INIT_KEY_CACHE) init_partitioned_key_cache,
+  (RESIZE_KEY_CACHE) resize_partitioned_key_cache,
+  (CHANGE_KEY_CACHE_PARAM) change_partitioned_key_cache_param,      
+  (KEY_CACHE_READ) partitioned_key_cache_read,
+  (KEY_CACHE_INSERT) partitioned_key_cache_insert,
+  (KEY_CACHE_WRITE) partitioned_key_cache_write,
+  (FLUSH_KEY_BLOCKS) flush_partitioned_key_cache_blocks, 
+  (RESET_KEY_CACHE_COUNTERS) reset_partitioned_key_cache_counters, 
+  (END_KEY_CACHE) end_partitioned_key_cache, 
+  (GET_KEY_CACHE_STATISTICS) get_partitioned_key_cache_statistics,
+};
+
+
+/****************************************************************************** 
+  Key Cache Interface Module
+
+  The module contains wrappers for all key cache interface functions. 
+  
+  Currently there are key caches of two types: simple key caches and
+  partitioned key caches. Each type (class) has its own implementation of the
+  basic key cache operations used by the MyISAM storage engine. The pointers
+  to the implementation functions are stored in two static structures of the
+  type KEY_CACHE_FUNCS: simple_key_cache_funcs - for simple key caches, and
+  partitioned_key_cache_funcs - for partitioned key caches. When a key cache
+  object is created the constructor procedure init_key_cache places a pointer
+  to the corresponding table into one of its fields. The procedure also
+  initializes a control block for the key cache object and saves the pointer
+  to this block in another field of the key cache object.
+  When a key cache wrapper function is invoked for a key cache object to
+  perform a basic key cache operation it looks into the interface table
+  associated with the key cache object and calls the corresponding
+  implementation of the operation. It passes the saved key cache control
+  block to this implementation. If, for some reason, the control block
+  has not been fully initialized yet, the wrapper function either does not
+  do anything or, in the case when it performs a read/write operation, the
+  function does it directly through the system i/o functions.
+
+  As we can see, the model with which the key cache interface is supported
+  is quite conventional for interfaces in general.
+          
+******************************************************************************/
+
+
+/*
+  Initialize a key cache
+
+  SYNOPSIS
+    init_key_cache()
+    keycache           pointer to the key cache to be initialized
+    key_cache_block_size    size of blocks to keep cached data
+    use_mem             total memory to use for cache buffers/structures 
+    division_limit      division limit (may be zero)
+    age_threshold       age threshold (may be zero)
+    partitions          number of partitions in the key cache
+
+  DESCRIPTION
+    The function creates a control block structure for a key cache and
+    places the pointer to this block in the structure keycache. 
+    If the value of the parameter 'partitions' is 0 then a simple key cache
+    is created. Otherwise a partitioned key cache with the specified number
+    of partitions is created.  
+    The parameter key_cache_block_size specifies the size of the blocks in
+    the key cache to be created. The parameters division_limit and
+    age_threshold determine the initial values of those characteristics of
+    the key cache that are used for midpoint insertion strategy. The parameter
+    use_mem  specifies the total amount of memory to be allocated for the
+    key cache buffers and for all auxiliary structures.       
+
+  RETURN VALUE
+    total number of blocks in key cache partitions, if successful,
+    <= 0 - otherwise.
+
+  NOTES
+    if keycache->key_cache_inited != 0 we assume that the memory
+    for the control block of the key cache has been already allocated.
+
+    It's assumed that no two threads call this function simultaneously
+    referring to the same key cache handle.
+*/
+
+int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
+		   size_t use_mem, uint division_limit,
+		   uint age_threshold, uint partitions)
+{
+  void *keycache_cb;
+  int blocks;
+
+  /*
+    Check the requested number of partitions before it is stored in the
+    control block and before the init function of the implementation
+    gets a chance to use it.
+  */
+  DBUG_ASSERT(partitions <= MAX_KEY_CACHE_PARTITIONS);
+
+  if (keycache->key_cache_inited)
+  {
+    /* The control block was allocated by an earlier call: reuse it */
+    keycache_cb= keycache->keycache_cb;
+  }
+  else
+  {
+    /*
+      First initialization of this key cache object: allocate a control
+      block of the requested kind and bind the matching interface table.
+      0 is returned if the control block cannot be allocated.
+    */
+    if (partitions == 0)
+    {
+      if (!(keycache_cb= (void *)  my_malloc(sizeof(SIMPLE_KEY_CACHE_CB),
+                                             MYF(0)))) 
+        return 0;
+      ((SIMPLE_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0;
+      keycache->key_cache_type= SIMPLE_KEY_CACHE;
+      keycache->interface_funcs= &simple_key_cache_funcs;
+    }
+    else
+    {
+      if (!(keycache_cb= (void *)  my_malloc(sizeof(PARTITIONED_KEY_CACHE_CB),
+                                             MYF(0)))) 
+        return 0;
+      ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0;
+      keycache->key_cache_type= PARTITIONED_KEY_CACHE;
+      keycache->interface_funcs= &partitioned_key_cache_funcs;
+    }
+    keycache->keycache_cb= keycache_cb;
+    keycache->key_cache_inited= 1;
+  }
+
+  if (partitions != 0)
+  {
+    /* Tell the partitioned implementation how many partitions are wanted */
+    ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions= partitions;
+  }
+
+  /* The key cache must not be used until its initialization has succeeded */
+  keycache->can_be_used= 0;
+  blocks= keycache->interface_funcs->init(keycache_cb, key_cache_block_size,
+                                          use_mem, division_limit,
+                                          age_threshold);
+
+  /* Publish the number of partitions found in the control block after init */
+  keycache->partitions= partitions ? 
+                        ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions :
+                        0;
+  if (blocks > 0)
+    keycache->can_be_used= 1;
+  return blocks;
+}
+
+
+/*
+  Resize a key cache
+
+  SYNOPSIS
+    resize_key_cache()
+    keycache            pointer to the key cache to be resized
+    key_cache_block_size    size of blocks to keep cached data
+    use_mem             total memory to use for the new key cache
+    division_limit      new division limit (if not zero)
+    age_threshold       new age threshold (if not zero)
+
+  DESCRIPTION
+    The function operates over the key cache keycache.
+    The parameter key_cache_block_size specifies the new size of the block
+    buffers in the key cache. The parameters division_limit and age_threshold
+    determine the new initial values of those characteristics of the key cache
+    that are used for midpoint insertion strategy. The parameter use_mem
+    specifies the total amount of  memory to be allocated for the key cache
+    buffers and for all auxiliary structures.
+
+  RETURN VALUE
+    number of blocks in the key cache, if successful,
+    -1 - if the key cache has not been initialized,
+    0 - otherwise.
+
+  NOTES
+    The function does not block the calls and executions of other functions
+    from the key cache interface. However it assumes that the calls of 
+    resize_key_cache itself are serialized.
+
+    Currently the function is called when the values of the variables
+    key_buffer_size and/or key_cache_block_size are being reset for
+    the key cache keycache.
+*/
+
+int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
+                     size_t use_mem, uint division_limit, uint age_threshold)
+{
+  int blocks;
+  uint wanted_partitions;
+
+  /* Nothing to resize: report -1 and leave can_be_used untouched */
+  if (!keycache->key_cache_inited)
+    return -1;
+
+  wanted_partitions= (uint) keycache->param_partitions;
+  if (use_mem && wanted_partitions != keycache->partitions)
+  {
+    /* The requested number of partitions differs: rebuild the key cache */
+    blocks= repartition_key_cache(keycache, key_cache_block_size, use_mem,
+                                  division_limit, age_threshold,
+                                  wanted_partitions);
+  }
+  else
+  {
+    blocks= keycache->interface_funcs->resize(keycache->keycache_cb,
+                                              key_cache_block_size, use_mem,
+                                              division_limit, age_threshold);
+    /* Pick up the partition count kept in the control block after resize */
+    if (keycache->partitions)
+      keycache->partitions=
+        ((PARTITIONED_KEY_CACHE_CB *) keycache->keycache_cb)->partitions;
+  }
+
+  keycache->can_be_used= (blocks >= 0);
+  return blocks;
+}
+
+
+/*
+  Change key cache parameters of a key cache
+
+  SYNOPSIS
+    change_key_cache_param()
+    keycache            pointer to the key cache to change parameters for
+    division_limit      new division limit (if not zero)
+    age_threshold       new age threshold (if not zero)
+
+  DESCRIPTION
+    The function sets new values of the division limit and the age threshold 
+    used when the key cache keycache employs midpoint insertion strategy.
+    The parameters division_limit and age_threshold provide these new values.
+
+  RETURN VALUE
+    none
+
+  NOTES
+    Currently the function is called when the values of the variables
+    key_cache_division_limit and/or key_cache_age_threshold are being reset
+    for the key cache keycache.
+*/
+
+void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
+                            uint age_threshold)
+{
+  /* A key cache that has not been initialized has no parameters to change */
+  if (!keycache->key_cache_inited)
+    return;
+
+  keycache->interface_funcs->change_param(keycache->keycache_cb,
+                                          division_limit, age_threshold);
+}
+
+
+/*
+  Destroy a key cache 
+
+  SYNOPSIS
+    end_key_cache()
+    keycache            pointer to the key cache to be destroyed
+    cleanup             <=> complete free 
+
+  DESCRIPTION
+    The function frees the memory allocated for the cache blocks and
+    auxiliary structures used by the key cache keycache. If the value
+    of the parameter cleanup is TRUE then all resources used by the key
+    cache are to be freed.
+
+  RETURN VALUE
+    none
+*/
+
+void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
+{
+  if (!keycache->key_cache_inited)
+    return;
+
+  keycache->interface_funcs->end(keycache->keycache_cb, cleanup);
+
+  if (cleanup)
+  {
+    /* Complete free: the control block itself is released as well */
+    if (keycache->keycache_cb)
+    {
+      my_free((uchar *) keycache->keycache_cb, MYF(0));
+      keycache->keycache_cb= 0;
+    }
+    keycache->key_cache_inited= 0;
+  }
+
+  /* With or without cleanup the key cache is not usable any more */
+  keycache->can_be_used= 0;
+}
+
+
+/*
+  Read a block of data from a key cache into a buffer
+
+  SYNOPSIS
+
+    key_cache_read()
+    keycache            pointer to the key cache to read data from  
+    file                handler for the file for the block of data to be read
+    filepos             position of the block of data in the file
+    level               determines the weight of the data
+    buff                buffer to where the data must be placed
+    length              length of the buffer
+    block_length        length of the data read from a key cache block 
+    return_buffer       return pointer to the key cache buffer with the data
+
+  DESCRIPTION
+    The function operates over buffers of the key cache keycache.
+    In a general case the function reads a block of data from the key cache
+    into the buffer buff of the size specified by the parameter length. The
+    beginning of the block of data to be read is specified by the parameters
+    file and filepos. The length of the read data is the same as the length
+    of the buffer.
+    If the parameter return_buffer is not ignored and its value is TRUE, and 
+    the data to be read of the specified size block_length can be read from one
+    key cache buffer, then the function returns a pointer to the data in the
+    key cache buffer.
+    The parameter 'level' is used only by the midpoint insertion strategy 
+    when the data or its portion cannot be found in the key cache.
+    The function reads data into the buffer directly from file if the control
+    block of the key cache has not been initialized yet. 
+   
+  RETURN VALUE
+    Returns address from where the data is placed if successful, 0 - otherwise.
+
+  NOTES.
+    Filepos must be a multiple of 'block_length', but it doesn't
+    have to be a multiple of key_cache_block_size;
+*/
+
+uchar *key_cache_read(KEY_CACHE *keycache,
+                      File file, my_off_t filepos, int level,
+                      uchar *buff, uint length,
+                      uint block_length, int return_buffer)
+{
+  if (!keycache->key_cache_inited || !keycache->can_be_used)
+  {
+    /*
+      The key cache is bypassed: read the data straight from the file.
+      We can't use mutex here as the key cache may not be initialized.
+    */
+    if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
+      return (uchar *) 0;
+    return buff;
+  }
+
+  return keycache->interface_funcs->read(keycache->keycache_cb,
+                                         file, filepos, level,
+                                         buff, length,
+                                         block_length, return_buffer);
+}
+
+
+/*
+  Insert a block of file data from a buffer into a key cache
+
+  SYNOPSIS
+    key_cache_insert()
+    keycache            pointer to the key cache to insert data into 
+    file                handler for the file to insert data from
+    filepos             position of the block of data in the file to insert
+    level               determines the weight of the data
+    buff                buffer to read data from
+    length              length of the data in the buffer
+
+  DESCRIPTION
+    The function operates over buffers of the key cache keycache.
+    The function writes a block of file data from a buffer into the key cache.
+    The buffer is specified with the parameters buff and length - the pointer
+    to the beginning of the buffer and its size respectively. It's assumed
+    that the buffer contains the data from 'file' allocated from the position
+    filepos.
+    The parameter level is used to set one characteristic for the key buffers
+    loaded with the data from buff. The characteristic is used only by the
+    midpoint insertion strategy. 
+   
+  RETURN VALUE
+    0 if a success, 1 - otherwise.
+
+  NOTES
+    The function is used by MyISAM to move all blocks from a index file to 
+    the key cache. 
+    It is assumed that it may be performed in parallel with reading the file
+    data from the key buffers by other threads.
+*/
+
+int key_cache_insert(KEY_CACHE *keycache,
+                     File file, my_off_t filepos, int level,
+                     uchar *buff, uint length)
+{
+  /* Without a usable key cache there is nothing to load: report success */
+  if (!keycache->key_cache_inited || !keycache->can_be_used)
+    return 0;
+
+  return keycache->interface_funcs->insert(keycache->keycache_cb,
+                                           file, filepos, level,
+                                           buff, length);
+}
+
+
+/*
+  Write data from a buffer into a key cache
+
+  SYNOPSIS
+
+    key_cache_write()
+    keycache            pointer to the key cache to write data to
+    file                handler for the file to write data to
+    filepos             position in the file to write data to
+    level               determines the weight of the data
+    buff                buffer with the data
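+    length              length of the buffer
+    block_length        passed unchanged to the key cache implementation
+    force_write         referred to as dont_write in this description, which
+                        is the name the implementation functions use for it:
+                        if it is 0 then all dirty pages involved in writing
+                        should have been flushed from key cache
+    file_extra          pointer to optional file attributes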
+
+  DESCRIPTION
+    The function operates over buffers of the key cache keycache.
+    In a general case the function writes data from a buffer into the key
+    cache. The buffer is specified with the parameters buff and length -
+    the pointer to the beginning of the buffer and its size respectively.
+    It's assumed the buffer contains the data to be written into 'file'
+    starting from the position filepos. 
+    If the value of the parameter dont_write is FALSE then the function
+    also writes the data into file.
+    The parameter level is used to set one characteristic for the key buffers
+    filled with the data from buff. The characteristic is employed only by
+    the midpoint insertion strategy.
+    The parameter file_extra may point to additional file attributes used
+    for optimization or other purposes.
+    The function writes data from the buffer directly into file if the control
+    block of the key cache has not been initialized yet.      
+
+  RETURN VALUE
+    0 if a success, 1 - otherwise.
+
+  NOTES
+    This implementation may exploit the fact that the function is called only
+    when a thread has got an exclusive lock for the key file.
+*/
+
+int key_cache_write(KEY_CACHE *keycache,
+                    File file, void *file_extra,
+                    my_off_t filepos, int level,
+                    uchar *buff, uint length,
+                    uint block_length, int force_write)
+{
+  if (!keycache->key_cache_inited || !keycache->can_be_used)
+  {
+    /*
+      The key cache is bypassed: write the data straight into the file.
+      We can't use mutex here as the key cache may not be initialized.
+    */
+    return my_pwrite(file, buff, length, filepos,
+                     MYF(MY_NABP | MY_WAIT_IF_FULL)) ? 1 : 0;
+  }
+
+  return keycache->interface_funcs->write(keycache->keycache_cb,
+                                          file, file_extra,
+                                          filepos, level,
+                                          buff, length,
+                                          block_length, force_write);
+}
+
+
+/*
+  Flush all blocks for a file from key buffers of a key cache 
+
+  SYNOPSIS
+
+    flush_key_blocks()
+    keycache            pointer to the key cache whose blocks are to be flushed
+    file                handler for the file to flush to
+    file_extra          maps of key cache (used for partitioned key caches)
+    flush_type          type of the flush operation
+
+  DESCRIPTION
+    The function operates over buffers of the key cache keycache.
+    In a general case the function flushes the data from all dirty key
+    buffers related to the file 'file' into this file. The function does
+    exactly this if the value of the parameter type is FLUSH_KEEP. If the
+    value of this parameter is FLUSH_RELEASE, the function additionally 
+    releases the key buffers containing data from 'file' for new usage.
+    If the value of the parameter type is FLUSH_IGNORE_CHANGED the function
+    just releases the key buffers containing data from 'file'.
+    If the value of the parameter type is FLUSH_KEEP the function may use
+    the value of the parameter file_extra pointing to possibly dirty
+    partitions to optimize the operation for partitioned key caches.
+      
+  RETURN
+    0   ok
+    1  error
+
+  NOTES
+    Any implementation of the function may exploit the fact that the function
+    is called only when a thread has got an exclusive lock for the key file.
+*/
+
+int flush_key_blocks(KEY_CACHE *keycache,
+                     int file, void *file_extra,
+                     enum flush_type type)
+{
+  /* A key cache that has not been initialized has nothing to flush */
+  if (!keycache->key_cache_inited)
+    return 0;
+
+  return keycache->interface_funcs->flush(keycache->keycache_cb,
+                                          file, file_extra, type);
+}
+
+
+/*
+  Reset the counters of a key cache
+
+  SYNOPSIS
+    reset_key_cache_counters()
+    name          the name of a key cache (unused)
+    keycache      pointer to the key cache for which to reset counters
+
+  DESCRIPTION
+    This function resets the values of the statistical counters for the key
+    cache keycache.
+    The parameter name is currently not used.
+
+  RETURN
+    0 on success (always because it can't fail)
+
+  NOTES
+   This procedure is used by process_key_caches() to reset the counters of all
+   currently used key caches, both the default one and the named ones.
+*/
+
+int reset_key_cache_counters(const char *name __attribute__((unused)),
+                             KEY_CACHE *keycache)
+{
+  /* No counters exist yet for a key cache that has not been initialized */
+  if (!keycache->key_cache_inited)
+    return 0;
+
+  /* 'name' is only handed through to the implementation */
+  return keycache->interface_funcs->reset_counters(name,
+                                                   keycache->keycache_cb);
+}
+
+
+/*
+  Get statistics for a key cache
+
+  SYNOPSIS
+    get_key_cache_statistics()
+    keycache            pointer to the key cache to get statistics for
+    partition_no        partition number to get statistics for
+    key_cache_stats OUT pointer to the structure for the returned statistics
+
+  DESCRIPTION
+    If the value of the parameter partition_no is equal to 0 then statistics
+    for the whole key cache keycache (aggregated statistics) is returned in the
+    fields of the structure key_cache_stat of the type KEY_CACHE_STATISTICS.
+    Otherwise the value of the parameter partition_no makes sense only for
+    a partitioned key cache. In this case the function returns statistics
+    for the partition with the specified number partition_no.   
+  
+  RETURN
+    none
+*/
+
+void get_key_cache_statistics(KEY_CACHE *keycache, uint partition_no,
+                              KEY_CACHE_STATISTICS *key_cache_stats)
+{
+  /* key_cache_stats is left untouched if the key cache is not initialized */
+  if (!keycache->key_cache_inited)
+    return;
+
+  keycache->interface_funcs->get_stats(keycache->keycache_cb,
+                                       partition_no, key_cache_stats);
+}
+
+/*
+  Repartition a key cache
+
+  SYNOPSIS
+    repartition_key_cache()
+    keycache            pointer to the key cache to be repartitioned
+    key_cache_block_size    size of blocks to keep cached data
+    use_mem             total memory to use for the new key cache
+    division_limit      new division limit (if not zero)
+    age_threshold       new age threshold (if not zero)
+    partitions          new number of partitions in the key cache 
+
+  DESCRIPTION
+    The function operates over the key cache keycache.
+    The parameter partitions specifies the number of partitions in the key
+    cache after repartitioning. If the value of this parameter is 0 then
+    a simple key cache must be created instead of the old one. 
+    The parameter key_cache_block_size specifies the new size of the block
+    buffers in the key cache. The parameters division_limit and age_threshold
+    determine the new initial values of those characteristics of the key cache
+    that are used for midpoint insertion strategy. The parameter use_mem
+    specifies the total amount of  memory to be allocated for the new key
+    cache buffers and for all auxiliary structures.
+
+  RETURN VALUE
+    the value returned by init_key_cache for the new key cache (the number
+    of blocks in the key cache, if successful, <= 0 - otherwise),
+    -1 - if the key cache has not been initialized.
+
+  NOTES
+    The function does not block the calls and executions of other functions
+    from the key cache interface. However it assumes that the calls of 
+    resize_key_cache itself are serialized.
+
+    Currently the function is called when the value of the variable
+    key_cache_partitions is being reset for the key cache keycache.
+*/
+
+int repartition_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
+		          size_t use_mem, uint division_limit,
+                          uint age_threshold, uint partitions)
+{
+  /*
+    Signed like the return type: the value is either -1 or the int result
+    of init_key_cache and must not take a detour through an unsigned type.
+  */
+  int blocks= -1;
+  if (keycache->key_cache_inited)
+  {
+    /*
+      Resize the old key cache to zero memory first.
+      NOTE(review): presumably this flushes and releases the old buffers
+      before the control block is destroyed - confirm against the resize
+      implementations.
+    */
+    keycache->interface_funcs->resize(keycache->keycache_cb,
+                                      key_cache_block_size, 0,
+                                      division_limit, age_threshold);
+    /* Complete free: init_key_cache will allocate a new control block */
+    end_key_cache(keycache, 1);
+    blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
+                           division_limit, age_threshold, partitions);
+  } 
+  return blocks;
+}
+
diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c
index ee4ad025b0b..9ea5678da9a 100644
--- a/mysys/mf_keycaches.c
+++ b/mysys/mf_keycaches.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 MySQL AB
+/* Copyright (C) 2003-2007 MySQL AB
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -25,269 +25,7 @@
 #include <keycache.h>
 #include <hash.h>
 #include <m_string.h>
-
-/*****************************************************************************
-  General functions to handle SAFE_HASH objects.
-
-  A SAFE_HASH object is used to store the hash, the mutex and default value
-  needed by the rest of the key cache code.
-  This is a separate struct to make it easy to later reuse the code for other
-  purposes
-
-  All entries are linked in a list to allow us to traverse all elements
-  and delete selected ones. (HASH doesn't allow any easy ways to do this).
-*****************************************************************************/
-
-/*
-  Struct to store a key and pointer to object
-*/
-
-typedef struct st_safe_hash_entry
-{
-  uchar *key;
-  uint length;
-  uchar *data;
-  struct st_safe_hash_entry *next, **prev;
-} SAFE_HASH_ENTRY;
-
-
-typedef struct st_safe_hash_with_default
-{
-#ifdef THREAD
-  rw_lock_t mutex;
-#endif
-  HASH hash;
-  uchar *default_value;
-  SAFE_HASH_ENTRY *root;
-} SAFE_HASH;
-
-
-/*
-  Free a SAFE_HASH_ENTRY
-
-  This function is called by the hash object on delete
-*/
-
-static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry)
-{
-  DBUG_ENTER("free_assign_entry");
-  my_free(entry);
-  DBUG_VOID_RETURN;
-}
-
-
-/* Get key and length for a SAFE_HASH_ENTRY */
-
-static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, size_t *length,
-                                  my_bool not_used __attribute__((unused)))
-{
-  *length=entry->length;
-  return (uchar*) entry->key;
-}
-
-
-/*
-  Init a SAFE_HASH object
-
-  SYNOPSIS
-    safe_hash_init()
-    hash		safe_hash handler
-    elements		Expected max number of elements
-    default_value	default value
-
-  NOTES
-    In case of error we set hash->default_value to 0 to allow one to call
-    safe_hash_free on an object that couldn't be initialized.
-
-  RETURN
-    0  ok
-    1  error
-*/
-
-static my_bool safe_hash_init(SAFE_HASH *hash, uint elements,
-			      uchar *default_value)
-{
-  DBUG_ENTER("safe_hash");
-  if (my_hash_init(&hash->hash, &my_charset_bin, elements,
-                   0, 0, (my_hash_get_key) safe_hash_entry_get,
-                   (void (*)(void*)) safe_hash_entry_free, 0))
-  {
-    hash->default_value= 0;
-    DBUG_RETURN(1);
-  }
-  my_rwlock_init(&hash->mutex, 0);
-  hash->default_value= default_value;
-  hash->root= 0;
-  DBUG_RETURN(0);
-}
-
-
-/*
-  Free a SAFE_HASH object
-
-  NOTES
-    This is safe to call on any object that has been sent to safe_hash_init()
-*/
-
-static void safe_hash_free(SAFE_HASH *hash)
-{
-  /*
-    Test if safe_hash_init succeeded. This will also guard us against multiple
-    free calls.
-  */
-  if (hash->default_value)
-  {
-    my_hash_free(&hash->hash);
-    rwlock_destroy(&hash->mutex);
-    hash->default_value=0;
-  }
-}
-
-/*
-  Return the value stored for a key or default value if no key
-*/
-
-static uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length)
-{
-  uchar *result;
-  DBUG_ENTER("safe_hash_search");
-  rw_rdlock(&hash->mutex);
-  result= my_hash_search(&hash->hash, key, length);
-  rw_unlock(&hash->mutex);
-  if (!result)
-    result= hash->default_value;
-  else
-    result= ((SAFE_HASH_ENTRY*) result)->data;
-  DBUG_PRINT("exit",("data: 0x%lx", (long) result));
-  DBUG_RETURN(result);
-}
-
-
-/*
-  Associate a key with some data
-
-  SYONOPSIS
-    safe_hash_set()
-    hash			Hash handle
-    key				key (path to table etc..)
-    length			Length of key
-    data			data to to associate with the data
-
-  NOTES
-    This can be used both to insert a new entry and change an existing
-    entry.
-    If one associates a key with the default key cache, the key is deleted
-
-  RETURN
-    0  ok
-    1  error (Can only be EOM). In this case my_message() is called.
-*/
-
-static my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length,
-			     uchar *data)
-{
-  SAFE_HASH_ENTRY *entry;
-  my_bool error= 0;
-  DBUG_ENTER("safe_hash_set");
-  DBUG_PRINT("enter",("key: %.*s  data: 0x%lx", length, key, (long) data));
-
-  rw_wrlock(&hash->mutex);
-  entry= (SAFE_HASH_ENTRY*) my_hash_search(&hash->hash, key, length);
-
-  if (data == hash->default_value)
-  {
-    /*
-      The key is to be associated with the default entry. In this case
-      we can just delete the entry (if it existed) from the hash as a
-      search will return the default entry
-    */
-    if (!entry)					/* nothing to do */
-      goto end;
-    /* unlink entry from list */
-    if ((*entry->prev= entry->next))
-      entry->next->prev= entry->prev;
-    my_hash_delete(&hash->hash, (uchar*) entry);
-    goto end;
-  }
-  if (entry)
-  {
-    /* Entry existed;  Just change the pointer to point at the new data */
-    entry->data= data;
-  }
-  else
-  {
-    if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length,
-					       MYF(MY_WME))))
-    {
-      error= 1;
-      goto end;
-    }
-    entry->key= (uchar*) (entry +1);
-    memcpy((char*) entry->key, (char*) key, length);
-    entry->length= length;
-    entry->data= data;
-    /* Link entry to list */
-    if ((entry->next= hash->root))
-      entry->next->prev= &entry->next;
-    entry->prev= &hash->root;
-    hash->root= entry;
-    if (my_hash_insert(&hash->hash, (uchar*) entry))
-    {
-      /* This can only happen if hash got out of memory */
-      my_free(entry);
-      error= 1;
-      goto end;
-    }
-  }
-
-end:
-  rw_unlock(&hash->mutex);
-  DBUG_RETURN(error);
-}
-
-
-/*
-  Change all entres with one data value to another data value
-
-  SYONOPSIS
-    safe_hash_change()
-    hash			Hash handle
-    old_data			Old data
-    new_data			Change all 'old_data' to this
-
-  NOTES
-    We use the linked list to traverse all elements in the hash as
-    this allows us to delete elements in the case where 'new_data' is the
-    default value.
-*/
-
-static void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data)
-{
-  SAFE_HASH_ENTRY *entry, *next;
-  DBUG_ENTER("safe_hash_set");
-
-  rw_wrlock(&hash->mutex);
-
-  for (entry= hash->root ; entry ; entry= next)
-  {
-    next= entry->next;
-    if (entry->data == old_data)
-    {
-      if (new_data == hash->default_value)
-      {
-        if ((*entry->prev= entry->next))
-          entry->next->prev= entry->prev;
-	my_hash_delete(&hash->hash, (uchar*) entry);
-      }
-      else
-	entry->data= new_data;
-    }
-  }
-
-  rw_unlock(&hash->mutex);
-  DBUG_VOID_RETURN;
-}
-
+#include "my_safehash.h"
 
 /*****************************************************************************
   Functions to handle the key cache objects
@@ -315,6 +53,7 @@ void multi_keycache_free(void)
     multi_key_cache_search()
     key				key to find (usually table path)
     uint length			Length of key.
+    def				Default value if no key cache
 
   NOTES
     This function is coded in such a way that we will return the
@@ -325,11 +64,13 @@ void multi_keycache_free(void)
     key cache to use
 */
 
-KEY_CACHE *multi_key_cache_search(uchar *key, uint length)
+KEY_CACHE *multi_key_cache_search(uchar *key, uint length,
+                                  KEY_CACHE *def)
 {
   if (!key_cache_hash.hash.records)
-    return dflt_key_cache;
-  return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length);
+    return def;
+  return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length,
+                                       (void*) def);
 }
 
 
@@ -361,3 +102,5 @@ void multi_key_cache_change(KEY_CACHE *old_data,
 {
   safe_hash_change(&key_cache_hash, (uchar*) old_data, (uchar*) new_data);
 }
+
+
diff --git a/mysys/mf_loadpath.c b/mysys/mf_loadpath.c
index 9350babc176..510c72ffa38 100644
--- a/mysys/mf_loadpath.c
+++ b/mysys/mf_loadpath.c
@@ -26,7 +26,8 @@
 char * my_load_path(char * to, const char *path,
 		       const char *own_path_prefix)
 {
-  char buff[FN_REFLEN];
+  char buff[FN_REFLEN+1];
+  const char *from= buff;
   int is_cur;
   DBUG_ENTER("my_load_path");
   DBUG_PRINT("enter",("path: %s  prefix: %s",path,
@@ -34,7 +35,7 @@ char * my_load_path(char * to, const char *path,
 
   if ((path[0] == FN_HOMELIB && path[1] == FN_LIBCHAR) ||
       test_if_hard_path(path))
-    (void) strnmov(buff, path, FN_REFLEN);
+    from= path;
   else if ((is_cur=(path[0] == FN_CURLIB && path[1] == FN_LIBCHAR)) ||
 	   (is_prefix(path,FN_PARENTDIR)) ||
 	   ! own_path_prefix)
@@ -42,14 +43,16 @@ char * my_load_path(char * to, const char *path,
     if (is_cur)
       is_cur=2;					/* Remove current dir */
     if (! my_getwd(buff,(uint) (FN_REFLEN-strlen(path)+is_cur),MYF(0)))
-      (void) strncat(buff, path+is_cur, FN_REFLEN-1);
+    {
+      size_t length= strlen(buff);
+      (void) strmake(buff + length, path+is_cur, FN_REFLEN - length);
+    }
     else
-      (void) strnmov(buff, path, FN_REFLEN);			/* Return org file name */
+      from= path;                           /* Return org file name */
   }
   else
     (void) strxnmov(buff, FN_REFLEN, own_path_prefix, path, NullS);
-  strnmov(to, buff, FN_REFLEN);
-  to[FN_REFLEN-1]= '\0';
+  strmake(to, from, FN_REFLEN-1);
   DBUG_PRINT("exit",("to: %s",to));
   DBUG_RETURN(to);
 } /* my_load_path */
diff --git a/mysys/mf_qsort.c b/mysys/mf_qsort.c
index 4b3ecb603a6..9e1ee2782a4 100644
--- a/mysys/mf_qsort.c
+++ b/mysys/mf_qsort.c
@@ -108,7 +108,7 @@ qsort_t my_qsort(void *base_ptr, size_t count, size_t size, qsort_cmp cmp)
   low  = (char*) base_ptr;
   high = low+ size * (count - 1);
   stack_ptr = stack + 1;
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
   /* The first element in the stack will be accessed for the last POP */
   stack[0].low=stack[0].high=0;
 #endif
diff --git a/mysys/mf_soundex.c b/mysys/mf_soundex.c
index fe30d8c81af..3a3dab52dd6 100644
--- a/mysys/mf_soundex.c
+++ b/mysys/mf_soundex.c
@@ -47,7 +47,7 @@ void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr,
 {
   char ch,last_ch;
   reg3 char * end;
-  register uchar *map=cs->to_upper;
+  register const uchar *map=cs->to_upper;
 
   if (remove_garbage)
   {
diff --git a/mysys/my_alloc.c b/mysys/my_alloc.c
index 903826dd975..eb3fb7941bf 100644
--- a/mysys/my_alloc.c
+++ b/mysys/my_alloc.c
@@ -21,7 +21,6 @@
 #undef EXTRA_DEBUG
 #define EXTRA_DEBUG
 
-
 /*
   Initialize memory root
 
@@ -56,7 +55,7 @@ void init_alloc_root(MEM_ROOT *mem_root, size_t block_size,
   mem_root->block_num= 4;			/* We shift this with >>2 */
   mem_root->first_block_usage= 0;
 
-#if !(defined(HAVE_purify) && defined(EXTRA_DEBUG))
+#if !(defined(HAVE_valgrind) && defined(EXTRA_DEBUG))
   if (pre_alloc_size)
   {
     if ((mem_root->free= mem_root->pre_alloc=
@@ -96,7 +95,7 @@ void reset_root_defaults(MEM_ROOT *mem_root, size_t block_size,
   DBUG_ASSERT(alloc_root_inited(mem_root));
 
   mem_root->block_size= block_size - ALLOC_ROOT_MIN_BLOCK_SIZE;
-#if !(defined(HAVE_purify) && defined(EXTRA_DEBUG))
+#if !(defined(HAVE_valgrind) && defined(EXTRA_DEBUG))
   if (pre_alloc_size)
   {
     size_t size= pre_alloc_size + ALIGN_SIZE(sizeof(USED_MEM));
@@ -147,7 +146,7 @@ void reset_root_defaults(MEM_ROOT *mem_root, size_t block_size,
 
 void *alloc_root(MEM_ROOT *mem_root, size_t length)
 {
-#if defined(HAVE_purify) && defined(EXTRA_DEBUG)
+#if defined(HAVE_valgrind) && defined(EXTRA_DEBUG)
   reg1 USED_MEM *next;
   DBUG_ENTER("alloc_root");
   DBUG_PRINT("enter",("root: 0x%lx", (long) mem_root));
diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c
index 3401c7301e9..22199758112 100644
--- a/mysys/my_bitmap.c
+++ b/mysys/my_bitmap.c
@@ -40,16 +40,31 @@
 #include <m_string.h>
 #include <my_bit.h>
 
-void create_last_word_mask(MY_BITMAP *map)
+
+/* Create a mask of the significant bits for the last byte (1,3,7,..255) */
+
+static inline uchar last_byte_mask(uint bits)
 {
-  /* Get the number of used bits (1..8) in the last byte */
-  unsigned int const used= 1U + ((map->n_bits-1U) & 0x7U);
+  /* Get the number of used bits-1 (0..7) in the last byte */
+  unsigned int const used= (bits - 1U) & 7U;
+  /* Return bitmask for the significant bits */
+  return ((2U << used) - 1);
+}
+
+/*
+  Create a mask with the upper 'unused' bits set and the lower 'used'
+  bits clear. The bits within each byte are stored in big-endian order.
+*/
+
+static inline uchar invers_last_byte_mask(uint bits)
+{
+  return last_byte_mask(bits) ^ 255;    /* 8-bit complement of the mask */
+}
 
-  /*
-    Create a mask with the upper 'unused' bits set and the lower 'used'
-    bits clear. The bits within each byte is stored in big-endian order.
-   */
-  unsigned char const mask= (~((1 << used) - 1)) & 255;
+
+void create_last_word_mask(MY_BITMAP *map)
+{
+  unsigned char const mask= invers_last_byte_mask(map->n_bits);
 
   /*
     The first bytes are to be set to zero since they represent real  bits
@@ -267,40 +282,41 @@ void bitmap_set_prefix(MY_BITMAP *map, uint prefix_size)
 
 my_bool bitmap_is_prefix(const MY_BITMAP *map, uint prefix_size)
 {
-  uint prefix_bits= prefix_size & 0x7, res;
-  uchar *m= (uchar*)map->bitmap;
-  uchar *end_prefix= m+prefix_size/8;
+  uint prefix_mask= last_byte_mask(prefix_size);
+  uchar *m= (uchar*) map->bitmap;
+  uchar *end_prefix= m+(prefix_size-1)/8;
   uchar *end;
   DBUG_ASSERT(m && prefix_size <= map->n_bits);
-  end= m+no_bytes_in_map(map);
+
+  /* Empty prefix is always true */
+  if (!prefix_size)
+    return 1;
 
   while (m < end_prefix)
     if (*m++ != 0xff)
       return 0;
 
-  *map->last_word_ptr&= ~map->last_word_mask; /*Clear bits*/
-  res= 0;
-  if (prefix_bits && *m++ != (1 << prefix_bits)-1)
-    goto ret;
+  end= ((uchar*) map->bitmap) + no_bytes_in_map(map) - 1;
+  if (m == end)
+    return ((*m & last_byte_mask(map->n_bits)) == prefix_mask);
 
-  while (m < end)
-    if (*m++ != 0)
-      goto ret;
-  res= 1;
-ret:
-  return res; 
-}
+  if (*m != prefix_mask)
+    return 0;
 
+  while (++m < end)
+    if (*m != 0)
+      return 0;
+  return ((*m & last_byte_mask(map->n_bits)) == 0);
+}
 
 my_bool bitmap_is_set_all(const MY_BITMAP *map)
 {
   my_bitmap_map *data_ptr= map->bitmap;
   my_bitmap_map *end= map->last_word_ptr;
-  *map->last_word_ptr |= map->last_word_mask;
-  for (; data_ptr <= end; data_ptr++)
+  for (; data_ptr < end; data_ptr++)
     if (*data_ptr != 0xFFFFFFFF)
       return FALSE;
-  return TRUE;
+  return (*data_ptr | map->last_word_mask) == 0xFFFFFFFF;
 }
 
 
@@ -308,13 +324,11 @@ my_bool bitmap_is_clear_all(const MY_BITMAP *map)
 {
   my_bitmap_map *data_ptr= map->bitmap;
   my_bitmap_map *end;
-  if (*map->last_word_ptr & ~map->last_word_mask)
-    return FALSE;
   end= map->last_word_ptr;
   for (; data_ptr < end; data_ptr++)
     if (*data_ptr)
       return FALSE;
-  return TRUE;
+  return (*data_ptr & ~map->last_word_mask) == 0;
 }
 
 /* Return TRUE if map1 is a subset of map2 */
@@ -327,14 +341,13 @@ my_bool bitmap_is_subset(const MY_BITMAP *map1, const MY_BITMAP *map2)
               map1->n_bits==map2->n_bits);
 
   end= map1->last_word_ptr;
-  *map1->last_word_ptr &= ~map1->last_word_mask;
-  *map2->last_word_ptr &= ~map2->last_word_mask;
-  while (m1 <= end)
+  while (m1 < end)
   {
     if ((*m1++) & ~(*m2++))
       return 0;
   }
-  return 1;
+  /* here both maps have the same number of bits - see assert above */
+  return ((*m1 & ~*m2 & ~map1->last_word_mask) ? 0 : 1);
 }
 
 /* True if bitmaps has any common bits */
@@ -347,14 +360,13 @@ my_bool bitmap_is_overlapping(const MY_BITMAP *map1, const MY_BITMAP *map2)
               map1->n_bits==map2->n_bits);
 
   end= map1->last_word_ptr;
-  *map1->last_word_ptr &= ~map1->last_word_mask;
-  *map2->last_word_ptr &= ~map2->last_word_mask;
-  while (m1 <= end)
+  while (m1 < end)
   {
     if ((*m1++) & (*m2++))
       return 1;
   }
-  return 0;
+  /* here both maps have the same number of bits - see assert above */
+  return ((*m1 & *m2 & ~map1->last_word_mask) ? 1 : 0);
 }
 
 
@@ -366,18 +378,35 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2)
   DBUG_ASSERT(map->bitmap && map2->bitmap);
 
   end= to+min(len,len2);
-  *map2->last_word_ptr&= ~map2->last_word_mask; /*Clear last bits in map2*/
   while (to < end)
     *to++ &= *from++;
 
-  if (len2 < len)
+  if (len2 <= len)
   {
-    end+=len-len2;
+    to[-1]&= ~map2->last_word_mask; /* Clear last not relevant bits */
+    end+= len-len2;
     while (to < end)
-      *to++=0;
+      *to++= 0;
   }
 }
 
+/* True if the union of the two bitmaps has all (n_bits) bits set */
+
+my_bool bitmap_union_is_set_all(const MY_BITMAP *map1, const MY_BITMAP *map2)
+{
+  my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end;
+
+  DBUG_ASSERT(map1->bitmap && map2->bitmap &&
+              map1->n_bits==map2->n_bits);
+  end= map1->last_word_ptr;
+  while (m1 < end)
+    if ((*m1++ | *m2++) != 0xFFFFFFFF)
+      return FALSE;
+  /* Same n_bits (see assert): map1's mask fills in the unused last bits */
+  return ((*m1 | *m2 | map1->last_word_mask) == 0xFFFFFFFF);
+}
+
+
 
 /*
   Set/clear all bits above a bit.
@@ -461,14 +490,13 @@ void bitmap_invert(MY_BITMAP *map)
 uint bitmap_bits_set(const MY_BITMAP *map)
 {  
   uchar *m= (uchar*)map->bitmap;
-  uchar *end= m + no_bytes_in_map(map);
+  uchar *end= m + no_bytes_in_map(map) - 1;
   uint res= 0;
 
   DBUG_ASSERT(map->bitmap);
-  *map->last_word_ptr&= ~map->last_word_mask; /*Reset last bits to zero*/
   while (m < end)
     res+= my_count_bits_ushort(*m++);
-  return res;
+  return res + my_count_bits_ushort(*m & last_byte_mask(map->n_bits));
 }
 
 
@@ -492,27 +520,30 @@ uint bitmap_get_first_set(const MY_BITMAP *map)
 
   DBUG_ASSERT(map->bitmap);
   data_ptr= map->bitmap;
-  *map->last_word_ptr &= ~map->last_word_mask;
 
-  for (i=0; data_ptr <= end; data_ptr++, i++)
-  {
+  for (i=0; data_ptr < end; data_ptr++, i++)
     if (*data_ptr)
+      goto found;
+  if (!(*data_ptr & ~map->last_word_mask))
+    return MY_BIT_NONE;
+
+found:
+  {
+    byte_ptr= (uchar*)data_ptr;
+    for (j=0; ; j++, byte_ptr++)
     {
-      byte_ptr= (uchar*)data_ptr;
-      for (j=0; ; j++, byte_ptr++)
+      if (*byte_ptr)
       {
-        if (*byte_ptr)
+        for (k=0; ; k++)
         {
-          for (k=0; ; k++)
-          {
-            if (*byte_ptr & (1 << k))
-              return (i*32) + (j*8) + k;
-          }
+          if (*byte_ptr & (1 << k))
+            return (i*32) + (j*8) + k;
         }
       }
     }
   }
-  return MY_BIT_NONE;
+  DBUG_ASSERT(0);
+  return MY_BIT_NONE;                           /* Impossible */
 }
 
 
@@ -526,25 +557,29 @@ uint bitmap_get_first(const MY_BITMAP *map)
   data_ptr= map->bitmap;
   *map->last_word_ptr|= map->last_word_mask;
 
-  for (i=0; data_ptr <= end; data_ptr++, i++)
-  {
+  for (i=0; data_ptr < end; data_ptr++, i++)
     if (*data_ptr != 0xFFFFFFFF)
+      goto found;
+  if ((*data_ptr | map->last_word_mask) == 0xFFFFFFFF)
+    return MY_BIT_NONE;
+
+found:
+  {
+    byte_ptr= (uchar*)data_ptr;
+    for (j=0; ; j++, byte_ptr++)
     {
-      byte_ptr= (uchar*)data_ptr;
-      for (j=0; ; j++, byte_ptr++)
+      if (*byte_ptr != 0xFF)
       {
-        if (*byte_ptr != 0xFF)
+        for (k=0; ; k++)
         {
-          for (k=0; ; k++)
-          {
-            if (!(*byte_ptr & (1 << k)))
-              return (i*32) + (j*8) + k;
-          }
+          if (!(*byte_ptr & (1 << k)))
+            return (i*32) + (j*8) + k;
         }
       }
     }
   }
-  return MY_BIT_NONE;
+  DBUG_ASSERT(0);
+  return MY_BIT_NONE;                           /* Impossible */
 }
 
 
@@ -573,7 +608,7 @@ uint get_rand_bit(uint bitsize)
   return (rand() % bitsize);
 }
 
-bool test_set_get_clear_bit(MY_BITMAP *map, uint bitsize)
+my_bool test_set_get_clear_bit(MY_BITMAP *map, uint bitsize)
 {
   uint i, test_bit;
   uint no_loops= bitsize > 128 ? 128 : bitsize;
@@ -596,7 +631,7 @@ error2:
   return TRUE;
 }
 
-bool test_flip_bit(MY_BITMAP *map, uint bitsize)
+my_bool test_flip_bit(MY_BITMAP *map, uint bitsize)
 {
   uint i, test_bit;
   uint no_loops= bitsize > 128 ? 128 : bitsize;
@@ -619,13 +654,13 @@ error2:
   return TRUE;
 }
 
-bool test_operators(MY_BITMAP *map __attribute__((unused)),
+my_bool test_operators(MY_BITMAP *map __attribute__((unused)),
                     uint bitsize __attribute__((unused)))
 {
   return FALSE;
 }
 
-bool test_get_all_bits(MY_BITMAP *map, uint bitsize)
+my_bool test_get_all_bits(MY_BITMAP *map, uint bitsize)
 {
   uint i;
   bitmap_set_all(map);
@@ -667,7 +702,7 @@ error6:
   return TRUE;
 }
 
-bool test_compare_operators(MY_BITMAP *map, uint bitsize)
+my_bool test_compare_operators(MY_BITMAP *map, uint bitsize)
 {
   uint i, j, test_bit1, test_bit2, test_bit3,test_bit4;
   uint no_loops= bitsize > 128 ? 128 : bitsize;
@@ -773,7 +808,7 @@ error5:
   return TRUE;
 }
 
-bool test_count_bits_set(MY_BITMAP *map, uint bitsize)
+my_bool test_count_bits_set(MY_BITMAP *map, uint bitsize)
 {
   uint i, bit_count=0, test_bit;
   uint no_loops= bitsize > 128 ? 128 : bitsize;
@@ -799,7 +834,7 @@ error2:
   return TRUE;
 }
 
-bool test_get_first_bit(MY_BITMAP *map, uint bitsize)
+my_bool test_get_first_bit(MY_BITMAP *map, uint bitsize)
 {
   uint i, test_bit;
   uint no_loops= bitsize > 128 ? 128 : bitsize;
@@ -824,7 +859,7 @@ error2:
   return TRUE;
 }
 
-bool test_get_next_bit(MY_BITMAP *map, uint bitsize)
+my_bool test_get_next_bit(MY_BITMAP *map, uint bitsize)
 {
   uint i, j, test_bit;
   uint no_loops= bitsize > 128 ? 128 : bitsize;
@@ -843,7 +878,7 @@ error1:
   return TRUE;
 }
 
-bool test_prefix(MY_BITMAP *map, uint bitsize)
+my_bool test_prefix(MY_BITMAP *map, uint bitsize)
 {
   uint i, j, test_bit;
   uint no_loops= bitsize > 128 ? 128 : bitsize;
@@ -878,7 +913,7 @@ error3:
 }
 
 
-bool do_test(uint bitsize)
+my_bool do_test(uint bitsize)
 {
   MY_BITMAP map;
   my_bitmap_map buf[1024];
diff --git a/mysys/my_chmod.c b/mysys/my_chmod.c
new file mode 100644
index 00000000000..afdea758833
--- /dev/null
+++ b/mysys/my_chmod.c
@@ -0,0 +1,48 @@
+/* Copyright (C) 2000 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include "mysys_priv.h"
+#include "mysys_err.h"
+
+/**
+   @brief Change mode of file.
+
+   @fn my_chmod()
+   @param name          Filename
+   @param mode          New mode, passed unchanged to chmod()
+   @param my_flags      Flags; with MY_WME a failure is reported via my_error()
+
+   @note
+   On failure my_errno is set to errno.
+
+   @retval 0    Ok
+   @retval 1    Error
+*/
+
+int my_chmod(const char *name, mode_t mode, myf my_flags)
+{
+  DBUG_ENTER("my_chmod");
+  DBUG_PRINT("my",("name: %s  mode: %lu  flags: %d", name, (ulong) mode,
+                   my_flags));
+
+  if (chmod(name, mode))
+  {
+    my_errno= errno;
+    if (my_flags & MY_WME)
+      my_error(EE_CANT_CHMOD, MYF(0), name, (ulong) mode, my_errno);
+    DBUG_RETURN(1);
+  }
+  DBUG_RETURN(0);
+}
diff --git a/mysys/my_compress.c b/mysys/my_compress.c
index 360390d376a..d408520a862 100644
--- a/mysys/my_compress.c
+++ b/mysys/my_compress.c
@@ -57,19 +57,86 @@ my_bool my_compress(uchar *packet, size_t *len, size_t *complen)
 }
 
 
+/*
+  Valgrind normally gives false alarms for zlib operations, in the form of
+  "conditional jump depends on uninitialised values" etc. The reason is
+  explained in the zlib FAQ (http://www.zlib.net/zlib_faq.html#faq36):
+    "That is intentional for performance reasons, and the output of deflate
+    is not affected."
+
+  Also discussed on a blog
+  (http://www.sirena.org.uk/log/2006/02/19/zlib-generating-valgrind-warnings/):
+    "...loop unrolling in the zlib library causes the mentioned
+    'Conditional jump or move depends on uninitialised value(s)'
+    warnings. These are safe since the results of the comparison are
+    subsequently ignored..."
+
+    "the results of the calculations are discarded by bounds checking done
+    after the loop exits"
+
+  Fix by initializing the memory allocated by zlib when running under Valgrind.
+  This fix is safe, since such memory is only used internally by zlib, so we
+  will not hide any bugs in mysql this way.
+
+  my_az_allocator() and my_az_free() are installed as zlib's zalloc/zfree.
+  NOTE(review): confirm that my_free() accepts a flags argument in this tree.
+*/
+void *my_az_allocator(void *dummy __attribute__((unused)), unsigned int items,
+                      unsigned int size)
+{
+  return my_malloc((size_t)items*(size_t)size, IF_VALGRIND(MY_ZEROFILL, MYF(0)));
+}
+
+void my_az_free(void *dummy __attribute__((unused)), void *address)
+{
+  my_free(address, MYF(MY_ALLOW_ZERO_PTR));
+}
+
+/*
+  Works like zlib compress() (*destLen: in dest size, out compressed size),
+  but with custom allocators to work better with my_malloc leak detection and
+  Valgrind. Returns Z_BUF_ERROR if a length does not fit in a zlib uInt.
+*/
+int my_compress_buffer(uchar *dest, size_t *destLen,
+                       const uchar *source, size_t sourceLen)
+{
+  z_stream stream;
+  int err;
+
+  stream.next_in= (Bytef*) source;
+  stream.avail_in= (uInt) sourceLen;
+  stream.next_out= (Bytef*) dest;
+  stream.avail_out= (uInt) *destLen;
+  if ((size_t) stream.avail_in != sourceLen ||
+      (size_t) stream.avail_out != *destLen)
+    return Z_BUF_ERROR;                         /* Length was truncated */
+
+  stream.zalloc= (alloc_func) my_az_allocator;
+  stream.zfree= (free_func) my_az_free;
+  stream.opaque= (voidpf) 0;
+
+  if ((err= deflateInit(&stream, Z_DEFAULT_COMPRESSION)) != Z_OK)
+    return err;
+
+  if ((err= deflate(&stream, Z_FINISH)) != Z_STREAM_END)
+  {
+    deflateEnd(&stream);
+    return err == Z_OK ? Z_BUF_ERROR : err;
+  }
+  *destLen= stream.total_out;
+  return deflateEnd(&stream);
+}
+
 uchar *my_compress_alloc(const uchar *packet, size_t *len, size_t *complen)
 {
   uchar *compbuf;
-  uLongf tmp_complen;
   int res;
   *complen=  *len * 120 / 100 + 12;
 
   if (!(compbuf= (uchar *) my_malloc(*complen, MYF(MY_WME))))
     return 0;					/* Not enough memory */
 
-  tmp_complen= (uint) *complen;
-  res= compress((Bytef*) compbuf, &tmp_complen, (Bytef*) packet, (uLong) *len);
-  *complen=    tmp_complen;
+  res= my_compress_buffer(compbuf, complen, packet, *len);
 
   if (res != Z_OK)
   {
@@ -118,7 +185,7 @@ my_bool my_uncompress(uchar *packet, size_t len, size_t *complen)
     if (!compbuf)
       DBUG_RETURN(1);				/* Not enough memory */
 
-    tmp_complen= (uint) *complen;
+    tmp_complen= (uLongf) *complen;
     error= uncompress((Bytef*) compbuf, &tmp_complen, (Bytef*) packet,
                       (uLong) len);
     *complen= tmp_complen;
diff --git a/mysys/my_copy.c b/mysys/my_copy.c
index 35324dd4cef..17bb796fd9b 100644
--- a/mysys/my_copy.c
+++ b/mysys/my_copy.c
@@ -14,6 +14,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
 #include "mysys_priv.h"
+#include "mysys_err.h"
 #include <my_dir.h> /* for stat */
 #include <m_string.h>
 #include "mysys_err.h"
@@ -57,6 +58,7 @@ int my_copy(const char *from, const char *to, myf MyFlags)
   File from_file,to_file;
   uchar buff[IO_SIZE];
   MY_STAT stat_buff,new_stat_buff;
+  my_bool file_created= 0;
   DBUG_ENTER("my_copy");
   DBUG_PRINT("my",("from %s to %s MyFlags %d", from, to, MyFlags));
 
@@ -81,6 +83,7 @@ int my_copy(const char *from, const char *to, myf MyFlags)
 			     MyFlags)) < 0)
       goto err;
 
+    file_created= 1;
     while ((Count=my_read(from_file, buff, sizeof(buff), MyFlags)) != 0)
     {
 	if (Count == (uint) -1 ||
@@ -98,6 +101,8 @@ int my_copy(const char *from, const char *to, myf MyFlags)
     if (my_close(from_file,MyFlags) | my_close(to_file,MyFlags))
       DBUG_RETURN(-1);				/* Error on close */
 
+    from_file=to_file= -1;                      /* Files are closed */
+
     /* Copy modes if possible */
 
     if (MyFlags & MY_HOLD_ORIGINAL_MODES && !new_file_stat)
@@ -106,18 +111,20 @@ int my_copy(const char *from, const char *to, myf MyFlags)
     if (chmod(to, stat_buff.st_mode & 07777))
     {
       my_errno= errno;
-      if (MyFlags & (MY_FAE+MY_WME))
-        my_error(EE_CHANGE_PERMISSIONS, MYF(ME_BELL+ME_WAITTANG), from, errno);
-      goto err;
+      if (MyFlags & MY_WME)
+        my_error(EE_CHANGE_PERMISSIONS, MYF(ME_BELL+ME_WAITTANG), to, errno);
+      if (MyFlags & MY_FAE)
+        goto err;
     }
 #if !defined(__WIN__)
     /* Copy ownership */
     if (chown(to, stat_buff.st_uid, stat_buff.st_gid))
     {
       my_errno= errno;
-      if (MyFlags & (MY_FAE+MY_WME))
-        my_error(EE_CHANGE_OWNERSHIP, MYF(ME_BELL+ME_WAITTANG), from, errno);
-      goto err;
+      if (MyFlags & MY_WME)
+        my_error(EE_CANT_COPY_OWNERSHIP, MYF(ME_JUST_WARNING), to, errno);
+      if (MyFlags & MY_FAE)
+        goto err;
     }
 #endif
 
@@ -134,11 +141,11 @@ int my_copy(const char *from, const char *to, myf MyFlags)
 
 err:
   if (from_file >= 0) (void) my_close(from_file,MyFlags);
-  if (to_file >= 0)
-  {
-    (void) my_close(to_file, MyFlags);
-    /* attempt to delete the to-file we've partially written */
+  if (to_file >= 0)   (void) my_close(to_file, MyFlags);
+
+  /* attempt to delete the to-file we've partially written */
+  if (file_created)
     (void) my_delete(to, MyFlags);
-  }
+
   DBUG_RETURN(-1);
 } /* my_copy */
diff --git a/mysys/my_error.c b/mysys/my_error.c
index fa62cc604b6..a682585a82e 100644
--- a/mysys/my_error.c
+++ b/mysys/my_error.c
@@ -114,7 +114,7 @@ void my_printf_error(uint error, const char *format, myf MyFlags, ...)
   va_list args;
   char ebuff[ERRMSGSIZE];
   DBUG_ENTER("my_printf_error");
-  DBUG_PRINT("my", ("nr: %d  MyFlags: %d  errno: %d  Format: %s",
+  DBUG_PRINT("my", ("nr: %d  MyFlags: %d  errno: %d  format: %s",
 		    error, MyFlags, errno, format));
 
   va_start(args,MyFlags);
@@ -148,6 +148,30 @@ void my_printv_error(uint error, const char *format, myf MyFlags, va_list ap)
   DBUG_VOID_RETURN;
 }
 
+
+/*
+  Error with va_list. NOTE(review): a void my_printv_error() with the same
+  parameters is already defined just above; confirm this is not a duplicate.
+
+  SYNOPSIS
+    my_printv_error()
+      error     Errno
+      format    Format string
+      MyFlags   Flags
+      ap        Argument list for format
+*/
+
+int my_printv_error(uint error, const char *format, myf MyFlags, va_list ap)
+{
+  char ebuff[ERRMSGSIZE+20];
+  DBUG_ENTER("my_printv_error");
+  DBUG_PRINT("my", ("nr: %d  MyFlags: %d  errno: %d  format: %s",
+		    error, MyFlags, errno, format));
+
+  (void) my_vsnprintf(ebuff, sizeof(ebuff), format, ap);
+  DBUG_RETURN((*error_handler_hook)(error, ebuff, MyFlags));
+}
+
 /*
   Give message using error_handler_hook
 
diff --git a/mysys/my_file.c b/mysys/my_file.c
index e4b7cd7779f..a6a4244a772 100644
--- a/mysys/my_file.c
+++ b/mysys/my_file.c
@@ -51,7 +51,7 @@ static uint set_max_open_files(uint max_file_limit)
     DBUG_PRINT("info", ("rlim_cur: %u  rlim_max: %u",
 			(uint) rlimit.rlim_cur,
 			(uint) rlimit.rlim_max));
-    if (rlimit.rlim_cur == RLIM_INFINITY)
+    if ((ulonglong) rlimit.rlim_cur == (ulonglong) RLIM_INFINITY)
       rlimit.rlim_cur = max_file_limit;
     if (rlimit.rlim_cur >= max_file_limit)
       DBUG_RETURN(rlimit.rlim_cur);		/* purecov: inspected */
diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c
index 861e4380690..ebe5c7ddf80 100644
--- a/mysys/my_fopen.c
+++ b/mysys/my_fopen.c
@@ -132,7 +132,7 @@ FILE *my_fdopen(File Filedes, const char *name, int Flags, myf MyFlags)
   FILE *fd;
   char type[5];
   DBUG_ENTER("my_fdopen");
-  DBUG_PRINT("my",("Fd: %d  Flags: %d  MyFlags: %d",
+  DBUG_PRINT("my",("fd: %d  Flags: %d  MyFlags: %d",
 		   Filedes, Flags, MyFlags));
 
   make_ftype(type,Flags);
diff --git a/mysys/my_gethostbyname.c b/mysys/my_gethostbyname.c
index 4b7e9054d61..3962de21fc0 100644
--- a/mysys/my_gethostbyname.c
+++ b/mysys/my_gethostbyname.c
@@ -91,9 +91,12 @@ extern mysql_mutex_t LOCK_gethostbyname_r;
   is finished with the structure.
 */
 
-struct hostent *my_gethostbyname_r(const char *name,
-				   struct hostent *result, char *buffer,
-				   int buflen, int *h_errnop)
+struct hostent *
+my_gethostbyname_r(const char *name,
+                   struct hostent *result __attribute__((unused)),
+                   char *buffer __attribute__((unused)),
+                   int buflen __attribute__((unused)),
+                   int *h_errnop)
 {
   struct hostent *hp;
   mysql_mutex_lock(&LOCK_gethostbyname_r);
diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c
index 2ec2f8eb5c9..a13ba364594 100644
--- a/mysys/my_getopt.c
+++ b/mysys/my_getopt.c
@@ -157,7 +157,7 @@ int handle_options(int *argc, char ***argv,
   my_bool end_of_options= 0, must_be_var, set_maximum_value,
           option_is_loose;
   char **pos, **pos_end, *optend, *opt_str, key_name[FN_REFLEN];
-  const char *UNINIT_VAR(prev_found);
+  const char *prev_found;
   const struct my_option *optp;
   void *value;
   int error, i;
@@ -228,6 +228,7 @@ int handle_options(int *argc, char ***argv,
 	  Find first the right option. Return error in case of an ambiguous,
 	  or unknown option
 	*/
+        LINT_INIT(prev_found);
 	optp= longopts;
 	if (!(opt_found= findopt(opt_str, length, &optp, &prev_found)))
 	{
@@ -859,7 +860,7 @@ static longlong eval_num_suffix(char *argument, int *error, char *option_name)
   return num;
 }
 
-/* 
+/*
   function: getopt_ll
 
   Evaluates and returns the value that user gave as an argument
@@ -1008,7 +1009,6 @@ ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp,
     my_getopt_error_reporter(WARNING_LEVEL,
                              "option '%s': unsigned value %s adjusted to %s",
                              optp->name, ullstr(old, buf1), ullstr(num, buf2));
-
   return num;
 }
 
@@ -1069,8 +1069,8 @@ static double getopt_double(char *arg, const struct my_option *optp, int *err)
 
   SYNOPSIS
     init_one_value()
-    option		Option to initialize
-    value		Pointer to variable
+    option              Option to initialize
+    value               Pointer to variable
 */
 
 static void init_one_value(const struct my_option *option, void *variable,
@@ -1171,7 +1171,7 @@ void my_cleanup_options(const struct my_option *options)
 }
 
 
-/* 
+/*
   initialize all variables to their default values
 
   SYNOPSIS
@@ -1375,7 +1375,7 @@ void my_print_variables(const struct my_option *options)
 	printf("%d\n", *((int*) value));
 	break;
       case GET_UINT:
-	printf("%d\n", *((uint*) value));
+	printf("%u\n", *((uint*) value));
 	break;
       case GET_LONG:
 	printf("%ld\n", *((long*) value));
@@ -1387,7 +1387,7 @@ void my_print_variables(const struct my_option *options)
 	printf("%s\n", llstr(*((longlong*) value), buff));
 	break;
       case GET_ULL:
-	longlong2str(*((ulonglong*) value), buff, 10);
+	longlong10_to_str(*((ulonglong*) value), buff, 10);
 	printf("%s\n", buff);
 	break;
       case GET_DOUBLE:
diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c
index cc5d1b83efb..60cd06b3968 100644
--- a/mysys/my_getsystime.c
+++ b/mysys/my_getsystime.c
@@ -25,6 +25,10 @@
 #include "mysys_priv.h"
 #include "my_static.h"
 
+#ifdef HAVE_LINUX_UNISTD_H
+#include <linux/unistd.h>
+#endif
+
 ulonglong my_getsystime()
 {
 #ifdef HAVE_CLOCK_GETTIME
@@ -218,3 +222,24 @@ time_t my_time_possible_from_micro(ulonglong microtime __attribute__((unused)))
 #endif  /* defined(__WIN__) */
 }
 
+
+/*
+  Return thread cpu time in units of 100 nanoseconds (0 if not available)
+*/
+
+ulonglong my_getcputime()
+{
+#ifdef HAVE_CLOCK_GETTIME
+  struct timespec tp;
+  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))
+    return 0;
+  return (ulonglong)tp.tv_sec*10000000+(ulonglong)tp.tv_nsec/100;
+#elif defined(__NR_clock_gettime)
+  struct timespec tp;
+  if (syscall(__NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &tp))
+    return 0;
+  return (ulonglong)tp.tv_sec*10000000+(ulonglong)tp.tv_nsec/100;
+#else
+  return 0;
+#endif /* HAVE_CLOCK_GETTIME */
+}
diff --git a/mysys/my_handler.c b/mysys/my_handler.c
index bd1e313d066..8daa9af39a9 100644
--- a/mysys/my_handler.c
+++ b/mysys/my_handler.c
@@ -20,28 +20,29 @@
 #include <my_base.h>
 #include <my_handler.h>
 #include <my_sys.h>
-
 #include "my_handler_errors.h"
 
 #define CMP_NUM(a,b)    (((a) < (b)) ? -1 : ((a) == (b)) ? 0 : 1)
 
-int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
-		    uchar *b, uint b_length, my_bool part_key,
+int ha_compare_text(CHARSET_INFO *charset_info, const uchar *a, uint a_length,
+		    const uchar *b, uint b_length, my_bool part_key,
 		    my_bool skip_end_space)
 {
   if (!part_key)
     return charset_info->coll->strnncollsp(charset_info, a, a_length,
-                                           b, b_length, (my_bool)!skip_end_space);
+                                           b, b_length,
+                                           (my_bool)!skip_end_space);
   return charset_info->coll->strnncoll(charset_info, a, a_length,
                                        b, b_length, part_key);
 }
 
 
-static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
+static int compare_bin(const uchar *a, uint a_length,
+                       const uchar *b, uint b_length,
                        my_bool part_key, my_bool skip_end_space)
 {
   uint length= min(a_length,b_length);
-  uchar *end= a+ length;
+  const uchar *end= a+ length;
   int flag;
 
   while (a < end)
@@ -85,13 +86,15 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
     ha_key_cmp()
     keyseg	Array of key segments of key to compare
     a		First key to compare, in format from _mi_pack_key()
-		This is normally key specified by user
-    b		Second key to compare.  This is always from a row
-    key_length	Length of key to compare.  This can be shorter than
-		a to just compare sub keys
+		This is always from the row
+    b		Second key to compare.  This is from the row or the user
+    key_length	Length of key to compare, based on key b.  This can be shorter
+		than b to just compare sub keys
     next_flag	How keys should be compared
 		If bit SEARCH_FIND is not set the keys includes the row
 		position and this should also be compared
+                If SEARCH_PAGE_KEY_HAS_TRANSID is set then 'a' has transid
+                If SEARCH_USER_KEY_HAS_TRANSID is set then 'b' has transid
     diff_pos    OUT Number of first keypart where values differ, counting 
                 from one.
     diff_pos[1] OUT  (b + diff_pos[1]) points to first value in tuple b
@@ -120,8 +123,8 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
 
 #define FCMP(A,B) ((int) (A) - (int) (B))
 
-int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
-	       register uchar *b, uint key_length, uint nextflag,
+int ha_key_cmp(register HA_KEYSEG *keyseg, register const uchar *a,
+	       register const uchar *b, uint key_length, uint32 nextflag,
 	       uint *diff_pos)
 {
   int flag;
@@ -131,12 +134,12 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
   float f_1,f_2;
   double d_1,d_2;
   uint next_key_length;
-  uchar *orig_b= b;
+  const uchar *orig_b= b;
 
   *diff_pos=0;
   for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++)
   {
-    uchar *end;
+    const uchar *end;
     uint piks=! (keyseg->flag & HA_NO_SORT);
     (*diff_pos)++;
     diff_pos[1]= (uint)(b - orig_b);
@@ -153,8 +156,13 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
       b++;
       if (!*a++)                                /* If key was NULL */
       {
-        if (nextflag == (SEARCH_FIND | SEARCH_UPDATE))
-          nextflag=SEARCH_SAME;                 /* Allow duplicate keys */
+        if ((nextflag & (SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT |
+                         SEARCH_NULL_ARE_EQUAL)) ==
+            (SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT))
+        {
+          /* Allow duplicate keys */
+          nextflag= (nextflag & ~(SEARCH_FIND | SEARCH_UPDATE)) | SEARCH_SAME;
+        }
   	else if (nextflag & SEARCH_NULL_ARE_NOT_EQUAL)
 	{
 	  /*
@@ -367,7 +375,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
 
       if (keyseg->flag & HA_REVERSE_SORT)
       {
-        swap_variables(uchar*, a, b);
+        swap_variables(const uchar*, a, b);
         swap_flag=1;                            /* Remember swap of a & b */
         end= a+ (int) (end-b);
       }
@@ -392,7 +400,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
 	  if (*b != '-')
 	    return -1;
 	  a++; b++;
-	  swap_variables(uchar*, a, b);
+	  swap_variables(const uchar*, a, b);
 	  swap_variables(int, alength, blength);
 	  swap_flag=1-swap_flag;
 	  alength--; blength--;
@@ -421,7 +429,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
       }
 
       if (swap_flag)                            /* Restore pointers */
-        swap_variables(uchar*, a, b);
+        swap_variables(const uchar*, a, b);
       break;
     }
 #ifdef HAVE_LONG_LONG
@@ -456,18 +464,90 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
 end:
   if (!(nextflag & SEARCH_FIND))
   {
+    /*
+      Compare rowid and possible transid
+      This happens in the following case:
+      - INSERT, UPDATE, DELETE when we have not unique keys or
+        are using versioning
+      - SEARCH_NEXT, SEARCH_PREVIOUS when we need to restart search
+
+      The logic for comparing transids is as follows:
+      Keys which have a transid mark this in the lowest bit of the rowid. This
+      means that if we are comparing a key with a transid with another key
+      that doesn't have a transid, we must reset the lowest bit for both keys.
+
+      When we have transid, the keys are compared in transid order.
+      A key without a transid is regarded to be smaller than a key with
+      a transid.
+    */
+
     uint i;
+    uchar key_mask, tmp_a, tmp_b;
+
     if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) /* Find record after key */
       return (nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1;
-    flag=0;
-    for (i=keyseg->length ; i-- > 0 ; )
+    key_mask= (uchar) 255;
+
+    if (!(nextflag & (SEARCH_USER_KEY_HAS_TRANSID |
+                      SEARCH_PAGE_KEY_HAS_TRANSID)))
+    {
+      /*
+        Neither key has a trid.  Only compare row id's and don't
+        try to store rows in trid order
+      */
+      key_length= keyseg->length;
+      nextflag&= ~SEARCH_INSERT;
+    }
+    else
+    {
+      /*
+        Set key_mask so that we reset the last bit in the rowid before
+        we compare it. This is needed as the lowest bit in the rowid is
+        used to mark if the key has a transid or not.
+      */
+      key_mask= (uchar) 254;
+      if (!test_all_bits(nextflag, (SEARCH_USER_KEY_HAS_TRANSID |
+                                    SEARCH_PAGE_KEY_HAS_TRANSID)))
+      {
+        /*
+          No transaction id for user key or for key on page.
+          Ignore transid as at least one of the keys is visible for all
+        */
+        key_length= keyseg->length;
+      }
+      else
+      {
+        /*
+          Both keys have trids. No need of special handling of incomplete
+          trids below.
+        */
+        nextflag&= ~SEARCH_INSERT;
+      }
+    }
+    DBUG_ASSERT(key_length > 0);
+
+    for (i= key_length-1 ; (int) i-- > 0 ; )
     {
       if (*a++ != *b++)
       {
         flag= FCMP(a[-1],b[-1]);
-        break;
+        goto found;
       }
     }
+    tmp_a= *a & key_mask;
+    tmp_b= *b & key_mask;
+    flag= FCMP(tmp_a, tmp_b);
+
+    if (flag == 0 && (nextflag & SEARCH_INSERT))
+    {
+      /*
+        Ensure that on insert we get rows stored in trid order.
+        If one of the parts doesn't have a trid, this should be regarded
+        as smaller than the other
+      */
+        return (nextflag & SEARCH_USER_KEY_HAS_TRANSID) ? -1 : 1;
+    }
+found:
     if (nextflag & SEARCH_SAME)
       return (flag);                            /* read same */
     if (nextflag & SEARCH_BIGGER)
@@ -499,11 +579,11 @@ end:
     NULLs.
 */
 
-HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a)
+HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, const uchar *a)
 {
   for (; (enum ha_base_keytype) keyseg->type != HA_KEYTYPE_END; keyseg++)
   {
-    uchar *end;
+    const uchar *end;
     if (keyseg->null_bit)
     {
       if (!*a++)
@@ -568,7 +648,6 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a)
 }
 
 
-
 /*
   Register handler error messages for usage with my_error()
 
diff --git a/mysys/my_handler_errors.h b/mysys/my_handler_errors.h
index e4e62f47fed..e5cb06fba63 100644
--- a/mysys/my_handler_errors.h
+++ b/mysys/my_handler_errors.h
@@ -34,7 +34,7 @@ static const char *handler_error_messages[]=
   "Table is crashed and last repair failed",
   "Table was marked as crashed and should be repaired",
   "Lock timed out; Retry transaction",
-  "Lock table is full;  Restart program with a larger locktable",
+  "Lock table is full;  Restart program with a larger lock table",
   "Updates are not allowed under a read only transactions",
   "Lock deadlock; Retry transaction",
   "Foreign key constraint is incorrectly formed",
@@ -48,7 +48,7 @@ static const char *handler_error_messages[]=
   "Unexpected null pointer found when using spatial index",
   "The table changed in storage engine",
   "There's no partition in table for the given value",
-  "Row-based binlogging of row failed",
+  "Row-based binary logging of row failed",
   "Index needed in foreign key constraint",
   "Upholding foreign key constraints would lead to a duplicate key error in "
   "some other table",
@@ -57,15 +57,16 @@ static const char *handler_error_messages[]=
   "Failed to get next auto increment value",
   "Failed to set row auto increment value",
   "Unknown (generic) error from engine",
-  "Record is the same",
+  "Record was not update. Original values was same as new values",
   "It is not possible to log this statement",
   "The event was corrupt, leading to illegal data being read",
   "The table is of a new format not supported by this version",
-  "The event could not be processed no other hanlder error happened",
-  "Got a fatal error during initialzaction of handler",
-  "File to short; Expected more data in file",
+  "The event could not be processed. No other handler error happened",
+  "Got a fatal error during initialization of handler",
+  "File too short; Expected more data in file",
   "Read page with wrong checksum",
-  "Too many active concurrent transactions"
+  "Too many active concurrent transactions",
+  "Row is not visible by the current transaction"
 };
 
 #endif /* MYSYS_MY_HANDLER_ERRORS_INCLUDED */
diff --git a/mysys/my_init.c b/mysys/my_init.c
index e3b189c27bb..71b3d960386 100644
--- a/mysys/my_init.c
+++ b/mysys/my_init.c
@@ -40,7 +40,8 @@ my_bool my_init_done= 0;
 /** True if @c my_basic_init() has been called. */
 my_bool my_basic_init_done= 0;
 uint	mysys_usage_id= 0;              /* Incremented for each my_init() */
-ulong   my_thread_stack_size= 65536;
+
+ulong   my_thread_stack_size= (sizeof(void*) <= 4)? 65536: ((256-16)*1024);
 
 static ulong atoi_octal(const char *str)
 {
@@ -91,17 +92,16 @@ my_bool my_basic_init(void)
   instrumented_stdin.m_psi= NULL;       /* not yet instrumented */
   mysql_stdin= & instrumented_stdin;
 
+  my_progname_short= "unknown";
+  if (my_progname)
+    my_progname_short= my_progname + dirname_length(my_progname);
+
+  /* Initialize our mutex handling */
+  my_mutex_init();
+
 #if defined(THREAD)
   if (my_thread_global_init())
     return 1;
-#  if defined(SAFE_MUTEX)
-  safe_mutex_global_init();		/* Must be called early */
-#  endif
-#endif
-#if defined(THREAD) && defined(MY_PTHREAD_FASTMUTEX) && !defined(SAFE_MUTEX)
-  fastmutex_global_init();              /* Must be called early */
-#endif
-#ifdef THREAD
 #if defined(HAVE_PTHREAD_INIT)
   pthread_init();			/* Must be called before DBUG_ENTER */
 #endif
@@ -203,7 +203,7 @@ void my_end(int infoflag)
   {
 #ifdef HAVE_GETRUSAGE
     struct rusage rus;
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
     /* Purify assumes that rus is uninitialized after getrusage call */
     bzero((char*) &rus, sizeof(rus));
 #endif
@@ -243,6 +243,7 @@ Voluntary context switches %ld, Involuntary context switches %ld\n",
 #ifdef THREAD
   my_thread_end();
   my_thread_global_end();
+  my_mutex_end();
 #if defined(SAFE_MUTEX)
   /*
     Check on destroying of mutexes. A few may be left that will get cleaned
@@ -262,6 +263,13 @@ Voluntary context switches %ld, Involuntary context switches %ld\n",
   my_basic_init_done= 0;
 } /* my_end */
 
+#ifndef DBUG_OFF
+/* Dummy tag function for debugging */
+
+void my_debug_put_break_here(void)
+{
+}
+#endif
 
 #ifdef __WIN__
 
diff --git a/mysys/my_lock.c b/mysys/my_lock.c
index 49c94ea838c..96abded4cc3 100644
--- a/mysys/my_lock.c
+++ b/mysys/my_lock.c
@@ -146,13 +146,13 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length,
   DBUG_ENTER("my_lock");
   DBUG_PRINT("my",("fd: %d  Op: %d  start: %ld  Length: %ld  MyFlags: %d",
 		   fd,locktype,(long) start,(long) length,MyFlags));
-  if (my_disable_locking)
+  if (my_disable_locking && ! (MyFlags & MY_FORCE_LOCK))
     DBUG_RETURN(0);
 
 #if defined(_WIN32)
   {
     int timeout_sec;
-    if (MyFlags & MY_DONT_WAIT)
+    if (MyFlags & MY_NO_WAIT)
       timeout_sec= 0;
     else
       timeout_sec= WIN_LOCK_INFINITE;
@@ -170,10 +170,16 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length,
     lock.l_start=  (off_t) start;
     lock.l_len=    (off_t) length;
 
-    if (MyFlags & MY_DONT_WAIT)
+    if (MyFlags & (MY_NO_WAIT | MY_SHORT_WAIT))
     {
       if (fcntl(fd,F_SETLK,&lock) != -1)	/* Check if we can lock */
-	DBUG_RETURN(0);			/* Ok, file locked */
+	DBUG_RETURN(0);                         /* Ok, file locked */
+      if (MyFlags & MY_NO_WAIT)
+      {
+        my_errno= (errno == EACCES) ? EAGAIN : errno ? errno : -1;
+        DBUG_RETURN(-1);
+      }
+
       DBUG_PRINT("info",("Was locked, trying with alarm"));
       ALARM_INIT;
       while ((value=fcntl(fd,F_SETLKW,&lock)) && ! ALARM_TEST &&
diff --git a/mysys/my_pthread.c b/mysys/my_pthread.c
index dee34d10b38..a13cd9b3545 100644
--- a/mysys/my_pthread.c
+++ b/mysys/my_pthread.c
@@ -360,7 +360,8 @@ int sigwait(sigset_t *setp, int *sigp)
 
 #include <netdb.h>
 
-int my_pthread_mutex_init(pthread_mutex_t *mp, const pthread_mutexattr_t *attr)
+int my_pthread_mutex_noposix_init(pthread_mutex_t *mp,
+                                  const pthread_mutexattr_t *attr)
 {
   int error;
   if (!attr)
@@ -370,7 +371,8 @@ int my_pthread_mutex_init(pthread_mutex_t *mp, const pthread_mutexattr_t *attr)
   return error;
 }
 
-int my_pthread_cond_init(pthread_cond_t *mp, const pthread_condattr_t *attr)
+int my_pthread_cond_noposix_init(pthread_cond_t *mp,
+                                 const pthread_condattr_t *attr)
 {
   int error;
   if (!attr)
diff --git a/mysys/my_redel.c b/mysys/my_redel.c
index 92aa6e42073..2fa5832bf0d 100644
--- a/mysys/my_redel.c
+++ b/mysys/my_redel.c
@@ -14,6 +14,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
 #include "mysys_priv.h"
+#include "mysys_err.h"
 #include <my_dir.h>
 #include <m_string.h>
 #include "mysys_err.h"
@@ -70,8 +71,22 @@ end:
 } /* my_redel */
 
 
-	/* Copy stat from one file to another */
-	/* Return -1 if can't get stat, 1 if wrong type of file */
+/**
+   Copy stat from one file to another
+   @fn     my_copystat()
+   @param  from		Copy stat from this file
+   @param  to           Copy stat to this file
+   @param  MyFlags      Flags:
+		        MY_WME    Give error if something goes wrong
+		        MY_FAE    Abort operation if something goes wrong
+                        If MY_FAE is not given, we don't return -1 for
+                        errors from chown (which normally require root
+                        privilege)
+
+  @return  0 ok
+          -1 if can't get stat,
+           1 if wrong type of file
+*/
 
 int my_copystat(const char *from, const char *to, int MyFlags)
 {
@@ -106,9 +121,10 @@ int my_copystat(const char *from, const char *to, int MyFlags)
   if (chown(to, statbuf.st_uid, statbuf.st_gid))
   {
     my_errno= errno;
-    if (MyFlags & (MY_FAE+MY_WME))
+    if (MyFlags & MY_WME)
       my_error(EE_CHANGE_OWNERSHIP, MYF(ME_BELL+ME_WAITTANG), from, errno);
-    return -1;
+    if (MyFlags & MY_FAE)
+      return -1;
   }
 #endif /* !__WIN__ */
 
diff --git a/mysys/my_rnd.c b/mysys/my_rnd.c
new file mode 100644
index 00000000000..178bcd9c539
--- /dev/null
+++ b/mysys/my_rnd.c
@@ -0,0 +1,55 @@
+/* Copyright (C) 2007 MySQL AB & Michael Widenius
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include "mysys_priv.h"
+#include <m_string.h>
+
+/*
+  Initialize random generator
+
+  NOTES
+    MySQL's password checks depend on this, so don't make any changes
+    that change the random numbers that are generated!
+*/
+
+void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2)
+{
+#ifdef HAVE_valgrind
+  bzero((char*) rand_st,sizeof(*rand_st));      /* Avoid UMC warnings */
+#endif
+  rand_st->max_value= 0x3FFFFFFFL;
+  rand_st->max_value_dbl=(double) rand_st->max_value;
+  rand_st->seed1=seed1%rand_st->max_value ;
+  rand_st->seed2=seed2%rand_st->max_value;
+}
+
+
+/*
+  Generate random number.
+
+  SYNOPSIS
+    my_rnd()
+    rand_st    INOUT  Structure used for number generation
+    
+  RETURN VALUE
+    generated pseudo random number
+*/
+
+double my_rnd(struct my_rnd_struct *rand_st)
+{
+  rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value;
+  rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
+  return (((double) rand_st->seed1)/rand_st->max_value_dbl);
+}
diff --git a/mysys/my_safehash.c b/mysys/my_safehash.c
new file mode 100644
index 00000000000..7ffb6d82e53
--- /dev/null
+++ b/mysys/my_safehash.c
@@ -0,0 +1,297 @@
+/* Copyright (C) 2003-2007 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/*
+  Handling of multiple key caches
+
+  The idea is to have a thread safe hash on the table name,
+  with a default key cache value that is returned if the table name is not in
+  the cache.
+*/
+
+#include "mysys_priv.h"
+#include <m_string.h>
+#include "my_safehash.h"
+
+/*****************************************************************************
+  General functions to handle SAFE_HASH objects.
+
+  A SAFE_HASH object is used to store the hash, the mutex and default value
+  needed by the rest of the key cache code.
+  This is a separate struct to make it easy to later reuse the code for other
+  purposes
+
+  All entries are linked in a list to allow us to traverse all elements
+  and delete selected ones. (HASH doesn't allow any easy ways to do this).
+*****************************************************************************/
+
+
+/*
+  Free a SAFE_HASH_ENTRY
+
+  SYNOPSIS
+    safe_hash_entry_free()
+    entry                The entry which should be freed
+
+  NOTE
+    This function is called by the hash object on delete
+*/
+
+static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry)
+{
+  DBUG_ENTER("safe_hash_entry_free");
+  my_free(entry);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Get key and length for a SAFE_HASH_ENTRY
+
+  SYNOPSIS
+    safe_hash_entry_get()
+    entry                The entry for which the key should be returned
+    length               Length of the key
+
+  RETURN
+    #  reference on the key
+*/
+
+static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, size_t *length,
+                                  my_bool not_used __attribute__((unused)))
+{
+  *length= entry->length;
+  return (uchar*) entry->key;
+}
+
+
+/*
+  Init a SAFE_HASH object
+
+  SYNOPSIS
+    safe_hash_init()
+    hash		safe_hash handler
+    elements		Expected max number of elements
+    default_value	default value
+
+  NOTES
+    In case of error we set hash->default_value to 0 to allow one to call
+    safe_hash_free on an object that couldn't be initialized.
+
+  RETURN
+    0  OK
+    1  error
+*/
+
+my_bool safe_hash_init(SAFE_HASH *hash, uint elements,
+                       uchar *default_value)
+{
+  DBUG_ENTER("safe_hash_init");
+  if (my_hash_init(&hash->hash, &my_charset_bin, elements,
+                   0, 0, (hash_get_key) safe_hash_entry_get,
+                   (void (*)(void*)) safe_hash_entry_free, 0))
+  {
+    hash->default_value= 0;
+    DBUG_RETURN(1);
+  }
+  my_rwlock_init(&hash->mutex, 0);
+  hash->default_value= default_value;
+  hash->root= 0;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Free a SAFE_HASH object
+
+  SYNOPSIS
+    safe_hash_free()
+    hash                 Hash handle
+
+  NOTES
+    This is safe to call on any object that has been sent to safe_hash_init()
+*/
+
+void safe_hash_free(SAFE_HASH *hash)
+{
+  /*
+    Test if safe_hash_init succeeded. This will also guard us against multiple
+    free calls.
+  */
+  if (hash->default_value)
+  {
+    my_hash_free(&hash->hash);
+    rwlock_destroy(&hash->mutex);
+    hash->default_value=0;
+  }
+}
+
+
+/*
+  Return the value stored for a key or default value if no key
+
+  SYNOPSIS
+    safe_hash_search()
+    hash                 Hash handle
+    key                  key (path to table etc..)
+    length               Length of key
+    def                  Value to return if the key is not in the hash
+
+  RETURN
+    #  data associated with the key, or 'def' if the key was not found
+*/
+
+uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length,
+                        uchar *def)
+{
+  uchar *result;
+  DBUG_ENTER("safe_hash_search");
+  rw_rdlock(&hash->mutex);
+  result= my_hash_search(&hash->hash, key, length);
+  if (!result)
+    result= def;
+  else                  /* Read entry under the lock; a writer may free it */
+    result= ((SAFE_HASH_ENTRY*) result)->data;
+  rw_unlock(&hash->mutex);
+  DBUG_PRINT("exit",("data: 0x%lx", (long) result));
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Associate a key with some data
+
+  SYNOPSIS
+    safe_hash_set()
+    hash                 Hash handle
+    key                  key (path to table etc..)
+    length               Length of key
+    data                 data to associate with the key
+
+  NOTES
+    This can be used both to insert a new entry and change an existing
+    entry.
+    If one associates a key with the default value, the key is deleted
+
+  RETURN
+    0  OK
+    1  error (Can only be EOM). In this case my_message() is called.
+*/
+
+my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length,
+                      uchar *data)
+{
+  SAFE_HASH_ENTRY *entry;
+  my_bool error= 0;
+  DBUG_ENTER("safe_hash_set");
+  DBUG_PRINT("enter",("key: %.*s  data: 0x%lx", length, key, (long) data));
+
+  rw_wrlock(&hash->mutex);
+  entry= (SAFE_HASH_ENTRY*) my_hash_search(&hash->hash, key, length);
+
+  if (data == hash->default_value)
+  {
+    /*
+      The key is to be associated with the default entry. In this case
+      we can just delete the entry (if it existed) from the hash as a
+      search will return the default entry
+    */
+    if (!entry)          /* nothing to do */
+      goto end;
+    /* unlink entry from list */
+    if ((*entry->prev= entry->next))
+      entry->next->prev= entry->prev;
+    my_hash_delete(&hash->hash, (uchar*) entry);
+    goto end;
+  }
+  if (entry)
+  {
+    /* Entry existed;  Just change the pointer to point at the new data */
+    entry->data= data;
+  }
+  else
+  {
+    if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length,
+                                               MYF(MY_WME))))
+    {
+      error= 1;
+      goto end;
+    }
+    entry->key= (uchar*) (entry +1);
+    memcpy((char*) entry->key, (char*) key, length);
+    entry->length= length;
+    entry->data= data;
+    if (my_hash_insert(&hash->hash, (uchar*) entry))
+    {
+      /* Out of memory in hash; entry is not yet linked so just free it */
+      my_free(entry);
+      error= 1;
+      goto end;
+    }
+    /* Link entry to list (only after insert succeeded, to avoid dangling) */
+    if ((entry->next= hash->root))
+      entry->next->prev= &entry->next;
+    entry->prev= &hash->root;
+    hash->root= entry;
+  }
+
+end:
+  rw_unlock(&hash->mutex);
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Change all entries with one data value to another data value
+
+  SYNOPSIS
+    safe_hash_change()
+    hash                 Hash handle
+    old_data             Old data
+    new_data             Change all 'old_data' to this
+
+  NOTES
+    We use the linked list to traverse all elements in the hash as
+    this allows us to delete elements in the case where 'new_data' is the
+    default value.
+*/
+
+void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data)
+{
+  SAFE_HASH_ENTRY *entry, *next;
+  DBUG_ENTER("safe_hash_change");
+
+  rw_wrlock(&hash->mutex);
+
+  for (entry= hash->root ; entry ; entry= next)
+  {
+    next= entry->next;
+    if (entry->data == old_data)
+    {
+      if (new_data == hash->default_value)
+      {
+        if ((*entry->prev= entry->next))
+          entry->next->prev= entry->prev;
+        my_hash_delete(&hash->hash, (uchar*) entry);
+      }
+      else
+        entry->data= new_data;
+    }
+  }
+
+  rw_unlock(&hash->mutex);
+  DBUG_VOID_RETURN;
+}
diff --git a/mysys/my_safehash.h b/mysys/my_safehash.h
new file mode 100644
index 00000000000..8a5856b6763
--- /dev/null
+++ b/mysys/my_safehash.h
@@ -0,0 +1,58 @@
+/* Copyright (C) 2003 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/*
+  Handling of multiple key caches
+
+  The idea is to have a thread safe hash on the table name,
+  with a default key cache value that is returned if the table name is not in
+  the cache.
+*/
+
+#include <hash.h>
+
+/*
+  Struct to store a key and pointer to object
+*/
+
+typedef struct st_safe_hash_entry
+{
+  uchar *key;
+  uint length;
+  uchar *data;
+  struct st_safe_hash_entry *next, **prev;
+} SAFE_HASH_ENTRY;
+
+
+typedef struct st_safe_hash_with_default
+{
+#ifdef THREAD
+  rw_lock_t mutex;
+#endif
+  HASH hash;
+  uchar *default_value;
+  SAFE_HASH_ENTRY *root;
+} SAFE_HASH;
+
+
+my_bool safe_hash_init(SAFE_HASH *hash, uint elements,
+                       uchar *default_value);
+void safe_hash_free(SAFE_HASH *hash);
+uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length,
+                       uchar *def);
+my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length,
+                      uchar *data);
+void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data);
diff --git a/mysys/my_seek.c b/mysys/my_seek.c
index 8502c259353..63337c636a4 100644
--- a/mysys/my_seek.c
+++ b/mysys/my_seek.c
@@ -14,6 +14,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
 #include "mysys_priv.h"
+#include "mysys_err.h"
 
 /* 
   Seek to a position in a file.
@@ -42,8 +43,7 @@
                        actual error.
 */
 
-my_off_t my_seek(File fd, my_off_t pos, int whence,
-		 myf MyFlags __attribute__((unused)))
+my_off_t my_seek(File fd, my_off_t pos, int whence, myf MyFlags)
 {
   os_off_t newpos= -1;
   DBUG_ENTER("my_seek");
@@ -63,6 +63,8 @@ my_off_t my_seek(File fd, my_off_t pos, int whence,
   if (newpos == (os_off_t) -1)
   {
     my_errno= errno;
+    if (MyFlags & MY_WME)
+      my_error(EE_CANT_SEEK, MYF(0), my_filename(fd), my_errno);
     DBUG_PRINT("error",("lseek: %llu  errno: %d", (ulonglong) newpos,errno));
     DBUG_RETURN(MY_FILEPOS_ERROR);
   }
@@ -77,7 +79,7 @@ my_off_t my_seek(File fd, my_off_t pos, int whence,
 	/* Tell current position of file */
 	/* ARGSUSED */
 
-my_off_t my_tell(File fd, myf MyFlags __attribute__((unused)))
+my_off_t my_tell(File fd, myf MyFlags)
 {
   os_off_t pos;
   DBUG_ENTER("my_tell");
@@ -89,7 +91,11 @@ my_off_t my_tell(File fd, myf MyFlags __attribute__((unused)))
   pos= my_seek(fd, 0L, MY_SEEK_CUR,0);
 #endif
   if (pos == (os_off_t) -1)
+  {
     my_errno= errno;
+    if (MyFlags & MY_WME)
+      my_error(EE_CANT_SEEK, MYF(0), my_filename(fd), my_errno);
+  }
   DBUG_PRINT("exit",("pos: %llu", (ulonglong) pos));
   DBUG_RETURN((my_off_t) pos);
 } /* my_tell */
diff --git a/mysys/my_sleep.c b/mysys/my_sleep.c
index 6d1bdd5dc55..5a18a2854d6 100644
--- a/mysys/my_sleep.c
+++ b/mysys/my_sleep.c
@@ -28,7 +28,7 @@ void my_sleep(ulong m_seconds)
   t.tv_usec= m_seconds % 1000000L;
   select(0,0,0,0,&t); /* sleep */
 #else
-  uint sec=    (uint) (m_seconds / 1000000L);
+  uint sec=    (uint) ((m_seconds + 999999L) / 1000000L);
   ulong start= (ulong) time((time_t*) 0);
   while ((ulong) time((time_t*) 0) < start+sec);
 #endif
diff --git a/mysys/my_static.c b/mysys/my_static.c
index d7354555f3d..97af25cf92d 100644
--- a/mysys/my_static.c
+++ b/mysys/my_static.c
@@ -26,7 +26,7 @@ my_bool timed_mutexes= 0;
 
 	/* from my_init */
 char *	home_dir=0;
-const char      *my_progname=0;
+const char      *my_progname= NULL, *my_progname_short= NULL;
 char		curr_dir[FN_REFLEN]= {0},
 		home_dir_buff[FN_REFLEN]= {0};
 ulong		my_stream_opened=0,my_file_opened=0, my_tmp_file_created=0;
@@ -104,6 +104,7 @@ ulonglong query_performance_frequency, query_performance_offset;
 
 	/* How to disable options */
 my_bool my_disable_locking=0;
+my_bool my_disable_sync=0;
 my_bool my_disable_async_io=0;
 my_bool my_disable_flush_key_blocks=0;
 my_bool my_disable_symlinks=0;
diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c
index 258e227bb7b..33f45a882e1 100644
--- a/mysys/my_symlink.c
+++ b/mysys/my_symlink.c
@@ -117,6 +117,9 @@ int my_is_symlink(const char *filename __attribute__((unused)))
 /*
   Resolve all symbolic links in path
   'to' may be equal to 'filename'
+
+  'to' is guaranteed never to be set to a string longer than FN_REFLEN
+  (including the end \0)
 */
 
 int my_realpath(char *to, const char *filename,
@@ -130,7 +133,7 @@ int my_realpath(char *to, const char *filename,
 
   DBUG_PRINT("info",("executing realpath"));
   if ((ptr=realpath(filename,buff)))
-      strmake(to,ptr,FN_REFLEN-1);
+    strmake(to, ptr, FN_REFLEN-1);
   else
   {
     /*
diff --git a/mysys/my_symlink2.c b/mysys/my_symlink2.c
index 7c3ddbb911c..bc7ac751fad 100644
--- a/mysys/my_symlink2.c
+++ b/mysys/my_symlink2.c
@@ -34,8 +34,8 @@ File my_create_with_symlink(const char *linkname, const char *filename,
   char abs_linkname[FN_REFLEN];
   DBUG_ENTER("my_create_with_symlink");
   DBUG_PRINT("enter", ("linkname: %s  filename: %s",
-                       linkname ? linkname : "(null)",
-                       filename ? filename : "(null)"));
+                       linkname ? linkname : "(NULL)",
+                       filename ? filename : "(NULL)"));
 
   if (my_disable_symlinks)
   {
diff --git a/mysys/my_sync.c b/mysys/my_sync.c
index bc050922ffc..7dfd365326c 100644
--- a/mysys/my_sync.c
+++ b/mysys/my_sync.c
@@ -17,6 +17,8 @@
 #include "mysys_err.h"
 #include <errno.h>
 
+ulong my_sync_count;                           /* Count number of sync calls */
+
 /*
   Sync data in file to disk
 
@@ -44,8 +46,12 @@ int my_sync(File fd, myf my_flags)
 {
   int res;
   DBUG_ENTER("my_sync");
-  DBUG_PRINT("my",("Fd: %d  my_flags: %d", fd, my_flags));
+  DBUG_PRINT("my",("fd: %d  my_flags: %d", fd, my_flags));
+
+  if (my_disable_sync)
+    DBUG_RETURN(0);
 
+  statistic_increment(my_sync_count,&THR_LOCK_open);
   do
   {
 #if defined(F_FULLFSYNC)
@@ -62,6 +68,8 @@ int my_sync(File fd, myf my_flags)
     res= fdatasync(fd);
 #elif defined(HAVE_FSYNC)
     res= fsync(fd);
+    if (res == -1 && errno == ENOLCK)
+      res= 0;                                   /* Result Bug in Old FreeBSD */
 #elif defined(_WIN32)
     res= my_win_fsync(fd);
 #else
diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c
index 045b56b11c2..7b895149d84 100644
--- a/mysys/my_thr_init.c
+++ b/mysys/my_thr_init.c
@@ -38,12 +38,6 @@ mysql_mutex_t LOCK_localtime_r;
 #ifndef HAVE_GETHOSTBYNAME_R
 mysql_mutex_t LOCK_gethostbyname_r;
 #endif
-#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
-pthread_mutexattr_t my_fast_mutexattr;
-#endif
-#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
-pthread_mutexattr_t my_errorcheck_mutexattr;
-#endif
 #ifdef _MSC_VER
 static void install_sigabrt_handler();
 #endif
@@ -88,30 +82,6 @@ my_bool my_thread_basic_global_init(void)
     return 0;
   my_thread_basic_global_init_done= 1;
 
-#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
-  /*
-    Set mutex type to "fast" a.k.a "adaptive"
-
-    In this case the thread may steal the mutex from some other thread
-    that is waiting for the same mutex.  This will save us some
-    context switches but may cause a thread to 'starve forever' while
-    waiting for the mutex (not likely if the code within the mutex is
-    short).
-  */
-  pthread_mutexattr_init(&my_fast_mutexattr);
-  pthread_mutexattr_settype(&my_fast_mutexattr,
-                            PTHREAD_MUTEX_ADAPTIVE_NP);
-#endif
-
-#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
-  /*
-    Set mutex type to "errorcheck"
-  */
-  pthread_mutexattr_init(&my_errorcheck_mutexattr);
-  pthread_mutexattr_settype(&my_errorcheck_mutexattr,
-                            PTHREAD_MUTEX_ERRORCHECK);
-#endif
-
   mysql_mutex_init(key_THR_LOCK_malloc, &THR_LOCK_malloc, MY_MUTEX_INIT_FAST);
   mysql_mutex_init(key_THR_LOCK_open, &THR_LOCK_open, MY_MUTEX_INIT_FAST);
   mysql_mutex_init(key_THR_LOCK_charset, &THR_LOCK_charset, MY_MUTEX_INIT_FAST);
@@ -275,12 +245,6 @@ void my_thread_global_end(void)
   mysql_mutex_unlock(&THR_LOCK_threads);
 
   pthread_key_delete(THR_KEY_mysys);
-#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
-  pthread_mutexattr_destroy(&my_fast_mutexattr);
-#endif
-#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
-  pthread_mutexattr_destroy(&my_errorcheck_mutexattr);
-#endif
   mysql_mutex_destroy(&THR_LOCK_malloc);
   mysql_mutex_destroy(&THR_LOCK_open);
   mysql_mutex_destroy(&THR_LOCK_lock);
@@ -427,8 +391,10 @@ void my_thread_end(void)
     mysql_cond_destroy(&tmp->suspend);
 #endif
     mysql_mutex_destroy(&tmp->mutex);
+    TRASH(tmp, sizeof(*tmp));
     free(tmp);
 
+#warning why monty added pthread_setspecific(THR_KEY_mysys,0) here?
     /*
       Decrement counter for number of running threads. We are using this
       in my_thread_global_end() to wait until all threads have called
diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c
new file mode 100644
index 00000000000..f115806b4e9
--- /dev/null
+++ b/mysys/my_uuid.c
@@ -0,0 +1,243 @@
+/* Copyright (C) 2007 MySQL AB, Sergei Golubchik & Michael Widenius
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/*
+  implements Universal Unique Identifiers (UUIDs), as in
+    DCE 1.1: Remote Procedure Call,
+    Open Group Technical Standard Document Number C706, October 1997,
+    (supersedes C309 DCE: Remote Procedure Call 8/1994,
+    which was basis for ISO/IEC 11578:1996 specification)
+
+  A UUID has the following structure:
+
+  Field                     NDR Data Type  Octet #          Note
+ time_low                   unsigned long    0-3     The low field of the
+                                                     timestamp.
+ time_mid                   unsigned short   4-5     The middle field of
+                                                     the timestamp.
+ time_hi_and_version        unsigned short   6-7     The high field of the
+                                                     timestamp multiplexed
+                                                     with the version number.
+ clock_seq_hi_and_reserved  unsigned small   8       The high field of the
+                                                     clock sequence multi-
+                                                     plexed with the variant.
+ clock_seq_low              unsigned small   9       The low field of the
+                                                     clock sequence.
+ node                       character        10-15   The spatially unique node
+                                                     identifier.
+*/
+
+#include "mysys_priv.h"
+#include <m_string.h>
+#include <myisampack.h> /* mi_int2store, mi_int4store */
+
+static my_bool my_uuid_inited= 0;      /* Set by my_uuid_init(), reset by my_uuid_end() */
+static struct my_rnd_struct uuid_rand; /* Random state, seeded in my_uuid_init() */
+static uint nanoseq;                   /* Ticks "borrowed" by my_uuid() on clock ties */
+static ulonglong uuid_time= 0;         /* Timestamp of the last uuid from my_uuid() */
+static uchar uuid_suffix[2+6]; /* clock_seq and node */
+
+#ifdef THREAD
+pthread_mutex_t LOCK_uuid_generator;   /* Taken by my_uuid() */
+#endif
+
+/*
+  Number of 100-nanosecond intervals between
+  1582-10-15 00:00:00.00 and 1970-01-01 00:00:00.00
+*/
+
+#define UUID_TIME_OFFSET ((ulonglong) 141427 * 24 * 60 * 60 * \
+                          1000 * 1000 * 10)
+#define UUID_VERSION      0x1000       /* OR-ed into time_hi_and_version */
+#define UUID_VARIANT      0x8000       /* OR-ed into clock_seq */
+
+
+/* Helper: put a new random clock_seq (with UUID_VARIANT set) in uuid_suffix */
+
+static void set_clock_seq()
+{
+  uint16 clock_seq= ((uint)(my_rnd(&uuid_rand)*16383)) | UUID_VARIANT;
+  mi_int2store(uuid_suffix, clock_seq);
+}
+
+
+/**
+  Init structures needed for my_uuid (does nothing if already initialized)
+
+  @func my_uuid_init()
+  @param seed1		Seed for random generator
+  @param seed2		Seed for random generator
+
+  @note
+    Seed1 & seed2 should NOT depend on clock. This is to be able to
+    generate a random mac address according to UUID specs.
+*/
+
+void my_uuid_init(ulong seed1, ulong seed2)
+{
+  uchar *mac= uuid_suffix+2;                    /* Node part, after clock_seq */
+  ulonglong now;
+
+  if (my_uuid_inited)
+    return;                                     /* Already initialized */
+  my_uuid_inited= 1;
+  now= my_getsystime();
+  nanoseq= 0;
+
+  if (my_gethwaddr(mac))
+  {
+    uint i;
+    /*
+      Generating random "hardware addr"
+
+      Specs explicitly specify that node identifier should NOT
+      correlate with a clock_seq value, so we use a separate
+      my_rnd_init() here.
+    */
+    /* purecov: begin inspected */
+    my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), (ulong) (now+rand()));
+    for (i=0; i < array_elements(uuid_suffix) -2 ; i++)
+      mac[i]= (uchar)(my_rnd(&uuid_rand)*255);
+    /* purecov: end */
+  }
+  my_rnd_init(&uuid_rand, (ulong) (seed1 + now), (ulong) (now/2+ getpid()));
+  set_clock_seq();
+  pthread_mutex_init(&LOCK_uuid_generator, MY_MUTEX_INIT_FAST);
+}
+
+
+/**
+   Create a global unique identifier (uuid)
+
+   @func  my_uuid()
+   @param to   Store uuid here. Must be of size MY_UUID_SIZE (16)
+*/
+
+void my_uuid(uchar *to)
+{
+  ulonglong tv;
+  uint32 time_low;
+  uint16 time_mid, time_hi_and_version;
+
+  DBUG_ASSERT(my_uuid_inited);
+
+  pthread_mutex_lock(&LOCK_uuid_generator);
+  tv= my_getsystime() + UUID_TIME_OFFSET + nanoseq;
+
+  if (likely(tv > uuid_time))
+  {
+    /*
+      Current time is ahead of last timestamp, as it should be.
+      If we "borrowed time", give it back, just as long as we
+      stay ahead of the previous timestamp.
+    */
+    if (nanoseq)
+    {
+      ulong delta;
+      DBUG_ASSERT((tv > uuid_time) && (nanoseq > 0));
+      /*
+        -1 so we won't make tv= uuid_time for nanoseq >= (tv - uuid_time)
+      */
+      delta= min(nanoseq, (ulong)(tv - uuid_time -1));
+      tv-= delta;
+      nanoseq-= delta;
+    }
+  }
+  else
+  {
+    if (unlikely(tv == uuid_time))
+    {
+      /*
+        For low-res system clocks. If several requests for UUIDs
+        end up on the same tick, we add a nano-second to make them
+        different.
+        ( current_timestamp + nanoseq * calls_in_this_period )
+        may end up > next_timestamp; this is OK. Nonetheless, we'll
+        try to unwind nanoseq when we get a chance to.
+        If nanoseq overflows, we'll start over with a new numberspace
+        (so the if() below is needed so we can avoid the ++tv and thus
+        match the follow-up if() if nanoseq overflows!).
+      */
+      if (likely(++nanoseq))
+        ++tv;
+    }
+
+    if (unlikely(tv <= uuid_time))
+    {
+      /*
+        If the admin changes the system clock (or due to Daylight
+        Saving Time), the system clock may be turned *back* so we
+        go through a period once more for which we already gave out
+        UUIDs.  To avoid duplicate UUIDs despite potentially identical
+        times, we make a new random component.
+        We also come here if the nanoseq "borrowing" overflows.
+        In either case, we throw away any nanoseq borrowing since it's
+        irrelevant in the new numberspace.
+      */
+      set_clock_seq();
+      tv= my_getsystime() + UUID_TIME_OFFSET;
+      nanoseq= 0;
+      DBUG_PRINT("uuid",("making new numberspace"));
+    }
+  }
+
+  uuid_time=tv;
+  /* Copy clock_seq+node while locked; set_clock_seq() may rewrite them */
+  bmove(to+8, uuid_suffix, sizeof(uuid_suffix));
+  pthread_mutex_unlock(&LOCK_uuid_generator);
+
+  time_low=            (uint32) (tv & 0xFFFFFFFF);
+  time_mid=            (uint16) ((tv >> 32) & 0xFFFF);
+  time_hi_and_version= (uint16) ((tv >> 48) | UUID_VERSION);
+
+  /*
+    Note, that the standard does NOT specify byte ordering in multi-byte
+    fields. It's implementation defined (but must be the same for all
+    fields). We use big-endian, so we can use memcmp() to compare UUIDs
+    and for straightforward UUID to string conversion.
+  */
+  mi_int4store(to, time_low);
+  mi_int2store(to+4, time_mid);
+  mi_int2store(to+6, time_hi_and_version);
+}
+
+
+/**
+   Convert uuid to string representation (no terminating \0 is written)
+
+   @func  my_uuid2str()
+   @param guid uuid (MY_UUID_SIZE bytes are read)
+   @param s    Output buffer. Must be at least MY_UUID_STRING_LENGTH+1 large.
+*/
+void my_uuid2str(const uchar *guid, char *s)
+{
+  int i;
+  for (i=0; i < MY_UUID_SIZE; i++)
+  {
+    *s++= _dig_vec_lower[guid[i] >>4];
+    *s++= _dig_vec_lower[guid[i] & 15];
+    if(i == 3 || i == 5 || i == 7 || i == 9)
+      *s++= '-';
+  }
+}
+
+void my_uuid_end()
+{
+  if (my_uuid_inited)                           /* No-op if not initialized */
+  {
+    my_uuid_inited= 0;                          /* my_uuid_init() may run again */
+    pthread_mutex_destroy(&LOCK_uuid_generator);
+  }
+}
diff --git a/mysys/my_wincond.c b/mysys/my_wincond.c
index ad1636011db..4d8ff6254e3 100644
--- a/mysys/my_wincond.c
+++ b/mysys/my_wincond.c
@@ -18,6 +18,7 @@
 *****************************************************************************/
 #if defined(_WIN32)
 
+#warning #include <my_global.h>
 #undef SAFE_MUTEX			/* Avoid safe_mutex redefinitions */
 #include "mysys_priv.h"
 #include <m_string.h>
diff --git a/mysys/my_winthread.c b/mysys/my_winthread.c
index aecb2f7cc78..1ca32d58832 100644
--- a/mysys/my_winthread.c
+++ b/mysys/my_winthread.c
@@ -18,6 +18,7 @@
 *****************************************************************************/
 #if defined (_WIN32)
 /* SAFE_MUTEX will not work until the thread structure is up to date */
+#warning #include <my_global.h>
 #undef SAFE_MUTEX
 #include "mysys_priv.h"
 #include <process.h>
diff --git a/mysys/mysys_priv.h b/mysys/mysys_priv.h
index 1ae6a9e3a99..30ffac0ac77 100644
--- a/mysys/mysys_priv.h
+++ b/mysys/mysys_priv.h
@@ -75,6 +75,7 @@ extern PSI_file_key key_file_proc_meminfo;
 extern PSI_file_key key_file_charset, key_file_cnf;
 #endif /* HAVE_PSI_INTERFACE */
 
+
 /*
   EDQUOT is used only in 3 C files only in mysys/. If it does not exist on
   system, we set it to some value which can never happen.
@@ -84,6 +85,7 @@ extern PSI_file_key key_file_charset, key_file_cnf;
 #endif
 
 void my_error_unregister_all(void);
+
 
 #ifdef _WIN32
 #include <sys/stat.h>
diff --git a/mysys/queues.c b/mysys/queues.c
index 25a310c0752..418163d7c58 100644
--- a/mysys/queues.c
+++ b/mysys/queues.c
@@ -1,25 +1,42 @@
-/* Copyright (C) 2000, 2005 MySQL AB
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; version 2 of the License.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+/* Copyright (C) 2010 Monty Program Ab
+   All Rights reserved
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the following disclaimer
+      in the documentation and/or other materials provided with the
+      distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+  <COPYRIGHT HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  SUCH DAMAGE.
+*/
 
 /*
+  This code originates from the Unireg project.
+
   Code for generell handling of priority Queues.
   Implemention of queues from "Algoritms in C" by Robert Sedgewick.
-  An optimisation of _downheap suggested in Exercise 7.51 in "Data
-  Structures & Algorithms in C++" by Mark Allen Weiss, Second Edition
-  was implemented by Mikael Ronstrom 2005. Also the O(N) algorithm
-  of queue_fix was implemented.
+
+  The queue can optionally store the position in queue in the element
+  that is in the queue. This allows one to remove any element from the queue
+  in O(1) time.
+
+  Optimisation of _downheap() and queue_fix() is inspired by code done
+  by Mikael Ronström, based on an optimisation of _downheap from
+  Exercise 7.51 in "Data Structures & Algorithms in C++" by Mark Allen
+  Weiss, Second Edition.
 */
 
 #include "mysys_priv.h"
@@ -39,6 +56,10 @@
     max_at_top		Set to 1 if you want biggest element on top.
     compare		Compare function for elements, takes 3 arguments.
     first_cmp_arg	First argument to compare function
+    offset_to_queue_pos If <> 0, then offset+1 in element to store position
+                        in queue (for fast delete of element in queue)
+    auto_extent         When the queue is full and there is insert operation
+                        extend the queue.
 
   NOTES
     Will allocate max_element pointers for queue array
@@ -50,74 +71,33 @@
 
 int init_queue(QUEUE *queue, uint max_elements, uint offset_to_key,
 	       pbool max_at_top, int (*compare) (void *, uchar *, uchar *),
-	       void *first_cmp_arg)
+	       void *first_cmp_arg, uint offset_to_queue_pos,
+               uint auto_extent)
+               
 {
   DBUG_ENTER("init_queue");
-  if ((queue->root= (uchar **) my_malloc((max_elements+1)*sizeof(void*),
+  if ((queue->root= (uchar **) my_malloc((max_elements + 1) * sizeof(void*),
 					 MYF(MY_WME))) == 0)
     DBUG_RETURN(1);
-  queue->elements=0;
-  queue->compare=compare;
-  queue->first_cmp_arg=first_cmp_arg;
-  queue->max_elements=max_elements;
-  queue->offset_to_key=offset_to_key;
+  queue->elements=      	0;
+  queue->compare=       	compare;
+  queue->first_cmp_arg= 	first_cmp_arg;
+  queue->max_elements=  	max_elements;
+  queue->offset_to_key= 	offset_to_key;
+  queue->offset_to_queue_pos=   offset_to_queue_pos;
+  queue->auto_extent=   	auto_extent;
   queue_set_max_at_top(queue, max_at_top);
   DBUG_RETURN(0);
 }
 
 
-
-/*
-  Init queue, uses init_queue internally for init work but also accepts
-  auto_extent as parameter
-
-  SYNOPSIS
-    init_queue_ex()
-    queue		Queue to initialise
-    max_elements	Max elements that will be put in queue
-    offset_to_key	Offset to key in element stored in queue
-			Used when sending pointers to compare function
-    max_at_top		Set to 1 if you want biggest element on top.
-    compare		Compare function for elements, takes 3 arguments.
-    first_cmp_arg	First argument to compare function
-    auto_extent         When the queue is full and there is insert operation
-                        extend the queue.
-
-  NOTES
-    Will allocate max_element pointers for queue array
-
-  RETURN
-    0	ok
-    1	Could not allocate memory
-*/
-
-int init_queue_ex(QUEUE *queue, uint max_elements, uint offset_to_key,
-	       pbool max_at_top, int (*compare) (void *, uchar *, uchar *),
-	       void *first_cmp_arg, uint auto_extent)
-{
-  int ret;
-  DBUG_ENTER("init_queue_ex");
-
-  if ((ret= init_queue(queue, max_elements, offset_to_key, max_at_top, compare,
-                       first_cmp_arg)))
-    DBUG_RETURN(ret);
-  
-  queue->auto_extent= auto_extent;
-  DBUG_RETURN(0);
-}
-
 /*
   Reinitialize queue for other usage
 
   SYNOPSIS
     reinit_queue()
     queue		Queue to initialise
-    max_elements	Max elements that will be put in queue
-    offset_to_key	Offset to key in element stored in queue
-			Used when sending pointers to compare function
-    max_at_top		Set to 1 if you want biggest element on top.
-    compare		Compare function for elements, takes 3 arguments.
-    first_cmp_arg	First argument to compare function
+    For rest of arguments, see init_queue() above
 
   NOTES
     This will delete all elements from the queue.  If you don't want this,
@@ -125,21 +105,23 @@ int init_queue_ex(QUEUE *queue, uint max_elements, uint offset_to_key,
 
   RETURN
     0			ok
-    EE_OUTOFMEMORY	Wrong max_elements
+    1			Wrong max_elements; Queue has old size
 */
 
 int reinit_queue(QUEUE *queue, uint max_elements, uint offset_to_key,
 		 pbool max_at_top, int (*compare) (void *, uchar *, uchar *),
-		 void *first_cmp_arg)
+		 void *first_cmp_arg, uint offset_to_queue_pos,
+                 uint auto_extent)
 {
   DBUG_ENTER("reinit_queue");
-  queue->elements=0;
-  queue->compare=compare;
-  queue->first_cmp_arg=first_cmp_arg;
-  queue->offset_to_key=offset_to_key;
+  queue->elements=		0;
+  queue->compare=		compare;
+  queue->first_cmp_arg=		first_cmp_arg;
+  queue->offset_to_key=		offset_to_key;
+  queue->offset_to_queue_pos=   offset_to_queue_pos;
+  queue->auto_extent= 		auto_extent;
   queue_set_max_at_top(queue, max_at_top);
-  resize_queue(queue, max_elements);
-  DBUG_RETURN(0);
+  DBUG_RETURN(resize_queue(queue, max_elements));
 }
 
 
@@ -167,8 +149,8 @@ int resize_queue(QUEUE *queue, uint max_elements)
   if (queue->max_elements == max_elements)
     DBUG_RETURN(0);
   if ((new_root= (uchar **) my_realloc((void *)queue->root,
-				      (max_elements+1)*sizeof(void*),
-				      MYF(MY_WME))) == 0)
+                                       (max_elements + 1)* sizeof(void*),
+                                       MYF(MY_WME))) == 0)
     DBUG_RETURN(1);
   set_if_smaller(queue->elements, max_elements);
   queue->max_elements= max_elements;
@@ -195,38 +177,57 @@ void delete_queue(QUEUE *queue)
 {
   DBUG_ENTER("delete_queue");
   my_free(queue->root);
-  queue->root= NULL;
+  queue->root=0;                              /* Allow multiple calls */
   DBUG_VOID_RETURN;
 }
 
 
-	/* Code for insert, search and delete of elements */
+/*
+  Insert element in queue
+
+  SYNOPSIS
+    queue_insert()
+    queue		Queue to use
+    element		Element to insert
+*/
 
 void queue_insert(register QUEUE *queue, uchar *element)
 {
   reg2 uint idx, next;
+  uint offset_to_queue_pos= queue->offset_to_queue_pos;
   DBUG_ASSERT(queue->elements < queue->max_elements);
-  queue->root[0]= element;
+
   idx= ++queue->elements;
   /* max_at_top swaps the comparison if we want to order by desc */
-  while ((queue->compare(queue->first_cmp_arg,
+  while (idx > 1 &&
+         (queue->compare(queue->first_cmp_arg,
                          element + queue->offset_to_key,
                          queue->root[(next= idx >> 1)] +
                          queue->offset_to_key) * queue->max_at_top) < 0)
   {
     queue->root[idx]= queue->root[next];
+    if (offset_to_queue_pos)
+      (*(uint*) (queue->root[idx] + offset_to_queue_pos-1))= idx;
     idx= next;
   }
   queue->root[idx]= element;
+  if (offset_to_queue_pos)
+    (*(uint*) (element+ offset_to_queue_pos-1))= idx;
 }
 
+
 /*
-  Does safe insert. If no more space left on the queue resize it.
-  Return codes:
-    0 - OK
-    1 - Cannot allocate more memory
-    2 - auto_extend is 0, the operation would
-  
+  Like queue_insert, but resize queue if queue is full
+
+  SYNOPSIS
+    queue_insert_safe()
+    queue		Queue to use
+    element		Element to insert
+
+  RETURN
+    0	OK
+    1	Cannot allocate more memory
+    2   auto_extent is 0; No insertion done
 */
 
 int queue_insert_safe(register QUEUE *queue, uchar *element)
@@ -236,7 +237,7 @@ int queue_insert_safe(register QUEUE *queue, uchar *element)
   {
     if (!queue->auto_extent)
       return 2;
-    else if (resize_queue(queue, queue->max_elements + queue->auto_extent))
+    if (resize_queue(queue, queue->max_elements + queue->auto_extent))
       return 1;
   }
   
@@ -245,40 +246,51 @@ int queue_insert_safe(register QUEUE *queue, uchar *element)
 }
 
 
-	/* Remove item from queue */
-	/* Returns pointer to removed element */
+/*
+  Remove item from queue
+
+  SYNOPSIS
+    queue_remove()
+    queue		Queue to use
+    idx			Index of element to remove.
+			First element in queue is 'queue_first_element(queue)'
+
+  RETURN
+   pointer to removed element
+*/
 
 uchar *queue_remove(register QUEUE *queue, uint idx)
 {
   uchar *element;
-  DBUG_ASSERT(idx < queue->max_elements);
-  element= queue->root[++idx];  /* Intern index starts from 1 */
-  queue->root[idx]= queue->root[queue->elements--];
-  _downheap(queue, idx);
+  DBUG_ASSERT(idx >= 1 && idx <= queue->elements);
+  element= queue->root[idx];
+  _downheap(queue, idx, queue->root[queue->elements--]);
   return element;
 }
 
-	/* Fix when element on top has been replaced */
 
-#ifndef queue_replaced
-void queue_replaced(QUEUE *queue)
-{
-  _downheap(queue,1);
-}
-#endif
+/*
+  Add element to fixed position and update heap
 
-#ifndef OLD_VERSION
+  SYNOPSIS
+    _downheap()
+    queue	Queue to use
+    idx         Index of element to change
+    element     Element to store at 'idx'
+
+  NOTE
+    This only works if element is >= all elements <= start_idx
+*/
 
-void _downheap(register QUEUE *queue, uint idx)
+void _downheap(register QUEUE *queue, uint start_idx, uchar *element)
 {
-  uchar *element;
-  uint elements,half_queue,offset_to_key, next_index;
+  uint elements,half_queue,offset_to_key, next_index, offset_to_queue_pos;
+  register uint idx= start_idx;
   my_bool first= TRUE;
-  uint start_idx= idx;
 
   offset_to_key=queue->offset_to_key;
-  element=queue->root[idx];
-  half_queue=(elements=queue->elements) >> 1;
+  offset_to_queue_pos= queue->offset_to_queue_pos;
+  half_queue= (elements= queue->elements) >> 1;
 
   while (idx <= half_queue)
   {
@@ -295,393 +307,68 @@ void _downheap(register QUEUE *queue, uint idx)
                           element+offset_to_key) * queue->max_at_top) >= 0)))
     {
       queue->root[idx]= element;
+      if (offset_to_queue_pos)
+        (*(uint*) (element + offset_to_queue_pos-1))= idx;
       return;
     }
-    queue->root[idx]=queue->root[next_index];
-    idx=next_index;
     first= FALSE;
-  }
-
-  next_index= idx >> 1;
-  while (next_index > start_idx)
-  {
-    if ((queue->compare(queue->first_cmp_arg,
-                       queue->root[next_index]+offset_to_key,
-                       element+offset_to_key) *
-         queue->max_at_top) < 0)
-      break;
-    queue->root[idx]=queue->root[next_index];
+    queue->root[idx]= queue->root[next_index];
+    if (offset_to_queue_pos)
+      (*(uint*) (queue->root[idx] + offset_to_queue_pos-1))= idx;
     idx=next_index;
-    next_index= idx >> 1;
   }
-  queue->root[idx]=element;
-}
 
-#else
   /*
-    The old _downheap version is kept for comparisons with the benchmark
-    suit or new benchmarks anyone wants to run for comparisons.
+    Insert the element into the right position. This is the same code
+    as we have in queue_insert()
   */
-	/* Fix heap when index have changed */
-void _downheap(register QUEUE *queue, uint idx)
-{
-  uchar *element;
-  uint elements,half_queue,next_index,offset_to_key;
-
-  offset_to_key=queue->offset_to_key;
-  element=queue->root[idx];
-  half_queue=(elements=queue->elements) >> 1;
-
-  while (idx <= half_queue)
+  while ((next_index= (idx >> 1)) > start_idx &&
+         queue->compare(queue->first_cmp_arg,
+                        element+offset_to_key,
+                        queue->root[next_index]+offset_to_key)*
+         queue->max_at_top < 0)
   {
-    next_index=idx+idx;
-    if (next_index < elements &&
-	(queue->compare(queue->first_cmp_arg,
-			queue->root[next_index]+offset_to_key,
-			queue->root[next_index+1]+offset_to_key) *
-	 queue->max_at_top) > 0)
-      next_index++;
-    if ((queue->compare(queue->first_cmp_arg,
-                        queue->root[next_index]+offset_to_key,
-                        element+offset_to_key) * queue->max_at_top) >= 0)
-      break;
-    queue->root[idx]=queue->root[next_index];
-    idx=next_index;
+    queue->root[idx]= queue->root[next_index];
+    if (offset_to_queue_pos)
+      (*(uint*) (queue->root[idx] + offset_to_queue_pos-1))= idx;
+    idx= next_index;
   }
-  queue->root[idx]=element;
+  queue->root[idx]= element;
+  if (offset_to_queue_pos)
+    (*(uint*) (element + offset_to_queue_pos-1))= idx;
 }
 
 
-#endif
-
 /*
   Fix heap when every element was changed.
+
+  SYNOPSIS
+    queue_fix()
+    queue	Queue to use
 */
 
 void queue_fix(QUEUE *queue)
 {
   uint i;
   for (i= queue->elements >> 1; i > 0; i--)
-    _downheap(queue, i);
+    _downheap(queue, i, queue_element(queue, i));
 }
 
-#ifdef MAIN
- /*
-   A test program for the priority queue implementation.
-   It can also be used to benchmark changes of the implementation
-   Build by doing the following in the directory mysys
-   make test_priority_queue
-   ./test_priority_queue
-
-   Written by Mikael Ronström, 2005
- */
-
-static uint num_array[1025];
-static uint tot_no_parts= 0;
-static uint tot_no_loops= 0;
-static uint expected_part= 0;
-static uint expected_num= 0;
-static bool max_ind= 0;
-static bool fix_used= 0;
-static ulonglong start_time= 0;
-
-static bool is_divisible_by(uint num, uint divisor)
-{
-  uint quotient= num / divisor;
-  if (quotient * divisor == num)
-    return TRUE;
-  return FALSE;
-}
-
-void calculate_next()
-{
-  uint part= expected_part, num= expected_num;
-  uint no_parts= tot_no_parts;
-  if (max_ind)
-  {
-    do
-    {
-      while (++part <= no_parts)
-      {
-        if (is_divisible_by(num, part) &&
-            (num <= ((1 << 21) + part)))
-        {
-          expected_part= part;
-          expected_num= num;
-          return;
-        }
-      }
-      part= 0;
-    } while (--num);
-  }
-  else
-  {
-    do
-    {
-      while (--part > 0)
-      {
-        if (is_divisible_by(num, part))
-        {
-          expected_part= part;
-          expected_num= num;
-          return;
-        }
-      }
-      part= no_parts + 1;
-    } while (++num);
-  }
-}
 
-void calculate_end_next(uint part)
-{
-  uint no_parts= tot_no_parts, num;
-  num_array[part]= 0;
-  if (max_ind)
-  {
-    expected_num= 0;
-    for (part= no_parts; part > 0 ; part--)
-    {
-      if (num_array[part])
-      {
-        num= num_array[part] & 0x3FFFFF;
-        if (num >= expected_num)
-        {
-          expected_num= num;
-          expected_part= part;
-        }
-      }
-    }
-    if (expected_num == 0)
-      expected_part= 0;
-  }
-  else
-  {
-    expected_num= 0xFFFFFFFF;
-    for (part= 1; part <= no_parts; part++)
-    {
-      if (num_array[part])
-      {
-        num= num_array[part] & 0x3FFFFF;
-        if (num <= expected_num)
-        {
-          expected_num= num;
-          expected_part= part;
-        }
-      }
-    }
-    if (expected_num == 0xFFFFFFFF)
-      expected_part= 0;
-  }
-  return;
-}
-static int test_compare(void *null_arg, uchar *a, uchar *b)
-{
-  uint a_num= (*(uint*)a) & 0x3FFFFF;
-  uint b_num= (*(uint*)b) & 0x3FFFFF;
-  uint a_part, b_part;
-  if (a_num > b_num)
-    return +1;
-  if (a_num < b_num)
-    return -1;
-  a_part= (*(uint*)a) >> 22;
-  b_part= (*(uint*)b) >> 22;
-  if (a_part < b_part)
-    return +1;
-  if (a_part > b_part)
-    return -1;
-  return 0;
-}
-
-bool check_num(uint num_part)
-{
-  uint part= num_part >> 22;
-  uint num= num_part & 0x3FFFFF;
-  if (part == expected_part)
-    if (num == expected_num)
-      return FALSE;
-  printf("Expect part %u Expect num 0x%x got part %u num 0x%x max_ind %u fix_used %u \n",
-          expected_part, expected_num, part, num, max_ind, fix_used);
-  return TRUE;
-}
-
-
-void perform_insert(QUEUE *queue)
-{
-  uint i= 1, no_parts= tot_no_parts;
-  uint backward_start= 0;
-
-  expected_part= 1;
-  expected_num= 1;
- 
-  if (max_ind)
-    backward_start= 1 << 21;
-
-  do
-  {
-    uint num= (i + backward_start);
-    if (max_ind)
-    {
-      while (!is_divisible_by(num, i))
-        num--;
-      if (max_ind && (num > expected_num ||
-                      (num == expected_num && i < expected_part)))
-      {
-        expected_num= num;
-        expected_part= i;
-      }
-    }
-    num_array[i]= num + (i << 22);
-    if (fix_used)
-      queue_element(queue, i-1)= (uchar*)&num_array[i];
-    else
-      queue_insert(queue, (uchar*)&num_array[i]);
-  } while (++i <= no_parts);
-  if (fix_used)
-  {
-    queue->elements= no_parts;
-    queue_fix(queue);
-  }
-}
-
-bool perform_ins_del(QUEUE *queue, bool max_ind)
-{
-  uint i= 0, no_loops= tot_no_loops, j= tot_no_parts;
-  do
-  {
-    uint num_part= *(uint*)queue_top(queue);
-    uint part= num_part >> 22;
-    if (check_num(num_part))
-      return TRUE;
-    if (j++ >= no_loops)
-    {
-      calculate_end_next(part);
-      queue_remove(queue, (uint) 0);
-    }
-    else
-    {
-      calculate_next();
-      if (max_ind)
-        num_array[part]-= part;
-      else
-        num_array[part]+= part;
-      queue_top(queue)= (uchar*)&num_array[part];
-      queue_replaced(queue);
-    }
-  } while (++i < no_loops);
-  return FALSE;
-}
-
-bool do_test(uint no_parts, uint l_max_ind, bool l_fix_used)
-{
-  QUEUE queue;
-  bool result;
-  max_ind= l_max_ind;
-  fix_used= l_fix_used;
-  init_queue(&queue, no_parts, 0, max_ind, test_compare, NULL);
-  tot_no_parts= no_parts;
-  tot_no_loops= 1024;
-  perform_insert(&queue);
-  if ((result= perform_ins_del(&queue, max_ind)))
-  delete_queue(&queue);
-  if (result)
-  {
-    printf("Error\n");
-    return TRUE;
-  }
-  return FALSE;
-}
-
-static void start_measurement()
-{
-  start_time= my_getsystime();
-}
-
-static void stop_measurement()
-{
-  ulonglong stop_time= my_getsystime();
-  uint time_in_micros;
-  stop_time-= start_time;
-  stop_time/= 10; /* Convert to microseconds */
-  time_in_micros= (uint)stop_time;
-  printf("Time expired is %u microseconds \n", time_in_micros);
-}
-
-static void benchmark_test()
-{
-  QUEUE queue_real;
-  QUEUE *queue= &queue_real;
-  uint i, add;
-  fix_used= TRUE;
-  max_ind= FALSE;
-  tot_no_parts= 1024;
-  init_queue(queue, tot_no_parts, 0, max_ind, test_compare, NULL);
-  /*
-    First benchmark whether queue_fix is faster than using queue_insert
-    for sizes of 16 partitions.
-  */
-  for (tot_no_parts= 2, add=2; tot_no_parts < 128;
-       tot_no_parts+= add, add++)
-  {
-    printf("Start benchmark queue_fix, tot_no_parts= %u \n", tot_no_parts);
-    start_measurement();
-    for (i= 0; i < 128; i++)
-    {
-      perform_insert(queue);
-      queue_remove_all(queue);
-    }
-    stop_measurement();
+/*
+  Change element at fixed position
 
-    fix_used= FALSE;
-    printf("Start benchmark queue_insert\n");
-    start_measurement();
-    for (i= 0; i < 128; i++)
-    {
-      perform_insert(queue);
-      queue_remove_all(queue);
-    }
-    stop_measurement();
-  }
-  /*
-    Now benchmark insertion and deletion of 16400 elements.
-    Used in consecutive runs this shows whether the optimised _downheap
-    is faster than the standard implementation.
-  */
-  printf("Start benchmarking _downheap \n");
-  start_measurement();
-  perform_insert(queue);
-  for (i= 0; i < 65536; i++)
-  {
-    uint num, part;
-    num= *(uint*)queue_top(queue);
-    num+= 16;
-    part= num >> 22;
-    num_array[part]= num;
-    queue_top(queue)= (uchar*)&num_array[part];
-    queue_replaced(queue);
-  }
-  for (i= 0; i < 16; i++)
-    queue_remove(queue, (uint) 0);
-  queue_remove_all(queue);
-  stop_measurement();
-}
+  SYNOPSIS
+    queue_replace()
+    queue	Queue to use
+    idx         Index of element to re-position; the element is removed
+                from 'idx' and then inserted into the queue again
+*/
 
-int main()
+void queue_replace(QUEUE *queue, uint idx)
 {
-  int i, add= 1;
-  for (i= 1; i < 1024; i+=add, add++)
-  {
-    printf("Start test for priority queue of size %u\n", i);
-    if (do_test(i, 0, 1))
-      return -1;
-    if (do_test(i, 1, 1))
-      return -1;
-    if (do_test(i, 0, 0))
-      return -1;
-    if (do_test(i, 1, 0))
-      return -1;
-  }
-  benchmark_test();
-  printf("OK\n");
-  return 0;
+  uchar *element= queue->root[idx];
+  DBUG_ASSERT(idx >= 1 && idx <= queue->elements);
+  queue_remove(queue, idx);
+  queue_insert(queue, element);
 }
-#endif
diff --git a/mysys/test_thr_mutex.c b/mysys/test_thr_mutex.c
new file mode 100644
index 00000000000..0bd14a0d31b
--- /dev/null
+++ b/mysys/test_thr_mutex.c
@@ -0,0 +1,162 @@
+/* Copyright (C) 2008 Sun Microsystems, Inc
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/* Testing of deadlock detector */
+
+#include <my_global.h>
+#include <mysys_priv.h>
+
+
+int main(int argc __attribute__((unused)), char** argv)
+{
+  pthread_mutex_t LOCK_A, LOCK_B, LOCK_C, LOCK_D, LOCK_E, LOCK_F, LOCK_G;
+  pthread_mutex_t LOCK_H, LOCK_I;
+  MY_INIT(argv[0]);
+  DBUG_ENTER("main");
+
+  DBUG_PUSH("d:t:O,/tmp/trace");
+  printf("This program is testing the mutex deadlock detection.\n"
+         "It should print out different failures of wrong mutex usage "
+         "on stderr\n\n");
+
+  safe_mutex_deadlock_detector= 1;
+  pthread_mutex_init(&LOCK_A, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_B, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_C, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_D, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_E, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_F, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_G, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_H, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_I, MY_MUTEX_INIT_FAST);
+
+  printf("Testing A->B and B->A\n");
+  fflush(stdout);
+  pthread_mutex_lock(&LOCK_A);
+  pthread_mutex_lock(&LOCK_B);
+  pthread_mutex_unlock(&LOCK_A);
+  pthread_mutex_unlock(&LOCK_B);
+
+  /* Test different (wrong) lock order */
+  pthread_mutex_lock(&LOCK_B);
+  pthread_mutex_lock(&LOCK_A);                  /* Should give warning */
+
+  pthread_mutex_unlock(&LOCK_A);
+  pthread_mutex_unlock(&LOCK_B);
+
+  /* Check that we don't get another warning for same lock */
+  printf("Testing A->B and B->A again (should not give a warning)\n"); fflush(stdout);
+  pthread_mutex_lock(&LOCK_B);
+  pthread_mutex_lock(&LOCK_A);
+  pthread_mutex_unlock(&LOCK_A);
+  pthread_mutex_unlock(&LOCK_B);
+
+  /*
+    Test of ring with many mutex
+    We also unlock mutex in different orders to get the unlock code properly
+    tested.
+  */
+  printf("Testing A->C and C->D and D->A\n"); fflush(stdout);
+  pthread_mutex_lock(&LOCK_A);
+  pthread_mutex_lock(&LOCK_C);
+  pthread_mutex_unlock(&LOCK_A);
+  pthread_mutex_unlock(&LOCK_C);
+  pthread_mutex_lock(&LOCK_C);
+  pthread_mutex_lock(&LOCK_D);
+  pthread_mutex_unlock(&LOCK_D);
+  pthread_mutex_unlock(&LOCK_C);
+
+  pthread_mutex_lock(&LOCK_D);
+  pthread_mutex_lock(&LOCK_A);                  /* Should give warning */
+
+  pthread_mutex_unlock(&LOCK_A);
+  pthread_mutex_unlock(&LOCK_D);
+
+  printf("Testing E -> F ; H -> I ; F -> H ; H -> I -> E\n");
+  fflush(stdout);
+
+  pthread_mutex_lock(&LOCK_E);
+  pthread_mutex_lock(&LOCK_F);
+  pthread_mutex_unlock(&LOCK_E);
+  pthread_mutex_unlock(&LOCK_F);
+  pthread_mutex_lock(&LOCK_H);
+  pthread_mutex_lock(&LOCK_I);
+  pthread_mutex_unlock(&LOCK_I);
+  pthread_mutex_unlock(&LOCK_H);
+  pthread_mutex_lock(&LOCK_F);
+  pthread_mutex_lock(&LOCK_H);
+  pthread_mutex_unlock(&LOCK_H);
+  pthread_mutex_unlock(&LOCK_F);
+
+  pthread_mutex_lock(&LOCK_H);
+  pthread_mutex_lock(&LOCK_I);
+  pthread_mutex_lock(&LOCK_E);                  /* Should give warning */
+
+  pthread_mutex_unlock(&LOCK_E);
+  pthread_mutex_unlock(&LOCK_I);
+  pthread_mutex_unlock(&LOCK_H);
+
+  printf("\nFollowing shouldn't give any warnings\n");
+  printf("Testing A->B and B->A without deadlock detection\n");
+  fflush(stdout);
+
+  /* Reinitialize mutex to get rid of old wrong usage markers */
+  pthread_mutex_destroy(&LOCK_A);
+  pthread_mutex_destroy(&LOCK_B);
+  pthread_mutex_init(&LOCK_A, MY_MUTEX_INIT_FAST);
+  pthread_mutex_init(&LOCK_B, MY_MUTEX_INIT_FAST);
+
+  /* Start testing */
+  my_pthread_mutex_lock(&LOCK_A, MYF(MYF_NO_DEADLOCK_DETECTION));
+  pthread_mutex_lock(&LOCK_B);
+  pthread_mutex_unlock(&LOCK_A);
+  pthread_mutex_unlock(&LOCK_B);
+
+  pthread_mutex_lock(&LOCK_A);
+  my_pthread_mutex_lock(&LOCK_B, MYF(MYF_NO_DEADLOCK_DETECTION));
+  pthread_mutex_unlock(&LOCK_A);
+  pthread_mutex_unlock(&LOCK_B);
+
+  printf("Testing A -> C ; B -> C ; A->B\n");
+  fflush(stdout);
+  pthread_mutex_lock(&LOCK_A);
+  pthread_mutex_lock(&LOCK_C);
+  pthread_mutex_unlock(&LOCK_C);
+  pthread_mutex_unlock(&LOCK_A);
+
+  pthread_mutex_lock(&LOCK_B);
+  pthread_mutex_lock(&LOCK_C);
+  pthread_mutex_unlock(&LOCK_C);
+  pthread_mutex_unlock(&LOCK_B);
+
+  pthread_mutex_lock(&LOCK_A);
+  pthread_mutex_lock(&LOCK_B);
+  pthread_mutex_unlock(&LOCK_B);
+  pthread_mutex_unlock(&LOCK_A);
+
+  /* Cleanup */
+  pthread_mutex_destroy(&LOCK_A);
+  pthread_mutex_destroy(&LOCK_B);
+  pthread_mutex_destroy(&LOCK_C);
+  pthread_mutex_destroy(&LOCK_D);
+  pthread_mutex_destroy(&LOCK_E);
+  pthread_mutex_destroy(&LOCK_F);
+  pthread_mutex_destroy(&LOCK_G);
+  pthread_mutex_destroy(&LOCK_H);
+  pthread_mutex_destroy(&LOCK_I);
+
+  my_end(MY_DONT_FREE_DBUG);
+  exit(0);
+}
diff --git a/mysys/thr_alarm.c b/mysys/thr_alarm.c
index 54eef693558..10607822ac0 100644
--- a/mysys/thr_alarm.c
+++ b/mysys/thr_alarm.c
@@ -42,6 +42,19 @@ volatile my_bool alarm_thread_running= 0;
 time_t next_alarm_expire_time= ~ (time_t) 0;
 static sig_handler process_alarm_part2(int sig);
 
+#ifdef DBUG_OFF
+#define reset_index_in_queue(alarm_data)
+#else
+#define reset_index_in_queue(alarm_data) alarm_data->index_in_queue= 0;
+#endif /* DBUG_OFF */
+
+#ifndef USE_ONE_SIGNAL_HAND
+#define one_signal_hand_sigmask(A,B,C) pthread_sigmask((A), (B), (C))
+#else
+#define one_signal_hand_sigmask(A,B,C)
+#endif
+
+
 #if !defined(__WIN__)
 
 static mysql_mutex_t LOCK_alarm;
@@ -73,8 +86,8 @@ void init_thr_alarm(uint max_alarms)
   DBUG_ENTER("init_thr_alarm");
   alarm_aborted=0;
   next_alarm_expire_time= ~ (time_t) 0;
-  init_queue(&alarm_queue,max_alarms+1,offsetof(ALARM,expire_time),0,
-	     compare_ulong,NullS);
+  init_queue(&alarm_queue, max_alarms+1, offsetof(ALARM,expire_time), 0,
+	     compare_ulong, NullS, offsetof(ALARM, index_in_queue)+1, 0);
   sigfillset(&full_signal_set);			/* Neaded to block signals */
   mysql_mutex_init(key_LOCK_alarm, &LOCK_alarm, MY_MUTEX_INIT_FAST);
   mysql_cond_init(key_COND_alarm, &COND_alarm, NULL);
@@ -151,7 +164,7 @@ void resize_thr_alarm(uint max_alarms)
 
 my_bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm_data)
 {
-  time_t now;
+  time_t now, next;
 #ifndef USE_ONE_SIGNAL_HAND
   sigset_t old_mask;
 #endif
@@ -161,79 +174,68 @@ my_bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm_data)
   DBUG_PRINT("enter",("thread: %s  sec: %d",my_thread_name(),sec));
 
   now= my_time(0);
-#ifndef USE_ONE_SIGNAL_HAND
-  pthread_sigmask(SIG_BLOCK,&full_signal_set,&old_mask);
-#endif
+  if (!alarm_data)
+  {
+    if (!(alarm_data=(ALARM*) my_malloc(sizeof(ALARM),MYF(MY_WME))))
+      goto abort_no_unlock;
+    alarm_data->malloced= 1;
+  }
+  else
+    alarm_data->malloced= 0;
+  next= now + sec;
+  alarm_data->expire_time= next;
+  alarm_data->alarmed=   0;
+  alarm_data->thread=    current_my_thread_var->pthread_self;
+  alarm_data->thread_id= current_my_thread_var->id;
+
+  one_signal_hand_sigmask(SIG_BLOCK,&full_signal_set,&old_mask);
   mysql_mutex_lock(&LOCK_alarm);        /* Lock from threads & alarms */
-  if (alarm_aborted > 0)
+  if (unlikely(alarm_aborted))
   {					/* No signal thread */
     DBUG_PRINT("info", ("alarm aborted"));
-    *alrm= 0;					/* No alarm */
-    mysql_mutex_unlock(&LOCK_alarm);
-#ifndef USE_ONE_SIGNAL_HAND
-    pthread_sigmask(SIG_SETMASK,&old_mask,NULL);
-#endif
-    DBUG_RETURN(1);
-  }
-  if (alarm_aborted < 0)
+    if (alarm_aborted > 0)
+      goto abort;
     sec= 1;					/* Abort mode */
-
+  }
   if (alarm_queue.elements >= max_used_alarms)
   {
     if (alarm_queue.elements == alarm_queue.max_elements)
     {
       DBUG_PRINT("info", ("alarm queue full"));
       fprintf(stderr,"Warning: thr_alarm queue is full\n");
-      *alrm= 0;					/* No alarm */
-      mysql_mutex_unlock(&LOCK_alarm);
-#ifndef USE_ONE_SIGNAL_HAND
-      pthread_sigmask(SIG_SETMASK,&old_mask,NULL);
-#endif
-      DBUG_RETURN(1);
+      goto abort;
     }
     max_used_alarms=alarm_queue.elements+1;
   }
-  reschedule= (ulong) next_alarm_expire_time > (ulong) now + sec;
-  if (!alarm_data)
-  {
-    if (!(alarm_data=(ALARM*) my_malloc(sizeof(ALARM),MYF(MY_WME))))
-    {
-      DBUG_PRINT("info", ("failed my_malloc()"));
-      *alrm= 0;					/* No alarm */
-      mysql_mutex_unlock(&LOCK_alarm);
-#ifndef USE_ONE_SIGNAL_HAND
-      pthread_sigmask(SIG_SETMASK,&old_mask,NULL);
-#endif
-      DBUG_RETURN(1);
-    }
-    alarm_data->malloced=1;
-  }
-  else
-    alarm_data->malloced=0;
-  alarm_data->expire_time=now+sec;
-  alarm_data->alarmed=0;
-  alarm_data->thread=    current_my_thread_var->pthread_self;
-  alarm_data->thread_id= current_my_thread_var->id;
+  reschedule= (ulong) next_alarm_expire_time > (ulong) next;
   queue_insert(&alarm_queue,(uchar*) alarm_data);
+  assert(alarm_data->index_in_queue > 0);
 
   /* Reschedule alarm if the current one has more than sec left */
-  if (reschedule)
+  if (unlikely(reschedule))
   {
     DBUG_PRINT("info", ("reschedule"));
     if (pthread_equal(pthread_self(),alarm_thread))
     {
       alarm(sec);				/* purecov: inspected */
-      next_alarm_expire_time= now + sec;
+      next_alarm_expire_time= next;
     }
     else
       reschedule_alarms();			/* Reschedule alarms */
   }
   mysql_mutex_unlock(&LOCK_alarm);
-#ifndef USE_ONE_SIGNAL_HAND
-  pthread_sigmask(SIG_SETMASK,&old_mask,NULL);
-#endif
+  one_signal_hand_sigmask(SIG_SETMASK,&old_mask,NULL);
   (*alrm)= &alarm_data->alarmed;
   DBUG_RETURN(0);
+
+abort:
+  if (alarm_data->malloced)
+    my_free(alarm_data, MYF(0));
+  mysql_mutex_unlock(&LOCK_alarm);
+  one_signal_hand_sigmask(SIG_SETMASK,&old_mask,NULL);
+abort_no_unlock:
+  *alrm= 0;					/* No alarm */
+  DBUG_RETURN(1);
 }
 
 
@@ -247,41 +249,18 @@ void thr_end_alarm(thr_alarm_t *alarmed)
 #ifndef USE_ONE_SIGNAL_HAND
   sigset_t old_mask;
 #endif
-  uint i, found=0;
   DBUG_ENTER("thr_end_alarm");
 
-#ifndef USE_ONE_SIGNAL_HAND
-  pthread_sigmask(SIG_BLOCK,&full_signal_set,&old_mask);
-#endif
-  mysql_mutex_lock(&LOCK_alarm);
-
+  one_signal_hand_sigmask(SIG_BLOCK,&full_signal_set,&old_mask);
   alarm_data= (ALARM*) ((uchar*) *alarmed - offsetof(ALARM,alarmed));
-  for (i=0 ; i < alarm_queue.elements ; i++)
-  {
-    if ((ALARM*) queue_element(&alarm_queue,i) == alarm_data)
-    {
-      queue_remove(&alarm_queue,i),MYF(0);
-      if (alarm_data->malloced)
-	my_free(alarm_data);
-      found++;
-#ifdef DBUG_OFF
-      break;
-#endif
-    }
-  }
-  DBUG_ASSERT(!*alarmed || found == 1);
-  if (!found)
-  {
-    if (*alarmed)
-      fprintf(stderr,"Warning: Didn't find alarm 0x%lx in queue of %d alarms\n",
-	      (long) *alarmed, alarm_queue.elements);
-    DBUG_PRINT("warning",("Didn't find alarm 0x%lx in queue\n",
-			  (long) *alarmed));
-  }
+  mysql_mutex_lock(&LOCK_alarm);
+  DBUG_ASSERT(alarm_data->index_in_queue != 0);
+  DBUG_ASSERT(queue_element(&alarm_queue, alarm_data->index_in_queue) ==
+              alarm_data);
+  queue_remove(&alarm_queue, alarm_data->index_in_queue);
   mysql_mutex_unlock(&LOCK_alarm);
-#ifndef USE_ONE_SIGNAL_HAND
-  pthread_sigmask(SIG_SETMASK,&old_mask,NULL);
-#endif
+  one_signal_hand_sigmask(SIG_SETMASK,&old_mask,NULL);
+  reset_index_in_queue(alarm_data);
   DBUG_VOID_RETURN;
 }
 
@@ -344,12 +323,13 @@ static sig_handler process_alarm_part2(int sig __attribute__((unused)))
 #if defined(MAIN) && !defined(__bsdi__)
   printf("process_alarm\n"); fflush(stdout);
 #endif
-  if (alarm_queue.elements)
+  if (likely(alarm_queue.elements))
   {
-    if (alarm_aborted)
+    if (unlikely(alarm_aborted))
     {
       uint i;
-      for (i=0 ; i < alarm_queue.elements ;)
+      for (i= queue_first_element(&alarm_queue) ;
+           i <= queue_last_element(&alarm_queue) ;)
       {
 	alarm_data=(ALARM*) queue_element(&alarm_queue,i);
 	alarm_data->alarmed=1;			/* Info to thread */
@@ -360,6 +340,7 @@ static sig_handler process_alarm_part2(int sig __attribute__((unused)))
 	  printf("Warning: pthread_kill couldn't find thread!!!\n");
 #endif
 	  queue_remove(&alarm_queue,i);		/* No thread. Remove alarm */
+          reset_index_in_queue(alarm_data);
 	}
 	else
 	  i++;					/* Signal next thread */
@@ -371,8 +352,8 @@ static sig_handler process_alarm_part2(int sig __attribute__((unused)))
     }
     else
     {
-      ulong now=(ulong) my_time(0);
-      ulong next=now+10-(now%10);
+      time_t now= my_time(0);
+      time_t next= now+10-(now%10);
       while ((alarm_data=(ALARM*) queue_top(&alarm_queue))->expire_time <= now)
       {
 	alarm_data->alarmed=1;			/* Info to thread */
@@ -382,15 +363,16 @@ static sig_handler process_alarm_part2(int sig __attribute__((unused)))
 	{
 #ifdef MAIN
 	  printf("Warning: pthread_kill couldn't find thread!!!\n");
-#endif
-	  queue_remove(&alarm_queue,0);		/* No thread. Remove alarm */
+#endif /* MAIN */
+	  queue_remove_top(&alarm_queue); /* No thread. Remove alarm */
+          reset_index_in_queue(alarm_data);
 	  if (!alarm_queue.elements)
 	    break;
 	}
 	else
 	{
 	  alarm_data->expire_time=next;
-	  queue_replaced(&alarm_queue);
+	  queue_replace_top(&alarm_queue);
 	}
       }
 #ifndef USE_ALARM_THREAD
@@ -483,21 +465,27 @@ void end_thr_alarm(my_bool free_structures)
 void thr_alarm_kill(my_thread_id thread_id)
 {
   uint i;
+  DBUG_ENTER("thr_alarm_kill");
+
   if (alarm_aborted)
-    return;
+    DBUG_VOID_RETURN;                           /* Balance DBUG_ENTER */
   mysql_mutex_lock(&LOCK_alarm);
-  for (i=0 ; i < alarm_queue.elements ; i++)
+  for (i= queue_first_element(&alarm_queue) ;
+       i <= queue_last_element(&alarm_queue);
+       i++)
   {
-    if (((ALARM*) queue_element(&alarm_queue,i))->thread_id == thread_id)
+    ALARM *element= (ALARM*) queue_element(&alarm_queue,i);
+    if (element->thread_id == thread_id)
     {
-      ALARM *tmp=(ALARM*) queue_remove(&alarm_queue,i);
-      tmp->expire_time=0;
-      queue_insert(&alarm_queue,(uchar*) tmp);
+      DBUG_PRINT("info", ("found thread; Killing it"));
+      element->expire_time= 0;                  /* Mark as already expired */
+      queue_replace(&alarm_queue, i);           /* Re-sort on the new key */
       reschedule_alarms();
       break;
     }
   }
   mysql_mutex_unlock(&LOCK_alarm);
+  DBUG_VOID_RETURN;
 }
 
 
@@ -508,7 +496,7 @@ void thr_alarm_info(ALARM_INFO *info)
   info->max_used_alarms= max_used_alarms;
   if ((info->active_alarms=  alarm_queue.elements))
   {
-    ulong now=(ulong) my_time(0);
+    time_t now= my_time(0);
     long time_diff;
     ALARM *alarm_data= (ALARM*) queue_top(&alarm_queue);
     time_diff= (long) (alarm_data->expire_time - now);
@@ -556,7 +544,7 @@ static void *alarm_handler(void *arg __attribute__((unused)))
   {
     if (alarm_queue.elements)
     {
-      ulong sleep_time,now= my_time(0);
+      time_t sleep_time,now= my_time(0);
       if (alarm_aborted)
 	sleep_time=now+1;
       else
@@ -792,19 +780,6 @@ static void *test_thread(void *arg)
   return 0;
 }
 
-#ifdef USE_ONE_SIGNAL_HAND
-static sig_handler print_signal_warning(int sig)
-{
-  printf("Warning: Got signal %d from thread %s\n",sig,my_thread_name());
-  fflush(stdout);
-#ifdef SIGNAL_HANDLER_RESET_ON_DELIVERY
-  my_sigset(sig,print_signal_warning);		/* int. thread system calls */
-#endif
-  if (sig == SIGALRM)
-    alarm(2);					/* reschedule alarm */
-}
-#endif /* USE_ONE_SIGNAL_HAND */
-
 
 static void *signal_hand(void *arg __attribute__((unused)))
 {
diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c
index d96d08ea0c3..135a2a5618f 100644
--- a/mysys/thr_lock.c
+++ b/mysys/thr_lock.c
@@ -24,7 +24,7 @@ Locks are prioritized according to:
 
 The current lock types are:
 
-TL_READ	 		# Low priority read
+TL_READ                 # Low priority read
 TL_READ_WITH_SHARED_LOCKS
 TL_READ_HIGH_PRIORITY	# High priority read
 TL_READ_NO_INSERT	# Read without concurrent inserts
@@ -56,8 +56,17 @@ check_status:
 	 In MyISAM this is a simple check if the insert can be done
 	 at the end of the datafile.
 update_status:
-	Before a write lock is released, this function is called.
-	In MyISAM this functions updates the count and length of the datafile
+        Called in thr_reschedule_write_lock(), when an insert delayed thread
+        downgrades TL_WRITE lock to TL_WRITE_DELAYED, to allow SELECT
+        threads to proceed.
+        A storage engine should also call update_status internally
+        in the ::external_lock(F_UNLCK) method.
+        In MyISAM and CSV this function updates the length of the datafile.
+        In some exceptional cases (when doing DDL statements on open
+        tables) MySQL calls thr_unlock() followed by thr_lock() without
+        calling ::external_lock() in between. In this case thr_unlock() is
+        called with the THR_UNLOCK_UPDATE_STATUS flag and thr_unlock() will
+        call update_status for write locks.
 get_status:
 	When one gets a lock this functions is called.
 	In MyISAM this stores the number of rows and size of the datafile
@@ -66,6 +75,8 @@ get_status:
 The lock algorithm allows one to have one TL_WRITE_CONCURRENT_INSERT or
 one TL_WRITE_DELAYED lock at the same time as multiple read locks.
 
+In addition, if lock->allow_multiple_concurrent_insert is set then there can
+be any number of TL_WRITE_CONCURRENT_INSERT locks active at the same time.
 */
 
 #if !defined(MAIN) && !defined(DBUG_OFF) && !defined(EXTRA_DEBUG)
@@ -106,8 +117,30 @@ static inline mysql_cond_t *get_cond(void)
   return &my_thread_var->suspend;
 }
 
+
 /*
-** For the future (now the thread specific cond is alloced by my_pthread.c)
+  Priority for locks (decides in which order locks are locked)
+  We want all write locks to be first, followed by read locks.
+  Locks from MERGE tables have a little lower priority than other
+  locks, to allow one to release merge tables without having
+  to unlock and re-lock other locks.
+  The lower the number, the higher the priority for the lock.
+  Read locks should have 4, write locks should have 0.
+  UNLOCK is 8, to force these last in thr_merge_locks.
+  For MERGE tables we add 2 (THR_LOCK_MERGE_PRIV) to the lock priority.
+  THR_LOCK_LATE_PRIV (1) is used when one locks other tables to be merged
+  with existing locks. This way we prioritize the original locks over the
+  new locks.
+*/
+
+static uint lock_priority[(uint)TL_WRITE_ONLY+1] =
+{ 8, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
+
+#define LOCK_CMP(A,B) ((uchar*) ((A)->lock) + lock_priority[(uint) (A)->type] + (A)->priority < (uchar*) ((B)->lock) + lock_priority[(uint) (B)->type] + (B)->priority)
+
+
+/*
+  For the future (now the thread specific cond is alloced by my_pthread.c)
 */
 
 my_bool init_thr_lock()
@@ -154,7 +187,8 @@ static int check_lock(struct st_lock_list *list, const char* lock_type,
       }
       if (same_owner &&
           !thr_lock_owner_equal(data->owner, first_owner) &&
-	  last_lock_type != TL_WRITE_ALLOW_WRITE)
+	  last_lock_type != TL_WRITE_ALLOW_WRITE &&
+          last_lock_type != TL_WRITE_CONCURRENT_INSERT)
       {
 	fprintf(stderr,
 		"Warning: Found locks from different threads in %s: %s\n",
@@ -207,7 +241,7 @@ static void check_locks(THR_LOCK *lock, const char *where,
       THR_LOCK_DATA *data;
       for (data=lock->read.data ; data ; data=data->next)
       {
-	if ((int) data->type == (int) TL_READ_NO_INSERT)
+	if (data->type == TL_READ_NO_INSERT)
 	  count++;
         /* Protect against infinite loop. */
         DBUG_ASSERT(count <= lock->read_no_write_count);
@@ -255,7 +289,22 @@ static void check_locks(THR_LOCK *lock, const char *where,
 	}	      
       }
       else
-      {						/* Have write lock */
+      {
+        /* We have at least one write lock */
+        if (lock->write.data->type == TL_WRITE_CONCURRENT_INSERT)
+        {
+          THR_LOCK_DATA *data;
+          for (data=lock->write.data->next ; data ; data=data->next)
+          {
+            if (data->type != TL_WRITE_CONCURRENT_INSERT)
+            {
+              fprintf(stderr,
+                      "Warning at '%s': Found TL_WRITE_CONCURRENT_INSERT lock mixed with other write locks\n",
+                      where);
+              break;
+            }
+          }
+        }
 	if (lock->write_wait.data)
 	{
 	  if (!allow_no_locks && 
@@ -362,6 +411,7 @@ void thr_lock_data_init(THR_LOCK *lock,THR_LOCK_DATA *data, void *param)
   data->owner= 0;                               /* no owner yet */
   data->status_param=param;
   data->cond=0;
+  data->priority= 0;
 }
 
 
@@ -518,7 +568,8 @@ wait_for_lock(struct st_lock_list *wait, THR_LOCK_DATA *data,
   {
     result= THR_LOCK_SUCCESS;
     if (data->lock->get_status)
-      (*data->lock->get_status)(data->status_param, 0);
+      (*data->lock->get_status)(data->status_param,
+                                data->type == TL_WRITE_CONCURRENT_INSERT);
     check_locks(data->lock,"got wait_for_lock",0);
   }
   mysql_mutex_unlock(&data->lock->mutex);
@@ -535,7 +586,7 @@ wait_for_lock(struct st_lock_list *wait, THR_LOCK_DATA *data,
 }
 
 
-enum enum_thr_lock_result
+static enum enum_thr_lock_result
 thr_lock(THR_LOCK_DATA *data, THR_LOCK_INFO *owner,
          enum thr_lock_type lock_type, ulong lock_wait_timeout)
 {
@@ -548,6 +599,7 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_INFO *owner,
   data->cond=0;					/* safety */
   data->type=lock_type;
   data->owner= owner;                           /* Must be reset ! */
+  data->priority&= ~THR_LOCK_LATE_PRIV;
   mysql_mutex_lock(&lock->mutex);
   DBUG_PRINT("lock",("data: 0x%lx  thread: 0x%lx  lock: 0x%lx  type: %d",
                      (long) data, data->owner->thread_id,
@@ -620,11 +672,11 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_INFO *owner,
       (*lock->read.last)=data;			/* Add to running FIFO */
       data->prev=lock->read.last;
       lock->read.last= &data->next;
-      if (lock->get_status)
-	(*lock->get_status)(data->status_param, 0);
       if (lock_type == TL_READ_NO_INSERT)
 	lock->read_no_write_count++;
       check_locks(lock,"read lock with no write locks",0);
+      if (lock->get_status)
+	(*lock->get_status)(data->status_param, 0);
       statistic_increment(locks_immediate,&THR_LOCK_lock);
       goto end;
     }
@@ -682,7 +734,8 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_INFO *owner,
       /*
         The idea is to allow us to get a lock at once if we already have
         a write lock or if there is no pending write locks and if all
-        write locks are of TL_WRITE_ALLOW_WRITE type.
+        write locks are of the same type and are either
+        TL_WRITE_ALLOW_WRITE or TL_WRITE_CONCURRENT_INSERT
 
         Note that, since lock requests for the same table are sorted in
         such way that requests with higher thr_lock_type value come first
@@ -713,15 +766,13 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_INFO *owner,
                      lock->write.data->type == TL_WRITE_LOW_PRIORITY)) &&
                    lock->write.data->type != TL_WRITE_DELAYED));
 
-      if ((lock_type == TL_WRITE_ALLOW_WRITE &&
+      if (((lock_type == TL_WRITE_ALLOW_WRITE ||
+           (lock_type == TL_WRITE_CONCURRENT_INSERT &&
+             lock->allow_multiple_concurrent_insert)) &&
            ! lock->write_wait.data &&
-           lock->write.data->type == TL_WRITE_ALLOW_WRITE) ||
+           lock->write.data->type == lock_type) ||
           has_old_lock(lock->write.data, data->owner))
       {
-	/*
-          We have already got a write lock or all locks are
-          TL_WRITE_ALLOW_WRITE
-        */
         DBUG_PRINT("info", ("write_wait.data: 0x%lx  old_type: %d",
                             (ulong) lock->write_wait.data,
                             lock->write.data->type));
@@ -730,8 +781,9 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_INFO *owner,
 	data->prev=lock->write.last;
 	lock->write.last= &data->next;
 	check_locks(lock,"second write lock",0);
-	if (data->lock->get_status)
-	  (*data->lock->get_status)(data->status_param, 0);
+	if (lock->get_status)
+	  (*lock->get_status)(data->status_param,
+                              lock_type == TL_WRITE_CONCURRENT_INSERT);
 	statistic_increment(locks_immediate,&THR_LOCK_lock);
 	goto end;
       }
@@ -764,8 +816,8 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_INFO *owner,
 	  (*lock->write.last)=data;		/* Add as current write lock */
 	  data->prev=lock->write.last;
 	  lock->write.last= &data->next;
-	  if (data->lock->get_status)
-	    (*data->lock->get_status)(data->status_param, concurrent_insert);
+	  if (lock->get_status)
+	    (*lock->get_status)(data->status_param, concurrent_insert);
 	  check_locks(lock,"only write lock",0);
 	  statistic_increment(locks_immediate,&THR_LOCK_lock);
 	  goto end;
@@ -836,7 +888,7 @@ static inline void free_all_read_locks(THR_LOCK *lock,
 
 	/* Unlock lock and free next thread on same lock */
 
-void thr_unlock(THR_LOCK_DATA *data)
+void thr_unlock(THR_LOCK_DATA *data, uint unlock_flags)
 {
   THR_LOCK *lock=data->lock;
   enum thr_lock_type lock_type=data->type;
@@ -860,15 +912,20 @@ void thr_unlock(THR_LOCK_DATA *data)
   }
   else
     lock->write.last=data->prev;
-  if (lock_type >= TL_WRITE_CONCURRENT_INSERT)
+ 
+  if (unlock_flags & THR_UNLOCK_UPDATE_STATUS)
   {
-    if (lock->update_status)
-      (*lock->update_status)(data->status_param);
-  }
-  else
-  {
-    if (lock->restore_status)
-      (*lock->restore_status)(data->status_param);
+    /* External lock was not called; Update or restore status */
+    if (lock_type >= TL_WRITE_CONCURRENT_INSERT)
+    {
+      if (lock->update_status)
+        (*lock->update_status)(data->status_param);
+    }
+    else
+    {
+      if (lock->restore_status)
+        (*lock->restore_status)(data->status_param);
+    }
   }
   if (lock_type == TL_READ_NO_INSERT)
     lock->read_no_write_count--;
@@ -892,7 +949,6 @@ static void wake_up_waiters(THR_LOCK *lock)
 {
   THR_LOCK_DATA *data;
   enum thr_lock_type lock_type;
-
   DBUG_ENTER("wake_up_waiters");
 
   if (!lock->write.data)			/* If no active write locks */
@@ -1006,14 +1062,12 @@ end:
 
 
 /*
-** Get all locks in a specific order to avoid dead-locks
-** Sort acording to lock position and put write_locks before read_locks if
-** lock on same lock.
+  Get all locks in a specific order to avoid dead-locks
+  Sort according to lock position and put write_locks before read_locks if
+  lock on same lock. Locks on MERGE tables have lower priority than other
+  locks of the same type. See comment for lock_priority.
 */
 
-
-#define LOCK_CMP(A,B) ((uchar*) (A->lock) - (uint) ((A)->type) < (uchar*) (B->lock)- (uint) ((B)->type))
-
 static void sort_locks(THR_LOCK_DATA **data,uint count)
 {
   THR_LOCK_DATA **pos,**end,**prev,*tmp;
@@ -1039,11 +1093,15 @@ enum enum_thr_lock_result
 thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_INFO *owner,
                ulong lock_wait_timeout)
 {
-  THR_LOCK_DATA **pos,**end;
+  THR_LOCK_DATA **pos, **end, **first_lock;
   DBUG_ENTER("thr_multi_lock");
   DBUG_PRINT("lock",("data: 0x%lx  count: %d", (long) data, count));
+
   if (count > 1)
     sort_locks(data,count);
+  else if (count == 0)
+    DBUG_RETURN(THR_LOCK_SUCCESS);
+
   /* lock everything */
   for (pos=data,end=data+count; pos < end ; pos++)
   {
@@ -1051,7 +1109,7 @@ thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_INFO *owner,
                                                lock_wait_timeout);
     if (result != THR_LOCK_SUCCESS)
     {						/* Aborted */
-      thr_multi_unlock(data,(uint) (pos-data));
+      thr_multi_unlock(data,(uint) (pos-data), 0);
       DBUG_RETURN(result);
     }
     DEBUG_SYNC_C("thr_multi_lock_after_thr_lock");
@@ -1060,93 +1118,103 @@ thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_INFO *owner,
 	   (long) pos[0]->lock, pos[0]->type); fflush(stdout);
 #endif
   }
-  thr_lock_merge_status(data, count);
+
+  /*
+    Call start_trans for all locks.
+    If we lock the same table multiple times, we must use the same
+    status_param; We ensure this by calling copy_status() for all
+    copies of the same tables.
+  */
+  if ((*data)->lock->start_trans)
+    ((*data)->lock->start_trans)((*data)->status_param);
+  for (first_lock=data, pos= data+1 ; pos < end ; pos++)
+  {
+    /* Get the current status (row count, checksum, trid etc) */
+    if ((*pos)->lock->start_trans)
+      (*(*pos)->lock->start_trans)((*pos)->status_param);
+    /*
+      If same table as previous table use pointer to previous status
+      information to ensure that all read/write tables shares same
+      state.
+    */
+    if (pos[0]->lock == pos[-1]->lock && pos[0]->lock->copy_status)
+      (pos[0]->lock->copy_status)((*pos)->status_param,
+                                  (*first_lock)->status_param);
+    else
+    {
+      /* Different lock, use this as base for next lock */
+      first_lock= pos;
+    }
+  }
   DBUG_RETURN(THR_LOCK_SUCCESS);
 }
 
 
 /**
-  Ensure that all locks for a given table have the same
-  status_param.
-
-  This is a MyISAM and possibly Maria specific crutch. MyISAM
-  engine stores data file length, record count and other table
-  properties in status_param member of handler. When a table is
-  locked, connection-local copy is made from a global copy
-  (myisam_share) by mi_get_status(). When a table is unlocked,
-  the changed status is transferred back to the global share by
-  mi_update_status().
-
-  One thing MyISAM doesn't do is to ensure that when the same
-  table is opened twice in a connection all instances share the
-  same status_param. This is necessary, however: for one, to keep
-  all instances of a connection "on the same page" with regard to
-  the current state of the table. For other, unless this is done,
-  myisam_share will always get updated from the last unlocked
-  instance (in mi_update_status()), and when this instance was not
-  the one that was used to update data, records may be lost.
-
-  For each table, this function looks up the last lock_data in the
-  list of acquired locks, and makes sure that all other instances
-  share status_param with it.
+  Merge two sets of locks.
+
+  @param data       All locks. First old locks, then new locks.
+  @param old_count  Original number of locks. These are first in 'data'.
+  @param new_count  How many new locks
+
+  The merge is needed if the new locks contain the same tables as the old
+  locks, in which case we have to ensure that the same tables share the
+  same status (as after a thr_multi_lock()).
 */
 
-void
-thr_lock_merge_status(THR_LOCK_DATA **data, uint count)
+void thr_merge_locks(THR_LOCK_DATA **data, uint old_count, uint new_count)
 {
-#if !defined(DONT_USE_RW_LOCKS)
-  THR_LOCK_DATA **pos= data;
-  THR_LOCK_DATA **end= data + count;
-  if (count > 1)
+  THR_LOCK_DATA **pos, **end, **first_lock= 0;
+  DBUG_ENTER("thr_merge_locks");
+
+  /* Remove marks on old locks to make them sort before new ones */
+  for (pos=data, end= pos + old_count; pos < end ; pos++)
+    (*pos)->priority&= ~THR_LOCK_LATE_PRIV;
+
+  /* Mark new locks with LATE_PRIV to make them sort after org ones */
+  for (pos=data + old_count, end= pos + new_count; pos < end ; pos++)
+    (*pos)->priority|= THR_LOCK_LATE_PRIV;
+
+  sort_locks(data, old_count + new_count);
+
+  for (pos=data ; pos < end ; pos++)
   {
-    THR_LOCK_DATA *last_lock= end[-1];
-    pos=end-1;
-    do
+    /* Check if lock was unlocked before */
+    if (pos[0]->type == TL_UNLOCK || ! pos[0]->lock->fix_status)
     {
-      pos--;
-      if (last_lock->lock == (*pos)->lock &&
-	  last_lock->lock->copy_status)
-      {
-	if (last_lock->type <= TL_READ_NO_INSERT)
-	{
-	  THR_LOCK_DATA **read_lock;
-	  /*
-	    If we are locking the same table with read locks we must ensure
-	    that all tables share the status of the last write lock or
-	    the same read lock.
-	  */
-	  for (;
-	       (*pos)->type <= TL_READ_NO_INSERT &&
-		 pos != data &&
-		 pos[-1]->lock == (*pos)->lock ;
-	       pos--) ;
-
-	  read_lock = pos+1;
-	  do
-	  {
-	    (last_lock->lock->copy_status)((*read_lock)->status_param,
-					   (*pos)->status_param);
-	  } while (*(read_lock++) != last_lock);
-	  last_lock= (*pos);			/* Point at last write lock */
-	}
-	else
-	  (*last_lock->lock->copy_status)((*pos)->status_param,
-					  last_lock->status_param);
-      }
-      else
-	last_lock=(*pos);
-    } while (pos != data);
+      DBUG_PRINT("info", ("lock skipped.  unlocked: %d  fix_status: %d",
+                          pos[0]->type == TL_UNLOCK,
+                          pos[0]->lock->fix_status == 0));
+      continue;
+    }
+
+    /*
+      If same table as previous table use pointer to previous status
+      information to ensure that all read/write tables shares same
+      state.
+    */
+    if (first_lock && pos[0]->lock == first_lock[0]->lock)
+      (pos[0]->lock->fix_status)((*first_lock)->status_param,
+                                 (*pos)->status_param);
+    else
+    {
+      /* Different lock, use this as base for next lock */
+      first_lock= pos;
+      (pos[0]->lock->fix_status)((*first_lock)->status_param, 0);
+    }
   }
-#endif
+  DBUG_VOID_RETURN;
 }
 
-  /* free all locks */
 
-void thr_multi_unlock(THR_LOCK_DATA **data,uint count)
+/* Unlock all locks */
+
+void thr_multi_unlock(THR_LOCK_DATA **data,uint count, uint unlock_flags)
 {
   THR_LOCK_DATA **pos,**end;
   DBUG_ENTER("thr_multi_unlock");
-  DBUG_PRINT("lock",("data: 0x%lx  count: %d", (long) data, count));
+  DBUG_PRINT("lock",("data: 0x%lx  count: %d  flags: %u", (long) data, count,
+                     unlock_flags));
 
   for (pos=data,end=data+count; pos < end ; pos++)
   {
@@ -1156,7 +1224,7 @@ void thr_multi_unlock(THR_LOCK_DATA **data,uint count)
     fflush(stdout);
 #endif
     if ((*pos)->type != TL_UNLOCK)
-      thr_unlock(*pos);
+      thr_unlock(*pos, unlock_flags);
     else
     {
       DBUG_PRINT("lock",("Free lock: data: 0x%lx  thread: 0x%lx  lock: 0x%lx",
@@ -1306,6 +1374,7 @@ my_bool thr_upgrade_write_delay_lock(THR_LOCK_DATA *data,
                                      ulong lock_wait_timeout)
 {
   THR_LOCK *lock=data->lock;
+  enum enum_thr_lock_result res;
   DBUG_ENTER("thr_upgrade_write_delay_lock");
 
   mysql_mutex_lock(&lock->mutex);
@@ -1326,6 +1395,8 @@ my_bool thr_upgrade_write_delay_lock(THR_LOCK_DATA *data,
       if (data->lock->get_status)
 	(*data->lock->get_status)(data->status_param, 0);
       mysql_mutex_unlock(&lock->mutex);
+      if (lock->start_trans)
+	(*lock->start_trans)(data->status_param);
       DBUG_RETURN(0);
     }
 
@@ -1346,7 +1417,10 @@ my_bool thr_upgrade_write_delay_lock(THR_LOCK_DATA *data,
   {
     check_locks(lock,"waiting for lock",0);
   }
-  DBUG_RETURN(wait_for_lock(&lock->write_wait,data,1, lock_wait_timeout));
+  res= wait_for_lock(&lock->write_wait, data, 1, lock_wait_timeout);
+  if (res == THR_LOCK_SUCCESS && lock->start_trans)
+    DBUG_RETURN((*lock->start_trans)(data->status_param));
+  DBUG_RETURN(res != THR_LOCK_SUCCESS);         /* Report failed wait */
 }
 
 
@@ -1466,7 +1540,7 @@ struct st_test {
   enum thr_lock_type lock_type;
 };
 
-THR_LOCK locks[5];			/* 4 locks */
+THR_LOCK locks[6];			/* Number of locks +1 */
 
 struct st_test test_0[] = {{0,TL_READ}};	/* One lock */
 struct st_test test_1[] = {{0,TL_READ},{0,TL_WRITE}}; /* Read and write lock of lock 0 */
@@ -1547,7 +1621,6 @@ static void *test_thread(void *arg)
 
   printf("Thread %s (%d) started\n",my_thread_name(),param); fflush(stdout);
 
-
   thr_lock_info_init(&lock_info);
   for (i=0; i < lock_counts[param] ; i++)
     thr_lock_data_init(locks+tests[param][i].lock_nr,data+i,NULL);
@@ -1574,7 +1647,7 @@ static void *test_thread(void *arg)
       }
     }
     mysql_mutex_unlock(&LOCK_thread_count);
-    thr_multi_unlock(multi_locks,lock_counts[param]);
+    thr_multi_unlock(multi_locks,lock_counts[param], THR_UNLOCK_UPDATE_STATUS);
   }
 
   printf("Thread %s (%d) ended\n",my_thread_name(),param); fflush(stdout);
@@ -1592,7 +1665,8 @@ int main(int argc __attribute__((unused)),char **argv __attribute__((unused)))
 {
   pthread_t tid;
   pthread_attr_t thr_attr;
-  int i,*param,error;
+  int *param,error;
+  uint i;
   MY_INIT(argv[0]);
   if (argc > 1 && argv[1][0] == '-' && argv[1][1] == '#')
     DBUG_PUSH(argv[1]+2);
@@ -1612,13 +1686,14 @@ int main(int argc __attribute__((unused)),char **argv __attribute__((unused)))
     exit(1);
   }
 
-  for (i=0 ; i < (int) array_elements(locks) ; i++)
+  for (i=0 ; i < array_elements(locks) ; i++)
   {
     thr_lock_init(locks+i);
     locks[i].check_status= test_check_status;
     locks[i].update_status=test_update_status;
     locks[i].copy_status=  test_copy_status;
     locks[i].get_status=   test_get_status;
+    locks[i].allow_multiple_concurrent_insert= 1;
   }
   if ((error=pthread_attr_init(&thr_attr)))
   {
@@ -1644,7 +1719,7 @@ int main(int argc __attribute__((unused)),char **argv __attribute__((unused)))
 #ifdef HAVE_THR_SETCONCURRENCY
   (void) thr_setconcurrency(2);
 #endif
-  for (i=0 ; i < (int) array_elements(lock_counts) ; i++)
+  for (i=0 ; i < array_elements(lock_counts) ; i++)
   {
     param=(int*) malloc(sizeof(int));
     *param=i;
@@ -1678,7 +1753,7 @@ int main(int argc __attribute__((unused)),char **argv __attribute__((unused)))
   }
   if ((error= mysql_mutex_unlock(&LOCK_thread_count)))
     fprintf(stderr, "Got error: %d from mysql_mutex_unlock\n", error);
-  for (i=0 ; i < (int) array_elements(locks) ; i++)
+  for (i=0 ; i < array_elements(locks) ; i++)
     thr_lock_delete(locks+i);
 #ifdef EXTRA_DEBUG
   if (found_errors)
diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c
index db35d5a13a6..bd53ee433e8 100644
--- a/mysys/thr_mutex.c
+++ b/mysys/thr_mutex.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000-2003 MySQL AB, 2008-2009 Sun Microsystems, Inc
+/* Copyright (C) 2000-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -19,11 +19,16 @@
 #if defined(TARGET_OS_LINUX) && !defined (__USE_UNIX98)
 #define __USE_UNIX98			/* To get rw locks under Linux */
 #endif
-#if defined(THREAD) && defined(SAFE_MUTEX)
+#ifdef SAFE_MUTEX
+#define SAFE_MUTEX_DEFINED
+#endif
+
+#if defined(THREAD)
 #undef SAFE_MUTEX			/* Avoid safe_mutex redefinitions */
 #include "mysys_priv.h"
 #include "my_static.h"
 #include <m_string.h>
+#include <hash.h>
 
 #ifndef DO_NOT_REMOVE_THREAD_WRAPPERS
 /* Remove wrappers */
@@ -31,38 +36,177 @@
 #undef pthread_mutex_init
 #undef pthread_mutex_lock
 #undef pthread_mutex_unlock
+#undef pthread_mutex_trylock
 #undef pthread_mutex_destroy
 #undef pthread_cond_wait
 #undef pthread_cond_timedwait
+#undef safe_mutex_free_deadlock_data
 #ifdef HAVE_NONPOSIX_PTHREAD_MUTEX_INIT
-#define pthread_mutex_init(a,b) my_pthread_mutex_init((a),(b))
+#define pthread_mutex_init(a,b) my_pthread_noposix_mutex_init((a),(b))
 #endif
 #endif /* DO_NOT_REMOVE_THREAD_WRAPPERS */
 
-/* Not instrumented */
+#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+pthread_mutexattr_t my_fast_mutexattr;
+#endif
+#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
+pthread_mutexattr_t my_errorcheck_mutexattr;
+#endif
+
+#ifdef SAFE_MUTEX_DEFINED
 static pthread_mutex_t THR_LOCK_mutex;
 static ulong safe_mutex_count= 0;		/* Number of mutexes created */
+static ulong safe_mutex_id= 0;
+my_bool safe_mutex_deadlock_detector= 1;        /* On by default */
+
 #ifdef SAFE_MUTEX_DETECT_DESTROY
-static struct st_safe_mutex_info_t *safe_mutex_root= NULL;
+static struct st_safe_mutex_create_info_t *safe_mutex_create_root= NULL;
+#endif
+
+static my_bool add_used_to_locked_mutex(safe_mutex_t *used_mutex,
+                                        safe_mutex_deadlock_t *locked_mutex);
+static my_bool add_to_locked_mutex(safe_mutex_deadlock_t *locked_mutex,
+                                   safe_mutex_t *current_mutex);
+static my_bool remove_from_locked_mutex(safe_mutex_t *mp,
+                                        safe_mutex_t *delete_mutex);
+static my_bool remove_from_used_mutex(safe_mutex_deadlock_t *locked_mutex,
+                                      safe_mutex_t *mutex);
+static void print_deadlock_warning(safe_mutex_t *new_mutex,
+                                   safe_mutex_t *conflicting_mutex);
+#endif
+
+
+/* Initialize all mutex handling */
+
+void my_mutex_init()
+{
+  /* Initialize mutex attributes */
+#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+  /*
+    Set mutex type to "fast" a.k.a "adaptive"
+
+    In this case the thread may steal the mutex from some other thread
+    that is waiting for the same mutex.  This will save us some
+    context switches but may cause a thread to 'starve forever' while
+    waiting for the mutex (not likely if the code within the mutex is
+    short).
+  */
+  pthread_mutexattr_init(&my_fast_mutexattr);
+  pthread_mutexattr_settype(&my_fast_mutexattr,
+                            PTHREAD_MUTEX_ADAPTIVE_NP);
+#endif
+#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
+  /*
+    Set mutex type to "errorcheck"
+  */
+  pthread_mutexattr_init(&my_errorcheck_mutexattr);
+  pthread_mutexattr_settype(&my_errorcheck_mutexattr,
+                            PTHREAD_MUTEX_ERRORCHECK);
+#endif
+
+#if defined(SAFE_MUTEX_DEFINED)
+  safe_mutex_global_init();
+#elif defined(MY_PTHREAD_FASTMUTEX)
+  fastmutex_global_init();
+#endif
+}
+
+void my_mutex_end()
+{
+#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+  pthread_mutexattr_destroy(&my_fast_mutexattr);
 #endif
+#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
+  pthread_mutexattr_destroy(&my_errorcheck_mutexattr);
+#endif
+}
 
+
+/* Initialize safe_mutex handling */
+
+#ifdef SAFE_MUTEX_DEFINED
 void safe_mutex_global_init(void)
 {
   pthread_mutex_init(&THR_LOCK_mutex,MY_MUTEX_INIT_FAST);
+  safe_mutex_id= safe_mutex_count= 0;
+  safe_mutex_deadlock_detector= 1;
+
+#ifdef SAFE_MUTEX_DETECT_DESTROY
+  safe_mutex_create_root= 0;
+#endif
+}
+
+static inline void remove_from_active_list(safe_mutex_t *mp)
+{
+  if (!(mp->active_flags & (MYF_NO_DEADLOCK_DETECTION | MYF_TRY_LOCK)))
+  {
+    /* Remove mutex from active mutex linked list */
+    if (mp->next)
+      mp->next->prev= mp->prev;
+    if (mp->prev)
+      mp->prev->next= mp->next;
+    else
+      *my_thread_var_mutex_in_use()= mp->next;
+  }
+  mp->prev= mp->next= 0;
 }
 
+/*
+  We initialise the hashes for deadlock detection lazily.
+  This greatly helps with performance when lots of mutexes are initialised but
+  only a few of them are actually used (eg. XtraDB).
+*/
+static int safe_mutex_lazy_init_deadlock_detection(safe_mutex_t *mp)
+{
+  if (!my_multi_malloc(MY_FAE | MY_WME,
+                       &mp->locked_mutex, sizeof(*mp->locked_mutex),
+                       &mp->used_mutex, sizeof(*mp->used_mutex), NullS))
+  {
+    /* Disable deadlock handling for this mutex */
+    mp->create_flags|= MYF_NO_DEADLOCK_DETECTION;
+    mp->active_flags|= MYF_NO_DEADLOCK_DETECTION;
+    return 1;                                   /* Error */
+  }
+
+  pthread_mutex_lock(&THR_LOCK_mutex);
+  mp->id= ++safe_mutex_id;
+  pthread_mutex_unlock(&THR_LOCK_mutex);
+  hash_init(mp->locked_mutex, &my_charset_bin,
+            1000,
+            offsetof(safe_mutex_deadlock_t, id),
+            sizeof(mp->id),
+            0, 0, HASH_UNIQUE);
+  hash_init(mp->used_mutex, &my_charset_bin,
+            1000,
+            offsetof(safe_mutex_t, id),
+            sizeof(mp->id),
+            0, 0, HASH_UNIQUE);
+  return 0;
+}
 
 int safe_mutex_init(safe_mutex_t *mp,
 		    const pthread_mutexattr_t *attr __attribute__((unused)),
+                    const char *name,
+                    myf my_flags,
 		    const char *file,
 		    uint line)
 {
+  DBUG_ENTER("safe_mutex_init");
+  DBUG_PRINT("enter",("mutex: 0x%lx  name: %s", (ulong) mp, name));
   bzero((char*) mp,sizeof(*mp));
   pthread_mutex_init(&mp->global,MY_MUTEX_INIT_ERRCHK);
   pthread_mutex_init(&mp->mutex,attr);
   /* Mark that mutex is initialized */
   mp->file= file;
   mp->line= line;
+  /* Skip the very common '&' prefix from the autogenerated name */
+  mp->name= name[0] == '&' ? name + 1 : name;
+
+  if (!safe_mutex_deadlock_detector)
+    my_flags|= MYF_NO_DEADLOCK_DETECTION;
+  /* Deadlock detection is initialised only lazily, on first use. */
+
+  mp->create_flags= my_flags;
 
 #ifdef SAFE_MUTEX_DETECT_DESTROY
   /*
@@ -71,7 +215,7 @@ int safe_mutex_init(safe_mutex_t *mp,
   */
   if ((mp->info= (safe_mutex_info_t *) malloc(sizeof(safe_mutex_info_t))))
   {
-    struct st_safe_mutex_info_t *info =mp->info;
+    struct st_safe_mutex_info_t *info= mp->info;
 
     info->init_file= file;
     info->init_line= line;
@@ -79,9 +223,9 @@ int safe_mutex_init(safe_mutex_t *mp,
     info->next= NULL;
 
     pthread_mutex_lock(&THR_LOCK_mutex);
-    if ((info->next= safe_mutex_root))
-      safe_mutex_root->prev= info;
-    safe_mutex_root= info;
+    if ((info->next= safe_mutex_create_root))
+      safe_mutex_create_root->prev= info;
+    safe_mutex_create_root= info;
     safe_mutex_count++;
     pthread_mutex_unlock(&THR_LOCK_mutex);
   }
@@ -90,13 +234,16 @@ int safe_mutex_init(safe_mutex_t *mp,
   safe_mutex_count++;
   pthread_mutex_unlock(&THR_LOCK_mutex);
 #endif /* SAFE_MUTEX_DETECT_DESTROY */
-  return 0;
+  DBUG_RETURN(0);
 }
 
 
-int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint line)
+int safe_mutex_lock(safe_mutex_t *mp, myf my_flags, const char *file,
+                    uint line)
 {
   int error;
+  DBUG_PRINT("mutex", ("%s (0x%lx) locking", mp->name ? mp->name : "Null",
+                       (ulong) mp));
   if (!mp->file)
   {
     fprintf(stderr,
@@ -109,12 +256,13 @@ int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint l
   pthread_mutex_lock(&mp->global);
   if (mp->count > 0)
   {
-    if (try_lock)
-    {
-      pthread_mutex_unlock(&mp->global);
-      return EBUSY;
-    }
-    else if (pthread_equal(pthread_self(),mp->thread))
+    /*
+      Check that we are not trying to lock mutex twice. This is an error
+      even if we are using 'try_lock' as it's not portable what happens
+      if you lock the mutex many times and this is in any case bad
+      behaviour that should not be encouraged
+    */
+    if (pthread_equal(pthread_self(),mp->thread))
     {
       fprintf(stderr,
               "safe_mutex: Trying to lock mutex at %s, line %d, when the"
@@ -142,7 +290,7 @@ int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint l
       instead just return EBUSY, since this is the expected behaviour
       of trylock().
    */
-  if (try_lock)
+  if (my_flags & MYF_TRY_LOCK)
   {
     error= pthread_mutex_trylock(&mp->mutex);
     if (error == EBUSY)
@@ -153,22 +301,110 @@ int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint l
 
   if (error || (error=pthread_mutex_lock(&mp->global)))
   {
-    fprintf(stderr,"Got error %d when trying to lock mutex at %s, line %d\n",
-	    error, file, line);
+    fprintf(stderr,"Got error %d when trying to lock mutex %s at %s, line %d\n",
+	    error, mp->name, file, line);
     fflush(stderr);
     abort();
   }
   mp->thread= pthread_self();
   if (mp->count++)
   {
-    fprintf(stderr,"safe_mutex: Error in thread libray: Got mutex at %s, \
-line %d more than 1 time\n", file,line);
+    fprintf(stderr,"safe_mutex: Error in thread libray: Got mutex %s at %s, "
+            "line %d more than 1 time\n", mp->name, file,line);
     fflush(stderr);
     abort();
   }
   mp->file= file;
-  mp->line=line;
+  mp->line= line;
+  mp->active_flags= mp->create_flags | my_flags;
   pthread_mutex_unlock(&mp->global);
+
+  /* Deadlock detection */
+
+  mp->prev= mp->next= 0;
+  if (!(mp->active_flags & (MYF_TRY_LOCK | MYF_NO_DEADLOCK_DETECTION)) &&
+      (mp->used_mutex != NULL || !safe_mutex_lazy_init_deadlock_detection(mp)))
+  {
+    safe_mutex_t **mutex_in_use= my_thread_var_mutex_in_use();
+
+    if (!mutex_in_use)
+    {
+      /* thread has not called my_thread_init() */
+      mp->active_flags|= MYF_NO_DEADLOCK_DETECTION;
+    }
+    else
+    {
+      safe_mutex_t *mutex_root;
+      if ((mutex_root= *mutex_in_use))   /* If not first locked */
+      {
+        /*
+          Protect locked_mutex against changes if a mutex is deleted
+        */
+        pthread_mutex_lock(&THR_LOCK_mutex);
+
+        if (!hash_search(mutex_root->locked_mutex, (uchar*) &mp->id, 0))
+        {
+          safe_mutex_deadlock_t *deadlock;
+          safe_mutex_t *mutex;
+
+          /* Create object to store mutex info */
+          if (!(deadlock= my_malloc(sizeof(*deadlock),
+                                    MYF(MY_ZEROFILL | MY_WME | MY_FAE))))
+            goto abort_loop;
+          deadlock->name= mp->name;
+          deadlock->id= mp->id;
+          deadlock->mutex= mp;
+          /* The following is useful for debugging wrong mutex usage */
+          deadlock->file= file;
+          deadlock->line= line;
+
+          /* Check if potential deadlock */
+          mutex= mutex_root;
+          do
+          {
+            if (hash_search(mp->locked_mutex, (uchar*) &mutex->id, 0))
+            {
+              print_deadlock_warning(mp, mutex);
+              /* Mark wrong usage to avoid future warnings for same error */
+              deadlock->warning_only= 1;
+              add_to_locked_mutex(deadlock, mutex_root);
+              DBUG_ASSERT(deadlock->count > 0);
+              goto abort_loop;
+            }
+          }
+          while ((mutex= mutex->next));
+
+          /*
+            Copy current mutex and all mutex that has been locked
+            after current mutex (mp->locked_mutex) to all mutex that
+            was locked before previous mutex (mutex_root->used_mutex)
+
+            For example if A->B would have been done before and we
+            are now locking (C) in B->C, then we would add C into
+            B->locked_mutex and A->locked_mutex
+          */
+          my_hash_iterate(mutex_root->used_mutex,
+                          (my_hash_walk_action) add_used_to_locked_mutex,
+                          deadlock);
+
+          /*
+            Copy all current mutex and all mutex locked after current one
+            into the prev mutex
+          */
+          add_used_to_locked_mutex(mutex_root, deadlock);
+          DBUG_ASSERT(deadlock->count > 0);
+        }
+  abort_loop:
+        pthread_mutex_unlock(&THR_LOCK_mutex);
+      }
+      /* Link mutex into mutex_in_use list */
+      if ((mp->next= *mutex_in_use))
+        (*mutex_in_use)->prev= mp;
+      *mutex_in_use= mp;
+    }
+  }
+
+  DBUG_PRINT("mutex", ("%s (0x%lx) locked", mp->name, (ulong) mp));
   return error;
 }
 
@@ -176,23 +412,34 @@ line %d more than 1 time\n", file,line);
 int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line)
 {
   int error;
+  DBUG_PRINT("mutex", ("%s (0x%lx) unlocking", mp->name, (ulong) mp));
   pthread_mutex_lock(&mp->global);
   if (mp->count == 0)
   {
-    fprintf(stderr,"safe_mutex: Trying to unlock mutex that wasn't locked at %s, line %d\n            Last used at %s, line: %d\n",
-	    file,line,mp->file ? mp->file : "",mp->line);
+    fprintf(stderr,
+            "safe_mutex: Trying to unlock mutex %s that wasn't locked at "
+            "%s, line %d\n"
+            "Last used at %s, line: %d\n",
+	    mp->name ? mp->name : "Null", file, line,
+            mp->file ? mp->file : "Null", mp->line);
     fflush(stderr);
     abort();
   }
   if (!pthread_equal(pthread_self(),mp->thread))
   {
-    fprintf(stderr,"safe_mutex: Trying to unlock mutex at %s, line %d  that was locked by another thread at: %s, line: %d\n",
-	    file,line,mp->file,mp->line);
+    fprintf(stderr,
+            "safe_mutex: Trying to unlock mutex %s at %s, line %d that was "
+            "locked by "
+            "another thread at: %s, line: %d\n",
+	    mp->name, file, line, mp->file, mp->line);
     fflush(stderr);
     abort();
   }
   mp->thread= 0;
   mp->count--;
+
+  remove_from_active_list(mp);
+
 #ifdef __WIN__
   pthread_mutex_unlock(&mp->mutex);
   error=0;
@@ -200,7 +447,9 @@ int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line)
   error=pthread_mutex_unlock(&mp->mutex);
   if (error)
   {
-    fprintf(stderr,"safe_mutex: Got error: %d (%d) when trying to unlock mutex at %s, line %d\n", error, errno, file, line);
+    fprintf(stderr,
+            "safe_mutex: Got error: %d (%d) when trying to unlock mutex "
+            "%s at %s, line %d\n", error, errno, mp->name, file, line);
     fflush(stderr);
     abort();
   }
@@ -214,43 +463,62 @@ int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file,
 		   uint line)
 {
   int error;
+  safe_mutex_t save_state;
+
   pthread_mutex_lock(&mp->global);
   if (mp->count == 0)
   {
-    fprintf(stderr,"safe_mutex: Trying to cond_wait on a unlocked mutex at %s, line %d\n",file,line);
+    fprintf(stderr,
+            "safe_mutex: Trying to cond_wait on a unlocked mutex %s at %s, "
+            "line %d\n",
+            mp->name ? mp->name : "Null", file, line);
     fflush(stderr);
     abort();
   }
   if (!pthread_equal(pthread_self(),mp->thread))
   {
-    fprintf(stderr,"safe_mutex: Trying to cond_wait on a mutex at %s, line %d  that was locked by another thread at: %s, line: %d\n",
-	    file,line,mp->file,mp->line);
+    fprintf(stderr,
+            "safe_mutex: Trying to cond_wait on a mutex %s at %s, line %d "
+            "that was locked by another thread at: %s, line: %d\n",
+	    mp->name, file, line, mp->file, mp->line);
     fflush(stderr);
     abort();
   }
 
   if (mp->count-- != 1)
   {
-    fprintf(stderr,"safe_mutex:  Count was %d on locked mutex at %s, line %d\n",
-	    mp->count+1, file, line);
+    fprintf(stderr,
+            "safe_mutex:  Count was %d on locked mutex %s at %s, line %d\n",
+	    mp->count+1, mp->name, file, line);
     fflush(stderr);
     abort();
   }
+  save_state= *mp;
+  remove_from_active_list(mp);
   pthread_mutex_unlock(&mp->global);
   error=pthread_cond_wait(cond,&mp->mutex);
   pthread_mutex_lock(&mp->global);
+
   if (error)
   {
-    fprintf(stderr,"safe_mutex: Got error: %d (%d) when doing a safe_mutex_wait at %s, line %d\n", error, errno, file, line);
+    fprintf(stderr,
+            "safe_mutex: Got error: %d (%d) when doing a safe_mutex_wait on "
+            "%s at %s, line %d\n", error, errno, mp->name, file, line);
     fflush(stderr);
     abort();
   }
-  mp->thread=pthread_self();
+  /* Restore state as it was before */
+  mp->thread=       save_state.thread;
+  mp->active_flags= save_state.active_flags;
+  mp->next=         save_state.next;
+  mp->prev=         save_state.prev;
+
   if (mp->count++)
   {
     fprintf(stderr,
-	    "safe_mutex:  Count was %d in thread 0x%lx when locking mutex at %s, line %d\n",
-	    mp->count-1, my_thread_dbug_id(), file, line);
+	    "safe_mutex:  Count was %d in thread 0x%lx when locking mutex %s "
+            "at %s, line %d\n",
+	    mp->count-1, my_thread_dbug_id(), mp->name, file, line);
     fflush(stderr);
     abort();
   }
@@ -266,29 +534,46 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp,
 			const char *file, uint line)
 {
   int error;
+  safe_mutex_t save_state;
+
   pthread_mutex_lock(&mp->global);
   if (mp->count != 1 || !pthread_equal(pthread_self(),mp->thread))
   {
-    fprintf(stderr,"safe_mutex: Trying to cond_wait at %s, line %d on a not hold mutex\n",file,line);
+    fprintf(stderr,
+            "safe_mutex: Trying to cond_wait at %s, line %d on a not hold "
+            "mutex %s\n",
+            file, line, mp->name ? mp->name : "Null");
     fflush(stderr);
     abort();
   }
   mp->count--;					/* Mutex will be released */
+  save_state= *mp;
+  remove_from_active_list(mp);
   pthread_mutex_unlock(&mp->global);
   error=pthread_cond_timedwait(cond,&mp->mutex,abstime);
 #ifdef EXTRA_DEBUG
   if (error && (error != EINTR && error != ETIMEDOUT && error != ETIME))
   {
-    fprintf(stderr,"safe_mutex: Got error: %d (%d) when doing a safe_mutex_timedwait at %s, line %d\n", error, errno, file, line);
+    fprintf(stderr,
+            "safe_mutex: Got error: %d (%d) when doing a safe_mutex_timedwait "
+            "on %s at %s, line %d\n",
+            error, errno, mp->name, file, line);
   }
 #endif
   pthread_mutex_lock(&mp->global);
-  mp->thread=pthread_self();
+  /* Restore state as it was before */
+  mp->thread=       save_state.thread;
+  mp->active_flags= save_state.active_flags;
+  mp->next=         save_state.next;
+  mp->prev=         save_state.prev;
+
   if (mp->count++)
   {
     fprintf(stderr,
-	    "safe_mutex:  Count was %d in thread 0x%lx when locking mutex at %s, line %d (error: %d (%d))\n",
-	    mp->count-1, my_thread_dbug_id(), file, line, error, error);
+	    "safe_mutex:  Count was %d in thread 0x%lx when locking mutex "
+            "%s at %s, line %d (error: %d (%d))\n",
+	    mp->count-1, my_thread_dbug_id(), mp->name, file, line,
+            error, error);
     fflush(stderr);
     abort();
   }
@@ -302,6 +587,8 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp,
 int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line)
 {
   int error=0;
+  DBUG_ENTER("safe_mutex_destroy");
+  DBUG_PRINT("enter", ("mutex: 0x%lx  name: %s", (ulong) mp, mp->name));
   if (!mp->file)
   {
     fprintf(stderr,
@@ -312,11 +599,17 @@ int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line)
   }
   if (mp->count != 0)
   {
-    fprintf(stderr,"safe_mutex: Trying to destroy a mutex that was locked at %s, line %d at %s, line %d\n",
-	    mp->file,mp->line, file, line);
+    fprintf(stderr,
+            "safe_mutex: Trying to destroy a mutex %s that was locked at %s, "
+            "line %d at %s, line %d\n",
+	    mp->name, mp->file, mp->line, file, line);
     fflush(stderr);
     abort();
   }
+
+  /* Free all entries that point to this one */
+  safe_mutex_free_deadlock_data(mp);
+
 #ifdef __WIN__ 
   pthread_mutex_destroy(&mp->global);
   pthread_mutex_destroy(&mp->mutex);
@@ -337,7 +630,7 @@ int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line)
     if (info->prev)
       info->prev->next = info->next;
     else
-      safe_mutex_root = info->next;
+      safe_mutex_create_root = info->next;
     if (info->next)
       info->next->prev = info->prev;
     safe_mutex_count--;
@@ -351,10 +644,38 @@ int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line)
   safe_mutex_count--;
   pthread_mutex_unlock(&THR_LOCK_mutex);
 #endif /* SAFE_MUTEX_DETECT_DESTROY */
-  return error;
+  DBUG_RETURN(error);
 }
 
 
+/**
+  Free all data related to deadlock detection
+
+  This is also useful together with safemalloc when you don't want to
+  have reports of not freed memory for mysys mutexes.
+*/
+
+void safe_mutex_free_deadlock_data(safe_mutex_t *mp)
+{
+  /* Nothing to release when detection is off or the hashes are not set */
+  if ((mp->create_flags & MYF_NO_DEADLOCK_DETECTION) || mp->used_mutex == NULL)
+    return;
+
+  /* Detach this mutex from every hash entry that refers to it */
+  pthread_mutex_lock(&THR_LOCK_mutex);
+  my_hash_iterate(mp->used_mutex,
+                  (my_hash_walk_action) remove_from_locked_mutex, mp);
+  my_hash_iterate(mp->locked_mutex,
+                  (my_hash_walk_action) remove_from_used_mutex, mp);
+  pthread_mutex_unlock(&THR_LOCK_mutex);
+
+  hash_free(mp->used_mutex);
+  hash_free(mp->locked_mutex);
+  /* NOTE(review): used_mutex is not my_free()d - one shared allocation? */
+  my_free(mp->locked_mutex, 0);
+  mp->create_flags|= MYF_NO_DEADLOCK_DETECTION; /* now a no-op if repeated */
+}
+
 /*
   Free global resources and check that all mutex has been destroyed
 
@@ -385,40 +706,153 @@ void safe_mutex_end(FILE *file __attribute__((unused)))
   }
   {
     struct st_safe_mutex_info_t *ptr;
-    for (ptr= safe_mutex_root ; ptr ; ptr= ptr->next)
+    for (ptr= safe_mutex_create_root ; ptr ; ptr= ptr->next)
     {
-      fprintf(file, "\tMutex initiated at line %4u in '%s'\n",
-	      ptr->init_line, ptr->init_file);
+      fprintf(file, "\tMutex %s initiated at line %4u in '%s'\n",
+	      ptr->name, ptr->init_line, ptr->init_file);
       (void) fflush(file);
     }
   }
 #endif /* SAFE_MUTEX_DETECT_DESTROY */
 }
 
-#endif /* THREAD && SAFE_MUTEX */
+safe_mutex_t **my_thread_var_mutex_in_use()
+{
+  struct st_my_thread_var *tmp=
+    my_pthread_getspecific(struct st_my_thread_var*,THR_KEY_mysys);
+  return tmp ? &tmp->mutex_in_use : 0;
+}
 
-#if defined(THREAD) && defined(MY_PTHREAD_FASTMUTEX) && !defined(SAFE_MUTEX)
+static my_bool add_used_to_locked_mutex(safe_mutex_t *used_mutex,
+                                        safe_mutex_deadlock_t *locked_mutex)
+{
+  /* Entries flagged warning_only are not propagated any further */
+  if (locked_mutex->warning_only)
+    return 0;
+
+  /* Record every entry of locked_mutex's own locked hash under used_mutex */
+  (void) my_hash_iterate(locked_mutex->mutex->locked_mutex,
+                         (my_hash_walk_action) add_to_locked_mutex, used_mutex);
+  /* ... and then locked_mutex itself: it is locked after used_mutex */
+  (void) add_to_locked_mutex(locked_mutex, used_mutex);
+  return 0;
+}
 
-#include "mysys_priv.h"
-#include "my_static.h"
-#include <m_string.h>
 
-#include <m_ctype.h>
-#include <hash.h>
-#include <myisampack.h>
-#include <mysys_err.h>
-#include <my_sys.h>
+/**
+   register that locked_mutex was locked after current_mutex
+*/
 
-#undef pthread_mutex_t
-#undef pthread_mutex_init
-#undef pthread_mutex_lock
-#undef pthread_mutex_trylock
-#undef pthread_mutex_unlock
-#undef pthread_mutex_destroy
-#undef pthread_cond_wait
-#undef pthread_cond_timedwait
+static my_bool add_to_locked_mutex(safe_mutex_deadlock_t *locked_mutex,
+                                   safe_mutex_t *current_mutex)
+{ /* Link locked_mutex into current_mutex's locked hash, and back; returns 0 */
+  DBUG_ENTER("add_to_locked_mutex");
+  DBUG_PRINT("info", ("inserting 0x%lx  into  0x%lx  (id: %lu -> %lu)",
+                      (ulong) locked_mutex, (ulong) current_mutex,
+                      locked_mutex->id, current_mutex->id));
+  if (my_hash_insert(current_mutex->locked_mutex, (uchar*) locked_mutex))
+  {
+    /* Got mutex through two paths; ignore */
+    DBUG_RETURN(0);
+  }
+  locked_mutex->count++;                /* one more hash holds this entry */
+  if (my_hash_insert(locked_mutex->mutex->used_mutex,
+                     (uchar*) current_mutex))
+  {
+    DBUG_ASSERT(0);             /* the reverse link is expected to insert */
+  }
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Remove mutex from the locked mutex hash
+  @fn    remove_from_locked_mutex()
+  @param mp            Mutex that has delete_mutex in its locked_mutex hash
+  @param delete_mutex  Mutex that should be removed from the hash
+
+  @notes
+    safe_mutex_deadlock_t entries in the locked hash are shared.
+    When counter goes to 0, we delete the safe_mutex_deadlock_t entry.
+*/
+
+static my_bool remove_from_locked_mutex(safe_mutex_t *mp,
+                                        safe_mutex_t *delete_mutex)
+{
+  safe_mutex_deadlock_t *entry;
+  DBUG_ENTER("remove_from_locked_mutex");
+  DBUG_PRINT("enter", ("delete_mutex: 0x%lx  mutex: 0x%lx  (id: %lu <- %lu)",
+                       (ulong) delete_mutex, (ulong) mp, 
+                       delete_mutex->id, mp->id));
+
+  /* Find delete_mutex's entry in mp's locked hash, keyed by its id */
+  entry= (safe_mutex_deadlock_t *) hash_search(mp->locked_mutex,
+                                               (uchar*) &delete_mutex->id, 0);
+  DBUG_ASSERT(entry);
+  if (!entry)
+    DBUG_RETURN(0);                     /* nothing to remove */
+  if (hash_delete(mp->locked_mutex, (uchar*) entry))
+  {
+    DBUG_ASSERT(0);
+  }
+  if (--entry->count == 0)              /* no hash holds the entry any more */
+    my_free(entry, MYF(0));
+  DBUG_RETURN(0);
+}
 
-ulong mutex_delay(ulong delayloops)
+static my_bool remove_from_used_mutex(safe_mutex_deadlock_t *locked_mutex,
+                                      safe_mutex_t *mutex)
+{
+  DBUG_ENTER("remove_from_used_mutex");
+  DBUG_PRINT("enter", ("delete_mutex: 0x%lx  mutex: 0x%lx  (id: %lu <- %lu)",
+                       (ulong) mutex, (ulong) locked_mutex, 
+                       mutex->id, locked_mutex->id));
+  /* Drop 'mutex' from the used hash of the mutex this entry stands for */
+  if (hash_delete(locked_mutex->mutex->used_mutex, (uchar*) mutex))
+    DBUG_ASSERT(0);
+  /* The entry is counted; release it together with its last reference */
+  if (--locked_mutex->count == 0)
+    my_free(locked_mutex, MYF(0));
+  DBUG_RETURN(0);
+}
+
+
+static void print_deadlock_warning(safe_mutex_t *new_mutex,
+                                   safe_mutex_t *parent_mutex)
+{
+  /* my_thread_var_mutex_in_use() returns 0 if there is no thread data */
+  safe_mutex_t *mutex_root, **in_use= my_thread_var_mutex_in_use();
+  DBUG_ENTER("print_deadlock_warning");
+  DBUG_PRINT("enter", ("mutex: %s  parent: %s",
+                       new_mutex->name, parent_mutex->name));
+
+  fprintf(stderr, "safe_mutex: Found wrong usage of mutex "
+          "'%s' and '%s'\n",
+          parent_mutex->name, new_mutex->name);
+  DBUG_PRINT("info", ("safe_mutex: Found wrong usage of mutex "
+                      "'%s' and '%s'",
+                      parent_mutex->name, new_mutex->name));
+  fprintf(stderr, "Mutex currently locked (in reverse order):\n");
+  DBUG_PRINT("info", ("Mutex currently locked (in reverse order):"));
+  fprintf(stderr, "%-32.32s  %s  line %u\n", new_mutex->name, new_mutex->file,
+          new_mutex->line);
+  DBUG_PRINT("info", ("%-32.32s  %s  line %u", new_mutex->name,
+                      new_mutex->file, new_mutex->line));
+  for (mutex_root= in_use ? *in_use : NULL ; mutex_root ;
+       mutex_root= mutex_root->next)
+  {
+    fprintf(stderr, "%-32.32s  %s  line %u\n", mutex_root->name,
+            mutex_root->file, mutex_root->line);
+    DBUG_PRINT("info", ("%-32.32s  %s  line %u", mutex_root->name,
+                        mutex_root->file, mutex_root->line));
+  }
+  fflush(stderr);
+  DBUG_VOID_RETURN;
+}
+
+#elif defined(MY_PTHREAD_FASTMUTEX)
+
+static ulong mutex_delay(ulong delayloops)
 {
   ulong	i;
   volatile ulong j;
@@ -499,5 +933,6 @@ void fastmutex_global_init(void)
   cpu_count= sysconf(_SC_NPROCESSORS_CONF);
 #endif
 }
-  
-#endif /* defined(THREAD) && defined(MY_PTHREAD_FASTMUTEX) && !defined(SAFE_MUTEX) */ 
+
+#endif /* defined(MY_PTHREAD_FASTMUTEX) */
+#endif /* THREAD */
diff --git a/mysys/tree.c b/mysys/tree.c
index 8ea7102ed4c..c922c8f505a 100644
--- a/mysys/tree.c
+++ b/mysys/tree.c
@@ -77,13 +77,13 @@ static void rb_insert(TREE *tree,TREE_ELEMENT ***parent,
 static void rb_delete_fixup(TREE *tree,TREE_ELEMENT ***parent);
 
 
-	/* The actuall code for handling binary trees */
+/* The actual code for handling binary trees */
 
 #ifndef DBUG_OFF
 static int test_rb_tree(TREE_ELEMENT *element);
 #endif
 
-void init_tree(TREE *tree, ulong default_alloc_size, ulong memory_limit,
+void init_tree(TREE *tree, size_t default_alloc_size, size_t memory_limit,
                int size, qsort_cmp2 compare, my_bool with_delete,
 	       tree_element_free free_element, void *custom_arg)
 {
@@ -96,7 +96,7 @@ void init_tree(TREE *tree, ulong default_alloc_size, ulong memory_limit,
   bzero((uchar*) &tree->null_element,sizeof(tree->null_element));
   tree->root= &tree->null_element;
   tree->compare=compare;
-  tree->size_of_element=size > 0 ? (uint) size : 0;
+  tree->size_of_element= size > 0 ? (uint) size : 0;
   tree->memory_limit=memory_limit;
   tree->free=free_element;
   tree->allocated=0;
@@ -127,7 +127,7 @@ void init_tree(TREE *tree, ulong default_alloc_size, ulong memory_limit,
   }
   if (!(tree->with_delete=with_delete))
   {
-    init_alloc_root(&tree->mem_root, (uint) default_alloc_size, 0);
+    init_alloc_root(&tree->mem_root, default_alloc_size, 0);
     tree->mem_root.min_malloc=(sizeof(TREE_ELEMENT)+tree->size_of_element);
   }
   DBUG_VOID_RETURN;
diff --git a/mysys/typelib.c b/mysys/typelib.c
index 7681ff581ac..aec203becc0 100644
--- a/mysys/typelib.c
+++ b/mysys/typelib.c
@@ -22,7 +22,7 @@
 
 #define is_field_separator(X) ((X) == ',' || (X) == '=')
 
-int find_type_or_exit(const char *x, TYPELIB *typelib, const char *option)
+int find_type_with_warning(const char *x, TYPELIB *typelib, const char *option)
 {
   int res;
   const char **ptr;
@@ -38,12 +38,20 @@ int find_type_or_exit(const char *x, TYPELIB *typelib, const char *option)
     while (*++ptr)
       fprintf(stderr, ",'%s'", *ptr);
     fprintf(stderr, "\n");
-    exit(1);
   }
   return res;
 }
 
 
+uint find_type_or_exit(const char *x, TYPELIB *typelib, const char *option)
+{
+  int found= find_type_with_warning(x, typelib, option);
+  if (found <= 0)                       /* non-positive result: terminate */
+    exit(1);
+  return (uint) found;
+}
+
+
 /**
   Search after a string in a list of strings. Endspace in x is not compared.
 
diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c
new file mode 100644
index 00000000000..732929f6d99
--- /dev/null
+++ b/mysys/waiting_threads.c
@@ -0,0 +1,1153 @@
+/* Copyright (C) 2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/**
+  @file
+
+  "waiting threads" subsystem - a unified interface for threads to wait
+  on each other, with built-in deadlock detection.
+
+  Main concepts
+  ^^^^^^^^^^^^^
+  a thread - is represented by a WT_THD structure. One physical thread
+    can have only one WT_THD descriptor at any given moment.
+
+  a resource - a thread does not wait for other threads directly,
+    instead it waits for a "resource", which is "owned" by other threads.
+    It waits, exactly, for all "owners" to "release" a resource.
+    It does not have to correspond to a physical resource. For example, it
+    may be convenient in certain cases to force resource == thread.
+    A resource is represented by a WT_RESOURCE structure. 
+
+  a resource identifier - a pair of {resource type, value}. A value is
+    an ulonglong number. Represented by a WT_RESOURCE_ID structure.
+
+  a resource type - a pointer to a statically defined instance of
+    WT_RESOURCE_TYPE structure. This structure contains a pointer to
+    a function that knows how to compare values of this resource type.
+    In the simple case it could be wt_resource_id_memcmp().
+
+  a wait-for graph - a graph that represents "wait-for" relationships.
+    It has two types of nodes - threads and resources. There are directed
+    edges from a thread to a resource it is waiting for (WT_THD::waiting_for),
+    from a thread to resources that it "owns" (WT_THD::my_resources),
+    and from a resource to threads that "own" it (WT_RESOURCE::owners)
+
+  Graph completeness
+  ^^^^^^^^^^^^^^^^^^
+
+  For flawless deadlock detection wait-for graph must be complete.
+  It means that when a thread starts waiting it needs to know *all* its
+  blockers, and call wt_thd_will_wait_for() for every one of them.
+  Otherwise two phenomena should be expected:
+
+  1. Fuzzy timeouts:
+
+    thread A needs to get a lock, and is blocked by a thread B.
+    it waits.
+    Just before the timeout thread B releases the lock.
+    thread A is ready to grab the lock but discovers that it is also
+    blocked by a thread C.
+    It waits and times out.
+
+    As a result thread A has waited two timeout intervals, instead of one.
+
+  2. Unreliable cycle detection:
+
+     Thread A waits for threads B and C
+     Thread C waits for D
+     Thread D wants to start waiting for A
+
+     one can see immediately that thread D creates a cycle, and thus
+     a deadlock is detected.
+
+     But if thread A would only wait for B, and start waiting for C
+     when B would unlock, thread D would be allowed to wait, a deadlock
+     would be only detected when B unlocks or somebody times out.
+
+  These two phenomena don't affect correctness, and strictly speaking,
+  the caller is not required to call wt_thd_will_wait_for() for *all*
+  blockers - it may optimize wt_thd_will_wait_for() calls. But they
+  may be perceived as bugs by users, so it must be understood that such
+  an optimization comes at a price.
+
+  Usage
+  ^^^^^
+
+  First, the wt* subsystem must be initialized by calling
+  wt_init(). In the server you don't need to do it, it's done
+  in mysqld.cc.
+
+  Similarly, wt_end() frees wt* structures, should be called
+  at the end, but in the server mysqld.cc takes care of that.
+
+  Every WT_THD should be initialized with wt_thd_lazy_init().
+  After that they can be used in other wt_thd_* calls.
+  Before discarding, WT_THD should be free'd with
+  wt_thd_destroy(). In the server both are handled in sql_class.cc,
+  it's an error to try to do it manually.
+
+  To use the deadlock detection one needs to use this thread's WT_THD,
+  call wt_thd_will_wait_for() for every thread it needs to wait on,
+  then call wt_thd_cond_timedwait(). When thread releases a resource
+  it should call wt_thd_release() (or wt_thd_release_all()) - it will
+  notify (send a signal) threads waiting in wt_thd_cond_timedwait(),
+  if appropriate.
+
+  Just like with pthread's cond_wait, there could be spurious
+  wake-ups from wt_thd_cond_timedwait(). A caller is expected to
+  handle that (that is, to re-check the blocking criteria).
+
+  wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either
+  WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return
+  WT_TIMEOUT. Out of memory and other fatal errors are reported as
+  WT_DEADLOCK - and a transaction must be aborted just the same.
+
+  Configuration
+  ^^^^^^^^^^^^^
+  There are four config variables. Two deadlock search depths - short and
+  long - and two timeouts. Deadlock search is performed with the short
+  depth on every wt_thd_will_wait_for() call. wt_thd_cond_timedwait()
+  waits with a short timeout, performs a deadlock search with the long
+  depth, and waits with a long timeout. As most deadlock cycles are supposed
+  to be short, most deadlocks will be detected at once, and waits will
+  rarely be necessary.
+
+  These config variables are thread-local. Different threads may have
+  different search depth and timeout values.
+
+  Also, deadlock detector supports different killing strategies, the victim
+  in a deadlock cycle is selected based on the "weight". See "weight"
+  description in waiting_threads.h for details. It's up to the caller to
+  set weights accordingly.
+
+  Status
+  ^^^^^^
+  We calculate the number of successful waits (WT_OK returned from
+  wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle
+  length distribution - number of deadlocks with every length from
+  1 to WT_CYCLE_STATS, and a wait time distribution - number
+  of waits with a time from 1 us to 1 min in WT_WAIT_STATS
+  intervals on a log e scale.
+*/
+
+/*
+  Note that if your lock system satisfies the following condition:
+
+    there exist four lock levels A, B, C, D, such that
+      A is compatible with B
+      A is not compatible with C
+      D is not compatible with B
+
+      (example A=IX, B=IS, C=S, D=X)
+
+   you need to include lock level in the resource identifier - a
+   thread waiting for lock of the type A on resource R and another
+   thread waiting for lock of the type B on resource R should wait on
+   different WT_RESOURCE structures, on different {lock, resource}
+   pairs.  Otherwise the following is possible:
+
+      thread1> take S-lock on R
+      thread2> take IS-lock on R
+      thread3> wants X-lock on R, starts waiting for threads 1 and 2 on R.
+      thread3 is killed (or timeout or whatever)
+      WT_RESOURCE structure for R is still in the hash, as it has two owners
+      thread4> wants an IX-lock on R
+      WT_RESOURCE for R is found in the hash, thread4 starts waiting on it.
+      !! now thread4 is waiting for both thread1 and thread2
+      !! while, in fact, IX-lock and IS-lock are compatible and
+      !! thread4 should not wait for thread2.
+*/
+
+#include <waiting_threads.h>
+#include <m_string.h>
+
+/* status variables */
+
+/**
+  preset table of wait intervals
+*/
+ulonglong wt_wait_table[WT_WAIT_STATS];
+/**
+  wait time distribution (log e scale)
+*/
+uint32 wt_wait_stats[WT_WAIT_STATS+1];
+/**
+  distribution of cycle lengths
+  first column tells whether this was during short or long detection
+*/
+uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1];
+uint32 wt_success_stats;
+
+static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock;
+
+#ifdef SAFE_STATISTICS
+#define incr(VAR, LOCK)                           \
+  do {                                            \
+    my_atomic_rwlock_wrlock(&(LOCK));             \
+    my_atomic_add32(&(VAR), 1);                   \
+    my_atomic_rwlock_wrunlock(&(LOCK));           \
+  } while(0)
+#else
+#define incr(VAR,LOCK)  do { (VAR)++; } while(0)
+#endif
+
+static void increment_success_stats()
+{
+  incr(wt_success_stats, success_stats_lock);
+}
+
+static void increment_cycle_stats(uint depth, uint slot)
+{
+  /* depths of WT_CYCLE_STATS or more all share the last counter of a row */
+  uint column= depth < WT_CYCLE_STATS ? depth : WT_CYCLE_STATS;
+  incr(wt_cycle_stats[slot][column], cycle_stats_lock);
+}
+
+static void increment_wait_stats(ulonglong waited,int ret)
+{
+  uint slot= WT_WAIT_STATS;             /* timeouts use the extra last slot */
+  if (ret != ETIMEDOUT)
+    for (slot= 0; slot < WT_WAIT_STATS; slot++)
+      if (waited/10 <= wt_wait_table[slot])
+        break;                          /* first bound that is not exceeded */
+  incr(wt_wait_stats[slot], wait_stats_lock);
+}
+
+/*
+  'lock' protects 'owners', 'state', and 'waiter_count'
+  'id' is read-only
+
+  a resource is picked up from a hash in a lock-free manner
+  it's returned pinned, so it cannot be freed at once
+  but it may be freed right after the pin is removed
+  to free a resource it should
+    1. have no owners
+    2. have no waiters
+
+  two ways to access a resource:
+    1. find it in a hash
+       - it's returned pinned.
+        a) take a lock in exclusive mode
+        b) check the state, it should be ACTIVE to be usable
+        c) unpin
+    2. by a direct reference
+       - can only be used if a resource cannot be freed
+       e.g. accessing a resource by thd->waiting_for is safe,
+       a resource cannot be freed as there's a thread waiting for it
+*/
+struct st_wt_resource {
+  WT_RESOURCE_ID  id;
+  uint            waiter_count;
+  enum { ACTIVE, FREE } state;
+#ifndef DBUG_OFF
+  pthread_mutex_t  *cond_mutex; /* a mutex for the 'cond' below */
+#endif
+  /*
+    before the 'lock' all elements are mutable, after (and including) -
+    immutable in the sense that lf_hash_insert() won't memcpy() over them.
+    See wt_init().
+  */
+#ifdef WT_RWLOCKS_USE_MUTEXES
+  /*
+    we need a special rwlock-like 'lock' to allow readers bypass
+    waiting writers, otherwise readers can deadlock. For example:
+
+      A waits on resource x, owned by B, B waits on resource y, owned
+      by A, we have a cycle (A->x->B->y->A)
+      Both A and B start deadlock detection:
+
+        A locks x                          B locks y
+        A goes deeper                      B goes deeper
+        A locks y                          B locks x
+
+      with mutexes it would deadlock. With rwlocks it won't, as long
+      as both A and B are taking read locks (and they do).
+      But other threads may take write locks. Assume there's
+      C who wants to start waiting on x, and D who wants to start
+      waiting on y.
+
+        A read-locks x                       B read-locks y
+        A goes deeper                        B goes deeper
+     => C write-locks x (to add a new edge)  D write-locks y
+     .. C is blocked                         D is blocked
+        A read-locks y                       B read-locks x
+
+      Now, if a read lock can bypass a pending write lock request, we're fine.
+      If it can not, we have a deadlock.
+
+    writer starvation is technically possible, but unlikely, because
+    the contention is expected to be low.
+  */
+  struct {
+    pthread_cond_t   cond;
+    pthread_mutex_t  mutex;
+    uint readers: 16;
+    uint pending_writers: 15;
+    uint write_locked: 1;
+  } lock;
+#else
+  rw_lock_t lock;
+#endif
+  pthread_cond_t   cond; /* the corresponding mutex is provided by the caller */
+  DYNAMIC_ARRAY    owners;
+};
+
+#ifdef  WT_RWLOCKS_USE_MUTEXES
+static void rc_rwlock_init(WT_RESOURCE *rc)
+{
+  pthread_cond_init(&rc->lock.cond, 0);
+  pthread_mutex_init(&rc->lock.mutex, MY_MUTEX_INIT_FAST);
+}
+static void rc_rwlock_destroy(WT_RESOURCE *rc)
+{
+  DBUG_ASSERT(rc->lock.write_locked == 0);
+  DBUG_ASSERT(rc->lock.readers == 0);
+  pthread_cond_destroy(&rc->lock.cond);
+  pthread_mutex_destroy(&rc->lock.mutex);
+}
+static void rc_rdlock(WT_RESOURCE *rc)
+{ /* Take rc->lock shared; waits only while a writer holds it */
+  DBUG_PRINT("wt", ("TRYLOCK resid=%lu for READ", (ulong)rc->id.value));
+  pthread_mutex_lock(&rc->lock.mutex);
+  while (rc->lock.write_locked)         /* pending writers are not checked */
+    pthread_cond_wait(&rc->lock.cond, &rc->lock.mutex);
+  rc->lock.readers++;
+  pthread_mutex_unlock(&rc->lock.mutex);
+  DBUG_PRINT("wt", ("LOCK resid=%lu for READ", (ulong)rc->id.value));
+}
+static void rc_wrlock(WT_RESOURCE *rc)
+{ /* Take rc->lock exclusively; waits for the writer and all readers to go */
+  DBUG_PRINT("wt", ("TRYLOCK resid=%lu for WRITE", (ulong)rc->id.value));
+  pthread_mutex_lock(&rc->lock.mutex);
+  while (rc->lock.write_locked || rc->lock.readers)
+    pthread_cond_wait(&rc->lock.cond, &rc->lock.mutex);
+  rc->lock.write_locked= 1;
+  pthread_mutex_unlock(&rc->lock.mutex);
+  DBUG_PRINT("wt", ("LOCK resid=%lu for WRITE", (ulong)rc->id.value));
+}
+static void rc_unlock(WT_RESOURCE *rc)
+{ /* Release rc->lock, whichever way (read or write) it was taken */
+  DBUG_PRINT("wt", ("UNLOCK resid=%lu", (ulong)rc->id.value));
+  pthread_mutex_lock(&rc->lock.mutex);
+  if (rc->lock.write_locked)
+  {
+    rc->lock.write_locked= 0;           /* treated as a write unlock */
+    pthread_cond_broadcast(&rc->lock.cond);
+  }
+  else if (--rc->lock.readers == 0)     /* last reader wakes the waiters */
+    pthread_cond_broadcast(&rc->lock.cond);
+  pthread_mutex_unlock(&rc->lock.mutex);
+}
+#else
+static void rc_rwlock_init(WT_RESOURCE *rc)
+{
+  my_rwlock_init(&rc->lock, 0);
+}
+static void rc_rwlock_destroy(WT_RESOURCE *rc)
+{
+  rwlock_destroy(&rc->lock);
+}
+static void rc_rdlock(WT_RESOURCE *rc)
+{ /* Take rc->lock for reading, tracing before and after the attempt */
+  DBUG_PRINT("wt", ("TRYLOCK resid=%lu for READ", (ulong)rc->id.value));
+  rw_rdlock(&rc->lock);
+  DBUG_PRINT("wt", ("LOCK resid=%lu for READ", (ulong)rc->id.value));
+}
+static void rc_wrlock(WT_RESOURCE *rc)
+{ /* Take rc->lock for writing, tracing before and after the attempt */
+  DBUG_PRINT("wt", ("TRYLOCK resid=%lu for WRITE", (ulong)rc->id.value));
+  rw_wrlock(&rc->lock);
+  DBUG_PRINT("wt", ("LOCK resid=%lu for WRITE", (ulong)rc->id.value));
+}
+static void rc_unlock(WT_RESOURCE *rc)
+{ /* Release rc->lock, taken by either rc_rdlock() or rc_wrlock() */
+  DBUG_PRINT("wt", ("UNLOCK resid=%lu", (ulong)rc->id.value));
+  rw_unlock(&rc->lock);
+}
+#endif
+
+/*
+  All resources are stored in a lock-free hash. Different threads
+  may add new resources and perform deadlock detection concurrently.
+*/
+static LF_HASH      reshash;
+
+/**
+  WT_RESOURCE constructor
+
+  It's called from lf_hash and takes a pointer to an LF_SLIST instance.
+  WT_RESOURCE is located at arg+sizeof(LF_SLIST)
+*/
+static void wt_resource_init(uchar *arg)
+{
+  WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
+  DBUG_ENTER("wt_resource_init");
+  /* zero everything first: waiter_count becomes 0 and state becomes ACTIVE */
+  bzero(rc, sizeof(*rc));
+  rc_rwlock_init(rc);
+  pthread_cond_init(&rc->cond, 0);
+  my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 0, 5);
+  DBUG_VOID_RETURN;
+}
+
+/**
+  WT_RESOURCE destructor
+
+  It's called from lf_hash and takes a pointer to an LF_SLIST instance.
+  WT_RESOURCE is located at arg+sizeof(LF_SLIST)
+*/
+static void wt_resource_destroy(uchar *arg)
+{
+  WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
+  DBUG_ENTER("wt_resource_destroy");
+  /* a resource is expected to have no owners left when it is destroyed */
+  DBUG_ASSERT(rc->owners.elements == 0);
+  rc_rwlock_destroy(rc);
+  pthread_cond_destroy(&rc->cond);
+  delete_dynamic(&rc->owners);
+  DBUG_VOID_RETURN;
+}
+
+void wt_init()
+{
+  DBUG_ENTER("wt_init");
+  DBUG_ASSERT(reshash.alloc.constructor != wt_resource_init);
+  /* the assert fires if the constructor below was already installed */
+  lf_hash_init(&reshash, sizeof(WT_RESOURCE), LF_HASH_UNIQUE, 0,
+               sizeof_WT_RESOURCE_ID, 0, 0);
+  reshash.alloc.constructor= wt_resource_init;
+  reshash.alloc.destructor= wt_resource_destroy;
+  /*
+    Note a trick: we initialize the hash with the real element size,
+    but fix it later to a shortened element size. This way
+    the allocator will allocate elements correctly, but
+    lf_hash_insert() will only overwrite part of the element with memcpy().
+    lock, condition, and dynamic array will be intact.
+  */
+  reshash.element_size= offsetof(WT_RESOURCE, lock);
+  bzero(wt_wait_stats, sizeof(wt_wait_stats));
+  bzero(wt_cycle_stats, sizeof(wt_cycle_stats));
+  wt_success_stats= 0;
+  { /* initialize wt_wait_table[]. from 1 us to 1 min, log e scale */
+    int i;
+    double from= log(1);   /* 1 us */
+    double to= log(60e6);  /* 1 min */
+    for (i= 0; i < WT_WAIT_STATS; i++)
+    {
+      wt_wait_table[i]= (ulonglong)exp((to-from)/(WT_WAIT_STATS-1)*i+from);
+      DBUG_ASSERT(i == 0 || wt_wait_table[i-1] != wt_wait_table[i]);
+    }
+  }
+  my_atomic_rwlock_init(&cycle_stats_lock);
+  my_atomic_rwlock_init(&success_stats_lock);
+  my_atomic_rwlock_init(&wait_stats_lock);
+  DBUG_VOID_RETURN;
+}
+
+void wt_end()
+{
+  DBUG_ENTER("wt_end");
+  /* every resource must be gone from the hash before shutdown */
+  DBUG_ASSERT(reshash.count == 0);
+  my_atomic_rwlock_destroy(&wait_stats_lock);
+  my_atomic_rwlock_destroy(&success_stats_lock);
+  my_atomic_rwlock_destroy(&cycle_stats_lock);
+  lf_hash_destroy(&reshash);
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Lazy WT_THD initialization
+
+  The cheap half of WT_THD setup: plain assignments only, nothing that
+  allocates memory. Whatever is left is completed on demand, on first use,
+  so it is affordable to set up every WT_THD this way even if most of them
+  never take part in deadlock detection.
+
+  @param thd    the structure to initialize
+  @param ds     a pointer to deadlock search depth short value
+  @param ts     a pointer to deadlock timeout short value
+  @param dl     a pointer to deadlock search depth long value
+  @param tl     a pointer to deadlock timeout long value
+
+  @note the four values are stored as pointers, not copied. This lets the
+  search depths and timeouts of an existing thread be changed later, and it
+  also means the pointers must stay valid for the lifetime of WT_THD.
+  thd->pins is not assigned here - presumably the caller zero-fills WT_THD.
+*/
+void wt_thd_lazy_init(WT_THD *thd, const ulong *ds, const ulong *ts,
+                                   const ulong *dl, const ulong *tl)
+{
+  DBUG_ENTER("wt_thd_lazy_init");
+  thd->deadlock_search_depth_short= ds;
+  thd->deadlock_search_depth_long= dl;
+  thd->timeout_short= ts;
+  thd->timeout_long= tl;
+  thd->weight= 0;
+  thd->waiting_for= 0;
+  /* the dynamic array is set up lazily too - no memory is allocated here */
+  my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5);
+#ifndef DBUG_OFF
+  thd->name= my_thread_name();
+#endif
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Finalize WT_THD initialization
+
+  Completes what wt_thd_lazy_init() left out: obtains the lf_hash pins of
+  reshash the first time they are needed and does nothing on later calls.
+  It's called automatically on demand, when WT_THD is about to be used.
+  @return 0 if thd->pins is usable, 1 if the pins could not be obtained
+*/
+static int fix_thd_pins(WT_THD *thd)
+{
+  if (!unlikely(thd->pins == 0))
+    return 0;                           /* already finalized */
+
+  thd->pins= lf_hash_get_pins(&reshash);
+#ifndef DBUG_OFF
+  thd->name= my_thread_name();
+#endif
+  return thd->pins == 0;
+}
+
+void wt_thd_destroy(WT_THD *thd)
+{
+  DBUG_ENTER("wt_thd_destroy");
+  /* by now the thread must neither own resources nor be waiting */
+  DBUG_ASSERT(thd->my_resources.elements == 0);
+  DBUG_ASSERT(thd->waiting_for == 0);
+  delete_dynamic(&thd->my_resources);
+
+  /* pins are obtained on demand (fix_thd_pins), so there may be none */
+  if (thd->pins)
+    lf_hash_put_pins(thd->pins);
+  DBUG_VOID_RETURN;
+}
+/**
+  Trivial resource id comparison function - bytewise memcmp, usable in
+  WT_RESOURCE_TYPE structures where bytewise comparison is sufficient.
+  @return 0 if the ids are bytewise equal, 1 otherwise. memcmp()'s int is
+  collapsed to 0/1: a my_bool narrower than int could truncate it to 0.
+*/
+my_bool wt_resource_id_memcmp(const void *a, const void *b)
+{
+  /* we use the fact that there's no padding in the middle of WT_RESOURCE_ID */
+  compile_time_assert(offsetof(WT_RESOURCE_ID, type) == sizeof(ulonglong));
+  return memcmp(a, b, sizeof_WT_RESOURCE_ID) != 0;
+}
+
+/**
+  arguments for the recursive deadlock_search function
+*/
+struct deadlock_arg {          /* filled positionally in deadlock() */
+  WT_THD * const thd;          /**< starting point of a search */
+  uint const max_depth;        /**< search depth limit */
+  WT_THD *victim;              /**< a thread to be killed to resolve a deadlock */
+  WT_RESOURCE *last_locked_rc; /**< see comment at the end of deadlock_search() */
+};
+
+/**
+  helper: make 'found' the new victim if it weighs less than the current one
+*/
+static void change_victim(WT_THD* found, struct deadlock_arg *arg)
+{
+  if (found->weight < arg->victim->weight)
+  {
+    if (arg->victim != arg->thd)
+    {
+      rc_unlock(arg->victim->waiting_for); /* release the previous victim */
+      DBUG_ASSERT(arg->last_locked_rc == found->waiting_for);
+    }
+    arg->victim= found;
+    arg->last_locked_rc= 0;     /* callers then skip their rc_unlock() */
+  }
+}
+
+/**
+  recursive, depth-limited search for a wait-for cycle that includes arg->thd
+*/
+static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker,
+                           uint depth)
+{
+  WT_RESOURCE *rc, *volatile *shared_ptr= &blocker->waiting_for;
+  WT_THD *cursor;
+  uint i;
+  int ret= WT_OK;               /* or WT_DEPTH_EXCEEDED, or WT_DEADLOCK */
+  DBUG_ENTER("deadlock_search");
+  DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, depth=%u",
+                    arg->thd->name, blocker->name, depth));
+
+  LF_REQUIRE_PINS(1);
+
+  arg->last_locked_rc= 0;
+
+  if (depth > arg->max_depth)
+  {
+    DBUG_PRINT("wt", ("exit: WT_DEPTH_EXCEEDED (early)"));
+    DBUG_RETURN(WT_DEPTH_EXCEEDED);
+  }
+
+retry:
+  /*
+    safe dereference as explained in lf_alloc-pin.c
+    (in short: protects against lf_alloc_free() in lf_hash_delete())
+  */
+  do
+  {
+    rc= *shared_ptr;
+    lf_pin(arg->thd->pins, 0, rc);
+  } while (rc != *shared_ptr && LF_BACKOFF);
+
+  if (rc == 0)                  /* blocker is not waiting for anything */
+  {
+    DBUG_PRINT("wt", ("exit: OK (early)"));
+    DBUG_RETURN(0);
+  }
+
+  rc_rdlock(rc);
+  if (rc->state != ACTIVE || *shared_ptr != rc)
+  {
+    /* blocker is not waiting on this resource anymore */
+    rc_unlock(rc);
+    lf_unpin(arg->thd->pins, 0);
+    goto retry;
+  }
+  /* as the state is locked, we can unpin now */
+  lf_unpin(arg->thd->pins, 0);
+
+  /*
+    Below is not a pure depth-first search. It's a depth-first with a
+    slightest hint of breadth-first. Depth-first is:
+
+      check(element, X):
+        foreach current in element->nodes[] do:
+          if current == X return error;
+          check(current, X);
+
+    while we do
+
+      check(element, X):
+        foreach current in element->nodes[] do:
+          if current == X return error;
+        foreach current in element->nodes[] do:
+          check(current, X);
+
+    preferring shorter deadlocks over longer ones.
+  */
+  for (i= 0; i < rc->owners.elements; i++)
+  {
+    cursor= *dynamic_element(&rc->owners, i, WT_THD**);
+    /*
+      We're only looking for (and detecting) cycles that include 'arg->thd'.
+      That is, only deadlocks that *we* have created. For example,
+        thd->A->B->thd
+      (thd waits for A, A waits for B, while B is waiting for thd).
+      While walking the graph we can encounter other cycles, e.g.
+        thd->A->B->C->A
+      This will not be detected. Instead we will walk it in circles until
+      the search depth limit is reached (the latter guarantees that an
+      infinite loop is impossible). We expect the thread that has created
+      the cycle (one of A, B, and C) to detect its deadlock.
+    */
+    if (cursor == arg->thd)
+    {
+      ret= WT_DEADLOCK;
+      increment_cycle_stats(depth, arg->max_depth ==
+                                   *arg->thd->deadlock_search_depth_long);
+      arg->victim= cursor;
+      goto end;
+    }
+  }
+  for (i= 0; i < rc->owners.elements; i++)
+  {
+    cursor= *dynamic_element(&rc->owners, i, WT_THD**);
+    switch (deadlock_search(arg, cursor, depth+1)) {
+    case WT_OK:
+      break;
+    case WT_DEPTH_EXCEEDED:
+      ret= WT_DEPTH_EXCEEDED;
+      break;
+    case WT_DEADLOCK:
+      ret= WT_DEADLOCK;
+      change_victim(cursor, arg);       /* also sets arg->last_locked_rc to 0 */
+      i= rc->owners.elements;           /* jump out of the loop */
+      break;
+    default:
+      DBUG_ASSERT(0);
+    }
+    if (arg->last_locked_rc)
+      rc_unlock(arg->last_locked_rc);
+  }
+end:
+  /*
+    Note that 'rc' is locked in this function, but it's never unlocked here.
+    Instead it's saved in arg->last_locked_rc and the *caller* is
+    expected to unlock it.  It's done to support different killing
+    strategies. This is how it works:
+    Assuming a graph
+
+      thd->A->B->C->thd
+
+    deadlock_search() function starts from thd, locks it (in fact it locks not
+    a thd, but a resource it is waiting on, but below, for simplicity, I'll
+    talk about "locking a thd"). Then it goes down recursively, locks A, and so
+    on. Goes down recursively, locks B. Goes down recursively, locks C.
+    Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd.
+    Returns from the last deadlock_search() call. C stays locked!
+    Now it checks whether C is a more appropriate victim than 'thd'.
+    If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked.
+    Now it checks whether B is a more appropriate victim than arg->victim.
+    If yes - old arg->victim is unlocked and arg->victim=B,
+    otherwise B is unlocked. Return.
+    And so on.
+
+    In short, a resource is locked in a frame. But it's not unlocked in the
+    same frame, it's unlocked by the caller, and only after the caller checks
+    that it doesn't need to use current WT_THD as a victim. If it does - the
+    lock is kept and the old victim's resource is unlocked. When the recursion
+    is unrolled and we are back to deadlock() function, there are only two
+    locks left - on thd and on the victim.
+  */
+  arg->last_locked_rc= rc;
+  DBUG_PRINT("wt", ("exit: %s",
+                    ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" :
+                    ret ? "WT_DEADLOCK" : "OK"));
+  DBUG_RETURN(ret);
+}
+
+/**
+  Deadlock detection in a wait-for graph
+
+  A wrapper for recursive deadlock_search() - prepares deadlock_arg structure,
+  invokes deadlock_search(), increments statistics, notifies the victim.
+
+  @param thd            thread that is going to wait; a deadlock is found if
+                        the graph walk reaches a thread that waits on thd
+  @param blocker        starting point of a search. In wt_thd_cond_timedwait()
+                        it's thd, in wt_thd_will_wait_for() it's a thread that
+                        thd is going to wait for
+  @param depth          starting search depth. In general it's the number of
+                        edges in the wait-for graph between thd and the
+                        blocker. Practically only two values are used (and
+                        supported) - when thd == blocker it's 0, when thd
+                        waits directly for blocker, it's 1
+  @param max_depth      search depth limit
+  @return WT_OK, WT_FREE_TO_GO, or WT_DEADLOCK if thd itself is the victim
+*/
+static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth,
+                            uint max_depth)
+{
+  struct deadlock_arg arg= {thd, max_depth, 0, 0};
+  int ret;
+  DBUG_ENTER("deadlock");
+  DBUG_ASSERT(depth < 2);
+  ret= deadlock_search(&arg, blocker, depth);
+  if (ret == WT_DEPTH_EXCEEDED)
+  {
+    increment_cycle_stats(WT_CYCLE_STATS, max_depth ==
+                                          *thd->deadlock_search_depth_long);
+    ret= WT_OK;                 /* an abandoned search is not an error */
+  }
+  /*
+    if we started with depth==1, blocker was never considered for a victim
+    in deadlock_search(). Do it here.
+  */
+  if (ret == WT_DEADLOCK && depth)
+    change_victim(blocker, &arg);
+  if (arg.last_locked_rc)
+  {
+    /*
+      Special return code if there's nobody to wait for.
+
+      depth == 0 means that we start the search from thd (thd == blocker).
+      ret == WT_OK means that no cycle was found and
+        arg.last_locked_rc == thd->waiting_for.
+      and arg.last_locked_rc->owners.elements == 0 means that
+        (applying the rule above) thd->waiting_for->owners.elements == 0,
+        and thd doesn't have anybody to wait for.
+    */
+    if (depth == 0 && ret == WT_OK && arg.last_locked_rc->owners.elements == 0)
+    {
+      DBUG_ASSERT(thd == blocker);
+      DBUG_ASSERT(arg.last_locked_rc == thd->waiting_for);
+      ret= WT_FREE_TO_GO;
+    }
+    rc_unlock(arg.last_locked_rc);
+  }
+  /* notify the victim, if appropriate */
+  if (ret == WT_DEADLOCK && arg.victim != thd)
+  {
+    DBUG_PRINT("wt", ("killing %s", arg.victim->name));
+    arg.victim->killed= 1;
+    pthread_cond_broadcast(&arg.victim->waiting_for->cond);
+    rc_unlock(arg.victim->waiting_for);
+    ret= WT_OK;                 /* somebody else was chosen as the victim */
+  }
+  DBUG_RETURN(ret);
+}
+
+
+/**
+  Delete an element from reshash if it has no waiters or owners
+  rc->lock must be locked by the caller and it's unlocked on return.
+  @return 0 if deleted or still in use, 1 if pins are unavailable or
+          lf_hash_delete() returned -1 */
+static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc)
+{
+  uint keylen;
+  const void *key;
+  DBUG_ENTER("unlock_lock_and_free_resource");
+
+  DBUG_ASSERT(rc->state == ACTIVE);
+
+  if (rc->owners.elements || rc->waiter_count)
+  {
+    DBUG_PRINT("wt", ("nothing to do, %u owners, %u waiters",
+                      rc->owners.elements, rc->waiter_count));
+    rc_unlock(rc);
+    DBUG_RETURN(0);
+  }
+
+  if (fix_thd_pins(thd))        /* lf_hash_delete() below needs thd->pins */
+  {
+    rc_unlock(rc);
+    DBUG_RETURN(1);
+  }
+
+  /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */
+  {
+    key= &rc->id;
+    keylen= sizeof_WT_RESOURCE_ID;
+  }
+
+  /*
+    To free the element correctly we need to:
+     1. take its lock (already done).
+     2. set the state to FREE
+     3. release the lock
+     4. remove from the hash
+  */
+  rc->state= FREE;
+  rc_unlock(rc);
+  DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1);
+}
+
+
+/**
+  register the fact that thd is not waiting anymore
+  decrease waiter_count, clear waiting_for, free the resource if appropriate.
+  thd->waiting_for must be locked (it is unlocked on return).
+  @return WT_DEADLOCK if thd was killed or the free failed, else WT_OK
+*/
+static int stop_waiting_locked(WT_THD *thd)
+{
+  int ret;
+  WT_RESOURCE *rc= thd->waiting_for;
+  DBUG_ENTER("stop_waiting_locked");
+
+  DBUG_ASSERT(rc->waiter_count);
+  DBUG_ASSERT(rc->state == ACTIVE);
+  rc->waiter_count--;
+  thd->waiting_for= 0;
+  ret= unlock_lock_and_free_resource(thd, rc);  /* also unlocks rc */
+  DBUG_RETURN((thd->killed || ret) ? WT_DEADLOCK : WT_OK);
+}
+
+/**
+  register the fact that thd is not waiting anymore
+  locks thd->waiting_for and calls stop_waiting_locked().
+  @return WT_OK if thd wasn't waiting, else what stop_waiting_locked() returns
+*/
+static int stop_waiting(WT_THD *thd)
+{
+  int ret;
+  WT_RESOURCE *rc= thd->waiting_for;
+  DBUG_ENTER("stop_waiting");
+
+  if (!rc)                      /* thd is not waiting for anything */
+    DBUG_RETURN(WT_OK);
+  /*
+    nobody's trying to free the resource now,
+    as its waiter_count is guaranteed to be non-zero
+  */
+  rc_wrlock(rc);
+  ret= stop_waiting_locked(thd);        /* releases the lock taken above */
+  DBUG_RETURN(ret);
+}
+
+/**
+  notify the system that a thread needs to wait for another thread
+
+  called by a *waiter* to declare that it (thd) will wait for another
+  thread (blocker) on a specific resource (resid).
+  can be called many times, if many blockers own a blocking resource.
+  but must always be called with the same resource id - a thread cannot
+  wait for more than one resource at a time.
+
+  @return WT_OK or WT_DEADLOCK (WT_DEADLOCK is also returned on out-of-memory)
+
+  As a new edge is added to the wait-for graph, a deadlock detection is
+  performed for this new edge.
+*/
+int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker,
+                         const WT_RESOURCE_ID *resid)
+{
+  uint i;
+  WT_RESOURCE *rc;
+  DBUG_ENTER("wt_thd_will_wait_for");
+
+  LF_REQUIRE_PINS(3);
+
+  DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%lu",
+                    thd->name, blocker->name, (ulong)resid->value));
+
+  if (fix_thd_pins(thd))        /* no pins - the hash cannot be used */
+    DBUG_RETURN(WT_DEADLOCK);
+
+  if (thd->waiting_for == 0)
+  {
+    uint keylen;
+    const void *key;
+    /* XXX if (restype->make_key) key= restype->make_key(resid, &keylen); else */
+    {
+      key= resid;
+      keylen= sizeof_WT_RESOURCE_ID;
+    }
+
+    DBUG_PRINT("wt", ("first blocker"));
+
+retry:
+    while ((rc= lf_hash_search(&reshash, thd->pins, key, keylen)) == 0)
+    {
+      WT_RESOURCE tmp;
+
+      DBUG_PRINT("wt", ("failed to find rc in hash, inserting"));
+      bzero(&tmp, sizeof(tmp));
+      tmp.id= *resid;
+      tmp.state= ACTIVE;
+
+      if (lf_hash_insert(&reshash, thd->pins, &tmp) == -1) /* if OOM */
+        DBUG_RETURN(WT_DEADLOCK);
+      /*
+        Two cases: either lf_hash_insert() failed - because another thread
+        has just inserted a resource with the same id - and we need to retry.
+        Or lf_hash_insert() succeeded, and then we need to repeat
+        lf_hash_search() to find a real address of the newly inserted element.
+        That is, we don't care what lf_hash_insert() has returned.
+        And we need to repeat the loop anyway.
+      */
+    }
+    if (rc == MY_ERRPTR)        /* lf_hash_search() itself failed */
+      DBUG_RETURN(WT_DEADLOCK);
+
+    DBUG_PRINT("wt", ("found in hash rc=%p", rc));
+
+    rc_wrlock(rc);
+    if (rc->state != ACTIVE)
+    {
+      DBUG_PRINT("wt", ("but it's not active, retrying"));
+      /* Somebody has freed the element while we weren't looking */
+      rc_unlock(rc);
+      lf_hash_search_unpin(thd->pins);
+      goto retry;
+    }
+
+    lf_hash_search_unpin(thd->pins); /* the element cannot go away anymore */
+    thd->waiting_for= rc;
+    rc->waiter_count++;
+    thd->killed= 0;
+  }
+  else
+  {
+    DBUG_ASSERT(thd->waiting_for->id.type == resid->type);
+    DBUG_ASSERT(resid->type->compare(&thd->waiting_for->id, resid) == 0);
+    DBUG_PRINT("wt", ("adding another blocker"));
+
+    /*
+      we can safely access the resource here, it's in the hash as it has
+      non-zero waiter_count
+    */
+    rc= thd->waiting_for;
+    rc_wrlock(rc);
+    DBUG_ASSERT(rc->waiter_count);
+    DBUG_ASSERT(rc->state == ACTIVE);
+
+    if (thd->killed)
+    {
+      stop_waiting_locked(thd);
+      DBUG_RETURN(WT_DEADLOCK);
+    }
+  }
+  /*
+    Another thread could be waiting on this resource for this very 'blocker'.
+    In this case we should not add it to the list for the second time.
+  */
+  for (i= 0; i < rc->owners.elements; i++)
+    if (*dynamic_element(&rc->owners, i, WT_THD**) == blocker)
+      break;
+  if (i >= rc->owners.elements) /* blocker is not among the owners yet */
+  {
+    if (push_dynamic(&blocker->my_resources, (void*)&rc))
+    {
+      stop_waiting_locked(thd);
+      DBUG_RETURN(WT_DEADLOCK); /* deadlock and OOM use the same error code */
+    }
+    if (push_dynamic(&rc->owners, (void*)&blocker))
+    {
+      pop_dynamic(&blocker->my_resources);      /* undo the push above */
+      stop_waiting_locked(thd);
+      DBUG_RETURN(WT_DEADLOCK);
+    }
+  }
+  rc_unlock(rc);
+
+  if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short) != WT_OK)
+  {
+    stop_waiting(thd);
+    DBUG_RETURN(WT_DEADLOCK);
+  }
+  DBUG_RETURN(WT_OK);
+}
+
+/**
+  called by a *waiter* (thd) to start waiting
+
+  It's supposed to be a drop-in replacement for
+  pthread_cond_timedwait(), and it takes mutex as an argument.
+  NOTE(review): its result is compared to WT_TIMEOUT - presumably ETIMEDOUT.
+  @return one of WT_TIMEOUT, WT_DEADLOCK, WT_OK
+*/
+int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex)
+{
+  int ret= WT_TIMEOUT;
+  struct timespec timeout;
+  ulonglong before, after, starttime;
+  WT_RESOURCE *rc= thd->waiting_for;
+  DBUG_ENTER("wt_thd_cond_timedwait");
+  DBUG_PRINT("wt", ("enter: thd=%s, rc=%p", thd->name, rc));
+
+#ifndef DBUG_OFF
+  if (rc->cond_mutex)
+    DBUG_ASSERT(rc->cond_mutex == mutex);
+  else
+    rc->cond_mutex= mutex;
+  safe_mutex_assert_owner(mutex);
+#endif
+
+  before= starttime= my_getsystime();
+
+#ifdef __WIN__
+  /*
+    only for the sake of Windows we distinguish between
+    'before' and 'starttime':
+
+    my_getsystime() returns high-resolution value, that cannot be used for
+    waiting (it doesn't follow system clock changes), but is good for time
+    intervals.
+
+    GetSystemTimeAsFileTime() follows system clock, but is low-resolution
+    and will result in lousy intervals.
+  */
+  GetSystemTimeAsFileTime((PFILETIME)&starttime);
+#endif
+
+  rc_wrlock(rc);
+  if (rc->owners.elements == 0) /* nobody owns the resource - don't wait */
+    ret= WT_OK;
+  rc_unlock(rc);
+
+  set_timespec_time_nsec(timeout, starttime, (*thd->timeout_short)*ULL(1000));
+  if (ret == WT_TIMEOUT && !thd->killed)
+    ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout);
+  if (ret == WT_TIMEOUT && !thd->killed)
+  { /* the short wait has timed out: search again with the long depth */
+    int r= deadlock(thd, thd, 0, *thd->deadlock_search_depth_long);
+    if (r == WT_FREE_TO_GO)
+      ret= WT_OK;
+    else if (r != WT_OK)
+      ret= WT_DEADLOCK;
+    else if (*thd->timeout_long > *thd->timeout_short)
+    {
+      set_timespec_time_nsec(timeout, starttime, (*thd->timeout_long)*ULL(1000));
+      if (!thd->killed)
+        ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout);
+    }
+  }
+  after= my_getsystime();
+  if (stop_waiting(thd) == WT_DEADLOCK) /* if we're killed */
+    ret= WT_DEADLOCK;
+  increment_wait_stats(after-before, ret);
+  if (ret == WT_OK)
+    increment_success_stats();
+  DBUG_RETURN(ret);
+}
+
+/**
+  called by a *blocker* when it releases a resource
+
+  it's conceptually similar to pthread_cond_broadcast, and must be done
+  under the same mutex as wt_thd_cond_timedwait().
+  @param thd     the blocker, an owner of the resource(s) being released
+  @param resid   a resource to release. 0 to release all resources
+*/
+
+void wt_thd_release(WT_THD *thd, const WT_RESOURCE_ID *resid)
+{
+  uint i;
+  DBUG_ENTER("wt_thd_release");
+
+  for (i= 0; i < thd->my_resources.elements; i++)
+  {
+    WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**);
+    if (!resid || (resid->type->compare(&rc->id, resid) == 0))
+    {
+      uint j;
+
+      rc_wrlock(rc);
+      /*
+        nobody's trying to free the resource now,
+        as its owners[] array is not empty (at least thd must be there)
+      */
+      DBUG_ASSERT(rc->state == ACTIVE);
+      for (j= 0; j < rc->owners.elements; j++)
+        if (*dynamic_element(&rc->owners, j, WT_THD**) == thd)
+          break;
+      DBUG_ASSERT(j < rc->owners.elements);
+      delete_dynamic_element(&rc->owners, j);
+      if (rc->owners.elements == 0)     /* last owner gone: wake the waiters */
+      {
+        pthread_cond_broadcast(&rc->cond);
+#ifndef DBUG_OFF
+        if (rc->cond_mutex)
+          safe_mutex_assert_owner(rc->cond_mutex);
+#endif
+      }
+      unlock_lock_and_free_resource(thd, rc);
+      if (resid)                        /* a single resource was requested */
+      {
+        delete_dynamic_element(&thd->my_resources, i);
+        DBUG_VOID_RETURN;
+      }
+    }
+  }
+  if (!resid)                           /* everything has been released */
+    reset_dynamic(&thd->my_resources);
+  DBUG_VOID_RETURN;
+}
+
diff --git a/mysys/wqueue.c b/mysys/wqueue.c
new file mode 100644
index 00000000000..fcc0a39725d
--- /dev/null
+++ b/mysys/wqueue.c
@@ -0,0 +1,225 @@
+
+#include <wqueue.h>
+
+#define STRUCT_PTR(TYPE, MEMBER, a)  /* MEMBER address -> enclosing TYPE */   \
+          ((TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)))
+/*
+  Append a thread to a double-linked queue of waiting threads.
+
+  SYNOPSIS
+    wqueue_link_into_queue()
+      wqueue              pointer to the queue structure
+      thread              pointer to the thread to be added to the queue
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    The queue is a circular list of thread structures, linked both ways
+    as (**prev,*next): 'prev' holds the address of the 'next' field that
+    points at the element. The queue is reached through its last element.
+*/
+
+void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+  struct st_my_thread_var *tail= wqueue->last_thread;
+  if (tail)
+  {
+    /* splice the thread in between the current tail and the head */
+    thread->prev= tail->next->prev;
+    tail->next->prev= &thread->next;
+    thread->next= tail->next;
+    tail->next= thread;
+  }
+  else
+  {
+    thread->next= thread;               /* queue was empty: a ring of one */
+    thread->prev= &thread->next;
+  }
+  wqueue->last_thread= thread;
+}
+
+
+/*
+  Append a thread to a single-linked queue of waiting threads
+
+  SYNOPSIS
+    wqueue_add_to_queue()
+      wqueue              pointer to the queue structure
+      thread              pointer to the thread to be added to the queue
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    The queue is a circular list of thread structures, linked one way
+    only through (*next) and reached through a pointer to its last
+    element. The thread added here becomes the new last element.
+*/
+
+void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+  struct st_my_thread_var *tail= wqueue->last_thread;
+#ifndef DBUG_OFF
+  thread->prev= NULL; /* force segfault if used */
+#endif
+  if (tail)
+  {
+    thread->next= tail->next;           /* new element points at the head */
+    tail->next= thread;
+  }
+  else
+    thread->next= thread;               /* first element: a ring of one */
+  wqueue->last_thread= thread;
+}
+
+/*
+  Unlink a thread from double-linked queue of waiting threads
+
+  SYNOPSIS
+    wqueue_unlink_from_queue()
+      wqueue              pointer to the queue structure
+      thread              pointer to the thread to be removed from the queue
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    See NOTES for wqueue_link_into_queue. thread->next is NULL on return.
+*/
+
+void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+  if (thread->next == thread)
+    /* The queue contains only one member */
+    wqueue->last_thread= NULL;
+  else
+  {
+    thread->next->prev= thread->prev;
+    *thread->prev= thread->next;        /* predecessor's 'next' skips thread */
+    if (wqueue->last_thread == thread)  /* the predecessor becomes the last */
+      wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
+                                      thread->prev);
+  }
+  thread->next= NULL;
+}
+
+
+/*
+  Remove all threads from queue signaling them to proceed
+
+  SYNOPSIS
+    wqueue_release_queue()
+      wqueue              pointer to the queue structure; must be non-empty
+                          (wqueue->last_thread is dereferenced unchecked)
+
+  RETURN VALUE
+    none
+
+  NOTES.
+    See notes for wqueue_add_to_queue
+    When removed from the queue each thread is signaled via condition
+    variable thread->suspend.
+*/
+
+void wqueue_release_queue(WQUEUE *wqueue)
+{
+  struct st_my_thread_var *last= wqueue->last_thread;
+  struct st_my_thread_var *next= last->next;    /* the first in the queue */
+  struct st_my_thread_var *thread;
+  do
+  {
+    thread= next;
+    pthread_cond_signal(&thread->suspend);
+    next= thread->next;
+    thread->next= NULL;                 /* marks the thread as released */
+  }
+  while (thread != last);
+  wqueue->last_thread= NULL;
+}
+
+
+/**
+  @brief Removes all threads waiting for read or first one waiting for write.
+
+  @param wqueue          pointer to the queue structure; must be non-empty
+                         (wqueue->last_thread is dereferenced unchecked)
+
+  @note This function is applicable only to single linked lists.
+*/
+
+void wqueue_release_one_locktype_from_queue(WQUEUE *wqueue)
+{
+  struct st_my_thread_var *last= wqueue->last_thread;
+  struct st_my_thread_var *next= last->next;    /* the first in the queue */
+  struct st_my_thread_var *thread;
+  struct st_my_thread_var *new_list= NULL;      /* last of the kept writers */
+  uint first_type= next->lock_type;
+  if (first_type == MY_PTHREAD_LOCK_WRITE)
+  {
+    /* release first waiting for write lock */
+    pthread_cond_signal(&next->suspend);
+    if (next == last)
+      wqueue->last_thread= NULL;
+    else
+      last->next= next->next;
+    next->next= NULL;
+    return;
+  }
+  do
+  {
+    thread= next;
+    next= thread->next;         /* saved first: thread->next changes below */
+    if (thread->lock_type == MY_PTHREAD_LOCK_WRITE)
+    {
+      /* skip waiting for write lock */
+      if (new_list)
+      {
+        thread->next= new_list->next;
+        new_list= new_list->next= thread;       /* append, order is kept */
+      }
+      else
+        new_list= thread->next= thread;
+    }
+    else
+    {
+      /* release waiting for read lock */
+      pthread_cond_signal(&thread->suspend);
+      thread->next= NULL;
+    }
+  } while (thread != last);
+  wqueue->last_thread= new_list;
+}
+
+
+/*
+  Add thread and wait
+
+  SYNOPSIS
+    wqueue_add_and_wait()
+    wqueue               queue to add to
+    thread               thread which is waiting
+    lock                 mutex passed to pthread_cond_wait() while waiting
+*/
+
+void wqueue_add_and_wait(WQUEUE *wqueue,
+                         struct st_my_thread_var *thread,
+                         pthread_mutex_t *lock)
+{
+  DBUG_ENTER("wqueue_add_and_wait");
+  DBUG_PRINT("enter",
+             ("thread: 0x%lx  cond: 0x%lx  mutex: 0x%lx",
+              (ulong) thread, (ulong) &thread->suspend, (ulong) lock));
+  wqueue_add_to_queue(wqueue, thread);
+  do
+  {
+    DBUG_PRINT("info", ("wait... cond:  0x%lx  mutex:  0x%lx",
+                        (ulong) &thread->suspend, (ulong) lock));
+    pthread_cond_wait(&thread->suspend, lock);
+    DBUG_PRINT("info", ("wait done cond: 0x%lx  mutex: 0x%lx   next: 0x%lx",
+                        (ulong) &thread->suspend, (ulong) lock,
+                        (ulong) thread->next));
+  }
+  while (thread->next);         /* the release functions reset it to NULL */
+  DBUG_VOID_RETURN;
+}