author    | unknown <jani@hynda.mysql.fi> | 2007-06-27 17:49:12 +0300
committer | unknown <jani@hynda.mysql.fi> | 2007-06-27 17:49:12 +0300
commit    | 5444b55cbbab69a508e6c477a1b9580eff969da1 (patch)
tree      | eb82a0418f88b83d49bf66dfc659d59862535683 /mysys
parent    | e34641b3cece714b861c7130f245659342f583ba (diff)
parent    | 1a96259191b193b353387cbb70d7567009e3b247 (diff)
Merge jamppa@bk-internal.mysql.com:/home/bk/mysql-5.1
into hynda.mysql.fi:/home/my/mysql-maria
BitKeeper/etc/ignore:
auto-union
Makefile.am:
Auto merged
BUILD/SETUP.sh:
Auto merged
BitKeeper/deleted/.del-init_db.sql~a77d572c39d5a1f8:
Auto merged
client/mysqldump.c:
Auto merged
include/Makefile.am:
Auto merged
include/m_string.h:
Auto merged
include/my_base.h:
Auto merged
include/my_dbug.h:
Auto merged
libmysql/CMakeLists.txt:
Auto merged
libmysql/Makefile.shared:
Auto merged
libmysqld/Makefile.am:
Auto merged
mysql-test/include/varchar.inc:
Auto merged
mysql-test/lib/mtr_cases.pl:
Auto merged
mysql-test/lib/mtr_io.pl:
Auto merged
mysql-test/lib/mtr_misc.pl:
Auto merged
mysql-test/mysql-test-run.pl:
Auto merged
mysql-test/lib/mtr_process.pl:
Auto merged
mysql-test/lib/mtr_report.pl:
Auto merged
mysql-test/r/events_logs_tests.result:
Auto merged
mysql-test/r/view.result:
Auto merged
mysql-test/t/disabled.def:
Auto merged
mysql-test/t/events_logs_tests.test:
Auto merged
mysql-test/t/myisam.test:
Auto merged
mysql-test/t/view.test:
Auto merged
mysys/Makefile.am:
Auto merged
mysys/my_create.c:
Auto merged
mysys/my_handler.c:
Auto merged
mysys/my_init.c:
Auto merged
mysys/my_open.c:
Auto merged
mysys/safemalloc.c:
Auto merged
plugin/daemon_example/daemon_example.cc:
Auto merged
sql/Makefile.am:
Auto merged
sql/filesort.cc:
Auto merged
sql/gen_lex_hash.cc:
Auto merged
sql/ha_ndbcluster.cc:
Auto merged
sql/handler.cc:
Auto merged
sql/handler.h:
Auto merged
sql/item_func.cc:
Auto merged
sql/item_func.h:
Auto merged
sql/log.cc:
Auto merged
sql/mysql_priv.h:
Auto merged
sql/opt_range.cc:
Auto merged
sql/slave.cc:
Auto merged
sql/slave.h:
Auto merged
sql/sql_parse.cc:
Auto merged
sql/sql_select.cc:
Auto merged
sql/share/errmsg.txt:
Auto merged
sql/sql_table.cc:
Auto merged
sql/sql_test.cc:
Auto merged
sql/udf_example.c:
Auto merged
sql/uniques.cc:
Auto merged
sql/unireg.cc:
Auto merged
storage/csv/ha_tina.cc:
Auto merged
storage/myisam/ft_boolean_search.c:
Auto merged
storage/myisam/ft_nlq_search.c:
Auto merged
storage/myisam/ft_parser.c:
Auto merged
storage/myisam/ft_stopwords.c:
Auto merged
storage/myisam/ft_update.c:
Auto merged
storage/myisam/fulltext.h:
Auto merged
storage/myisam/ha_myisam.h:
Auto merged
storage/myisam/mi_checksum.c:
Auto merged
storage/myisam/mi_create.c:
Auto merged
storage/myisam/mi_delete.c:
Auto merged
storage/myisam/mi_delete_all.c:
Auto merged
storage/myisam/mi_key.c:
Auto merged
storage/myisam/mi_log.c:
Auto merged
storage/myisam/mi_open.c:
Auto merged
storage/myisam/mi_range.c:
Auto merged
storage/myisam/mi_rkey.c:
Auto merged
storage/myisam/mi_rsamepos.c:
Auto merged
storage/myisam/mi_search.c:
Auto merged
storage/myisam/mi_test1.c:
Auto merged
storage/myisam/mi_test2.c:
Auto merged
storage/myisam/mi_unique.c:
Auto merged
storage/myisam/mi_update.c:
Auto merged
storage/myisam/myisamlog.c:
Auto merged
storage/myisam/myisampack.c:
Auto merged
storage/myisam/rt_index.c:
Auto merged
storage/myisam/sort.c:
Auto merged
storage/myisam/sp_test.c:
Auto merged
storage/myisammrg/ha_myisammrg.h:
Auto merged
storage/ndb/src/mgmapi/mgmapi.cpp:
Auto merged
unittest/Makefile.am:
Auto merged
BitKeeper/triggers/post-commit:
Manual merge from mysql-5.1 to mysql-maria
configure.in:
Manual merge from mysql-5.1 to mysql-maria
include/ft_global.h:
Manual merge from mysql-5.1 to mysql-maria
include/keycache.h:
Manual merge from mysql-5.1 to mysql-maria
include/my_atomic.h:
Manual merge from mysql-5.1 to mysql-maria
include/my_global.h:
Manual merge from mysql-5.1 to mysql-maria
include/my_sys.h:
Manual merge from mysql-5.1 to mysql-maria
include/myisam.h:
Manual merge from mysql-5.1 to mysql-maria
mysys/array.c:
Manual merge from mysql-5.1 to mysql-maria
mysys/mf_keycache.c:
Manual merge from mysql-5.1 to mysql-maria
mysys/mf_keycaches.c:
Manual merge from mysql-5.1 to mysql-maria
mysys/my_pread.c:
Manual merge from mysql-5.1 to mysql-maria
sql/mysqld.cc:
Manual merge from mysql-5.1 to mysql-maria
sql/net_serv.cc:
Manual merge from mysql-5.1 to mysql-maria
sql/set_var.cc:
Manual merge from mysql-5.1 to mysql-maria
sql/set_var.h:
Manual merge from mysql-5.1 to mysql-maria
sql/sql_class.h:
Manual merge from mysql-5.1 to mysql-maria
storage/myisam/ft_static.c:
Manual merge from mysql-5.1 to mysql-maria
storage/myisam/ha_myisam.cc:
Manual merge from mysql-5.1 to mysql-maria
storage/myisam/mi_check.c:
Manual merge from mysql-5.1 to mysql-maria
storage/myisam/mi_dynrec.c:
Manual merge from mysql-5.1 to mysql-maria
storage/myisam/mi_packrec.c:
Manual merge from mysql-5.1 to mysql-maria
storage/myisam/mi_write.c:
Manual merge from mysql-5.1 to mysql-maria
storage/myisam/myisamchk.c:
Manual merge from mysql-5.1 to mysql-maria
storage/myisam/myisamdef.h:
Manual merge from mysql-5.1 to mysql-maria
storage/myisammrg/ha_myisammrg.cc:
Manual merge from mysql-5.1 to mysql-maria
unittest/mysys/Makefile.am:
Manual merge from mysql-5.1 to mysql-maria
unittest/mysys/my_atomic-t.c:
Manual merge from mysql-5.1 to mysql-maria
Diffstat (limited to 'mysys')
-rw-r--r-- | mysys/Makefile.am     |   8
-rw-r--r-- | mysys/array.c         |  89
-rw-r--r-- | mysys/lf_alloc-pin.c  | 523
-rw-r--r-- | mysys/lf_dynarray.c   | 208
-rw-r--r-- | mysys/lf_hash.c       | 493
-rw-r--r-- | mysys/mf_keycache.c   |  38
-rw-r--r-- | mysys/mf_keycaches.c  | 277
-rw-r--r-- | mysys/my_atomic.c     |   9
-rw-r--r-- | mysys/my_bit.c        | 100
-rw-r--r-- | mysys/my_bitmap.c     |   1
-rw-r--r-- | mysys/my_create.c     |   7
-rw-r--r-- | mysys/my_delete.c     |   3
-rw-r--r-- | mysys/my_getsystime.c |   4
-rw-r--r-- | mysys/my_handler.c    |  23
-rw-r--r-- | mysys/my_init.c       |   1
-rw-r--r-- | mysys/my_open.c       |   1
-rw-r--r-- | mysys/my_pread.c      |   6
-rw-r--r-- | mysys/my_rename.c     |  16
-rw-r--r-- | mysys/my_safehash.c   | 297
-rw-r--r-- | mysys/my_safehash.h   |  58
-rw-r--r-- | mysys/my_symlink.c    |   2
-rw-r--r-- | mysys/my_sync.c       |  79
-rw-r--r-- | mysys/safemalloc.c    |  23
-rw-r--r-- | mysys/wqueue.c        | 167
24 files changed, 2026 insertions, 407 deletions
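
The bulk of this merge brings the new lock-free primitives (lf_alloc-pin.c, lf_dynarray.c, lf_hash.c) and the wait queue (wqueue.c) into mysys. The long header comment in lf_alloc-pin.c below spells out the pinning protocol they all rely on. As a minimal C sketch of steps 2-5 of that protocol - assuming only the LF_PINS type and the _lf_pin()/_lf_unpin() calls that appear later in this diff; the function itself is illustrative, not part of the patch:

#include <lf.h>

/*
  Read a shared pointer safely under the pinning protocol:
  copy, pin, then verify that the shared pointer did not
  change between the copy and the pin.
*/
void *pin_and_read(void * volatile *shared_ptr, LF_PINS *pins)
{
  void *local;
  do
  {
    local= *shared_ptr;              /* step 2: take a local copy     */
    _lf_pin(pins, 0, local);         /* step 3: publish it in pin[0]  */
  } while (local != *shared_ptr);    /* pin "took" only if unchanged  */
  /*
    step 4: local now points to an object that cannot be freed
    while pin[0] holds it (though *shared_ptr itself may move on)
  */
  return local;                      /* step 5: _lf_unpin(pins, 0) when done */
}

The same copy-pin-verify loop appears verbatim in _lf_alloc_new() and lfind() below.
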
diff --git a/mysys/Makefile.am b/mysys/Makefile.am index fca13ab52b8..bdc9f176d8e 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -25,12 +25,14 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_path.c mf_loadpath.c my_file.c \ my_open.c my_create.c my_dup.c my_seek.c my_read.c \ my_pread.c my_write.c my_getpagesize.c \ + my_safehash.c \ mf_keycache.c mf_keycaches.c my_crc32.c \ mf_iocache.c mf_iocache2.c mf_cache.c mf_tempfile.c \ mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \ my_malloc.c my_realloc.c my_once.c mulalloc.c \ my_alloc.c safemalloc.c my_new.cc \ - my_vle.c my_atomic.c \ + my_vle.c my_atomic.c lf_hash.c \ + lf_dynarray.c lf_alloc-pin.c \ my_fopen.c my_fstream.c my_getsystime.c \ my_error.c errors.c my_div.c my_messnc.c \ mf_format.c mf_same.c mf_dirname.c mf_fn_ext.c \ @@ -52,7 +54,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_gethostbyname.c rijndael.c my_aes.c sha1.c \ my_handler.c my_netware.c my_largepage.c \ my_memmem.c \ - my_windac.c my_access.c base64.c my_libwrap.c + my_windac.c my_access.c base64.c my_libwrap.c \ + wqueue.c EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ thr_mutex.c thr_rwlock.c \ CMakeLists.txt mf_soundex.c \ @@ -126,5 +129,6 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES) $(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS) $(RM) -f ./test_base64.c + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/mysys/array.c b/mysys/array.c index 8a539f18a20..b7342f70ef8 100644 --- a/mysys/array.c +++ b/mysys/array.c @@ -63,7 +63,7 @@ my_bool init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size, array->size_of_element=element_size; if ((array->buffer= init_buffer)) DBUG_RETURN(FALSE); - if (!(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc,MYF(MY_WME)))) + if (!(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc, MYF(MY_WME)))) { array->max_element=0; DBUG_RETURN(TRUE); @@ -179,7 +179,7 @@ uchar *pop_dynamic(DYNAMIC_ARRAY *array) } /* - Replace elemnent in array with given element and index + Replace element in array with given element and index SYNOPSIS set_dynamic() @@ -200,42 +200,69 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, uchar* element, uint idx) { if (idx >= array->elements) { - if (idx >= array->max_element) - { - uint size; - char *new_ptr; - size=(idx+array->alloc_increment)/array->alloc_increment; - size*= array->alloc_increment; - if (array->buffer == (uchar *)(array + 1)) - { - /* - In this senerio, the buffer is statically preallocated, - so we have to create an all-new malloc since we overflowed - */ - if (!(new_ptr= (char *) my_malloc(size * - array->size_of_element, - MYF(MY_WME)))) - return 0; - memcpy(new_ptr, array->buffer, - array->elements * array->size_of_element); - } - else - if (!(new_ptr=(char*) my_realloc(array->buffer,size* - array->size_of_element, - MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) - return TRUE; - array->buffer= (uchar*) new_ptr; - array->max_element=size; - } + if (idx >= array->max_element && allocate_dynamic(array, idx)) + return TRUE; bzero((uchar*) (array->buffer+array->elements*array->size_of_element), - (idx - array->elements)*array->size_of_element); + (idx - array->elements)*array->size_of_element); array->elements=idx+1; } memcpy(array->buffer+(idx * array->size_of_element),element, - (size_t) array->size_of_element); + (size_t) array->size_of_element); return FALSE; } + +/* + Ensure that dynamic array has enough elements + + SYNOPSIS + allocate_dynamic() + array + max_elements Numbers of 
elements that is needed + + NOTES + Any new allocated element are NOT initialized + + RETURN VALUE + FALSE Ok + TRUE Allocation of new memory failed +*/ + +my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements) +{ + if (max_elements >= array->max_element) + { + uint size; + char *new_ptr; + size= (max_elements + array->alloc_increment)/array->alloc_increment; + size*= array->alloc_increment; + if (array->buffer == (uchar *)(array + 1)) + { + /* + In this senerio, the buffer is statically preallocated, + so we have to create an all-new malloc since we overflowed + */ + if (!(new_ptr= (char *) my_malloc(size * + array->size_of_element, + MYF(MY_WME)))) + return 0; + memcpy(new_ptr, array->buffer, + array->elements * array->size_of_element); + } + else + + + if (!(new_ptr= (char*) my_realloc(array->buffer,size* + array->size_of_element, + MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) + return TRUE; + array->buffer= new_ptr; + array->max_element= size; + } + return FALSE; +} + + /* Get an element from array by given index diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c new file mode 100644 index 00000000000..51c4df7c94a --- /dev/null +++ b/mysys/lf_alloc-pin.c @@ -0,0 +1,523 @@ +/* QQ: TODO multi-pinbox */ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + wait-free concurrent allocator based on pinning addresses + + It works as follows: every thread (strictly speaking - every CPU, but + it's too difficult to do) has a small array of pointers. They're called + "pins". Before using an object its address must be stored in this array + (pinned). When an object is no longer necessary its address must be + removed from this array (unpinned). When a thread wants to free() an + object it scans all pins of all threads to see if somebody has this + object pinned. If yes - the object is not freed (but stored in a + "purgatory"). To reduce the cost of a single free() pins are not scanned + on every free() but only added to (thread-local) purgatory. On every + LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects + are freed. + + Pins are used to solve ABA problem. To use pins one must obey + a pinning protocol: + + 1. Let's assume that PTR is a shared pointer to an object. Shared means + that any thread may modify it anytime to point to a different object + and free the old object. Later the freed object may be potentially + allocated by another thread. If we're unlucky that other thread may + set PTR to point to this object again. This is ABA problem. + 2. Create a local pointer LOCAL_PTR. + 3. Pin the PTR in a loop: + do + { + LOCAL_PTR= PTR; + pin(PTR, PIN_NUMBER); + } while (LOCAL_PTR != PTR) + 4. It is guaranteed that after the loop has ended, LOCAL_PTR + points to an object (or NULL, if PTR may be NULL), that + will never be freed. 
It is not guaranteed though + that LOCAL_PTR == PTR (as PTR can change any time) + 5. When done working with the object, remove the pin: + unpin(PIN_NUMBER) + 6. When copying pins (as in the list traversing loop: + pin(CUR, 1); + while () + { + do // standard + { // pinning + NEXT=CUR->next; // loop + pin(NEXT, 0); // see #3 + } while (NEXT != CUR->next); // above + ... + ... + CUR=NEXT; + pin(CUR, 1); // copy pin[0] to pin[1] + } + which keeps CUR address constantly pinned), note than pins may be + copied only upwards (!!!), that is pin[N] to pin[M], M > N. + 7. Don't keep the object pinned longer than necessary - the number of + pins you have is limited (and small), keeping an object pinned + prevents its reuse and cause unnecessary mallocs. + + Explanations: + + 3. The loop is important. The following can occur: + thread1> LOCAL_PTR= PTR + thread2> free(PTR); PTR=0; + thread1> pin(PTR, PIN_NUMBER); + now thread1 cannot access LOCAL_PTR, even if it's pinned, + because it points to a freed memory. That is, it *must* + verify that it has indeed pinned PTR, the shared pointer. + + 6. When a thread wants to free some LOCAL_PTR, and it scans + all lists of pins to see whether it's pinned, it does it + upwards, from low pin numbers to high. Thus another thread + must copy an address from one pin to another in the same + direction - upwards, otherwise the scanning thread may + miss it. + + Implementation details: + + Pins are given away from a "pinbox". Pinbox is stack-based allocator. + It used dynarray for storing pins, new elements are allocated by dynarray + as necessary, old are pushed in the stack for reuse. ABA is solved by + versioning a pointer - because we use an array, a pointer to pins is 16 bit, + upper 16 bits are used for a version. + + It is assumed that pins belong to a thread and are not transferable + between threads (LF_PINS::stack_ends_here being a primary reason + for this limitation). +*/ + +#include <my_global.h> +#include <my_sys.h> +#include <lf.h> + +#define LF_PINBOX_MAX_PINS 65536 + +static void _lf_pinbox_real_free(LF_PINS *pins); + +/* + Initialize a pinbox. Normally called from lf_alloc_init. + See the latter for details. +*/ +void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, + lf_pinbox_free_func *free_func, void *free_func_arg) +{ + DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0); + compile_time_assert(sizeof(LF_PINS) == 128); + lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS)); + pinbox->pinstack_top_ver= 0; + pinbox->pins_in_array= 0; + pinbox->free_ptr_offset= free_ptr_offset; + pinbox->free_func= free_func; + pinbox->free_func_arg= free_func_arg; +} + +void lf_pinbox_destroy(LF_PINBOX *pinbox) +{ + lf_dynarray_destroy(&pinbox->pinarray); +} + +/* + Get pins from a pinbox. Usually called via lf_alloc_get_pins() or + lf_hash_get_pins(). + + SYNOPSYS + pinbox - + stack_end - a pointer to the end (top/bottom, depending on the + STACK_DIRECTION) of stack. Used for safe alloca. There's + no safety margin deducted, a caller should take care of it, + if necessary. + + DESCRIPTION + get a new LF_PINS structure from a stack of unused pins, + or allocate a new one out of dynarray. + + NOTE + It is assumed that pins belong to a thread and are not transferable + between threads. +*/ +LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end) +{ + uint32 pins, next, top_ver; + LF_PINS *el; + /* + We have an array of max. 64k elements. + The highest index currently allocated is pinbox->pins_in_array. 
+ Freed elements are in a lifo stack, pinstack_top_ver. + pinstack_top_ver is 32 bits; 16 low bits are the index in the + array, to the first element of the list. 16 high bits are a version + (every time the 16 low bits are updated, the 16 high bits are + incremented). Versioniong prevents the ABA problem. + */ + top_ver= pinbox->pinstack_top_ver; + do + { + if (!(pins= top_ver % LF_PINBOX_MAX_PINS)) + { + /* the stack of free elements is empty */ + pins= my_atomic_add32(&pinbox->pins_in_array, 1)+1; + if (unlikely(pins >= LF_PINBOX_MAX_PINS)) + return 0; + /* + note that the first allocated element has index 1 (pins==1). + index 0 is reserved to mean "NULL pointer" + */ + el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinarray, pins); + if (unlikely(!el)) + return 0; + break; + } + el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinarray, pins); + next= el->link; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + top_ver-pins+next+LF_PINBOX_MAX_PINS)); + /* + set el->link to the index of el in the dynarray (el->link has two usages: + - if element is allocated, it's its own index + - if element is free, it's its next element in the free stack + */ + el->link= pins; + el->purgatory_count= 0; + el->pinbox= pinbox; + el->stack_ends_here= stack_end; + return el; +} + +/* + Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or + lf_hash_put_pins(). + + DESCRIPTION + empty the purgatory (XXX deadlock warning below!), + push LF_PINS structure to a stack +*/ +void _lf_pinbox_put_pins(LF_PINS *pins) +{ + LF_PINBOX *pinbox= pins->pinbox; + uint32 top_ver, nr; + nr= pins->link; +#ifdef MY_LF_EXTRA_DEBUG + { + int i; + for (i= 0; i < LF_PINBOX_PINS; i++) + DBUG_ASSERT(pins->pin[i] == 0); + } +#endif + /* + XXX this will deadlock if other threads will wait for + the caller to do something after _lf_pinbox_put_pins(), + and they would have pinned addresses that the caller wants to free. + Thus: only free pins when all work is done and nobody can wait for you!!! + */ + while (pins->purgatory_count) + { + _lf_pinbox_real_free(pins); + if (pins->purgatory_count) + { + my_atomic_rwlock_wrunlock(&pins->pinbox->pinarray.lock); + pthread_yield(); + my_atomic_rwlock_wrlock(&pins->pinbox->pinarray.lock); + } + } + top_ver= pinbox->pinstack_top_ver; + do + { + pins->link= top_ver % LF_PINBOX_MAX_PINS; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); + return; +} + +static int ptr_cmp(void **a, void **b) +{ + return *a < *b ? -1 : *a == *b ? 0 : 1; +} + +#define add_to_purgatory(PINS, ADDR) \ + do \ + { \ + *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \ + (PINS)->purgatory; \ + (PINS)->purgatory= (ADDR); \ + (PINS)->purgatory_count++; \ + } while (0) + +/* + Free an object allocated via pinbox allocator + + DESCRIPTION + add an object to purgatory. if necessary, call _lf_pinbox_real_free() + to actually free something. 
+*/ +void _lf_pinbox_free(LF_PINS *pins, void *addr) +{ + add_to_purgatory(pins, addr); + if (pins->purgatory_count % LF_PURGATORY_SIZE) + _lf_pinbox_real_free(pins); +} + +struct st_harvester { + void **granary; + int npins; +}; + +/* + callback for _lf_dynarray_iterate: + scan all pins of all threads and accumulate all pins +*/ +static int harvest_pins(LF_PINS *el, struct st_harvester *hv) +{ + int i; + LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH); + for (; el < el_end; el++) + { + for (i= 0; i < LF_PINBOX_PINS; i++) + { + void *p= el->pin[i]; + if (p) + *hv->granary++= p; + } + } + /* + hv->npins may become negative below, but it means that + we're on the last dynarray page and harvest_pins() won't be + called again. We don't bother to make hv->npins() correct + (that is 0) in this case. + */ + hv->npins-= LF_DYNARRAY_LEVEL_LENGTH; + return 0; +} + +/* + callback for _lf_dynarray_iterate: + scan all pins of all threads and see if addr is present there +*/ +static int match_pins(LF_PINS *el, void *addr) +{ + int i; + LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH; + for (; el < el_end; el++) + for (i= 0; i < LF_PINBOX_PINS; i++) + if (el->pin[i] == addr) + return 1; + return 0; +} + +#if STACK_DIRECTION < 0 +#define available_stack_size(END,CUR) (long) ((char*)(CUR) - (char*)(END)) +#else +#define available_stack_size(END,CUR) (long) ((char*)(END) - (char*)(CUR)) +#endif + +/* + Scan the purgatory and free everything that can be freed +*/ +static void _lf_pinbox_real_free(LF_PINS *pins) +{ + int npins, alloca_size; + void *list, **addr; + struct st_lf_alloc_node *first, *last= NULL; + LF_PINBOX *pinbox= pins->pinbox; + + npins= pinbox->pins_in_array+1; + +#ifdef HAVE_ALLOCA + alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins; + /* create a sorted list of pinned addresses, to speed up searches */ + if (available_stack_size(&pinbox, pins->stack_ends_here) > alloca_size) + { + struct st_harvester hv; + addr= (void **) alloca(alloca_size); + hv.granary= addr; + hv.npins= npins; + /* scan the dynarray and accumulate all pinned addresses */ + _lf_dynarray_iterate(&pinbox->pinarray, + (lf_dynarray_func)harvest_pins, &hv); + + npins= hv.granary-addr; + /* and sort them */ + if (npins) + qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); + } + else +#endif + addr= 0; + + list= pins->purgatory; + pins->purgatory= 0; + pins->purgatory_count= 0; + while (list) + { + void *cur= list; + list= *(void **)((char *)cur+pinbox->free_ptr_offset); + if (npins) + { + if (addr) /* use binary search */ + { + void **a, **b, **c; + for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2) + if (cur == *c) + a= b= c; + else if (cur > *c) + a= c; + else + b= c; + if (cur == *a || cur == *b) + goto found; + } + else /* no alloca - no cookie. linear search here */ + { + if (_lf_dynarray_iterate(&pinbox->pinarray, + (lf_dynarray_func)match_pins, cur)) + goto found; + } + } + /* not pinned - freeing */ + if (last) + last= last->next= (struct st_lf_alloc_node *)cur; + else + first= last= (struct st_lf_alloc_node *)cur; + continue; +found: + /* pinned - keeping */ + add_to_purgatory(pins, cur); + } + if (last) + pinbox->free_func(first, last, pinbox->free_func_arg); +} + +/* lock-free memory allocator for fixed-size objects */ + +LF_REQUIRE_PINS(1); + +/* + callback for _lf_pinbox_real_free to free a list of unpinned objects - + add it back to the allocator stack + + DESCRIPTION + 'first' and 'last' are the ends of the linked list of st_lf_alloc_node's: + first->el->el->....->el->last. 
Use first==last to free only one element. +*/ +static void alloc_free(struct st_lf_alloc_node *first, + struct st_lf_alloc_node *last, + LF_ALLOCATOR *allocator) +{ + struct st_lf_alloc_node *tmp; + tmp= allocator->top; + do + { + last->next= tmp; + } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, first) && + LF_BACKOFF); +} + +/* + initialize lock-free allocator + + SYNOPSYS + allocator - + size a size of an object to allocate + free_ptr_offset an offset inside the object to a sizeof(void *) + memory that is guaranteed to be unused after + the object is put in the purgatory. Unused by ANY + thread, not only the purgatory owner. + This memory will be used to link waiting-to-be-freed + objects in a purgatory list. +*/ +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) +{ + lf_pinbox_init(&allocator->pinbox, free_ptr_offset, + (lf_pinbox_free_func *)alloc_free, allocator); + allocator->top= 0; + allocator->mallocs= 0; + allocator->element_size= size; + DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset); +} + +/* + destroy the allocator, free everything that's in it + + NOTE + As every other init/destroy function here and elsewhere it + is not thread safe. No, this function is no different, ensure + that no thread needs the allocator before destroying it. + We are not responsible for any damage that may be caused by + accessing the allocator when it is being or has been destroyed. + Oh yes, and don't put your cat in a microwave. +*/ +void lf_alloc_destroy(LF_ALLOCATOR *allocator) +{ + struct st_lf_alloc_node *node= allocator->top; + while (node) + { + struct st_lf_alloc_node *tmp= node->next; + my_free((void *)node, MYF(0)); + node= tmp; + } + lf_pinbox_destroy(&allocator->pinbox); + allocator->top= 0; +} + +/* + Allocate and return an new object. + + DESCRIPTION + Pop an unused object from the stack or malloc it is the stack is empty. + pin[0] is used, it's removed on return. +*/ +void *_lf_alloc_new(LF_PINS *pins) +{ + LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); + struct st_lf_alloc_node *node; + for (;;) + { + do + { + node= allocator->top; + _lf_pin(pins, 0, node); + } while (node != allocator->top && LF_BACKOFF); + if (!node) + { + node= (void *)my_malloc(allocator->element_size, MYF(MY_WME)); +#ifdef MY_LF_EXTRA_DEBUG + if (likely(node)) + my_atomic_add32(&allocator->mallocs, 1); +#endif + break; + } + if (my_atomic_casptr((void **)&allocator->top, (void *)&node, node->next)) + break; + } + _lf_unpin(pins, 0); + return node; +} + +/* + count the number of objects in a pool. + + NOTE + This is NOT thread-safe !!! +*/ +uint lf_alloc_pool_count(LF_ALLOCATOR *allocator) +{ + uint i; + struct st_lf_alloc_node *node; + for (node= allocator->top, i= 0; node; node= node->next, i++) + /* no op */; + return i; +} + diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c new file mode 100644 index 00000000000..770b1f9342b --- /dev/null +++ b/mysys/lf_dynarray.c @@ -0,0 +1,208 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Analog of DYNAMIC_ARRAY that never reallocs + (so no pointer into the array may ever become invalid). + + Memory is allocated in non-contiguous chunks. + This data structure is not space efficient for sparse arrays. + + Every element is aligned to sizeof(element) boundary + (to avoid false sharing if element is big enough). + + LF_DYNARRAY is a recursive structure. On the zero level + LF_DYNARRAY::level[0] it's an array of LF_DYNARRAY_LEVEL_LENGTH elements, + on the first level it's an array of LF_DYNARRAY_LEVEL_LENGTH pointers + to arrays of elements, on the second level it's an array of pointers + to arrays of pointers to arrays of elements. And so on. + + With four levels the number of elements is limited to 4311810304 + (but as in all functions index is uint, the real limit is 2^32-1) + + Actually, it's wait-free, not lock-free ;-) +*/ + +#include <my_global.h> +#include <strings.h> +#include <my_sys.h> +#include <lf.h> + +void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) +{ + bzero(array, sizeof(*array)); + array->size_of_element= element_size; + my_atomic_rwlock_init(&array->lock); +} + +static void recursive_free(void **alloc, int level) +{ + if (!alloc) + return; + + if (level) + { + int i; + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + recursive_free(alloc[i], level-1); + my_free((void *)alloc, MYF(0)); + } + else + my_free(alloc[-1], MYF(0)); +} + +void lf_dynarray_destroy(LF_DYNARRAY *array) +{ + int i; + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + recursive_free(array->level[i], i); + my_atomic_rwlock_destroy(&array->lock); +} + +static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH + + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH +}; + +static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH, +}; + +/* + Returns a valid lvalue pointer to the element number 'idx'. + Allocates memory if necessary. 
+*/ +void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr= 0; + int i; + + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; + for (; i > 0; i--) + { + if (!(ptr= *ptr_ptr)) + { + void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) + return(NULL); + if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) + ptr= alloc; + else + my_free(alloc, MYF(0)); + } + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx%= dynarray_idxes_in_prev_level[i]; + } + if (!(ptr= *ptr_ptr)) + { + void *alloc, *data; + alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + + max(array->size_of_element, sizeof(void *)), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) + return(NULL); + /* reserve the space for free() address */ + data= alloc + sizeof(void *); + { /* alignment */ + intptr mod= ((intptr)data) % array->size_of_element; + if (mod) + data+= array->size_of_element - mod; + } + ((void **)data)[-1]= alloc; /* free() will need the original pointer */ + if (my_atomic_casptr(ptr_ptr, &ptr, data)) + ptr= data; + else + my_free(alloc, MYF(0)); + } + return ptr + array->size_of_element * idx; +} + +/* + Returns a pointer to the element number 'idx' + or NULL if an element does not exists +*/ +void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr= 0; + int i; + + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; + for (; i > 0; i--) + { + if (!(ptr= *ptr_ptr)) + return(NULL); + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx %= dynarray_idxes_in_prev_level[i]; + } + if (!(ptr= *ptr_ptr)) + return(NULL); + return ptr + array->size_of_element * idx; +} + +static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, + lf_dynarray_func func, void *arg) +{ + int res, i; + if (!ptr) + return 0; + if (!level) + return func(ptr, arg); + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + if ((res= recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) + return res; + return 0; +} + +/* + Calls func(array, arg) on every array of LF_DYNARRAY_LEVEL_LENGTH elements + in lf_dynarray. + + DESCRIPTION + lf_dynarray consists of a set of arrays, LF_DYNARRAY_LEVEL_LENGTH elements + each. _lf_dynarray_iterate() calls user-supplied function on every array + from the set. It is the fastest way to scan the array, faster than + for (i=0; i < N; i++) { func(_lf_dynarray_value(dynarray, i)); } + + NOTE + if func() returns non-zero, the scan is aborted +*/ +int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) +{ + int i, res; + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + if ((res= recursive_iterate(array, array->level[i], i, func, arg))) + return res; + return 0; +} + diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c new file mode 100644 index 00000000000..832f0eb5852 --- /dev/null +++ b/mysys/lf_hash.c @@ -0,0 +1,493 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + extensible hash + + TODO + try to get rid of dummy nodes ? + for non-unique hash, count only _distinct_ values + (but how to do it in lf_hash_delete ?) +*/ +#include <my_global.h> +#include <m_string.h> +#include <my_sys.h> +#include <my_bit.h> +#include <lf.h> + +LF_REQUIRE_PINS(3); + +/* An element of the list */ +typedef struct { + intptr volatile link; /* a pointer to the next element in a listand a flag */ + uint32 hashnr; /* reversed hash number, for sorting */ + const byte *key; + uint keylen; + /* + data is stored here, directly after the keylen. + thus the pointer to data is (void*)(slist_element_ptr+1) + */ +} LF_SLIST; + +/* + a structure to pass the context (pointers two the three successive elements + in a list) from lfind to linsert/ldelete +*/ +typedef struct { + intptr volatile *prev; + LF_SLIST *curr, *next; +} CURSOR; + +/* + the last bit in LF_SLIST::link is a "deleted" flag. + the helper macros below convert it to a pure pointer or a pure flag +*/ +#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1)) +#define DELETED(V) ((V) & 1) + +/* + DESCRIPTION + Search for hashnr/key/keylen in the list starting from 'head' and + position the cursor. The list is ORDER BY hashnr, key + + RETURN + 0 - not found + 1 - found + + NOTE + cursor is positioned in either case + pins[0..2] are used, they are NOT removed on return +*/ +static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, + const byte *key, uint keylen, CURSOR *cursor, LF_PINS *pins) +{ + uint32 cur_hashnr; + const byte *cur_key; + uint cur_keylen; + intptr link; + +retry: + cursor->prev= (intptr *)head; + do { /* PTR() isn't necessary below, head is a dummy node */ + cursor->curr= (LF_SLIST *)(*cursor->prev); + _lf_pin(pins, 1, cursor->curr); + } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + for (;;) + { + if (unlikely(!cursor->curr)) + return 0; /* end of the list */ + do { + /* QQ: XXX or goto retry ? 
*/ + link= cursor->curr->link; + cursor->next= PTR(link); + _lf_pin(pins, 0, cursor->next); + } while (link != cursor->curr->link && LF_BACKOFF); + cur_hashnr= cursor->curr->hashnr; + cur_key= cursor->curr->key; + cur_keylen= cursor->curr->keylen; + if (*cursor->prev != (intptr)cursor->curr) + { + (void)LF_BACKOFF; + goto retry; + } + if (!DELETED(link)) + { + if (cur_hashnr >= hashnr) + { + int r= 1; + if (cur_hashnr > hashnr || + (r= my_strnncoll(cs, (uchar*) cur_key, cur_keylen, (uchar*) key, + keylen)) >= 0) + return !r; + } + cursor->prev= &(cursor->curr->link); + _lf_pin(pins, 2, cursor->curr); + } + else + { + /* + we found a deleted node - be nice, help the other thread + and remove this deleted node + */ + if (my_atomic_casptr((void **)cursor->prev, + (void **)&cursor->curr, cursor->next)) + _lf_alloc_free(pins, cursor->curr); + else + { + (void)LF_BACKOFF; + goto retry; + } + } + cursor->curr= cursor->next; + _lf_pin(pins, 1, cursor->curr); + } +} + +/* + DESCRIPTION + insert a 'node' in the list that starts from 'head' in the correct + position (as found by lfind) + + RETURN + 0 - inserted + not 0 - a pointer to a duplicate (not pinned and thus unusable) + + NOTE + it uses pins[0..2], on return all pins are removed. + if there're nodes with the same key value, a new node is added before them. +*/ +static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, + LF_SLIST *node, LF_PINS *pins, uint flags) +{ + CURSOR cursor; + int res; + + for (;;) + { + if (lfind(head, cs, node->hashnr, node->key, node->keylen, + &cursor, pins) && + (flags & LF_HASH_UNIQUE)) + { + res= 0; /* duplicate found */ + break; + } + else + { + node->link= (intptr)cursor.curr; + DBUG_ASSERT(node->link != (intptr)node); /* no circular references */ + DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */ + if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + { + res= 1; /* inserted ok */ + break; + } + } + } + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + /* + Note that cursor.curr is not pinned here and the pointer is unreliable, + the object may dissapear anytime. But if it points to a dummy node, the + pointer is safe, because dummy nodes are never freed - initialize_bucket() + uses this fact. + */ + return res ? 0 : cursor.curr; +} + +/* + DESCRIPTION + deletes a node as identified by hashnr/keey/keylen from the list + that starts from 'head' + + RETURN + 0 - ok + 1 - not found + + NOTE + it uses pins[0..2], on return all pins are removed. +*/ +static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, + const byte *key, uint keylen, LF_PINS *pins) +{ + CURSOR cursor; + int res; + + for (;;) + { + if (!lfind(head, cs, hashnr, key, keylen, &cursor, pins)) + { + res= 1; /* not found */ + break; + } + else + { + /* mark the node deleted */ + if (my_atomic_casptr((void **)&(cursor.curr->link), + (void **)&cursor.next, + (void *)(((intptr)cursor.next) | 1))) + { + /* and remove it from the list */ + if (my_atomic_casptr((void **)cursor.prev, + (void **)&cursor.curr, cursor.next)) + _lf_alloc_free(pins, cursor.curr); + else + { + /* + somebody already "helped" us and removed the node ? + Let's check if we need to help that someone too! 
+ (to ensure the number of "set DELETED flag" actions + is equal to the number of "remove from the list" actions) + */ + lfind(head, cs, hashnr, key, keylen, &cursor, pins); + } + res= 0; + break; + } + } + } + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + return res; +} + +/* + DESCRIPTION + searches for a node as identified by hashnr/keey/keylen in the list + that starts from 'head' + + RETURN + 0 - not found + node - found + + NOTE + it uses pins[0..2], on return the pin[2] keeps the node found + all other pins are removed. +*/ +static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, + uint32 hashnr, const byte *key, uint keylen, + LF_PINS *pins) +{ + CURSOR cursor; + int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins); + if (res) + _lf_pin(pins, 2, cursor.curr); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + return res ? cursor.curr : 0; +} + +static inline const byte* hash_key(const LF_HASH *hash, + const byte *record, uint *length) +{ + if (hash->get_key) + return (*hash->get_key)(record, length, 0); + *length= hash->key_length; + return record + hash->key_offset; +} + +/* + compute the hash key value from the raw key. + note, that the hash value is limited to 2^31, because we need one + bit to distinguish between normal and dummy nodes. +*/ +static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) +{ + ulong nr1= 1, nr2= 4; + hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen, + &nr1, &nr2); + return nr1 & INT_MAX32; +} + +#define MAX_LOAD 1.0 /* average number of elements in a bucket */ + +static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); + +/* + Initializes lf_hash, the arguments are compatible with hash_init +*/ +void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, + uint key_offset, uint key_length, hash_get_key get_key, + CHARSET_INFO *charset) +{ + lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, + offsetof(LF_SLIST, key)); + lf_dynarray_init(&hash->array, sizeof(LF_SLIST *)); + hash->size= 1; + hash->count= 0; + hash->element_size= element_size; + hash->flags= flags; + hash->charset= charset ? charset : &my_charset_bin; + hash->key_offset= key_offset; + hash->key_length= key_length; + hash->get_key= get_key; + DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length); +} + +void lf_hash_destroy(LF_HASH *hash) +{ + LF_SLIST *el, **head= (LF_SLIST **)_lf_dynarray_value(&hash->array, 0); + + if (unlikely(!head)) + return; + el= *head; + + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + lf_alloc_direct_free(&hash->alloc, el); /* normal node */ + else + my_free((void *)el, MYF(0)); /* dummy node */ + el= (LF_SLIST *)next; + } + lf_alloc_destroy(&hash->alloc); + lf_dynarray_destroy(&hash->array); +} + +/* + DESCRIPTION + inserts a new element to a hash. it will have a _copy_ of + data, not a pointer to it. 
+ + RETURN + 0 - inserted + 1 - didn't (unique key conflict) + -1 - out of memory + + NOTE + see linsert() for pin usage notes +*/ +int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) +{ + int csize, bucket, hashnr; + LF_SLIST *node, * volatile *el; + + lf_rwlock_by_pins(pins); + node= (LF_SLIST *)_lf_alloc_new(pins); + if (unlikely(!node)) + return -1; + memcpy(node+1, data, hash->element_size); + node->key= hash_key(hash, (byte *)(node+1), &node->keylen); + hashnr= calc_hash(hash, node->key, node->keylen); + bucket= hashnr % hash->size; + el= _lf_dynarray_lvalue(&hash->array, bucket); + if (unlikely(!el)) + return -1; + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return -1; + node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */ + if (linsert(el, hash->charset, node, pins, hash->flags)) + { + _lf_alloc_free(pins, node); + lf_rwunlock_by_pins(pins); + return 1; + } + csize= hash->size; + if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) + my_atomic_cas32(&hash->size, &csize, csize*2); + lf_rwunlock_by_pins(pins); + return 0; +} + +/* + DESCRIPTION + deletes an element with the given key from the hash (if a hash is + not unique and there're many elements with this key - the "first" + matching element is deleted) + RETURN + 0 - deleted + 1 - didn't (not found) + -1 - out of memory + NOTE + see ldelete() for pin usage notes +*/ +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) +{ + LF_SLIST * volatile *el; + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); + + bucket= hashnr % hash->size; + lf_rwlock_by_pins(pins); + el= _lf_dynarray_lvalue(&hash->array, bucket); + if (unlikely(!el)) + return -1; + /* + note that we still need to initialize_bucket here, + we cannot return "node not found", because an old bucket of that + node may've been split and the node was assigned to a new bucket + that was never accessed before and thus is not initialized. + */ + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return -1; + if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, + (byte *)key, keylen, pins)) + { + lf_rwunlock_by_pins(pins); + return 1; + } + my_atomic_add32(&hash->count, -1); + lf_rwunlock_by_pins(pins); + return 0; +} + +/* + RETURN + a pointer to an element with the given key (if a hash is not unique and + there're many elements with this key - the "first" matching element) + NULL if nothing is found + MY_ERRPTR if OOM + + NOTE + see lsearch() for pin usage notes +*/ +void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) +{ + LF_SLIST * volatile *el, *found; + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); + + bucket= hashnr % hash->size; + lf_rwlock_by_pins(pins); + el= _lf_dynarray_lvalue(&hash->array, bucket); + if (unlikely(!el)) + return MY_ERRPTR; + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return MY_ERRPTR; + found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1, + (byte *)key, keylen, pins); + lf_rwunlock_by_pins(pins); + return found ? 
found+1 : 0; +} + +static const byte *dummy_key= ""; + +/* + RETURN + 0 - ok + -1 - out of memory +*/ +static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, + uint bucket, LF_PINS *pins) +{ + uint parent= my_clear_highest_bit(bucket); + LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); + LF_SLIST **tmp= 0, *cur; + LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent); + if (unlikely(!el || !dummy)) + return -1; + if (*el == NULL && bucket && + unlikely(initialize_bucket(hash, el, parent, pins))) + return -1; + dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */ + dummy->key= (char*) dummy_key; + dummy->keylen= 0; + if ((cur= linsert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE))) + { + my_free((void *)dummy, MYF(0)); + dummy= cur; + } + my_atomic_casptr((void **)node, (void **)&tmp, dummy); + /* + note that if the CAS above failed (after linsert() succeeded), + it would mean that some other thread has executed linsert() for + the same dummy node, its linsert() failed, it picked up our + dummy node (in "dummy= cur") and executed the same CAS as above. + Which means that even if CAS above failed we don't need to retry, + and we should not free(dummy) - there's no memory leak here + */ + return 0; +} diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 1fef8aac170..6845e63dc33 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -105,6 +105,7 @@ #include <keycache.h> #include "my_static.h" #include <m_string.h> +#include <my_bit.h> #include <errno.h> #include <stdarg.h> @@ -1261,12 +1262,12 @@ static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block) KEYCACHE_THREAD_TRACE("unlink_block"); #if defined(KEYCACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0); keycache->blocks_available--; KEYCACHE_DBUG_PRINT("unlink_block", ("unlinked block %u status=%x #requests=%u #available=%u", BLOCK_NUMBER(block), block->status, block->requests, keycache->blocks_available)); - KEYCACHE_DBUG_ASSERT(keycache->blocks_available >= 0); #endif } @@ -2359,9 +2360,9 @@ restart: (block->hash_link->diskpos == filepos))); *page_st=page_status; KEYCACHE_DBUG_PRINT("find_key_block", - ("fd: %d pos: %lu block->status: %u page_status: %u", + ("fd: %d pos: %lu block->status: %u page_status: %d", file, (ulong) filepos, block->status, - (uint) page_status)); + page_status)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_keycache2", @@ -2512,17 +2513,19 @@ static void read_block(KEY_CACHE *keycache, */ uchar *key_cache_read(KEY_CACHE *keycache, - File file, my_off_t filepos, int level, - uchar *buff, uint length, - uint block_length __attribute__((unused)), - int return_buffer __attribute__((unused))) + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int return_buffer __attribute__((unused))) { my_bool locked_and_incremented= FALSE; int error=0; uchar *start= buff; DBUG_ENTER("key_cache_read"); - DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", - (uint) file, (ulong) filepos, length)); + DBUG_PRINT("enter", ("fd: %u pos: %lu page: %lu length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length)); if (keycache->key_cache_inited) { @@ -2533,12 +2536,13 @@ uchar *key_cache_read(KEY_CACHE *keycache, uint status; int page_st; - /* + + /* When the key cache is once initialized, we use the cache_lock to reliably distinguish the cases of normal operation, resizing, and disabled cache. 
We always increment and decrement 'cnt_for_resize_op' so that a resizer can wait for pending I/O. - */ + */ keycache_pthread_mutex_lock(&keycache->cache_lock); /* Cache resizing has two phases: Flushing and re-initializing. In @@ -2565,7 +2569,7 @@ uchar *key_cache_read(KEY_CACHE *keycache, do { /* Cache could be disabled in a later iteration. */ - + if (!keycache->can_be_used) goto no_key_cache; /* Start reading at the beginning of the cache block. */ @@ -2975,9 +2979,10 @@ int key_cache_write(KEY_CACHE *keycache, int error=0; DBUG_ENTER("key_cache_write"); DBUG_PRINT("enter", - ("fd: %u pos: %lu length: %u block_length: %u key_block_length: %u", - (uint) file, (ulong) filepos, length, block_length, - keycache ? keycache->key_cache_block_size : 0)); + ("fd: %u pos: %lu page: %lu length: %u block_length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length, block_length)); if (!dont_write) { @@ -3169,7 +3174,7 @@ int key_cache_write(KEY_CACHE *keycache, if (!dont_write) { - /* Not used in the server. buff has been written to disk at start. */ + /* Not used in the server. buff has been written to disk at start. */ if ((block->status & BLOCK_CHANGED) && (!offset && read_length >= keycache->key_cache_block_size)) link_to_file_list(keycache, block, block->hash_link->file, 1); @@ -3183,7 +3188,6 @@ int key_cache_write(KEY_CACHE *keycache, a flush. */ block->status&= ~BLOCK_FOR_UPDATE; - set_if_smaller(block->offset, offset); set_if_bigger(block->length, read_length+offset); diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c index 6227a05ce06..9ea5678da9a 100644 --- a/mysys/mf_keycaches.c +++ b/mysys/mf_keycaches.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003 MySQL AB +/* Copyright (C) 2003-2007 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,269 +25,7 @@ #include <keycache.h> #include <hash.h> #include <m_string.h> - -/***************************************************************************** - General functions to handle SAFE_HASH objects. - - A SAFE_HASH object is used to store the hash, the mutex and default value - needed by the rest of the key cache code. - This is a separate struct to make it easy to later reuse the code for other - purposes - - All entries are linked in a list to allow us to traverse all elements - and delete selected ones. (HASH doesn't allow any easy ways to do this). 
-*****************************************************************************/ - -/* - Struct to store a key and pointer to object -*/ - -typedef struct st_safe_hash_entry -{ - uchar *key; - uint length; - uchar *data; - struct st_safe_hash_entry *next, **prev; -} SAFE_HASH_ENTRY; - - -typedef struct st_safe_hash_with_default -{ -#ifdef THREAD - rw_lock_t mutex; -#endif - HASH hash; - uchar *default_value; - SAFE_HASH_ENTRY *root; -} SAFE_HASH; - - -/* - Free a SAFE_HASH_ENTRY - - This function is called by the hash object on delete -*/ - -static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) -{ - DBUG_ENTER("free_assign_entry"); - my_free((uchar*) entry, MYF(0)); - DBUG_VOID_RETURN; -} - - -/* Get key and length for a SAFE_HASH_ENTRY */ - -static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, size_t *length, - my_bool not_used __attribute__((unused))) -{ - *length=entry->length; - return (uchar*) entry->key; -} - - -/* - Init a SAFE_HASH object - - SYNOPSIS - safe_hash_init() - hash safe_hash handler - elements Expected max number of elements - default_value default value - - NOTES - In case of error we set hash->default_value to 0 to allow one to call - safe_hash_free on an object that couldn't be initialized. - - RETURN - 0 ok - 1 error -*/ - -static my_bool safe_hash_init(SAFE_HASH *hash, uint elements, - uchar *default_value) -{ - DBUG_ENTER("safe_hash"); - if (hash_init(&hash->hash, &my_charset_bin, elements, - 0, 0, (hash_get_key) safe_hash_entry_get, - (void (*)(void*)) safe_hash_entry_free, 0)) - { - hash->default_value= 0; - DBUG_RETURN(1); - } - my_rwlock_init(&hash->mutex, 0); - hash->default_value= default_value; - hash->root= 0; - DBUG_RETURN(0); -} - - -/* - Free a SAFE_HASH object - - NOTES - This is safe to call on any object that has been sent to safe_hash_init() -*/ - -static void safe_hash_free(SAFE_HASH *hash) -{ - /* - Test if safe_hash_init succeeded. This will also guard us against multiple - free calls. - */ - if (hash->default_value) - { - hash_free(&hash->hash); - rwlock_destroy(&hash->mutex); - hash->default_value=0; - } -} - -/* - Return the value stored for a key or default value if no key -*/ - -static uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length) -{ - uchar *result; - DBUG_ENTER("safe_hash_search"); - rw_rdlock(&hash->mutex); - result= hash_search(&hash->hash, key, length); - rw_unlock(&hash->mutex); - if (!result) - result= hash->default_value; - else - result= ((SAFE_HASH_ENTRY*) result)->data; - DBUG_PRINT("exit",("data: 0x%lx", (long) result)); - DBUG_RETURN(result); -} - - -/* - Associate a key with some data - - SYONOPSIS - safe_hash_set() - hash Hash handle - key key (path to table etc..) - length Length of key - data data to to associate with the data - - NOTES - This can be used both to insert a new entry and change an existing - entry. - If one associates a key with the default key cache, the key is deleted - - RETURN - 0 ok - 1 error (Can only be EOM). In this case my_message() is called. -*/ - -static my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length, - uchar *data) -{ - SAFE_HASH_ENTRY *entry; - my_bool error= 0; - DBUG_ENTER("safe_hash_set"); - DBUG_PRINT("enter",("key: %.*s data: 0x%lx", length, key, (long) data)); - - rw_wrlock(&hash->mutex); - entry= (SAFE_HASH_ENTRY*) hash_search(&hash->hash, key, length); - - if (data == hash->default_value) - { - /* - The key is to be associated with the default entry. 
In this case - we can just delete the entry (if it existed) from the hash as a - search will return the default entry - */ - if (!entry) /* nothing to do */ - goto end; - /* unlink entry from list */ - if ((*entry->prev= entry->next)) - entry->next->prev= entry->prev; - hash_delete(&hash->hash, (uchar*) entry); - goto end; - } - if (entry) - { - /* Entry existed; Just change the pointer to point at the new data */ - entry->data= data; - } - else - { - if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length, - MYF(MY_WME)))) - { - error= 1; - goto end; - } - entry->key= (uchar*) (entry +1); - memcpy((char*) entry->key, (char*) key, length); - entry->length= length; - entry->data= data; - /* Link entry to list */ - if ((entry->next= hash->root)) - entry->next->prev= &entry->next; - entry->prev= &hash->root; - hash->root= entry; - if (my_hash_insert(&hash->hash, (uchar*) entry)) - { - /* This can only happen if hash got out of memory */ - my_free((char*) entry, MYF(0)); - error= 1; - goto end; - } - } - -end: - rw_unlock(&hash->mutex); - DBUG_RETURN(error); -} - - -/* - Change all entres with one data value to another data value - - SYONOPSIS - safe_hash_change() - hash Hash handle - old_data Old data - new_data Change all 'old_data' to this - - NOTES - We use the linked list to traverse all elements in the hash as - this allows us to delete elements in the case where 'new_data' is the - default value. -*/ - -static void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data) -{ - SAFE_HASH_ENTRY *entry, *next; - DBUG_ENTER("safe_hash_set"); - - rw_wrlock(&hash->mutex); - - for (entry= hash->root ; entry ; entry= next) - { - next= entry->next; - if (entry->data == old_data) - { - if (new_data == hash->default_value) - { - if ((*entry->prev= entry->next)) - entry->next->prev= entry->prev; - hash_delete(&hash->hash, (uchar*) entry); - } - else - entry->data= new_data; - } - } - - rw_unlock(&hash->mutex); - DBUG_VOID_RETURN; -} - +#include "my_safehash.h" /***************************************************************************** Functions to handle the key cache objects @@ -315,6 +53,7 @@ void multi_keycache_free(void) multi_key_cache_search() key key to find (usually table path) uint length Length of key. 
+ def Default value if no key cache NOTES This function is coded in such a way that we will return the @@ -325,11 +64,13 @@ void multi_keycache_free(void) key cache to use */ -KEY_CACHE *multi_key_cache_search(uchar *key, uint length) +KEY_CACHE *multi_key_cache_search(uchar *key, uint length, + KEY_CACHE *def) { if (!key_cache_hash.hash.records) - return dflt_key_cache; - return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length); + return def; + return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length, + (void*) def); } @@ -361,3 +102,5 @@ void multi_key_cache_change(KEY_CACHE *old_data, { safe_hash_change(&key_cache_hash, (uchar*) old_data, (uchar*) new_data); } + + diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c index 6a30267eb80..aa04d55f624 100644 --- a/mysys/my_atomic.c +++ b/mysys/my_atomic.c @@ -17,11 +17,10 @@ #include <my_pthread.h> #ifndef HAVE_INLINE -/* - the following will cause all inline functions to be instantiated -*/ +/* the following will cause all inline functions to be instantiated */ #define HAVE_INLINE -#define static extern +#undef STATIC_INLINE +#define STATIC_INLINE extern #endif #include <my_atomic.h> @@ -35,7 +34,7 @@ */ int my_atomic_initialize() { - DBUG_ASSERT(sizeof(intptr) == sizeof(void *)); + compile_time_assert(sizeof(intptr) == sizeof(void *)); /* currently the only thing worth checking is SMP/UP issue */ #ifdef MY_ATOMIC_MODE_DUMMY return my_getncpus() == 1 ? MY_ATOMIC_OK : MY_ATOMIC_NOT_1CPU; diff --git a/mysys/my_bit.c b/mysys/my_bit.c index 5a9b1187c83..2881eb1ebd2 100644 --- a/mysys/my_bit.c +++ b/mysys/my_bit.c @@ -13,23 +13,18 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* Some useful bit functions */ +#include <my_global.h> -#include "mysys_priv.h" - -/* - Find smallest X in 2^X >= value - This can be used to divide a number with value by doing a shift instead -*/ +#ifndef HAVE_INLINE +/* the following will cause all inline functions to be instantiated */ +#define HAVE_INLINE +#undef STATIC_INLINE +#define STATIC_INLINE extern +#endif -uint my_bit_log2(ulong value) -{ - uint bit; - for (bit=0 ; value > 1 ; value>>=1, bit++) ; - return bit; -} +#include <my_bit.h> -static char nbits[256] = { +const char _my_bits_nbits[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, @@ -48,60 +43,29 @@ static char nbits[256] = { 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; -uint my_count_bits(ulonglong v) -{ -#if SIZEOF_LONG_LONG > 4 - /* The following code is a bit faster on 16 bit machines than if we would - only shift v */ - ulong v2=(ulong) (v >> 32); - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)] + - nbits[(uchar) (v2)] + - nbits[(uchar) (v2 >> 8)] + - nbits[(uchar) (v2 >> 16)] + - nbits[(uchar) (v2 >> 24)]); -#else - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)]); -#endif -} - -uint my_count_bits_ushort(ushort v) -{ - return nbits[v]; -} - - /* - Next highest power of two - - SYNOPSIS - my_round_up_to_next_power() - v Value to check - - RETURN - Next or equal power of 2 - Note: 0 will return 0 - - NOTES - Algorithm by Sean Anderson, according to: - http://graphics.stanford.edu/~seander/bithacks.html - (Orignal code public domain) - - Comments shows how this works with 
01100000000000000000000000001011 + perl -e 'print map{", 0x".unpack H2,pack B8,unpack b8,chr$_}(0..255)' */ +const uchar _my_bits_reverse_table[256]={ +0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, +0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, +0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, +0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, +0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, +0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, +0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, +0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, +0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, +0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, +0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, +0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, +0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, +0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, +0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, +0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, +0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, +0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, +0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, +0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; -uint32 my_round_up_to_next_power(uint32 v) -{ - v--; /* 01100000000000000000000000001010 */ - v|= v >> 1; /* 01110000000000000000000000001111 */ - v|= v >> 2; /* 01111100000000000000000000001111 */ - v|= v >> 4; /* 01111111110000000000000000001111 */ - v|= v >> 8; /* 01111111111111111100000000001111 */ - v|= v >> 16; /* 01111111111111111111111111111111 */ - return v+1; /* 10000000000000000000000000000000 */ -} diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index 10eff40b9ed..e127b2584ae 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -38,6 +38,7 @@ #include "mysys_priv.h" #include <my_bitmap.h> #include <m_string.h> +#include <my_bit.h> void create_last_word_mask(MY_BITMAP *map) { diff --git a/mysys/my_create.c b/mysys/my_create.c index 55878318ead..454ccf6ab7d 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -52,6 +52,13 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, fd = open(FileName, access_flags); #endif + if ((MyFlags & MY_SYNC_DIR) && (fd >=0) && + my_sync_dir_by_file(FileName, MyFlags)) + { + my_close(fd, MyFlags); + fd= -1; + } + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE, EE_CANTCREATEFILE, MyFlags)); } /* my_create */ diff --git a/mysys/my_delete.c b/mysys/my_delete.c index bac3e2513e1..14374fd3fa8 100644 --- a/mysys/my_delete.c +++ b/mysys/my_delete.c @@ -29,6 +29,9 @@ int my_delete(const char *name, myf MyFlags) my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)), name,errno); } + else if ((MyFlags & MY_SYNC_DIR) && + my_sync_dir_by_file(name, MyFlags)) + err= -1; DBUG_RETURN(err); } /* my_delete */ diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c index 2fd7eed7778..8f7cc5b7029 100644 --- a/mysys/my_getsystime.c +++ b/mysys/my_getsystime.c @@ -34,10 +34,6 @@ ulonglong my_getsystime() LARGE_INTEGER t_cnt; 
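  /*
    One-time initialization (descriptive note, not in the patch): compute
    the offset that maps the performance counter onto the system FILETIME
    clock, so my_getsystime() can return 100 ns units.
  */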
if (!offset) { - /* strictly speaking there should be a mutex to protect - initialization section. But my_getsystime() is called from - UUID() code, and UUID() calls are serialized with a mutex anyway - */ LARGE_INTEGER li; FILETIME ft; GetSystemTimeAsFileTime(&ft); diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 1c3bb20426e..78cc10ac840 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -16,9 +16,11 @@ MA 02111-1307, USA */ #include <my_global.h> -#include "my_handler.h" +#include <m_ctype.h> +#include <my_base.h> +#include <my_handler.h> -int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, +int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, uchar *b, uint b_length, my_bool part_key, my_bool skip_end_space) { @@ -174,7 +176,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag=mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag=ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -187,7 +189,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, { uint length=(uint) (end-a), a_length=length, b_length=length; if (piks && - (flag= mi_compare_text(keyseg->charset, a, a_length, b, b_length, + (flag= ha_compare_text(keyseg->charset, a, a_length, b, b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -235,7 +237,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag= mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag= ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool) ((nextflag & (SEARCH_FIND | @@ -482,12 +484,15 @@ end: DESCRIPTION Find the first NULL value in index-suffix values tuple. - TODO Consider optimizing this fuction or its use so we don't search for - NULL values in completely NOT NULL index suffixes. + + TODO + Consider optimizing this function or its use so we don't search for + NULL values in completely NOT NULL index suffixes. RETURN - First key part that has NULL as value in values tuple, or the last key part - (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain NULLs. + First key part that has NULL as value in values tuple, or the last key + part (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain + NULLs. 
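+
+  EXAMPLE
+    A minimal sketch (hypothetical caller; assumes the HA_KEYTYPE_END
+    terminator from my_base.h):
+
+      HA_KEYSEG *seg= ha_find_null(keyinfo->seg, key_buff);
+      if (seg->type != HA_KEYTYPE_END)          /* tuple contains a NULL */
+        handle_null_in_tuple();                 /* hypothetical helper */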
*/ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) diff --git a/mysys/my_init.c b/mysys/my_init.c index c1337205eb4..45601f54cfa 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -43,6 +43,7 @@ static void netware_init(); my_bool my_init_done= 0; uint mysys_usage_id= 0; /* Incremented for each my_init() */ +ulong my_thread_stack_size= 65536; static ulong atoi_octal(const char *str) { diff --git a/mysys/my_open.c b/mysys/my_open.c index 5ad48e66b68..7efaed90e2d 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -161,6 +161,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type } pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); + fd= -1; my_errno=ENOMEM; } else diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 6e98132db73..de7a2b611ed 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -63,12 +63,12 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, pthread_mutex_unlock(&my_file_info[Filedes].mutex); #else if ((error= ((readbytes= pread(Filedes, Buffer, Count, offset)) != Count))) - my_errno= errno; + my_errno= errno ? errno : -1; #endif if (error || readbytes != Count) { DBUG_PRINT("warning",("Read only %d bytes off %u from %d, errno: %d", - (int) readbytes, (uint) Count,Filedes,my_errno)); + (int) readbytes, (uint) Count,Filedes,my_errno)); #ifdef THREAD if ((readbytes == 0 || readbytes == (size_t) -1) && errno == EINTR) { @@ -115,7 +115,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, RETURN (size_t) -1 Error # Number of bytes read - */ +*/ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, my_off_t offset, myf MyFlags) diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 6a6aa6a5796..64dbac955ea 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -16,8 +16,9 @@ #include "mysys_priv.h" #include <my_dir.h> #include "mysys_err.h" - +#include "m_string.h" #undef my_rename + /* On unix rename deletes to file if it exists */ int my_rename(const char *from, const char *to, myf MyFlags) @@ -60,5 +61,18 @@ int my_rename(const char *from, const char *to, myf MyFlags) if (MyFlags & (MY_FAE+MY_WME)) my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno); } + else if (MyFlags & MY_SYNC_DIR) + { +#ifdef NEED_EXPLICIT_SYNC_DIR + /* do only the needed amount of syncs: */ + char dir_from[FN_REFLEN], dir_to[FN_REFLEN]; + dirname_part(dir_from, from); + dirname_part(dir_to, to); + if (my_sync_dir(dir_from, MyFlags) || + (strcmp(dir_from, dir_to) && + my_sync_dir(dir_to, MyFlags))) + error= -1; +#endif + } DBUG_RETURN(error); } /* my_rename */ diff --git a/mysys/my_safehash.c b/mysys/my_safehash.c new file mode 100644 index 00000000000..57f408942bf --- /dev/null +++ b/mysys/my_safehash.c @@ -0,0 +1,297 @@ +/* Copyright (C) 2003-2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Handling of multiple key caches + + The idea is to have a thread safe hash on the table name, + with a default key cache value that is returned if the table name is not in + the cache. +*/ + +#include "mysys_priv.h" +#include <m_string.h> +#include "my_safehash.h" + +/***************************************************************************** + General functions to handle SAFE_HASH objects. + + A SAFE_HASH object is used to store the hash, the mutex and default value + needed by the rest of the key cache code. + This is a separate struct to make it easy to later reuse the code for other + purposes + + All entries are linked in a list to allow us to traverse all elements + and delete selected ones. (HASH doesn't allow any easy ways to do this). +*****************************************************************************/ + + +/* + Free a SAFE_HASH_ENTRY + + SYNOPSIS + safe_hash_entry_free() + entry The entry which should be freed + + NOTE + This function is called by the hash object on delete +*/ + +static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) +{ + DBUG_ENTER("safe_hash_entry_free"); + my_free((gptr) entry, MYF(0)); + DBUG_VOID_RETURN; +} + + +/* + Get key and length for a SAFE_HASH_ENTRY + + SYNOPSIS + safe_hash_entry_get() + entry The entry for which the key should be returned + length Length of the key + + RETURN + # reference on the key +*/ + +static byte *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, uint *length, + my_bool not_used __attribute__((unused))) +{ + *length= entry->length; + return (byte*) entry->key; +} + + +/* + Init a SAFE_HASH object + + SYNOPSIS + safe_hash_init() + hash safe_hash handler + elements Expected max number of elements + default_value default value + + NOTES + In case of error we set hash->default_value to 0 to allow one to call + safe_hash_free on an object that couldn't be initialized. + + RETURN + 0 OK + 1 error +*/ + +my_bool safe_hash_init(SAFE_HASH *hash, uint elements, + byte *default_value) +{ + DBUG_ENTER("safe_hash_init"); + if (hash_init(&hash->hash, &my_charset_bin, elements, + 0, 0, (hash_get_key) safe_hash_entry_get, + (void (*)(void*)) safe_hash_entry_free, 0)) + { + hash->default_value= 0; + DBUG_RETURN(1); + } + my_rwlock_init(&hash->mutex, 0); + hash->default_value= default_value; + hash->root= 0; + DBUG_RETURN(0); +} + + +/* + Free a SAFE_HASH object + + SYNOPSIS + safe_hash_free() + hash Hash handle + + NOTES + This is safe to call on any object that has been sent to safe_hash_init() +*/ + +void safe_hash_free(SAFE_HASH *hash) +{ + /* + Test if safe_hash_init succeeded. This will also guard us against multiple + free calls. + */ + if (hash->default_value) + { + hash_free(&hash->hash); + rwlock_destroy(&hash->mutex); + hash->default_value=0; + } +} + + +/* + Return the value stored for a key or default value if no key + + SYNOPSIS + safe_hash_search() + hash Hash handle + key key (path to table etc..) 
+      length              Length of key
+      def                 Default value of data
+
+  RETURN
+    # data associated with the key, or the default value if the key was
+      not found
+*/
+
+byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length,
+                       byte *def)
+{
+  byte *result;
+  DBUG_ENTER("safe_hash_search");
+  rw_rdlock(&hash->mutex);
+  result= hash_search(&hash->hash, key, length);
+  rw_unlock(&hash->mutex);
+  if (!result)
+    result= def;
+  else
+    result= ((SAFE_HASH_ENTRY*) result)->data;
+  DBUG_PRINT("exit",("data: 0x%lx", (long) result));
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Associate a key with some data
+
+  SYNOPSIS
+    safe_hash_set()
+      hash                Hash handle
+      key                 Key (path to table etc.)
+      length              Length of key
+      data                Data to associate with the key
+
+  NOTES
+    This can be used both to insert a new entry and to change an existing
+    entry.
+    If one associates a key with the default value, the entry is deleted.
+
+  RETURN
+    0  OK
+    1  error (can only be out of memory). In this case my_message() is
+       called.
+*/
+
+my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length,
+                      byte *data)
+{
+  SAFE_HASH_ENTRY *entry;
+  my_bool error= 0;
+  DBUG_ENTER("safe_hash_set");
+  DBUG_PRINT("enter",("key: %.*s  data: 0x%lx", length, key, (long) data));
+
+  rw_wrlock(&hash->mutex);
+  entry= (SAFE_HASH_ENTRY*) hash_search(&hash->hash, key, length);
+
+  if (data == hash->default_value)
+  {
+    /*
+      The key is to be associated with the default entry. In this case
+      we can just delete the entry (if it existed) from the hash as a
+      search will return the default entry.
+    */
+    if (!entry)                                 /* nothing to do */
+      goto end;
+    /* unlink entry from list */
+    if ((*entry->prev= entry->next))
+      entry->next->prev= entry->prev;
+    hash_delete(&hash->hash, (byte*) entry);
+    goto end;
+  }
+  if (entry)
+  {
+    /* Entry existed; just change the pointer to point at the new data */
+    entry->data= data;
+  }
+  else
+  {
+    if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length,
+                                               MYF(MY_WME))))
+    {
+      error= 1;
+      goto end;
+    }
+    entry->key= (byte*) (entry + 1);
+    memcpy((char*) entry->key, (char*) key, length);
+    entry->length= length;
+    entry->data= data;
+    /* Link entry to list */
+    if ((entry->next= hash->root))
+      entry->next->prev= &entry->next;
+    entry->prev= &hash->root;
+    hash->root= entry;
+    if (my_hash_insert(&hash->hash, (byte*) entry))
+    {
+      /* This can only happen if the hash got out of memory */
+      my_free((char*) entry, MYF(0));
+      error= 1;
+      goto end;
+    }
+  }
+
+end:
+  rw_unlock(&hash->mutex);
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Change all entries with one data value to another data value
+
+  SYNOPSIS
+    safe_hash_change()
+      hash                Hash handle
+      old_data            Old data
+      new_data            Change all 'old_data' to this
+
+  NOTES
+    We use the linked list to traverse all elements in the hash as
+    this allows us to delete elements in the case where 'new_data' is the
+    default value.
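+
+  EXAMPLE
+    A minimal sketch (hypothetical caller names): when a key cache
+    'old_cache' is destroyed, repoint every table that used it at the
+    default cache; because 'def_cache' is the hash default value,
+    safe_hash_change() simply deletes those entries:
+
+      safe_hash_change(&key_cache_hash, (byte*) old_cache,
+                       (byte*) def_cache);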
+*/ + +void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data) +{ + SAFE_HASH_ENTRY *entry, *next; + DBUG_ENTER("safe_hash_change"); + + rw_wrlock(&hash->mutex); + + for (entry= hash->root ; entry ; entry= next) + { + next= entry->next; + if (entry->data == old_data) + { + if (new_data == hash->default_value) + { + if ((*entry->prev= entry->next)) + entry->next->prev= entry->prev; + hash_delete(&hash->hash, (byte*) entry); + } + else + entry->data= new_data; + } + } + + rw_unlock(&hash->mutex); + DBUG_VOID_RETURN; +} diff --git a/mysys/my_safehash.h b/mysys/my_safehash.h new file mode 100644 index 00000000000..53845a5fec7 --- /dev/null +++ b/mysys/my_safehash.h @@ -0,0 +1,58 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Handling of multiple key caches + + The idea is to have a thread safe hash on the table name, + with a default key cache value that is returned if the table name is not in + the cache. +*/ + +#include <hash.h> + +/* + Struct to store a key and pointer to object +*/ + +typedef struct st_safe_hash_entry +{ + byte *key; + uint length; + byte *data; + struct st_safe_hash_entry *next, **prev; +} SAFE_HASH_ENTRY; + + +typedef struct st_safe_hash_with_default +{ +#ifdef THREAD + rw_lock_t mutex; +#endif + HASH hash; + byte *default_value; + SAFE_HASH_ENTRY *root; +} SAFE_HASH; + + +my_bool safe_hash_init(SAFE_HASH *hash, uint elements, + byte *default_value); +void safe_hash_free(SAFE_HASH *hash); +byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, + byte *def); +my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, + byte *data); +void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data); diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c index 810c0c72632..98059ccd508 100644 --- a/mysys/my_symlink.c +++ b/mysys/my_symlink.c @@ -84,6 +84,8 @@ int my_symlink(const char *content, const char *linkname, myf MyFlags) if (MyFlags & MY_WME) my_error(EE_CANT_SYMLINK, MYF(0), linkname, content, errno); } + else if ((MyFlags & MY_SYNC_DIR) && my_sync_dir_by_file(linkname, MyFlags)) + result= -1; DBUG_RETURN(result); #endif /* HAVE_READLINK */ } diff --git a/mysys/my_sync.c b/mysys/my_sync.c index 64fce3aac21..ab3fc89e0d3 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -48,6 +48,16 @@ int my_sync(File fd, myf my_flags) do { +#if defined(F_FULLFSYNC) + /* + In Mac OS X >= 10.3 this call is safer than fsync() (it forces the + disk's cache and guarantees ordered writes). 
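+      (Plain fsync() on Mac OS X pushes the data to the drive but not
+      through the drive's own write cache; F_FULLFSYNC also asks the
+      drive to flush that cache, trading some throughput for real
+      durability.)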
+    */
+    if (!(res= fcntl(fd, F_FULLFSYNC, 0)))
+      break;                                    /* ok */
+    /* Some file systems don't support F_FULLFSYNC and fail above: */
+    DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back"));
+#endif
 #if defined(HAVE_FDATASYNC)
     res= fdatasync(fd);
 #elif defined(HAVE_FSYNC)
@@ -55,6 +65,7 @@ int my_sync(File fd, myf my_flags)
 #elif defined(__WIN__)
     res= _commit(fd);
 #else
+#error Cannot find a way to sync a file, durability in danger
     res= 0;                                     /* No sync (strange OS) */
 #endif
   } while (res == -1 && errno == EINTR);
@@ -66,10 +77,78 @@
       my_errno= -1;                             /* Unknown error */
     if ((my_flags & MY_IGNORE_BADFD) &&
         (er == EBADF || er == EINVAL || er == EROFS))
+    {
+      DBUG_PRINT("info", ("ignoring errno %d", er));
       res= 0;
+    }
     else if (my_flags & MY_WME)
       my_error(EE_SYNC, MYF(ME_BELL+ME_WAITTANG), my_filename(fd), my_errno);
   }
   DBUG_RETURN(res);
 } /* my_sync */
+
+
+static const char cur_dir_name[]= {FN_CURLIB, 0};
+
+/*
+  Force directory information to disk.
+
+  SYNOPSIS
+    my_sync_dir()
+      dir_name          the name of the directory
+      my_flags          flags (MY_WME etc)
+
+  RETURN
+    0 if ok, !=0 if error
+*/
+
+int my_sync_dir(const char *dir_name, myf my_flags)
+{
+#ifdef NEED_EXPLICIT_SYNC_DIR
+  File dir_fd;
+  int res= 0;
+  const char *correct_dir_name;
+  DBUG_ENTER("my_sync_dir");
+  DBUG_PRINT("my",("Dir: '%s'  my_flags: %d", dir_name, my_flags));
+  /* Sometimes the path does not contain an explicit directory */
+  correct_dir_name= (dir_name[0] == 0) ? cur_dir_name : dir_name;
+  /*
+    Syncing a dir may give EINVAL on tmpfs on Linux, which is ok.
+    EIO on the other hand is very important. Hence MY_IGNORE_BADFD.
+  */
+  if ((dir_fd= my_open(correct_dir_name, O_RDONLY, MYF(my_flags))) >= 0)
+  {
+    if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD)))
+      res= 2;
+    if (my_close(dir_fd, MYF(my_flags)))
+      res= 3;
+  }
+  else
+    res= 1;
+  DBUG_RETURN(res);
+#else
+  return 0;
+#endif
+}
+
+
+/*
+  Force directory information to disk.
+
+  SYNOPSIS
+    my_sync_dir_by_file()
+      file_name         the name of a file in the directory
+      my_flags          flags (MY_WME etc)
+
+  RETURN
+    0 if ok, !=0 if error
+*/
+
+int my_sync_dir_by_file(const char *file_name, myf my_flags)
+{
+#ifdef NEED_EXPLICIT_SYNC_DIR
+  char dir_name[FN_REFLEN];
+  dirname_part(dir_name, file_name);
+  return my_sync_dir(dir_name, my_flags);
+#else
+  return 0;
+#endif
+}
+
diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c
index 30c501c54ee..e78a8011b3e 100644
--- a/mysys/safemalloc.c
+++ b/mysys/safemalloc.c
@@ -428,6 +428,29 @@ void TERMINATE(FILE *file)
 }
 
 
+/*
+  Report where a piece of memory was allocated
+
+  This is useful to call from within a debugger
+*/
+
+void sf_malloc_report_allocated(void *memory)
+{
+  struct st_irem *irem;
+  for (irem= sf_malloc_root ; irem ; irem= irem->next)
+  {
+    char *data= (((char*) irem) + ALIGN_SIZE(sizeof(struct st_irem)) +
+                 sf_malloc_prehunc);
+    if (data <= (char*) memory && (char*) memory <= data + irem->datasize)
+    {
+      printf("%u bytes at 0x%lx, allocated at line %u in '%s'\n",
+             irem->datasize, (long) data, irem->linenum, irem->filename);
+      break;
+    }
+  }
+}
+
 /* Returns 0 if chunk is ok */
 
 static int _checkchunk(register struct st_irem *irem, const char *filename,
diff --git a/mysys/wqueue.c b/mysys/wqueue.c
new file mode 100644
index 00000000000..28e044ff606
--- /dev/null
+++ b/mysys/wqueue.c
@@ -0,0 +1,167 @@
+
+#include <wqueue.h>
+
+#define STRUCT_PTR(TYPE, MEMBER, a) \
+  (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
+/*
+  Link a thread into double-linked queue of waiting threads.
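+  (A WQUEUE is just a pointer to the last thread in a circular list;
+  an all-zero WQUEUE is a valid empty queue.)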
+
+  SYNOPSIS
+    wqueue_link_into_queue()
+      wqueue              pointer to the queue structure
+      thread              pointer to the thread to be added to the queue
+
+  RETURN VALUE
+    none
+
+  NOTES
+    Queue is represented by a circular list of the thread structures.
+    The list is double-linked of the type (**prev,*next), accessed by
+    a pointer to the last element.
+*/
+
+void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+  struct st_my_thread_var *last;
+  if (!(last= wqueue->last_thread))
+  {
+    /* Queue is empty */
+    thread->next= thread;
+    thread->prev= &thread->next;
+  }
+  else
+  {
+    thread->prev= last->next->prev;
+    last->next->prev= &thread->next;
+    thread->next= last->next;
+    last->next= thread;
+  }
+  wqueue->last_thread= thread;
+}
+
+
+/*
+  Add a thread to single-linked queue of waiting threads
+
+  SYNOPSIS
+    wqueue_add_to_queue()
+      wqueue              pointer to the queue structure
+      thread              pointer to the thread to be added to the queue
+
+  RETURN VALUE
+    none
+
+  NOTES
+    Queue is represented by a circular list of the thread structures.
+    The list is single-linked of the type (*next), accessed by a pointer
+    to the last element.
+*/
+
+void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+  struct st_my_thread_var *last;
+  if (!(last= wqueue->last_thread))
+    thread->next= thread;
+  else
+  {
+    thread->next= last->next;
+    last->next= thread;
+  }
+  wqueue->last_thread= thread;
+}
+
+
+/*
+  Unlink a thread from double-linked queue of waiting threads
+
+  SYNOPSIS
+    wqueue_unlink_from_queue()
+      wqueue              pointer to the queue structure
+      thread              pointer to the thread to be removed from the queue
+
+  RETURN VALUE
+    none
+
+  NOTES
+    See NOTES for wqueue_link_into_queue
+*/
+
+void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+  if (thread->next == thread)
+    /* The queue contains only one member */
+    wqueue->last_thread= NULL;
+  else
+  {
+    thread->next->prev= thread->prev;
+    *thread->prev= thread->next;
+    if (wqueue->last_thread == thread)
+      wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
+                                      thread->prev);
+  }
+  thread->next= NULL;
+}
+
+
+/*
+  Remove all threads from queue signaling them to proceed
+
+  SYNOPSIS
+    wqueue_release_queue()
+      wqueue              pointer to the queue structure
+
+  RETURN VALUE
+    none
+
+  NOTES
+    See NOTES for wqueue_add_to_queue.
+    When removed from the queue each thread is signaled via condition
+    variable thread->suspend.
+*/
+
+void wqueue_release_queue(WQUEUE *wqueue)
+{
+  struct st_my_thread_var *last= wqueue->last_thread;
+  struct st_my_thread_var *next= last->next;
+  struct st_my_thread_var *thread;
+  do
+  {
+    thread= next;
+    pthread_cond_signal(&thread->suspend);
+    next= thread->next;
+    thread->next= NULL;
+  }
+  while (thread != last);
+  wqueue->last_thread= NULL;
+}
+
+
+/*
+  Add a thread to the queue and wait until it is signaled
+
+  SYNOPSIS
+    wqueue_add_and_wait()
+      wqueue              queue to add the thread to
+      thread              thread which is waiting
+      lock                mutex needed for the operation
+*/
+
+void wqueue_add_and_wait(WQUEUE *wqueue,
+                         struct st_my_thread_var *thread,
+                         pthread_mutex_t *lock)
+{
+  DBUG_ENTER("wqueue_add_and_wait");
+  DBUG_PRINT("enter", ("thread: 0x%lx  cond: 0x%lx  mutex: 0x%lx",
+                       (ulong) thread, (ulong) &thread->suspend,
+                       (ulong) lock));
+  wqueue_add_to_queue(wqueue, thread);
+  do
+  {
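+    /* Wait until wqueue_release_queue() resets thread->next to NULL */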
+    DBUG_PRINT("info", ("wait... cond 0x%lx, mutex 0x%lx",
+                        (ulong) &thread->suspend, (ulong) lock));
+    pthread_cond_wait(&thread->suspend, lock);
+    DBUG_PRINT("info", ("wait done cond 0x%lx, mutex 0x%lx, next 0x%lx",
+                        (ulong) &thread->suspend, (ulong) lock,
+                        (ulong) thread->next));
+  }
+  while (thread->next);
+  DBUG_VOID_RETURN;
+}
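For reference, a minimal usage sketch of the new wqueue API (not part of the
patch; the names waiters, lock and resource_ready are hypothetical, and each
thread is assumed to have called my_thread_init() so that my_thread_var is
valid): one thread parks itself with wqueue_add_and_wait() under a shared
mutex, and another thread wakes all parked threads with
wqueue_release_queue():

  #include <my_global.h>
  #include <my_pthread.h>
  #include <wqueue.h>

  static WQUEUE waiters;              /* zero-initialized: empty queue */
  static pthread_mutex_t lock;        /* assumed initialized elsewhere */
  static my_bool resource_ready= 0;   /* predicate guarded by 'lock' */

  /* Consumer: park until a producer releases the queue */
  static void wait_for_resource(void)
  {
    struct st_my_thread_var *me= my_thread_var;
    pthread_mutex_lock(&lock);
    while (!resource_ready)
      wqueue_add_and_wait(&waiters, me, &lock); /* returns with 'lock' held */
    pthread_mutex_unlock(&lock);
  }

  /* Producer: publish the resource and wake every parked thread */
  static void publish_resource(void)
  {
    pthread_mutex_lock(&lock);
    resource_ready= 1;
    if (waiters.last_thread)         /* release expects a non-empty queue */
      wqueue_release_queue(&waiters);
    pthread_mutex_unlock(&lock);
  }

Note the loop around wqueue_add_and_wait(): being released only means
thread->next was reset to NULL, so the caller must re-check the predicate it
is actually waiting for, exactly as with a bare pthread_cond_wait().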