10 files changed, 1519 insertions, 0 deletions
diff --git a/boehm-gc/include/gc_amiga_redirects.h b/boehm-gc/include/gc_amiga_redirects.h
new file mode 100644
index 00000000000..9e975c8c832
--- /dev/null
+++ b/boehm-gc/include/gc_amiga_redirects.h
@@ -0,0 +1,30 @@
+#ifndef GC_AMIGA_REDIRECTS_H
+
+# define GC_AMIGA_REDIRECTS_H
+
+# if ( defined(_AMIGA) && !defined(GC_AMIGA_MAKINGLIB) )
+    extern void *GC_amiga_realloc(void *old_object,size_t new_size_in_bytes);
+#   define GC_realloc(a,b) GC_amiga_realloc(a,b)
+    extern void GC_amiga_set_toany(void (*func)(void));
+    extern int GC_amiga_free_space_divisor_inc;
+    extern void *(*GC_amiga_allocwrapper_do) \
+	(size_t size,void *(*AllocFunction)(size_t size2)); /* called with the request size and the real allocator */
+#   define GC_malloc(a) \
+	(*GC_amiga_allocwrapper_do)(a,GC_malloc) /* bare GC_malloc (no '(') names the function, not this macro */
+#   define GC_malloc_atomic(a) \
+	(*GC_amiga_allocwrapper_do)(a,GC_malloc_atomic)
+#   define GC_malloc_uncollectable(a) \
+	(*GC_amiga_allocwrapper_do)(a,GC_malloc_uncollectable)
+#   define GC_malloc_stubborn(a) \
+	(*GC_amiga_allocwrapper_do)(a,GC_malloc_stubborn)
+#   define GC_malloc_atomic_uncollectable(a) \
+	(*GC_amiga_allocwrapper_do)(a,GC_malloc_atomic_uncollectable)
+#   define GC_malloc_ignore_off_page(a) \
+	(*GC_amiga_allocwrapper_do)(a,GC_malloc_ignore_off_page)
+#   define GC_malloc_atomic_ignore_off_page(a) \
+	(*GC_amiga_allocwrapper_do)(a,GC_malloc_atomic_ignore_off_page)
+# endif /* _AMIGA && !GC_AMIGA_MAKINGLIB */
+
+#endif /* GC_AMIGA_REDIRECTS_H */
+
+
diff --git a/boehm-gc/include/gc_backptr.h b/boehm-gc/include/gc_backptr.h
new file mode 100644
index 00000000000..5899496e0fe
--- /dev/null
+++ b/boehm-gc/include/gc_backptr.h
@@ -0,0 +1,65 @@
+/*
+ * This is a simple API to implement pointer back tracing, i.e.
+ * to answer questions such as "who is pointing to this" or
+ * "why is this object being retained by the collector"
+ *
+ * This API assumes that we have an ANSI C compiler.
+ *
+ * Most of these calls yield useful information only after
+ * a garbage collection.  Usually the client will first force
+ * a full collection and then gather information, preferably
+ * before much intervening allocation.
+ *
+ * The implementation of the interface is only about 99.9999%
+ * correct.  It is intended to be good enough for profiling,
+ * but is not intended to be used with production code.
+ *
+ * Results are likely to be much more useful if all allocation is
+ * accomplished through the debugging allocators.
+ *
+ * The implementation idea is due to A. Demers.
+ */
+
+#ifndef GC_BACKPTR_H
+#define GC_BACKPTR_H
+/* Store information about the object referencing dest in *base_p     */
+/* and *offset_p.                                                     */
+/* If multiple objects or roots point to dest, the one reported	      */
+/* will be the last one used by the garbage collector to trace the    */
+/* object.							      */
+/*   source is root ==> *base_p = address, *offset_p = 0	      */
+/*   source is heap object ==> *base_p != 0, *offset_p = offset       */
+/*   Returns a GC_ref_kind value identifying the kind of source.      */
+/* Dest can be any address within a heap object.                      */
+typedef enum {  GC_UNREFERENCED, /* No reference info available.	*/
+		GC_NO_SPACE,	/* Dest not allocated with debug alloc  */
+		GC_REFD_FROM_ROOT, /* Referenced directly by root *base_p */
+		GC_REFD_FROM_REG,  /* Referenced from a register, i.e.	*/
+				   /* a root without an address.	*/
+		GC_REFD_FROM_HEAP, /* Referenced from another heap obj. */
+		GC_FINALIZER_REFD /* Finalizable and hence accessible.  */
+} GC_ref_kind;
+
+GC_ref_kind GC_get_back_ptr_info(void *dest, void **base_p, size_t *offset_p);
+
+/* Generate a random heap address.            */
+/* The resulting address is in the heap, but  */
+/* not necessarily inside a valid object.     */
+void * GC_generate_random_heap_address(void);
+
+/* Generate a random address inside a valid marked heap object. */
+void * GC_generate_random_valid_address(void);
+
+/* Force a garbage collection and generate a backtrace from a */
+/* random heap address.                                       */
+/* This uses the GC logging mechanism (GC_printf) to produce  */
+/* output.  It can often be called from a debugger.  The      */
+/* source in dbg_mlc.c also serves as a sample client.	      */
+void GC_generate_random_backtrace(void);
+
+/* Print a backtrace from a specific address.  Used by the 	*/
+/* above.  The client should call GC_gcollect() immediately	*/
+/* before invocation.						*/
+void GC_print_backtrace(void *);
+
+#endif /* GC_BACKPTR_H */
diff --git a/boehm-gc/include/gc_local_alloc.h b/boehm-gc/include/gc_local_alloc.h
new file mode 100644
index 00000000000..1e58730cfe8
--- /dev/null
+++ b/boehm-gc/include/gc_local_alloc.h
@@ -0,0 +1,85 @@
+/* 
+ * Copyright (c) 2000 by Hewlett-Packard Company.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose,  provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ */
+
+/*
+ * Interface for thread local allocation.  Memory obtained
+ * this way can be used by all threads, as though it were obtained
+ * from an allocator like GC_malloc.  The difference is that GC_local_malloc
+ * counts the number of allocations of a given size from the current thread,
+ * and uses GC_malloc_many to perform the allocations once a threshold
+ * is exceeded.  Thus far less synchronization may be needed.
+ * Allocation of known large objects should not use this interface.
+ * This interface is designed primarily for fast allocation of small
+ * objects on multiprocessors, e.g. for a JVM running on an MP server.
+ *
+ * If this file is included with GC_GCJ_SUPPORT defined, GCJ-style
+ * bitmap allocation primitives will also be included.
+ *
+ * If this file is included with GC_REDIRECT_TO_LOCAL defined, then
+ * GC_MALLOC, GC_MALLOC_ATOMIC, and possibly GC_GCJ_MALLOC will
+ * be redefined to use the thread local allocator.
+ *
+ * The interface is available only if the collector is built with
+ * -DTHREAD_LOCAL_ALLOC, which is currently supported only on Linux.
+ *
+ * The debugging allocators use standard, not thread-local allocation.
+ */
+
+#ifndef GC_LOCAL_ALLOC_H
+#define GC_LOCAL_ALLOC_H
+
+#ifndef _GC_H
+#   include "gc.h"
+#endif
+
+#if defined(GC_GCJ_SUPPORT) && !defined(GC_GCJ_H)
+#   include "gc_gcj.h"
+#endif
+
+/* We assume ANSI C for this interface.	*/
+
+GC_PTR GC_local_malloc(size_t bytes);
+
+GC_PTR GC_local_malloc_atomic(size_t bytes);
+
+#if defined(GC_GCJ_SUPPORT)
+  GC_PTR GC_local_gcj_malloc(size_t bytes,
+			     void * ptr_to_struct_containing_descr);
+#endif
+
+# ifdef GC_DEBUG
+#   define GC_LOCAL_MALLOC(s) GC_debug_malloc(s,GC_EXTRAS)
+#   define GC_LOCAL_MALLOC_ATOMIC(s) GC_debug_malloc_atomic(s,GC_EXTRAS)
+#   ifdef GC_GCJ_SUPPORT
+#	define GC_LOCAL_GCJ_MALLOC(s,d) GC_debug_gcj_malloc(s,d,GC_EXTRAS)
+#   endif
+# else
+#   define GC_LOCAL_MALLOC(s) GC_local_malloc(s)
+#   define GC_LOCAL_MALLOC_ATOMIC(s) GC_local_malloc_atomic(s)
+#   ifdef GC_GCJ_SUPPORT
+#	define GC_LOCAL_GCJ_MALLOC(s,d) GC_local_gcj_malloc(s,d)
+#   endif
+# endif
+
+# ifdef GC_REDIRECT_TO_LOCAL
+#   undef GC_MALLOC
+#   define GC_MALLOC(s) GC_LOCAL_MALLOC(s)
+#   undef GC_MALLOC_ATOMIC
+#   define GC_MALLOC_ATOMIC(s) GC_LOCAL_MALLOC_ATOMIC(s)
+#   ifdef GC_GCJ_SUPPORT
+#	undef GC_GCJ_MALLOC
+# 	define GC_GCJ_MALLOC(s,d) GC_LOCAL_GCJ_MALLOC(s,d)
+#   endif
+# endif
+
+#endif /* GC_LOCAL_ALLOC_H */
diff --git a/boehm-gc/include/gc_mark.h b/boehm-gc/include/gc_mark.h
new file mode 100644
index 00000000000..0856d16fdae
--- /dev/null
+++ b/boehm-gc/include/gc_mark.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
+ * Copyright (c) 2001 by Hewlett-Packard Company. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose,  provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ */
+
+/*
+ * This contains interfaces to the GC marker that are likely to be useful to
+ * clients that provide detailed heap layout information to the collector.
+ * This interface should not be used by normal C or C++ clients.
+ * It will be useful to runtimes for other languages.
+ * 
+ * Note that this file is not "namespace-clean", i.e. it introduces names
+ * not prefixed with GC_, which may collide with the client's names.  It
+ * should be included only in those few places that directly provide
+ * information to the collector.
+ */
+#ifndef GC_MARK_H
+# define GC_MARK_H
+
+# ifndef _GC_H	/* same guard name gc_local_alloc.h tests before including gc.h */
+#   include "gc.h"
+# endif
+
+/* A client supplied mark procedure.  Returns new mark stack pointer.	*/
+/* Primary effect should be to push new entries on the mark stack.	*/
+/* Mark stack pointer values are passed and returned explicitly.	*/
+/* Global variables describing mark stack are not necessarily valid.	*/
+/* (This usually saves a few cycles by keeping things in registers.)	*/
+/* Assumed to scan about GC_PROC_BYTES on average.  If it needs to do	*/
+/* much more work than that, it should do it in smaller pieces by	*/
+/* pushing itself back on the mark stack.				*/
+/* Note that it should always do some work (defined as marking some	*/
+/* objects) before pushing more than one entry on the mark stack.	*/
+/* This is required to ensure termination in the event of mark stack	*/
+/* overflows.								*/
+/* This procedure is always called with at least one empty entry on the */
+/* mark stack.								*/
+/* Currently we require that mark procedures look for pointers in a	*/
+/* subset of the places the conservative marker would.  It must be safe	*/
+/* to invoke the normal mark procedure instead.				*/
+/* WARNING: Such a mark procedure may be invoked on an unused object    */
+/* residing on a free list.  Such objects are cleared, except for a	*/
+/* free list link field in the first word.  Thus mark procedures may	*/
+/* not count on the presence of a type descriptor, and must handle this	*/
+/* case correctly somehow.						*/
+# define GC_PROC_BYTES 100
+struct GC_ms_entry;
+typedef struct GC_ms_entry * (*GC_mark_proc) GC_PROTO((
+		GC_word * addr, struct GC_ms_entry * mark_stack_ptr,
+		struct GC_ms_entry * mark_stack_limit, GC_word env));
+
+# define GC_LOG_MAX_MARK_PROCS 6
+# define GC_MAX_MARK_PROCS (1 << GC_LOG_MAX_MARK_PROCS)
+
+/* In a few cases it's necessary to assign statically known indices to	*/
+/* certain mark procs.  Thus we reserve a few for well known clients.	*/
+/* (This is necessary if mark descriptors are compiler generated.)	*/
+#define GC_RESERVED_MARK_PROCS 8
+#   define GC_GCJ_RESERVED_MARK_PROC_INDEX 0
+
+/* Object descriptors on mark stack or in objects.  Low order two	*/
+/* bits are tags distinguishing among the following 4 possibilities	*/
+/* for the high order 30 bits.						*/
+#define GC_DS_TAG_BITS 2
+#define GC_DS_TAGS   ((1 << GC_DS_TAG_BITS) - 1)
+#define GC_DS_LENGTH 0	/* The entire word is a length in bytes that	*/
+			/* must be a multiple of 4.			*/
+#define GC_DS_BITMAP 1	/* 30 (62) bits are a bitmap describing pointer	*/
+			/* fields.  The msb is 1 iff the first word	*/
+			/* is a pointer.				*/
+			/* (This unconventional ordering sometimes	*/
+			/* makes the marker slightly faster.)		*/
+			/* Zeroes indicate definite nonpointers.  Ones	*/
+			/* indicate possible pointers.			*/
+			/* Only usable if pointers are word aligned.	*/
+#define GC_DS_PROC   2
+			/* The objects referenced by this object can be */
+			/* pushed on the mark stack by invoking		*/
+			/* PROC(descr).  ENV(descr) is passed as the	*/
+			/* last argument.				*/
+#   define GC_MAKE_PROC(proc_index, env) \
+	    (((((env) << GC_LOG_MAX_MARK_PROCS) \
+	       | (proc_index)) << GC_DS_TAG_BITS) | GC_DS_PROC)
+#define GC_DS_PER_OBJECT 3  /* The real descriptor is at the		*/
+			/* byte displacement from the beginning of the	*/
+			/* object given by descr & ~DS_TAGS		*/
+			/* If the descriptor is negative, the real	*/
+			/* descriptor is at (*<object_start>) -		*/
+			/* (descr & ~DS_TAGS) - GC_INDIR_PER_OBJ_BIAS	*/
+			/* The latter alternative can be used if each	*/
+			/* object contains a type descriptor in the	*/
+			/* first word.					*/
+			/* Note that in multithreaded environments	*/
+			/* per object descriptors must be located in	*/
+			/* either the first two or last two words of	*/
+			/* the object, since only those are guaranteed	*/
+			/* to be cleared while the allocation lock is	*/
+			/* held.					*/
+#define GC_INDIR_PER_OBJ_BIAS 0x10
+			
+extern GC_PTR GC_least_plausible_heap_addr;
+extern GC_PTR GC_greatest_plausible_heap_addr;
+			/* Bounds on the heap.  Guaranteed valid.	*/
+			/* Likely to include future heap expansion.	*/
+
+/* Handle nested references in a custom mark procedure.			*/
+/* Check if obj is a valid object. If so, ensure that it is marked.	*/
+/* If it was not previously marked, push its contents onto the mark 	*/
+/* stack for future scanning.  The object will then be scanned using	*/
+/* its mark descriptor.  						*/
+/* Returns the new mark stack pointer.					*/
+/* Handles mark stack overflows correctly.				*/
+/* Since this marks first, it makes progress even if there are mark	*/
+/* stack overflows.							*/
+/* Src is the address of the pointer to obj, which is used only		*/
+/* for back pointer-based heap debugging.				*/
+/* It is strongly recommended that most objects be handled without mark	*/
+/* procedures, e.g. with bitmap descriptors, and that mark procedures	*/
+/* be reserved for exceptional cases.  That will ensure that 		*/
+/* the cost of this call is not extremely performance critical.		*/
+/* (Otherwise we would need to inline GC_mark_and_push completely,	*/
+/* which would tie the client code to a fixed collector version.)	*/
+struct GC_ms_entry *GC_mark_and_push
+		GC_PROTO((GC_PTR obj,
+			  struct GC_ms_entry * mark_stack_ptr,
+		          struct GC_ms_entry * mark_stack_limit, GC_PTR *src));
+/* Inline heap-range filter for GC_mark_and_push; may evaluate obj/msp twice. */
+#define GC_MARK_AND_PUSH(obj, msp, lim, src) \
+	(((GC_word)(obj) >= (GC_word)GC_least_plausible_heap_addr && \
+	  (GC_word)(obj) <= (GC_word)GC_greatest_plausible_heap_addr)? \
+	  GC_mark_and_push((obj), (msp), (lim), (src)) : \
+	  (msp))
+
+#endif  /* GC_MARK_H */
+
diff --git a/boehm-gc/include/gc_pthread_redirects.h b/boehm-gc/include/gc_pthread_redirects.h
new file mode 100644
index 00000000000..ac254a86726
--- /dev/null
+++ b/boehm-gc/include/gc_pthread_redirects.h
@@ -0,0 +1,69 @@
+/* Our pthread support normally needs to intercept a number of thread	*/
+/* calls.  We arrange to do that here, if appropriate.			*/
+
+#ifndef GC_PTHREAD_REDIRECTS_H
+
+#define GC_PTHREAD_REDIRECTS_H
+
+#if defined(GC_SOLARIS_THREADS)
+/* We need to intercept calls to many of the threads primitives, so 	*/
+/* that we can locate thread stacks and stop the world.			*/
+/* Note also that the collector cannot see thread specific data.	*/
+/* Thread specific data should generally consist of pointers to		*/
+/* uncollectable objects (allocated with GC_malloc_uncollectable,	*/
+/* not the system malloc), which are deallocated using the destructor	*/
+/* facility in thr_keycreate.  Alternatively, keep a redundant pointer	*/
+/* to thread specific data on the thread stack.			        */
+# include <thread.h>
+  int GC_thr_create(void *stack_base, size_t stack_size,
+                    void *(*start_routine)(void *), void *arg, long flags,
+                    thread_t *new_thread);
+  int GC_thr_join(thread_t wait_for, thread_t *departed, void **status);
+  int GC_thr_suspend(thread_t target_thread);
+  int GC_thr_continue(thread_t target_thread);
+  void * GC_dlopen(const char *path, int mode);
+# define thr_create GC_thr_create
+# define thr_join GC_thr_join
+# define thr_suspend GC_thr_suspend
+# define thr_continue GC_thr_continue
+#endif /* GC_SOLARIS_THREADS */
+
+#if defined(GC_SOLARIS_PTHREADS)
+# include <pthread.h>
+# include <signal.h>
+  extern int GC_pthread_create(pthread_t *new_thread,
+    			         const pthread_attr_t *attr,
+          			 void * (*thread_execp)(void *), void *arg);
+  extern int GC_pthread_join(pthread_t wait_for, void **status);
+# define pthread_join GC_pthread_join
+# define pthread_create GC_pthread_create
+#endif
+
+#if defined(GC_SOLARIS_PTHREADS) || defined(GC_SOLARIS_THREADS)
+# define dlopen GC_dlopen
+#endif /* SOLARIS_THREADS || SOLARIS_PTHREADS */
+
+
+#if !defined(GC_USE_LD_WRAP) && \
+    (defined(GC_IRIX_THREADS) || defined(GC_LINUX_THREADS) \
+     || defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS))
+/* We treat these similarly: thread calls are redirected to GC wrappers. */
+# include <pthread.h>
+# include <signal.h>
+
+  int GC_pthread_create(pthread_t *new_thread,
+                        const pthread_attr_t *attr,
+		        void *(*start_routine)(void *), void *arg);
+  int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset);
+  int GC_pthread_join(pthread_t thread, void **retval);
+  int GC_pthread_detach(pthread_t thread);
+  void * GC_dlopen(const char *path, int mode); /* prototype for the dlopen redirect below */
+# define pthread_create GC_pthread_create
+# define pthread_sigmask GC_pthread_sigmask
+# define pthread_join GC_pthread_join
+# define pthread_detach GC_pthread_detach
+# define dlopen GC_dlopen
+
+#endif /* GC_xxxxx_THREADS */
+
+#endif /* GC_PTHREAD_REDIRECTS_H */
diff --git a/boehm-gc/include/private/dbg_mlc.h b/boehm-gc/include/private/dbg_mlc.h
new file mode 100644
index 00000000000..1ee814db19b
--- /dev/null
+++ b/boehm-gc/include/private/dbg_mlc.h
@@ -0,0 +1,132 @@
+/* 
+ * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
+ * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
+ * Copyright (c) 1997 by Silicon Graphics.  All rights reserved.
+ * Copyright (c) 1999 by Hewlett-Packard Company.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose,  provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ */
+
+/*
+ * This is mostly an internal header file.  Typical clients should
+ * not use it.  Clients that define their own object kinds with
+ * debugging allocators will probably want to include this, however.
+ * No attempt is made to keep the namespace clean.  This should not be
+ * included from header files that are frequently included by clients.
+ */
+
+#ifndef _DBG_MLC_H
+
+#define _DBG_MLC_H
+
+# define I_HIDE_POINTERS
+# include "gc_priv.h"
+# ifdef KEEP_BACK_PTRS
+#   include "gc_backptr.h"
+# endif
+
+# define START_FLAG ((word)0xfedcedcb)
+# define END_FLAG ((word)0xbcdecdef)
+	/* Stored both one past the end of user object, and one before	*/
+	/* the end of the object as seen by the allocator.		*/
+
+
+/* Object header */
+typedef struct {
+#   ifdef KEEP_BACK_PTRS
+	GC_hidden_pointer oh_back_ptr;
+	    /* We make sure that we only store even valued	*/
+	    /* pointers here, so that the hidden version has	*/
+	    /* the least significant bit set.  We never		*/
+	    /* overwrite a value with the least significant	*/
+	    /* bit clear, thus ensuring that we never overwrite	*/
+	    /* a free list link field.				*/
+	    /* The following are special back pointer values.	*/
+	    /* Note that the "hidden" (i.e. bitwise 		*/
+	    /* complemented version) of these is actually 	*/
+	    /* stored.						*/
+#	define NOT_MARKED (ptr_t)(0)
+#	define MARKED_FOR_FINALIZATION (ptr_t)(2)
+	    /* Object was marked because it is finalizable.	*/
+#	define MARKED_FROM_REGISTER (ptr_t)(4)
+	    /* Object was marked from a register.  Hence the	*/
+	    /* source of the reference doesn't have an address.	*/
+#       if ALIGNMENT == 1
+	  /* Fudge back pointer to be even.  */
+#	  define HIDE_BACK_PTR(p) HIDE_POINTER(~1 & (GC_word)(p))
+#	else
+#	  define HIDE_BACK_PTR(p) HIDE_POINTER(p)
+#	endif
+#	ifdef ALIGN_DOUBLE
+	  word oh_dummy;
+#	endif
+#   endif
+    char * oh_string;		/* object descriptor string	*/
+    word oh_int;		/* object descriptor integers	*/
+#   ifdef NEED_CALLINFO
+      struct callinfo oh_ci[NFRAMES];
+#   endif
+#   ifndef SHORT_DBG_HDRS
+      word oh_sz;			/* Original malloc arg.		*/
+      word oh_sf;			/* start flag */
+#   endif /* SHORT_DBG_HDRS */
+} oh;
+/* The size of the above structure is assumed not to dealign things,	*/
+/* and to be a multiple of the word length.				*/
+
+#define DEBUG_BYTES (sizeof (oh) + sizeof (word))
+#define USR_PTR_FROM_BASE(p) ((ptr_t)(p) + sizeof(oh))
+
+/* There is no reason to ever add a byte at the end explicitly, since we */
+/* already add a guard word.						 */
+#undef ROUNDED_UP_WORDS
+#define ROUNDED_UP_WORDS(n) BYTES_TO_WORDS((n) + WORDS_TO_BYTES(1) - 1)
+
+#ifdef SAVE_CALL_CHAIN
+#   define ADD_CALL_CHAIN(base, ra) GC_save_callers(((oh *)(base)) -> oh_ci)
+#   define PRINT_CALL_CHAIN(base) GC_print_callers(((oh *)(base)) -> oh_ci)
+#else
+# ifdef GC_ADD_CALLER
+#   define ADD_CALL_CHAIN(base, ra) ((oh *)(base)) -> oh_ci[0].ci_pc = (ra)
+#   define PRINT_CALL_CHAIN(base) GC_print_callers(((oh *)(base)) -> oh_ci)
+# else
+#   define ADD_CALL_CHAIN(base, ra)
+#   define PRINT_CALL_CHAIN(base)
+# endif
+#endif
+
+# ifdef GC_ADD_CALLER
+#   define OPT_RA ra,
+# else
+#   define OPT_RA
+# endif
+
+
+/* Check whether object with base pointer p has debugging info	*/ 
+/* p is assumed to point to a legitimate object in our part	*/
+/* of the heap.							*/
+#ifdef SHORT_DBG_HDRS
+# define GC_has_other_debug_info(p) TRUE
+#else
+  GC_bool GC_has_other_debug_info(/* p */);
+#endif
+/* A hidden back pointer always has its low bit set, so test that bit first. */
+#ifdef KEEP_BACK_PTRS
+# define GC_HAS_DEBUG_INFO(p) \
+	((((oh *)(p))->oh_back_ptr & 1) && GC_has_other_debug_info(p))
+#else
+# define GC_HAS_DEBUG_INFO(p) GC_has_other_debug_info(p)
+#endif
+
+/* Store debugging info into p.  Return displaced pointer. */
+/* Assumes we don't hold allocation lock.		   */
+ptr_t GC_store_debug_info(/* p, sz, string, integer */);
+
+#endif /* _DBG_MLC_H */
diff --git a/boehm-gc/include/private/gc_locks.h b/boehm-gc/include/private/gc_locks.h
new file mode 100644
index 00000000000..b9ff0cf1933
--- /dev/null
+++ b/boehm-gc/include/private/gc_locks.h
@@ -0,0 +1,480 @@
+/* 
+ * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
+ * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
+ * Copyright (c) 1999 by Hewlett-Packard Company. All rights reserved.
+ *
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose,  provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ */
+
+#ifndef GC_LOCKS_H
+#define GC_LOCKS_H
+
+/*
+ * Mutual exclusion between allocator/collector routines.
+ * Needed if there is more than one allocator thread.
+ * FASTLOCK() is assumed to try to acquire the lock in a cheap and
+ * dirty way that is acceptable for a few instructions, e.g. by
+ * inhibiting preemption.  This is assumed to have succeeded only
+ * if a subsequent call to FASTLOCK_SUCCEEDED() returns TRUE.
+ * FASTUNLOCK() is called whether or not FASTLOCK_SUCCEEDED().
+ * If signals cannot be tolerated with the FASTLOCK held, then
+ * FASTLOCK should disable signals.  The code executed under
+ * FASTLOCK is otherwise immune to interruption, provided it is
+ * not restarted.
+ * DCL_LOCK_STATE declares any local variables needed by LOCK and UNLOCK
+ * and/or DISABLE_SIGNALS and ENABLE_SIGNALS and/or FASTLOCK.
+ * (There is currently no equivalent for FASTLOCK.)
+ *
+ * In the PARALLEL_MARK case, we also need to define a number of
+ * other inline functions here:
+ *   GC_bool GC_compare_and_exchange( volatile GC_word *addr,
+ *   				      GC_word old, GC_word new )
+ *   GC_word GC_atomic_add( volatile GC_word *addr, GC_word how_much )
+ *   void GC_memory_barrier( )
+ *   
+ */  
+# ifdef THREADS
+#  ifdef PCR_OBSOLETE	/* Faster, but broken with multiple lwp's	*/
+#    include  "th/PCR_Th.h"
+#    include  "th/PCR_ThCrSec.h"
+     extern struct PCR_Th_MLRep GC_allocate_ml;
+#    define DCL_LOCK_STATE  PCR_sigset_t GC_old_sig_mask
+#    define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml) 
+#    define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
+#    define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
+#    define FASTLOCK() PCR_ThCrSec_EnterSys()
+     /* Here we cheat (a lot): */
+#        define FASTLOCK_SUCCEEDED() (*(int *)(&GC_allocate_ml) == 0)
+		/* TRUE if nobody currently holds the lock */
+#    define FASTUNLOCK() PCR_ThCrSec_ExitSys()
+#  endif
+#  ifdef PCR
+#    include <base/PCR_Base.h>
+#    include <th/PCR_Th.h>
+     extern PCR_Th_ML GC_allocate_ml;
+#    define DCL_LOCK_STATE \
+	 PCR_ERes GC_fastLockRes; PCR_sigset_t GC_old_sig_mask
+#    define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml)
+#    define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
+#    define FASTLOCK() (GC_fastLockRes = PCR_Th_ML_Try(&GC_allocate_ml))
+#    define FASTLOCK_SUCCEEDED() (GC_fastLockRes == PCR_ERes_okay)
+#    define FASTUNLOCK()  {\
+        if( FASTLOCK_SUCCEEDED() ) PCR_Th_ML_Release(&GC_allocate_ml); }
+#  endif
+#  ifdef SRC_M3
+     extern GC_word RT0u__inCritical;
+#    define LOCK() RT0u__inCritical++
+#    define UNLOCK() RT0u__inCritical--
+#  endif
+#  ifdef SOLARIS_THREADS
+#    include <thread.h>
+#    include <signal.h>
+     extern mutex_t GC_allocate_ml;
+#    define LOCK() mutex_lock(&GC_allocate_ml) /* no ';': callers supply it, as with the other LOCK() variants */
+#    define UNLOCK() mutex_unlock(&GC_allocate_ml)
+#  endif
+
+/* Try to define GC_TEST_AND_SET and a matching GC_CLEAR for spin lock	*/
+/* acquisition and release.  We need this for correct operation of the	*/
+/* incremental GC.							*/
+#  ifdef __GNUC__
+#    if defined(I386)
+       inline static int GC_test_and_set(volatile unsigned int *addr) {
+	  int oldval;
+	  /* Note: the "xchg" instruction does not need a "lock" prefix */
+	  __asm__ __volatile__("xchgl %0, %1"
+		: "=r"(oldval), "=m"(*(addr))
+		: "0"(1), "m"(*(addr)) : "memory");
+	  return oldval;
+       }
+#      define GC_TEST_AND_SET_DEFINED
+#    endif
+#    if defined(IA64)
+       inline static int GC_test_and_set(volatile unsigned int *addr) {
+	  long oldval, n = 1;
+	  __asm__ __volatile__("xchg4 %0=%1,%2"
+		: "=r"(oldval), "=m"(*addr)
+		: "r"(n), "1"(*addr) : "memory");
+	  return oldval;
+       }
+#      define GC_TEST_AND_SET_DEFINED
+       /* Should this handle post-increment addressing?? */
+       inline static void GC_clear(volatile unsigned int *addr) {
+	 __asm__ __volatile__("st4.rel %0=r0" : "=m" (*addr) : : "memory");
+       }
+#      define GC_CLEAR_DEFINED
+#    endif
+#    ifdef SPARC
+       inline static int GC_test_and_set(volatile unsigned int *addr) {
+	 int oldval;
+
+	 __asm__ __volatile__("ldstub %1,%0"
+	 : "=r"(oldval), "=m"(*addr)
+	 : "m"(*addr) : "memory");
+	 return oldval;
+       }
+#      define GC_TEST_AND_SET_DEFINED
+#    endif
+#    ifdef M68K
+       /* Contributed by Tony Mantler.  I'm not sure how well it was	*/
+       /* tested.							*/
+       inline static int GC_test_and_set(volatile unsigned int *addr) {
+          char oldval; /* this must be no longer than 8 bits */
+
+          /* The return value is semi-phony. */
+          /* 'tas' sets bit 7 while the return */
+          /* value pretends bit 0 was set */
+          __asm__ __volatile__(
+                 "tas %1@; sne %0; negb %0"
+                 : "=d" (oldval)
+                 : "a" (addr) : "memory");
+          return oldval;
+       }
+#      define GC_TEST_AND_SET_DEFINED
+#    endif
+#    if defined(POWERPC) /* test-and-set built on lwarx/stwcx. */
+        inline static int GC_test_and_set(volatile unsigned int *addr) {
+          int oldval;
+          int temp = 1; /* locked value */
+
+          __asm__ __volatile__(
+               "1:\tlwarx %0,0,%3\n"   /* load and reserve */
+               "\tcmpwi %0, 0\n"       /* if load is */
+               "\tbne 2f\n"            /*   non-zero, return already set */
+               "\tstwcx. %2,0,%1\n"    /* else store conditional */
+               "\tbne- 1b\n"           /* retry if lost reservation */
+               "2:\t\n"                /* oldval is zero if we set */
+              : "=&r"(oldval), "=p"(addr)
+              : "r"(temp), "1"(addr)
+              : "memory");
+          return (int)oldval;
+        }
+#       define GC_TEST_AND_SET_DEFINED
+        inline static void GC_clear(volatile unsigned int *addr) {
+	  __asm__ __volatile__("eieio" ::: "memory"); /* barrier before the releasing store */
+          *(addr) = 0;
+        }
+#       define GC_CLEAR_DEFINED
+#    endif
+#    if defined(ALPHA) 
+        inline static int GC_test_and_set(volatile unsigned int * addr)
+        {
+          unsigned long oldvalue;
+          unsigned long temp;
+
+          __asm__ __volatile__(
+                             "1:     ldl_l %0,%1\n"
+                             "       and %0,%3,%2\n"
+                             "       bne %2,2f\n"
+                             "       xor %0,%3,%0\n"
+                             "       stl_c %0,%1\n"
+                             "       beq %0,3f\n"
+                             "       mb\n"
+                             "2:\n"
+                             ".section .text2,\"ax\"\n"
+                             "3:     br 1b\n"
+                             ".previous"
+                             :"=&r" (temp), "=m" (*addr), "=&r" (oldvalue)
+                             :"Ir" (1), "m" (*addr)
+			     :"memory");
+
+          return oldvalue;
+        }
+#       define GC_TEST_AND_SET_DEFINED
+        /* Should probably also define GC_clear, since it needs	*/
+        /* a memory barrier ??					*/
+#    endif /* ALPHA */
+#    ifdef ARM32
+        inline static int GC_test_and_set(volatile unsigned int *addr) {
+          int oldval;
+          /* SWP on ARM is very similar to XCHG on x86.  Doesn't lock the
+           * bus because there are no SMP ARM machines.  If/when there are,
+           * this code will likely need to be updated. */
+          /* See linuxthreads/sysdeps/arm/pt-machine.h in glibc-2.1 */
+          __asm__ __volatile__("swp %0, %1, [%2]"
+      		  	     : "=r"(oldval)
+      			     : "r"(1), "r"(addr)
+			     : "memory");
+          return oldval;
+        }
+#       define GC_TEST_AND_SET_DEFINED
+#    endif /* ARM32 */
+#  endif /* __GNUC__ */
+#  if (defined(ALPHA) && !defined(__GNUC__))
+#    define GC_test_and_set(addr) __cxx_test_and_set_atomic(addr, 1)
+#    define GC_TEST_AND_SET_DEFINED
+#  endif
+#  if defined(MSWIN32)
+#    define GC_test_and_set(addr) InterlockedExchange((LPLONG)addr,1)
+#    define GC_TEST_AND_SET_DEFINED
+#  endif
+#  ifdef MIPS /* NOTE(review): GC_test_and_set takes (addr, v) here but one argument elsewhere -- confirm callers */
+#    if __mips < 3 || !(defined (_ABIN32) || defined(_ABI64)) \
+	|| !defined(_COMPILER_VERSION) || _COMPILER_VERSION < 700
+#        define GC_test_and_set(addr, v) test_and_set(addr,v)
+#    else
+#	 define GC_test_and_set(addr, v) __test_and_set(addr,v)
+#	 define GC_clear(addr) __lock_release(addr) /* no ';': callers supply it, as with the inline GC_clear variants */
+#	 define GC_CLEAR_DEFINED
+#    endif
+#    define GC_TEST_AND_SET_DEFINED
+#  endif /* MIPS */
+#  if 0 /* defined(HP_PA) */
+     /* The official recommendation seems to be to not use ldcw from	*/
+     /* user mode.  Since multithreaded incremental collection doesn't	*/
+     /* work anyway on HP_PA, this shouldn't be a major loss.		*/
+
+     /* "set" means 0 and "clear" means 1 here.		*/
+#    define GC_test_and_set(addr) !GC_test_and_clear(addr);
+#    define GC_TEST_AND_SET_DEFINED
+#    define GC_clear(addr) GC_noop1(addr); *(volatile unsigned int *)addr = 1;
+	/* The above needs a memory barrier! */
+#    define GC_CLEAR_DEFINED
+#  endif
+#  if defined(GC_TEST_AND_SET_DEFINED) && !defined(GC_CLEAR_DEFINED)
+#    ifdef __GNUC__
+       inline static void GC_clear(volatile unsigned int *addr) {
+         /* Compiler barrier: try to keep gcc from moving anything past this. */
+         __asm__ __volatile__(" " : : : "memory");
+         *(addr) = 0;
+       }
+#    else
+	    /* The GC_noop1 call should stop the compiler from moving	*/
+	    /* assignments below the UNLOCK.  Expands to 2 statements.	*/
+#      define GC_clear(addr) GC_noop1((word)(addr)); \
+			     *((volatile unsigned int *)(addr)) = 0;
+#    endif
+#    define GC_CLEAR_DEFINED
+#  endif /* !GC_CLEAR_DEFINED */
+
+#  if !defined(GC_TEST_AND_SET_DEFINED)
+#    define USE_PTHREAD_LOCKS
+#  endif
+
+#  if defined(LINUX_THREADS) || defined(OSF1_THREADS) \
+      || defined(HPUX_THREADS)
+#    define NO_THREAD (pthread_t)(-1)
+#    include <pthread.h>
+#    if defined(PARALLEL_MARK) 
+      /* We need compare-and-swap to update mark bits, where it's	*/
+      /* performance critical.  If USE_MARK_BYTES is defined, it is	*/
+      /* no longer needed for this purpose.  However we use it in	*/
+      /* either case to implement atomic fetch-and-add, though that's	*/
+      /* less performance critical, and could perhaps be done with	*/
+      /* a lock.							*/
+#     if defined(GENERIC_COMPARE_AND_SWAP)
+	/* Probably not useful, except for debugging.	*/
+	/* We do use GENERIC_COMPARE_AND_SWAP on PA_RISC, but we 	*/
+	/* minimize its use.						*/
+	extern pthread_mutex_t GC_compare_and_swap_lock;
+
+	/* Note that if GC_word updates are not atomic, a concurrent 	*/
+	/* reader should acquire GC_compare_and_swap_lock.  On 		*/
+	/* currently supported platforms, such updates are atomic.	*/
+	extern GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+					       GC_word old, GC_word new_val);
+#     endif /* GENERIC_COMPARE_AND_SWAP */
+#     if defined(I386)
+#      if !defined(GENERIC_COMPARE_AND_SWAP)
+         /* If *addr == old, atomically store new_val; TRUE iff it did.	*/
+         inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+		  				       GC_word old,
+						       GC_word new_val)
+         {
+	   char result;	/* setz needs a byte register, hence "q" below.	*/
+	   __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1"
+	    	: "=m"(*(addr)), "=q"(result)
+		: "r" (new_val), "0"(*(addr)), "a"(old) : "memory");
+	   return (GC_bool) result;
+         }
+#      endif /* !GENERIC_COMPARE_AND_SWAP */
+       inline static void GC_memory_write_barrier()
+       {
+	 /* We believe the processor ensures at least processor	*/
+	 /* consistent ordering.  Thus a compiler-only barrier	*/
+	 /* (empty asm with a "memory" clobber) should suffice.	*/
+         __asm__ __volatile__("" : : : "memory");
+       }
+#     endif /* I386 */
+#     if defined(IA64) /* Compare-and-exchange built on cmpxchg8.rel. */
+#      if !defined(GENERIC_COMPARE_AND_SWAP)
+         inline static GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+						       GC_word old, GC_word new_val) 
+	 {
+	  unsigned long oldval;	/* Value cmpxchg8 found in *addr.	*/
+	  __asm__ __volatile__("mov ar.ccv=%4 ;; cmpxchg8.rel %0=%1,%2,ar.ccv"
+		: "=r"(oldval), "=m"(*addr)
+		: "r"(new_val), "1"(*addr), "r"(old) : "memory");
+	  return (oldval == old);	/* TRUE iff *addr held old.	*/
+         }
+#      endif /* !GENERIC_COMPARE_AND_SWAP */
+#      if 0
+	/* Shouldn't be needed; we use volatile stores instead. */
+        inline static void GC_memory_write_barrier()
+        {
+          __asm__ __volatile__("mf" : : : "memory");
+        }
+#      endif /* 0 */
+#     endif /* IA64 */
+#     if !defined(GENERIC_COMPARE_AND_SWAP)
+        /* Atomically add how_much to *addr; yields the prior *addr.	*/
+        inline static GC_word GC_atomic_add(volatile GC_word *addr,
+					    GC_word how_much)
+        {
+	  for (;;) {
+	    GC_word prev = *addr;
+	    if (GC_compare_and_exchange(addr, prev, prev + how_much))
+	      return prev;
+	  }
+        }
+#     else /* GENERIC_COMPARE_AND_SWAP */
+	/* Out-of-line version.  Provided a GC_word can be updated	*/
+	/* atomically, reading *addr without a lock should be OK.	*/
+	extern GC_word GC_atomic_add(volatile GC_word *addr, GC_word how_much);
+#     endif /* GENERIC_COMPARE_AND_SWAP */
+
+#    endif /* PARALLEL_MARK */
+
+#    if !defined(THREAD_LOCAL_ALLOC) && !defined(USE_PTHREAD_LOCKS)
+      /* Spin lock version; not used with THREAD_LOCAL_ALLOC, since then	*/
+      /* the allocation lock tends to be held for long periods, if at	*/
+      /* all, so spinning and fixed-period sleeps would likely waste	*/
+      /* significant time.  There we rely mostly on queued locks.	*/
+#     define USE_SPIN_LOCK
+      extern volatile unsigned int GC_allocate_lock;
+      extern void GC_lock(void);
+	/* GC_lock_holder (declared below) is the allocation lock holder.  */
+	/* Only set if acquired by client through GC_call_with_alloc_lock. */
+#     ifdef GC_ASSERTIONS
+#        define LOCK() \
+		{ if (GC_test_and_set(&GC_allocate_lock)) GC_lock(); \
+		  SET_LOCK_HOLDER(); }
+#        define UNLOCK() \
+		{ GC_ASSERT(I_HOLD_LOCK()); UNSET_LOCK_HOLDER(); \
+	          GC_clear(&GC_allocate_lock); }
+#     else
+#        define LOCK() \
+		{ if (GC_test_and_set(&GC_allocate_lock)) GC_lock(); }
+#        define UNLOCK() \
+		GC_clear(&GC_allocate_lock)
+#     endif /* !GC_ASSERTIONS */
+#     if 0
+	/* Another alternative for OSF1 might be:		*/
+#       include <sys/mman.h>
+        extern msemaphore GC_allocate_semaphore;
+#       define LOCK() { if (msem_lock(&GC_allocate_semaphore, MSEM_IF_NOWAIT) \
+ 			    != 0) GC_lock(); else GC_allocate_lock = 1; }
+        /* The following is INCORRECT, since the memory model is too weak. */
+	/* Is this true?  Presumably msem_unlock has the right semantics?  */
+	/*		- HB						   */
+#       define UNLOCK() { GC_allocate_lock = 0; \
+                          msem_unlock(&GC_allocate_semaphore, 0); }
+#     endif /* 0 */
+#    else /* THREAD_LOCAL_ALLOC  || USE_PTHREAD_LOCKS */
+#      ifndef USE_PTHREAD_LOCKS
+#        define USE_PTHREAD_LOCKS
+#      endif
+#    endif /* THREAD_LOCAL_ALLOC || USE_PTHREAD_LOCKS */
+#   ifdef USE_PTHREAD_LOCKS /* LOCK/UNLOCK built on a pthread mutex. */
+#      include <pthread.h>
+       extern pthread_mutex_t GC_allocate_ml; /* Mutex used by LOCK/UNLOCK. */
+#      ifdef GC_ASSERTIONS /* Also record and check the lock holder. */
+#        define LOCK() \
+		{ GC_lock(); \
+		  SET_LOCK_HOLDER(); }
+#        define UNLOCK() \
+		{ GC_ASSERT(I_HOLD_LOCK()); UNSET_LOCK_HOLDER(); \
+	          pthread_mutex_unlock(&GC_allocate_ml); }
+#      else /* !GC_ASSERTIONS: trylock first, GC_lock() only on failure */
+#        define LOCK() \
+	   { if (0 != pthread_mutex_trylock(&GC_allocate_ml)) GC_lock(); }
+#        define UNLOCK() pthread_mutex_unlock(&GC_allocate_ml)
+#      endif /* !GC_ASSERTIONS */
+#   endif /* USE_PTHREAD_LOCKS */
+#   define SET_LOCK_HOLDER() GC_lock_holder = pthread_self()
+#   define UNSET_LOCK_HOLDER() GC_lock_holder = NO_THREAD
+#   define I_HOLD_LOCK() (pthread_equal(GC_lock_holder, pthread_self()))
+    extern VOLATILE GC_bool GC_collecting; /* 1 between ENTER_GC and EXIT_GC */
+#   define ENTER_GC() GC_collecting = 1;
+#   define EXIT_GC() GC_collecting = 0;
+    extern void GC_lock(void);
+    extern pthread_t GC_lock_holder; /* NO_THREAD after UNSET_LOCK_HOLDER() */
+#   ifdef GC_ASSERTIONS
+      extern pthread_t GC_mark_lock_holder;
+#   endif /* GC_ASSERTIONS */
+#  endif /* LINUX_THREADS || OSF1_THREADS  || HPUX_THREADS */
+#  if defined(IRIX_THREADS)
+#    include <pthread.h>
+     /* <mutex.h> below probably should never be included, but I	*/
+     /* can't test on Irix anymore.					*/
+#    include <mutex.h>
+
+     extern unsigned long GC_allocate_lock;
+	/* This is not a mutex because mutexes that obey the (optional) 	*/
+	/* POSIX scheduling rules are subject to convoys in high contention	*/
+	/* applications.  This is basically a spin lock.			*/
+     extern pthread_t GC_lock_holder;
+     extern void GC_lock(void);
+	/* GC_lock_holder is the allocation lock holder.  Only set if	   */
+	/* acquired by client through GC_call_with_alloc_lock.		   */
+#    define SET_LOCK_HOLDER() GC_lock_holder = pthread_self()
+#    define NO_THREAD (pthread_t)(-1)
+#    define UNSET_LOCK_HOLDER() GC_lock_holder = NO_THREAD
+#    define I_HOLD_LOCK() (pthread_equal(GC_lock_holder, pthread_self()))
+#    define LOCK() { if (GC_test_and_set(&GC_allocate_lock, 1)) GC_lock(); }
+#    define UNLOCK() GC_clear(&GC_allocate_lock);
+     extern VOLATILE GC_bool GC_collecting;
+#    define ENTER_GC() \
+		{ \
+		    GC_collecting = 1; \
+		}
+#    define EXIT_GC() GC_collecting = 0;
+#  endif /* IRIX_THREADS */
+#  ifdef WIN32_THREADS
+#    include <windows.h>
+     GC_API CRITICAL_SECTION GC_allocate_ml;
+#    define LOCK() EnterCriticalSection(&GC_allocate_ml);
+#    define UNLOCK() LeaveCriticalSection(&GC_allocate_ml);
+#  endif
+#  ifndef SET_LOCK_HOLDER
+#      define SET_LOCK_HOLDER()
+#      define UNSET_LOCK_HOLDER()
+#      define I_HOLD_LOCK() FALSE
+		/* Used on platforms where locks can be reacquired,	*/
+		/* so it doesn't matter if we lie.			*/
+#  endif
+# else /* !THREADS */
+#    define LOCK()
+#    define UNLOCK()
+# endif /* !THREADS */
+# ifndef SET_LOCK_HOLDER
+#   define SET_LOCK_HOLDER()
+#   define UNSET_LOCK_HOLDER()
+#   define I_HOLD_LOCK() FALSE
+		/* Used on platforms where locks can be reacquired,	*/
+		/* so it doesn't matter if we lie.			*/
+# endif
+# ifndef ENTER_GC
+#   define ENTER_GC()
+#   define EXIT_GC()
+# endif
+
+# ifndef DCL_LOCK_STATE
+#   define DCL_LOCK_STATE
+# endif
+# ifndef FASTLOCK
+#   define FASTLOCK() LOCK()
+#   define FASTLOCK_SUCCEEDED() TRUE
+#   define FASTUNLOCK() UNLOCK()
+# endif
+
+#endif /* GC_LOCKS_H */
diff --git a/boehm-gc/include/private/gc_pmark.h b/boehm-gc/include/private/gc_pmark.h
new file mode 100644
index 00000000000..1592b6ff74b
--- /dev/null
+++ b/boehm-gc/include/private/gc_pmark.h
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
+ * Copyright (c) 2001 by Hewlett-Packard Company. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose,  provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ */
+
+/* Private declarations of GC marker data structures and macros */
+
+/*
+ * Declarations of mark stack.  Needed by marker and client supplied mark
+ * routines.  Transitively include gc_priv.h.
+ * (Note that gc_priv.h should not be included before this, since this
+ * includes dbg_mlc.h, which wants to include gc_priv.h AFTER defining
+ * I_HIDE_POINTERS.)
+ */
+#ifndef GC_PMARK_H
+# define GC_PMARK_H
+
+# ifdef KEEP_BACK_PTRS
+#   include "dbg_mlc.h"
+# endif
+# ifndef GC_MARK_H
+#   include "../gc_mark.h"
+# endif
+# ifndef GC_PRIVATE_H
+#   include "gc_priv.h"
+# endif
+
+/* The real declarations of the following is in gc_priv.h, so that	*/
+/* we can avoid scanning the following table.				*/
+/*
+extern mark_proc GC_mark_procs[MAX_MARK_PROCS];
+*/
+
+/* Mark descriptor stuff that should remain private for now, mostly
+ * because it's hard to export WORDSZ without including gcconfig.h.
+ * PROC(descr) indexes GC_mark_procs with the bits just above the tag;
+ * ENV(descr) is the rest of the word; MAX_ENV is the largest ENV value. */
+# define BITMAP_BITS (WORDSZ - GC_DS_TAG_BITS)
+# define PROC(descr) \
+	(GC_mark_procs[((descr) >> GC_DS_TAG_BITS) & (GC_MAX_MARK_PROCS-1)])
+# define ENV(descr) \
+	((descr) >> (GC_DS_TAG_BITS + GC_LOG_MAX_MARK_PROCS))
+# define MAX_ENV \
+  	(((word)1 << (WORDSZ - GC_DS_TAG_BITS - GC_LOG_MAX_MARK_PROCS)) - 1)
+
+
+extern word GC_n_mark_procs;
+
+/* Number of mark stack entries to discard on overflow.	*/
+#define GC_MARK_STACK_DISCARDS (INITIAL_MARK_STACK_SIZE/8)
+
+typedef struct GC_ms_entry {
+    GC_word * mse_start;   /* First word of object */
+    GC_word mse_descr;	/* Descriptor; low order two bits are tags	*/
+    			/* saying how to read the remaining bits	*/
+    			/* (presumably the GC_DS_* kinds -- confirm).	*/
+} mse;
+
+extern word GC_mark_stack_size;
+
+extern mse * GC_mark_stack_limit;
+
+#ifdef PARALLEL_MARK
+  extern mse * VOLATILE GC_mark_stack_top;
+#else
+  extern mse * GC_mark_stack_top;
+#endif
+
+extern mse * GC_mark_stack;
+
+#ifdef PARALLEL_MARK
+    /*
+     * Allow multiple threads to participate in the marking process.
+     * This works roughly as follows:
+     *  The main mark stack never shrinks, but it can grow.
+     *
+     *	The initiating thread holds the GC lock, and sets GC_help_wanted.
+     *  
+     *  Other threads:
+     *     1) update helper_count (while holding mark_lock.)
+     *	   2) allocate a local mark stack
+     *     repeatedly:
+     *		3) Steal a global mark stack entry by atomically replacing
+     *		   its descriptor with 0.
+     *		4) Copy it to the local stack.
+     *	        5) Mark on the local stack until it is empty, or
+     *		   it may be profitable to copy it back.
+     *	        6) If necessary, copy local stack to global one,
+     *		   holding mark lock.
+     *    7) Stop when the global mark stack is empty.
+     *    8) decrement helper_count (holding mark_lock).
+     *
+     * This is an experiment to see if we can do something along the lines
+     * of the University of Tokyo SGC in a less intrusive, though probably
+     * also less performant, way.
+     */
+    void GC_do_parallel_mark();
+		/* Initiate parallel marking.	*/
+
+    extern GC_bool GC_help_wanted;	/* Protected by mark lock	*/
+    extern unsigned GC_helper_count;	/* Number of running helpers.	*/
+					/* Protected by mark lock	*/
+    extern unsigned GC_active_count;	/* Number of active helpers.	*/
+					/* Protected by mark lock	*/
+					/* May increase and decrease	*/
+					/* within each mark cycle.  But	*/
+					/* once it returns to 0, it	*/
+					/* stays zero for the cycle.	*/
+    /* GC_mark_stack_top is also protected by mark lock.	*/
+    extern mse * VOLATILE GC_first_nonempty;
+					/* Lowest entry on mark stack	*/
+					/* that may be nonempty.	*/
+					/* Updated only by initiating 	*/
+					/* thread.			*/
+    /*
+     * GC_notify_all_marker() is used when GC_help_wanted is first set,
+     * when the last helper becomes inactive,
+     * when something is added to the global mark stack, and just after
+     * GC_mark_no is incremented.
+     * This could be split into multiple CVs (and probably should be to
+     * scale to really large numbers of processors.)
+     */
+#endif /* PARALLEL_MARK */
+
+ptr_t GC_find_start();
+
+mse * GC_signal_mark_stack_overflow();
+
+# ifdef GATHERSTATS
+#   define ADD_TO_ATOMIC(sz) GC_atomic_in_use += (sz)
+#   define ADD_TO_COMPOSITE(sz) GC_composite_in_use += (sz)
+# else
+#   define ADD_TO_ATOMIC(sz)
+#   define ADD_TO_COMPOSITE(sz)
+# endif
+
+/* Push obj (heap block header hhdr) onto the mark stack, unless the	*/
+/* block descriptor is 0.  May call GC_signal_mark_stack_overflow.	*/
+# define PUSH_OBJ(obj, hhdr, mark_stack_top, mark_stack_limit) \
+{ \
+    register word _descr = (hhdr) -> hb_descr; \
+        \
+    if (_descr == 0) { \
+    	ADD_TO_ATOMIC((hhdr) -> hb_sz); \
+    } else { \
+        ADD_TO_COMPOSITE((hhdr) -> hb_sz); \
+        mark_stack_top++; \
+        if (mark_stack_top >= mark_stack_limit) { \
+          mark_stack_top = GC_signal_mark_stack_overflow(mark_stack_top); \
+        } \
+        mark_stack_top -> mse_start = (obj); \
+        mark_stack_top -> mse_descr = _descr; \
+    } \
+}
+
+#ifdef PRINT_BLACK_LIST /* GC_find_start also receives source. */
+#   define GC_FIND_START(current, hhdr, source) \
+	GC_find_start(current, hhdr, source)
+#else /* !PRINT_BLACK_LIST: the source argument is dropped */
+#   define GC_FIND_START(current, hhdr, source) \
+	GC_find_start(current, hhdr)
+#endif /* PRINT_BLACK_LIST */
+
+/* Push the contents of current onto the mark stack if it is a valid	*/
+/* ptr to a currently unmarked object.  Mark it.  exit_label is a	*/
+/* label name that this macro itself defines at its end.  With a	*/
+/* standard-conforming compiler we could probably generate it.		*/
+# define PUSH_CONTENTS(current, mark_stack_top, mark_stack_limit, \
+		       source, exit_label) \
+{ \
+    hdr * my_hhdr; \
+    ptr_t my_current = current; \
+ \
+    GET_HDR(my_current, my_hhdr); \
+    if (IS_FORWARDING_ADDR_OR_NIL(my_hhdr)) { \
+         my_current = GC_FIND_START(my_current, my_hhdr, (word)source); \
+         if (my_current == 0) goto exit_label; \
+         my_hhdr = GC_find_header(my_current); \
+    } \
+    PUSH_CONTENTS_HDR(my_current, mark_stack_top, mark_stack_limit, \
+		  source, exit_label, my_hhdr);	\
+exit_label: ; \
+}
+
+/* As PUSH_CONTENTS, but use header cache (HC_GET_HDR) for the lookup.	*/
+# define HC_PUSH_CONTENTS(current, mark_stack_top, mark_stack_limit, \
+		       source, exit_label) \
+{ \
+    hdr * my_hhdr; \
+    ptr_t my_current = current; \
+ \
+    HC_GET_HDR(my_current, my_hhdr, source); \
+    PUSH_CONTENTS_HDR(my_current, mark_stack_top, mark_stack_limit, \
+		  source, exit_label, my_hhdr);	\
+exit_label: ; \
+}
+
+/* As HC_PUSH_CONTENTS, but for two pointers, handled interleaved.	*/
+# define HC_PUSH_CONTENTS2(current1, current2, mark_stack_top, \
+			   mark_stack_limit, \
+		           source1, source2, exit_label1, exit_label2) \
+{ \
+    hdr * hhdr1; \
+    ptr_t my_current1 = current1; \
+    hdr * hhdr2; \
+    ptr_t my_current2 = current2; \
+ \
+    HC_GET_HDR2(my_current1, hhdr1, source1, my_current2, hhdr2, source2); \
+    PUSH_CONTENTS_HDR(my_current1, mark_stack_top, mark_stack_limit, \
+		  source1, exit_label1, hhdr1);	\
+exit_label1: ; \
+    if (0 != hhdr2) { /* second pointer is skipped when its hdr is 0 */ \
+      PUSH_CONTENTS_HDR(my_current2, mark_stack_top, mark_stack_limit, \
+		  source2, exit_label2, hhdr2);	\
+    } \
+exit_label2: ; \
+}
+
+/* Set the mark for displ in hhdr; goto exit_label if already set.	*/
+
+# ifdef USE_MARK_BYTES
+    /* Unlike the mark bit case, there is a race here, and we may set	*/
+    /* the bit twice in the concurrent case.  This can result in the	*/
+    /* object being pushed twice.  But that's only a performance issue.	*/
+#   define SET_MARK_BIT_EXIT_IF_SET(hhdr,displ,exit_label) \
+    { \
+        register VOLATILE char * mark_byte_addr = \
+				hhdr -> hb_marks + ((displ) >> 1); \
+        register char mark_byte = *mark_byte_addr; \
+          \
+	if (mark_byte) goto exit_label; \
+	*mark_byte_addr = 1;  \
+    } 
+# else
+#   define SET_MARK_BIT_EXIT_IF_SET(hhdr,displ,exit_label) \
+    { \
+        register word * mark_word_addr = hhdr -> hb_marks + divWORDSZ(displ); \
+        register word mark_word = *mark_word_addr; \
+          \
+        OR_WORD_EXIT_IF_SET(mark_word_addr, (word)1 << modWORDSZ(displ), \
+			    exit_label); \
+    } 
+# endif /* USE_MARK_BYTES */
+
+/* If the mark bit corresponding to current is not set, set it, and	*/
+/* push the object on the mark stack.  Offsets the map rejects are	*/
+/* black-listed and we jump to exit_label.  Since we already have the	*/
+/* header, only the low order bits of current are used.  (Its value	*/
+/* doesn't matter if hhdr = GC_invalid_header.)				*/
+# define PUSH_CONTENTS_HDR(current, mark_stack_top, mark_stack_limit, \
+		           source, exit_label, hhdr) \
+{ \
+    int displ;  /* Displacement in block; first bytes, then words */ \
+    int map_entry; \
+    \
+    displ = HBLKDISPL(current); \
+    map_entry = MAP_ENTRY((hhdr -> hb_map), displ); \
+    displ = BYTES_TO_WORDS(displ); \
+    if (map_entry > CPP_MAX_OFFSET) { \
+	if (map_entry == OFFSET_TOO_BIG) { \
+	  map_entry = displ % (hhdr -> hb_sz); \
+	  displ -= map_entry; \
+	  if (displ + (hhdr -> hb_sz) > BYTES_TO_WORDS(HBLKSIZE)) { \
+	    GC_ADD_TO_BLACK_LIST_NORMAL((word)current, source); \
+	    goto exit_label; \
+	  } \
+	} else { \
+          GC_ADD_TO_BLACK_LIST_NORMAL((word)current, source); goto exit_label; \
+	} \
+    } else { \
+        displ -= map_entry; \
+    } \
+    GC_ASSERT(displ >= 0 && displ < MARK_BITS_PER_HBLK); \
+    SET_MARK_BIT_EXIT_IF_SET(hhdr, displ, exit_label); \
+    GC_STORE_BACK_PTR((ptr_t)source, (ptr_t)HBLKPTR(current) \
+				      + WORDS_TO_BYTES(displ)); \
+    PUSH_OBJ(((word *)(HBLKPTR(current)) + displ), hhdr, \
+    	     mark_stack_top, mark_stack_limit) \
+}
+
+#if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
+#   define PUSH_ONE_CHECKED_STACK(p, source) \
+	GC_mark_and_push_stack(p, (ptr_t)(source))
+#else /* source is not passed to GC_mark_and_push_stack */
+#   define PUSH_ONE_CHECKED_STACK(p, source) \
+	GC_mark_and_push_stack(p)
+#endif /* PRINT_BLACK_LIST || KEEP_BACK_PTRS */
+
+/*
+ * Push a single value onto mark stack. Mark from the object pointed to by p,
+ * unless p lies outside the plausible heap address range.  P is considered
+ * valid even if it is an interior pointer.  Previously marked objects are
+ * not pushed.  Hence we make progress even if the mark stack overflows.
+ */
+# define GC_PUSH_ONE_STACK(p, source) \
+    if ((ptr_t)(p) >= (ptr_t)GC_least_plausible_heap_addr 	\
+	 && (ptr_t)(p) < (ptr_t)GC_greatest_plausible_heap_addr) {	\
+	 PUSH_ONE_CHECKED_STACK(p, source);	\
+    }
+
+/*
+ * As GC_PUSH_ONE_STACK, but with interior pointer recognition as is
+ * normal for heap pointers.  Assigns the result to GC_mark_stack_top.
+ */
+# define GC_PUSH_ONE_HEAP(p,source) \
+    if ((ptr_t)(p) >= (ptr_t)GC_least_plausible_heap_addr 	\
+	 && (ptr_t)(p) < (ptr_t)GC_greatest_plausible_heap_addr) {	\
+	    GC_mark_stack_top = GC_mark_and_push( \
+			    (GC_PTR)(p), GC_mark_stack_top, \
+			    GC_mark_stack_limit, (GC_PTR *)(source)); \
+    }
+
+/* Mark starting at mark stack entry top (incl.) down to	*/
+/* mark stack entry bottom (incl.).  Stop after performing	*/
+/* about one page worth of work.  Return the new mark stack	*/
+/* top entry.							*/
+mse * GC_mark_from GC_PROTO((mse * top, mse * bottom, mse *limit));
+
+#define MARK_FROM_MARK_STACK() \
+	GC_mark_stack_top = GC_mark_from(GC_mark_stack_top, \
+					 GC_mark_stack, \
+					 GC_mark_stack + GC_mark_stack_size);
+
+/*
+ * Mark from one finalizable object using the specified mark proc.
+ * Normally leaves real_ptr itself unmarked; that is the caller's job.
+ * But if GC_mark_state != MS_NONE afterwards, real_ptr is marked and
+ * GC_mark_some is called until it returns nonzero. */
+# define GC_MARK_FO(real_ptr, mark_proc) \
+{ \
+    (*(mark_proc))(real_ptr); \
+    while (!GC_mark_stack_empty()) MARK_FROM_MARK_STACK(); \
+    if (GC_mark_state != MS_NONE) { \
+        GC_set_mark_bit(real_ptr); \
+        while (!GC_mark_some((ptr_t)0)); \
+    } \
+}
+
+extern GC_bool GC_mark_stack_too_small;
+				/* We need a larger mark stack.  May be	*/
+				/* set by client supplied mark routines.*/
+
+typedef int mark_state_t;	/* Current state of marking, as follows:*/
+				/* Used to remember where we are during */
+				/* concurrent marking.			*/
+
+				/* We say something is dirty if it was	*/
+				/* written since the last time we	*/
+				/* retrieved dirty bits.  We say it's 	*/
+				/* grungy if it was marked dirty in the	*/
+				/* last set of bits we retrieved.	*/
+				
+				/* Invariant I: all roots and marked	*/
+				/* objects p are either dirty, or point */
+				/* to objects q that are either marked 	*/
+				/* or a pointer to q appears in a range	*/
+				/* on the mark stack.			*/
+
+# define MS_NONE 0		/* No marking in progress. I holds.	*/
+				/* Mark stack is empty.			*/
+
+# define MS_PUSH_RESCUERS 1	/* Rescuing objects are currently 	*/
+				/* being pushed.  I holds, except	*/
+				/* that grungy roots may point to 	*/
+				/* unmarked objects, as may marked	*/
+				/* grungy objects above scan_ptr.	*/
+
+# define MS_PUSH_UNCOLLECTABLE 2
+				/* I holds, except that marked 		*/
+				/* uncollectable objects above scan_ptr */
+				/* may point to unmarked objects.	*/
+				/* Roots may point to unmarked objects	*/
+
+# define MS_ROOTS_PUSHED 3	/* I holds, mark stack may be nonempty  */
+
+# define MS_PARTIALLY_INVALID 4	/* I may not hold, e.g. because of M.S. */
+				/* overflow.  However marked heap	*/
+				/* objects below scan_ptr point to	*/
+				/* marked or stacked objects.		*/
+
+# define MS_INVALID 5		/* I may not hold.			*/
+
+extern mark_state_t GC_mark_state;
+
+#endif  /* GC_PMARK_H */
+
diff --git a/boehm-gc/include/private/solaris_threads.h b/boehm-gc/include/private/solaris_threads.h
new file mode 100644
index 00000000000..b2cdb36e98d
--- /dev/null
+++ b/boehm-gc/include/private/solaris_threads.h
@@ -0,0 +1,34 @@
+#ifdef SOLARIS_THREADS
+
+/* The set of all known threads.  We intercept thread creation and     */
+/* joins.  We never actually create detached threads.  We allocate all */
+/* new thread stacks ourselves.  These allow us to maintain this       */
+/* data structure.                                                     */
+/* Protected by GC_thr_lock.                                           */
+/* Some of this should be declared volatile, but that's inconsistent   */
+/* with some library routine declarations.  In particular, the 	       */
+/* definition of cond_t doesn't mention volatile!                      */
+  typedef struct GC_Thread_Rep {
+    struct GC_Thread_Rep * next;
+    thread_t id;
+    word flags;	/* presumably an OR of the bits below -- confirm */
+#      define FINISHED 1       /* Thread has exited.   */
+#      define DETACHED 2       /* Thread is intended to be detached.   */
+#      define CLIENT_OWNS_STACK        4
+                               /* Stack was supplied by client.        */
+#      define SUSPENDED 8      /* Currently suspended. */
+    ptr_t stack;
+    size_t stack_size;
+    cond_t join_cv;
+    void * status;
+  } * GC_thread;
+  extern GC_thread GC_new_thread(thread_t id);
+
+  extern GC_bool GC_thr_initialized;
+  extern volatile GC_thread GC_threads[];
+  extern size_t GC_min_stack_sz;
+  extern size_t GC_page_sz;
+  extern void GC_thr_init(void);
+
+# endif /* SOLARIS_THREADS */
+
diff --git a/boehm-gc/include/private/specific.h b/boehm-gc/include/private/specific.h
new file mode 100644
index 00000000000..60c152c6fd0
--- /dev/null
+++ b/boehm-gc/include/private/specific.h
@@ -0,0 +1,83 @@
+/*
+ * This is a reimplementation of a subset of the pthread_getspecific/setspecific
+ * interface. This appears to outperform the standard linuxthreads one
+ * by a significant margin.
+ * The major restriction is that each thread may only make a single
+ * pthread_setspecific call on a single key.  (The current data structure
+ * doesn't really require that.  The restriction should be easily removable.)
+ * We don't currently support the destruction functions, though that
+ * could be done.
+ * We also currently assume that only one pthread_setspecific call
+ * can be executed at a time, though that assumption would be easy to remove
+ * by adding a lock.
+ */
+
+#include <errno.h>
+
+/* Called during key creation or setspecific.		*/
+/* For the GC we already hold lock.			*/
+/* Currently allocated objects leak on thread exit.	*/
+/* That's hard to fix, but OK if we allocate garbage	*/
+/* collected memory.					*/
+#define MALLOC_CLEAR(n) GC_INTERNAL_MALLOC(n, NORMAL)
+#define PREFIXED(name) GC_##name
+
+#define TS_CACHE_SIZE 1024	/* Power of 2: CACHE_HASH masks with size-1. */
+#define CACHE_HASH(n) (((((long)(n)) >> 8) ^ (long)(n)) & (TS_CACHE_SIZE - 1))
+#define TS_HASH_SIZE 1024	/* Power of 2: HASH masks with size-1. */
+#define HASH(n) (((((long)(n)) >> 8) ^ (long)(n)) & (TS_HASH_SIZE - 1))
+
+typedef struct thread_specific_entry {
+	unsigned long qtid;	/* quick thread id, only for cache */
+	void * value;		/* returned by getspecific on a cache hit */
+	pthread_t thread;	/* presumably the owning thread -- confirm */
+	struct thread_specific_entry *next; /* presumably hash chain link */
+} tse;
+
+
+/* We represent each thread-specific datum as two tables.  The first is	*/
+/* a cache, indexed by a "quick thread identifier".  The "quick" thread	*/
+/* identifier is an easy to compute value, which is guaranteed to	*/
+/* determine the thread, though a thread may correspond to more than	*/
+/* one value.  We typically use the address of a page in the stack.	*/
+/* The second is a hash table, indexed by pthread_self().  It is used	*/
+/* only as a backup.							*/
+
+/* Return the "quick thread id".  Default version.  Assumes page size,	*/
+/* or at least thread stack separation, is at least 4K.			*/
+static __inline__ unsigned long quick_thread_id() {
+    int dummy;	/* Only its (stack) address is used.	*/
+    return (unsigned long)(&dummy) >> 12; /* Unsigned: != INVALID_QTID */
+}
+
+#define INVALID_QTID ((unsigned long)(-1))
+
+typedef struct thread_specific_data {
+    tse * volatile cache[TS_CACHE_SIZE];
+			/* A faster index to the hash table (by CACHE_HASH) */
+    tse * hash[TS_HASH_SIZE];	/* Backup table, indexed by pthread_self(). */
+    pthread_mutex_t lock;
+} tsd;
+
+typedef tsd * PREFIXED(key_t);
+
+extern int PREFIXED(key_create) (tsd ** key_ptr, void (* destructor)(void *));
+
+extern int PREFIXED(setspecific) (tsd * key, void * value);
+
+extern void PREFIXED(remove_specific) (tsd * key);
+
+/* An internal version of getspecific that assumes a cache miss.	*/
+void * PREFIXED(slow_getspecific) (tsd * key, unsigned long qtid,
+				   tse * volatile * cache_entry);
+
+static __inline__ void * PREFIXED(getspecific) (tsd * key) {
+    long my_qtid = quick_thread_id();
+    unsigned slot = CACHE_HASH(my_qtid);
+    tse * volatile * slot_ptr = &(key -> cache[slot]);
+    tse * cached = *slot_ptr;	/* Must be read exactly once.	*/
+    return (cached -> qtid == my_qtid) ? cached -> value
+	   : PREFIXED(slow_getspecific) (key, my_qtid, slot_ptr);
+}
+
+