Diffstat (limited to 'libstdc++-v3/include')
21 files changed, 7048 insertions, 2375 deletions
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am index 68e8e35b3de..0d002273c57 100644 --- a/libstdc++-v3/include/Makefile.am +++ b/libstdc++-v3/include/Makefile.am @@ -620,6 +620,7 @@ c_base_headers = \ ${c_base_srcdir}/csetjmp \ ${c_base_srcdir}/csignal \ ${c_base_srcdir}/cstdarg \ + ${c_base_srcdir}/cstdatomic \ ${c_base_srcdir}/cstdbool \ ${c_base_srcdir}/cstddef \ ${c_base_srcdir}/cstdint \ @@ -643,7 +644,8 @@ if GLIBCXX_C_HEADERS_C_GLOBAL c_compatibility_headers = \ ${c_compatibility_srcdir}/complex.h \ ${c_compatibility_srcdir}/fenv.h \ - ${c_compatibility_srcdir}/tgmath.h + ${c_compatibility_srcdir}/tgmath.h \ + ${c_compatibility_srcdir}/stdatomic.h endif if GLIBCXX_C_HEADERS_C diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in index 94038f8435f..9bd5091f132 100644 --- a/libstdc++-v3/include/Makefile.in +++ b/libstdc++-v3/include/Makefile.in @@ -49,6 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/enable.m4 \ $(top_srcdir)/../config/lib-prefix.m4 \ $(top_srcdir)/../config/multi.m4 \ $(top_srcdir)/../config/no-executables.m4 \ + $(top_srcdir)/../config/proginstall.m4 \ $(top_srcdir)/../config/unwind_ipinfo.m4 \ $(top_srcdir)/../libtool.m4 $(top_srcdir)/../ltoptions.m4 \ $(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \ @@ -867,6 +868,7 @@ c_base_headers = \ ${c_base_srcdir}/csetjmp \ ${c_base_srcdir}/csignal \ ${c_base_srcdir}/cstdarg \ + ${c_base_srcdir}/cstdatomic \ ${c_base_srcdir}/cstdbool \ ${c_base_srcdir}/cstddef \ ${c_base_srcdir}/cstdint \ @@ -885,7 +887,8 @@ c_compatibility_builddir = . @GLIBCXX_C_HEADERS_C_GLOBAL_TRUE@c_compatibility_headers = \ @GLIBCXX_C_HEADERS_C_GLOBAL_TRUE@ ${c_compatibility_srcdir}/complex.h \ @GLIBCXX_C_HEADERS_C_GLOBAL_TRUE@ ${c_compatibility_srcdir}/fenv.h \ -@GLIBCXX_C_HEADERS_C_GLOBAL_TRUE@ ${c_compatibility_srcdir}/tgmath.h +@GLIBCXX_C_HEADERS_C_GLOBAL_TRUE@ ${c_compatibility_srcdir}/tgmath.h \ +@GLIBCXX_C_HEADERS_C_GLOBAL_TRUE@ ${c_compatibility_srcdir}/stdatomic.h @GLIBCXX_C_HEADERS_C_STD_TRUE@c_compatibility_headers = @GLIBCXX_C_HEADERS_C_TRUE@c_compatibility_headers = \ diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config index d42cb9feb47..e38cfe05184 100644 --- a/libstdc++-v3/include/bits/c++config +++ b/libstdc++-v3/include/bits/c++config @@ -1,7 +1,7 @@ // Predefined symbols and macros -*- C++ -*- // Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, -// 2006, 2007 Free Software Foundation, Inc. +// 2006, 2007, 2008 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the @@ -216,6 +216,20 @@ namespace std } #endif +// Defines for C compatibility. In particular, define extern "C" +// linkage only when using C++, same with namespaces. +#if __cplusplus +# define _GLIBCXX_BEGIN_EXTERN_C extern "C" { +# define _GLIBCXX_END_EXTERN_C } +#else +# define _GLIBCXX_BEGIN_EXTERN_C +# define _GLIBCXX_END_EXTERN_C +# undef _GLIBCXX_BEGIN_NAMESPACE +# undef _GLIBCXX_END_NAMESPACE +# define _GLIBCXX_BEGIN_NAMESPACE(X) +# define _GLIBCXX_END_NAMESPACE +#endif + // Define if compatibility should be provided for -mlong-double-64. 
#undef _GLIBCXX_LONG_DOUBLE_COMPAT diff --git a/libstdc++-v3/include/c_compatibility/stdatomic.h b/libstdc++-v3/include/c_compatibility/stdatomic.h new file mode 100644 index 00000000000..e5f7dcfe6c1 --- /dev/null +++ b/libstdc++-v3/include/c_compatibility/stdatomic.h @@ -0,0 +1,387 @@ +// -*- C++ -*- compatibility header. + +// Copyright (C) 2008 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 51 Franklin Street, Fifth Floor, +// Boston, MA 02110-1301, USA. + +// As a special exception, you may use this file as part of a free software +// library without restriction. Specifically, if other files instantiate +// templates or use macros or inline functions from this file, or you compile +// this file and link it with other files to produce an executable, this +// file does not by itself cause the resulting executable to be covered by +// the GNU General Public License. This exception does not however +// invalidate any other reasons why the executable file might be covered by +// the GNU General Public License. + +/** @file stdatomic.h + * This is a Standard C++ Library header. + */ + +#include <bits/c++config.h> +#include <stddef.h> +#include <stdbool.h> // XXX need to define bool w/o stdbool.h in tr1/cstdbool + +#ifndef _GLIBCXX_STDATOMIC_H +#define _GLIBCXX_STDATOMIC_H 1 + +_GLIBCXX_BEGIN_NAMESPACE(std) +_GLIBCXX_BEGIN_EXTERN_C + + /// Enumeration for memory_order + typedef enum memory_order + { + memory_order_relaxed, + memory_order_acquire, + memory_order_release, + memory_order_acq_rel, + memory_order_seq_cst + } memory_order; + + + // Base for atomic_flag. + struct __atomic_flag_base + { + bool _M_b; + }; + + // Base for atomic_address + struct __atomic_address_base + { + void* _M_i; + }; + + // POD base classes for atomic intgral types. + struct __atomic_bool_base + { + bool _M_i; + }; + + struct __atomic_char_base + { + char _M_i; + }; + + struct __atomic_schar_base + { + signed char _M_i; + }; + + struct __atomic_uchar_base + { + unsigned char _M_i; + }; + + struct __atomic_short_base + { + short _M_i; + }; + + struct __atomic_ushort_base + { + unsigned short _M_i; + }; + + struct __atomic_int_base + { + int _M_i; + }; + + struct __atomic_uint_base + { + unsigned int _M_i; + }; + + struct __atomic_long_base + { + long _M_i; + }; + + struct __atomic_ulong_base + { + unsigned long _M_i; + }; + + struct __atomic_llong_base + { + long long _M_i; + }; + + struct __atomic_ullong_base + { + unsigned long long _M_i; + }; + + struct __atomic_wchar_t_base + { + wchar_t _M_i; + }; + + // Switch atomic integral base types based on C or C++. In + // addition, for "C" only provide type-generic macros for atomic + // operations. (As C++ accomplishes the same thing with sets of + // overloaded functions. 
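The POD base classes above exist so that the C and the C++ view of each atomic type share a single layout: when this header is compiled as C, atomic_int is simply a typedef for __atomic_int_base, while the C++ atomic_int defined in <cstdatomic> carries the same base as its only data member. A minimal sketch of the intended interoperability, with __c_side_touch standing in for a hypothetical routine compiled as C (not part of this patch):

    #include <cstdatomic>

    // Hypothetical C routine; on the C side its parameter type is just
    // 'struct __atomic_int_base*', i.e. the whole of the C atomic_int.
    extern "C" void __c_side_touch(std::__atomic_int_base*);

    void
    __cxx_side(std::atomic_int& __i)
    {
      __c_side_touch(&__i._M_base);   // hand the embedded POD to the C side
      __i.fetch_add(1);               // or operate on the same object from C++
    }
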
+#ifdef __cplusplus + +#define ATOMIC_FLAG_INIT { { false } } +#define _ATOMIC_MEMBER_ ((__a)->_M_base._M_i) + +extern "C++" +{ + struct atomic_flag; + struct atomic_address; + struct atomic_bool; + struct atomic_char; + struct atomic_schar; + struct atomic_uchar; + struct atomic_short; + struct atomic_ushort; + struct atomic_int; + struct atomic_uint; + struct atomic_long; + struct atomic_ulong; + struct atomic_llong; + struct atomic_ullong; + struct atomic_wchar_t; + template<typename _Tp> + struct atomic; +} +#else + +#define ATOMIC_FLAG_INIT { false } +#define _ATOMIC_MEMBER_ ((__a)->_M_i) + + typedef struct __atomic_flag_base atomic_flag; + typedef struct __atomic_address_base atomic_address; + typedef struct __atomic_bool_base atomic_bool; + typedef struct __atomic_char_base atomic_char; + typedef struct __atomic_schar_base atomic_schar; + typedef struct __atomic_uchar_base atomic_uchar; + typedef struct __atomic_short_base atomic_short; + typedef struct __atomic_ushort_base atomic_ushort; + typedef struct __atomic_int_base atomic_int; + typedef struct __atomic_uint_base atomic_uint; + typedef struct __atomic_long_base atomic_long; + typedef struct __atomic_ulong_base atomic_ulong; + typedef struct __atomic_llong_base atomic_llong; + typedef struct __atomic_ullong_base atomic_ullong; + typedef struct __atomic_wchar_t_base atomic_wchar_t; + +#define atomic_is_lock_free(__a) \ + false + +#define atomic_load(__a) \ + _ATOMIC_LOAD_(__a, memory_order_seq_cst) + +#define atomic_load_explicit(__a, __x) \ + _ATOMIC_LOAD_(__a, __x) + +#define atomic_store(__a, __m) \ + _ATOMIC_STORE_(__a, __m, memory_order_seq_cst) + +#define atomic_store_explicit(__a, __m, __x) \ + _ATOMIC_STORE_(__a, __m, __x) + +#define atomic_swap(__a, __m) \ + _ATOMIC_MODIFY_(__a, =, __m, memory_order_seq_cst) + +#define atomic_swap_explicit(__a, __m, __x) \ + _ATOMIC_MODIFY_(__a, =, __m, __x) + +#define atomic_compare_swap(__a, __e, __m) \ + _ATOMIC_CMPSWP_(__a, __e, __m, memory_order_seq_cst) + +#define atomic_compare_swap_explicit(__a, __e, __m, __x, __y) \ + _ATOMIC_CMPSWP_(__a, __e, __m, __x) + +#define atomic_fence(__a, __x) \ + ({ _ATOMIC_FENCE_(__a, __x); }) + +#define atomic_fetch_add_explicit(__a, __m, __x) \ + _ATOMIC_MODIFY_(__a, +=, __m, __x) + +#define atomic_fetch_add(__a, __m) \ + _ATOMIC_MODIFY_(__a, +=, __m, memory_order_seq_cst) + +#define atomic_fetch_sub_explicit(__a, __m, __x) \ + _ATOMIC_MODIFY_(__a, -=, __m, __x) + +#define atomic_fetch_sub(__a, __m) \ + _ATOMIC_MODIFY_(__a, -=, __m, memory_order_seq_cst) + +#define atomic_fetch_and_explicit(__a, __m, __x) \ + _ATOMIC_MODIFY_(__a, &=, __m, __x) + +#define atomic_fetch_and(__a, __m) \ + _ATOMIC_MODIFY_(__a, &=, __m, memory_order_seq_cst) + +#define atomic_fetch_or_explicit(__a, __m, __x) \ + _ATOMIC_MODIFY_(__a, |=, __m, __x) + +#define atomic_fetch_or(__a, __m) \ + _ATOMIC_MODIFY_(__a, |=, __m, memory_order_seq_cst) + +#define atomic_fetch_xor_explicit(__a, __m, __x) \ + _ATOMIC_MODIFY_(__a, ^=, __m, __x) + +#define atomic_fetch_xor(__a, __m) \ + _ATOMIC_MODIFY_(__a, ^=, __m, memory_order_seq_cst) + +#endif + + // Typedefs for other atomic integral types. 
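Before the typedefs announced just above, a short sketch of the common C/C++ spelling: in C the branch just closed supplies these operations as type-generic macros, while in C++ the same names are overloaded functions declared as friends of each atomic type and defined inline in <cstdatomic> (the pattern is visible for atomic_bool, atomic_address and atomic_char near the end of this patch). The sketch is written as C++ and assumes the atomic_int overloads follow that same pattern:

    #include <cstdatomic>

    int
    __sketch()
    {
      std::atomic_int __c(0);

      // The same calls a C translation unit would make through the macros above.
      atomic_store_explicit(&__c, 5, std::memory_order_release);
      atomic_fetch_add_explicit(&__c, 1, std::memory_order_acq_rel);
      return atomic_load_explicit(&__c, std::memory_order_acquire);
    }
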
+ typedef atomic_schar atomic_int_least8_t; + typedef atomic_uchar atomic_uint_least8_t; + typedef atomic_short atomic_int_least16_t; + typedef atomic_ushort atomic_uint_least16_t; + typedef atomic_int atomic_int_least32_t; + typedef atomic_uint atomic_uint_least32_t; + typedef atomic_llong atomic_int_least64_t; + typedef atomic_ullong atomic_uint_least64_t; + + typedef atomic_schar atomic_int_fast8_t; + typedef atomic_uchar atomic_uint_fast8_t; + typedef atomic_short atomic_int_fast16_t; + typedef atomic_ushort atomic_uint_fast16_t; + typedef atomic_int atomic_int_fast32_t; + typedef atomic_uint atomic_uint_fast32_t; + typedef atomic_llong atomic_int_fast64_t; + typedef atomic_ullong atomic_uint_fast64_t; + + typedef atomic_long atomic_intptr_t; + typedef atomic_ulong atomic_uintptr_t; + + typedef atomic_long atomic_ssize_t; + typedef atomic_ulong atomic_size_t; + + typedef atomic_llong atomic_intmax_t; + typedef atomic_ullong atomic_uintmax_t; + + typedef atomic_long atomic_ptrdiff_t; + + typedef atomic_int_least16_t atomic_char16_t; + typedef atomic_int_least32_t atomic_char32_t; + + // Accessor functions for atomic_flag. + extern bool + atomic_flag_test_and_set(volatile atomic_flag*); + + extern bool + atomic_flag_test_and_set_explicit(volatile atomic_flag*, memory_order); + + extern void + atomic_flag_clear(volatile atomic_flag*); + + extern void + atomic_flag_clear_explicit(volatile atomic_flag*, memory_order); + + extern void + atomic_flag_fence(const volatile atomic_flag*, memory_order); + + extern void + __atomic_flag_wait_explicit(volatile atomic_flag*, memory_order); + + extern volatile atomic_flag* + __atomic_flag_for_address(const volatile void* __z) __attribute__((const)); + + // External object. + extern const atomic_flag atomic_global_fence_compatibility; + + /// 29.2 Lock-free Property +#define ATOMIC_INTEGRAL_LOCK_FREE 0 +#define ATOMIC_ADDRESS_LOCK_FREE 0 + + // Implementation specific defines. 
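The implementation-specific defines that follow serialize every operation with a spin lock drawn from a pool keyed by the object's address (via __atomic_flag_for_address); the atomic_flag operations declared just above are the primitive everything else is built from. As a usage sketch of that primitive, a flag can serve as a simple spin lock, assuming a namespace-scope flag that is zero-initialized, equivalent to ATOMIC_FLAG_INIT:

    #include <cstdatomic>

    std::atomic_flag __lock;   // namespace scope: zero-initialized, as if by ATOMIC_FLAG_INIT

    void
    __locked_update(int& __shared, int __v)
    {
      // Spin until test_and_set returns false, i.e. this thread set the flag.
      while (atomic_flag_test_and_set_explicit(&__lock, std::memory_order_acquire))
        { }
      __shared = __v;          // critical section
      atomic_flag_clear_explicit(&__lock, std::memory_order_release);
    }
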
+#define _ATOMIC_LOAD_(__a, __x) \ + ({ volatile __typeof__ _ATOMIC_MEMBER_* __p = &_ATOMIC_MEMBER_; \ + volatile atomic_flag* __g = __atomic_flag_for_address(__p); \ + __atomic_flag_wait_explicit(__g, __x); \ + __typeof__ _ATOMIC_MEMBER_ __r = *__p; \ + atomic_flag_clear_explicit(__g, __x); \ + __r; }) + +#define _ATOMIC_STORE_(__a, __m, __x) \ + ({ volatile __typeof__ _ATOMIC_MEMBER_* __p = &_ATOMIC_MEMBER_; \ + __typeof__(__m) __v = (__m); \ + volatile atomic_flag* __g = __atomic_flag_for_address(__p); \ + __atomic_flag_wait_explicit(__g, __x); \ + *__p = __v; \ + atomic_flag_clear_explicit(__g, __x); \ + __v; }) + +#define _ATOMIC_MODIFY_(__a, __o, __m, __x) \ + ({ volatile __typeof__ _ATOMIC_MEMBER_* __p = &_ATOMIC_MEMBER_; \ + __typeof__(__m) __v = (__m); \ + volatile atomic_flag* __g = __atomic_flag_for_address(__p); \ + __atomic_flag_wait_explicit(__g, __x); \ + __typeof__ _ATOMIC_MEMBER_ __r = *__p; \ + *__p __o __v; \ + atomic_flag_clear_explicit(__g, __x); \ + __r; }) + +#define _ATOMIC_CMPSWP_(__a, __e, __m, __x) \ + ({ volatile __typeof__ _ATOMIC_MEMBER_* __p = &_ATOMIC_MEMBER_; \ + __typeof__(__e) __q = (__e); \ + __typeof__(__m) __v = (__m); \ + bool __r; \ + volatile atomic_flag* __g = __atomic_flag_for_address(__p); \ + __atomic_flag_wait_explicit(__g, __x); \ + __typeof__ _ATOMIC_MEMBER_ __t__ = *__p; \ + if (__t__ == *__q) { *__p = __v; __r = true; } \ + else { *__q = __t__; __r = false; } \ + atomic_flag_clear_explicit(__g, __x); \ + __r; }) + +#define _ATOMIC_FENCE_(__a, __x) \ + ({ volatile __typeof__ _ATOMIC_MEMBER_* __p = &_ATOMIC_MEMBER_; \ + volatile atomic_flag* __g = __atomic_flag_for_address(__p); \ + atomic_flag_fence(__g, __x); \ + }) + +_GLIBCXX_END_EXTERN_C +_GLIBCXX_END_NAMESPACE + +#ifdef __cplusplus +// Inject into global namespace iff C++. +using std::memory_order; +using std::memory_order_relaxed; +using std::memory_order_acquire; +using std::memory_order_release; +using std::memory_order_acq_rel; +using std::memory_order_seq_cst; + +using std::atomic_flag; + +using std::atomic_bool; +using std::atomic_char; +using std::atomic_schar; +using std::atomic_uchar; +using std::atomic_short; +using std::atomic_ushort; +using std::atomic_int; +using std::atomic_uint; +using std::atomic_long; +using std::atomic_ulong; +using std::atomic_llong; +using std::atomic_ullong; +using std::atomic_wchar_t; + +using std::atomic_address; +using std::atomic; + +#endif + +#endif diff --git a/libstdc++-v3/include/c_global/cstdatomic b/libstdc++-v3/include/c_global/cstdatomic new file mode 100644 index 00000000000..22fde89603b --- /dev/null +++ b/libstdc++-v3/include/c_global/cstdatomic @@ -0,0 +1,4100 @@ +// -*- C++ -*- header. + +// Copyright (C) 2008 +// Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 51 Franklin Street, Fifth Floor, +// Boston, MA 02110-1301, USA. 
+ +// As a special exception, you may use this file as part of a free software +// library without restriction. Specifically, if other files instantiate +// templates or use macros or inline functions from this file, or you compile +// this file and link it with other files to produce an executable, this +// file does not by itself cause the resulting executable to be covered by +// the GNU General Public License. This exception does not however +// invalidate any other reasons why the executable file might be covered by +// the GNU General Public License. + +/** @file cstdatomic + * This is a Standard C++ Library file. You should @c #include this file + * in your programs, rather than any of the "*.h" implementation files. + * + * This is the C++ version of the Standard C Library header @c stdatomic.h, + * and its contents are (mostly) the same as that header, but are all + * contained in the namespace @c std (except for names which are defined + * as macros in C). + */ + +// Based on "C++ Atomic Types and Operations" by Hans Boehm and Lawrence Crowl. +// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2427.html + +#ifndef _GLIBCXX_STDATOMIC +#define _GLIBCXX_STDATOMIC 1 + +#pragma GCC system_header + +#ifndef __GXX_EXPERIMENTAL_CXX0X__ +# include <c++0x_warning.h> +#endif + +#include <stdatomic.h> +#include <cstddef> + +_GLIBCXX_BEGIN_NAMESPACE(std) + + // Can either subclass or encapsulate "C" functionality, and here + // encapsulating works with C++2003's version of POD and so is + // portable across C++2003/200x. + // Both end up being sub-optimal in terms of a constructor + // initialization list, but oh well. + + /// atomic_flag + struct atomic_flag + { + __atomic_flag_base _M_base; + + bool + test_and_set(memory_order __x = memory_order_seq_cst) volatile + { return atomic_flag_test_and_set_explicit(this, __x); } + + void + clear(memory_order __x = memory_order_seq_cst) volatile + { atomic_flag_clear_explicit(this, __x); } + + void + fence(memory_order __x) const volatile + { atomic_flag_fence(this, __x); } + +#if _GLIBCXX_USE_STANDARD_LAYOUT + // Add in non-trivial default constructor that correctly + // initializes member "as if" by ATOMIC_FLAG_INIT. 
+ atomic_flag() { _M_base._M_b = false; } + + private: + atomic_flag(const atomic_flag&); + atomic_flag& operator=(const atomic_flag&); +#endif + }; + + /// 29.4.2, address types + typedef struct atomic_address + { + __atomic_address_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(void*, memory_order = memory_order_seq_cst) volatile; + + void* + load(memory_order = memory_order_seq_cst) volatile; + + void* + swap(void*, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(void*&, void*, memory_order, memory_order) volatile; + + bool + compare_swap(void*&, void*, memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + void* + fetch_add(ptrdiff_t, memory_order = memory_order_seq_cst) volatile; + + void* + fetch_sub(ptrdiff_t, memory_order = memory_order_seq_cst) volatile; + + void* + operator=(void* __v) volatile + { store(__v); return __v; } + + void* + operator+=(ptrdiff_t __v) volatile + { return fetch_add(__v); } + + void* + operator-=(ptrdiff_t __v) volatile + { return fetch_sub(__v); } + + friend void + atomic_store_explicit(volatile atomic_address*, void*, memory_order); + + friend void* + atomic_load_explicit(volatile atomic_address*, memory_order); + + friend void* + atomic_swap_explicit(volatile atomic_address*, void*, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_address*, void**, void*, + memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_address*, memory_order); + + friend void* + atomic_fetch_add_explicit(volatile atomic_address*, ptrdiff_t, + memory_order); + + friend void* + atomic_fetch_sub_explicit(volatile atomic_address*, ptrdiff_t, + memory_order); + + atomic_address() { } + + explicit atomic_address(void* __v) + { _M_base._M_i = __v; } + + private: + atomic_address(const atomic_address&); + atomic_address& operator=(const atomic_address &); + }; + + + // 29.4.1 atomic integral types + // For each of the integral types, define atomic_[integral type] struct + // + // atomic_bool bool + // atomic_char char + // atomic_schar signed char + // atomic_uchar unsigned char + // atomic_short short + // atomic_ushort unsigned short + // atomic_int int + // atomic_uint unsigned int + // atomic_long long + // atomic_ulong unsigned long + // atomic_llong long long + // atomic_ullong unsigned long long + // atomic_char16_t char16_t + // atomic_char32_t char32_t + // atomic_wchar_t wchar_t + + /// atomic_bool + struct atomic_bool + { + __atomic_bool_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(bool, memory_order = memory_order_seq_cst) volatile; + + bool + load(memory_order = memory_order_seq_cst) volatile; + + bool + swap(bool, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(bool&, bool, memory_order, memory_order) volatile; + + bool + compare_swap(bool&, bool, memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + bool + operator=(bool __v) volatile { store(__v); return __v; } + + friend void + atomic_store_explicit(volatile atomic_bool*, bool, memory_order); + + friend bool + atomic_load_explicit(volatile atomic_bool*, memory_order); + + friend bool + atomic_swap_explicit(volatile atomic_bool*, bool, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_bool*, bool*, bool, + memory_order, memory_order); + friend void + atomic_fence(const volatile atomic_bool*, memory_order); + + atomic_bool() { } + + explicit 
atomic_bool(bool __v) { _M_base._M_i = __v; } + + private: + atomic_bool(const atomic_bool&); + atomic_bool& operator=(const atomic_bool&); + }; + + /// atomic_char + struct atomic_char + { + __atomic_char_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(char, memory_order = memory_order_seq_cst) volatile; + + char + load(memory_order = memory_order_seq_cst) volatile; + + char + swap(char, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(char&, char, memory_order, memory_order) volatile; + + bool + compare_swap(char&, char, memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + char + fetch_add(char, memory_order = memory_order_seq_cst) volatile; + + char + fetch_sub(char, memory_order = memory_order_seq_cst) volatile; + + char + fetch_and(char, memory_order = memory_order_seq_cst) volatile; + + char + fetch_or(char, memory_order = memory_order_seq_cst) volatile; + + char + fetch_xor(char, memory_order = memory_order_seq_cst) volatile; + + char + operator=(char __v) volatile { store(__v); return __v; } + + char + operator++(int) volatile { return fetch_add(1); } + + char + operator--(int) volatile { return fetch_sub(1); } + + char + operator++() volatile { return fetch_add(1) + 1; } + + char + operator--() volatile { return fetch_sub(1) - 1; } + + char + operator+=(char __v) volatile { return fetch_add(__v) + __v; } + + char + operator-=(char __v) volatile { return fetch_sub(__v) - __v; } + + char + operator&=(char __v) volatile { return fetch_and(__v) & __v; } + + char + operator|=(char __v) volatile { return fetch_or(__v) | __v; } + + char + operator^=(char __v) volatile { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_char*, char, memory_order); + + friend char + atomic_load_explicit(volatile atomic_char*, memory_order); + + friend char + atomic_swap_explicit(volatile atomic_char*, char, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_char*, char*, char, + memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_char*, memory_order); + + friend char + atomic_fetch_add_explicit(volatile atomic_char*, char, memory_order); + + friend char + atomic_fetch_sub_explicit(volatile atomic_char*, char, memory_order); + + friend char + atomic_fetch_and_explicit(volatile atomic_char*, char, memory_order); + + friend char + atomic_fetch_or_explicit( volatile atomic_char*, char, memory_order); + + friend char + atomic_fetch_xor_explicit(volatile atomic_char*, char, memory_order); + + atomic_char() { } + + atomic_char(char __v) { _M_base._M_i = __v; } + + private: + atomic_char(const atomic_char&); + atomic_char& operator=(const atomic_char&); + }; + + /// atomic_schar + struct atomic_schar + { + __atomic_schar_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(signed char, memory_order = memory_order_seq_cst) volatile; + + signed char + load(memory_order = memory_order_seq_cst) volatile; + + signed char + swap(signed char, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(signed char&, signed char, memory_order, + memory_order) volatile; + + bool + compare_swap(signed char&, signed char, + memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + signed char + fetch_add(signed char, memory_order = memory_order_seq_cst) volatile; + + signed char + fetch_sub(signed char, memory_order = memory_order_seq_cst) volatile; + + signed char + 
fetch_and(signed char, memory_order = memory_order_seq_cst) volatile; + + signed char + fetch_or(signed char, memory_order = memory_order_seq_cst) volatile; + + signed char + fetch_xor(signed char, memory_order = memory_order_seq_cst) volatile; + + signed char + operator=(signed char __v) volatile { store(__v); return __v; } + + signed char + operator++(int) volatile { return fetch_add(1); } + + signed char + operator--(int) volatile { return fetch_sub(1); } + + signed char + operator++() volatile { return fetch_add(1) + 1; } + + signed char + operator--() volatile { return fetch_sub(1) - 1; } + + signed char + operator+=(signed char __v) volatile { return fetch_add(__v) + __v; } + + signed char + operator-=(signed char __v) volatile { return fetch_sub(__v) - __v; } + + signed char + operator&=(signed char __v) volatile { return fetch_and(__v) & __v; } + + signed char + operator|=(signed char __v) volatile { return fetch_or(__v) | __v; } + + signed char + operator^=(signed char __v) volatile { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_schar*, signed char, memory_order); + + friend signed char + atomic_load_explicit(volatile atomic_schar*, memory_order); + + friend signed char + atomic_swap_explicit(volatile atomic_schar*, signed char, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_schar*, signed char*, + signed char, memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_schar*, memory_order); + + friend signed char + atomic_fetch_add_explicit(volatile atomic_schar*, + signed char, memory_order); + + friend signed char + atomic_fetch_sub_explicit(volatile atomic_schar*, signed char, + memory_order); + + friend signed char + atomic_fetch_and_explicit(volatile atomic_schar*, signed char, + memory_order); + + friend signed char + atomic_fetch_or_explicit(volatile atomic_schar*, signed char, + memory_order); + + friend signed char + atomic_fetch_xor_explicit(volatile atomic_schar*, signed char, + memory_order); + + atomic_schar() { } + + atomic_schar(signed char __v) { _M_base._M_i = __v; } + + private: + atomic_schar(const atomic_schar&); + atomic_schar& operator=(const atomic_schar&); + }; + + /// atomic_uchar + struct atomic_uchar + { + __atomic_uchar_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(unsigned char, memory_order = memory_order_seq_cst) volatile; + + unsigned char + load(memory_order = memory_order_seq_cst) volatile; + + unsigned char + swap(unsigned char, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(unsigned char&, unsigned char, memory_order, + memory_order) volatile; + + bool + compare_swap(unsigned char&, unsigned char, + memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + unsigned char + fetch_add(unsigned char, memory_order = memory_order_seq_cst) volatile; + + unsigned char + fetch_sub(unsigned char, memory_order = memory_order_seq_cst) volatile; + + unsigned char + fetch_and(unsigned char, memory_order = memory_order_seq_cst) volatile; + + unsigned char + fetch_or(unsigned char, memory_order = memory_order_seq_cst) volatile; + + unsigned char + fetch_xor(unsigned char, memory_order = memory_order_seq_cst) volatile; + + unsigned char + operator=(unsigned char __v) volatile { store(__v); return __v; } + + unsigned char + operator++(int) volatile { return fetch_add(1); } + + unsigned char + operator--(int) volatile { return fetch_sub(1); } + + unsigned char + operator++() 
volatile { return fetch_add(1) + 1; } + + unsigned char + operator--() volatile { return fetch_sub(1) - 1; } + + unsigned char + operator+=(unsigned char __v) volatile { return fetch_add(__v) + __v; } + + unsigned char + operator-=(unsigned char __v) volatile { return fetch_sub(__v) - __v; } + + unsigned char + operator&=(unsigned char __v) volatile { return fetch_and(__v) & __v; } + + unsigned char + operator|=(unsigned char __v) volatile { return fetch_or(__v) | __v; } + + unsigned char + operator^=(unsigned char __v) volatile { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_uchar*, unsigned char, memory_order); + + friend unsigned char + atomic_load_explicit(volatile atomic_uchar*, memory_order); + + friend unsigned char + atomic_swap_explicit(volatile atomic_uchar*, unsigned char, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_uchar*, unsigned char*, + unsigned char, memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_uchar*, memory_order); + + friend unsigned char + atomic_fetch_add_explicit(volatile atomic_uchar*, unsigned char, + memory_order); + + friend unsigned char + atomic_fetch_sub_explicit(volatile atomic_uchar*, unsigned char, + memory_order); + + friend unsigned char + atomic_fetch_and_explicit(volatile atomic_uchar*, + unsigned char, memory_order); + + friend unsigned char + atomic_fetch_or_explicit( volatile atomic_uchar*, unsigned char, + memory_order); + + friend unsigned char + atomic_fetch_xor_explicit(volatile atomic_uchar*, unsigned char, + memory_order); + + atomic_uchar() { } + + atomic_uchar(unsigned char __v) { _M_base._M_i = __v; } + + private: + atomic_uchar(const atomic_uchar&); + atomic_uchar& operator=(const atomic_uchar&); + }; + + + /// atomic_short + struct atomic_short + { + __atomic_short_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(short, memory_order = memory_order_seq_cst) volatile; + + short + load(memory_order = memory_order_seq_cst) volatile; + + short + swap(short, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(short&, short, memory_order, memory_order) volatile; + + bool + compare_swap(short&, short, memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + short + fetch_add(short, memory_order = memory_order_seq_cst) volatile; + + short + fetch_sub(short, memory_order = memory_order_seq_cst) volatile; + + short + fetch_and(short, memory_order = memory_order_seq_cst) volatile; + + short + fetch_or(short, memory_order = memory_order_seq_cst) volatile; + + short + fetch_xor(short, memory_order = memory_order_seq_cst) volatile; + + short + operator=(short __v) volatile { store(__v); return __v; } + + short + operator++(int) volatile { return fetch_add(1); } + + short + operator--(int) volatile { return fetch_sub(1); } + + short + operator++() volatile { return fetch_add(1) + 1; } + + short + operator--() volatile { return fetch_sub(1) - 1; } + + short + operator+=(short __v) volatile { return fetch_add(__v) + __v; } + + short + operator-=(short __v) volatile { return fetch_sub(__v) - __v; } + + short + operator&=(short __v) volatile { return fetch_and(__v) & __v; } + + short + operator|=(short __v) volatile { return fetch_or(__v) | __v; } + + short + operator^=(short __v) volatile { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_short*, short, memory_order); + + friend short + atomic_load_explicit(volatile 
atomic_short*, memory_order); + + friend short + atomic_swap_explicit(volatile atomic_short*, short, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_short*, short*, short, + memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_short*, memory_order); + + friend short + atomic_fetch_add_explicit(volatile atomic_short*, short, memory_order); + + friend short + atomic_fetch_sub_explicit(volatile atomic_short*, short, memory_order); + + friend short + atomic_fetch_and_explicit(volatile atomic_short*, short, memory_order); + + friend short + atomic_fetch_or_explicit( volatile atomic_short*, short, memory_order); + + friend short + atomic_fetch_xor_explicit(volatile atomic_short*, short, memory_order); + + atomic_short() { } + + atomic_short(short __v) { _M_base._M_i = __v; } + + private: + atomic_short(const atomic_short&); + atomic_short& operator=(const atomic_short&); + }; + + /// atomic_ushort + struct atomic_ushort + { + __atomic_ushort_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(unsigned short, memory_order = memory_order_seq_cst) volatile; + + unsigned short + load(memory_order = memory_order_seq_cst) volatile; + + unsigned short + swap(unsigned short, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(unsigned short&, unsigned short, memory_order, + memory_order) volatile; + + bool + compare_swap(unsigned short&, unsigned short, + memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + unsigned short + fetch_add(unsigned short, memory_order = memory_order_seq_cst) volatile; + + unsigned short + fetch_sub(unsigned short, memory_order = memory_order_seq_cst) volatile; + + unsigned short + fetch_and(unsigned short, memory_order = memory_order_seq_cst) volatile; + + unsigned short + fetch_or(unsigned short, memory_order = memory_order_seq_cst) volatile; + + unsigned short + fetch_xor(unsigned short, memory_order = memory_order_seq_cst) volatile; + + unsigned short + operator=(unsigned short __v) volatile { store(__v); return __v; } + + unsigned short + operator++(int) volatile { return fetch_add(1); } + + unsigned short + operator--(int) volatile { return fetch_sub(1); } + + unsigned short + operator++() volatile { return fetch_add(1) + 1; } + + unsigned short + operator--() volatile { return fetch_sub(1) - 1; } + + unsigned short + operator+=(unsigned short __v) volatile { return fetch_add(__v) + __v; } + + unsigned short + operator-=(unsigned short __v) volatile { return fetch_sub(__v) - __v; } + + unsigned short + operator&=(unsigned short __v) volatile { return fetch_and(__v) & __v; } + + unsigned short + operator|=(unsigned short __v) volatile { return fetch_or(__v) | __v; } + + unsigned short + operator^=(unsigned short __v) volatile { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_ushort*, unsigned short, + memory_order); + + friend unsigned short + atomic_load_explicit(volatile atomic_ushort*, memory_order); + + friend unsigned short + atomic_swap_explicit(volatile atomic_ushort*, unsigned short, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_ushort*, unsigned short*, + unsigned short, memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_ushort*, memory_order); + + friend unsigned short + atomic_fetch_add_explicit(volatile atomic_ushort*, unsigned short, + memory_order); + + friend unsigned short + atomic_fetch_sub_explicit(volatile 
atomic_ushort*, unsigned short, + memory_order); + + friend unsigned short + atomic_fetch_and_explicit(volatile atomic_ushort*, unsigned short, + memory_order); + + friend unsigned short + atomic_fetch_or_explicit( volatile atomic_ushort*, unsigned short, + memory_order); + + friend unsigned short + atomic_fetch_xor_explicit(volatile atomic_ushort*, unsigned short, + memory_order); + + atomic_ushort() { } + + atomic_ushort(unsigned short __v) { _M_base._M_i = __v; } + + private: + atomic_ushort(const atomic_ushort&); + atomic_ushort& operator=(const atomic_ushort&); + }; + + /// atomic_int + struct atomic_int + { + __atomic_int_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(int, memory_order = memory_order_seq_cst) volatile; + + int + load(memory_order = memory_order_seq_cst) volatile; + + int + swap(int, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(int&, int, memory_order, memory_order) volatile; + + bool + compare_swap(int&, int, memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + int + fetch_add(int, memory_order = memory_order_seq_cst) volatile; + + int + fetch_sub(int, memory_order = memory_order_seq_cst) volatile; + + int + fetch_and(int, memory_order = memory_order_seq_cst) volatile; + + int + fetch_or(int, memory_order = memory_order_seq_cst) volatile; + + int + fetch_xor(int, memory_order = memory_order_seq_cst) volatile; + + int + operator=(int __v) volatile { store(__v); return __v; } + + int + operator++(int) volatile { return fetch_add(1); } + + int + operator--(int) volatile { return fetch_sub(1); } + + int + operator++() volatile { return fetch_add(1) + 1; } + + int + operator--() volatile { return fetch_sub(1) - 1; } + + int + operator+=(int __v) volatile { return fetch_add(__v) + __v; } + + int + operator-=(int __v) volatile { return fetch_sub(__v) - __v; } + + int + operator&=(int __v) volatile { return fetch_and(__v) & __v; } + + int + operator|=(int __v) volatile { return fetch_or(__v) | __v; } + + int + operator^=(int __v) volatile { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_int*, int, memory_order); + + friend int + atomic_load_explicit(volatile atomic_int*, memory_order); + + friend int + atomic_swap_explicit(volatile atomic_int*, int, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_int*, int*, int, + memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_int*, memory_order); + + friend int + atomic_fetch_add_explicit(volatile atomic_int*, int, memory_order); + + friend int + atomic_fetch_sub_explicit(volatile atomic_int*, int, memory_order); + + friend int + atomic_fetch_and_explicit(volatile atomic_int*, int, memory_order); + + friend int + atomic_fetch_or_explicit( volatile atomic_int*, int, memory_order); + + friend int + atomic_fetch_xor_explicit(volatile atomic_int*, int, memory_order); + + atomic_int() { } + + atomic_int(int __v) { _M_base._M_i = __v; } + + private: + atomic_int(const atomic_int&); + atomic_int& operator=(const atomic_int&); + }; + + /// atomic_uint + struct atomic_uint + { + __atomic_uint_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(unsigned int, memory_order = memory_order_seq_cst) volatile; + + unsigned int + load(memory_order = memory_order_seq_cst) volatile; + + unsigned int + swap(unsigned int, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(unsigned int&, unsigned int, memory_order, + 
memory_order) volatile; + + bool + compare_swap(unsigned int&, unsigned int, + memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + unsigned int + fetch_add(unsigned int, memory_order = memory_order_seq_cst) volatile; + + unsigned int + fetch_sub(unsigned int, memory_order = memory_order_seq_cst) volatile; + + unsigned int + fetch_and(unsigned int, memory_order = memory_order_seq_cst) volatile; + + unsigned int + fetch_or(unsigned int, memory_order = memory_order_seq_cst) volatile; + + unsigned int + fetch_xor(unsigned int, memory_order = memory_order_seq_cst) volatile; + + unsigned int + operator=(unsigned int __v) volatile { store(__v); return __v; } + + unsigned int + operator++(int) volatile { return fetch_add(1); } + + unsigned int + operator--(int) volatile { return fetch_sub(1); } + + unsigned int + operator++() volatile { return fetch_add(1) + 1; } + + unsigned int + operator--() volatile { return fetch_sub(1) - 1; } + + unsigned int + operator+=(unsigned int __v) volatile { return fetch_add(__v) + __v; } + + unsigned int + operator-=(unsigned int __v) volatile { return fetch_sub(__v) - __v; } + + unsigned int + operator&=(unsigned int __v) volatile { return fetch_and(__v) & __v; } + + unsigned int + operator|=(unsigned int __v) volatile { return fetch_or(__v) | __v; } + + unsigned int + operator^=(unsigned int __v) volatile { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_uint*, unsigned int, memory_order); + + friend unsigned int + atomic_load_explicit(volatile atomic_uint*, memory_order); + + friend unsigned int + atomic_swap_explicit(volatile atomic_uint*, unsigned int, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_uint*, unsigned int*, + unsigned int, memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_uint*, memory_order); + + friend unsigned int + atomic_fetch_add_explicit(volatile atomic_uint*, unsigned int, + memory_order); + + friend unsigned int + atomic_fetch_sub_explicit(volatile atomic_uint*, unsigned int, + memory_order); + + friend unsigned int + atomic_fetch_and_explicit(volatile atomic_uint*, unsigned int, + memory_order); + + friend unsigned int + atomic_fetch_or_explicit( volatile atomic_uint*, unsigned int, + memory_order); + + friend unsigned int + atomic_fetch_xor_explicit(volatile atomic_uint*, unsigned int, + memory_order); + + atomic_uint() { } + + atomic_uint(unsigned int __v) { _M_base._M_i = __v; } + + private: + atomic_uint(const atomic_uint&); + atomic_uint& operator=(const atomic_uint&); + }; + + /// atomic_long + struct atomic_long + { + __atomic_long_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(long, memory_order = memory_order_seq_cst) volatile; + + long + load(memory_order = memory_order_seq_cst) volatile; + + long + swap(long, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(long&, long, memory_order, memory_order) volatile; + + bool + compare_swap(long&, long, memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + long + fetch_add(long, memory_order = memory_order_seq_cst) volatile; + + long + fetch_sub(long, memory_order = memory_order_seq_cst) volatile; + + long + fetch_and(long, memory_order = memory_order_seq_cst) volatile; + + long + fetch_or(long, memory_order = memory_order_seq_cst) volatile; + + long + fetch_xor(long, memory_order = memory_order_seq_cst) volatile; + + long + operator=(long __v) 
volatile { store(__v); return __v; } + + long + operator++(int) volatile { return fetch_add(1); } + + long + operator--(int) volatile { return fetch_sub(1); } + + long + operator++() volatile { return fetch_add(1) + 1; } + + long + operator--() volatile { return fetch_sub(1) - 1; } + + long + operator+=(long __v) volatile { return fetch_add(__v) + __v; } + + long + operator-=(long __v) volatile { return fetch_sub(__v) - __v; } + + long + operator&=(long __v) volatile { return fetch_and(__v) & __v; } + + long + operator|=(long __v) volatile { return fetch_or(__v) | __v; } + + long + operator^=(long __v) volatile { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_long*, long, memory_order); + + friend long + atomic_load_explicit(volatile atomic_long*, memory_order); + + friend long + atomic_swap_explicit(volatile atomic_long*, long, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_long*, long*, long, + memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_long*, memory_order); + + friend long + atomic_fetch_add_explicit(volatile atomic_long*, long, memory_order); + + friend long + atomic_fetch_sub_explicit(volatile atomic_long*, long, memory_order); + + friend long + atomic_fetch_and_explicit(volatile atomic_long*, long, memory_order); + + friend long + atomic_fetch_or_explicit( volatile atomic_long*, long, memory_order); + + friend long + atomic_fetch_xor_explicit(volatile atomic_long*, long, memory_order); + + atomic_long() { } + + atomic_long(long __v) { _M_base._M_i = __v; } + + private: + atomic_long(const atomic_long&); + atomic_long& operator=(const atomic_long&); + }; + + /// atomic_ulong + struct atomic_ulong + { + __atomic_ulong_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(unsigned long, memory_order = memory_order_seq_cst) volatile; + + unsigned long + load(memory_order = memory_order_seq_cst) volatile; + + unsigned long + swap(unsigned long, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(unsigned long&, unsigned long, memory_order, + memory_order) volatile; + + bool + compare_swap(unsigned long&, unsigned long, + memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + unsigned long + fetch_add(unsigned long, memory_order = memory_order_seq_cst) volatile; + + unsigned long + fetch_sub(unsigned long, memory_order = memory_order_seq_cst) volatile; + + unsigned long + fetch_and(unsigned long, memory_order = memory_order_seq_cst) volatile; + + unsigned long + fetch_or(unsigned long, memory_order = memory_order_seq_cst) volatile; + + unsigned long + fetch_xor(unsigned long, memory_order = memory_order_seq_cst) volatile; + + unsigned long + operator=(unsigned long __v) volatile { store(__v); return __v; } + + unsigned long + operator++(int) volatile { return fetch_add(1); } + + unsigned long + operator--(int) volatile { return fetch_sub(1); } + + unsigned long + operator++() volatile { return fetch_add(1) + 1; } + + unsigned long + operator--() volatile { return fetch_sub(1) - 1; } + + unsigned long + operator+=(unsigned long __v) volatile { return fetch_add(__v) + __v; } + + unsigned long + operator-=(unsigned long __v) volatile { return fetch_sub(__v) - __v; } + + unsigned long + operator&=(unsigned long __v) volatile { return fetch_and(__v) & __v; } + + unsigned long + operator|=(unsigned long __v) volatile { return fetch_or(__v) | __v; } + + unsigned long + operator^=(unsigned long __v) volatile { 
return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_ulong*, unsigned long, memory_order); + + friend unsigned long + atomic_load_explicit(volatile atomic_ulong*, memory_order); + + friend unsigned long + atomic_swap_explicit(volatile atomic_ulong*, unsigned long, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_ulong*, unsigned long*, + unsigned long, memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_ulong*, memory_order); + + friend unsigned long + atomic_fetch_add_explicit(volatile atomic_ulong*, unsigned long, + memory_order); + + friend unsigned long + atomic_fetch_sub_explicit(volatile atomic_ulong*, unsigned long, + memory_order); + + friend unsigned long + atomic_fetch_and_explicit(volatile atomic_ulong*, unsigned long, + memory_order); + friend unsigned long + atomic_fetch_or_explicit(volatile atomic_ulong*, unsigned long, + memory_order); + + friend unsigned long + atomic_fetch_xor_explicit(volatile atomic_ulong*, unsigned long, + memory_order); + + atomic_ulong() { } + + atomic_ulong(unsigned long __v) { _M_base._M_i = __v; } + + private: + atomic_ulong(const atomic_ulong&); + atomic_ulong& operator=(const atomic_ulong&); + }; + + /// atomic_llong + struct atomic_llong + { + __atomic_llong_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(long long, memory_order = memory_order_seq_cst) volatile; + + long long + load(memory_order = memory_order_seq_cst) volatile; + + long long + swap(long long, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(long long&, long long, memory_order, memory_order) volatile; + + bool + compare_swap(long long&, long long, + memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + long long + fetch_add(long long, memory_order = memory_order_seq_cst) volatile; + + long long + fetch_sub(long long, memory_order = memory_order_seq_cst) volatile; + + long long + fetch_and(long long, memory_order = memory_order_seq_cst) volatile; + + long long + fetch_or(long long, memory_order = memory_order_seq_cst) volatile; + + long long + fetch_xor(long long, memory_order = memory_order_seq_cst) volatile; + + long long + operator=(long long __v) volatile { store(__v); return __v; } + + long long + operator++(int) volatile { return fetch_add(1); } + + long long + operator--(int) volatile { return fetch_sub(1); } + + long long + operator++() volatile { return fetch_add(1) + 1; } + + long long + operator--() volatile { return fetch_sub(1) - 1; } + + long long + operator+=(long long __v) volatile { return fetch_add(__v) + __v; } + + long long + operator-=(long long __v) volatile { return fetch_sub(__v) - __v; } + + long long + operator&=(long long __v) volatile { return fetch_and(__v) & __v; } + + long long + operator|=(long long __v) volatile { return fetch_or(__v) | __v; } + + long long + operator^=(long long __v) volatile { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_llong*, long long, memory_order); + + friend long long + atomic_load_explicit(volatile atomic_llong*, memory_order); + + friend long long + atomic_swap_explicit(volatile atomic_llong*, long long, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_llong*, long long*, + long long, memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_llong*, memory_order); + + friend long long + atomic_fetch_add_explicit(volatile atomic_llong*, long long, 
memory_order); + + friend long long + atomic_fetch_sub_explicit(volatile atomic_llong*, long long, memory_order); + + friend long long + atomic_fetch_and_explicit(volatile atomic_llong*, long long, memory_order); + + friend long long + atomic_fetch_or_explicit(volatile atomic_llong*, long long, memory_order); + + friend long long + atomic_fetch_xor_explicit(volatile atomic_llong*, long long, memory_order); + + atomic_llong() { } + + atomic_llong(long long __v) { _M_base._M_i = __v; } + + private: + atomic_llong(const atomic_llong&); + atomic_llong& operator=(const atomic_llong&); + }; + + /// atomic_ullong + struct atomic_ullong + { + __atomic_ullong_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(unsigned long long, memory_order = memory_order_seq_cst) volatile; + + unsigned long long + load(memory_order = memory_order_seq_cst) volatile; + + unsigned long long + swap(unsigned long long, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(unsigned long long&, unsigned long long, memory_order, + memory_order) volatile; + + bool + compare_swap(unsigned long long&, unsigned long long, + memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + unsigned long long + fetch_add(unsigned long long, memory_order = memory_order_seq_cst) volatile; + + unsigned long long + fetch_sub(unsigned long long, memory_order = memory_order_seq_cst) volatile; + + unsigned long long + fetch_and(unsigned long long, memory_order = memory_order_seq_cst) volatile; + + unsigned long long + fetch_or(unsigned long long, memory_order = memory_order_seq_cst) volatile; + + unsigned long long + fetch_xor(unsigned long long, memory_order = memory_order_seq_cst) volatile; + + unsigned long long + operator=(unsigned long long __v) volatile + { store(__v); return __v; } + + unsigned long long + operator++(int) volatile + { return fetch_add(1); } + + unsigned long long + operator--(int) volatile + { return fetch_sub(1); } + + unsigned long long + operator++() volatile + { return fetch_add(1) + 1; } + + unsigned long long + operator--() volatile + { return fetch_sub(1) - 1; } + + unsigned long long + operator+=(unsigned long long __v) volatile + { return fetch_add(__v) + __v; } + + unsigned long long + operator-=(unsigned long long __v) volatile + { return fetch_sub(__v) - __v; } + + unsigned long long + operator&=(unsigned long long __v) volatile + { return fetch_and(__v) & __v; } + + unsigned long long + operator|=(unsigned long long __v) volatile + { return fetch_or(__v) | __v; } + + unsigned long long + operator^=(unsigned long long __v) volatile + { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_ullong*, unsigned long long, + memory_order); + friend unsigned long long + atomic_load_explicit(volatile atomic_ullong*, memory_order); + + friend unsigned long long + atomic_swap_explicit(volatile atomic_ullong*, unsigned long long, + memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_ullong*, unsigned long long*, + unsigned long long, memory_order, + memory_order); + + friend void + atomic_fence(const volatile atomic_ullong*, memory_order); + + friend unsigned long long + atomic_fetch_add_explicit(volatile atomic_ullong*, unsigned long long, + memory_order); + + friend unsigned long long + atomic_fetch_sub_explicit(volatile atomic_ullong*, unsigned long long, + memory_order); + + friend unsigned long long + atomic_fetch_and_explicit(volatile atomic_ullong*, unsigned long long, + 
memory_order); + + friend unsigned long long + atomic_fetch_or_explicit(volatile atomic_ullong*, unsigned long long, + memory_order); + + friend unsigned long long + atomic_fetch_xor_explicit(volatile atomic_ullong*, unsigned long long, + memory_order); + + atomic_ullong() { } + + atomic_ullong(unsigned long long __v) { _M_base._M_i = __v; } + + private: + atomic_ullong(const atomic_ullong&); + atomic_ullong& operator=(const atomic_ullong&); + }; + + /// atomic_wchar_t + struct atomic_wchar_t + { + __atomic_wchar_t_base _M_base; + + bool + is_lock_free() const volatile; + + void + store(wchar_t, memory_order = memory_order_seq_cst) volatile; + + wchar_t + load(memory_order = memory_order_seq_cst) volatile; + + wchar_t + swap(wchar_t, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(wchar_t&, wchar_t, memory_order, memory_order) volatile; + + bool + compare_swap(wchar_t&, wchar_t, + memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + wchar_t + fetch_add(wchar_t, memory_order = memory_order_seq_cst) volatile; + + wchar_t + fetch_sub(wchar_t, memory_order = memory_order_seq_cst) volatile; + + wchar_t + fetch_and(wchar_t, memory_order = memory_order_seq_cst) volatile; + + wchar_t + fetch_or(wchar_t, memory_order = memory_order_seq_cst) volatile; + + wchar_t + fetch_xor(wchar_t, memory_order = memory_order_seq_cst) volatile; + + wchar_t + operator=(wchar_t __v) volatile + { store(__v); return __v; } + + wchar_t + operator++(int) volatile + { return fetch_add(1); } + + wchar_t + operator--(int) volatile + { return fetch_sub(1); } + + wchar_t + operator++() volatile + { return fetch_add(1) + 1; } + + wchar_t + operator--() volatile + { return fetch_sub(1) - 1; } + + wchar_t + operator+=(wchar_t __v) volatile + { return fetch_add(__v) + __v; } + + wchar_t + operator-=(wchar_t __v) volatile + { return fetch_sub(__v) - __v; } + + wchar_t + operator&=(wchar_t __v) volatile + { return fetch_and(__v) & __v; } + + wchar_t + operator|=(wchar_t __v) volatile + { return fetch_or(__v) | __v; } + + wchar_t + operator^=(wchar_t __v) volatile + { return fetch_xor(__v) ^ __v; } + + friend void + atomic_store_explicit(volatile atomic_wchar_t*, wchar_t, memory_order); + + friend wchar_t + atomic_load_explicit(volatile atomic_wchar_t*, memory_order); + + friend wchar_t + atomic_swap_explicit(volatile atomic_wchar_t*, wchar_t, memory_order); + + friend bool + atomic_compare_swap_explicit(volatile atomic_wchar_t*, + wchar_t*, wchar_t, memory_order, memory_order); + + friend void + atomic_fence(const volatile atomic_wchar_t*, memory_order); + + friend wchar_t + atomic_fetch_add_explicit(volatile atomic_wchar_t*, wchar_t, memory_order); + + friend wchar_t + atomic_fetch_sub_explicit(volatile atomic_wchar_t*, wchar_t, memory_order); + + friend wchar_t + atomic_fetch_and_explicit(volatile atomic_wchar_t*, wchar_t, memory_order); + + friend wchar_t + atomic_fetch_or_explicit(volatile atomic_wchar_t*, wchar_t, memory_order); + + friend wchar_t + atomic_fetch_xor_explicit(volatile atomic_wchar_t*, wchar_t, memory_order); + + atomic_wchar_t() { } + + atomic_wchar_t(wchar_t __v) { _M_base._M_i = __v; } + + private: + atomic_wchar_t(const atomic_wchar_t&); + atomic_wchar_t& operator=(const atomic_wchar_t&); + }; + + + /// atomic + /// 29.4.3, Generic atomic type, primary class template. 
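The generic primary class template follows. Each integral atomic type above shares one pattern: store/load/swap, a compare_swap that writes the observed value back into its first argument on failure, fetch_<op> read-modify-write operations, and operators defined in terms of them. A brief usage sketch (illustrative only; the member definitions live in the library, not in this header):

    #include <cstdatomic>

    int
    __double_it(std::atomic_int& __counter)
    {
      __counter.fetch_add(1, std::memory_order_acq_rel);

      // Typical compare_swap retry loop: on failure the observed value is
      // written back into __expected, so each iteration retries with fresh data.
      int __expected = __counter.load(std::memory_order_acquire);
      while (!__counter.compare_swap(__expected, __expected * 2))
        { }
      return __expected * 2;
    }
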
+ template<typename _Tp> + struct atomic + { + bool + is_lock_free() const volatile; + + void + store(_Tp, memory_order = memory_order_seq_cst) volatile; + + _Tp + load(memory_order = memory_order_seq_cst) volatile; + + _Tp + swap(_Tp __v, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(_Tp&, _Tp, memory_order, memory_order) volatile; + + bool + compare_swap(_Tp&, _Tp, memory_order = memory_order_seq_cst) volatile; + + void + fence(memory_order) const volatile; + + _Tp + operator=(_Tp __v) volatile { store(__v); return __v; } + + atomic() { } + + explicit atomic(_Tp __v) : __f(__v) { } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + + _Tp __f; + }; + + /// Partial specialization for pointer types. + template<typename _Tp> + struct atomic<_Tp*> : atomic_address + { + _Tp* + load(memory_order = memory_order_seq_cst) volatile; + + _Tp* + swap(_Tp*, memory_order = memory_order_seq_cst) volatile; + + bool + compare_swap(_Tp*&, _Tp*, memory_order, memory_order) volatile; + + bool + compare_swap(_Tp*&, _Tp*, memory_order = memory_order_seq_cst) volatile; + + _Tp* + fetch_add(ptrdiff_t, memory_order = memory_order_seq_cst) volatile; + + _Tp* + fetch_sub(ptrdiff_t, memory_order = memory_order_seq_cst) volatile; + + _Tp* + operator=(_Tp* __v) volatile { store(__v); return __v; } + + _Tp* + operator++(int) volatile { return fetch_add(1); } + + _Tp* + operator--(int) volatile { return fetch_sub(1); } + + _Tp* + operator++() volatile { return fetch_add(1) + 1; } + + _Tp* + operator--() volatile { return fetch_sub(1) - 1; } + + _Tp* + operator+=(ptrdiff_t __v) volatile + { return fetch_add(__v) + __v; } + + _Tp* + operator-=(ptrdiff_t __v) volatile + { return fetch_sub(__v) - __v; } + + atomic() { } + + explicit atomic(_Tp* __v) : atomic_address(__v) { } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for bool. + template<> + struct atomic<bool> : atomic_bool + { + atomic() { } + + explicit atomic(bool __v) : atomic_bool(__v) { } + + bool + operator=(bool __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for void* + template<> + struct atomic<void*> : atomic_address + { + atomic() { } + + explicit atomic(void* __v) : atomic_address(__v) { } + + void* + operator=(void* __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for char. + template<> + struct atomic<char> : atomic_char + { + atomic() { } + + explicit atomic(char __v) : atomic_char(__v) { } + + char + operator=(char __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + + /// Explicit specialization for signed char. + template<> + struct atomic<signed char> : atomic_schar + { + atomic() { } + + explicit atomic(signed char __v) : atomic_schar(__v) { } + + signed char + operator=(signed char __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for unsigned char. 
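The unsigned char specialization and the remaining integral specializations follow. Before them, a sketch of the primary template and the pointer partial specialization defined above: atomic<_Tp*> layers pointer arithmetic over atomic_address (its operators count in elements of the pointed-to type, as the operator+= definition above shows), while atomic<_Tp> offers only load/store/swap/compare_swap for arbitrary types. Illustrative only; __widget is a placeholder type:

    #include <cstdatomic>

    struct __widget { int _M_state; };

    int*
    __advance(std::atomic<int*>& __p)
    {
      // fetch_add counts in elements, mirroring the partial specialization's
      // own operator+= definition.
      return __p.fetch_add(1) + 1;
    }

    __widget
    __publish(std::atomic<__widget>& __w, __widget __next)
    {
      __w.store(__next);     // whole-object store through the primary template
      return __w.load();
    }
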
+ template<> + struct atomic<unsigned char> : atomic_uchar + { + atomic() { } + + explicit atomic(unsigned char __v) : atomic_uchar(__v) { } + + unsigned char + operator=(unsigned char __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& + operator=(const atomic&); + }; + + /// Explicit specialization for short. + template<> + struct atomic<short> : atomic_short + { + atomic() { } + + explicit atomic(short __v) : atomic_short(__v) { } + + short + operator=(short __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for unsigned short. + template<> + struct atomic<unsigned short> : atomic_ushort + { + atomic() { } + + explicit atomic(unsigned short __v) : atomic_ushort(__v) { } + + unsigned short + operator=(unsigned short __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for int. + template<> + struct atomic<int> : atomic_int + { + atomic() { } + + explicit atomic(int __v) : atomic_int(__v) { } + + int + operator=(int __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for unsigned int. + template<> + struct atomic<unsigned int> : atomic_uint + { + atomic() { } + + explicit atomic(unsigned int __v) : atomic_uint(__v) { } + + unsigned int + operator=(unsigned int __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for long. + template<> + struct atomic<long> : atomic_long + { + atomic() { } + + explicit atomic(long __v) : atomic_long(__v) { } + + long + operator=(long __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for unsigned long. + template<> + struct atomic<unsigned long> : atomic_ulong + { + atomic() { } + + explicit atomic(unsigned long __v) : atomic_ulong(__v) { } + + unsigned long + operator=(unsigned long __v) volatile + { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for long long. + template<> + struct atomic<long long> : atomic_llong + { + atomic() { } + + explicit atomic(long long __v) : atomic_llong(__v) { } + + long long + operator=(long long __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for unsigned long long. + template<> + struct atomic<unsigned long long> : atomic_ullong + { + atomic() { } + + explicit atomic(unsigned long long __v) : atomic_ullong(__v) { } + + unsigned long long + operator=(unsigned long long __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + /// Explicit specialization for wchar_t. 
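The remaining integral specializations (including atomic<wchar_t>, which follows) add only constructors and an operator= that forwards to store() with the seq_cst default, so the member and free-function forms are interchangeable. A sketch, illustrative only and under the same assumptions as the earlier snippets:

  #include <stdatomic.h>
  using namespace std;   // assumption, as above

  // Three ways of writing to an atomic<int>: operator= (a seq_cst store),
  // the store() member inherited from atomic_int, and the free function
  // defined further down in this header.
  void
  publish(volatile atomic<int>* flag)
  {
    *flag = 1;
    flag->store(1, memory_order_release);
    atomic_store_explicit(flag, 1, memory_order_release);
  }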
+ template<> + struct atomic<wchar_t> : atomic_wchar_t + { + atomic() { } + + explicit atomic(wchar_t __v) : atomic_wchar_t(__v) { } + + wchar_t + operator=(wchar_t __v) volatile { store(__v); return __v; } + + private: + atomic(const atomic&); + atomic& operator=(const atomic&); + }; + + inline bool + atomic_is_lock_free(const volatile atomic_bool* __a) + { return false; } + + inline bool + atomic_load_explicit(volatile atomic_bool* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline bool + atomic_load(volatile atomic_bool* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_bool* __a, bool __m, memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_bool* __a, bool __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_swap_explicit(volatile atomic_bool* __a, bool __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline bool + atomic_swap(volatile atomic_bool* __a, bool __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_bool* __a, bool* __e, bool __m, + memory_order __x, memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_bool* __a, bool* __e, bool __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_bool* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + inline bool + atomic_is_lock_free(const volatile atomic_address* __a) + { return false; } + + inline void* + atomic_load_explicit(volatile atomic_address* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline void* + atomic_load(volatile atomic_address* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_address* __a, void* __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_address* __a, void* __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline void* + atomic_swap_explicit(volatile atomic_address* __a, void* __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline void* + atomic_swap(volatile atomic_address* __a, void* __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_address* __a, void** __e, + void* __m, memory_order __x, memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_address* __a, void** __e, void* __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_address* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_char* __a) + { return false; } + + inline char + atomic_load_explicit(volatile atomic_char* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline char + atomic_load(volatile atomic_char* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_char* __a, char __m, memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile 
atomic_char* __a, char __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline char + atomic_swap_explicit(volatile atomic_char* __a, char __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline char + atomic_swap(volatile atomic_char* __a, char __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_char* __a, char* __e, char __m, + memory_order __x, memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_char* __a, char* __e, char __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_char* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_schar* __a) + { return false; } + + inline signed char + atomic_load_explicit(volatile atomic_schar* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline signed char + atomic_load(volatile atomic_schar* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_schar* __a, signed char __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_schar* __a, signed char __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline signed char + atomic_swap_explicit(volatile atomic_schar* __a, signed char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline signed char + atomic_swap(volatile atomic_schar* __a, signed char __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_schar* __a, signed char* __e, + signed char __m, memory_order __x, + memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_schar* __a, signed char* __e, + signed char __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_schar* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_uchar* __a) + { return false; } + + inline unsigned char + atomic_load_explicit(volatile atomic_uchar* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline unsigned char + atomic_load(volatile atomic_uchar* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_uchar* __a, unsigned char __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_uchar* __a, unsigned char __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned char + atomic_swap_explicit(volatile atomic_uchar* __a, unsigned char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline unsigned char + atomic_swap(volatile atomic_uchar* __a, unsigned char __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_uchar* __a, unsigned char* __e, + unsigned char __m, memory_order __x, + memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_uchar* __a, unsigned char* 
__e, + unsigned char __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_uchar* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_short* __a) + { return false; } + + inline short + atomic_load_explicit(volatile atomic_short* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline short + atomic_load(volatile atomic_short* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_short* __a, short __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_short* __a, short __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline short + atomic_swap_explicit(volatile atomic_short* __a, short __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline short + atomic_swap(volatile atomic_short* __a, short __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_short* __a, short* __e, + short __m, memory_order __x, memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_short* __a, short* __e, short __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_short* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_ushort* __a) + { return false; } + + inline unsigned short + atomic_load_explicit(volatile atomic_ushort* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline unsigned short + atomic_load(volatile atomic_ushort* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_ushort* __a, unsigned short __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_ushort* __a, unsigned short __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned short + atomic_swap_explicit(volatile atomic_ushort* __a, unsigned short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline unsigned short + atomic_swap(volatile atomic_ushort* __a, unsigned short __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_ushort* __a, + unsigned short* __e, unsigned short __m, + memory_order __x, memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_ushort* __a, unsigned short* __e, + unsigned short __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_ushort* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_int* __a) + { return false; } + + inline int + atomic_load_explicit(volatile atomic_int* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline int + atomic_load(volatile atomic_int* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_int* __a, int __m, memory_order __x) 
+ { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_int* __a, int __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline int + atomic_swap_explicit(volatile atomic_int* __a, int __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline int + atomic_swap(volatile atomic_int* __a, int __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_int* __a, int* __e, int __m, + memory_order __x, memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_int* __a, int* __e, int __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_int* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_uint* __a) + { return false; } + + inline unsigned int + atomic_load_explicit(volatile atomic_uint* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline unsigned int + atomic_load(volatile atomic_uint* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_uint* __a, unsigned int __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_uint* __a, unsigned int __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned int + atomic_swap_explicit(volatile atomic_uint* __a, unsigned int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline unsigned int + atomic_swap(volatile atomic_uint* __a, unsigned int __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_uint* __a, unsigned int* __e, + unsigned int __m, memory_order __x, + memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_uint* __a, unsigned int* __e, + unsigned int __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_uint* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_long* __a) + { return false; } + + inline long + atomic_load_explicit(volatile atomic_long* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline long + atomic_load(volatile atomic_long* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_long* __a, long __m, memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_long* __a, long __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline long + atomic_swap_explicit(volatile atomic_long* __a, long __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline long + atomic_swap(volatile atomic_long* __a, long __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_long* __a, long* __e, long __m, + memory_order __x, memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_long* __a, long* __e, long __m) + { return 
atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_long* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_ulong* __a) + { return false; } + + inline unsigned long + atomic_load_explicit(volatile atomic_ulong* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline unsigned long + atomic_load(volatile atomic_ulong* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_ulong* __a, unsigned long __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_ulong* __a, unsigned long __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long + atomic_swap_explicit(volatile atomic_ulong* __a, unsigned long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline unsigned long + atomic_swap(volatile atomic_ulong* __a, unsigned long __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_ulong* __a, unsigned long* __e, + unsigned long __m, memory_order __x, + memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_ulong* __a, unsigned long* __e, + unsigned long __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_ulong* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_llong* __a) + { return false; } + + inline long long + atomic_load_explicit(volatile atomic_llong* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline long long + atomic_load(volatile atomic_llong* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile atomic_llong* __a, long long __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_llong* __a, long long __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline long long + atomic_swap_explicit(volatile atomic_llong* __a, long long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline long long + atomic_swap(volatile atomic_llong* __a, long long __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_llong* __a, long long* __e, + long long __m, memory_order __x, + memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_llong* __a, long long* __e, + long long __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_llong* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + + inline bool + atomic_is_lock_free(const volatile atomic_ullong* __a) + { return false; } + + inline unsigned long long + atomic_load_explicit(volatile atomic_ullong* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline unsigned long long + atomic_load(volatile atomic_ullong* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + inline void + atomic_store_explicit(volatile 
atomic_ullong* __a, unsigned long long __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_ullong* __a, unsigned long long __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long long + atomic_swap_explicit(volatile atomic_ullong* __a, unsigned long long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline unsigned long long + atomic_swap(volatile atomic_ullong* __a, unsigned long long __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_ullong* __a, + unsigned long long* __e, unsigned long long __m, + memory_order __x, memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_ullong* __a, unsigned long long* __e, + unsigned long long __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_ullong* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + inline bool + atomic_is_lock_free(const volatile atomic_wchar_t* __a) + { return false; } + + inline wchar_t + atomic_load_explicit(volatile atomic_wchar_t* __a, memory_order __x) + { return _ATOMIC_LOAD_(__a, __x); } + + inline wchar_t + atomic_load(volatile atomic_wchar_t* __a) + { return atomic_load_explicit(__a, memory_order_seq_cst); } + + + inline void + atomic_store_explicit(volatile atomic_wchar_t* __a, wchar_t __m, + memory_order __x) + { _ATOMIC_STORE_(__a, __m, __x); } + + inline void + atomic_store(volatile atomic_wchar_t* __a, wchar_t __m) + { atomic_store_explicit(__a, __m, memory_order_seq_cst); } + + inline wchar_t + atomic_swap_explicit(volatile atomic_wchar_t* __a, wchar_t __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, =, __m, __x); } + + inline wchar_t + atomic_swap(volatile atomic_wchar_t* __a, wchar_t __m) + { return atomic_swap_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_compare_swap_explicit(volatile atomic_wchar_t* __a, wchar_t* __e, + wchar_t __m, memory_order __x, memory_order __y) + { return _ATOMIC_CMPSWP_(__a, __e, __m, __x); } + + inline bool + atomic_compare_swap(volatile atomic_wchar_t* __a, wchar_t* __e, wchar_t __m) + { return atomic_compare_swap_explicit(__a, __e, __m, memory_order_seq_cst, + memory_order_seq_cst); } + + inline void + atomic_fence(const volatile atomic_wchar_t* __a, memory_order __x) + { _ATOMIC_FENCE_(__a, __x); } + + inline void* + atomic_fetch_add_explicit(volatile atomic_address* __a, ptrdiff_t __m, + memory_order __x) + { + void* volatile* __p = &((__a)->_M_base._M_i); + volatile atomic_flag* __g = __atomic_flag_for_address(__p); + __atomic_flag_wait_explicit(__g, __x); + void* __r = *__p; + *__p = (void*)((char*)(*__p) + __m); + atomic_flag_clear_explicit(__g, __x); + return __r; + } + + inline void* + atomic_fetch_add(volatile atomic_address* __a, ptrdiff_t __m) + { return atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + + inline void* + atomic_fetch_sub_explicit(volatile atomic_address* __a, ptrdiff_t __m, + memory_order __x) + { + void* volatile* __p = &((__a)->_M_base._M_i); + volatile atomic_flag* __g = __atomic_flag_for_address(__p); + __atomic_flag_wait_explicit(__g, __x); + void* __r = *__p; + *__p = (void*)((char*)(*__p) - __m); + atomic_flag_clear_explicit(__g, __x); + return __r; + } + + inline void* + atomic_fetch_sub(volatile atomic_address* __a, 
ptrdiff_t __m) + { return atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + + inline char + atomic_fetch_add_explicit(volatile atomic_char* __a, char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline char + atomic_fetch_add(volatile atomic_char* __a, char __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline char + atomic_fetch_sub_explicit(volatile atomic_char* __a, char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline char + atomic_fetch_sub(volatile atomic_char* __a, char __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline char + atomic_fetch_and_explicit(volatile atomic_char* __a, char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline char + atomic_fetch_and(volatile atomic_char* __a, char __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline char + atomic_fetch_or_explicit(volatile atomic_char* __a, char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline char + atomic_fetch_or(volatile atomic_char* __a, char __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline char + atomic_fetch_xor_explicit(volatile atomic_char* __a, char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline char + atomic_fetch_xor(volatile atomic_char* __a, char __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline signed char + atomic_fetch_add_explicit(volatile atomic_schar* __a, signed char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline signed char + atomic_fetch_add(volatile atomic_schar* __a, signed char __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline signed char + atomic_fetch_sub_explicit(volatile atomic_schar* __a, signed char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline signed char + atomic_fetch_sub(volatile atomic_schar* __a, signed char __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline signed char + atomic_fetch_and_explicit(volatile atomic_schar* __a, signed char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline signed char + atomic_fetch_and(volatile atomic_schar* __a, signed char __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline signed char + atomic_fetch_or_explicit(volatile atomic_schar* __a, signed char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline signed char + atomic_fetch_or(volatile atomic_schar* __a, signed char __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + + inline signed char + atomic_fetch_xor_explicit(volatile atomic_schar* __a, signed char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline signed char + atomic_fetch_xor(volatile atomic_schar* __a, signed char __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline unsigned char + atomic_fetch_add_explicit(volatile atomic_uchar* __a, unsigned char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline unsigned char + atomic_fetch_add(volatile atomic_uchar* __a, unsigned char __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned char + atomic_fetch_sub_explicit(volatile atomic_uchar* __a, unsigned char __m, + memory_order __x) 
+ { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline unsigned char + atomic_fetch_sub(volatile atomic_uchar* __a, unsigned char __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + + inline unsigned char + atomic_fetch_and_explicit(volatile atomic_uchar* __a, unsigned char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline unsigned char + atomic_fetch_and(volatile atomic_uchar* __a, unsigned char __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned char + atomic_fetch_or_explicit(volatile atomic_uchar* __a, unsigned char __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline unsigned char + atomic_fetch_or(volatile atomic_uchar* __a, unsigned char __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned char + atomic_fetch_xor_explicit(volatile atomic_uchar* __a, + unsigned char __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline unsigned char + atomic_fetch_xor(volatile atomic_uchar* __a, unsigned char __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline short + atomic_fetch_add_explicit(volatile atomic_short* __a, short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline short + atomic_fetch_add(volatile atomic_short* __a, short __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline short + atomic_fetch_sub_explicit(volatile atomic_short* __a, short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline short + atomic_fetch_sub(volatile atomic_short* __a, short __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline short + atomic_fetch_and_explicit(volatile atomic_short* __a, short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline short + atomic_fetch_and(volatile atomic_short* __a, short __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline short + atomic_fetch_or_explicit(volatile atomic_short* __a, short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline short + atomic_fetch_or(volatile atomic_short* __a, short __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline short + atomic_fetch_xor_explicit(volatile atomic_short* __a, short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline short + atomic_fetch_xor(volatile atomic_short* __a, short __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline unsigned short + atomic_fetch_add_explicit(volatile atomic_ushort* __a, unsigned short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline unsigned short + atomic_fetch_add(volatile atomic_ushort* __a, unsigned short __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned short + atomic_fetch_sub_explicit(volatile atomic_ushort* __a, unsigned short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline unsigned short + atomic_fetch_sub(volatile atomic_ushort* __a, unsigned short __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned short + atomic_fetch_and_explicit(volatile atomic_ushort* __a, unsigned short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline unsigned short + atomic_fetch_and(volatile atomic_ushort* __a, 
unsigned short __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned short + atomic_fetch_or_explicit(volatile atomic_ushort* __a, unsigned short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline unsigned short + atomic_fetch_or(volatile atomic_ushort* __a, unsigned short __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned short + atomic_fetch_xor_explicit(volatile atomic_ushort* __a, unsigned short __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline unsigned short + atomic_fetch_xor(volatile atomic_ushort* __a, unsigned short __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline int + atomic_fetch_add_explicit(volatile atomic_int* __a, int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline int + atomic_fetch_add(volatile atomic_int* __a, int __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline int + atomic_fetch_sub_explicit(volatile atomic_int* __a, int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline int + atomic_fetch_sub(volatile atomic_int* __a, int __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline int + atomic_fetch_and_explicit(volatile atomic_int* __a, int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline int + atomic_fetch_and(volatile atomic_int* __a, int __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline int + atomic_fetch_or_explicit(volatile atomic_int* __a, int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline int + atomic_fetch_or(volatile atomic_int* __a, int __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline int + atomic_fetch_xor_explicit(volatile atomic_int* __a, int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline int + atomic_fetch_xor(volatile atomic_int* __a, int __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline unsigned int + atomic_fetch_add_explicit(volatile atomic_uint* __a, unsigned int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline unsigned int + atomic_fetch_add(volatile atomic_uint* __a, unsigned int __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned int + atomic_fetch_sub_explicit(volatile atomic_uint* __a, unsigned int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline unsigned int + atomic_fetch_sub(volatile atomic_uint* __a, unsigned int __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned int + atomic_fetch_and_explicit(volatile atomic_uint* __a, unsigned int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline unsigned int + atomic_fetch_and(volatile atomic_uint* __a, unsigned int __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned int + atomic_fetch_or_explicit(volatile atomic_uint* __a, unsigned int __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline unsigned int + atomic_fetch_or(volatile atomic_uint* __a, unsigned int __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned int + atomic_fetch_xor_explicit(volatile atomic_uint* __a, unsigned int __m, + memory_order __x) + { return 
_ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline unsigned int + atomic_fetch_xor(volatile atomic_uint* __a, unsigned int __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline long + atomic_fetch_add_explicit(volatile atomic_long* __a, long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline long + atomic_fetch_add(volatile atomic_long* __a, long __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline long + atomic_fetch_sub_explicit(volatile atomic_long* __a, long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline long + atomic_fetch_sub(volatile atomic_long* __a, long __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline long + atomic_fetch_and_explicit(volatile atomic_long* __a, long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline long + atomic_fetch_and(volatile atomic_long* __a, long __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline long + atomic_fetch_or_explicit(volatile atomic_long* __a, long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline long + atomic_fetch_or(volatile atomic_long* __a, long __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline long + atomic_fetch_xor_explicit(volatile atomic_long* __a, long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline long + atomic_fetch_xor(volatile atomic_long* __a, long __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline unsigned long + atomic_fetch_add_explicit(volatile atomic_ulong* __a, unsigned long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline unsigned long + atomic_fetch_add(volatile atomic_ulong* __a, unsigned long __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long + atomic_fetch_sub_explicit(volatile atomic_ulong* __a, unsigned long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline unsigned long + atomic_fetch_sub(volatile atomic_ulong* __a, unsigned long __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long + atomic_fetch_and_explicit(volatile atomic_ulong* __a, unsigned long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline unsigned long + atomic_fetch_and(volatile atomic_ulong* __a, unsigned long __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long + atomic_fetch_or_explicit(volatile atomic_ulong* __a, unsigned long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline unsigned long + atomic_fetch_or(volatile atomic_ulong* __a, unsigned long __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long + atomic_fetch_xor_explicit(volatile atomic_ulong* __a, unsigned long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline unsigned long + atomic_fetch_xor(volatile atomic_ulong* __a, unsigned long __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline long long + atomic_fetch_add_explicit(volatile atomic_llong* __a, long long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline long long + atomic_fetch_add(volatile atomic_llong* __a, long long __m) + { atomic_fetch_add_explicit(__a, __m, 
memory_order_seq_cst); } + + inline long long + atomic_fetch_sub_explicit(volatile atomic_llong* __a, long long __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline long long + atomic_fetch_sub(volatile atomic_llong* __a, long long __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline long long + atomic_fetch_and_explicit(volatile atomic_llong* __a, + long long __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline long long + atomic_fetch_and(volatile atomic_llong* __a, long long __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline long long + atomic_fetch_or_explicit(volatile atomic_llong* __a, + long long __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline long long + atomic_fetch_or(volatile atomic_llong* __a, long long __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline long long + atomic_fetch_xor_explicit(volatile atomic_llong* __a, + long long __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline long long + atomic_fetch_xor(volatile atomic_llong* __a, long long __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline unsigned long long + atomic_fetch_add_explicit(volatile atomic_ullong* __a, + unsigned long long __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline unsigned long long + atomic_fetch_add(volatile atomic_ullong* __a, unsigned long long __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long long + atomic_fetch_sub_explicit(volatile atomic_ullong* __a, + unsigned long long __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline unsigned long long + atomic_fetch_sub(volatile atomic_ullong* __a, unsigned long long __m) + { atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long long + atomic_fetch_and_explicit(volatile atomic_ullong* __a, + unsigned long long __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline unsigned long long + atomic_fetch_and(volatile atomic_ullong* __a, unsigned long long __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long long + atomic_fetch_or_explicit(volatile atomic_ullong* __a, + unsigned long long __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline unsigned long long + atomic_fetch_or(volatile atomic_ullong* __a, unsigned long long __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline unsigned long long + atomic_fetch_xor_explicit(volatile atomic_ullong* __a, + unsigned long long __m, memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline unsigned long long + atomic_fetch_xor(volatile atomic_ullong* __a, unsigned long long __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + + inline wchar_t + atomic_fetch_add_explicit(volatile atomic_wchar_t* __a, wchar_t __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, +=, __m, __x); } + + inline wchar_t + atomic_fetch_add(volatile atomic_wchar_t* __a, wchar_t __m) + { atomic_fetch_add_explicit(__a, __m, memory_order_seq_cst); } + + inline wchar_t + atomic_fetch_sub_explicit(volatile atomic_wchar_t* __a, wchar_t __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, -=, __m, __x); } + + inline wchar_t + atomic_fetch_sub(volatile atomic_wchar_t* __a, wchar_t __m) + { 
atomic_fetch_sub_explicit(__a, __m, memory_order_seq_cst); } + + inline wchar_t + atomic_fetch_and_explicit(volatile atomic_wchar_t* __a, wchar_t __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, &=, __m, __x); } + + inline wchar_t + atomic_fetch_and(volatile atomic_wchar_t* __a, wchar_t __m) + { atomic_fetch_and_explicit(__a, __m, memory_order_seq_cst); } + + inline wchar_t + atomic_fetch_or_explicit(volatile atomic_wchar_t* __a, wchar_t __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, |=, __m, __x); } + + inline wchar_t + atomic_fetch_or(volatile atomic_wchar_t* __a, wchar_t __m) + { atomic_fetch_or_explicit(__a, __m, memory_order_seq_cst); } + + inline wchar_t + atomic_fetch_xor_explicit(volatile atomic_wchar_t* __a, wchar_t __m, + memory_order __x) + { return _ATOMIC_MODIFY_(__a, ^=, __m, __x); } + + inline wchar_t + atomic_fetch_xor(volatile atomic_wchar_t* __a, wchar_t __m) + { atomic_fetch_xor_explicit(__a, __m, memory_order_seq_cst); } + + inline bool + atomic_bool::is_lock_free() const volatile + { return false; } + + inline void + atomic_bool::store(bool __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline bool + atomic_bool::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline bool + atomic_bool::swap(bool __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_bool::compare_swap(bool& __e, bool __m, memory_order __x, + memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_bool::compare_swap(bool& __e, bool __m, memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_bool::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_char::is_lock_free() const volatile + { return false; } + + inline void + atomic_char::store(char __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline char + atomic_char::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline char + atomic_char::swap(char __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_char::compare_swap(char& __e, char __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_char::compare_swap(char& __e, char __m, memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? 
memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_char::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_schar::is_lock_free() const volatile + { return false; } + + inline void + atomic_schar::store(signed char __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline signed char + atomic_schar::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline signed char + atomic_schar::swap(signed char __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_schar::compare_swap(signed char& __e, signed char __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_schar::compare_swap(signed char& __e, signed char __m, + memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_schar::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + inline bool + atomic_uchar::is_lock_free() const volatile + { return false; } + + inline void + atomic_uchar::store(unsigned char __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline unsigned char + atomic_uchar::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline unsigned char + atomic_uchar::swap(unsigned char __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_uchar::compare_swap(unsigned char& __e, unsigned char __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_uchar::compare_swap(unsigned char& __e, unsigned char __m, + memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_uchar::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_short::is_lock_free() const volatile + { return false; } + + inline void + atomic_short::store(short __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline short + atomic_short::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline short + atomic_short::swap(short __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_short::compare_swap(short& __e, short __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_short::compare_swap(short& __e, short __m, memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? 
memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_short::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_ushort::is_lock_free() const volatile + { return false; } + + inline void + atomic_ushort::store(unsigned short __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline unsigned short + atomic_ushort::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline unsigned short + atomic_ushort::swap(unsigned short __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_ushort::compare_swap(unsigned short& __e, unsigned short __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_ushort::compare_swap(unsigned short& __e, unsigned short __m, + memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_ushort::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_int::is_lock_free() const volatile + { return false; } + + inline void + atomic_int::store(int __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline int + atomic_int::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline int + atomic_int::swap(int __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_int::compare_swap(int& __e, int __m, memory_order __x, + memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_int::compare_swap(int& __e, int __m, memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_int::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_uint::is_lock_free() const volatile + { return false; } + + inline void + atomic_uint::store(unsigned int __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline unsigned int + atomic_uint::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline unsigned int + atomic_uint::swap(unsigned int __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_uint::compare_swap(unsigned int& __e, unsigned int __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_uint::compare_swap(unsigned int& __e, unsigned int __m, + memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? 
memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_uint::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_long::is_lock_free() const volatile + { return false; } + + inline void + atomic_long::store(long __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline long + atomic_long::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline long + atomic_long::swap(long __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_long::compare_swap(long& __e, long __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_long::compare_swap(long& __e, long __m, memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_long::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_ulong::is_lock_free() const volatile + { return false; } + + inline void + atomic_ulong::store(unsigned long __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline unsigned long + atomic_ulong::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline unsigned long + atomic_ulong::swap(unsigned long __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_ulong::compare_swap(unsigned long& __e, unsigned long __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_ulong::compare_swap(unsigned long& __e, unsigned long __m, + memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_ulong::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_llong::is_lock_free() const volatile + { return false; } + + inline void + atomic_llong::store(long long __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline long long + atomic_llong::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline long long + atomic_llong::swap(long long __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_llong::compare_swap(long long& __e, long long __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_llong::compare_swap(long long& __e, long long __m, + memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? 
memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_llong::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_ullong::is_lock_free() const volatile + { return false; } + + inline void + atomic_ullong::store(unsigned long long __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline unsigned long long + atomic_ullong::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline unsigned long long + atomic_ullong::swap(unsigned long long __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_ullong::compare_swap(unsigned long long& __e, unsigned long long __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_ullong::compare_swap(unsigned long long& __e, unsigned long long __m, + memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_ullong::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline bool + atomic_wchar_t::is_lock_free() const volatile + { return false; } + + inline void + atomic_wchar_t::store(wchar_t __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline wchar_t + atomic_wchar_t::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline wchar_t + atomic_wchar_t::swap(wchar_t __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_wchar_t::compare_swap(wchar_t& __e, wchar_t __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_wchar_t::compare_swap(wchar_t& __e, wchar_t __m, + memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? 
memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_wchar_t::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + inline void* + atomic_address::fetch_add(ptrdiff_t __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + inline void* + atomic_address::fetch_sub(ptrdiff_t __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline char + atomic_char::fetch_add(char __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline char + atomic_char::fetch_sub(char __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline char + atomic_char::fetch_and(char __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline char + atomic_char::fetch_or(char __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline char + atomic_char::fetch_xor(char __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline signed char + atomic_schar::fetch_add(signed char __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline signed char + atomic_schar::fetch_sub(signed char __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline signed char + atomic_schar::fetch_and(signed char __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline signed char + atomic_schar::fetch_or(signed char __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline signed char + atomic_schar::fetch_xor(signed char __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline unsigned char + atomic_uchar::fetch_add(unsigned char __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline unsigned char + atomic_uchar::fetch_sub(unsigned char __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline unsigned char + atomic_uchar::fetch_and(unsigned char __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline unsigned char + atomic_uchar::fetch_or(unsigned char __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline unsigned char + atomic_uchar::fetch_xor(unsigned char __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline short + atomic_short::fetch_add(short __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline short + atomic_short::fetch_sub(short __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline short + atomic_short::fetch_and(short __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline short + atomic_short::fetch_or(short __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline short + atomic_short::fetch_xor(short __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline unsigned short + atomic_ushort::fetch_add(unsigned short __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); 
} + + + inline unsigned short + atomic_ushort::fetch_sub(unsigned short __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline unsigned short + atomic_ushort::fetch_and(unsigned short __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline unsigned short + atomic_ushort::fetch_or(unsigned short __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline unsigned short + atomic_ushort::fetch_xor(unsigned short __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline int + atomic_int::fetch_add(int __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline int + atomic_int::fetch_sub(int __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline int + atomic_int::fetch_and(int __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline int + atomic_int::fetch_or(int __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline int + atomic_int::fetch_xor(int __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline unsigned int + atomic_uint::fetch_add(unsigned int __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline unsigned int + atomic_uint::fetch_sub(unsigned int __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline unsigned int + atomic_uint::fetch_and(unsigned int __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline unsigned int + atomic_uint::fetch_or(unsigned int __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline unsigned int + atomic_uint::fetch_xor(unsigned int __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline long + atomic_long::fetch_add(long __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline long + atomic_long::fetch_sub(long __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline long + atomic_long::fetch_and(long __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline long + atomic_long::fetch_or(long __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline long + atomic_long::fetch_xor(long __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline unsigned long + atomic_ulong::fetch_add(unsigned long __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline unsigned long + atomic_ulong::fetch_sub(unsigned long __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline unsigned long + atomic_ulong::fetch_and(unsigned long __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline unsigned long + atomic_ulong::fetch_or(unsigned long __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline unsigned long + atomic_ulong::fetch_xor(unsigned long __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline long long + 
atomic_llong::fetch_add(long long __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline long long + atomic_llong::fetch_sub(long long __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline long long + atomic_llong::fetch_and(long long __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline long long + atomic_llong::fetch_or(long long __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline long long + atomic_llong::fetch_xor(long long __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline unsigned long long + atomic_ullong::fetch_add(unsigned long long __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline unsigned long long + atomic_ullong::fetch_sub(unsigned long long __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline unsigned long long + atomic_ullong::fetch_and(unsigned long long __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline unsigned long long + atomic_ullong::fetch_or(unsigned long long __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline unsigned long long + atomic_ullong::fetch_xor(unsigned long long __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline wchar_t + atomic_wchar_t::fetch_add(wchar_t __m, memory_order __x) volatile + { return atomic_fetch_add_explicit(this, __m, __x); } + + + inline wchar_t + atomic_wchar_t::fetch_sub(wchar_t __m, memory_order __x) volatile + { return atomic_fetch_sub_explicit(this, __m, __x); } + + + inline wchar_t + atomic_wchar_t::fetch_and(wchar_t __m, memory_order __x) volatile + { return atomic_fetch_and_explicit(this, __m, __x); } + + + inline wchar_t + atomic_wchar_t::fetch_or(wchar_t __m, memory_order __x) volatile + { return atomic_fetch_or_explicit(this, __m, __x); } + + + inline wchar_t + atomic_wchar_t::fetch_xor(wchar_t __m, memory_order __x) volatile + { return atomic_fetch_xor_explicit(this, __m, __x); } + + + inline bool + atomic_address::is_lock_free() const volatile + { return false; } + + inline void + atomic_address::store(void* __m, memory_order __x) volatile + { atomic_store_explicit(this, __m, __x); } + + inline void* + atomic_address::load(memory_order __x) volatile + { return atomic_load_explicit(this, __x); } + + inline void* + atomic_address::swap(void* __m, memory_order __x) volatile + { return atomic_swap_explicit(this, __m, __x); } + + inline bool + atomic_address::compare_swap(void*& __e, void* __m, + memory_order __x, memory_order __y) volatile + { return atomic_compare_swap_explicit(this, &__e, __m, __x, __y); } + + inline bool + atomic_address::compare_swap(void*& __e, void* __m, + memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? 
memory_order_acquire : __mo1); + return atomic_compare_swap_explicit(this, &__e, __m, __x, __mo2); + } + + inline void + atomic_address::fence(memory_order __x) const volatile + { return atomic_fence(this, __x); } + + + template<typename _Tp> + inline bool + atomic<_Tp>::is_lock_free() const volatile + { return false; } + + template<typename _Tp> + inline void + atomic<_Tp>::store(_Tp __v, memory_order __x) volatile + // XXX + // { _ATOMIC_STORE_(this, __v, __x); } + { } + + template<typename _Tp> + inline _Tp + atomic<_Tp>::load(memory_order __x) volatile + // XXX + // { return _ATOMIC_LOAD_(this, __x); } + { } + + template<typename _Tp> + inline _Tp + atomic<_Tp>::swap(_Tp __v, memory_order __x) volatile + // XXX + // { return _ATOMIC_MODIFY_(this, =, __v, __x); } + { } + + template<typename _Tp> + inline bool + atomic<_Tp>::compare_swap(_Tp& __r, _Tp __v, memory_order __x, + memory_order __y) volatile + // XXX + // { return _ATOMIC_CMPSWP_(this, &__r, __v, __x); } + { } + + template<typename _Tp> + inline bool + atomic<_Tp>::compare_swap(_Tp& __r, _Tp __v, memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return compare_swap(__r, __v, __x, __mo2); + } + + template<typename _Tp> + _Tp* + atomic<_Tp*>::load(memory_order __x) volatile + { return static_cast<_Tp*>(atomic_address::load(__x)); } + + template<typename _Tp> + _Tp* + atomic<_Tp*>::swap(_Tp* __v, memory_order __x) volatile + { return static_cast<_Tp*>(atomic_address::swap(__v, __x)); } + + template<typename _Tp> + bool + atomic<_Tp*>::compare_swap(_Tp*& __r, _Tp* __v, memory_order __x, + memory_order __y) volatile + { return atomic_address::compare_swap(*reinterpret_cast<void**>(&__r), + static_cast<void*>(__v), __x, __y); } + + template<typename _Tp> + bool + atomic<_Tp*>::compare_swap(_Tp*& __r, _Tp* __v, memory_order __x) volatile + { + const bool __cond1 = __x == memory_order_release; + const bool __cond2 = __x == memory_order_acq_rel; + memory_order __mo1(__cond1 ? memory_order_relaxed : __x); + memory_order __mo2(__cond2 ? memory_order_acquire : __mo1); + return compare_swap(__r, __v, __x, __mo2); + } + + template<typename _Tp> + _Tp* + atomic<_Tp*>::fetch_add(ptrdiff_t __v, memory_order __x) volatile + { + void* __p = atomic_fetch_add_explicit(this, sizeof(_Tp) * __v, __x); + return static_cast<_Tp*>(__p); + } + + template<typename _Tp> + _Tp* + atomic<_Tp*>::fetch_sub(ptrdiff_t __v, memory_order __x) volatile + { + void* __p = atomic_fetch_sub_explicit(this, sizeof(_Tp) * __v, __x); + return static_cast<_Tp*>(__p); + } + +_GLIBCXX_END_NAMESPACE + +#endif + + diff --git a/libstdc++-v3/include/ext/typelist.h b/libstdc++-v3/include/ext/typelist.h index b7cd95434e6..03f28a78323 100644 --- a/libstdc++-v3/include/ext/typelist.h +++ b/libstdc++-v3/include/ext/typelist.h @@ -1,6 +1,6 @@ // -*- C++ -*- -// Copyright (C) 2005, 2006 Free Software Foundation, Inc. +// Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the @@ -71,10 +71,21 @@ namespace typelist typedef Typelist tail; }; - template<typename Fn, class Typelist> + // Apply all typelist types to unary functor. 
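
// Illustrative sketch (editor's example, not part of the patch): the
// apply_generator overloads added in this hunk invoke a functor's template
// operator()<T>() once for every type in a typelist, so unlike plain
// apply() no object of each type has to be default-constructed.  The
// functor print_size and the three-type list below are invented for the
// example; node, chain, null_type, apply and apply_generator are the
// <ext/typelist.h> names shown in this hunk.

  #include <ext/typelist.h>
  #include <typeinfo>
  #include <cstdio>

  struct print_size
  {
    template<typename T>
      void
      operator()()
      { std::printf("%s : %lu\n", typeid(T).name(),
                    static_cast<unsigned long>(sizeof(T))); }
  };

  int main()
  {
    namespace tl = __gnu_cxx::typelist;
    typedef tl::node<tl::chain<char, tl::chain<int,
              tl::chain<long, tl::null_type> > > > types;
    print_size g;
    tl::apply_generator(g, types());  // one call per type in the list
  }
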
+ template<typename Fn, typename Typelist> void apply(Fn&, Typelist); + /// Apply all typelist types to generator functor. + template<typename Gn, typename Typelist> + void + apply_generator(Gn&, Typelist); + + // Apply all typelist types and values to generator functor. + template<typename Gn, typename TypelistT, typename TypelistV> + void + apply_generator(Gn&, TypelistT, TypelistV); + template<typename Typelist0, typename Typelist1> struct append; @@ -135,20 +146,64 @@ namespace detail struct apply_<Fn, chain<Hd, Tl> > { void - operator() (Fn& f) + operator()(Fn& f) { f.operator()(Hd()); apply_<Fn, Tl> next; next(f); } - }; + }; template<typename Fn> struct apply_<Fn, null_type> { void operator()(Fn&) { } - }; + }; + + template<typename Gn, typename Typelist_Chain> + struct apply_generator1_; + + template<typename Gn, typename Hd, typename Tl> + struct apply_generator1_<Gn, chain<Hd, Tl> > + { + void + operator()(Gn& g) + { + g.template operator()<Hd>(); + apply_generator1_<Gn, Tl> next; + next(g); + } + }; + + template<typename Gn> + struct apply_generator1_<Gn, null_type> + { + void + operator()(Gn&) { } + }; + + template<typename Gn, typename TypelistT_Chain, typename TypelistV_Chain> + struct apply_generator2_; + + template<typename Gn, typename Hd1, typename TlT, typename Hd2, typename TlV> + struct apply_generator2_<Gn, chain<Hd1, TlT>, chain<Hd2, TlV> > + { + void + operator()(Gn& g) + { + g.template operator()<Hd1, Hd2>(); + apply_generator2_<Gn, TlT, TlV> next; + next(g); + } + }; + + template<typename Gn> + struct apply_generator2_<Gn, null_type, null_type> + { + void + operator()(Gn&) { } + }; template<typename Typelist_Chain0, typename Typelist_Chain1> struct append_; @@ -294,20 +349,20 @@ namespace detail struct chain_flatten_; template<typename Hd_Tl> - struct chain_flatten_<chain<Hd_Tl, null_type> > - { - typedef typename Hd_Tl::root type; - }; + struct chain_flatten_<chain<Hd_Tl, null_type> > + { + typedef typename Hd_Tl::root type; + }; template<typename Hd_Typelist, class Tl_Typelist> - struct chain_flatten_<chain<Hd_Typelist, Tl_Typelist> > - { - private: - typedef typename chain_flatten_<Tl_Typelist>::type rest_type; - typedef append<Hd_Typelist, node<rest_type> > append_type; - public: - typedef typename append_type::type::root type; - }; + struct chain_flatten_<chain<Hd_Typelist, Tl_Typelist> > + { + private: + typedef typename chain_flatten_<Tl_Typelist>::type rest_type; + typedef append<Hd_Typelist, node<rest_type> > append_type; + public: + typedef typename append_type::type::root type; + }; } // namespace detail } // namespace typelist @@ -333,7 +388,7 @@ _GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx) namespace typelist { - template<typename Fn, class Typelist> + template<typename Fn, typename Typelist> void apply(Fn& fn, Typelist) { @@ -341,6 +396,24 @@ namespace typelist a(fn); } + template<typename Fn, typename Typelist> + void + apply_generator(Fn& fn, Typelist) + { + detail::apply_generator1_<Fn, typename Typelist::root> a; + a(fn); + } + + template<typename Fn, typename TypelistT, typename TypelistV> + void + apply_generator(Fn& fn, TypelistT, TypelistV) + { + typedef typename TypelistT::root rootT; + typedef typename TypelistV::root rootV; + detail::apply_generator2_<Fn, rootT, rootV> a; + a(fn); + } + template<typename Typelist0, typename Typelist1> struct append { diff --git a/libstdc++-v3/include/parallel/compiletime_settings.h b/libstdc++-v3/include/parallel/compiletime_settings.h index edaea3856ad..8ab89aa8ee9 100644 --- 
a/libstdc++-v3/include/parallel/compiletime_settings.h +++ b/libstdc++-v3/include/parallel/compiletime_settings.h @@ -73,17 +73,9 @@ * __gnu_parallel::parallel_random_shuffle(). */ #define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0 #endif -#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB +#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB /** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code. * Consider the size of the TLB for * __gnu_parallel::parallel_random_shuffle(). */ #define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0 #endif - -#ifndef _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST -/** @brief First copy the data, sort it locally, and merge it back - * (0); or copy it back after everything is done (1). - * - * Recommendation: 0 */ -#define _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST 0 -#endif diff --git a/libstdc++-v3/include/parallel/features.h b/libstdc++-v3/include/parallel/features.h index 2e09980405e..7150c20affc 100644 --- a/libstdc++-v3/include/parallel/features.h +++ b/libstdc++-v3/include/parallel/features.h @@ -61,66 +61,6 @@ #define _GLIBCXX_BAL_QUICKSORT 1 #endif -#ifndef _GLIBCXX_LOSER_TREE -/** @def _GLIBCXX_LOSER_TREE - * @brief Include guarded (sequences may run empty) loser tree, - * moving objects. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE 1 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_EXPLICIT -/** @def _GLIBCXX_LOSER_TREE_EXPLICIT - * @brief Include standard loser tree, storing two flags for infimum - * and supremum. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_EXPLICIT 0 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_REFERENCE -/** @def _GLIBCXX_LOSER_TREE_REFERENCE - * @brief Include some loser tree variant. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_REFERENCE 0 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_POINTER -/** @def _GLIBCXX_LOSER_TREE_POINTER - * @brief Include some loser tree variant. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_POINTER 1 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_UNGUARDED -/** @def _GLIBCXX_LOSER_TREE_UNGUARDED - * @brief Include unguarded (sequences must not run empty) loser - * tree, moving objects. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_UNGUARDED 0 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED -/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED - * @brief Include some loser tree variant. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_COMBINED -/** @def _GLIBCXX_LOSER_TREE_COMBINED - * @brief Include some loser tree variant. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_COMBINED 0 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_SENTINEL -/** @def _GLIBCXX_LOSER_TREE_SENTINEL - * @brief Include some loser tree variant. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_SENTINEL 0 -#endif - - #ifndef _GLIBCXX_FIND_GROWING_BLOCKS /** @brief Include the growing blocks variant for std::find. 
* @see __gnu_parallel::_Settings::find_algorithm */ diff --git a/libstdc++-v3/include/parallel/losertree.h b/libstdc++-v3/include/parallel/losertree.h index ddeb0d36d6c..cae15c0826e 100644 --- a/libstdc++-v3/include/parallel/losertree.h +++ b/libstdc++-v3/include/parallel/losertree.h @@ -47,878 +47,725 @@ namespace __gnu_parallel { -#if _GLIBCXX_LOSER_TREE_EXPLICIT - -/** @brief Guarded loser tree, copying the whole element into the -* tree structure. -* -* Guarding is done explicitly through two flags per element, inf -* and sup This is a quite slow variant. -*/ -template<typename T, typename Comparator = std::less<T> > - class LoserTreeExplicit +/** + * @brief Guarded loser/tournament tree. + * + * The smallest element is at the top. + * + * Guarding is done explicitly through one flag sup per element, + * inf is not needed due to a better initialization routine. This + * is a well-performing variant. + * + * @param T the element type + * @param Comparator the comparator to use, defaults to std::less<T> + */ +template<typename T, typename Comparator> +class LoserTreeBase +{ +protected: + /** @brief Internal representation of a LoserTree element. */ + struct Loser { - private: - struct Loser - { - // The relevant element. - T key; - - // Is this an infimum or supremum element? - bool inf, sup; - - // Number of the sequence the element comes from. - int source; - }; - - unsigned int size, offset; - Loser* losers; - Comparator comp; - - public: - LoserTreeExplicit(unsigned int _size, Comparator _comp = std::less<T>()) - : comp(_comp) - { - size = _size; - offset = size; - losers = new Loser[size]; - for (unsigned int l = 0; l < size; ++l) - { - //losers[l].key = ... stays unset - losers[l].inf = true; - losers[l].sup = false; - //losers[l].source = -1; //sentinel - } - } - - ~LoserTreeExplicit() - { delete[] losers; } + /** @brief flag, true iff this is a "maximum" sentinel. */ + bool sup; + /** @brief index of the source sequence. */ + int source; + /** @brief key of the element in the LoserTree. */ + T key; + }; - int - get_min_source() - { return losers[0].source; } + unsigned int ik, k, offset; + + /** log_2{k} */ + unsigned int _M_log_k; + + /** @brief LoserTree elements. */ + Loser* losers; + + /** @brief Comparator to use. */ + Comparator comp; + + /** + * @brief State flag that determines whether the LoserTree is empty. + * + * Only used for building the LoserTree. + */ + bool first_insert; + +public: + /** + * @brief The constructor. + * + * @param _k The number of sequences to merge. + * @param _comp The comparator to use. + */ + LoserTreeBase(unsigned int _k, Comparator _comp) + : comp(_comp) + { + ik = _k; + + // Compute log_2{k} for the Loser Tree + _M_log_k = log2(ik - 1) + 1; + + // Next greater power of 2. + k = 1 << _M_log_k; + offset = k; + + // Avoid default-constructing losers[].key + losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser))); + for (unsigned int i = ik - 1; i < k; ++i) + losers[i + k].sup = true; + + first_insert = true; + } + + /** + * @brief The destructor. + */ + ~LoserTreeBase() + { ::operator delete(losers); } + + /** + * @brief Initializes the sequence "source" with the element "key". + * + * @param key the element to insert + * @param source index of the source sequence + * @param sup flag that determines whether the value to insert is an + * explicit supremum. 
+ */ + inline void + insert_start(const T& key, int source, bool sup) + { + unsigned int pos = k + source; + + if(first_insert) + { + // Construct all keys, so we can easily deconstruct them. + for (unsigned int i = 0; i < (2 * k); ++i) + new(&(losers[i].key)) T(key); + first_insert = false; + } + else + new(&(losers[pos].key)) T(key); + + losers[pos].sup = sup; + losers[pos].source = source; + } + + /** + * @return the index of the sequence with the smallest element. + */ + int get_min_source() + { return losers[0].source; } +}; + +/** + * @brief Stable LoserTree variant. + * + * Provides the stable implementations of insert_start, init_winner, + * init and delete_min_insert. + * + * Unstable variant is done using partial specialisation below. + */ +template<bool stable/* default == true */, typename T, typename Comparator> +class LoserTree : public LoserTreeBase<T, Comparator> +{ + typedef LoserTreeBase<T, Comparator> Base; + using Base::k; + using Base::losers; + using Base::first_insert; + +public: + LoserTree(unsigned int _k, Comparator _comp) + : Base::LoserTreeBase(_k, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup + || (!losers[left].sup + && !comp(losers[right].key, losers[left].key))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + void init() + { losers[0] = losers[init_winner(1)]; } + + /** + * @brief Delete the smallest element and insert a new element from + * the previously smallest element's sequence. + * + * This implementation is stable. + */ + // Do not pass a const reference since key will be used as local variable. + void delete_min_insert(T key, bool sup) + { + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if ((sup && (!losers[pos].sup || losers[pos].source < source)) + || (!sup && !losers[pos].sup + && ((comp(losers[pos].key, key)) + || (!comp(key, losers[pos].key) + && losers[pos].source < source)))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].key = key; + } +}; + +/** + * @brief Unstable LoserTree variant. + * + * Stability (non-stable here) is selected with partial specialization. + */ +template<typename T, typename Comparator> +class LoserTree</* stable == */false, T, Comparator> : + public LoserTreeBase<T, Comparator> +{ + typedef LoserTreeBase<T, Comparator> Base; + using Base::_M_log_k; + using Base::k; + using Base::losers; + using Base::first_insert; + +public: + LoserTree(unsigned int _k, Comparator _comp) + : Base::LoserTreeBase(_k, _comp) + {} + + /** + * Computes the winner of the competition at position "root". + * + * Called recursively (starting at 0) to build the initial tree. + * + * @param root index of the "game" to start. 
+ */ + unsigned int + init_winner (unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup || + (!losers[left].sup + && !comp(losers[right].key, losers[left].key))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { losers[0] = losers[init_winner(1)]; } + + /** + * Delete the key smallest element and insert the element key instead. + * + * @param key the key to insert + * @param sup true iff key is an explicitly marked supremum + */ + // Do not pass a const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool sup) + { +#if _GLIBCXX_ASSERTIONS + // loser trees are only used for at least 2 sequences + _GLIBCXX_PARALLEL_ASSERT(_M_log_k > 1); +#endif - void - insert_start(T key, int source, bool sup) + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { - bool inf = false; - for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) - { - if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup - && comp(losers[pos].key, key)) || losers[pos].inf || sup) - { - // The other one is smaller. - std::swap(losers[pos].key, key); - std::swap(losers[pos].inf, inf); - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - } - } - - losers[0].key = key; - losers[0].inf = inf; - losers[0].sup = sup; - losers[0].source = source; + // The smaller one gets promoted. + if (sup || (!losers[pos].sup && comp(losers[pos].key, key))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } } - void - init() { } + losers[0].sup = sup; + losers[0].source = source; + losers[0].key = key; + } +}; - void - delete_min_insert(T key, bool sup) - { - bool inf = false; - int source = losers[0].source; - for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup - && comp(losers[pos].key, key)) - || losers[pos].inf || sup) - { - // The other one is smaller. - std::swap(losers[pos].key, key); - std::swap(losers[pos].inf, inf); - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - } - } - - losers[0].key = key; - losers[0].inf = inf; - losers[0].sup = sup; - losers[0].source = source; - } - - void - insert_start_stable(T key, int source, bool sup) - { - bool inf = false; - for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) - { - if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup - && ((comp(losers[pos].key, key)) - || (!comp(key, losers[pos].key) - && losers[pos].source < source))) - || losers[pos].inf || sup) - { - // Take next key. 
- std::swap(losers[pos].key, key); - std::swap(losers[pos].inf, inf); - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - } - } - - losers[0].key = key; - losers[0].inf = inf; - losers[0].sup = sup; - losers[0].source = source; - } - void - init_stable() { } - - void - delete_min_insert_stable(T key, bool sup) - { - bool inf = false; - int source = losers[0].source; - for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) - { - if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup - && ((comp(losers[pos].key, key)) - || (!comp(key, losers[pos].key) - && losers[pos].source < source))) - || losers[pos].inf || sup) - { - std::swap(losers[pos].key, key); - std::swap(losers[pos].inf, inf); - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - } - } - - losers[0].key = key; - losers[0].inf = inf; - losers[0].sup = sup; - losers[0].source = source; - } +/** + * @brief Base class of Loser Tree implementation using pointers. + */ +template<typename T, typename Comparator> +class LoserTreePointerBase +{ +protected: + /** @brief Internal representation of LoserTree elements. */ + struct Loser + { + bool sup; + int source; + const T* keyp; }; -#endif - -#if _GLIBCXX_LOSER_TREE + unsigned int ik, k, offset; + Loser* losers; + Comparator comp; -/** @brief Guarded loser tree, either copying the whole element into -* the tree structure, or looking up the element via the index. -* -* Guarding is done explicitly through one flag sup per element, -* inf is not needed due to a better initialization routine. This -* is a well-performing variant. -*/ -template<typename T, typename Comparator = std::less<T> > - class LoserTree - { - private: - struct Loser - { - bool sup; - int source; - T key; - }; - - unsigned int ik, k, offset; - Loser* losers; - Comparator comp; - bool first_insert; - - public: - LoserTree(unsigned int _k, Comparator _comp = std::less<T>()) +public: + LoserTreePointerBase(unsigned int _k, Comparator _comp = std::less<T>()) : comp(_comp) - { - ik = _k; - - // Next greater power of 2. - k = 1 << (log2(ik - 1) + 1); - offset = k; - // Avoid default-constructing losers[].key - losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser))); - for (unsigned int i = ik - 1; i < k; ++i) - losers[i + k].sup = true; - - first_insert = true; - } + { + ik = _k; - ~LoserTree() - { ::operator delete(losers); } + // Next greater power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + losers = new Loser[k * 2]; + for (unsigned int i = ik - 1; i < k; i++) + losers[i + k].sup = true; + } - int - get_min_source() - { return losers[0].source; } + ~LoserTreePointerBase() + { ::operator delete(losers); } - void - insert_start(const T& key, int source, bool sup) - { - unsigned int pos = k + source; - - if(first_insert) - { - // Construct all keys, so we can easily deconstruct them. - for (unsigned int i = 0; i < (2 * k); ++i) - ::new(&(losers[i].key)) T(key); - first_insert = false; - } - else - ::new(&(losers[pos].key)) T(key); - - losers[pos].sup = sup; - losers[pos].source = source; - } + int get_min_source() + { return losers[0].source; } - unsigned int - init_winner (unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup - || (!losers[left].sup - && !comp(losers[right].key, losers[left].key))) - { - // Left one is less or equal. 
- losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } + void insert_start(const T& key, int source, bool sup) + { + unsigned int pos = k + source; + + losers[pos].sup = sup; + losers[pos].source = source; + losers[pos].keyp = &key; + } +}; + +/** + * @brief Stable LoserTree implementation. + * + * The unstable variant is implemented using partial instantiation below. + */ +template<bool stable/* default == true */, typename T, typename Comparator> +class LoserTreePointer : public LoserTreePointerBase<T, Comparator> +{ + typedef LoserTreePointerBase<T, Comparator> Base; + using Base::k; + using Base::losers; - void - init() - { losers[0] = losers[init_winner(1)]; } +public: + LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>()) + : Base::LoserTreePointerBase(_k, _comp) + {} - // Do not pass const reference since key will be used as local variable. - void - delete_min_insert(T key, bool sup) - { - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (sup || (!losers[pos].sup && comp(losers[pos].key, key))) - { - // The other one is smaller. - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - std::swap(losers[pos].key, key); - } - } - - losers[0].sup = sup; - losers[0].source = source; - losers[0].key = key; - } + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup + || (!losers[left].sup && !comp(*losers[right].keyp, + *losers[left].keyp))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + void init() + { losers[0] = losers[init_winner(1)]; } + + void delete_min_insert(const T& key, bool sup) + { + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if ((sup && (!losers[pos].sup || losers[pos].source < source)) || + (!sup && !losers[pos].sup && + ((comp(*losers[pos].keyp, *keyp)) || + (!comp(*keyp, *losers[pos].keyp) + && losers[pos].source < source)))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +/** + * @brief Unstable LoserTree implementation. + * + * The stable variant is above. 
+ */ +template<typename T, typename Comparator> +class LoserTreePointer</* stable == */false, T, Comparator> : + public LoserTreePointerBase<T, Comparator> +{ + typedef LoserTreePointerBase<T, Comparator> Base; + using Base::k; + using Base::losers; - void - insert_start_stable(const T& key, int source, bool sup) - { return insert_start(key, source, sup); } +public: + LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>()) + : Base::LoserTreePointerBase(_k, _comp) + {} - unsigned int - init_winner_stable (unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup || (!losers[left].sup - && !comp(losers[right].key, losers[left].key))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init_stable() - { losers[0] = losers[init_winner_stable(1)]; } - - // Do not pass const reference since key will be used as local variable. - void - delete_min_insert_stable(T key, bool sup) - { - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if ( (sup && (!losers[pos].sup || losers[pos].source < source)) - || (!sup && !losers[pos].sup - && ((comp(losers[pos].key, key)) - || (!comp(key, losers[pos].key) - && losers[pos].source < source)))) - { - // The other one is smaller. - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - std::swap(losers[pos].key, key); - } - } - - losers[0].sup = sup; - losers[0].source = source; - losers[0].key = key; - } - }; - -#endif - -#if _GLIBCXX_LOSER_TREE_REFERENCE - -/** @brief Guarded loser tree, either copying the whole element into -* the tree structure, or looking up the element via the index. -* -* Guarding is done explicitly through one flag sup per element, -* inf is not needed due to a better initialization routine. This -* is a well-performing variant. -*/ -template<typename T, typename Comparator = std::less<T> > - class LoserTreeReference + && !comp(*losers[right].keyp, *losers[left].keyp))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + void init() + { losers[0] = losers[init_winner(1)]; } + + void delete_min_insert(const T& key, bool sup) { -#undef COPY -#ifdef COPY -#define KEY(i) losers[i].key -#define KEY_SOURCE(i) key -#else -#define KEY(i) keys[losers[i].source] -#define KEY_SOURCE(i) keys[i] -#endif - private: - struct Loser - { - bool sup; - int source; -#ifdef COPY - T key; -#endif - }; + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp))) + { + // The other one is smaller. 
+ std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +/** @brief Base class for unguarded LoserTree implementation. + * + * The whole element is copied into the tree structure. + * + * No guarding is done, therefore not a single input sequence must + * run empty. Unused sequence heads are marked with a sentinel which + * is > all elements that are to be merged. + * + * This is a very fast variant. + */ +template<typename T, typename Comparator> +class LoserTreeUnguardedBase +{ +protected: + struct Loser + { + int source; + T key; + }; - unsigned int ik, k, offset; - Loser* losers; -#ifndef COPY - T* keys; -#endif - Comparator comp; + unsigned int ik, k, offset; + Loser* losers; + Comparator comp; - public: - LoserTreeReference(unsigned int _k, Comparator _comp = std::less<T>()) +public: + inline + LoserTreeUnguardedBase(unsigned int _k, const T _sentinel, + Comparator _comp = std::less<T>()) : comp(_comp) - { - ik = _k; - - // Next greater power of 2. - k = 1 << (log2(ik - 1) + 1); - offset = k; - losers = new Loser[k * 2]; -#ifndef COPY - keys = new T[ik]; -#endif - for (unsigned int i = ik - 1; i < k; ++i) - losers[i + k].sup = true; - } - - ~LoserTreeReference() - { - delete[] losers; -#ifndef COPY - delete[] keys; -#endif - } - - int - get_min_source() - { return losers[0].source; } - - void - insert_start(T key, int source, bool sup) - { - unsigned int pos = k + source; - - losers[pos].sup = sup; - losers[pos].source = source; - KEY(pos) = key; - } - - unsigned int - init_winner(unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if ( losers[right].sup || - (!losers[left].sup && !comp(KEY(right), KEY(left)))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init() - { - losers[0] = losers[init_winner(1)]; - } - - void - delete_min_insert(T key, bool sup) - { - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (sup || (!losers[pos].sup && comp(KEY(pos), KEY_SOURCE(source)))) - { - // The other one is smaller. - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); -#ifdef COPY - std::swap(KEY(pos), KEY_SOURCE(source)); -#endif - } - } - - losers[0].sup = sup; - losers[0].source = source; -#ifdef COPY - KEY(0) = KEY_SOURCE(source); + { + ik = _k; + + // Next greater power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + // Avoid default-constructing losers[].key + losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser))); + + for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i) + { + losers[i].key = _sentinel; + losers[i].source = -1; + } + } + + inline ~LoserTreeUnguardedBase() + { ::operator delete(losers); } + + inline int + get_min_source() + { + // no dummy sequence can ever be at the top! 
+#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif - } + return losers[0].source; + } - void - insert_start_stable(T key, int source, bool sup) - { return insert_start(key, source, sup); } - - unsigned int - init_winner_stable(unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup - || (!losers[left].sup && !comp(KEY(right), KEY(left)))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init_stable() - { losers[0] = losers[init_winner_stable(1)]; } - - void - delete_min_insert_stable(T key, bool sup) - { - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if ((sup && (!losers[pos].sup || losers[pos].source < source)) - || (!sup && !losers[pos].sup - && ((comp(KEY(pos), KEY_SOURCE(source))) - || (!comp(KEY_SOURCE(source), KEY(pos)) - && losers[pos].source < source)))) - { - // The other one is smaller. - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); -#ifdef COPY - std::swap(KEY(pos), KEY_SOURCE(source)); -#endif - } - } + inline void + insert_start(const T& key, int source, bool) + { + unsigned int pos = k + source; + + new(&(losers[pos].key)) T(key); + losers[pos].source = source; + } +}; + +/** + * @brief Stable implementation of unguarded LoserTree. + * + * Unstable variant is selected below with partial specialization. + */ +template<bool stable/* default == true */, typename T, typename Comparator> +class LoserTreeUnguarded : public LoserTreeUnguardedBase<T, Comparator> +{ + typedef LoserTreeUnguardedBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreeUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less<T>()) + : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (!comp(losers[right].key, losers[left].key)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1)]; - losers[0].sup = sup; - losers[0].source = source; -#ifdef COPY - KEY(0) = KEY_SOURCE(source); + // no dummy sequence can ever be at the top at the beginning (0 sequences!) +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif - } - }; -#undef KEY -#undef KEY_SOURCE + } + // Do not pass a const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool) + { + // No dummy sequence can ever be at the top and be retrieved! +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif -#if _GLIBCXX_LOSER_TREE_POINTER - -/** @brief Guarded loser tree, either copying the whole element into - the tree structure, or looking up the element via the index. -* Guarding is done explicitly through one flag sup per element, -* inf is not needed due to a better initialization routine. -* This is a well-performing variant. 
-*/ -template<typename T, typename Comparator = std::less<T> > - class LoserTreePointer + int source = losers[0].source; + printf("%d\n", source); + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if (comp(losers[pos].key, key) + || (!comp(key, losers[pos].key) && losers[pos].source < source)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].source = source; + losers[0].key = key; + } +}; + +/** + * @brief Non-Stable implementation of unguarded LoserTree. + * + * Stable implementation is above. + */ +template<typename T, typename Comparator> +class LoserTreeUnguarded</* stable == */false, T, Comparator> : + public LoserTreeUnguardedBase<T, Comparator> +{ + typedef LoserTreeUnguardedBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreeUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less<T>()) + : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner (unsigned int root) { - private: - struct Loser - { - bool sup; - int source; - const T* keyp; - }; - - unsigned int ik, k, offset; - Loser* losers; - Comparator comp; - - public: - LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>()) - : comp(_comp) - { - ik = _k; - - // Next greater power of 2. - k = 1 << (log2(ik - 1) + 1); - offset = k; - losers = new Loser[k * 2]; - for (unsigned int i = ik - 1; i < k; ++i) - losers[i + k].sup = true; - } - - ~LoserTreePointer() - { delete[] losers; } - - int - get_min_source() - { return losers[0].source; } - - void - insert_start(const T& key, int source, bool sup) - { - unsigned int pos = k + source; - - losers[pos].sup = sup; - losers[pos].source = source; - losers[pos].keyp = &key; - } - - unsigned int - init_winner(unsigned int root) - { - if (root >= k) - return root; - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup - || (!losers[left].sup - && !comp(*losers[right].keyp, *losers[left].keyp))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init() - { losers[0] = losers[init_winner(1)]; } - - void - delete_min_insert(const T& key, bool sup) - { - const T* keyp = &key; - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp))) - { - // The other one is smaller. - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - std::swap(losers[pos].keyp, keyp); - } - } - - losers[0].sup = sup; - losers[0].source = source; - losers[0].keyp = keyp; - } - - void - insert_start_stable(const T& key, int source, bool sup) - { return insert_start(key, source, sup); } - - unsigned int - init_winner_stable(unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup - || (!losers[left].sup && !comp(*losers[right].keyp, - *losers[left].keyp))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. 
- losers[root] = losers[left]; - return right; - } - } - } - - void - init_stable() - { losers[0] = losers[init_winner_stable(1)]; } - - void - delete_min_insert_stable(const T& key, bool sup) - { - const T* keyp = &key; - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if ( (sup && (!losers[pos].sup || losers[pos].source < source)) - || (!sup && !losers[pos].sup && - ((comp(*losers[pos].keyp, *keyp)) - || (!comp(*keyp, *losers[pos].keyp) - && losers[pos].source < source)))) - { - // The other one is smaller. - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - std::swap(losers[pos].keyp, keyp); - } - } - - losers[0].sup = sup; - losers[0].source = source; - losers[0].keyp = keyp; - } - }; - + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + +#if _GLIBCXX_ASSERTIONS + // If left one is sentinel then right one must be, too. + if (losers[left].source == -1) + _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1); #endif -#if _GLIBCXX_LOSER_TREE_UNGUARDED - -/** @brief Unguarded loser tree, copying the whole element into the -* tree structure. -* -* No guarding is done, therefore not a single input sequence must -* run empty. This is a very fast variant. -*/ -template<typename T, typename Comparator = std::less<T> > - class LoserTreeUnguarded + if (!comp(losers[right].key, losers[left].key)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() { - private: - struct Loser - { - int source; - T key; - }; - - unsigned int ik, k, offset; - unsigned int* mapping; - Loser* losers; - Comparator comp; - - void - map(unsigned int root, unsigned int begin, unsigned int end) - { - if (begin + 1 == end) - mapping[begin] = root; - else - { - // Next greater or equal power of 2. - unsigned int left = 1 << (log2(end - begin - 1)); - map(root * 2, begin, begin + left); - map(root * 2 + 1, begin + left, end); - } - } - - public: - LoserTreeUnguarded(unsigned int _k, Comparator _comp = std::less<T>()) - : comp(_comp) - { - ik = _k; - // Next greater or equal power of 2. - k = 1 << (log2(ik - 1) + 1); - offset = k; - losers = new Loser[k + ik]; - mapping = new unsigned int[ik]; - map(1, 0, ik); - } - - ~LoserTreeUnguarded() - { - delete[] losers; - delete[] mapping; - } - - int - get_min_source() - { return losers[0].source; } - - void - insert_start(const T& key, int source, bool) - { - unsigned int pos = mapping[source]; - losers[pos].source = source; - losers[pos].key = key; - } - - unsigned int - init_winner(unsigned int root, unsigned int begin, unsigned int end) - { - if (begin + 1 == end) - return mapping[begin]; - else - { - // Next greater or equal power of 2. - unsigned int division = 1 << (log2(end - begin - 1)); - unsigned int left = init_winner(2 * root, begin, begin + division); - unsigned int right = - init_winner(2 * root + 1, begin + division, end); - if (!comp(losers[right].key, losers[left].key)) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. 
- losers[root] = losers[left]; - return right; - } - } - } - - void - init() - { losers[0] = losers[init_winner(1, 0, ik)]; } - - // Do not pass const reference since key will be used as local variable. - void - delete_min_insert(const T& key, bool) - { - losers[0].key = key; - T& keyr = losers[0].key; - int& source = losers[0].source; - for (int pos = mapping[source] / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (comp(losers[pos].key, keyr)) - { - // The other one is smaller. - std::swap(losers[pos].source, source); - std::swap(losers[pos].key, keyr); - } - } - } - - void - insert_start_stable(const T& key, int source, bool) - { return insert_start(key, source, false); } - - void - init_stable() - { init(); } - - void - delete_min_insert_stable(const T& key, bool) - { - losers[0].key = key; - T& keyr = losers[0].key; - int& source = losers[0].source; - for (int pos = mapping[source] / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if (comp(losers[pos].key, keyr) - || (!comp(keyr, losers[pos].key) - && losers[pos].source < source)) - { - // The other one is smaller. - std::swap(losers[pos].source, source); - std::swap(losers[pos].key, keyr); - } - } - } - }; + losers[0] = losers[init_winner(1)]; + // no dummy sequence can ever be at the top at the beginning (0 sequences!) +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif + } -#if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED + // Do not pass a const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool) + { + printf("wrong\n"); + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (comp(losers[pos].key, key)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].source = source; + losers[0].key = key; + } +}; /** @brief Unguarded loser tree, keeping only pointers to the * elements in the tree structure. @@ -926,175 +773,233 @@ template<typename T, typename Comparator = std::less<T> > * No guarding is done, therefore not a single input sequence must * run empty. This is a very fast variant. */ -template<typename T, typename Comparator = std::less<T> > - class LoserTreePointerUnguarded +template<typename T, typename Comparator> +class LoserTreePointerUnguardedBase +{ +protected: + struct Loser { - private: - struct Loser - { - int source; - const T* keyp; - }; - - unsigned int ik, k, offset; - unsigned int* mapping; - Loser* losers; - Comparator comp; - - void map(unsigned int root, unsigned int begin, unsigned int end) - { - if (begin + 1 == end) - mapping[begin] = root; - else - { - // Next greater or equal power of 2. - unsigned int left = 1 << (log2(end - begin - 1)); - map(root * 2, begin, begin + left); - map(root * 2 + 1, begin + left, end); - } - } - - public: - LoserTreePointerUnguarded(unsigned int _k, - Comparator _comp = std::less<T>()) - : comp(_comp) - { - ik = _k; - - // Next greater power of 2. 
- k = 1 << (log2(ik - 1) + 1); - offset = k; - losers = new Loser[k + ik]; - mapping = new unsigned int[ik]; - map(1, 0, ik); - } - - ~LoserTreePointerUnguarded() - { - delete[] losers; - delete[] mapping; - } - - int - get_min_source() - { return losers[0].source; } - - void - insert_start(const T& key, int source, bool) - { - unsigned int pos = mapping[source]; - losers[pos].source = source; - losers[pos].keyp = &key; - } - - unsigned int - init_winner(unsigned int root, unsigned int begin, unsigned int end) - { - if (begin + 1 == end) - return mapping[begin]; - else - { - // Next greater or equal power of 2. - unsigned int division = 1 << (log2(end - begin - 1)); - unsigned int left = init_winner(2 * root, begin, begin + division); - unsigned int right = init_winner(2 * root + 1, - begin + division, end); - if (!comp(*losers[right].keyp, *losers[left].keyp)) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } + int source; + const T* keyp; + }; - void - init() - { losers[0] = losers[init_winner(1, 0, ik)]; } + unsigned int ik, k, offset; + Loser* losers; + const T sentinel; + Comparator comp; - void - delete_min_insert(const T& key, bool) - { - const T* keyp = &key; - int& source = losers[0].source; - for (int pos = mapping[source] / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (comp(*losers[pos].keyp, *keyp)) - { - // The other one is smaller. - std::swap(losers[pos].source, source); - std::swap(losers[pos].keyp, keyp); - } - } - - losers[0].keyp = keyp; - } +public: - void - insert_start_stable(const T& key, int source, bool) - { return insert_start(key, source, false); } + inline + LoserTreePointerUnguardedBase(unsigned int _k, const T _sentinel, + Comparator _comp = std::less<T>()) + : sentinel(_sentinel), comp(_comp) + { + ik = _k; + + // Next greater power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + // Avoid default-constructing losers[].key + losers = new Loser[2 * k]; + + for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i) + { + losers[i].keyp = &sentinel; + losers[i].source = -1; + } + } + + inline ~LoserTreePointerUnguardedBase() + { delete[] losers; } + + inline int + get_min_source() + { + // no dummy sequence can ever be at the top! +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + return losers[0].source; + } - void - init_stable() - { init(); } + inline void + insert_start(const T& key, int source, bool) + { + unsigned int pos = k + source; + + losers[pos].keyp = &key; + losers[pos].source = source; + } +}; + +/** + * @brief Stable unguarded LoserTree variant storing pointers. + * + * Unstable variant is implemented below using partial specialization. 
+ */ +template<bool stable/* default == true */, typename T, typename Comparator> +class LoserTreePointerUnguarded : + public LoserTreePointerUnguardedBase<T, Comparator> +{ + typedef LoserTreePointerUnguardedBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreePointerUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less<T>()) + : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (!comp(*losers[right].keyp, *losers[left].keyp)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1)]; - void - delete_min_insert_stable(const T& key, bool) - { - int& source = losers[0].source; - const T* keyp = &key; - for (int pos = mapping[source] / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if (comp(*losers[pos].keyp, *keyp) - || (!comp(*keyp, *losers[pos].keyp) - && losers[pos].source < source)) - { - // The other one is smaller. - std::swap(losers[pos].source, source); - std::swap(losers[pos].keyp, keyp); - } - } - losers[0].keyp = keyp; - } - }; + // no dummy sequence can ever be at the top at the beginning (0 sequences!) +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif + } -template<typename _ValueTp, class Comparator> - struct loser_tree_traits + inline void + delete_min_insert(const T& key, bool sup) { -#if _GLIBCXX_LOSER_TREE - typedef LoserTree<_ValueTp, Comparator> LT; -#else -# if _GLIBCXX_LOSER_TREE_POINTER - typedef LoserTreePointer<_ValueTp, Comparator> LT; -# else -# error Must define some type in losertree.h. -# endif + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if (comp(*losers[pos].keyp, *keyp) + || (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].source = source; + losers[0].keyp = keyp; + + // no dummy sequence can ever be at the top! +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + } +}; + +/** + * @brief Unstable unguarded LoserTree variant storing pointers. + * + * Stable variant is above. + */ +template<typename T, typename Comparator> +class LoserTreePointerUnguarded</* stable == */false, T, Comparator> : + public LoserTreePointerUnguardedBase<T, Comparator> +{ + typedef LoserTreePointerUnguardedBase<T, Comparator> Base; + using Base::k; + using Base::losers; + +public: + LoserTreePointerUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less<T>()) + : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + +#if _GLIBCXX_ASSERTIONS + // If left one is sentinel then right one must be, too. 
+ if (losers[left].source == -1) + _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1); #endif - }; -template<typename _ValueTp, class Comparator> - struct loser_tree_unguarded_traits + if (!comp(*losers[right].keyp, *losers[left].keyp)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() { -#if _GLIBCXX_LOSER_TREE_UNGUARDED - typedef LoserTreeUnguarded<_ValueTp, Comparator> LT; -#else -# if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED - typedef LoserTreePointerUnguarded<_ValueTp, Comparator> LT; -# else -# error Must define some unguarded type in losertree.h. -# endif + losers[0] = losers[init_winner(1)]; + + // no dummy sequence can ever be at the top at the beginning (0 sequences!) +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif - }; + } -} + inline void + delete_min_insert(const T& key, bool sup) + { + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (comp(*(losers[pos].keyp), *keyp)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +} // namespace __gnu_parallel #endif diff --git a/libstdc++-v3/include/parallel/merge.h b/libstdc++-v3/include/parallel/merge.h index f12f3110871..6e0f2e382c3 100644 --- a/libstdc++-v3/include/parallel/merge.h +++ b/libstdc++-v3/include/parallel/merge.h @@ -239,19 +239,26 @@ namespace __gnu_parallel std::iterator_traits<RandomAccessIterator1>:: difference_type max_length, Comparator comp) { - typedef typename std::iterator_traits<RandomAccessIterator1>::value_type - value_type; + typedef typename + std::iterator_traits<RandomAccessIterator1>::value_type value_type; typedef typename std::iterator_traits<RandomAccessIterator1>:: difference_type difference_type1 /* == difference_type2 */; typedef typename std::iterator_traits<RandomAccessIterator3>:: difference_type difference_type3; + typedef typename std::pair<RandomAccessIterator1, RandomAccessIterator1> + iterator_pair; std::pair<RandomAccessIterator1, RandomAccessIterator1> seqs[2] = { std::make_pair(begin1, end1), std::make_pair(begin2, end2) }; - RandomAccessIterator3 - target_end = parallel_multiway_merge(seqs, seqs + 2, target, - comp, max_length, true, false); + RandomAccessIterator3 + target_end = parallel_multiway_merge + < /* stable = */ true, /* sentinels = */ false>( + seqs, seqs + 2, target, comp, + multiway_merge_exact_splitting + < /* stable = */ true, iterator_pair*, + Comparator, difference_type1>, + max_length); return target_end; } diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h index 6cc724b6015..40a2f1bc6af 100644 --- a/libstdc++-v3/include/parallel/multiway_merge.h +++ b/libstdc++-v3/include/parallel/multiway_merge.h @@ -40,7 +40,7 @@ * This file is a GNU parallel extension to the Standard C++ Library. */ -// Written by Johannes Singler. +// Written by Johannes Singler and Manuel Holtgrewe. 
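The loser-tree classes above all share the same two operations: an initial tournament (init_winner/init) and a replay step (delete_min_insert) in which the next key from the winning sequence climbs from leaf (k + source) / 2 towards the root, swapping places with every stored loser that beats it. The following is a minimal, self-contained sketch of that mechanism; the class name MiniLoserTree, the plain int keys and the INT_MAX feed used once a source runs dry are purely illustrative and not part of the patch.

#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstdio>
#include <vector>

// Minimal sketch of the loser-tree mechanism used above (illustrative only).
// Assumes k is a power of two; keys are plain ints.
struct MiniLoserTree
{
  struct Loser { int source; int key; };

  unsigned int k;
  std::vector<Loser> losers;  // [0] winner, [1..k-1] inner nodes, [k..2k-1] leaves

  explicit MiniLoserTree(unsigned int k_) : k(k_), losers(2 * k_) { }

  void
  insert_start(int key, int source)
  { losers[k + source] = Loser{source, key}; }

  // Initial tournament: fills the inner nodes with losers and returns the
  // leaf index of the winner of the subtree rooted at root.
  unsigned int
  init_winner(unsigned int root)
  {
    if (root >= k)
      return root;
    unsigned int left = init_winner(2 * root);
    unsigned int right = init_winner(2 * root + 1);
    if (losers[right].key >= losers[left].key)
      { losers[root] = losers[right]; return left; }   // left is less or equal
    else
      { losers[root] = losers[left]; return right; }   // right is less
  }

  void init() { losers[0] = losers[init_winner(1)]; }

  int get_min_source() const { return losers[0].source; }

  // Replay: the next key of the winning source climbs from its leaf's parent
  // (k + source) / 2 towards the root, swapping with any smaller stored loser.
  void
  delete_min_insert(int key)
  {
    int source = losers[0].source;
    for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
      if (losers[pos].key < key)
        {
          std::swap(losers[pos].source, source);
          std::swap(losers[pos].key, key);
        }
    losers[0] = Loser{source, key};
  }
};

int main()
{
  std::vector<int> seq[4] = { {1, 5, 9}, {2, 6, 10}, {3, 7, 11}, {4, 8, 12} };
  std::size_t pos[4] = { 0, 0, 0, 0 };

  MiniLoserTree lt(4);
  for (int s = 0; s < 4; ++s)
    lt.insert_start(seq[s][0], s);
  lt.init();

  for (int i = 0; i < 12; ++i)
    {
      int source = lt.get_min_source();
      std::printf("%d ", seq[source][pos[source]]);
      ++pos[source];
      // Feed the next element of the same source; INT_MAX plays the role of
      // the sentinel once a source is exhausted.
      int next = pos[source] < seq[source].size()
                 ? seq[source][pos[source]] : INT_MAX;
      lt.delete_min_insert(next);
    }
  std::printf("\n");   // 1 2 3 4 5 6 7 8 9 10 11 12
}

Feeding INT_MAX here plays the role the sentinel has in the unguarded variants above; the guarded trees instead pass an arbitrary element together with a true "sup" flag, as in the insert_start/delete_min_insert calls further down in multiway_merge_loser_tree.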
#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H @@ -50,7 +50,6 @@ #include <bits/stl_algo.h> #include <parallel/features.h> #include <parallel/parallel.h> -#include <parallel/merge.h> #include <parallel/losertree.h> #if _GLIBCXX_ASSERTIONS #include <parallel/checkers.h> @@ -59,27 +58,34 @@ /** @brief Length of a sequence described by a pair of iterators. */ #define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first) -// XXX need iterator typedefs namespace __gnu_parallel { + +// Announce guarded and unguarded iterator. + template<typename RandomAccessIterator, typename Comparator> class guarded_iterator; +// Making the arguments const references seems to dangerous, +// the user-defined comparator might not be const. template<typename RandomAccessIterator, typename Comparator> inline bool operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1, - guarded_iterator<RandomAccessIterator, Comparator>& bi2); + guarded_iterator<RandomAccessIterator, Comparator>& bi2); template<typename RandomAccessIterator, typename Comparator> inline bool operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1, - guarded_iterator<RandomAccessIterator, Comparator>& bi2); + guarded_iterator<RandomAccessIterator, Comparator>& bi2); - /** @brief Iterator wrapper supporting an implicit supremum at the end - of the sequence, dominating all comparisons. - * Deriving from RandomAccessIterator is not possible since - * RandomAccessIterator need not be a class. - */ +/** @brief Iterator wrapper supporting an implicit supremum at the end + * of the sequence, dominating all comparisons. + * + * The implicit supremum comes with a performance cost. + * + * Deriving from RandomAccessIterator is not possible since + * RandomAccessIterator need not be a class. + */ template<typename RandomAccessIterator, typename Comparator> class guarded_iterator { @@ -100,7 +106,7 @@ template<typename RandomAccessIterator, typename Comparator> * @param comp Comparator provided for associated overloaded * compare operators. */ guarded_iterator(RandomAccessIterator begin, - RandomAccessIterator end, Comparator& comp) + RandomAccessIterator end, Comparator& comp) : current(begin), end(end), comp(comp) { } @@ -115,7 +121,7 @@ template<typename RandomAccessIterator, typename Comparator> /** @brief Dereference operator. * @return Referenced element. */ - typename std::iterator_traits<RandomAccessIterator>::value_type + typename std::iterator_traits<RandomAccessIterator>::value_type& operator*() { return *current; } @@ -158,7 +164,7 @@ template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator> inline bool operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1, - guarded_iterator<RandomAccessIterator, Comparator>& bi2) + guarded_iterator<RandomAccessIterator, Comparator>& bi2) { if (bi2.current == bi2.end) //bi1 is sup return bi1.current != bi1.end; //bi2 is not sup @@ -185,7 +191,7 @@ template<typename RandomAccessIterator, typename Comparator> { private: /** @brief Current iterator position. */ - RandomAccessIterator& current; + RandomAccessIterator current; /** @brief Comparator. */ mutable Comparator& comp; @@ -195,7 +201,7 @@ template<typename RandomAccessIterator, typename Comparator> * @param end Unused, only for compatibility. * @param comp Unused, only for compatibility. 
*/ unguarded_iterator(RandomAccessIterator begin, - RandomAccessIterator end, Comparator& comp) + RandomAccessIterator end, Comparator& comp) : current(begin), comp(comp) { } @@ -210,7 +216,7 @@ template<typename RandomAccessIterator, typename Comparator> /** @brief Dereference operator. * @return Referenced element. */ - typename std::iterator_traits<RandomAccessIterator>::value_type + typename std::iterator_traits<RandomAccessIterator>::value_type& operator*() { return *current; } @@ -256,159 +262,41 @@ template<typename RandomAccessIterator, typename Comparator> return !(bi1.comp)(*bi2, *bi1); } -/** Prepare a set of sequences to be merged without a (end) guard - * @param seqs_begin - * @param seqs_end - * @param comp - * @param min_sequence - * @param stable - * @pre (seqs_end - seqs_begin > 0) */ -template<typename RandomAccessIteratorIterator, typename Comparator> - typename std::iterator_traits< - typename std::iterator_traits<RandomAccessIteratorIterator>::value_type - ::first_type>::difference_type - prepare_unguarded(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, Comparator comp, - int& min_sequence, bool stable) - { - _GLIBCXX_CALL(seqs_end - seqs_begin) - - typedef typename std::iterator_traits<RandomAccessIteratorIterator> - ::value_type::first_type - RandomAccessIterator1; - typedef typename std::iterator_traits<RandomAccessIterator1>::value_type - value_type; - typedef typename std::iterator_traits<RandomAccessIterator1> - ::difference_type - difference_type; - - if ((*seqs_begin).first == (*seqs_begin).second) - { - // Empty sequence found, it's the first one. - min_sequence = 0; - return -1; - } - - // Last element in sequence. - value_type min = *((*seqs_begin).second - 1); - min_sequence = 0; - for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s) - { - if ((*s).first == (*s).second) - { - // Empty sequence found. - min_sequence = static_cast<int>(s - seqs_begin); - return -1; - } - - // Last element in sequence. - const value_type& v = *((*s).second - 1); - if (comp(v, min)) //strictly smaller - { - min = v; - min_sequence = static_cast<int>(s - seqs_begin); - } - } - - difference_type overhang_size = 0; - - int s = 0; - for (s = 0; s <= min_sequence; ++s) - { - RandomAccessIterator1 split; - if (stable) - split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second, - min, comp); - else - split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, - min, comp); - - overhang_size += seqs_begin[s].second - split; - } - - for (; s < (seqs_end - seqs_begin); ++s) - { - RandomAccessIterator1 split = std::lower_bound( - seqs_begin[s].first, seqs_begin[s].second, min, comp); - overhang_size += seqs_begin[s].second - split; - } - - // So many elements will be left over afterwards. 
- return overhang_size; - } - -/** Prepare a set of sequences to be merged with a (end) guard (sentinel) - * @param seqs_begin - * @param seqs_end - * @param comp */ -template<typename RandomAccessIteratorIterator, typename Comparator> - typename std::iterator_traits<typename std::iterator_traits< - RandomAccessIteratorIterator>::value_type::first_type>::difference_type - prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - Comparator comp) - { - _GLIBCXX_CALL(seqs_end - seqs_begin) - - typedef typename std::iterator_traits<RandomAccessIteratorIterator> - ::value_type::first_type - RandomAccessIterator1; - typedef typename std::iterator_traits<RandomAccessIterator1> - ::value_type - value_type; - typedef typename std::iterator_traits<RandomAccessIterator1> - ::difference_type - difference_type; - - // Last element in sequence. - value_type* max = NULL; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - { - if ((*s).first == (*s).second) - continue; - - // Last element in sequence. - value_type& v = *((*s).second - 1); - - // Strictly greater. - if (!max || comp(*max, v)) - max = &v; - } - - difference_type overhang_size = 0; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - { - RandomAccessIterator1 split = - std::lower_bound((*s).first, (*s).second, *max, comp); - overhang_size += (*s).second - split; - - // Set sentinel. - *((*s).second) = *max; - } - - // So many elements will be left over afterwards. - return overhang_size; - } - /** @brief Highly efficient 3-way merging procedure. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Unused, stable anyway. - * @return End iterator of output sequence. */ + * + * Merging is done with the algorithm implementation described by Peter + * Sanders. Basically, the idea is to minimize the number of necessary + * comparison after merging out an element. The implementation trick + * that makes this fast is that the order of the sequences is stored + * in the instruction pointer (translated into labels in C++). + * + * This works well for merging up to 4 sequences. + * + * Note that making the merging stable does <em>not</em> come at a + * performance hit. + * + * Whether the merging is done guarded or unguarded is selected by the + * used iterator class. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence. 
+ */ template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator> RandomAccessIterator3 - multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, _DifferenceTp length, - bool stable) + multiway_merge_3_variant( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) { _GLIBCXX_CALL(length); @@ -423,6 +311,10 @@ template<template<typename RAI, typename C> class iterator, if (length == 0) return target; +#if _GLIBCXX_ASSERTIONS + _DifferenceTp orig_length = length; +#endif + iterator<RandomAccessIterator1, Comparator> seq0(seqs_begin[0].first, seqs_begin[0].second, comp), seq1(seqs_begin[1].first, seqs_begin[1].second, comp), @@ -450,17 +342,16 @@ template<template<typename RAI, typename C> class iterator, else goto s210; } - -#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\ +#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1) \ s ## a ## b ## c : \ *target = *seq ## a; \ - ++target; \ - --length; \ - ++seq ## a; \ - if (length == 0) goto finish; \ - if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \ - if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \ - goto s ## b ## c ## a; + ++target; \ + --length; \ + ++seq ## a; \ + if (length == 0) goto finish; \ + if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \ + if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \ + goto s ## b ## c ## a; _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=); _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < ); @@ -474,6 +365,14 @@ template<template<typename RAI, typename C> class iterator, finish: ; +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT( + ((RandomAccessIterator1)seq0 - seqs_begin[0].first) + + ((RandomAccessIterator1)seq1 - seqs_begin[1].first) + + ((RandomAccessIterator1)seq2 - seqs_begin[2].first) + == orig_length); +#endif + seqs_begin[0].first = seq0; seqs_begin[1].first = seq1; seqs_begin[2].first = seq2; @@ -481,95 +380,31 @@ template<template<typename RAI, typename C> class iterator, return target; } -template<typename RandomAccessIteratorIterator, - typename RandomAccessIterator3, - typename _DifferenceTp, - typename Comparator> - RandomAccessIterator3 - multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable) - { - _GLIBCXX_CALL(length); - - typedef _DifferenceTp difference_type; - typedef typename std::iterator_traits<RandomAccessIteratorIterator> - ::value_type::first_type - RandomAccessIterator1; - typedef typename std::iterator_traits<RandomAccessIterator1>::value_type - value_type; - - int min_seq; - RandomAccessIterator3 target_end; - - // Stable anyway. - difference_type overhang = - prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true); - - difference_type total_length = 0; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - total_length += _GLIBCXX_PARALLEL_LENGTH(*s); - - if (overhang != -1) - { - difference_type unguarded_length = - std::min(length, total_length - overhang); - target_end = multiway_merge_3_variant<unguarded_iterator> - (seqs_begin, seqs_end, target, comp, unguarded_length, stable); - overhang = length - unguarded_length; - } - else - { - // Empty sequence found. 
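To see the "order stored in the instruction pointer" trick of multiway_merge_3_variant in isolation, here is a runnable two-sequence analogue (not the library's 3- and 4-way routines): each label records which sequence currently holds the smaller head, so emitting an element costs exactly one comparison, and the asymmetric <= / < tests keep the merge stable, just like the mixed comparison operators in the _GLIBCXX_PARALLEL_MERGE_3_CASE instantiations above. The arrays, the INT_MAX sentinels and the fixed length are made up for the example.

#include <climits>
#include <cstdio>

int main()
{
  // Two sorted inputs; INT_MAX sentinels keep the loop unguarded.
  int a[] = { 1, 4, 6, 8, INT_MAX };
  int b[] = { 2, 3, 5, 7, INT_MAX };
  int out[8];

  int* seq0 = a;
  int* seq1 = b;
  int* target = out;
  int length = 8;                       // number of real elements to merge

  if (*seq0 <= *seq1) goto s01; else goto s10;

 s01:                                   // state: *seq0 <= *seq1
  *target++ = *seq0++;
  if (--length == 0) goto finish;
  if (*seq0 <= *seq1) goto s01;         // ties stay with seq0: stable
  goto s10;

 s10:                                   // state: *seq1 < *seq0
  *target++ = *seq1++;
  if (--length == 0) goto finish;
  if (*seq1 < *seq0) goto s10;          // strict: ties go back to seq0
  goto s01;

 finish:
  for (int i = 0; i < 8; ++i)
    std::printf("%d ", out[i]);         // 1 2 3 4 5 6 7 8
  std::printf("\n");
}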
- overhang = length; - target_end = target; - } - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - switch (min_seq) - { - case 0: - // Iterators will be advanced accordingly. - target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second, - seqs_begin[2].first, seqs_begin[2].second, - target_end, overhang, comp); - break; - case 1: - target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second, - seqs_begin[2].first, seqs_begin[2].second, - target_end, overhang, comp); - break; - case 2: - target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second, - seqs_begin[1].first, seqs_begin[1].second, - target_end, overhang, comp); - break; - default: - _GLIBCXX_PARALLEL_ASSERT(false); - } - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - return target_end; - } - -/** @brief Highly efficient 4-way merging procedure. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Unused, stable anyway. - * @return End iterator of output sequence. */ +/** + * @brief Highly efficient 4-way merging procedure. + * + * Merging is done with the algorithm implementation described by Peter + * Sanders. Basically, the idea is to minimize the number of necessary + * comparison after merging out an element. The implementation trick + * that makes this fast is that the order of the sequences is stored + * in the instruction pointer (translated into goto labels in C++). + * + * This works well for merging up to 4 sequences. + * + * Note that making the merging stable does <em>not</em> come at a + * performance hit. + * + * Whether the merging is done guarded or unguarded is selected by the + * used iterator class. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence. 
+ */ template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, @@ -579,7 +414,7 @@ template<template<typename RAI, typename C> class iterator, multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, - Comparator comp, _DifferenceTp length, bool stable) + Comparator comp, _DifferenceTp length) { _GLIBCXX_CALL(length); typedef _DifferenceTp difference_type; @@ -676,280 +511,20 @@ template<template<typename RAI, typename C> class iterator, return target; } -template<typename RandomAccessIteratorIterator, - typename RandomAccessIterator3, - typename _DifferenceTp, - typename Comparator> - RandomAccessIterator3 - multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable) - { - _GLIBCXX_CALL(length); - typedef _DifferenceTp difference_type; - - typedef typename std::iterator_traits<RandomAccessIteratorIterator> - ::value_type::first_type - RandomAccessIterator1; - typedef typename std::iterator_traits<RandomAccessIterator1>::value_type - value_type; - - int min_seq; - RandomAccessIterator3 target_end; - - // Stable anyway. - difference_type overhang = - prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true); - - difference_type total_length = 0; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - total_length += _GLIBCXX_PARALLEL_LENGTH(*s); - - if (overhang != -1) - { - difference_type unguarded_length = - std::min(length, total_length - overhang); - target_end = multiway_merge_4_variant<unguarded_iterator> - (seqs_begin, seqs_end, target, comp, unguarded_length, stable); - overhang = length - unguarded_length; - } - else - { - // Empty sequence found. - overhang = length; - target_end = target; - } - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > - one_missing(seqs_begin, seqs_end); - one_missing.erase(one_missing.begin() + min_seq); //remove - - target_end = multiway_merge_3_variant<guarded_iterator>( - one_missing.begin(), one_missing.end(), - target_end, comp, overhang, stable); - - // Insert back again. - one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]); - // Write back modified iterators. - copy(one_missing.begin(), one_missing.end(), seqs_begin); - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - return target_end; - } - -/** @brief Basic multi-way merging procedure. - * - * The head elements are kept in a sorted array, new heads are - * inserted linearly. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @return End iterator of output sequence. 
- */ -template<typename RandomAccessIteratorIterator, - typename RandomAccessIterator3, - typename _DifferenceTp, - typename Comparator> - RandomAccessIterator3 - multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, _DifferenceTp length, bool stable) - { - _GLIBCXX_CALL(length) - - typedef _DifferenceTp difference_type; - typedef typename std::iterator_traits<RandomAccessIteratorIterator> - ::value_type::first_type - RandomAccessIterator1; - typedef typename std::iterator_traits<RandomAccessIterator1>::value_type - value_type; - - int k = static_cast<int>(seqs_end - seqs_begin); - int nrs; // Number of remaining sequences. - - // Avoid default constructor. - value_type* fe = static_cast<value_type*>( - ::operator new(sizeof(value_type) * k)); // Front elements. - int* source = new int[k]; - difference_type total_length = 0; - - // Write entries into queue. - nrs = 0; - for (int pi = 0; pi < k; ++pi) - { - if (seqs_begin[pi].first != seqs_begin[pi].second) - { - ::new(&(fe[nrs])) value_type(*(seqs_begin[pi].first)); - source[nrs] = pi; - ++nrs; - total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]); - } - } - - if (stable) - { - // Bubble sort fe and source by fe. - for (int k = 0; k < nrs - 1; ++k) - for (int pi = nrs - 1; pi > k; --pi) - if (comp(fe[pi], fe[pi - 1]) || - (!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1])) - { - std::swap(fe[pi - 1], fe[pi]); - std::swap(source[pi - 1], source[pi]); - } - } - else - { - for (int k = 0; k < nrs - 1; ++k) - for (int pi = nrs - 1; pi > k; --pi) - if (comp(fe[pi], fe[pi-1])) - { - std::swap(fe[pi-1], fe[pi]); - std::swap(source[pi-1], source[pi]); - } - } - - // Iterate. - if (stable) - { - int j; - while (nrs > 0 && length > 0) - { - if (source[0] < source[1]) - { - // fe[0] <= fe[1] - while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0) - { - *target = fe[0]; - ++target; - ++(seqs_begin[source[0]].first); - --length; - if (seqs_begin[source[0]].first - == seqs_begin[source[0]].second) - { - // Move everything to the left. - for (int s = 0; s < nrs - 1; ++s) - { - fe[s] = fe[s + 1]; - source[s] = source[s + 1]; - } - fe[nrs - 1].~value_type(); //Destruct explicitly. - --nrs; - break; - } - else - fe[0] = *(seqs_begin[source[0]].first); - } - } - else - { - // fe[0] < fe[1] - while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0) - { - *target = fe[0]; - ++target; - ++(seqs_begin[source[0]].first); - --length; - if (seqs_begin[source[0]].first - == seqs_begin[source[0]].second) - { - for (int s = 0; s < nrs - 1; ++s) - { - fe[s] = fe[s + 1]; - source[s] = source[s + 1]; - } - fe[nrs - 1].~value_type(); //Destruct explicitly. - --nrs; - break; - } - else - fe[0] = *(seqs_begin[source[0]].first); - } - } - - // Sink down. - j = 1; - while ((j < nrs) && (comp(fe[j], fe[j - 1]) - || (!comp(fe[j - 1], fe[j]) - && (source[j] < source[j - 1])))) - { - std::swap(fe[j - 1], fe[j]); - std::swap(source[j - 1], source[j]); - ++j; - } - } - } - else - { - int j; - while (nrs > 0 && length > 0) - { - // fe[0] <= fe[1] - while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0) - { - *target = fe[0]; - ++target; - ++seqs_begin[source[0]].first; - --length; - if (seqs_begin[source[0]].first - == seqs_begin[source[0]].second) - { - for (int s = 0; s < (nrs - 1); ++s) - { - fe[s] = fe[s + 1]; - source[s] = source[s + 1]; - } - fe[nrs - 1].~value_type(); //Destruct explicitly. 
- --nrs; - break; - } - else - fe[0] = *(seqs_begin[source[0]].first); - } - - // Sink down. - j = 1; - while ((j < nrs) && comp(fe[j], fe[j - 1])) - { - std::swap(fe[j - 1], fe[j]); - std::swap(source[j - 1], source[j]); - ++j; - } - } - } - - ::operator delete(fe); //Destructors already called. - delete[] source; - - return target; - } - /** @brief Multi-way merging procedure for a high branching factor, - * guarded case. + * guarded case. * - * The head elements are kept in a loser tree. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @return End iterator of output sequence. + * This merging variant uses a LoserTree class as selected by <tt>LT</tt>. + * + * Stability is selected through the used LoserTree class <tt>LT</tt>. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence. */ template<typename LT, typename RandomAccessIteratorIterator, @@ -961,7 +536,7 @@ template<typename LT, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, - _DifferenceTp length, bool stable) + _DifferenceTp length) { _GLIBCXX_CALL(length) @@ -994,82 +569,52 @@ template<typename LT, for (int t = 0; t < k; ++t) { - if (stable) - { - if (seqs_begin[t].first == seqs_begin[t].second) - lt.insert_start_stable(*arbitrary_element, t, true); - else - lt.insert_start_stable(*seqs_begin[t].first, t, false); - } + if (seqs_begin[t].first == seqs_begin[t].second) + lt.insert_start(*arbitrary_element, t, true); else - { - if (seqs_begin[t].first == seqs_begin[t].second) - lt.insert_start(*arbitrary_element, t, true); - else - lt.insert_start(*seqs_begin[t].first, t, false); - } + lt.insert_start(*seqs_begin[t].first, t, false); } - if (stable) - lt.init_stable(); - else - lt.init(); + lt.init(); - total_length = std::min(total_length, length); + const difference_type const_total_length(std::min(total_length, length)); int source; - if (stable) + for (difference_type i = 0; i < const_total_length; ++i) { - for (difference_type i = 0; i < total_length; ++i) - { - // Take out. - source = lt.get_min_source(); + //take out + source = lt.get_min_source(); - *(target++) = *(seqs_begin[source].first++); + *(target++) = *(seqs_begin[source].first++); - // Feed. - if (seqs_begin[source].first == seqs_begin[source].second) - lt.delete_min_insert_stable(*arbitrary_element, true); - else - // Replace from same source. - lt.delete_min_insert_stable(*seqs_begin[source].first, false); - - } - } - else - { - for (difference_type i = 0; i < total_length; ++i) - { - //take out - source = lt.get_min_source(); - - *(target++) = *(seqs_begin[source].first++); - - // Feed. - if (seqs_begin[source].first == seqs_begin[source].second) - lt.delete_min_insert(*arbitrary_element, true); - else - // Replace from same source. - lt.delete_min_insert(*seqs_begin[source].first, false); - } + // Feed. + if (seqs_begin[source].first == seqs_begin[source].second) + lt.delete_min_insert(*arbitrary_element, true); + else + // Replace from same source. 
+ lt.delete_min_insert(*seqs_begin[source].first, false); } return target; } /** @brief Multi-way merging procedure for a high branching factor, - * unguarded case. + * unguarded case. * - * The head elements are kept in a loser tree. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @return End iterator of output sequence. - * @pre No input will run out of elements during the merge. + * Merging is done using the LoserTree class <tt>LT</tt>. + * + * Stability is selected by the used LoserTrees. + * + * @pre No input will run out of elements during the merge. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence. */ template<typename LT, typename RandomAccessIteratorIterator, @@ -1079,8 +624,8 @@ template<typename LT, multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable) + int min_seq, Comparator comp, + _DifferenceTp length) { _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; @@ -1093,7 +638,11 @@ template<typename LT, int k = seqs_end - seqs_begin; - LT lt(k, comp); + // Determine the sentinel. The sentinel is largest/last element of the + // sequences with the smallest largest/last element. + value_type sentinel = *(seqs_begin[min_seq].second - 1); + + LT lt(k, sentinel, comp); difference_type total_length = 0; @@ -1102,18 +651,12 @@ template<typename LT, #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second); #endif - if (stable) - lt.insert_start_stable(*seqs_begin[t].first, t, false); - else - lt.insert_start(*seqs_begin[t].first, t, false); + lt.insert_start(*seqs_begin[t].first, t, false); total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]); } - if (stable) - lt.init_stable(); - else - lt.init(); + lt.init(); // Do not go past end. length = std::min(total_length, length); @@ -1124,203 +667,311 @@ template<typename LT, difference_type i = 0; #endif - if (stable) + RandomAccessIterator3 target_end = target + length; + while (target < target_end) { - RandomAccessIterator3 target_end = target + length; - while (target < target_end) - { - // Take out. - source = lt.get_min_source(); + // Take out. + source = lt.get_min_source(); #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(i == 0 - || !comp(*(seqs_begin[source].first), *(target - 1))); + _GLIBCXX_PARALLEL_ASSERT(0 <= source && source < k); + _GLIBCXX_PARALLEL_ASSERT(i == 0 + || !comp(*(seqs_begin[source].first), *(target - 1))); #endif - *(target++) = *(seqs_begin[source].first++); + // Feed. + *(target++) = *(seqs_begin[source].first++); #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT( - (seqs_begin[source].first != seqs_begin[source].second) - || (i == length - 1)); - ++i; + _GLIBCXX_PARALLEL_ASSERT( + (seqs_begin[source].first != seqs_begin[source].second) + || (i >= length - 1)); + ++i; #endif - // Feed. - // Replace from same source. 
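The take-out/feed driver loop used by multiway_merge_loser_tree above can be sketched with std::priority_queue standing in for the loser tree; the heap is functionally equivalent but has a worse constant factor and lacks the tree's single replay path per element. Sequence contents and the stand-in heap are illustrative only, and unlike the guarded library code, which feeds an arbitrary element flagged as supremum, this sketch simply stops re-feeding an exhausted source.

#include <cstddef>
#include <cstdio>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

int main()
{
  typedef std::pair<int, int> entry;              // (key, source)

  std::vector<int> seqs[3] = { {1, 4, 7}, {2, 5}, {3, 6, 8, 9} };
  std::size_t pos[3] = { 0, 0, 0 };

  // Min-heap ordered by key, ties broken by source index (stable).
  std::priority_queue<entry, std::vector<entry>, std::greater<entry> > heap;
  for (int s = 0; s < 3; ++s)
    if (!seqs[s].empty())
      heap.push(entry(seqs[s][0], s));

  while (!heap.empty())
    {
      int source = heap.top().second;             // take out the minimum
      heap.pop();
      std::printf("%d ", seqs[source][pos[source]]);
      ++pos[source];
      if (pos[source] < seqs[source].size())      // feed from the same source
        heap.push(entry(seqs[source][pos[source]], source));
    }
  std::printf("\n");                              // 1 2 3 4 5 6 7 8 9
}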
- lt.delete_min_insert_stable(*seqs_begin[source].first, false); - - } - } - else - { - RandomAccessIterator3 target_end = target + length; - while (target < target_end) - { - // Take out. - source = lt.get_min_source(); - -#if _GLIBCXX_ASSERTIONS - if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1))) - printf(" %i %i %i\n", length, i, source); - _GLIBCXX_PARALLEL_ASSERT(i == 0 - || !comp(*(seqs_begin[source].first), *(target - 1))); -#endif - - *(target++) = *(seqs_begin[source].first++); - -#if _GLIBCXX_ASSERTIONS - if (!((seqs_begin[source].first != seqs_begin[source].second) - || (i >= length - 1))) - printf(" %i %i %i\n", length, i, source); - _GLIBCXX_PARALLEL_ASSERT( - (seqs_begin[source].first != seqs_begin[source].second) - || (i >= length - 1)); - ++i; -#endif - // Feed. - // Replace from same source. - lt.delete_min_insert(*seqs_begin[source].first, false); - } + // Replace from same source. + lt.delete_min_insert(*seqs_begin[source].first, false); } return target; } -template<typename RandomAccessIteratorIterator, - typename RandomAccessIterator3, - typename _DifferenceTp, - typename Comparator> + +/** @brief Multi-way merging procedure for a high branching factor, + * requiring sentinels to exist. + * @param stable The value must the same as for the used LoserTrees. + * @param UnguardedLoserTree Loser Tree variant to use for the unguarded + * merging. + * @param GuardedLoserTree Loser Tree variant to use for the guarded + * merging. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence. + */ +template< + typename UnguardedLoserTree, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> RandomAccessIterator3 - multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, + multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, - _DifferenceTp length, bool stable) + _DifferenceTp length) { _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; - + typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type; typedef typename std::iterator_traits<RandomAccessIteratorIterator> ::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits<RandomAccessIterator1>::value_type value_type; - int min_seq; RandomAccessIterator3 target_end; - difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, - comp, min_seq, stable); difference_type total_length = 0; for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - total_length += _GLIBCXX_PARALLEL_LENGTH(*s); - - if (overhang != -1) { - difference_type unguarded_length = - std::min(length, total_length - overhang); - target_end = multiway_merge_loser_tree_unguarded - <typename loser_tree_unguarded_traits<value_type, Comparator>::LT> - (seqs_begin, seqs_end, target, comp, unguarded_length, stable); - overhang = length - unguarded_length; - } - else - { - // Empty sequence found. 
- overhang = length; - target_end = target; - } + total_length += _GLIBCXX_PARALLEL_LENGTH(*s); -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif + // Move the sequends end behind the sentinel spots. This has the + // effect that the sentinel appears to be within the sequence. Then, + // we can use the unguarded variant if we merge out as many + // non-sentinel elements as we have. + ++((*s).second); + } - target_end = multiway_merge_loser_tree - <typename loser_tree_traits<value_type, Comparator>::LT> - (seqs_begin, seqs_end, target_end, comp, overhang, stable); + difference_type unguarded_length = + std::min(length, total_length); + target_end = multiway_merge_loser_tree_unguarded + <UnguardedLoserTree> + (seqs_begin, seqs_end, target, 0, comp, unguarded_length); #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif + // Restore the sequence ends so the sentinels are not contained in the + // sequence any more (see comment in loop above). + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) + { --((*s).second); } + return target_end; } -template<typename RandomAccessIteratorIterator, - typename RandomAccessIterator3, - typename _DifferenceTp, - typename Comparator> - RandomAccessIterator3 - multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable) +/** + * @brief Traits for determining whether the loser tree should + * use pointers or copies. + * + * The field "use_pointer" is used to determine whether to use pointers in + * the loser trees or whether to copy the values into the loser tree. + * + * The default behavior is to use pointers if the data type is 4 times as + * big as the pointer to it. + * + * Specialize for your data type to customize the behavior. + * + * Example: + * + * template<> + * struct loser_tree_traits<int> + * { static const bool use_pointer = false; }; + * + * template<> + * struct loser_tree_traits<heavyweight_type> + * { static const bool use_pointer = true; }; + * + * @param T type to give the loser tree traits for. + */ +template <typename T> +struct loser_tree_traits +{ + /** + * @brief True iff to use pointers instead of values in loser trees. + * + * The default behavior is to use pointers if the data type is four + * times as big as the pointer to it. + */ + static const bool use_pointer = (sizeof(T) > 4 * sizeof(T*)); +}; + +/** + * @brief Switch for 3-way merging with sentinels turned off. + * + * Note that 3-way merging is always stable! + */ +template< + bool sentinels /*default == false*/, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_3_variant_sentinel_switch +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) { - _GLIBCXX_CALL(length) + return multiway_merge_3_variant<guarded_iterator>( + seqs_begin, seqs_end, target, comp, length); + } +}; - typedef _DifferenceTp difference_type; - typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type; +/** + * @brief Switch for 3-way merging with sentinels turned on. 
+ * + * Note that 3-way merging is always stable! + */ +template< + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_3_variant_sentinel_switch + <true, RandomAccessIteratorIterator, RandomAccessIterator3, + _DifferenceTp, Comparator> +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) + { + return multiway_merge_3_variant<unguarded_iterator>( + seqs_begin, seqs_end, target, comp, length); + } +}; + +/** + * @brief Switch for 4-way merging with sentinels turned off. + * + * Note that 4-way merging is always stable! + */ +template< + bool sentinels /*default == false*/, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_4_variant_sentinel_switch +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) + { + return multiway_merge_4_variant<guarded_iterator>( + seqs_begin, seqs_end, target, comp, length); + } +}; + +/** + * @brief Switch for 4-way merging with sentinels turned on. + * + * Note that 4-way merging is always stable! + */ +template< + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_4_variant_sentinel_switch + <true, RandomAccessIteratorIterator, RandomAccessIterator3, + _DifferenceTp, Comparator> +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) + { + return multiway_merge_4_variant<unguarded_iterator>( + seqs_begin, seqs_end, target, comp, length); + } +}; + +/** + * @brief Switch for k-way merging with sentinels turned on. + */ +template< + bool sentinels, + bool stable, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_k_variant_sentinel_switch +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) + { typedef typename std::iterator_traits<RandomAccessIteratorIterator> ::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits<RandomAccessIterator1>::value_type value_type; - RandomAccessIterator3 target_end; - difference_type overhang = - prepare_unguarded_sentinel(seqs_begin, seqs_end, comp); - - difference_type total_length = 0; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - { - total_length += _GLIBCXX_PARALLEL_LENGTH(*s); - - // Sentinel spot. - ++((*s).second); - } - - difference_type unguarded_length = - std::min(length, total_length - overhang); - target_end = multiway_merge_loser_tree_unguarded - <typename loser_tree_unguarded_traits<value_type, Comparator>::LT> - (seqs_begin, seqs_end, target, comp, unguarded_length, stable); - overhang = length - unguarded_length; - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - // Copy rest stable. 
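The k-way switch structs above pick the loser tree type at compile time from loser_tree_traits<value_type>::use_pointer via __gnu_cxx::__conditional_type. The sketch below replays that dispatch with a hand-rolled conditional template and two stand-in tree classes; everything except the use_pointer formula is a made-up name for illustration.

#include <cstdio>

// Hand-rolled stand-in for __gnu_cxx::__conditional_type.
template<bool Cond, typename TrueT, typename FalseT>
struct conditional_type
{ typedef TrueT __type; };

template<typename TrueT, typename FalseT>
struct conditional_type<false, TrueT, FalseT>
{ typedef FalseT __type; };

// Same use_pointer rule as loser_tree_traits above.
template<typename T>
struct loser_tree_traits_sketch
{ static const bool use_pointer = (sizeof(T) > 4 * sizeof(T*)); };

// Stand-ins for the copying and the pointer-based loser trees.
struct CopyingTree { static const char* name() { return "copying tree"; } };
struct PointerTree { static const char* name() { return "pointer tree"; } };

struct big_record { char payload[256]; };

int main()
{
  typedef conditional_type<loser_tree_traits_sketch<int>::use_pointer,
                           PointerTree, CopyingTree>::__type tree_for_int;
  typedef conditional_type<loser_tree_traits_sketch<big_record>::use_pointer,
                           PointerTree, CopyingTree>::__type tree_for_big;

  std::printf("int        -> %s\n", tree_for_int::name());   // copying tree
  std::printf("big_record -> %s\n", tree_for_big::name());   // pointer tree
}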
- for (RandomAccessIteratorIterator s = seqs_begin; - s != seqs_end && overhang > 0; ++s) - { - // Restore. - --((*s).second); - difference_type local_length = - std::min<difference_type>(overhang, _GLIBCXX_PARALLEL_LENGTH(*s)); - target_end = std::copy((*s).first, (*s).first + local_length, - target_end); - (*s).first += local_length; - overhang -= local_length; - } + return multiway_merge_loser_tree_sentinel< + typename __gnu_cxx::__conditional_type< + loser_tree_traits<value_type>::use_pointer + , LoserTreePointerUnguarded<stable, value_type, Comparator> + , LoserTreeUnguarded<stable, value_type, Comparator> + >::__type>(seqs_begin, seqs_end, target, comp, length); + } +}; -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(overhang == 0); - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif +/** + * @brief Switch for k-way merging with sentinels turned off. + */ +template< + bool stable, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_k_variant_sentinel_switch + <false, stable, RandomAccessIteratorIterator, RandomAccessIterator3, + _DifferenceTp, Comparator> +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) + { + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits<RandomAccessIterator1>::value_type + value_type; - return target_end; + return multiway_merge_loser_tree< + typename __gnu_cxx::__conditional_type< + loser_tree_traits<value_type>::use_pointer + , LoserTreePointer<stable, value_type, Comparator> + , LoserTree<stable, value_type, Comparator> + >::__type >(seqs_begin, seqs_end, target, comp, length); } +}; /** @brief Sequential multi-way merging switch. * - * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and + * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and * runtime settings. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. @@ -1330,17 +981,18 @@ template<typename RandomAccessIteratorIterator, * @param stable Stable merging incurs a performance penalty. * @param sentinel The sequences have a sentinel element. * @return End iterator of output sequence. 
*/ -template<typename RandomAccessIteratorIterator, - typename RandomAccessIterator3, - typename _DifferenceTp, - typename Comparator> +template< + bool stable, + bool sentinels, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> RandomAccessIterator3 - multiway_merge(RandomAccessIteratorIterator seqs_begin, + sequential_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, - Comparator comp, _DifferenceTp length, - bool stable, bool sentinel, - sequential_tag) + Comparator comp, _DifferenceTp length) { _GLIBCXX_CALL(length) @@ -1353,17 +1005,14 @@ template<typename RandomAccessIteratorIterator, #if _GLIBCXX_ASSERTIONS for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp)); + { + _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp)); + } #endif - RandomAccessIterator3 return_target = target; + RandomAccessIterator3 return_target = target; int k = static_cast<int>(seqs_end - seqs_begin); - _MultiwayMergeAlgorithm mwma = _Settings::get().multiway_merge_algorithm; - - if (!sentinel && mwma == LOSER_TREE_SENTINEL) - mwma = LOSER_TREE_COMBINED; - switch (k) { case 0: @@ -1382,113 +1031,30 @@ template<typename RandomAccessIteratorIterator, target, length, comp); break; case 3: - switch (mwma) - { - case LOSER_TREE_COMBINED: - return_target = multiway_merge_3_combined(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; - case LOSER_TREE_SENTINEL: - return_target = - multiway_merge_3_variant<unguarded_iterator>(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; - default: - return_target = - multiway_merge_3_variant<guarded_iterator>(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; - } + return_target = multiway_merge_3_variant_sentinel_switch< + sentinels + , RandomAccessIteratorIterator + , RandomAccessIterator3 + , _DifferenceTp + , Comparator>()(seqs_begin, seqs_end, target, comp, length); break; case 4: - switch (mwma) - { - case LOSER_TREE_COMBINED: - return_target = multiway_merge_4_combined(seqs_begin, - seqs_end, - target, - comp, length, stable); - break; - case LOSER_TREE_SENTINEL: - return_target = - multiway_merge_4_variant<unguarded_iterator>(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; - default: - return_target = multiway_merge_4_variant<guarded_iterator>( - seqs_begin, - seqs_end, - target, - comp, length, stable); - break; - } + return_target = multiway_merge_4_variant_sentinel_switch< + sentinels + , RandomAccessIteratorIterator + , RandomAccessIterator3 + , _DifferenceTp + , Comparator>()(seqs_begin, seqs_end, target, comp, length); break; default: - { - switch (mwma) - { - case BUBBLE: - return_target = multiway_merge_bubble(seqs_begin, - seqs_end, - target, - comp, length, stable); - break; -#if _GLIBCXX_LOSER_TREE_EXPLICIT - case LOSER_TREE_EXPLICIT: - return_target = multiway_merge_loser_tree< - LoserTreeExplicit<value_type, Comparator> >(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; -#endif -#if _GLIBCXX_LOSER_TREE - case LOSER_TREE: - return_target = multiway_merge_loser_tree< - LoserTree<value_type, Comparator> >(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; -#endif -#if _GLIBCXX_LOSER_TREE_COMBINED - case LOSER_TREE_COMBINED: - return_target = multiway_merge_loser_tree_combined(seqs_begin, - seqs_end, - 
target, - comp, length, - stable); - break; -#endif -#if _GLIBCXX_LOSER_TREE_SENTINEL - case LOSER_TREE_SENTINEL: - return_target = multiway_merge_loser_tree_sentinel(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; -#endif - default: - // multiway_merge algorithm not implemented. - _GLIBCXX_PARALLEL_ASSERT(0); - break; - } - } + return_target = multiway_merge_k_variant_sentinel_switch< + sentinels + , stable + , RandomAccessIteratorIterator + , RandomAccessIterator3 + , _DifferenceTp + , Comparator>()(seqs_begin, seqs_end, target, comp, length); + break; } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); @@ -1497,38 +1063,246 @@ template<typename RandomAccessIteratorIterator, return return_target; } +/** + * @brief Stable sorting functor. + * + * Used to reduce code instanciation in multiway_merge_sampling_splitting. + */ +template<bool stable, class RandomAccessIterator, class StrictWeakOrdering> +struct sampling_sorter +{ + void operator()(RandomAccessIterator first, RandomAccessIterator last, + StrictWeakOrdering comp) + { __gnu_sequential::stable_sort(first, last, comp); } +}; + +/** + * @brief Non-stable sorting functor. + * + * Used to reduce code instanciation in multiway_merge_sampling_splitting. + */ +template<class RandomAccessIterator, class StrictWeakOrdering> +struct sampling_sorter<false, RandomAccessIterator, StrictWeakOrdering> +{ + void operator()(RandomAccessIterator first, RandomAccessIterator last, + StrictWeakOrdering comp) + { __gnu_sequential::sort(first, last, comp); } +}; + +/** + * @brief Sampling based splitting for parallel multiway-merge routine. + */ +template< + bool stable + , typename RandomAccessIteratorIterator + , typename Comparator + , typename difference_type> +void multiway_merge_sampling_splitting( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + Comparator comp, difference_type length, + difference_type total_length, + std::vector<std::pair<difference_type, difference_type> > *pieces) +{ + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits<RandomAccessIterator1>::value_type + value_type; + + // k sequences. + int k = static_cast<int>(seqs_end - seqs_begin); + + int num_threads = omp_get_num_threads(); + + difference_type num_samples = + __gnu_parallel::_Settings::get().merge_oversampling * num_threads; + + value_type* samples = static_cast<value_type*>( + ::operator new(sizeof(value_type) * k * num_samples)); + // Sample. + for (int s = 0; s < k; ++s) + for (difference_type i = 0; i < num_samples; ++i) + { + difference_type sample_index = + static_cast<difference_type>( + _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) * (double(i + 1) / + (num_samples + 1)) * (double(length) + / total_length)); + new(&(samples[s * num_samples + i])) value_type( + seqs_begin[s].first[sample_index]); + } + + // Sort stable or non-stable, depending on value of template parameter + // "stable". + sampling_sorter<stable, value_type*, Comparator>()( + samples, samples + (num_samples * k), comp); + + for (int slab = 0; slab < num_threads; ++slab) + // For each slab / processor. + for (int seq = 0; seq < k; ++seq) + { + // For each sequence. + if (slab > 0) + pieces[slab][seq].first = + std::upper_bound( + seqs_begin[seq].first, + seqs_begin[seq].second, + samples[num_samples * k * slab / num_threads], + comp) + - seqs_begin[seq].first; + else + { + // Absolute beginning. 
+ pieces[slab][seq].first = 0; + } + if ((slab + 1) < num_threads) + pieces[slab][seq].second = + std::upper_bound( + seqs_begin[seq].first, + seqs_begin[seq].second, + samples[num_samples * k * (slab + 1) / + num_threads], comp) + - seqs_begin[seq].first; + else + pieces[slab][seq].second = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); + } + ::operator delete(samples); +} + +/** + * @brief Exact splitting for parallel multiway-merge routine. + */ +template< + bool stable + , typename RandomAccessIteratorIterator + , typename Comparator + , typename difference_type> +void multiway_merge_exact_splitting( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + Comparator comp, + difference_type length, + difference_type total_length, + std::vector<std::pair<difference_type, difference_type> > *pieces) +{ + typedef typename std::iterator_traits<RandomAccessIteratorIterator> + ::value_type::first_type + RandomAccessIterator1; + + const bool tight = (total_length == length); + + // k sequences. + const int k = static_cast<int>(seqs_end - seqs_begin); + + const int num_threads = omp_get_num_threads(); + + // (Settings::multiway_merge_splitting == __gnu_parallel::_Settings::EXACT). + std::vector<RandomAccessIterator1>* offsets = + new std::vector<RandomAccessIterator1>[num_threads]; + std::vector< + std::pair<RandomAccessIterator1, RandomAccessIterator1> + > se(k); + + copy(seqs_begin, seqs_end, se.begin()); + + difference_type* borders = + new difference_type[num_threads + 1]; + equally_split(length, num_threads, borders); + + for (int s = 0; s < (num_threads - 1); ++s) + { + offsets[s].resize(k); + multiseq_partition( + se.begin(), se.end(), borders[s + 1], + offsets[s].begin(), comp); + + // Last one also needed and available. + if (!tight) + { + offsets[num_threads - 1].resize(k); + multiseq_partition(se.begin(), se.end(), + difference_type(length), + offsets[num_threads - 1].begin(), comp); + } + } + + + for (int slab = 0; slab < num_threads; ++slab) + { + // For each slab / processor. + for (int seq = 0; seq < k; ++seq) + { + // For each sequence. + if (slab == 0) + { + // Absolute beginning. + pieces[slab][seq].first = 0; + } + else + pieces[slab][seq].first = + pieces[slab - 1][seq].second; + if (!tight || slab < (num_threads - 1)) + pieces[slab][seq].second = + offsets[slab][seq] - seqs_begin[seq].first; + else + { + // slab == num_threads - 1 + pieces[slab][seq].second = + _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); + } + } + } + delete[] offsets; +} + /** @brief Parallel multi-way merge routine. * - * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor - * and runtime settings. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @param sentinel Ignored. - * @return End iterator of output sequence. + * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor + * and runtime settings. + * + * Must not be called if the number of sequences is 1. + * + * @param Splitter functor to split input (either exact or sampling based) + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. 
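A small worked example of the sampling-based splitting above, reduced to two sequences and two slabs with no OpenMP; the constants, the data and the omission of the length/total_length scaling factor are simplifications for illustration. Evenly spaced samples are drawn from every sequence and sorted, and each slab boundary is the upper_bound of one of the sorted samples in every sequence, so the resulting pieces tile the inputs exactly.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

int main()
{
  const int k = 2, num_threads = 2, num_samples = 4;   // oversampling factor
  std::vector<int> seqs[2] = { {1, 3, 5, 7, 9, 11, 13, 15},
                               {2, 4, 6, 8, 10, 12, 14, 16} };

  // Draw evenly spaced samples from every sequence.  (length == total_length
  // here, so the scaling factor of the real code drops out.)
  std::vector<int> samples;
  for (int s = 0; s < k; ++s)
    for (int i = 0; i < num_samples; ++i)
      {
        std::size_t idx = seqs[s].size() * (i + 1) / (num_samples + 1);
        samples.push_back(seqs[s][idx]);
      }
  std::sort(samples.begin(), samples.end());

  // pieces[slab][seq] = [first, second) index range of seq handled by slab.
  std::pair<int, int> pieces[2][2];
  for (int slab = 0; slab < num_threads; ++slab)
    for (int seq = 0; seq < k; ++seq)
      {
        int lo = (slab == 0) ? 0
          : int(std::upper_bound(seqs[seq].begin(), seqs[seq].end(),
                                 samples[num_samples * k * slab / num_threads])
                - seqs[seq].begin());
        int hi = (slab + 1 == num_threads) ? int(seqs[seq].size())
          : int(std::upper_bound(seqs[seq].begin(), seqs[seq].end(),
                                 samples[num_samples * k * (slab + 1)
                                         / num_threads])
                - seqs[seq].begin());
        pieces[slab][seq] = std::make_pair(lo, hi);
        // Prints e.g. "slab 0, seq 0: [0, 5)" ... "slab 1, seq 1: [4, 8)".
        std::printf("slab %d, seq %d: [%d, %d)\n", slab, seq, lo, hi);
      }
}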
+ * @param length Maximum length to merge. + * @param stable Stable merging incurs a performance penalty. + * @param sentinel Ignored. + * @return End iterator of output sequence. */ -template<typename RandomAccessIteratorIterator, - typename RandomAccessIterator3, - typename _DifferenceTp, - typename Comparator> +template< + bool stable, + bool sentinels, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Splitter, + typename Comparator + > RandomAccessIterator3 parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable, bool sentinel) + RandomAccessIterator3 target, + Comparator comp, + Splitter splitter, + _DifferenceTp length) { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(seqs_end - seqs_begin > 1); +#endif + _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; typedef typename std::iterator_traits<RandomAccessIteratorIterator> ::value_type::first_type RandomAccessIterator1; - typedef typename std::iterator_traits<RandomAccessIterator1>::value_type - value_type; + typedef typename + std::iterator_traits<RandomAccessIterator1>::value_type value_type; // k sequences. int k = static_cast<int>(seqs_end - seqs_begin); @@ -1543,13 +1317,10 @@ template<typename RandomAccessIteratorIterator, if (total_length == 0 || k == 0) return target; - bool tight = (total_length == length); - std::vector<std::pair<difference_type, difference_type> >* pieces; thread_index_t num_threads = static_cast<thread_index_t>( - std::min<difference_type>(get_max_threads(), total_length)); - const _Settings& __s = _Settings::get(); + std::min<difference_type>(get_max_threads(), total_length)); # pragma omp parallel num_threads (num_threads) { @@ -1562,126 +1333,12 @@ template<typename RandomAccessIteratorIterator, for (int s = 0; s < num_threads; ++s) pieces[s].resize(k); - difference_type num_samples = __s.merge_oversampling - * num_threads; + difference_type num_samples = + __gnu_parallel::_Settings::get().merge_oversampling * + num_threads; - if (__s.multiway_merge_splitting == SAMPLING) - { - value_type* samples = static_cast<value_type*>( - ::operator new(sizeof(value_type) * k * num_samples)); - // Sample. - for (int s = 0; s < k; ++s) - for (difference_type i = 0; i < num_samples; ++i) - { - difference_type sample_index = - static_cast<difference_type>( - _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) - * (double(i + 1) / (num_samples + 1)) - * (double(length) / total_length)); - ::new(&(samples[s * num_samples + i])) - value_type(seqs_begin[s].first[sample_index]); - } - - if (stable) - __gnu_sequential::stable_sort(samples, samples - + (num_samples * k), comp); - else - __gnu_sequential::sort(samples, samples - + (num_samples * k), comp); - - for (int slab = 0; slab < num_threads; ++slab) - // For each slab / processor. - for (int seq = 0; seq < k; ++seq) - { - // For each sequence. - if (slab > 0) - pieces[slab][seq].first = - std::upper_bound(seqs_begin[seq].first, - seqs_begin[seq].second, - samples[num_samples * k - * slab / num_threads], - comp) - - seqs_begin[seq].first; - else - { - // Absolute beginning. 
- pieces[slab][seq].first = 0; - } - if ((slab + 1) < num_threads) - pieces[slab][seq].second = - std::upper_bound(seqs_begin[seq].first, - seqs_begin[seq].second, - samples[num_samples * k - * (slab + 1) - / num_threads], comp) - - seqs_begin[seq].first; - else - pieces[slab][seq].second - = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); - } - ::operator delete(samples); - } - else - { - // (_Settings::multiway_merge_splitting == _Settings::EXACT). - std::vector<RandomAccessIterator1>* offsets = - new std::vector<RandomAccessIterator1>[num_threads]; - std::vector< - std::pair<RandomAccessIterator1, RandomAccessIterator1> - > se(k); - - copy(seqs_begin, seqs_end, se.begin()); - - difference_type* borders = - new difference_type[num_threads + 1]; - equally_split(length, num_threads, borders); - - for (int s = 0; s < (num_threads - 1); ++s) - { - offsets[s].resize(k); - multiseq_partition( - se.begin(), se.end(), borders[s + 1], - offsets[s].begin(), comp); - - // Last one also needed and available. - if (!tight) - { - offsets[num_threads - 1].resize(k); - multiseq_partition(se.begin(), se.end(), - difference_type(length), - offsets[num_threads - 1].begin(), - comp); - } - } - - - for (int slab = 0; slab < num_threads; ++slab) - { - // For each slab / processor. - for (int seq = 0; seq < k; ++seq) - { - // For each sequence. - if (slab == 0) - { - // Absolute beginning. - pieces[slab][seq].first = 0; - } - else - pieces[slab][seq].first = - pieces[slab - 1][seq].second; - if (!tight || slab < (num_threads - 1)) - pieces[slab][seq].second = - offsets[slab][seq] - seqs_begin[seq].first; - else - { - // slab == num_threads - 1 - pieces[slab][seq].second = - _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); - } - } - } - delete[] offsets; - } + splitter(seqs_begin, seqs_end, comp, length, total_length, + pieces); } //single thread_index_t iam = omp_get_thread_num(); @@ -1701,15 +1358,14 @@ template<typename RandomAccessIteratorIterator, for (int s = 0; s < k; ++s) { chunks[s] = std::make_pair( - seqs_begin[s].first + pieces[iam][s].first, - seqs_begin[s].first + pieces[iam][s].second); + seqs_begin[s].first + pieces[iam][s].first, + seqs_begin[s].first + pieces[iam][s].second); local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]); } - multiway_merge( + sequential_multiway_merge<stable, sentinels>( chunks, chunks + k, target + target_position, comp, - std::min(local_length, length - target_position), - stable, false, sequential_tag()); + std::min(local_length, length - target_position)); delete[] chunks; } @@ -1727,7 +1383,7 @@ template<typename RandomAccessIteratorIterator, (pieces[iam][1].second - pieces[iam][1].first), comp); } - } //parallel + } // parallel #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); @@ -1743,88 +1399,605 @@ template<typename RandomAccessIteratorIterator, } /** - * @brief Multi-way merging front-end. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @return End iterator of output sequence. + * @brief Multiway Merge Frontend. + * + * Merge the sequences specified by seqs_begin and seqs_end into + * target. seqs_begin and seqs_end must point to a sequence of + * pairs. 
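The rewritten parallel_multiway_merge no longer reads the splitting strategy from _Settings at runtime; the caller hands one of the two splitting function templates in as the Splitter argument, while stable and sentinels are compile-time flags. A small sketch of that mechanism, with made-up policy names (policy_sampling/policy_exact are illustrative only):

#include <iostream>

// Two interchangeable "splitting policies".
template<typename T>
void policy_sampling(T n) { std::cout << "sampling split of " << n << '\n'; }

template<typename T>
void policy_exact(T n) { std::cout << "exact split of " << n << '\n'; }

// Stand-in for parallel_multiway_merge: the policy arrives as an ordinary
// argument whose type is deduced as a function pointer.
template<typename Splitter, typename T>
void merge_like(Splitter split, T n)
{ split(n); }

int main()
{
  // The frontends below pass multiway_merge_sampling_splitting<...> or
  // multiway_merge_exact_splitting<...> in exactly this way.
  merge_like(policy_sampling<long>, 1000L);
  merge_like(policy_exact<long>, 1000L);
}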
These pairs must contain an iterator to the beginning + * of a sequence in their first entry and an iterator the end of + * the same sequence in their second entry. + * + * Ties are broken arbitrarily. See stable_multiway_merge for a variant + * that breaks ties by sequence number but is slower. + * + * The first entries of the pairs (i.e. the begin iterators) will be moved + * forward. + * + * The output sequence has to provide enough space for all elements + * that are written to it. + * + * This function will merge the input sequences: + * + * - not stable + * - parallel, depending on the input size and Settings + * - using sampling for splitting + * - not using sentinels + * + * Example: + * + * <pre> + * int sequences[10][10]; + * for (int i = 0; i < 10; ++i) + * for (int j = 0; i < 10; ++j) + * sequences[i][j] = j; + * + * int out[33]; + * std::vector<std::pair<int*> > seqs; + * for (int i = 0; i < 10; ++i) + * { seqs.push(std::make_pair<int*>(sequences[i], sequences[i] + 10)) } + * + * multiway_merge(seqs.begin(), seqs.end(), target, std::less<int>(), 33); + * </pre> + * + * @see stable_multiway_merge + * + * @pre All input sequences must be sorted. + * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * + * @post [target, return value) contains merged elements from the + * input sequences. + * @post return value - target = min(length, number of elements in all + * sequences). + * + * @param RandomAccessIteratorPairIterator iterator over sequence + * of pairs of iterators + * @param RandomAccessIteratorOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param Comparator strict weak ordering type to compare elements + * in sequences + * + * @param seqs_begin begin of sequence sequence + * @param seqs_end end of sequence sequence + * @param target target sequence to merge to. + * @param comp strict weak ordering to use for element comparison. + * @param length the number of elements to merge into target. + * + * @return end iterator of output sequence */ -template<typename RandomAccessIteratorPairIterator, - typename RandomAccessIterator3, - typename _DifferenceTp, - typename Comparator> - RandomAccessIterator3 - multiway_merge(RandomAccessIteratorPairIterator seqs_begin, - RandomAccessIteratorPairIterator seqs_end, - RandomAccessIterator3 target, Comparator comp, - _DifferenceTp length, bool stable) - { +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + </* stable = */ false, /* sentinels = */ false> + (seqs_begin, seqs_end, target, comp, + multiway_merge_sampling_splitting</* stable = */ false, + RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>, + static_cast<difference_type>(length)); + else + target_end = sequential_multiway_merge + </* stable = */false, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute multiway merge *sequentially*. + return sequential_multiway_merge + </* stable = */ false, /* sentinels = */ false> + (seqs_begin, seqs_end, target, comp, length); +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::exact_tag) +{ typedef _DifferenceTp difference_type; _GLIBCXX_CALL(seqs_end - seqs_begin) + // catch special case: no sequences if (seqs_begin == seqs_end) return target; - const _Settings& __s = _Settings::get(); - - RandomAccessIterator3 target_end; - if (_GLIBCXX_PARALLEL_CONDITION( - ((seqs_end - seqs_begin) >= __s.multiway_merge_minimal_k) - && ((sequence_index_t)length >= __s.multiway_merge_minimal_n))) - target_end = parallel_multiway_merge(seqs_begin, seqs_end, - target, comp, - static_cast<difference_type>(length), - stable, false); + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + </* stable = */ false, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, comp, + multiway_merge_exact_splitting</* stable = */ false, + RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>, + static_cast<difference_type>(length)); else - target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, - stable, false, sequential_tag()); + target_end = sequential_multiway_merge + </* stable = */ false, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, comp, length); return target_end; - } +} -/** @brief Multi-way merging front-end. - * @param seqs_begin Begin iterator of iterator pair input sequence. 
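The example embedded in the documentation comment above does not compile as written: the inner loop tests i instead of j, std::pair needs two template arguments, push should be push_back, and the output array is declared as out but passed as target. A corrected, self-contained version of the same usage is sketched below; it assumes <parallel/multiway_merge.h> can be included directly and the program is built with -fopenmp.

#include <parallel/multiway_merge.h>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

int main()
{
  int sequences[10][10];
  for (int i = 0; i < 10; ++i)
    for (int j = 0; j < 10; ++j)        // the comment's example tests i here
      sequences[i][j] = j;              // every row is already sorted

  int out[33];
  std::vector<std::pair<int*, int*> > seqs;
  for (int i = 0; i < 10; ++i)
    seqs.push_back(std::make_pair(sequences[i], sequences[i] + 10));

  // Default frontend: not stable, sampling-based splitting if parallel,
  // no sentinels required.
  __gnu_parallel::multiway_merge(seqs.begin(), seqs.end(), out,
                                 std::less<int>(), 33);

  // The begin iterators in seqs are moved forward by the merge, so refill
  // them before merging again, this time forcing exact splitting through
  // the new exact_tag overload.
  for (int i = 0; i < 10; ++i)
    seqs[i] = std::make_pair(sequences[i], sequences[i] + 10);

  __gnu_parallel::multiway_merge(seqs.begin(), seqs.end(), out,
                                 std::less<int>(), 33,
                                 __gnu_parallel::exact_tag());

  for (int i = 0; i < 33; ++i)
    std::cout << out[i] << ' ';
  std::cout << '\n';
}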
- * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @return End iterator of output sequence. - * @pre For each @c i, @c seqs_begin[i].second must be the end - * marker of the sequence, but also reference the one more sentinel - * element. */ -template<typename RandomAccessIteratorPairIterator, - typename RandomAccessIterator3, - typename _DifferenceTp, - typename Comparator> - RandomAccessIterator3 - multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin, - RandomAccessIteratorPairIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, - bool stable) - { +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length) +{ typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + // catch special case: no sequences if (seqs_begin == seqs_end) return target; + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + </* stable = */ true, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, comp, + multiway_merge_sampling_splitting</* stable = */ true, + RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>, + static_cast<difference_type>(length)); + else + target_end = sequential_multiway_merge + </* stable = */ true, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute multiway merge *sequentially*. 
+ return sequential_multiway_merge + </* stable = */ true, /* sentinels = */ false> + (seqs_begin, seqs_end, target, comp, length); +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::exact_tag) +{ + typedef _DifferenceTp difference_type; _GLIBCXX_CALL(seqs_end - seqs_begin) - const _Settings& __s = _Settings::get(); - const bool cond1 = seqs_end - seqs_begin >= __s.multiway_merge_minimal_k; - const bool cond2 = sequence_index_t(length) >= __s.multiway_merge_minimal_n; - if (_GLIBCXX_PARALLEL_CONDITION(cond1 && cond2)) - return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, - length, stable, true); + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + </* stable = */ true, /* sentinels = */ false>( + seqs_begin, seqs_end, + target, comp, + multiway_merge_exact_splitting + </* stable = */ true, RandomAccessIteratorPairIterator, + Comparator, _DifferenceTp>, + static_cast<difference_type>(length)); else - return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, - true, sequential_tag()); - } + target_end = sequential_multiway_merge</* stable = */ true, + /* sentinels = */ false>( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +/** + * @brief Multiway Merge Frontend. + * + * Merge the sequences specified by seqs_begin and seqs_end into + * target. seqs_begin and seqs_end must point to a sequence of + * pairs. These pairs must contain an iterator to the beginning + * of a sequence in their first entry and an iterator the end of + * the same sequence in their second entry. + * + * Ties are broken arbitrarily. See stable_multiway_merge for a variant + * that breaks ties by sequence number but is slower. + * + * The first entries of the pairs (i.e. the begin iterators) will be moved + * forward. + * + * The output sequence has to provide enough space for all elements + * that are written to it. + * + * This function will merge the input sequences: + * + * - not stable + * - parallel, depending on the input size and Settings + * - using sampling for splitting + * - using sentinels + * + * You have to take care that the element the end iterator points to is + * readable and contains a value that is greater than any other non-sentinel + * value in all sequences. + * + * Example: + * + * <pre> + * int sequences[10][11]; + * for (int i = 0; i < 10; ++i) + * for (int j = 0; i < 11; ++j) + * sequences[i][j] = j; // last one is sentinel! 
+ * + * int out[33]; + * std::vector<std::pair<int*> > seqs; + * for (int i = 0; i < 10; ++i) + * { seqs.push(std::make_pair<int*>(sequences[i], sequences[i] + 10)) } + * + * multiway_merge(seqs.begin(), seqs.end(), target, std::less<int>(), 33); + * </pre> + * + * @pre All input sequences must be sorted. + * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * @pre For each @c i, @c seqs_begin[i].second must be the end + * marker of the sequence, but also reference the one more sentinel + * element. + * + * @post [target, return value) contains merged elements from the + * input sequences. + * @post return value - target = min(length, number of elements in all + * sequences). + * + * @see stable_multiway_merge_sentinels + * + * @param RandomAccessIteratorPairIterator iterator over sequence + * of pairs of iterators + * @param RandomAccessIteratorOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param Comparator strict weak ordering type to compare elements + * in sequences + * + * @param seqs_begin begin of sequence sequence + * @param seqs_end end of sequence sequence + * @param target target sequence to merge to. + * @param comp strict weak ordering to use for element comparison. + * @param length the number of elements to merge into target. + * + * @return end iterator of output sequence + */ +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + </* stable = */ false, /* sentinels = */ true> + (seqs_begin, seqs_end, target, comp, + multiway_merge_sampling_splitting + </* stable = */ false, RandomAccessIteratorPairIterator, + Comparator, _DifferenceTp>, + static_cast<difference_type>(length)); + else + target_end = sequential_multiway_merge + </* stable = */false, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; } +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute multiway merge *sequentially*. 
+ return sequential_multiway_merge + </* stable = */ false, /* sentinels = */ true> + (seqs_begin, seqs_end, target, comp, length); +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::exact_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + </* stable = */ false, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, comp, + multiway_merge_exact_splitting + </* stable = */ false, RandomAccessIteratorPairIterator, + Comparator, _DifferenceTp>, + static_cast<difference_type>(length)); + else + target_end = sequential_multiway_merge + </* stable = */ false, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
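For the sentinel variants, each sequence must own one readable element at the position its end iterator refers to, holding a value no smaller than any real element. A corrected sketch of such a call, again assuming direct inclusion of <parallel/multiway_merge.h> and compilation with -fopenmp:

#include <parallel/multiway_merge.h>
#include <functional>
#include <iostream>
#include <limits>
#include <utility>
#include <vector>

int main()
{
  int sequences[4][11];
  for (int i = 0; i < 4; ++i)
    {
      for (int j = 0; j < 10; ++j)
        sequences[i][j] = j;                               // sorted payload
      sequences[i][10] = std::numeric_limits<int>::max();  // sentinel slot
    }

  int out[40];
  std::vector<std::pair<int*, int*> > seqs;
  for (int i = 0; i < 4; ++i)
    // second points just past the last real element; that readable slot
    // holds the sentinel.
    seqs.push_back(std::make_pair(sequences[i], sequences[i] + 10));

  __gnu_parallel::multiway_merge_sentinels(seqs.begin(), seqs.end(), out,
                                           std::less<int>(), 40);

  for (int i = 0; i < 40; ++i)
    std::cout << out[i] << ' ';
  std::cout << '\n';
}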
+ RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + </* stable = */ true, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, comp, + multiway_merge_sampling_splitting + </* stable = */ true, RandomAccessIteratorPairIterator, + Comparator, _DifferenceTp>, + static_cast<difference_type>(length)); + else + target_end = sequential_multiway_merge + </* stable = */ true, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute multiway merge *sequentially*. + return sequential_multiway_merge + </* stable = */ true, /* sentinels = */ true> + (seqs_begin, seqs_end, target, comp, length); +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::exact_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + </* stable = */ true, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, comp, + multiway_merge_exact_splitting + </* stable = */ true, RandomAccessIteratorPairIterator, + Comparator, _DifferenceTp>, + static_cast<difference_type>(length)); + else + target_end = sequential_multiway_merge + </* stable = */ true, /* sentinels = */ true>( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +}; // namespace __gnu_parallel + #endif diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h index c8ceb2f40b7..3791a144d53 100644 --- a/libstdc++-v3/include/parallel/multiway_mergesort.h +++ b/libstdc++-v3/include/parallel/multiway_mergesort.h @@ -80,26 +80,9 @@ template<typename RandomAccessIterator> /** @brief Start indices, per thread. */ difference_type* starts; - /** @brief Temporary arrays for each thread. 
- * - * Indirection Allows using the temporary storage in different - * ways, without code duplication. - * @see _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST */ - value_type** temporaries; - -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST /** @brief Storage in which to sort. */ - RandomAccessIterator* sorting_places; + value_type** temporary; - /** @brief Storage into which to merge. */ - value_type** merging_places; -#else - /** @brief Storage in which to sort. */ - value_type** sorting_places; - - /** @brief Storage into which to merge. */ - RandomAccessIterator* merging_places; -#endif /** @brief Samples. */ value_type* samples; @@ -108,9 +91,6 @@ template<typename RandomAccessIterator> /** @brief Pieces of data to merge @c [thread][sequence] */ std::vector<Piece<difference_type> >* pieces; - - /** @brief Stable sorting desired. */ - bool stable; }; /** @@ -122,7 +102,7 @@ template<typename RandomAccessIterator> template<typename RandomAccessIterator, typename _DifferenceTp> void determine_samples(PMWMSSortingData<RandomAccessIterator>* sd, - _DifferenceTp& num_samples) + _DifferenceTp num_samples) { typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef typename traits_type::value_type value_type; @@ -130,8 +110,6 @@ template<typename RandomAccessIterator, typename _DifferenceTp> thread_index_t iam = omp_get_thread_num(); - num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1; - difference_type* es = new difference_type[num_samples + 2]; equally_split(sd->starts[iam + 1] - sd->starts[iam], @@ -144,11 +122,201 @@ template<typename RandomAccessIterator, typename _DifferenceTp> delete[] es; } +/** @brief Split consistently. */ +template<bool exact, typename RandomAccessIterator, + typename Comparator, typename SortingPlacesIterator> + struct split_consistently + { + }; + +/** @brief Split by exact splitting. */ +template<typename RandomAccessIterator, typename Comparator, + typename SortingPlacesIterator> + struct split_consistently + <true, RandomAccessIterator, Comparator, SortingPlacesIterator> + { + void operator()( + const thread_index_t iam, + PMWMSSortingData<RandomAccessIterator>* sd, + Comparator& comp, + const typename + std::iterator_traits<RandomAccessIterator>::difference_type + num_samples) + const + { +# pragma omp barrier + + std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > + seqs(sd->num_threads); + for (thread_index_t s = 0; s < sd->num_threads; s++) + seqs[s] = std::make_pair(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s])); + + std::vector<SortingPlacesIterator> offsets(sd->num_threads); + + // if not last thread + if (iam < sd->num_threads - 1) + multiseq_partition(seqs.begin(), seqs.end(), + sd->starts[iam + 1], offsets.begin(), comp); + + for (int seq = 0; seq < sd->num_threads; seq++) + { + // for each sequence + if (iam < (sd->num_threads - 1)) + sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first; + else + // very end of this sequence + sd->pieces[iam][seq].end = + sd->starts[seq + 1] - sd->starts[seq]; + } + +# pragma omp barrier + + for (thread_index_t seq = 0; seq < sd->num_threads; seq++) + { + // For each sequence. + if (iam > 0) + sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end; + else + // Absolute beginning. + sd->pieces[iam][seq].begin = 0; + } + } + }; + +/** @brief Split by sampling. 
*/ +template<typename RandomAccessIterator, typename Comparator, + typename SortingPlacesIterator> + struct split_consistently<false, RandomAccessIterator, Comparator, + SortingPlacesIterator> + { + void operator()( + const thread_index_t iam, + PMWMSSortingData<RandomAccessIterator>* sd, + Comparator& comp, + const typename + std::iterator_traits<RandomAccessIterator>::difference_type + num_samples) + const + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + determine_samples(sd, num_samples); + +# pragma omp barrier + +# pragma omp single + __gnu_sequential::sort(sd->samples, + sd->samples + (num_samples * sd->num_threads), + comp); + +# pragma omp barrier + + for (thread_index_t s = 0; s < sd->num_threads; ++s) + { + // For each sequence. + if (num_samples * iam > 0) + sd->pieces[iam][s].begin = + std::lower_bound(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s]), + sd->samples[num_samples * iam], + comp) + - sd->temporary[s]; + else + // Absolute beginning. + sd->pieces[iam][s].begin = 0; + + if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads)) + sd->pieces[iam][s].end = + std::lower_bound(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s]), + sd->samples[num_samples * (iam + 1)], + comp) + - sd->temporary[s]; + else + // Absolute end. + sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s]; + } + } + }; + +template<bool stable, typename RandomAccessIterator, typename Comparator> + struct possibly_stable_sort + { + }; + +template<typename RandomAccessIterator, typename Comparator> + struct possibly_stable_sort<true, RandomAccessIterator, Comparator> + { + void operator()(const RandomAccessIterator& begin, + const RandomAccessIterator& end, Comparator& comp) const + { + __gnu_sequential::stable_sort(begin, end, comp); + } + }; + +template<typename RandomAccessIterator, typename Comparator> + struct possibly_stable_sort<false, RandomAccessIterator, Comparator> + { + void operator()(const RandomAccessIterator begin, + const RandomAccessIterator end, Comparator& comp) const + { + __gnu_sequential::sort(begin, end, comp); + } + }; + +template<bool stable, typename SeqRandomAccessIterator, + typename RandomAccessIterator, typename Comparator, + typename DiffType> + struct possibly_stable_multiway_merge + { + }; + +template<typename SeqRandomAccessIterator, typename RandomAccessIterator, + typename Comparator, typename DiffType> + struct possibly_stable_multiway_merge + <true, SeqRandomAccessIterator, RandomAccessIterator, Comparator, + DiffType> + { + void operator()(const SeqRandomAccessIterator& seqs_begin, + const SeqRandomAccessIterator& seqs_end, + const RandomAccessIterator& target, + Comparator& comp, + DiffType length_am) const + { + stable_multiway_merge(seqs_begin, seqs_end, target, comp, + length_am, sequential_tag()); + } + }; + +template<typename SeqRandomAccessIterator, typename RandomAccessIterator, + typename Comparator, typename DiffType> + struct possibly_stable_multiway_merge + <false, SeqRandomAccessIterator, RandomAccessIterator, Comparator, + DiffType> + { + void operator()(const SeqRandomAccessIterator& seqs_begin, + const SeqRandomAccessIterator& seqs_end, + const RandomAccessIterator& target, + Comparator& comp, + DiffType length_am) const + { + multiway_merge(seqs_begin, seqs_end, target, comp, + length_am, sequential_tag()); + } + }; + /** @brief PMWMS code 
executed by each thread. * @param sd Pointer to algorithm data. * @param comp Comparator. */ -template<typename RandomAccessIterator, typename Comparator> +template<bool stable, bool exact, typename RandomAccessIterator, + typename Comparator> void parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd, Comparator& comp) @@ -162,165 +330,65 @@ template<typename RandomAccessIterator, typename Comparator> // Length of this thread's chunk, before merging. difference_type length_local = sd->starts[iam + 1] - sd->starts[iam]; -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST - typedef RandomAccessIterator SortingPlacesIterator; + // Sort in temporary storage, leave space for sentinel. - // Sort in input storage. - sd->sorting_places[iam] = sd->source + sd->starts[iam]; -#else typedef value_type* SortingPlacesIterator; - // Sort in temporary storage, leave space for sentinel. - sd->sorting_places[iam] = sd->temporaries[iam] = + sd->temporary[iam] = static_cast<value_type*>( ::operator new(sizeof(value_type) * (length_local + 1))); // Copy there. std::uninitialized_copy(sd->source + sd->starts[iam], sd->source + sd->starts[iam] + length_local, - sd->sorting_places[iam]); -#endif - - // Sort locally. - if (sd->stable) - __gnu_sequential::stable_sort(sd->sorting_places[iam], - sd->sorting_places[iam] + length_local, - comp); - else - __gnu_sequential::sort(sd->sorting_places[iam], - sd->sorting_places[iam] + length_local, - comp); - - // Invariant: locally sorted subsequence in sd->sorting_places[iam], - // sd->sorting_places[iam] + length_local. - const _Settings& __s = _Settings::get(); - if (__s.sort_splitting == SAMPLING) - { - difference_type num_samples; - determine_samples(sd, num_samples); - -# pragma omp barrier - -# pragma omp single - __gnu_sequential::sort(sd->samples, - sd->samples + (num_samples * sd->num_threads), - comp); - -# pragma omp barrier - - for (int s = 0; s < sd->num_threads; ++s) - { - // For each sequence. - if (num_samples * iam > 0) - sd->pieces[iam][s].begin = - std::lower_bound(sd->sorting_places[s], - sd->sorting_places[s] - + (sd->starts[s + 1] - sd->starts[s]), - sd->samples[num_samples * iam], - comp) - - sd->sorting_places[s]; - else - // Absolute beginning. - sd->pieces[iam][s].begin = 0; - - if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads)) - sd->pieces[iam][s].end = - std::lower_bound(sd->sorting_places[s], - sd->sorting_places[s] - + (sd->starts[s + 1] - sd->starts[s]), - sd->samples[num_samples * (iam + 1)], - comp) - - sd->sorting_places[s]; - else - // Absolute end. - sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s]; - } - } - else if (__s.sort_splitting == EXACT) - { -# pragma omp barrier + sd->temporary[iam]); - std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > - seqs(sd->num_threads); - for (int s = 0; s < sd->num_threads; ++s) - seqs[s] = std::make_pair(sd->sorting_places[s], - sd->sorting_places[s] - + (sd->starts[s + 1] - sd->starts[s])); + possibly_stable_sort<stable, SortingPlacesIterator, Comparator>() + (sd->temporary[iam], sd->temporary[iam] + length_local, comp); - std::vector<SortingPlacesIterator> offsets(sd->num_threads); + // Invariant: locally sorted subsequence in sd->temporary[iam], + // sd->temporary[iam] + length_local. - // if not last thread - if (iam < sd->num_threads - 1) - multiseq_partition(seqs.begin(), seqs.end(), - sd->starts[iam + 1], offsets.begin(), comp); + // No barrier here: Synchronization is done by the splitting routine. 
- for (int seq = 0; seq < sd->num_threads; ++seq) - { - // for each sequence - if (iam < (sd->num_threads - 1)) - sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first; - else - // very end of this sequence - sd->pieces[iam][seq].end = (sd->starts[seq + 1] - - sd->starts[seq]); - } - -# pragma omp barrier - - for (int seq = 0; seq < sd->num_threads; ++seq) - { - // For each sequence. - if (iam > 0) - sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end; - else - // Absolute beginning. - sd->pieces[iam][seq].begin = 0; - } - } + difference_type num_samples = + _Settings::get().sort_mwms_oversampling * sd->num_threads - 1; + split_consistently + <exact, RandomAccessIterator, Comparator, SortingPlacesIterator>() + (iam, sd, comp, num_samples); // Offset from target begin, length after merging. difference_type offset = 0, length_am = 0; - for (int s = 0; s < sd->num_threads; ++s) + for (thread_index_t s = 0; s < sd->num_threads; s++) { length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin; offset += sd->pieces[iam][s].begin; } -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST - // Merge to temporary storage, uninitialized creation not possible - // since there is no multiway_merge calling the placement new - // instead of the assignment operator. - // XXX incorrect (de)construction - sd->merging_places[iam] = sd->temporaries[iam] = - static_cast<value_type*>(::operator new(sizeof(value_type) - * length_am)); -#else - // Merge directly to target. - sd->merging_places[iam] = sd->source + offset; -#endif - std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > - seqs(sd->num_threads); + typedef std::vector< + std::pair<SortingPlacesIterator, SortingPlacesIterator> > + seq_vector_type; + seq_vector_type seqs(sd->num_threads); for (int s = 0; s < sd->num_threads; ++s) { seqs[s] = - std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin, - sd->sorting_places[s] + sd->pieces[iam][s].end); + std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin, + sd->temporary[s] + sd->pieces[iam][s].end); } - multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, - length_am, sd->stable, false, sequential_tag()); + possibly_stable_multiway_merge< + stable, + typename seq_vector_type::iterator, + RandomAccessIterator, + Comparator, difference_type>() + (seqs.begin(), seqs.end(), + sd->source + offset, comp, + length_am); # pragma omp barrier -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST - // Write back. - std::copy(sd->merging_places[iam], - sd->merging_places[iam] + length_am, - sd->source + offset); -#endif - - ::operator delete(sd->temporaries[iam]); + ::operator delete(sd->temporary[iam]); } /** @brief PMWMS main call. @@ -329,21 +397,22 @@ template<typename RandomAccessIterator, typename Comparator> * @param comp Comparator. * @param n Length of sequence. * @param num_threads Number of threads to use. - * @param stable Stable sorting. 
*/ -template<typename RandomAccessIterator, typename Comparator> +template<bool stable, bool exact, typename RandomAccessIterator, + typename Comparator> void parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, - Comparator comp, typename - std::iterator_traits<RandomAccessIterator>:: - difference_type n, int num_threads, bool stable) + Comparator comp, + thread_index_t num_threads) { - _GLIBCXX_CALL(n) + _GLIBCXX_CALL(end - begin) typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type; + difference_type n = end - begin; + if (n <= 1) return; @@ -354,7 +423,6 @@ template<typename RandomAccessIterator, typename Comparator> // shared variables PMWMSSortingData<RandomAccessIterator> sd; difference_type* starts; - const _Settings& __s = _Settings::get(); # pragma omp parallel num_threads(num_threads) { @@ -364,23 +432,16 @@ template<typename RandomAccessIterator, typename Comparator> { sd.num_threads = num_threads; sd.source = begin; - sd.temporaries = new value_type*[num_threads]; - -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST - sd.sorting_places = new RandomAccessIterator[num_threads]; - sd.merging_places = new value_type*[num_threads]; -#else - sd.sorting_places = new value_type*[num_threads]; - sd.merging_places = new RandomAccessIterator[num_threads]; -#endif - if (__s.sort_splitting == SAMPLING) + sd.temporary = new value_type*[num_threads]; + + if (!exact) { - unsigned int size = - (__s.sort_mwms_oversampling * num_threads - 1) + difference_type size = + (_Settings::get().sort_mwms_oversampling * num_threads - 1) * num_threads; sd.samples = static_cast<value_type*>( - ::operator new(size * sizeof(value_type))); + ::operator new(size * sizeof(value_type))); } else sd.samples = NULL; @@ -390,7 +451,6 @@ template<typename RandomAccessIterator, typename Comparator> for (int s = 0; s < num_threads; ++s) sd.pieces[s].resize(num_threads); starts = sd.starts = new difference_type[num_threads + 1]; - sd.stable = stable; difference_type chunk_length = n / num_threads; difference_type split = n % num_threads; @@ -401,18 +461,16 @@ template<typename RandomAccessIterator, typename Comparator> pos += (i < split) ? (chunk_length + 1) : chunk_length; } starts[num_threads] = pos; - } + } //single // Now sort in parallel. 
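The starts[] array set up above distributes the n elements over num_threads chunks whose sizes differ by at most one, giving the remainder n % num_threads to the first threads. The same arithmetic in isolation:

#include <iostream>
#include <vector>

int main()
{
  const long n = 23;
  const int p = 4;

  std::vector<long> starts(p + 1);
  long chunk_length = n / p, split = n % p, pos = 0;
  for (int i = 0; i < p; ++i)
    {
      starts[i] = pos;
      pos += (i < split) ? (chunk_length + 1) : chunk_length;
    }
  starts[p] = pos;                        // == n

  for (int i = 0; i <= p; ++i)
    std::cout << starts[i] << ' ';        // prints: 0 6 12 18 23
  std::cout << '\n';
}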
- parallel_sort_mwms_pu(&sd, comp); + parallel_sort_mwms_pu<stable, exact>(&sd, comp); } //parallel delete[] starts; - delete[] sd.temporaries; - delete[] sd.sorting_places; - delete[] sd.merging_places; + delete[] sd.temporary; - if (__s.sort_splitting == SAMPLING) + if (!exact) ::operator delete(sd.samples); delete[] sd.offsets; diff --git a/libstdc++-v3/include/parallel/sort.h b/libstdc++-v3/include/parallel/sort.h index edf4eea02d8..83aa2df1b11 100644 --- a/libstdc++-v3/include/parallel/sort.h +++ b/libstdc++-v3/include/parallel/sort.h @@ -71,7 +71,7 @@ namespace __gnu_parallel template<typename RandomAccessIterator, typename Comparator> inline void parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, - Comparator comp, bool stable) + Comparator comp, bool stable) { _GLIBCXX_CALL(end - begin) typedef std::iterator_traits<RandomAccessIterator> traits_type; @@ -79,25 +79,43 @@ namespace __gnu_parallel typedef typename traits_type::difference_type difference_type; if (begin != end) - { - difference_type n = end - begin; + { + difference_type n = end - begin; - if (false) ; + if (false) ; #if _GLIBCXX_MERGESORT - else if (stable || _Settings::get().sort_algorithm == MWMS) - parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable); + else if (stable) + { + if(_Settings::get().sort_splitting == EXACT) + parallel_sort_mwms<true, true> + (begin, end, comp, get_max_threads()); + else + parallel_sort_mwms<true, false> + (begin, end, comp, get_max_threads()); + } + else if (_Settings::get().sort_algorithm == MWMS) + { + if(_Settings::get().sort_splitting == EXACT) + parallel_sort_mwms<false, true> + (begin, end, comp, get_max_threads()); + else + parallel_sort_mwms<false, false> + (begin, end, comp, get_max_threads()); + } #endif #if _GLIBCXX_QUICKSORT - else if (!stable && _Settings::get().sort_algorithm == QS) - parallel_sort_qs(begin, end, comp, n, get_max_threads()); + else if (!stable && _Settings::get().sort_algorithm == QS) + parallel_sort_qs(begin, end, comp, n, get_max_threads()); #endif #if _GLIBCXX_BAL_QUICKSORT - else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED) - parallel_sort_qsb(begin, end, comp, n, get_max_threads()); + else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED) + parallel_sort_qsb(begin, end, comp, n, get_max_threads()); #endif - else - __gnu_sequential::sort(begin, end, comp); - } + else if(stable) + __gnu_sequential::stable_sort(begin, end, comp); + else + __gnu_sequential::sort(begin, end, comp); + } } } // end namespace __gnu_parallel diff --git a/libstdc++-v3/include/parallel/tags.h b/libstdc++-v3/include/parallel/tags.h index b3f2ec86912..f57add97c7b 100644 --- a/libstdc++-v3/include/parallel/tags.h +++ b/libstdc++-v3/include/parallel/tags.h @@ -44,6 +44,9 @@ namespace __gnu_parallel /** @brief Forces sequential execution at compile time. */ struct sequential_tag { }; + /** @brief Forces exact splitting in multiway merge at compile time. */ + struct exact_tag { }; + /** @brief Recommends parallel execution at compile time. */ struct parallel_tag { }; diff --git a/libstdc++-v3/include/parallel/types.h b/libstdc++-v3/include/parallel/types.h index ded617edb6d..1b646b02084 100644 --- a/libstdc++-v3/include/parallel/types.h +++ b/libstdc++-v3/include/parallel/types.h @@ -87,15 +87,10 @@ namespace __gnu_parallel /// Merging algorithms: // bubblesort-alike, loser-tree variants, enum sentinel. 
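With the sort.h change above, parallel_sort turns the stable flag and the splitting strategy from _Settings into template arguments, so parallel_sort_mwms is instantiated once per combination instead of branching inside the sorting routine. A sketch of how a user would steer that choice through the settings object, assuming -D_GLIBCXX_PARALLEL -fopenmp and the usual _Settings::get()/set() interface of the parallel mode:

#include <algorithm>
#include <cstddef>
#include <vector>
#include <parallel/settings.h>

int main()
{
  std::vector<int> v(1 << 20);
  for (std::size_t i = 0; i < v.size(); ++i)
    v[i] = static_cast<int>((v.size() - i) % 1024);

  // Request multiway mergesort with exact splitting; parallel_sort then
  // instantiates parallel_sort_mwms<stable, /* exact = */ true>.
  __gnu_parallel::_Settings s = __gnu_parallel::_Settings::get();
  s.sort_algorithm = __gnu_parallel::MWMS;
  s.sort_splitting = __gnu_parallel::EXACT;
  __gnu_parallel::_Settings::set(s);

  // Under -D_GLIBCXX_PARALLEL this resolves to the parallel sort.
  std::sort(v.begin(), v.end());
}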
enum _MultiwayMergeAlgorithm - { - BUBBLE, - LOSER_TREE_EXPLICIT, - LOSER_TREE, - LOSER_TREE_COMBINED, - LOSER_TREE_SENTINEL, - ENUM_SENTINEL + { + LOSER_TREE }; - + /// Partial sum algorithms: recursive, linear. enum _PartialSumAlgorithm { diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array index 691f41cbcd8..c84ddb6573f 100644 --- a/libstdc++-v3/include/std/array +++ b/libstdc++-v3/include/std/array @@ -31,8 +31,8 @@ * This is a Standard C++ Library header. */ -#ifndef _GLIBCXX_CXX0X_ARRAY -#define _GLIBCXX_CXX0X_ARRAY 1 +#ifndef _GLIBCXX_ARRAY +#define _GLIBCXX_ARRAY 1 #pragma GCC system_header @@ -60,4 +60,4 @@ # undef _GLIBCXX_INCLUDE_AS_CXX0X #endif -#endif // _GLIBCXX_CXX0X_ARRAY +#endif // _GLIBCXX_ARRAY diff --git a/libstdc++-v3/include/std/date_time b/libstdc++-v3/include/std/date_time index b956a9b0119..0aca6b3b4ac 100644 --- a/libstdc++-v3/include/std/date_time +++ b/libstdc++-v3/include/std/date_time @@ -27,7 +27,7 @@ // invalidate any other reasons why the executable file might be covered by // the GNU General Public License. -/** @file include/date_time +/** @file date_time * This is a Standard C++ Library header. */ diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex index 5c257181f2e..9014fbabb2e 100644 --- a/libstdc++-v3/include/std/regex +++ b/libstdc++-v3/include/std/regex @@ -31,8 +31,8 @@ * This is a Standard C++ Library header. */ -#ifndef _GLIBCXX_CXX0X_REGEX -#define _GLIBCXX_CXX0X_REGEX 1 +#ifndef _GLIBCXX_REGEX +#define _GLIBCXX_REGEX 1 #pragma GCC system_header @@ -67,4 +67,4 @@ # undef _GLIBCXX_INCLUDE_AS_CXX0X #endif -#endif // _GLIBCXX_CXX0X_REGEX +#endif // _GLIBCXX_REGEX diff --git a/libstdc++-v3/include/std/system_error b/libstdc++-v3/include/std/system_error index c8973d14735..5081e3f11ec 100644 --- a/libstdc++-v3/include/std/system_error +++ b/libstdc++-v3/include/std/system_error @@ -1,6 +1,6 @@ // <system_error> -*- C++ -*- -// Copyright (C) 2007 Free Software Foundation, Inc. +// Copyright (C) 2007, 2008 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the @@ -27,7 +27,7 @@ // invalidate any other reasons why the executable file might be covered by // the GNU General Public License. -/** @file include/system_error +/** @file system_error * This is a Standard C++ Library header. */ @@ -142,6 +142,9 @@ _GLIBCXX_BEGIN_NAMESPACE(std) error_code _M_code; public: + system_error(error_code __ec = error_code()) + : runtime_error(""), _M_code(__ec) { } + system_error(const string& __what, error_code __ec = error_code()) : runtime_error(__what), _M_code(__ec) { } diff --git a/libstdc++-v3/include/std/tuple b/libstdc++-v3/include/std/tuple index 3dda5301644..38a6bbb47f2 100644 --- a/libstdc++-v3/include/std/tuple +++ b/libstdc++-v3/include/std/tuple @@ -31,8 +31,8 @@ * This is a Standard C++ Library header. */ -#ifndef _GLIBCXX_CXX0X_TUPLE -#define _GLIBCXX_CXX0X_TUPLE 1 +#ifndef _GLIBCXX_TUPLE +#define _GLIBCXX_TUPLE 1 #pragma GCC system_header @@ -644,4 +644,4 @@ namespace std }; // anonymous namespace } -#endif // _GLIBCXX_CXX0X_TUPLE +#endif // _GLIBCXX_TUPLE diff --git a/libstdc++-v3/include/std/type_traits b/libstdc++-v3/include/std/type_traits index 1f9a2d9f72d..66650f540d9 100644 --- a/libstdc++-v3/include/std/type_traits +++ b/libstdc++-v3/include/std/type_traits @@ -31,12 +31,12 @@ * This is a Standard C++ Library header. 
*/ -#ifndef _GLIBCXX_CXX0X_TYPE_TRAITS -#define _GLIBCXX_CXX0X_TYPE_TRAITS 1 +#ifndef _GLIBCXX_TYPE_TRAITS +#define _GLIBCXX_TYPE_TRAITS 1 #pragma GCC system_header -#ifndef __GXX_EXPERIMENTAL_CXX0X__ +#ifndef __GXX_EXPERIMENTAL__ # include <c++0x_warning.h> #endif @@ -553,5 +553,5 @@ namespace std struct make_signed<bool>; } -#endif // _GLIBCXX_CXX0X_TYPE_TRAITS +#endif // _GLIBCXX_TYPE_TRAITS
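Among the smaller library changes in this patch, <system_error> gains a constructor that takes only an error_code. A minimal illustration of throwing and catching through that constructor; it assumes the usual code() accessor and a compiler of this vintage invoked with -std=c++0x (details of the interim <system_error> interface may differ from the final standard):

#include <iostream>
#include <system_error>

int main()
{
  try
    {
      // With the new constructor a system_error can be thrown from just an
      // error_code (here default-constructed), with an empty what() string.
      throw std::system_error(std::error_code());
    }
  catch (const std::system_error& e)
    {
      std::cout << "caught system_error, value " << e.code().value() << '\n';
    }
}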