45 files changed, 17901 insertions, 0 deletions
diff --git a/libstdc++-v3/include/parallel/algo.h b/libstdc++-v3/include/parallel/algo.h
new file mode 100644
index 00000000000..dcda79090b4
--- /dev/null
+++ b/libstdc++-v3/include/parallel/algo.h
@@ -0,0 +1,1585 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/algo.h
+ *  @brief Parallel STL function calls corresponding to the stl_algo.h header.
+ *
+ *  The functions defined here mainly do case switches and
+ *  call the actual parallelized versions in other files.
+ *  Inlining policy: functions that consist of little more than a single
+ *  function call are declared inline.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_ALGO_H
+#define _GLIBCXX_PARALLEL_ALGO_H 1
+
+#include <parallel/algorithmfwd.h>
+#include <bits/stl_algobase.h>
+#include <bits/stl_algo.h>
+#include <parallel/iterator.h>
+#include <parallel/base.h>
+#include <parallel/sort.h>
+#include <parallel/workstealing.h>
+#include <parallel/par_loop.h>
+#include <parallel/omp_loop.h>
+#include <parallel/omp_loop_static.h>
+#include <parallel/for_each_selectors.h>
+#include <parallel/for_each.h>
+#include <parallel/find.h>
+#include <parallel/find_selectors.h>
+#include <parallel/search.h>
+#include <parallel/random_shuffle.h>
+#include <parallel/partition.h>
+#include <parallel/merge.h>
+#include <parallel/unique_copy.h>
+#include <parallel/set_operations.h>
+
+namespace std
+{
+namespace __parallel
+{
+  // Sequential fallback: selected by a trailing __gnu_parallel::sequential_tag argument.
+  // Forwards unchanged to _GLIBCXX_STD_P::for_each and returns its result.
+  template<typename InputIterator, typename Function>
+  inline Function
+  for_each(InputIterator begin, InputIterator end, Function f,
+           __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::for_each<InputIterator, Function>(begin, end, f); }
+
+  // Tag-dispatch fallback for every iterator category without a more specific overload.
+  // The parallelism argument is not consulted; the sequential_tag overload of for_each runs.
+  template<typename InputIterator, typename Function, typename IteratorTag>
+  Function
+  for_each_switch(InputIterator begin, InputIterator end, Function f,
+                  IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+  { return for_each<InputIterator, Function>(begin, end, f, __gnu_parallel::sequential_tag()); }
+
+  // Random access iterators: the parallel for_each runs only when _GLIBCXX_PARALLEL_CONDITION holds for
+  // "at least Settings::for_each_minimal_n elements and parallelism_tag is parallel"; otherwise sequential.
+  template<typename RandomAccessIterator, typename Function>
+  Function
+  for_each_switch(RandomAccessIterator begin, RandomAccessIterator end, Function f,
+                  random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::for_each_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))))
+      return for_each<RandomAccessIterator, Function>(begin, end, f, __gnu_parallel::sequential_tag());
+
+    bool dummy;  // Only passed along to the callee; never read in this function.
+    __gnu_parallel::for_each_selector<RandomAccessIterator> functionality;
+    return __gnu_parallel::for_each_template_random_access(begin, end, f, functionality, __gnu_parallel::dummy_reduct(), true, dummy, -1, parallelism_tag);
+  }
+
+  // Public interface: picks the for_each_switch overload from Iterator's iterator category.
+  template<typename Iterator, typename Function>
+  inline Function
+  for_each(Iterator begin, Iterator end, Function f, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    // A default-constructed category tag drives the overload selection below.
+    typedef typename std::iterator_traits<Iterator>::iterator_category category_tag;
+
+    return for_each_switch(begin, end, f, category_tag(), parallelism_tag);
+  }
+
+
+  // Sequential fallback: selected by a trailing __gnu_parallel::sequential_tag argument.
+  // Forwards unchanged to _GLIBCXX_STD_P::find and returns its result.
+  template<typename InputIterator, typename T>
+  inline InputIterator find(InputIterator begin, InputIterator end, const T& val, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::find<InputIterator, T>(begin, end, val); }
+
+  // Tag-dispatch fallback: any iterator category without a more specific
+  // find_switch overload is served by _GLIBCXX_STD_P::find.
+  template<typename InputIterator, typename T, typename IteratorTag>
+  inline InputIterator find_switch(InputIterator begin, InputIterator end, const T& val, IteratorTag)
+  { return _GLIBCXX_STD_P::find(begin, end, val); }
+
+  // Parallel find for random access iterators.  The value comparison is wrapped into a unary
+  // predicate (equal_to with val bound as second argument) and run through find_template.
+  template<typename RandomAccessIterator, typename T>
+  RandomAccessIterator
+  find_switch(RandomAccessIterator begin, RandomAccessIterator end, const T& val,
+              random_access_iterator_tag)
+  {
+    typedef typename iterator_traits<RandomAccessIterator>::value_type value_type;
+    typedef __gnu_parallel::equal_to<value_type, T> equality_type;
+
+    if (!(_GLIBCXX_PARALLEL_CONDITION(true)))
+      return _GLIBCXX_STD_P::find(begin, end, val);
+    binder2nd<equality_type> matches_val(equality_type(), val);
+    return __gnu_parallel::find_template(begin, end, begin, matches_val, __gnu_parallel::find_if_selector()).first;
+  }
+
+  // Public interface: forwards to the find_switch overload that matches the
+  // iterator category of InputIterator.
+  template<typename InputIterator, typename T>
+  inline InputIterator
+  find(InputIterator begin, InputIterator end, const T& val)
+  {
+    typedef typename std::iterator_traits<InputIterator>::iterator_category category_tag;
+    return find_switch(begin, end, val, category_tag());
+  }
+
+  // Sequential fallback: selected by a trailing __gnu_parallel::sequential_tag argument.
+  // Forwards unchanged to _GLIBCXX_STD_P::find_if and returns its result.
+  template<typename InputIterator, typename Predicate>
+  inline InputIterator
+  find_if(InputIterator begin, InputIterator end, Predicate pred,
+          __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::find_if<InputIterator, Predicate>(begin, end, pred); }
+
+  // Tag-dispatch fallback: any iterator category without a more specific
+  // find_if_switch overload is served by _GLIBCXX_STD_P::find_if.
+  template<typename InputIterator, typename Predicate, typename IteratorTag>
+  inline InputIterator
+  find_if_switch(InputIterator begin, InputIterator end, Predicate pred,
+                 IteratorTag)
+  { return _GLIBCXX_STD_P::find_if(begin, end, pred); }
+
+  // Parallel find_if for random access iterators; uses _GLIBCXX_STD_P::find_if
+  // instead whenever _GLIBCXX_PARALLEL_CONDITION(true) does not hold.
+  template<typename RandomAccessIterator, typename Predicate>
+  RandomAccessIterator
+  find_if_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, random_access_iterator_tag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(true)))
+      return _GLIBCXX_STD_P::find_if(begin, end, pred);
+    return __gnu_parallel::find_template(begin, end, begin, pred, __gnu_parallel::find_if_selector()).first;
+  }
+
+  // Public interface: forwards to the find_if_switch overload that matches the
+  // iterator category of InputIterator.
+  template<typename InputIterator, typename Predicate>
+  inline InputIterator
+  find_if(InputIterator begin, InputIterator end, Predicate pred)
+  {
+    typedef typename std::iterator_traits<InputIterator>::iterator_category category_tag;
+    return find_if_switch(begin, end, pred, category_tag());
+  }
+
+  // Sequential fallback (no comparison object): selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::find_first_of.
+  template<typename InputIterator, typename ForwardIterator>
+  inline InputIterator
+  find_first_of(InputIterator begin1, InputIterator end1, ForwardIterator begin2, ForwardIterator end2,
+                __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::find_first_of(begin1, end1, begin2, end2); }
+
+  // Sequential fallback with a caller-supplied comparison object; selected by a
+  // trailing __gnu_parallel::sequential_tag argument.
+  template<typename InputIterator, typename ForwardIterator, typename BinaryPredicate>
+  inline InputIterator
+  find_first_of(InputIterator begin1, InputIterator end1, ForwardIterator begin2, ForwardIterator end2,
+                BinaryPredicate comp, __gnu_parallel::sequential_tag)
+  {
+    // Straight hand-off to _GLIBCXX_STD_P; no tag dispatch happens on this path.
+    return _GLIBCXX_STD_P::find_first_of(begin1, end1, begin2, end2, comp);
+  }
+
+  // Tag-dispatch fallback without a comparison object: both iterator category tags are
+  // ignored and the sequential_tag overload of find_first_of is called.
+  template<typename InputIterator, typename ForwardIterator, typename IteratorTag1, typename IteratorTag2>
+  inline InputIterator
+  find_first_of_switch(InputIterator begin1, InputIterator end1, ForwardIterator begin2, ForwardIterator end2, IteratorTag1, IteratorTag2)
+  {
+    return find_first_of(begin1, end1, begin2, end2, __gnu_parallel::sequential_tag());
+  }
+
+  // Parallel find_first_of for a random access first range.  Guarded by _GLIBCXX_PARALLEL_CONDITION(true) like find_switch and find_if_switch; the sequential_tag overload of find_first_of is the fallback.
+  template<typename RandomAccessIterator, typename ForwardIterator, typename BinaryPredicate, typename IteratorTag>
+  inline RandomAccessIterator
+  find_first_of_switch(RandomAccessIterator begin1, RandomAccessIterator end1, ForwardIterator begin2, ForwardIterator end2, BinaryPredicate comp, random_access_iterator_tag, IteratorTag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(true))) return find_first_of(begin1, end1, begin2, end2, comp, __gnu_parallel::sequential_tag());
+    return __gnu_parallel::find_template(begin1, end1, begin1, comp, __gnu_parallel::find_first_of_selector<ForwardIterator>(begin2, end2)).first;
+  }
+
+  // Tag-dispatch fallback with a comparison object: both iterator category tags are
+  // ignored and the sequential_tag overload of find_first_of is called.
+  template<typename InputIterator, typename ForwardIterator, typename BinaryPredicate, typename IteratorTag1, typename IteratorTag2>
+  inline InputIterator
+  find_first_of_switch(InputIterator begin1, InputIterator end1, ForwardIterator begin2, ForwardIterator end2,
+                       BinaryPredicate comp, IteratorTag1, IteratorTag2)
+  {
+    return find_first_of(begin1, end1, begin2, end2, comp, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface with an explicit comparison object.  Dispatches to
+  // find_first_of_switch on the iterator categories of both ranges.
+  template<typename InputIterator, typename ForwardIterator, typename BinaryPredicate>
+  inline InputIterator
+  find_first_of(InputIterator begin1, InputIterator end1,
+                ForwardIterator begin2, ForwardIterator end2, BinaryPredicate comp)
+  {
+    // Category of the first range, then of the second range.
+    typedef typename std::iterator_traits<InputIterator>::iterator_category first_tag;
+    typedef typename std::iterator_traits<ForwardIterator>::iterator_category second_tag;
+
+    return find_first_of_switch(begin1, end1, begin2, end2, comp, first_tag(), second_tag());
+  }
+
+  // Public interface without a comparison object: builds a
+  // __gnu_parallel::equal_to over the value types of the two ranges and
+  // delegates to the comparator-taking find_first_of.
+  template<typename InputIterator, typename ForwardIterator>
+  InputIterator
+  find_first_of(InputIterator begin1, InputIterator end1, ForwardIterator begin2, ForwardIterator end2)
+  {
+    typedef typename std::iterator_traits<InputIterator>::value_type first_value_type;
+    typedef typename std::iterator_traits<ForwardIterator>::value_type second_value_type;
+
+    return find_first_of(begin1, end1, begin2, end2, __gnu_parallel::equal_to<first_value_type, second_value_type>());
+  }
+
+  // Sequential fallback (no predicate): selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::unique_copy.
+  template<typename InputIterator, typename OutputIterator>
+  inline OutputIterator
+  unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::unique_copy<InputIterator, OutputIterator>(begin1, end1, out);
+  }
+
+  // Sequential fallback with a caller-supplied predicate: selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::unique_copy.
+  template<typename InputIterator, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out, Predicate pred, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::unique_copy<InputIterator, OutputIterator, Predicate>(begin1, end1, out, pred);
+  }
+
+  // Tag-dispatch fallback: unless both iterator categories select the random access
+  // overload, _GLIBCXX_STD_P::unique_copy does the work.
+  template<typename InputIterator, typename OutputIterator, typename Predicate, typename IteratorTag1, typename IteratorTag2>
+  inline OutputIterator
+  unique_copy_switch(InputIterator begin, InputIterator last, OutputIterator out, Predicate pred, IteratorTag1, IteratorTag2)
+  {
+    return _GLIBCXX_STD_P::unique_copy(begin, last, out, pred);
+  }
+
+  // Random access input and output: parallel_unique_copy runs when _GLIBCXX_PARALLEL_CONDITION holds
+  // for "more than Settings::unique_copy_minimal_n elements"; otherwise _GLIBCXX_STD_P::unique_copy.
+  template<typename RandomAccessIterator, typename RandomAccessOutputIterator, typename Predicate>
+  RandomAccessOutputIterator
+  unique_copy_switch(RandomAccessIterator begin, RandomAccessIterator last, RandomAccessOutputIterator out,
+                     Predicate pred, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(last - begin) > __gnu_parallel::Settings::unique_copy_minimal_n)))
+      return _GLIBCXX_STD_P::unique_copy(begin, last, out, pred);
+    return __gnu_parallel::parallel_unique_copy(begin, last, out, pred);
+  }
+
+  // Public interface without a predicate: passes equal_to over the input value type
+  // as the predicate and dispatches on the input and output iterator categories.
+  template<typename InputIterator, typename OutputIterator>
+  inline OutputIterator
+  unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out)
+  {
+    typedef std::iterator_traits<InputIterator> input_traits;
+    typedef typename input_traits::value_type element_type;
+    typedef typename input_traits::iterator_category input_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category output_tag;
+
+    return unique_copy_switch(begin1, end1, out, equal_to<element_type>(),
+                              input_tag(), output_tag());
+  }
+
+  // Public interface with a caller-supplied predicate.  Dispatches to
+  // unique_copy_switch on the input and output iterator categories.
+  template<typename InputIterator, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out,
+              Predicate pred)
+  {
+    // Category of the source range, then of the destination.
+    typedef typename std::iterator_traits<InputIterator>::iterator_category input_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category output_tag;
+
+    return unique_copy_switch(begin1, end1, out, pred, input_tag(), output_tag());
+  }
+
+  // Sequential fallback (no predicate): selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::set_union.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  set_union(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+            OutputIterator out, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::set_union(begin1, end1, begin2, end2, out);
+  }
+
+  // Sequential fallback with a caller-supplied predicate: selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::set_union.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  set_union(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+            OutputIterator out, Predicate pred, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::set_union(begin1, end1, begin2, end2, out, pred);
+  }
+
+  // Tag-dispatch fallback: unless all three iterator categories select the random
+  // access overload, _GLIBCXX_STD_P::set_union does the work.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate,
+           typename OutputIterator, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline OutputIterator
+  set_union_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                   OutputIterator result, Predicate pred, IteratorTag1, IteratorTag2, IteratorTag3)
+  {
+    return _GLIBCXX_STD_P::set_union(begin1, end1, begin2, end2, result, pred);
+  }
+
+  // Random access on both inputs and the output: parallel_set_union runs when _GLIBCXX_PARALLEL_CONDITION holds
+  // for "either input has at least Settings::set_union_minimal_n elements"; otherwise _GLIBCXX_STD_P::set_union.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2,
+           typename OutputRandomAccessIterator, typename Predicate>
+  OutputRandomAccessIterator
+  set_union_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
+                   RandomAccessIterator2 begin2, RandomAccessIterator2 end2,
+                   OutputRandomAccessIterator result, Predicate pred, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::set_union_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::set_union_minimal_n)))
+      return _GLIBCXX_STD_P::set_union(begin1, end1, begin2, end2, result, pred);
+    return __gnu_parallel::parallel_set_union(begin1, end1, begin2, end2, result, pred);
+  }
+
+  // Public interface without a predicate: orders elements with __gnu_parallel::less over the
+  // two input value types and dispatches on the categories of all three iterators.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  set_union(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+            OutputIterator out)
+  {
+    typedef std::iterator_traits<InputIterator1> first_traits;
+    typedef std::iterator_traits<InputIterator2> second_traits;
+    typedef __gnu_parallel::less<typename first_traits::value_type, typename second_traits::value_type> default_less;
+    typedef typename first_traits::iterator_category first_tag;
+    typedef typename second_traits::iterator_category second_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category out_tag;
+
+    return set_union_switch(begin1, end1, begin2, end2, out, default_less(),
+                            first_tag(), second_tag(), out_tag());
+  }
+
+  // Public interface with a caller-supplied predicate.
+  // Dispatches to set_union_switch on the iterator categories of the two
+  // input ranges and of the output iterator.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  set_union(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
+            InputIterator2 end2, OutputIterator out, Predicate pred)
+  {
+    // One category tag per iterator argument, in parameter order.
+    typedef typename std::iterator_traits<InputIterator1>::iterator_category first_tag;
+    typedef typename std::iterator_traits<InputIterator2>::iterator_category second_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category out_tag;
+
+    return set_union_switch(begin1, end1, begin2, end2, out, pred,
+                            first_tag(), second_tag(), out_tag());
+  }
+
+  // Sequential fallback (no predicate): selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::set_intersection.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  set_intersection(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                   OutputIterator out, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, end2, out);
+  }
+
+  // Sequential fallback with a caller-supplied predicate: selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::set_intersection.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  set_intersection(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                   OutputIterator out, Predicate pred, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, end2, out, pred);
+  }
+
+  // Tag-dispatch fallback: unless all three iterator categories select the random
+  // access overload, _GLIBCXX_STD_P::set_intersection does the work.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate,
+           typename OutputIterator, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline OutputIterator
+  set_intersection_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                          OutputIterator result, Predicate pred, IteratorTag1, IteratorTag2, IteratorTag3)
+  {
+    return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, end2, result, pred);
+  }
+
+  // Random access on both inputs and the output: parallel_set_intersection runs when _GLIBCXX_PARALLEL_CONDITION holds for "either input has at least Settings::set_intersection_minimal_n elements".
+  // NOTE(review): assumes Settings declares set_intersection_minimal_n next to the other set_*_minimal_n thresholds -- confirm in settings.h.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2,
+	   typename OutputRandomAccessIterator, typename Predicate>
+  OutputRandomAccessIterator 
+  set_intersection_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2,
+			  RandomAccessIterator2 end2, OutputRandomAccessIterator result, Predicate pred,
+			  random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::set_intersection_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::set_intersection_minimal_n))
+      return __gnu_parallel::parallel_set_intersection(begin1, end1, begin2, end2, result, pred);
+    return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, end2, result, pred);
+  }
+
+  // Public interface without a predicate: orders elements with __gnu_parallel::less over the
+  // two input value types and dispatches on the categories of all three iterators.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  set_intersection(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                   OutputIterator out)
+  {
+    typedef std::iterator_traits<InputIterator1> first_traits;
+    typedef std::iterator_traits<InputIterator2> second_traits;
+    typedef __gnu_parallel::less<typename first_traits::value_type, typename second_traits::value_type> default_less;
+    typedef typename first_traits::iterator_category first_tag;
+    typedef typename second_traits::iterator_category second_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category out_tag;
+
+    return set_intersection_switch(begin1, end1, begin2, end2, out, default_less(),
+                                   first_tag(), second_tag(), out_tag());
+  }
+
+  // Public interface with a caller-supplied predicate.  Dispatches to
+  // set_intersection_switch on the categories of all three iterators.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  set_intersection(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
+                   InputIterator2 end2, OutputIterator out, Predicate pred)
+  {
+    // One category tag per iterator argument, in parameter order.
+    typedef typename std::iterator_traits<InputIterator1>::iterator_category first_tag;
+    typedef typename std::iterator_traits<InputIterator2>::iterator_category second_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category out_tag;
+
+    return set_intersection_switch(begin1, end1, begin2, end2, out, pred,
+                                   first_tag(), second_tag(), out_tag());
+  }
+
+  // Sequential fallback (no predicate): selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::set_symmetric_difference.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1,
+                           InputIterator2 begin2, InputIterator2 end2,
+                           OutputIterator out, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, begin2, end2, out);
+  }
+
+  // Sequential fallback with a caller-supplied predicate: selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::set_symmetric_difference.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1,
+                           InputIterator2 begin2, InputIterator2 end2,
+                           OutputIterator out, Predicate pred, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, begin2, end2, out, pred);
+  }
+
+  // Tag-dispatch fallback: unless all three iterator categories select the random
+  // access overload, _GLIBCXX_STD_P::set_symmetric_difference does the work.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate, typename OutputIterator, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline OutputIterator
+  set_symmetric_difference_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                                  OutputIterator result, Predicate pred, IteratorTag1, IteratorTag2, IteratorTag3)
+  { return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, begin2, end2, result, pred); }
+
+  // Random access on both inputs and the output: parallel_set_symmetric_difference runs when _GLIBCXX_PARALLEL_CONDITION
+  // holds for "either input has at least Settings::set_symmetric_difference_minimal_n elements"; otherwise sequential.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2,
+           typename OutputRandomAccessIterator, typename Predicate>
+  OutputRandomAccessIterator
+  set_symmetric_difference_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
+                                  RandomAccessIterator2 begin2, RandomAccessIterator2 end2,
+                                  OutputRandomAccessIterator result, Predicate pred,
+                                  random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::set_symmetric_difference_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::set_symmetric_difference_minimal_n)))
+      return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, begin2, end2, result, pred);
+    return __gnu_parallel::parallel_set_symmetric_difference(begin1, end1, begin2, end2, result, pred);
+  }
+
+  // Public interface without a predicate: orders elements with __gnu_parallel::less over the
+  // two input value types and dispatches on the categories of all three iterators.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                           OutputIterator out)
+  {
+    typedef std::iterator_traits<InputIterator1> first_traits;
+    typedef std::iterator_traits<InputIterator2> second_traits;
+    typedef __gnu_parallel::less<typename first_traits::value_type, typename second_traits::value_type> default_less;
+    typedef typename first_traits::iterator_category first_tag;
+    typedef typename second_traits::iterator_category second_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category out_tag;
+
+    return set_symmetric_difference_switch(begin1, end1, begin2, end2, out, default_less(),
+                                           first_tag(), second_tag(), out_tag());
+  }
+
+  // Public interface with a caller-supplied predicate.
+  // Dispatches to set_symmetric_difference_switch on the iterator categories
+  // of the two input ranges and of the output iterator.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
+                           InputIterator2 end2, OutputIterator out, Predicate pred)
+  {
+    // One category tag per iterator argument, in parameter order.
+    typedef typename std::iterator_traits<InputIterator1>::iterator_category first_tag;
+    typedef typename std::iterator_traits<InputIterator2>::iterator_category second_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category out_tag;
+
+    return set_symmetric_difference_switch(begin1, end1, begin2, end2, out, pred,
+                                           first_tag(), second_tag(), out_tag());
+  }
+
+  // Sequential fallback (no predicate): selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::set_difference.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  set_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                 OutputIterator out, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::set_difference(begin1, end1, begin2, end2, out); }
+
+  // Sequential fallback with a caller-supplied predicate: selected by a trailing
+  // __gnu_parallel::sequential_tag.  Forwards to _GLIBCXX_STD_P::set_difference.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  set_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                 OutputIterator out, Predicate pred, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::set_difference(begin1, end1, begin2, end2, out, pred); }
+
+  // Tag-dispatch fallback: unless all three iterator categories select the random
+  // access overload, _GLIBCXX_STD_P::set_difference does the work.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate, typename OutputIterator, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline OutputIterator
+  set_difference_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                        OutputIterator result, Predicate pred, IteratorTag1, IteratorTag2, IteratorTag3)
+  {
+    return _GLIBCXX_STD_P::set_difference(begin1, end1, begin2, end2, result, pred);
+  }
+
+  // Random access on both inputs and the output: parallel_set_difference runs when _GLIBCXX_PARALLEL_CONDITION holds
+  // for "either input has at least Settings::set_difference_minimal_n elements"; otherwise _GLIBCXX_STD_P::set_difference.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2,
+           typename OutputRandomAccessIterator, typename Predicate>
+  OutputRandomAccessIterator
+  set_difference_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
+                        RandomAccessIterator2 begin2, RandomAccessIterator2 end2,
+                        OutputRandomAccessIterator result, Predicate pred, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::set_difference_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::set_difference_minimal_n)))
+      return _GLIBCXX_STD_P::set_difference(begin1, end1, begin2, end2, result, pred);
+    return __gnu_parallel::parallel_set_difference(begin1, end1, begin2, end2, result, pred);
+  }
+
+  // Public interface without a predicate: orders elements with __gnu_parallel::less over the
+  // two input value types and dispatches on the categories of all three iterators.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  set_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2,
+                 OutputIterator out)
+  {
+    typedef std::iterator_traits<InputIterator1> first_traits;
+    typedef std::iterator_traits<InputIterator2> second_traits;
+    typedef __gnu_parallel::less<typename first_traits::value_type, typename second_traits::value_type> default_less;
+    typedef typename first_traits::iterator_category first_tag;
+    typedef typename second_traits::iterator_category second_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category out_tag;
+
+    return set_difference_switch(begin1, end1, begin2, end2, out, default_less(),
+                                 first_tag(), second_tag(), out_tag());
+  }
+
+  // Public interface with a caller-supplied predicate.
+  // Dispatches to set_difference_switch on the iterator categories of the
+  // two input ranges and of the output iterator.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
+  inline OutputIterator
+  set_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
+                 InputIterator2 end2, OutputIterator out, Predicate pred)
+  {
+    // One category tag per iterator argument, in parameter order.
+    typedef typename std::iterator_traits<InputIterator1>::iterator_category first_tag;
+    typedef typename std::iterator_traits<InputIterator2>::iterator_category second_tag;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category out_tag;
+
+    return set_difference_switch(begin1, end1, begin2, end2, out, pred,
+                                 first_tag(), second_tag(), out_tag());
+  }
+
+  // Sequential fallback: overload selected by an explicit sequential_tag argument; skips the category dispatch and calls _GLIBCXX_STD_P::adjacent_find directly.
+  template<typename ForwardIterator>
+  inline ForwardIterator
+  adjacent_find(ForwardIterator begin, ForwardIterator end, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::adjacent_find<ForwardIterator>(begin, end);  // the tag is unnamed: it only takes part in overload selection
+  }
+
+  // Sequential fallback with a binary predicate: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::adjacent_find.
+  template<typename ForwardIterator, typename BinaryPredicate>
+  inline ForwardIterator
+  adjacent_find(ForwardIterator begin, ForwardIterator end, BinaryPredicate binary_pred, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::adjacent_find<ForwardIterator, BinaryPredicate>(begin, end, binary_pred);  // explicit template arguments fix the instantiation
+  }
+
+  // Parallel adjacent_find for random access iterators, comparing with equal_to<value_type>; returns end for an empty range or when find_template reports no match.
+  template<typename RandomAccessIterator>
+  RandomAccessIterator
+  adjacent_find_switch(RandomAccessIterator begin, RandomAccessIterator end, random_access_iterator_tag)
+  {
+    typedef typename iterator_traits<RandomAccessIterator>::value_type value_type;
+    if (begin == end) return end;  // empty range: no adjacent pair exists, and forming end - 1 below would step before begin
+    if (_GLIBCXX_PARALLEL_CONDITION(true))
+      {
+	RandomAccessIterator spot = __gnu_parallel::find_template(begin, end - 1, begin, equal_to<value_type>(), __gnu_parallel::adjacent_find_selector()).first;  // the searched range stops one short of end
+	if (spot == (end - 1))  // result equals the shortened range's end: translate to the caller's end
+	  return end;
+	else
+	  return spot;
+      }
+    else
+      return adjacent_find<RandomAccessIterator>(begin, end, __gnu_parallel::sequential_tag());
+  }
+
+  // Sequential fallback: catch-all overload (IteratorTag is a template parameter) used when the random_access_iterator_tag overload does not apply.
+  template<typename ForwardIterator, typename IteratorTag>
+  inline ForwardIterator
+  adjacent_find_switch(ForwardIterator begin, ForwardIterator end, IteratorTag)
+  {
+    return adjacent_find<ForwardIterator>(begin, end, __gnu_parallel::sequential_tag());  // sequential_tag routes to the tagged fallback overload
+  }
+
+  // Public interface: dispatches to adjacent_find_switch on the iterator's category.
+  template<typename ForwardIterator>
+  inline ForwardIterator
+  adjacent_find(ForwardIterator begin, ForwardIterator end)
+  {
+    return adjacent_find_switch(begin, end, typename std::iterator_traits<ForwardIterator>::iterator_category());  // the tag temporary selects the overload
+  }
+
+  // Sequential fallback with a binary predicate: catch-all overload (IteratorTag is a template parameter) used when the random_access_iterator_tag overload does not apply.
+  template<typename ForwardIterator, typename BinaryPredicate, typename IteratorTag>
+  inline ForwardIterator
+  adjacent_find_switch(ForwardIterator begin, ForwardIterator end, BinaryPredicate binary_pred, IteratorTag)
+  {
+    return adjacent_find<ForwardIterator, BinaryPredicate>(begin, end, binary_pred, __gnu_parallel::sequential_tag());  // sequential_tag routes to the tagged fallback overload
+  }
+
+  // Parallel adjacent_find with a binary predicate for random access iterators; the macro is given `true`, so no minimum size is requested here.
+  template<typename RandomAccessIterator, typename BinaryPredicate>
+  RandomAccessIterator
+  adjacent_find_switch(RandomAccessIterator begin, RandomAccessIterator end, BinaryPredicate binary_pred, random_access_iterator_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(true))
+      return __gnu_parallel::find_template(begin, end, begin, binary_pred, __gnu_parallel::adjacent_find_selector()).first;  // NOTE(review): the predicate-less overload passes end - 1 and maps the result back to end; this one passes end unchanged -- confirm adjacent_find_selector never reads past the last element
+    else
+      return adjacent_find(begin, end, binary_pred, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface with a binary predicate: dispatches to adjacent_find_switch on the iterator's category.
+  template<typename ForwardIterator, typename BinaryPredicate>
+  inline ForwardIterator
+  adjacent_find(ForwardIterator begin, ForwardIterator end, BinaryPredicate binary_pred)
+  {
+    return adjacent_find_switch<ForwardIterator, BinaryPredicate>(begin, end, binary_pred, typename std::iterator_traits<ForwardIterator>::iterator_category());  // first two template arguments are explicit; the tag argument selects the overload
+  }
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::count and returns its difference_type result.
+  template<typename InputIterator, typename T>
+  inline typename iterator_traits<InputIterator>::difference_type
+  count(InputIterator begin, InputIterator end, const T& value, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::count<InputIterator, T>(begin, end, value);  // explicit template arguments fix the instantiation
+  }
+
+  // Parallel count for random access iterators; hands over to the sequential overload when the parallel condition does not hold.
+  template<typename RandomAccessIterator, typename T>
+  typename iterator_traits<RandomAccessIterator>::difference_type
+  count_switch(RandomAccessIterator begin, RandomAccessIterator end, const T& value, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    typedef iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::difference_type diff_type;
+
+    // The macro argument asks for at least Settings::count_minimal_n elements and a parallel tag.
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::count_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))))
+      return count<RandomAccessIterator, T>(begin, end, value, __gnu_parallel::sequential_tag());
+
+    diff_type total = 0;
+    __gnu_parallel::count_selector<RandomAccessIterator, diff_type> selector;
+    // total is passed twice (presumably as start value and as output of the reduction -- confirm against for_each_template_random_access).
+    __gnu_parallel::for_each_template_random_access(begin, end, value, selector, std::plus<__gnu_parallel::sequence_index_t>(), total, total, -1, parallelism_tag);
+    return total;
+  }
+
+  // Sequential fallback: catch-all overload (IteratorTag is a template parameter) used when the random access overload does not apply; parallelism_tag is accepted but not used.
+  template<typename InputIterator, typename T, typename IteratorTag>
+  typename iterator_traits<InputIterator>::difference_type
+  count_switch(InputIterator begin, InputIterator end, const T& value, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    return count<InputIterator, T>(begin, end, value, __gnu_parallel::sequential_tag());  // sequential_tag routes to the tagged fallback overload
+  }
+
+  // Public interface: dispatches to count_switch on the iterator's category; parallelism_tag defaults to parallel_unbalanced.
+  template<typename InputIterator, typename T>
+  inline typename iterator_traits<InputIterator>::difference_type
+  count(InputIterator begin, InputIterator end, const T& value, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced)
+  {
+    return count_switch(begin, end, value, typename std::iterator_traits<InputIterator>::iterator_category(), parallelism_tag);  // the tag temporary selects the overload
+  }
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::count_if.
+  template<typename InputIterator, typename Predicate>
+  inline typename iterator_traits<InputIterator>::difference_type
+  count_if(InputIterator begin, InputIterator end, Predicate pred, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::count_if(begin, end, pred);  // the tag is unnamed: it only takes part in overload selection
+  }
+
+  // Parallel count_if for random access iterators; hands over to the sequential overload when the parallel condition does not hold.
+  template<typename RandomAccessIterator, typename Predicate>
+  typename iterator_traits<RandomAccessIterator>::difference_type
+  count_if_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    typedef iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::difference_type diff_type;
+
+    // The macro argument asks for at least Settings::count_minimal_n elements and a parallel tag.
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::count_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))))
+      return count_if<RandomAccessIterator, Predicate>(begin, end, pred, __gnu_parallel::sequential_tag());
+
+    diff_type total = 0;
+    __gnu_parallel::count_if_selector<RandomAccessIterator, diff_type> selector;
+    // total is passed twice (presumably as start value and as output of the reduction -- confirm against for_each_template_random_access).
+    __gnu_parallel::for_each_template_random_access(begin, end, pred, selector, std::plus<__gnu_parallel::sequence_index_t>(), total, total, -1, parallelism_tag);
+    return total;
+  }
+
+  // Sequential fallback: catch-all overload (IteratorTag is a template parameter) used when the random access overload does not apply; the parallelism argument is ignored.
+  template<typename InputIterator, typename Predicate, typename IteratorTag>
+  typename iterator_traits<InputIterator>::difference_type
+  count_if_switch(InputIterator begin, InputIterator end, Predicate pred, IteratorTag, __gnu_parallel::parallelism)
+  {
+    return count_if<InputIterator, Predicate>(begin, end, pred, __gnu_parallel::sequential_tag());  // sequential_tag routes to the tagged fallback overload
+  }
+
+  // Public interface: dispatches to count_if_switch on the iterator's category; parallelism_tag defaults to parallel_unbalanced.
+  template<typename InputIterator, typename Predicate>
+  inline typename iterator_traits<InputIterator>::difference_type
+  count_if(InputIterator begin, InputIterator end, Predicate pred, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced)
+  {
+    // A default-constructed category tag picks the matching count_if_switch overload.
+    typedef typename iterator_traits<InputIterator>::iterator_category category_tag;
+    return count_if_switch(begin, end, pred, category_tag(), parallelism_tag);
+  }
+
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::search.
+  template<typename ForwardIterator1, typename ForwardIterator2>
+  inline ForwardIterator1
+  search(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::search(begin1, end1, begin2, end2);  // the tag is unnamed: it only takes part in overload selection
+  }
+
+  // Parallel search for two random access ranges, comparing elements with __gnu_parallel::equal_to over the two value types.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
+  RandomAccessIterator1
+  search_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, RandomAccessIterator2 end2, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type range1_value;
+    typedef typename std::iterator_traits<RandomAccessIterator2>::value_type range2_value;
+    typedef __gnu_parallel::equal_to<range1_value, range2_value> default_equal;
+
+    // The macro is given `true`: no minimum size is requested for this algorithm.
+    if (!(_GLIBCXX_PARALLEL_CONDITION(true)))
+      return search(begin1, end1, begin2, end2, __gnu_parallel::sequential_tag());
+
+    return __gnu_parallel::search_template(begin1, end1, begin2, end2, default_equal());
+  }
+
+  // Sequential fallback: catch-all overload (both tags are template parameters) used unless both ranges are random access.
+  template<typename ForwardIterator1, typename ForwardIterator2, typename IteratorTag1, typename IteratorTag2>
+  inline ForwardIterator1
+  search_switch(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2, IteratorTag1, IteratorTag2)
+  {
+    return search(begin1, end1, begin2, end2, __gnu_parallel::sequential_tag());  // sequential_tag routes to the tagged fallback overload
+  }
+
+  // Public interface: dispatches to search_switch on the categories of both iterator types.
+  template<typename ForwardIterator1, typename ForwardIterator2>
+  inline ForwardIterator1
+  search(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2)
+  {
+    typedef std::iterator_traits<ForwardIterator1> range1_traits;
+    typedef std::iterator_traits<ForwardIterator2> range2_traits;
+    typedef typename range1_traits::iterator_category range1_tag;
+    typedef typename range2_traits::iterator_category range2_tag;
+
+    return search_switch(begin1, end1, begin2, end2, range1_tag(), range2_tag());
+  }
+
+  // Sequential fallback with a binary predicate: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::search.
+  template<typename ForwardIterator1, typename ForwardIterator2, typename BinaryPredicate>
+  inline ForwardIterator1
+  search(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2, BinaryPredicate pred, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::search(begin1, end1, begin2, end2, pred);  // the tag is unnamed: it only takes part in overload selection
+  }
+
+  // Parallel search with a binary predicate, taken when both ranges are random access.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2,
+	   typename BinaryPredicate>
+  RandomAccessIterator1
+  search_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
+		RandomAccessIterator2 begin2, RandomAccessIterator2 end2, BinaryPredicate  pred, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(true))  // macro is given `true`: no minimum size is requested here
+      return __gnu_parallel::search_template(begin1, end1, begin2, end2, pred);
+    else
+      return search(begin1, end1, begin2, end2, pred, __gnu_parallel::sequential_tag());  // tagged sequential overload
+  }
+
+  // Sequential fallback with a binary predicate: catch-all overload (both tags are template parameters) used unless both ranges are random access.
+  template<typename ForwardIterator1, typename ForwardIterator2,
+	   typename BinaryPredicate, typename IteratorTag1, typename IteratorTag2>
+  inline ForwardIterator1
+  search_switch(ForwardIterator1 begin1, ForwardIterator1 end1,
+		ForwardIterator2 begin2, ForwardIterator2 end2, BinaryPredicate pred, IteratorTag1, IteratorTag2)
+  {
+    return search(begin1, end1, begin2, end2, pred, __gnu_parallel::sequential_tag());  // sequential_tag routes to the tagged fallback overload
+  }
+
+  // Public interface with a binary predicate: dispatches to search_switch on the categories of both iterator types.
+  template<typename ForwardIterator1, typename ForwardIterator2, typename BinaryPredicate>
+  inline ForwardIterator1
+  search(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2, BinaryPredicate  pred)
+  {
+    typedef std::iterator_traits<ForwardIterator1> range1_traits;
+    typedef std::iterator_traits<ForwardIterator2> range2_traits;
+    typedef typename range1_traits::iterator_category range1_tag;
+    typedef typename range2_traits::iterator_category range2_tag;
+    return search_switch(begin1, end1, begin2, end2, pred, range1_tag(), range2_tag());
+  }
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::search_n.
+  template<typename ForwardIterator, typename Integer, typename T>
+  inline ForwardIterator
+  search_n(ForwardIterator begin, ForwardIterator end, Integer count, const T& val, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::search_n(begin, end, count, val); }  // the tag is unnamed: it only takes part in overload selection
+
+  // Sequential fallback with a binary predicate: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::search_n.
+  template<typename ForwardIterator, typename Integer, typename T, typename BinaryPredicate>
+  inline ForwardIterator
+  search_n(ForwardIterator begin, ForwardIterator end, Integer count, const T& val, BinaryPredicate binary_pred, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::search_n(begin, end, count, val, binary_pred);  // the tag is unnamed: it only takes part in overload selection
+  }
+
+  // Public interface without a predicate: supplies __gnu_parallel::equal_to<value_type, T> and defers to the predicate overload of search_n.
+  template<typename ForwardIterator, typename Integer, typename T>
+  inline ForwardIterator
+  search_n(ForwardIterator begin, ForwardIterator end, Integer count, const T& val)
+  {
+    typedef __gnu_parallel::equal_to<typename iterator_traits<ForwardIterator>::value_type, T> default_equal;
+    return search_n(begin, end, count, val, default_equal());
+  }
+
+  // Parallel search_n for random access iterators, expressed as a search_template call over a pseudo_sequence built from (val, count).
+  template<typename RandomAccessIterator, typename Integer, typename T, typename BinaryPredicate>
+  RandomAccessIterator
+  search_n_switch(RandomAccessIterator begin, RandomAccessIterator end, Integer count, const T& val, BinaryPredicate binary_pred, random_access_iterator_tag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(true)))
+      return std::__search_n(begin, end, count, val, binary_pred, random_access_iterator_tag());
+
+    // pseudo_sequence is constructed from (val, count) -- presumably it models
+    // count copies of val; confirm against its definition.
+    __gnu_parallel::pseudo_sequence<T, Integer> repeated(val, count);
+    return __gnu_parallel::search_template(begin, end, repeated.begin(), repeated.end(), binary_pred);
+  }
+
+  // Sequential fallback: catch-all overload (IteratorTag is a template parameter) used when the random access overload does not apply.
+  template<typename ForwardIterator, typename Integer, typename T, typename BinaryPredicate, typename IteratorTag>
+  inline ForwardIterator
+  search_n_switch(ForwardIterator begin, ForwardIterator end, Integer count, const T& val, BinaryPredicate binary_pred, IteratorTag)
+  {
+    return __search_n(begin, end, count, val, binary_pred, IteratorTag());  // unqualified call to the __search_n helper, passing the category tag on
+  }
+
+  // Public interface with a binary predicate: dispatches to search_n_switch on the iterator's category.
+  template<typename ForwardIterator, typename Integer, typename T, typename BinaryPredicate>
+  inline ForwardIterator
+  search_n(ForwardIterator begin, ForwardIterator end, Integer count, const T& val, BinaryPredicate binary_pred)
+  {
+    return search_n_switch(begin, end, count, val, binary_pred, typename std::iterator_traits<ForwardIterator>::iterator_category());  // the tag temporary selects the overload
+  }
+
+  // Sequential fallback (unary): selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::transform.
+  template<typename InputIterator, typename OutputIterator, typename UnaryOperation>
+  inline OutputIterator
+  transform(InputIterator begin, InputIterator end, OutputIterator result, UnaryOperation unary_op, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::transform(begin, end, result, unary_op);  // the tag is unnamed: it only takes part in overload selection
+  }
+
+  // Sequential fallback (binary): selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::transform.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryOperation>
+  inline OutputIterator
+  transform(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, OutputIterator result, BinaryOperation binary_op, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::transform(begin1, end1, begin2, result, binary_op);  // the tag is unnamed: it only takes part in overload selection
+  }
+
+  // Parallel unary transform, taken when both the input and the output iterator are random access.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator3, typename UnaryOperation>
+  RandomAccessIterator3
+  transform1_switch(RandomAccessIterator1 begin, RandomAccessIterator1 end, RandomAccessIterator3 result, UnaryOperation unary_op, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    // The macro argument asks for at least Settings::transform_minimal_n elements and a parallel tag.
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::transform_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))))
+      return transform(begin, end, result, unary_op, __gnu_parallel::sequential_tag());
+
+    bool unused_result = true;
+    typedef __gnu_parallel::iterator_pair<RandomAccessIterator1, RandomAccessIterator3, random_access_iterator_tag> pair_iterator;
+    // Input and output positions are bundled; the output end lies (end - begin) past result.
+    pair_iterator first_pair(begin, result), last_pair(end, result + (end - begin));
+    __gnu_parallel::transform1_selector<pair_iterator> selector;
+    __gnu_parallel::for_each_template_random_access(first_pair, last_pair, unary_op, selector, __gnu_parallel::dummy_reduct(), unused_result, unused_result, -1, parallelism_tag);
+    return selector.finish_iterator;
+  }
+
+  // Sequential fallback: catch-all overload (both tags are template parameters, despite the RandomAccessIterator* parameter names); calls _GLIBCXX_STD_P::transform directly and ignores parallelism_tag.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator3, typename UnaryOperation, typename IteratorTag1, typename IteratorTag2>
+  inline RandomAccessIterator3
+  transform1_switch(RandomAccessIterator1 begin, RandomAccessIterator1 end, RandomAccessIterator3 result, UnaryOperation unary_op, IteratorTag1, IteratorTag2, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    return _GLIBCXX_STD_P::transform(begin, end, result, unary_op);  // no sequential_tag hop here: goes straight to the _GLIBCXX_STD_P implementation
+  }
+
+
+  // Parallel binary transform, taken when both inputs and the output are random access; takes three category tags because the public transform passes three (input 1, input 2, output).
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename BinaryOperation>
+  RandomAccessIterator3
+  transform2_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, RandomAccessIterator3 result, BinaryOperation binary_op, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::transform_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))  // same cast as the unary variant
+      {
+	bool dummy = true;  // passed where the generic for_each template expects result arguments; dummy_reduct is the reduction
+	typedef __gnu_parallel::iterator_triple<RandomAccessIterator1, RandomAccessIterator2, RandomAccessIterator3, random_access_iterator_tag> ip;
+	ip begin_triple(begin1, begin2, result), end_triple(end1, begin2 + (end1 - begin1), result + (end1 - begin1));  // second input and output ends are derived from the length of [begin1, end1)
+	__gnu_parallel::transform2_selector<ip> functionality;
+	__gnu_parallel::for_each_template_random_access(begin_triple, end_triple, binary_op, functionality, __gnu_parallel::dummy_reduct(), dummy, dummy, -1, parallelism_tag);
+	return functionality.finish_iterator;
+      }
+    else
+      return transform(begin1, end1, begin2, result, binary_op, __gnu_parallel::sequential_tag());
+  }
+
+  // Sequential fallback: catch-all overload taking three category tags (all template parameters) plus a parallelism value, which is ignored; calls _GLIBCXX_STD_P::transform directly.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename BinaryOperation, typename tag1, typename tag2, typename tag3>
+  inline RandomAccessIterator3
+  transform2_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, RandomAccessIterator3 result, BinaryOperation binary_op, tag1, tag2, tag3, __gnu_parallel::parallelism)
+  {
+    return _GLIBCXX_STD_P::transform(begin1, end1, begin2, result, binary_op);  // no sequential_tag hop here: goes straight to the _GLIBCXX_STD_P implementation
+  }
+
+  // Public interface (unary): dispatches to transform1_switch on the input and output iterator categories; parallelism_tag defaults to parallel_balanced.
+  template<typename InputIterator, typename OutputIterator, typename UnaryOperation>
+  inline OutputIterator
+  transform(InputIterator begin, InputIterator end, OutputIterator result,
+	    UnaryOperation unary_op, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    typedef std::iterator_traits<InputIterator> in_traits;
+    typedef std::iterator_traits<OutputIterator> out_traits;
+    typedef typename in_traits::iterator_category in_tag;
+    typedef typename out_traits::iterator_category out_tag;
+
+    return transform1_switch(begin, end, result, unary_op,
+			     in_tag(), out_tag(), parallelism_tag);
+  }
+
+  // Public interface (binary): dispatches to transform2_switch on the categories of both inputs and the output; parallelism_tag defaults to parallel_balanced.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryOperation>
+  inline OutputIterator
+  transform(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, OutputIterator result, BinaryOperation binary_op, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    typedef std::iterator_traits<InputIterator1> in1_traits;
+    typedef std::iterator_traits<InputIterator2> in2_traits;
+    typedef std::iterator_traits<OutputIterator> out_traits;
+    typedef typename in1_traits::iterator_category in1_tag;
+    typedef typename in2_traits::iterator_category in2_tag;
+    typedef typename out_traits::iterator_category out_tag;
+
+    // Three tag temporaries followed by the parallelism value.
+    return transform2_switch(begin1, end1, begin2, result, binary_op,
+			     in1_tag(), in2_tag(), out_tag(), parallelism_tag);
+  }
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::replace.
+  template<typename ForwardIterator, typename T>
+  inline void
+  replace(ForwardIterator begin, ForwardIterator end, const T& old_value, const T& new_value, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::replace(begin, end, old_value, new_value); }  // the tag is unnamed: it only takes part in overload selection
+
+  // Sequential fallback: catch-all overload (IteratorTag is a template parameter); parallelism_tag is accepted but not used.
+  template<typename ForwardIterator, typename T, typename IteratorTag>
+  void
+  replace_switch(ForwardIterator begin, ForwardIterator end, const T& old_value, const T& new_value, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+  { replace(begin, end, old_value, new_value, __gnu_parallel::sequential_tag()); }  // sequential_tag routes to the tagged fallback overload
+
+  // Random access overload of replace_switch: no parallel path is implemented here -- like the generic fallback it forwards to the sequential replace and ignores parallelism_tag.
+  template<typename RandomAccessIterator, typename T>
+  void
+  replace_switch(RandomAccessIterator begin, RandomAccessIterator end, const T& old_value, const T& new_value, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  { replace(begin, end, old_value, new_value, __gnu_parallel::sequential_tag()); }  // sequential_tag routes to the tagged fallback overload
+
+  // Public interface: dispatches to replace_switch on the iterator's category; parallelism_tag defaults to parallel_balanced.
+  template<typename ForwardIterator, typename T>
+  inline void
+  replace(ForwardIterator begin, ForwardIterator end, const T& old_value, const T& new_value, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    replace_switch(begin, end, old_value, new_value, typename std::iterator_traits<ForwardIterator>::iterator_category(), parallelism_tag);  // the tag temporary selects the overload
+  }
+
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::replace_if.
+  template<typename ForwardIterator, typename Predicate, typename T>
+  inline void
+  replace_if(ForwardIterator begin, ForwardIterator end, Predicate pred, const T& new_value, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::replace_if(begin, end, pred, new_value); }  // the tag is unnamed: it only takes part in overload selection
+
+  // Sequential fallback: catch-all overload (IteratorTag is a template parameter) used when the random access overload does not apply; parallelism_tag is accepted but not used.
+  template<typename ForwardIterator, typename Predicate, typename T, typename IteratorTag>
+  void
+  replace_if_switch(ForwardIterator begin, ForwardIterator end, Predicate pred, const T& new_value, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+  { replace_if(begin, end, pred, new_value, __gnu_parallel::sequential_tag()); }  // sequential_tag routes to the tagged fallback overload
+
+  // Parallel replace_if for random access iterators; hands over to the sequential overload when the parallel condition does not hold.
+  template<typename RandomAccessIterator, typename Predicate, typename T>
+  void
+  replace_if_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, const T& new_value, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::replace_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))))
+      {
+	replace_if(begin, end, pred, new_value, __gnu_parallel::sequential_tag());
+	return;
+      }
+    bool unused_result;  // left uninitialized, as before; only handed to the call below
+    __gnu_parallel::replace_if_selector<RandomAccessIterator, Predicate, T> selector(new_value);
+    __gnu_parallel::for_each_template_random_access(begin, end, pred, selector, __gnu_parallel::dummy_reduct(), true, unused_result, -1, parallelism_tag);
+  }
+
+  // Public interface: dispatches to replace_if_switch on the iterator's category; parallelism_tag defaults to parallel_balanced.
+  template<typename ForwardIterator, typename Predicate, typename T>
+  inline void
+  replace_if(ForwardIterator begin, ForwardIterator end,
+	     Predicate pred, const T& new_value, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    typedef std::iterator_traits<ForwardIterator> traits_type;
+    typedef typename traits_type::iterator_category category_tag;
+
+    replace_if_switch(begin, end, pred, new_value, category_tag(), parallelism_tag);
+  }
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::generate.
+  template<typename ForwardIterator, typename Generator>
+  inline void
+  generate(ForwardIterator begin, ForwardIterator end, Generator gen, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::generate<ForwardIterator, Generator>(begin, end, gen); }  // explicit template arguments fix the instantiation
+
+  // Sequential fallback: catch-all overload (IteratorTag is a template parameter) used when the random access overload does not apply; parallelism_tag is accepted but not used.
+  template<typename ForwardIterator, typename Generator, typename IteratorTag>
+  void
+  generate_switch(ForwardIterator begin, ForwardIterator end, Generator gen, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+  { generate(begin, end, gen, __gnu_parallel::sequential_tag()); }  // sequential_tag routes to the tagged fallback overload
+
+  // Parallel generate for random access iterators; hands over to the sequential overload when the parallel condition does not hold.
+  template<typename RandomAccessIterator, typename Generator>
+  void
+  generate_switch(RandomAccessIterator begin, RandomAccessIterator end,
+		  Generator gen, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::generate_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))))
+      {
+	generate(begin, end, gen, __gnu_parallel::sequential_tag());
+	return;
+      }
+    bool unused_result;  // left uninitialized, as before; only handed to the call below
+    __gnu_parallel::generate_selector<RandomAccessIterator> selector;
+    __gnu_parallel::for_each_template_random_access(begin, end, gen, selector, __gnu_parallel::dummy_reduct(), true, unused_result, -1, parallelism_tag);
+  }
+
+  // Public interface: dispatches to generate_switch on the iterator's category; parallelism_tag defaults to parallel_balanced.
+  template<typename ForwardIterator, typename Generator>
+  inline void
+  generate(ForwardIterator begin, ForwardIterator end,
+	   Generator gen, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    typedef std::iterator_traits<ForwardIterator> traits_type;
+    typedef typename traits_type::iterator_category category_tag;
+    generate_switch(begin, end, gen, category_tag(), parallelism_tag);
+  }
+
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::generate_n and returns its result.
+  template<typename OutputIterator, typename Size, typename Generator>
+  inline OutputIterator
+  generate_n(OutputIterator begin, Size n, Generator gen, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::generate_n(begin, n, gen); }  // the tag is unnamed: it only takes part in overload selection
+
+  // Sequential fallback: catch-all overload (IteratorTag is a template parameter); the parallelism argument is ignored.
+  template<typename OutputIterator, typename Size, typename Generator, typename IteratorTag>
+  OutputIterator
+  generate_n_switch(OutputIterator begin, Size n, Generator gen, IteratorTag, __gnu_parallel::parallelism)
+  { return generate_n(begin, n, gen, __gnu_parallel::sequential_tag()); }  // sequential_tag routes to the tagged fallback overload
+
+  // Random access overload of generate_n_switch: no parallel path is implemented here -- like the generic fallback it forwards to the sequential generate_n and ignores parallelism_tag.
+  template<typename RandomAccessIterator, typename Size, typename Generator>
+  RandomAccessIterator
+  generate_n_switch(RandomAccessIterator begin, Size n, Generator gen, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  { return generate_n(begin, n, gen, __gnu_parallel::sequential_tag()); }  // sequential_tag routes to the tagged fallback overload
+
+  // Public interface: dispatches to generate_n_switch on the iterator's category; parallelism_tag defaults to parallel_balanced.
+  template<typename OutputIterator, typename Size, typename Generator>
+  inline OutputIterator
+  generate_n(OutputIterator begin, Size n, Generator gen, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    typedef std::iterator_traits<OutputIterator> traits_type;
+    typedef typename traits_type::iterator_category category_tag;
+    return generate_n_switch(begin, n, gen, category_tag(), parallelism_tag);
+  }
+
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to the two-argument _GLIBCXX_STD_P::random_shuffle.
+  template<typename RandomAccessIterator>
+  inline void
+  random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::random_shuffle(begin, end); }  // the tag is unnamed: it only takes part in overload selection
+
+  // Sequential fallback with a caller-supplied generator (taken by reference): selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::random_shuffle.
+  template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  inline void
+  random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator& rand, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::random_shuffle(begin, end, rand); }  // here `rand` names the parameter, not the C library function
+
+
+  /** @brief Functor wrapper for rand(): operator()(limit) returns rand() % limit, so limit must be non-zero; the template parameter is not used in the body. */
+  template<typename must_be_int = int>
+  struct c_rand_number
+  {
+    inline int operator()(int limit)
+    { return rand() % limit; }  // remainder of the raw rand() value by limit
+  };
+
+  // Public interface without a generator: fills in a c_rand_number<> functor and defers to the generator overload.
+  template<typename RandomAccessIterator>
+  inline void
+  random_shuffle(RandomAccessIterator begin, RandomAccessIterator end)
+  {
+    c_rand_number<> r;  // named local, because the generator parameter is a non-const reference
+    // Parallelization still possible: no sequential_tag is passed, so this reaches the untagged generator overload.
+    random_shuffle(begin, end, r);
+  }
+
+  // Shuffle with a caller-supplied generator: parallel_random_shuffle when the parallel condition holds, sequential_random_shuffle otherwise; an empty range returns immediately.
+  template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  void
+  random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator& rand)
+  {
+    if (begin == end) return;  // nothing to shuffle
+
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::random_shuffle_minimal_n)))
+      __gnu_parallel::sequential_random_shuffle(begin, end, rand);
+    else
+      __gnu_parallel::parallel_random_shuffle(begin, end, rand);
+  }
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::partition and returns its result.
+  template<typename ForwardIterator, typename Predicate>
+  inline ForwardIterator
+  partition(ForwardIterator begin, ForwardIterator end, Predicate pred, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::partition(begin, end, pred); }  // the tag is unnamed: it only takes part in overload selection
+
+  // Sequential fallback: catch-all overload (IteratorTag is a template parameter) used when the random access overload does not apply.
+  template<typename ForwardIterator, typename Predicate, typename IteratorTag>
+  inline ForwardIterator
+  partition_switch(ForwardIterator begin, ForwardIterator end, Predicate pred, IteratorTag)
+  { return partition(begin, end, pred, __gnu_parallel::sequential_tag()); }  // sequential_tag routes to the tagged fallback overload
+
+  // Parallel partition for random access iterators; hands over to the sequential overload when the parallel condition does not hold.
+  template<typename RandomAccessIterator, typename Predicate>
+  RandomAccessIterator
+  partition_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, random_access_iterator_tag)
+  {
+    typedef typename std::iterator_traits<RandomAccessIterator>::difference_type diff_type;
+
+    if (!(_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::partition_minimal_n)))
+      return partition(begin, end, pred, __gnu_parallel::sequential_tag());
+
+    // parallel_partition's result is used as an offset from begin, not as an iterator.
+    diff_type split = __gnu_parallel::parallel_partition(begin, end, pred, __gnu_parallel::get_max_threads());
+    return begin + split;
+  }
+
+  // Public interface: dispatches to partition_switch on the iterator's category.
+  template<typename ForwardIterator, typename Predicate>
+  inline ForwardIterator
+  partition(ForwardIterator begin, ForwardIterator end, Predicate pred)
+  {
+    return partition_switch(begin, end, pred, typename std::iterator_traits<ForwardIterator>::iterator_category());  // the tag temporary selects the overload
+  }
+
+  // Sequential fallback: selected by an explicit sequential_tag argument; forwards to the two-argument _GLIBCXX_STD_P::sort.
+  template<typename RandomAccessIterator>
+  inline void
+  sort(RandomAccessIterator begin, RandomAccessIterator end, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::sort<RandomAccessIterator>(begin, end); }  // explicit template argument fixes the instantiation
+
+  // Sequential fallback with a comparator: selected by an explicit sequential_tag argument; forwards to _GLIBCXX_STD_P::sort.
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::sort<RandomAccessIterator, Comparator>(begin, end, comp); }  // explicit template arguments fix the instantiation
+
+  // Public interface without a comparator.
+  // Supplies std::less over the iterator's value_type and defers to the
+  // comparator-taking sort overload.
+  template<typename RandomAccessIterator>
+  inline void
+  sort(RandomAccessIterator begin, RandomAccessIterator end)
+  {
+    sort(begin, end, std::less<typename iterator_traits<RandomAccessIterator>::value_type>());
+  }
+
+  // Public interface with an explicit comparator.  An empty range is a no-op.
+  // Otherwise parallel_sort is called (final argument false; stable_sort passes true) when _GLIBCXX_PARALLEL_CONDITION
+  // accepts a length of at least Settings::sort_minimal_n, and the sequential_tag overload is used in all other cases.
+  template<typename RandomAccessIterator, typename Comparator>
+  void
+  sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp)
+  {
+    if (begin == end)
+      return;
+
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::sort_minimal_n))
+      __gnu_parallel::parallel_sort(begin, end, comp, false);
+    else
+      sort<RandomAccessIterator, Comparator>(begin, end, comp, __gnu_parallel::sequential_tag());
+  }
+
+  // Sequential fallback.
+  // sequential_tag overload that calls _GLIBCXX_STD_P::stable_sort with the
+  // iterator type given as an explicit template argument.
+  template<typename RandomAccessIterator>
+  inline void
+  stable_sort(RandomAccessIterator begin, RandomAccessIterator end, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::stable_sort<RandomAccessIterator>(begin, end); }
+
+  // Sequential fallback with comparator.
+  // sequential_tag overload that calls _GLIBCXX_STD_P::stable_sort with both
+  // template arguments given explicitly.
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  stable_sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::stable_sort<RandomAccessIterator, Comparator>(begin, end, comp); }
+
+  // Public interface without a comparator: orders by std::less over the
+  // iterator's value_type via the comparator-taking stable_sort overload.
+  template<typename RandomAccessIterator>
+  void
+  stable_sort(RandomAccessIterator begin, RandomAccessIterator end)
+  {
+    typedef typename iterator_traits<RandomAccessIterator>::value_type element_type;
+    stable_sort(begin, end, std::less<element_type>());
+  }
+
+  // Public interface with an explicit comparator.  An empty range is a no-op; otherwise parallel_sort (final argument true) is used when
+  // _GLIBCXX_PARALLEL_CONDITION accepts a length of at least Settings::sort_minimal_n, and the sequential_tag overload in all other cases.
+  template<typename RandomAccessIterator, typename Comparator>
+  void
+  stable_sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp)
+  {
+    if (begin == end)
+      return;
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::sort_minimal_n))
+      __gnu_parallel::parallel_sort(begin, end, comp, true);
+    else
+      stable_sort<RandomAccessIterator, Comparator>(begin, end, comp, __gnu_parallel::sequential_tag());
+  }
+
+  // Sequential fallback.
+  // sequential_tag overload: hands both input ranges and the output iterator
+  // to _GLIBCXX_STD_P::merge and returns what that call returns.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result,
+	__gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::merge(begin1, end1, begin2, end2, result); }
+
+  // Sequential fallback with comparator.
+  // sequential_tag overload: hands both input ranges, the output iterator and
+  // comp to _GLIBCXX_STD_P::merge and returns what that call returns.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Comparator>
+  inline OutputIterator
+  merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result, Comparator comp,
+	__gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::merge(begin1, end1, begin2, end2, result, comp); }
+
+  // Catch-all dispatch target for tag combinations not taken by the
+  // all-random-access overload.  The three tag arguments are ignored and the
+  // merge is done by _GLIBCXX_STD_P::merge with the caller's comparator.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Comparator, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline OutputIterator
+  merge_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result, Comparator comp, IteratorTag1, IteratorTag2, IteratorTag3)
+  { return _GLIBCXX_STD_P::merge(begin1, end1, begin2, end2, result, comp); }
+
+  // All three iterators are random access: parallel_merge_advance is used when _GLIBCXX_PARALLEL_CONDITION accepts that either input range has at least
+  // Settings::merge_minimal_n elements, merge_advance otherwise.  Both receive the combined length of the two inputs as the element count.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Comparator>
+  OutputIterator
+  merge_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result, Comparator comp, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION((static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::merge_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::merge_minimal_n)))
+      return __gnu_parallel::parallel_merge_advance(begin1, end1, begin2, end2, result, (end1 - begin1) + (end2 - begin2), comp);
+    return __gnu_parallel::merge_advance(begin1, end1, begin2, end2, result, (end1 - begin1) + (end2 - begin2), comp);
+  }
+
+  // Public interface with an explicit comparator.
+  // Looks up the iterator category of each of the three iterator types
+  // and passes one default-constructed tag per iterator to merge_switch.
+  // Overload resolution on those tags then selects either the
+  // random access overload or the catch-all sequential one.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Comparator>
+  inline OutputIterator
+  merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result, Comparator comp)
+  {
+    // Tag types for first input, second input and output, in that order.
+    typedef typename std::iterator_traits<InputIterator1>::iterator_category category1;
+    typedef typename std::iterator_traits<InputIterator2>::iterator_category category2;
+    typedef typename std::iterator_traits<OutputIterator>::iterator_category category_out;
+
+    return merge_switch(begin1, end1, begin2, end2, result, comp, category1(), category2(), category_out());
+  }
+
+
+  // Public interface without a comparator.
+  // The default comparator is __gnu_parallel::less instantiated with the
+  // value types of both input iterators, which are looked up separately.
+  template<typename InputIterator1, typename InputIterator2, typename OutputIterator>
+  inline OutputIterator
+  merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result)
+  {
+    typedef typename std::iterator_traits<InputIterator1>::value_type value1_type;
+    typedef typename std::iterator_traits<InputIterator2>::value_type value2_type;
+    typedef __gnu_parallel::less<value1_type, value2_type> default_less;
+    return merge(begin1, end1, begin2, end2, result, default_less());
+  }
+
+  // Sequential fallback: sequential_tag overload forwarding begin, nth and end to _GLIBCXX_STD_P::nth_element.
+  template<typename RandomAccessIterator>
+  inline void
+  nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::nth_element(begin, nth, end); }
+
+  // Sequential fallback with comparator: sequential_tag overload forwarding begin, nth, end and comp to _GLIBCXX_STD_P::nth_element.
+  template<typename RandomAccessIterator, typename Comparator>
+  void
+  nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end, Comparator comp, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::nth_element(begin, nth, end, comp); }
+
+  // Public interface with comparator: parallel_nth_element when _GLIBCXX_PARALLEL_CONDITION accepts a length of at least Settings::nth_element_minimal_n, else the sequential_tag overload.
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end, Comparator comp)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::nth_element_minimal_n))
+      __gnu_parallel::parallel_nth_element(begin, nth, end, comp);
+    else
+      nth_element(begin, nth, end, comp, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface without a comparator: uses std::less over the
+  // iterator's value_type and defers to the comparator-taking overload.
+  template<typename RandomAccessIterator>
+  void
+  nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end)
+  {
+    nth_element(begin, nth, end, std::less<typename iterator_traits<RandomAccessIterator>::value_type>());
+  }
+
+  // Sequential fallback with comparator: sequential_tag overload forwarding begin, middle, end and comp to _GLIBCXX_STD_P::partial_sort.
+  template<typename _RandomAccessIterator, typename _Compare>
+  void
+  partial_sort(_RandomAccessIterator begin, _RandomAccessIterator middle, _RandomAccessIterator end, _Compare comp, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::partial_sort(begin, middle, end, comp); }
+
+  // Sequential fallback: sequential_tag overload forwarding begin, middle and end to _GLIBCXX_STD_P::partial_sort.
+  template<typename _RandomAccessIterator>
+  void
+  partial_sort(_RandomAccessIterator begin, _RandomAccessIterator middle, _RandomAccessIterator end, __gnu_parallel::sequential_tag)
+  { _GLIBCXX_STD_P::partial_sort(begin, middle, end); }
+
+  // Public interface with comparator: parallel_partial_sort when _GLIBCXX_PARALLEL_CONDITION accepts a length of at least Settings::partial_sort_minimal_n, else the sequential_tag overload.
+  template<typename _RandomAccessIterator, typename _Compare>
+  void
+  partial_sort(_RandomAccessIterator begin, _RandomAccessIterator middle, _RandomAccessIterator end, _Compare comp)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::partial_sort_minimal_n))
+      __gnu_parallel::parallel_partial_sort(begin, middle, end, comp);
+    else
+      partial_sort(begin, middle, end, comp, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface without a comparator: uses std::less over the
+  // iterator's value_type and defers to the comparator-taking overload.
+  template<typename _RandomAccessIterator>
+  void
+  partial_sort(_RandomAccessIterator begin, _RandomAccessIterator middle, _RandomAccessIterator end)
+  {
+    partial_sort(begin, middle, end, std::less<typename iterator_traits<_RandomAccessIterator>::value_type>());
+  }
+
+  // Sequential fallback: sequential_tag overload forwarding the range to _GLIBCXX_STD_P::max_element.
+  template<typename ForwardIterator>
+  inline ForwardIterator
+  max_element(ForwardIterator begin, ForwardIterator end, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::max_element(begin, end); }
+
+  // Sequential fallback with comparator: sequential_tag overload forwarding the range and comp to _GLIBCXX_STD_P::max_element.
+  template<typename ForwardIterator, typename Comparator>
+  inline ForwardIterator
+  max_element(ForwardIterator begin, ForwardIterator end, Comparator comp, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::max_element(begin, end, comp); }
+
+  // Catch-all dispatch target: the iterator tag and parallelism_tag are both unused; always calls the comparator-taking sequential_tag overload.
+  template<typename ForwardIterator, typename Comparator, typename IteratorTag>
+  ForwardIterator
+  max_element_switch(ForwardIterator begin, ForwardIterator end, Comparator comp, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+  { return max_element(begin, end, comp, __gnu_parallel::sequential_tag()); }
+
+  // Public interface without a comparator: compares with std::less over the iterator's value_type and
+  // hands parallelism_tag (default parallel_balanced) on to the comparator-taking overload.
+  template<typename ForwardIterator>
+  inline ForwardIterator
+  max_element(ForwardIterator begin, ForwardIterator end, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    return max_element(begin, end, std::less<typename iterator_traits<ForwardIterator>::value_type>(), parallelism_tag);
+  }
+
+  // Parallel algorithm for random access iterators: a for_each_template_random_access reduction with max_element_reduct(comp) when the range has at least Settings::max_element_minimal_n elements and parallelism_tag is parallel; otherwise the sequential overload, which must receive comp too so both paths use the same ordering.
+  template<typename RandomAccessIterator, typename Comparator>
+  RandomAccessIterator
+  max_element_switch(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::max_element_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+      {
+	RandomAccessIterator res(begin);
+	__gnu_parallel::identity_selector<RandomAccessIterator> functionality;
+	__gnu_parallel::for_each_template_random_access(begin, end, __gnu_parallel::nothing(), functionality, __gnu_parallel::max_element_reduct<Comparator, RandomAccessIterator>(comp), res, res, -1, parallelism_tag);
+	return res;
+      }
+    return max_element(begin, end, comp, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface with an explicit comparator.
+  // Dispatches to max_element_switch on ForwardIterator's iterator category;
+  // parallelism_tag defaults to parallel_balanced and is passed through.
+  template<typename ForwardIterator, typename Comparator>
+  inline ForwardIterator
+  max_element(ForwardIterator begin, ForwardIterator end, Comparator comp, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  { return max_element_switch(begin, end, comp, typename std::iterator_traits<ForwardIterator>::iterator_category(), parallelism_tag); }
+
+  // Sequential fallback: sequential_tag overload that forwards the range
+  // to _GLIBCXX_STD_P::min_element.
+  template<typename ForwardIterator>
+  inline ForwardIterator
+  min_element(ForwardIterator begin, ForwardIterator end, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::min_element(begin, end); }
+
+  // Sequential fallback with comparator: sequential_tag overload forwarding the range and comp to _GLIBCXX_STD_P::min_element.
+  template<typename ForwardIterator, typename Comparator>
+  inline ForwardIterator
+  min_element(ForwardIterator begin, ForwardIterator end, Comparator comp, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::min_element(begin, end, comp); }
+
+  // Public interface without a comparator: compares with std::less over the iterator's value_type and
+  // hands parallelism_tag (default parallel_balanced) on to the comparator-taking overload.
+  template<typename ForwardIterator>
+  inline ForwardIterator
+  min_element(ForwardIterator begin, ForwardIterator end, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    return min_element(begin, end, std::less<typename iterator_traits<ForwardIterator>::value_type>(), parallelism_tag);
+  }
+
+  // Catch-all dispatch target: the iterator tag and parallelism_tag are both unused; always calls the comparator-taking sequential_tag overload.
+  template<typename ForwardIterator, typename Comparator, typename IteratorTag>
+  ForwardIterator
+  min_element_switch(ForwardIterator begin, ForwardIterator end, Comparator comp, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+  { return min_element(begin, end, comp, __gnu_parallel::sequential_tag()); }
+
+  // Parallel algorithm for random access iterators: a for_each_template_random_access reduction with min_element_reduct(comp) when the range has at least
+  // Settings::min_element_minimal_n elements and parallelism_tag is parallel; otherwise the sequential overload, which must receive comp too so both paths use the same ordering.
+  template<typename RandomAccessIterator, typename Comparator>
+  RandomAccessIterator
+  min_element_switch(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::min_element_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+      {
+	RandomAccessIterator res(begin);
+	__gnu_parallel::identity_selector<RandomAccessIterator> functionality;
+	__gnu_parallel::for_each_template_random_access(begin, end, __gnu_parallel::nothing(), functionality, __gnu_parallel::min_element_reduct<Comparator, RandomAccessIterator>(comp), res, res, -1, parallelism_tag);
+	return res;
+      }
+    return min_element(begin, end, comp, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface with an explicit comparator: dispatches to min_element_switch on
+  // ForwardIterator's iterator category and passes parallelism_tag (default parallel_balanced) through.
+  template<typename ForwardIterator, typename Comparator>
+  inline ForwardIterator
+  min_element(ForwardIterator begin, ForwardIterator end, Comparator comp, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    typedef typename iterator_traits<ForwardIterator>::iterator_category category;
+    return min_element_switch(begin, end, comp, category(), parallelism_tag);
+  }
+} // end namespace
+} // end namespace
+
+#endif /* _GLIBCXX_ALGORITHM_H */
+
diff --git a/libstdc++-v3/include/parallel/algobase.h b/libstdc++-v3/include/parallel/algobase.h
new file mode 100644
index 00000000000..0bd8b39afcc
--- /dev/null
+++ b/libstdc++-v3/include/parallel/algobase.h
@@ -0,0 +1,256 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/algobase.h
+ *  @brief Parallel STL function calls corresponding to the
+ *  stl_algobase.h header.  The functions defined here mainly do case
+ *  switches and call the actual parallelized versions in other files.
+ *  Inlining policy: Functions that basically only contain one
+ *  function call, are declared inline.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_ALGOBASE_H
+#define _GLIBCXX_PARALLEL_ALGOBASE_H 1
+
+#include <parallel/algorithmfwd.h>
+#include <bits/stl_algobase.h>
+#include <parallel/base.h>
+#include <parallel/tags.h>
+#include <parallel/settings.h>
+#include <parallel/find.h>
+#include <parallel/find_selectors.h>
+#include <parallel/for_each.h>
+#include <parallel/for_each_selectors.h>
+
+namespace std
+{
+namespace __parallel
+{
+  // Sequential fallback.
+  // sequential_tag overload that calls _GLIBCXX_STD_P::equal with both
+  // iterator types given as explicit template arguments.
+  template<typename InputIterator1, typename InputIterator2>
+  inline bool
+  equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::equal<InputIterator1, InputIterator2>(begin1, end1, begin2); }
+
+  // Sequential fallback with predicate: sequential_tag overload forwarding the ranges and pred to _GLIBCXX_STD_P::equal.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate>
+  inline bool
+  equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, Predicate pred, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::equal(begin1, end1, begin2, pred); }
+
+  // Public interface: built on mismatch; the ranges are equal exactly when mismatch stops at end1 in the first range.
+  template<typename InputIterator1, typename InputIterator2>
+  inline bool
+  equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2)
+  { return mismatch(begin1, end1, begin2).first == end1; }
+
+  // Public interface with predicate: built on mismatch with pred; the ranges are equal exactly when mismatch stops at end1 in the first range.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate>
+  inline bool
+  equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, Predicate pred)
+  { return mismatch(begin1, end1, begin2, pred).first == end1; }
+
+  // NB: lexicographical_compare requires mismatch.
+
+  // Sequential fallback.
+  // sequential_tag overload that calls _GLIBCXX_STD_P::mismatch with both
+  // iterator types given as explicit template arguments.
+  template<typename InputIterator1, typename InputIterator2>
+  inline pair<InputIterator1, InputIterator2>
+  mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::mismatch<InputIterator1, InputIterator2>(begin1, end1, begin2); }
+
+  // Sequential fallback with predicate: sequential_tag overload forwarding the ranges and pred to _GLIBCXX_STD_P::mismatch.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate>
+  inline pair<InputIterator1, InputIterator2>
+  mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, Predicate pred, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); }
+
+  // Catch-all dispatch target: both tag arguments are unconstrained and ignored; the work is done by _GLIBCXX_STD_P::mismatch with pred.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate, typename IteratorTag1, typename IteratorTag2>
+  inline pair<InputIterator1, InputIterator2>
+  mismatch_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, Predicate pred, IteratorTag1, IteratorTag2)
+  { return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); }
+
+  // Parallel mismatch for random access iterators.
+  // The .first member of find_template's result (called with mismatch_selector) is the position reported for the first
+  // range; the position in the second range is rebuilt from the same offset.  The size test passed to the macro is simply true.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Predicate>
+  pair<RandomAccessIterator1, RandomAccessIterator2>
+  mismatch_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Predicate pred, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(true))
+      {
+	RandomAccessIterator1 stop1 = __gnu_parallel::find_template(begin1, end1, begin2, pred, __gnu_parallel::mismatch_selector()).first;
+	return make_pair(stop1, begin2 + (stop1 - begin1));
+      }
+    return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred);
+  }
+
+  // Public interface without a predicate.
+  // Elements are compared with __gnu_parallel::equal_to instantiated with
+  // the value types of both iterators; the call is dispatched to
+  // mismatch_switch on the two iterator categories.
+  template<typename InputIterator1, typename InputIterator2>
+  inline pair<InputIterator1, InputIterator2>
+  mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2)
+  {
+    typedef std::iterator_traits<InputIterator1> traits1;
+    typedef std::iterator_traits<InputIterator2> traits2;
+    typedef __gnu_parallel::equal_to<typename traits1::value_type, typename traits2::value_type> equal_type;
+
+    return mismatch_switch(begin1, end1, begin2, equal_type(), typename traits1::iterator_category(), typename traits2::iterator_category());
+  }
+
+  // Public interface with an explicit predicate.
+  // Dispatches to mismatch_switch with one default-constructed
+  // iterator_category tag per input iterator.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate>
+  inline pair<InputIterator1, InputIterator2>
+  mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
+	   Predicate pred)
+  {
+    typedef typename std::iterator_traits<InputIterator1>::iterator_category category1;
+    typedef typename std::iterator_traits<InputIterator2>::iterator_category category2;
+
+    return mismatch_switch(begin1, end1, begin2, pred, category1(), category2());
+  }
+
+  // Sequential fallback.
+  // sequential_tag overload that calls _GLIBCXX_STD_P::lexicographical_compare
+  // with both iterator types given as explicit template arguments.
+  template<typename InputIterator1, typename InputIterator2>
+  inline bool
+  lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::lexicographical_compare<InputIterator1, InputIterator2>(begin1, end1, begin2, end2); }
+
+  // Sequential fallback with predicate.
+  // sequential_tag overload that forwards both ranges and pred to
+  // _GLIBCXX_STD_P::lexicographical_compare.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate>
+  inline bool
+  lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, Predicate pred, __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, begin2, end2, pred); }
+
+  // Catch-all dispatch target for tag pairs not taken by the random access
+  // overload.  Both tag arguments are ignored and the comparison is done by
+  // _GLIBCXX_STD_P::lexicographical_compare with pred.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate, typename IteratorTag1, typename IteratorTag2>
+  inline bool
+  lexicographical_compare_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, Predicate pred, IteratorTag1, IteratorTag2)
+  { return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, begin2, end2, pred); }
+
+  // Parallel lexicographical_compare for random access iterators
+  // Limitation: Both valuetypes must be the same
+  //
+  // The common prefix is skipped with mismatch_switch, using an equality
+  // test derived from pred (equal_from_less); the answer then follows
+  // from where the mismatch stopped.
+  // pred is applied to the stopping position only after checking that the
+  // mismatch did not run to the end of the range it walked, so an end
+  // iterator is never dereferenced when one range is a prefix of (or equal
+  // to) the other.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Predicate>
+  bool
+  lexicographical_compare_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, RandomAccessIterator2 end2, Predicate pred, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(true))
+      {
+	typedef iterator_traits<RandomAccessIterator1> traits1_type;
+	typedef typename traits1_type::value_type value1_type;
+
+	typedef iterator_traits<RandomAccessIterator2> traits2_type;
+	typedef typename traits2_type::value_type value2_type;
+
+	typedef __gnu_parallel::equal_from_less<Predicate, value1_type, value2_type> equal_type;
+
+	// mismatch is only given the end of its first range, so the
+	// shorter sequence has to be passed in first place.
+	if ((end1 - begin1) < (end2 - begin2))
+	  {
+	    typedef pair<RandomAccessIterator1, RandomAccessIterator2> pair_type;
+	    pair_type mm = mismatch_switch(begin1, end1, begin2, equal_type(pred), random_access_iterator_tag(), random_access_iterator_tag());
+
+	    // Less because shorter (a proper prefix), or else because
+	    // the first differing element is less.
+	    return mm.first == end1 || bool(pred(*mm.first, *mm.second));
+	  }
+	else
+	  {
+	    typedef pair<RandomAccessIterator2, RandomAccessIterator1> pair_type;
+	    pair_type mm = mismatch_switch(begin2, end2, begin1, equal_type(pred), random_access_iterator_tag(), random_access_iterator_tag());
+
+	    // The first sequence is not the shorter one: it is less only if
+	    // a differing pair exists and its own element is the lesser.
+	    return mm.first != end2 && bool(pred(*mm.second, *mm.first));
+	  }
+      }
+    else
+      return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, begin2, end2, pred);
+  }
+
+  // Public interface without a predicate.
+  // Dispatches to lexicographical_compare_switch with a default comparator
+  // and one iterator_category tag per input iterator.
+  template<typename InputIterator1, typename InputIterator2>
+  inline bool
+  lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2)
+  {
+    typedef iterator_traits<InputIterator1> traits1;
+    typedef iterator_traits<InputIterator2> traits2;
+
+    // Default ordering: __gnu_parallel::less over the two value types.
+    typedef __gnu_parallel::less<typename traits1::value_type, typename traits2::value_type> default_less;
+
+    return lexicographical_compare_switch(begin1, end1, begin2, end2, default_less(),
+					  typename traits1::iterator_category(), typename traits2::iterator_category());
+  }
+
+  // Public interface with an explicit predicate.
+  // Dispatches to lexicographical_compare_switch with pred and one
+  // default-constructed iterator_category tag per input iterator; the
+  // overload chosen by those tags does the actual comparison.
+  template<typename InputIterator1, typename InputIterator2, typename Predicate>
+  inline bool
+  lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, Predicate pred)
+  {
+    typedef typename iterator_traits<InputIterator1>::iterator_category category1;
+    typedef typename iterator_traits<InputIterator2>::iterator_category category2;
+
+    return lexicographical_compare_switch(begin1, end1, begin2, end2, pred, category1(), category2());
+  }
+} // end namespace
+} // end namespace
+
+#endif /* _GLIBCXX_PARALLEL_ALGOBASE_H */
diff --git a/libstdc++-v3/include/parallel/algorithm b/libstdc++-v3/include/parallel/algorithm
new file mode 100644
index 00000000000..0672e372eb9
--- /dev/null
+++ b/libstdc++-v3/include/parallel/algorithm
@@ -0,0 +1,45 @@
+// Algorithm extensions -*- C++ -*-
+
+// Copyright (C) 2007
+// Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING.  If not, write to the Free
+// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+// USA.
+
+// As a special exception, you may use this file as part of a free software
+// library without restriction.  Specifically, if other files instantiate
+// templates or use macros or inline functions from this file, or you compile
+// this file and link it with other files to produce an executable, this
+// file does not by itself cause the resulting executable to be covered by
+// the GNU General Public License.  This exception does not however
+// invalidate any other reasons why the executable file might be covered by
+// the GNU General Public License.
+
+/** @file parallel/algorithm
+ *  This file is a GNU extension to the Standard C++ Library.
+ */
+
+#ifndef _PARALLEL_ALGORITHM
+#define _PARALLEL_ALGORITHM 1
+
+#pragma GCC system_header
+
+#include <algorithm>
+#include <parallel/algorithmfwd.h>
+#include <parallel/algobase.h>
+#include <parallel/algo.h>
+
+#endif
diff --git a/libstdc++-v3/include/parallel/algorithmfwd.h b/libstdc++-v3/include/parallel/algorithmfwd.h
new file mode 100644
index 00000000000..319091904cd
--- /dev/null
+++ b/libstdc++-v3/include/parallel/algorithmfwd.h
@@ -0,0 +1,719 @@
+// <algorithm> parallel extensions -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/algorithmfwd.h
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+#ifndef _GLIBCXX_PARALLEL_ALGORITHMFWD_H
+#define _GLIBCXX_PARALLEL_ALGORITHMFWD_H 1
+
+#pragma GCC system_header
+
+#include <parallel/tags.h>
+#include <parallel/settings.h>
+
+namespace std
+{
+namespace __parallel
+{
+  template<typename _FIter>
+  inline _FIter
+  adjacent_find(_FIter, _FIter, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter, typename BinaryPredicate>
+  inline _FIter
+  adjacent_find(_FIter, _FIter, BinaryPredicate, __gnu_parallel::sequential_tag);
+  // The two overloads above take __gnu_parallel::sequential_tag; the two below take no tag.
+  template<typename _FIter>
+  inline _FIter
+  adjacent_find(_FIter, _FIter);
+
+  template<typename _FIter, typename BinaryPredicate>
+  inline _FIter
+  adjacent_find(_FIter, _FIter, BinaryPredicate);
+  // adjacent_find_switch overloads differ by category tag: random_access_iterator_tag vs. generic IteratorTag.
+  template<typename _RAIter>
+  _RAIter
+  adjacent_find_switch(_RAIter, _RAIter, random_access_iterator_tag);
+
+  template<typename _FIter, typename IteratorTag>
+  inline _FIter
+  adjacent_find_switch(_FIter, _FIter, IteratorTag);
+
+  template<typename _FIter, typename BinaryPredicate, typename IteratorTag>
+  inline _FIter
+  adjacent_find_switch(_FIter, _FIter, BinaryPredicate, IteratorTag);
+
+  template<typename _RAIter, typename BinaryPredicate>
+  _RAIter
+  adjacent_find_switch(_RAIter, _RAIter, BinaryPredicate, random_access_iterator_tag);
+
+  // count: a sequential_tag overload, then one whose parallelism argument defaults to parallel_unbalanced.
+  template<typename _IIter, typename T>
+  inline typename iterator_traits<_IIter>::difference_type
+  count(_IIter, _IIter, const T& value, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename T>
+  inline typename iterator_traits<_IIter>::difference_type
+  count(_IIter, _IIter, const T& value, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced);
+  // count_switch overloads differ by category tag: random_access_iterator_tag vs. generic IteratorTag.
+  template<typename _RAIter, typename T>
+  typename iterator_traits<_RAIter>::difference_type
+  count_switch(_RAIter, _RAIter, const T& value, random_access_iterator_tag, __gnu_parallel::parallelism);
+
+  template<typename _IIter, typename T, typename IteratorTag>
+  typename iterator_traits<_IIter>::difference_type
+  count_switch(_IIter, _IIter, const T& value, IteratorTag, __gnu_parallel::parallelism);
+
+  // count_if: a sequential_tag overload, then one whose parallelism argument defaults to parallel_unbalanced.
+  template<typename _IIter, typename Predicate>
+  inline typename iterator_traits<_IIter>::difference_type
+  count_if(_IIter, _IIter, Predicate, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename Predicate>
+  inline typename iterator_traits<_IIter>::difference_type
+  count_if(_IIter, _IIter, Predicate, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced);
+  // count_if_switch overloads differ by category tag: random_access_iterator_tag vs. generic IteratorTag.
+  template<typename _RAIter, typename Predicate>
+  typename iterator_traits<_RAIter>::difference_type
+  count_if_switch(_RAIter, _RAIter, Predicate, random_access_iterator_tag, __gnu_parallel::parallelism);
+
+  template<typename _IIter, typename Predicate, typename IteratorTag>
+  typename iterator_traits<_IIter>::difference_type
+  count_if_switch(_IIter, _IIter, Predicate, IteratorTag, __gnu_parallel::parallelism);
+
+  // algobase.h (NOTE(review): presumably marks declarations defined in parallel/algobase.h -- confirm its extent)
+  template<typename _IIter1, typename _IIter2>
+  inline bool
+  equal(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename Predicate>
+  inline bool
+  equal(_IIter1, _IIter1, _IIter2, Predicate, __gnu_parallel::sequential_tag);
+  // The two equal overloads below take no tag argument; no equal_switch is declared in this header.
+  template<typename _IIter1, typename _IIter2>
+  inline bool
+  equal(_IIter1, _IIter1, _IIter2);
+
+  template<typename _IIter1, typename _IIter2, typename Predicate>
+  inline bool
+  equal(_IIter1, _IIter1, _IIter2, Predicate);
+  // find: a sequential_tag overload and an overload without a tag argument.
+  template<typename _IIter, typename T>
+  inline _IIter
+  find(_IIter, _IIter, const T&, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename T>
+  inline _IIter
+  find(_IIter, _IIter, const T& val);
+  // find_switch overloads differ by category tag: generic IteratorTag vs. random_access_iterator_tag.
+  template<typename _IIter, typename T, typename IteratorTag>
+  inline _IIter
+  find_switch(_IIter, _IIter, const T&, IteratorTag);
+
+  template<typename _RAIter, typename T>
+  _RAIter
+  find_switch(_RAIter, _RAIter, const T&, random_access_iterator_tag);
+  // find_if: a sequential_tag overload and an overload without a tag argument.
+  template<typename _IIter, typename Predicate>
+  inline _IIter
+  find_if(_IIter, _IIter, Predicate, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename Predicate>
+  inline _IIter
+  find_if (_IIter, _IIter, Predicate);
+  // find_if_switch overloads differ by category tag: generic IteratorTag vs. random_access_iterator_tag.
+  template<typename _IIter, typename Predicate, typename IteratorTag>
+  inline _IIter
+  find_if_switch(_IIter, _IIter, Predicate, IteratorTag);
+
+  template<typename _RAIter, typename Predicate>
+  _RAIter
+  find_if_switch(_RAIter, _RAIter, Predicate, random_access_iterator_tag);
+  // find_first_of: two sequential_tag overloads (with/without BinaryPredicate), then two without a tag.
+  template<typename _IIter, typename _FIter>
+  inline _IIter
+  find_first_of(_IIter, _IIter, _FIter, _FIter, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename _FIter, typename BinaryPredicate>
+  inline _IIter
+  find_first_of(_IIter, _IIter, _FIter, _FIter, BinaryPredicate, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename _FIter, typename BinaryPredicate>
+  inline _IIter
+  find_first_of(_IIter, _IIter, _FIter, _FIter, BinaryPredicate);
+
+  template<typename _IIter, typename _FIter>
+  _IIter
+  find_first_of(_IIter, _IIter, _FIter, _FIter);
+  // find_first_of_switch: one category tag per range; one overload fixes the first tag to random_access_iterator_tag.
+  template<typename _IIter, typename _FIter, typename IteratorTag1, typename IteratorTag2>
+  inline _IIter
+  find_first_of_switch(_IIter, _IIter, _FIter, _FIter, IteratorTag1, IteratorTag2);
+
+  template<typename _RAIter, typename _FIter, typename BinaryPredicate, typename IteratorTag>
+  inline _RAIter
+  find_first_of_switch(_RAIter, _RAIter, _FIter, _FIter, BinaryPredicate, random_access_iterator_tag, IteratorTag);
+
+  template<typename _IIter, typename _FIter, typename BinaryPredicate, typename IteratorTag1, typename IteratorTag2>
+  inline _IIter
+  find_first_of_switch(_IIter, _IIter, _FIter, _FIter, BinaryPredicate, IteratorTag1, IteratorTag2);
+
+  // for_each: a sequential_tag overload, then one whose parallelism argument defaults to parallel_balanced.
+  template<typename _IIter, typename Function>
+  inline Function
+  for_each(_IIter, _IIter, Function f, __gnu_parallel::sequential_tag);
+
+  template<typename Iterator, typename Function>
+  inline Function
+  for_each(Iterator, Iterator, Function f, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+  // for_each_switch overloads differ by category tag: generic IteratorTag vs. random_access_iterator_tag.
+  template<typename _IIter, typename Function, typename IteratorTag>
+  Function
+  for_each_switch(_IIter, _IIter, Function f, IteratorTag, __gnu_parallel::parallelism);
+
+  template<typename _RAIter, typename Function>
+  Function
+  for_each_switch(_RAIter, _RAIter, Function f, random_access_iterator_tag, __gnu_parallel::parallelism);
+  // generate: a sequential_tag overload, then one whose parallelism argument defaults to parallel_balanced.
+  template<typename _FIter, typename Generator>
+  inline void
+  generate(_FIter, _FIter, Generator, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter, typename Generator>
+  inline void
+  generate(_FIter, _FIter, Generator, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+  // generate_switch overloads differ by category tag: generic IteratorTag vs. random_access_iterator_tag.
+  template<typename _FIter, typename Generator, typename IteratorTag>
+  void
+  generate_switch(_FIter, _FIter, Generator, IteratorTag, __gnu_parallel::parallelism);
+
+  template<typename _RAIter, typename Generator>
+  void
+  generate_switch(_RAIter, _RAIter, Generator, random_access_iterator_tag, __gnu_parallel::parallelism);
+  // generate_n: a sequential_tag overload, then one whose parallelism argument defaults to parallel_balanced.
+  template<typename _OIter, typename Size, typename Generator>
+  inline _OIter
+  generate_n(_OIter, Size, Generator, __gnu_parallel::sequential_tag);
+
+  template<typename _OIter, typename Size, typename Generator>
+  inline _OIter
+  generate_n(_OIter, Size, Generator, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+  // generate_n_switch overloads differ by category tag: generic IteratorTag vs. random_access_iterator_tag.
+  template<typename _OIter, typename Size, typename Generator, typename IteratorTag>
+  _OIter
+  generate_n_switch(_OIter, Size, Generator, IteratorTag, __gnu_parallel::parallelism);
+
+  template<typename _RAIter, typename Size, typename Generator>
+  _RAIter
+  generate_n_switch(_RAIter, Size, Generator, random_access_iterator_tag, __gnu_parallel::parallelism);
+  // lexicographical_compare: two sequential_tag overloads (with/without Predicate), then two without a tag.
+  template<typename _IIter1, typename _IIter2>
+  inline bool
+  lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename Predicate>
+  inline bool
+  lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, Predicate, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2>
+  inline bool
+  lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2);
+
+  template<typename _IIter1, typename _IIter2, typename Predicate>
+  inline bool
+  lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, Predicate);
+  // lexicographical_compare_switch: declared only with a Predicate; overloaded on the two category tags.
+  template<typename _IIter1, typename _IIter2, typename Predicate, typename IteratorTag1, typename IteratorTag2>
+  inline bool
+  lexicographical_compare_switch(_IIter1, _IIter1, _IIter2, _IIter2, Predicate, IteratorTag1, IteratorTag2);
+
+  template<typename _RAIter1, typename _RAIter2, typename Predicate>
+  bool
+  lexicographical_compare_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Predicate, random_access_iterator_tag, random_access_iterator_tag);
+
+  // algo.h (NOTE(review): presumably marks declarations defined in parallel/algo.h -- confirm its extent)
+  template<typename _IIter1, typename _IIter2>
+  inline pair<_IIter1, _IIter2>
+  mismatch(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename Predicate>
+  inline pair<_IIter1, _IIter2>
+  mismatch(_IIter1, _IIter1, _IIter2, Predicate, __gnu_parallel::sequential_tag);
+  // The two mismatch overloads below take no tag argument.
+  template<typename _IIter1, typename _IIter2>
+  inline pair<_IIter1, _IIter2>
+  mismatch(_IIter1, _IIter1, _IIter2);
+
+  template<typename _IIter1, typename _IIter2, typename Predicate>
+  inline pair<_IIter1, _IIter2>
+  mismatch(_IIter1, _IIter1, _IIter2, Predicate);
+  // mismatch_switch: declared only with a Predicate; overloaded on the two category tags.
+  template<typename _IIter1, typename _IIter2, typename Predicate, typename IteratorTag1, typename IteratorTag2>
+  inline pair<_IIter1, _IIter2>
+  mismatch_switch(_IIter1, _IIter1, _IIter2, Predicate, IteratorTag1, IteratorTag2);
+
+  template<typename _RAIter1, typename _RAIter2, typename Predicate>
+  pair<_RAIter1, _RAIter2>
+  mismatch_switch(_RAIter1, _RAIter1, _RAIter2, Predicate, random_access_iterator_tag, random_access_iterator_tag);
+  // search: sequential_tag overloads and overloads without a tag, each with and without a BinaryPredicate.
+  template<typename _FIter1, typename _FIter2>
+  inline _FIter1
+  search(_FIter1, _FIter1, _FIter2, _FIter2, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter1, typename _FIter2>
+  inline _FIter1
+  search(_FIter1, _FIter1, _FIter2, _FIter2);
+
+  template<typename _FIter1, typename _FIter2, typename BinaryPredicate>
+  inline _FIter1
+  search(_FIter1, _FIter1, _FIter2, _FIter2, BinaryPredicate, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter1, typename _FIter2, typename BinaryPredicate>
+  inline _FIter1
+  search(_FIter1, _FIter1, _FIter2, _FIter2, BinaryPredicate);
+  // search_switch: two category tags; overloaded on both being random_access_iterator_tag vs. generic tags.
+  template<typename _RAIter1, typename _RAIter2>
+  _RAIter1
+  search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, random_access_iterator_tag, random_access_iterator_tag);
+
+  template<typename _FIter1, typename _FIter2, typename IteratorTag1, typename IteratorTag2>
+  inline _FIter1
+  search_switch(_FIter1, _FIter1, _FIter2, _FIter2, IteratorTag1, IteratorTag2);
+
+  template<typename _RAIter1, typename _RAIter2, typename BinaryPredicate>
+  _RAIter1
+  search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, BinaryPredicate , random_access_iterator_tag, random_access_iterator_tag);
+
+  template<typename _FIter1, typename _FIter2, typename BinaryPredicate, typename IteratorTag1, typename IteratorTag2>
+  inline _FIter1
+  search_switch(_FIter1, _FIter1, _FIter2, _FIter2, BinaryPredicate, IteratorTag1, IteratorTag2);
+  // search_n: two sequential_tag overloads (with/without BinaryPredicate), then two without a tag.
+  template<typename _FIter, typename Integer, typename T>
+  inline _FIter
+  search_n(_FIter, _FIter, Integer, const T&, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter, typename Integer, typename T, typename BinaryPredicate>
+  inline _FIter
+  search_n(_FIter, _FIter, Integer, const T&, BinaryPredicate, __gnu_parallel::sequential_tag);
+    
+  template<typename _FIter, typename Integer, typename T>
+  inline _FIter
+  search_n(_FIter, _FIter, Integer, const T& val);
+
+  template<typename _FIter, typename Integer, typename T, typename BinaryPredicate>
+  inline _FIter
+  search_n(_FIter, _FIter, Integer, const T&, BinaryPredicate);
+  // search_n_switch: declared only with a BinaryPredicate; overloaded on the iterator category tag.
+  template<typename _RAIter, typename Integer, typename T, typename BinaryPredicate>
+  _RAIter
+  search_n_switch(_RAIter, _RAIter, Integer, const T&, BinaryPredicate, random_access_iterator_tag);
+
+  template<typename _FIter, typename Integer, typename T, typename BinaryPredicate, typename IteratorTag>
+  inline _FIter
+  search_n_switch(_FIter, _FIter, Integer, const T&, BinaryPredicate, IteratorTag);
+
+  // transform: unary and binary forms; sequential_tag overloads, then ones defaulting parallelism to parallel_balanced.
+  template<typename _IIter, typename _OIter, typename UnaryOperation>
+  inline _OIter
+  transform(_IIter, _IIter, _OIter, UnaryOperation, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename BinaryOperation>
+  inline _OIter
+  transform(_IIter1, _IIter1, _IIter2, _OIter, BinaryOperation binary_op, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename _OIter, typename UnaryOperation>
+  inline _OIter
+  transform(_IIter, _IIter, _OIter, UnaryOperation, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename BinaryOperation>
+  inline _OIter
+  transform(_IIter1, _IIter1, _IIter2, _OIter, BinaryOperation binary_op, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+  // transform1_switch (unary form): two category tags; both overloads default parallelism to parallel_balanced.
+  template<typename _RAIter1, typename _RAIter3, typename UnaryOperation>
+  _RAIter3
+  transform1_switch(_RAIter1, _RAIter1, _RAIter3, UnaryOperation, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+
+  template<typename _RAIter1, typename _RAIter3, typename UnaryOperation, typename IteratorTag1, typename IteratorTag2>
+  inline _RAIter3
+  transform1_switch(_RAIter1, _RAIter1, _RAIter3, UnaryOperation, IteratorTag1, IteratorTag2, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+  // NOTE(review): transform2_switch overloads differ in tag count (two vs. three); only the first defaults parallelism -- confirm intent.
+  template<typename _RAIter1, typename _RAIter2, typename _RAIter3, typename BinaryOperation>
+  _RAIter3
+  transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, BinaryOperation binary_op, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+
+  template<typename _RAIter1, typename _RAIter2, typename _RAIter3, typename BinaryOperation, typename tag1, typename tag2, typename tag3>
+  inline _RAIter3
+  transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, BinaryOperation binary_op, tag1, tag2, tag3, __gnu_parallel::parallelism);
+  // replace: a sequential_tag overload, then one whose parallelism argument defaults to parallel_balanced.
+  template<typename _FIter, typename T>
+  inline void
+  replace(_FIter, _FIter, const T&, const T&, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter, typename T>
+  inline void
+  replace(_FIter, _FIter, const T&, const T&, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+  // replace_switch overloads differ by category tag: generic IteratorTag vs. random_access_iterator_tag.
+  template<typename _FIter, typename T, typename IteratorTag>
+  void
+  replace_switch(_FIter, _FIter, const T&, const T&, IteratorTag, __gnu_parallel::parallelism);
+
+  template<typename _RAIter, typename T>
+  void
+  replace_switch(_RAIter, _RAIter, const T&, const T&, random_access_iterator_tag, __gnu_parallel::parallelism);
+
+  // replace_if: a sequential_tag overload, then one whose parallelism argument defaults to parallel_balanced.
+  template<typename _FIter, typename Predicate, typename T>
+  inline void
+  replace_if(_FIter, _FIter, Predicate, const T&, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter, typename Predicate, typename T>
+  inline void
+  replace_if(_FIter, _FIter, Predicate, const T&, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+  // replace_if_switch overloads differ by category tag: generic IteratorTag vs. random_access_iterator_tag.
+  template<typename _FIter, typename Predicate, typename T, typename IteratorTag>
+  void
+  replace_if_switch(_FIter, _FIter, Predicate, const T&, IteratorTag, __gnu_parallel::parallelism);
+
+  template<typename _RAIter, typename Predicate, typename T>
+  void
+  replace_if_switch(_RAIter, _RAIter, Predicate, const T&, random_access_iterator_tag, __gnu_parallel::parallelism);
+  // max_element: sequential_tag overloads, then ones defaulting parallelism to parallel_balanced (with/without _Compare).
+  template<typename _FIter>
+  inline _FIter
+  max_element(_FIter, _FIter, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter, typename _Compare>
+  inline _FIter
+  max_element(_FIter, _FIter, _Compare, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter>
+  inline _FIter
+  max_element(_FIter, _FIter, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+
+  template<typename _FIter, typename _Compare>
+  inline _FIter
+  max_element(_FIter, _FIter, _Compare, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+  // max_element_switch: declared only with a _Compare; overloaded on the iterator category tag.
+  template<typename _FIter, typename _Compare, typename IteratorTag>
+  _FIter
+  max_element_switch(_FIter, _FIter, _Compare, IteratorTag, __gnu_parallel::parallelism);
+
+  template<typename _RAIter, typename _Compare>
+  _RAIter
+  max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, __gnu_parallel::parallelism);
+  // merge: two sequential_tag overloads (with/without _Compare), then two without a tag.
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter
+  merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename _Compare>
+  inline _OIter
+  merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename _Compare>
+  inline _OIter
+  merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter
+  merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter);
+  // merge_switch: three category tags; overloaded on all being random_access_iterator_tag vs. generic tags.
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename _Compare, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline _OIter
+  merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, IteratorTag1, IteratorTag2, IteratorTag3);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename _Compare>
+  _OIter
+  merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag);
+  // min_element: sequential_tag overloads, then ones defaulting parallelism to parallel_balanced (with/without _Compare).
+  template<typename _FIter>
+  inline _FIter
+  min_element(_FIter, _FIter, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter, typename _Compare>
+  inline _FIter
+  min_element(_FIter, _FIter, _Compare, __gnu_parallel::sequential_tag);
+
+  template<typename _FIter>
+  inline _FIter
+  min_element(_FIter, _FIter, __gnu_parallel::parallelism parallelism_tag  = __gnu_parallel::parallel_balanced);
+
+  template<typename _FIter, typename _Compare>
+  inline _FIter
+  min_element(_FIter, _FIter, _Compare, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+  // min_element_switch: declared only with a _Compare; overloaded on the iterator category tag.
+  template<typename _FIter, typename _Compare, typename IteratorTag>
+  _FIter
+  min_element_switch(_FIter, _FIter, _Compare, IteratorTag, __gnu_parallel::parallelism);
+
+  template<typename _RAIter, typename _Compare>
+  _RAIter
+  min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, __gnu_parallel::parallelism);
+  // nth_element: two sequential_tag overloads (with/without _Compare), then two without a tag.
+  template<typename _RAIter>
+  inline void
+  nth_element(_RAIter, _RAIter, _RAIter, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter, typename _Compare>
+  void
+  nth_element(_RAIter, _RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter, typename _Compare>
+  inline void
+  nth_element(_RAIter, _RAIter, _RAIter, _Compare);
+
+  template<typename _RAIter>
+  void
+  nth_element(_RAIter, _RAIter, _RAIter);
+  // partial_sort: two sequential_tag overloads (with/without _Compare), then two without a tag.
+  template<typename _RAIter, typename _Compare>
+  void
+  partial_sort(_RAIter, _RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter>
+  void
+  partial_sort(_RAIter, _RAIter, _RAIter, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter, typename _Compare>
+  void
+  partial_sort(_RAIter, _RAIter, _RAIter, _Compare);
+
+  template<typename _RAIter>
+  void
+  partial_sort(_RAIter, _RAIter, _RAIter);
+  // partition: a sequential_tag overload and an overload without a tag argument.
+  template<typename _FIter, typename Predicate>
+  inline _FIter
+  partition(_FIter, _FIter, Predicate, __gnu_parallel::sequential_tag);
+    
+  template<typename _FIter, typename Predicate>
+  inline _FIter
+  partition(_FIter, _FIter, Predicate);
+  // partition_switch overloads differ by category tag: generic IteratorTag vs. random_access_iterator_tag.
+  template<typename _FIter, typename Predicate, typename IteratorTag>
+  inline _FIter
+  partition_switch(_FIter, _FIter, Predicate, IteratorTag);
+    
+  template<typename _RAIter, typename Predicate>
+  _RAIter
+  partition_switch(_RAIter, _RAIter, Predicate, random_access_iterator_tag);
+  // random_shuffle: sequential_tag and untagged overloads; generator forms take RandomNumberGenerator by non-const reference.
+  template<typename _RAIter>
+  inline void
+  random_shuffle(_RAIter, _RAIter, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter, typename RandomNumberGenerator>
+  inline void
+  random_shuffle(_RAIter, _RAIter, RandomNumberGenerator& rand, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter>
+  inline void
+  random_shuffle(_RAIter, _RAIter);
+
+  template<typename _RAIter, typename RandomNumberGenerator>
+  void
+  random_shuffle(_RAIter, _RAIter, RandomNumberGenerator& rand);
+  // set_union: two sequential_tag overloads (with/without Predicate), then two without a tag.
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter
+  set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename Predicate>
+  inline _OIter
+  set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter 
+  set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename Predicate>
+  inline _OIter 
+  set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate);
+  // set_union_switch: three category tags; overloaded on all being random_access_iterator_tag vs. generic tags.
+  template<typename _IIter1, typename _IIter2, typename Predicate, typename _OIter, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline _OIter 
+  set_union_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, IteratorTag1, IteratorTag2, IteratorTag3);
+
+  template<typename _RAIter1, typename _RAIter2, typename Output_RAIter, typename Predicate>
+  Output_RAIter 
+  set_union_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Output_RAIter, Predicate, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag);
+  // set_intersection: two sequential_tag overloads (with/without Predicate), then two without a tag.
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter
+  set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename Predicate>
+  inline _OIter
+  set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter 
+  set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename Predicate>
+  inline _OIter 
+  set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate);
+  // set_intersection_switch: three category tags; overloaded on all being random_access_iterator_tag vs. generic tags.
+  template<typename _IIter1, typename _IIter2, typename Predicate, typename _OIter, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline _OIter 
+  set_intersection_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, IteratorTag1, IteratorTag2, IteratorTag3);
+
+  template<typename _RAIter1, typename _RAIter2, typename Output_RAIter, typename Predicate>
+  Output_RAIter 
+  set_intersection_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Output_RAIter, Predicate, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag);
+  // set_symmetric_difference: two sequential_tag overloads (with/without Predicate), then two without a tag.
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter
+  set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename Predicate>
+  inline _OIter
+  set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter 
+  set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename Predicate>
+  inline _OIter 
+  set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate);
+  // set_symmetric_difference_switch: three category tags; all random_access_iterator_tag vs. generic tags.
+  template<typename _IIter1, typename _IIter2, typename Predicate, typename _OIter, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline _OIter 
+  set_symmetric_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, IteratorTag1, IteratorTag2, IteratorTag3);
+
+  template<typename _RAIter1, typename _RAIter2, typename Output_RAIter, typename Predicate>
+  Output_RAIter 
+  set_symmetric_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Output_RAIter, Predicate, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag);
+
+  // set_difference: two sequential_tag overloads (with/without Predicate), then two without a tag.
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter
+  set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename Predicate>
+  inline _OIter
+  set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter>
+  inline _OIter
+  set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter);
+
+  template<typename _IIter1, typename _IIter2, typename _OIter, typename Predicate>
+  inline _OIter
+  set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate);
+  // set_difference_switch: three category tags; overloaded on all being random_access_iterator_tag vs. generic tags.
+  template<typename _IIter1, typename _IIter2, typename Predicate, typename _OIter, typename IteratorTag1, typename IteratorTag2, typename IteratorTag3>
+  inline _OIter
+  set_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, IteratorTag1, IteratorTag2, IteratorTag3);
+
+  template<typename _RAIter1, typename _RAIter2, typename Output_RAIter, typename Predicate>
+  Output_RAIter
+  set_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Output_RAIter, Predicate, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag);
+
+  // sort: two sequential_tag overloads (with/without _Compare), then two without a tag; no sort_switch is declared here.
+  template<typename _RAIter>
+  inline void
+  sort(_RAIter, _RAIter, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter, typename _Compare>
+  inline void
+  sort(_RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter>
+  inline void
+  sort(_RAIter, _RAIter);
+
+  template<typename _RAIter, typename _Compare>
+  void
+  sort(_RAIter, _RAIter, _Compare);
+  // stable_sort: two sequential_tag overloads (with/without _Compare), then two without a tag.
+  template<typename _RAIter>
+  inline void
+  stable_sort(_RAIter, _RAIter, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter, typename _Compare>
+  inline void
+  stable_sort(_RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag);
+
+  template<typename _RAIter>
+  void
+  stable_sort(_RAIter, _RAIter);
+
+  template<typename _RAIter, typename _Compare>
+  void
+  stable_sort(_RAIter, _RAIter, _Compare);
+  // unique_copy: two sequential_tag overloads (with/without Predicate), then two without a tag.
+  template<typename _IIter, typename _OIter>
+  inline _OIter
+  unique_copy(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename _OIter, typename Predicate>
+  inline _OIter
+  unique_copy(_IIter, _IIter, _OIter, Predicate, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename _OIter>
+  inline _OIter
+  unique_copy(_IIter, _IIter, _OIter);
+
+  template<typename _IIter, typename _OIter, typename Predicate>
+  inline _OIter
+  unique_copy(_IIter, _IIter, _OIter, Predicate);
+  // unique_copy_switch: two category tags; overloaded on both being random_access_iterator_tag vs. generic tags.
+  template<typename _IIter, typename _OIter, typename Predicate, typename IteratorTag1, typename IteratorTag2>
+  inline _OIter
+  unique_copy_switch(_IIter, _IIter, _OIter, Predicate, IteratorTag1, IteratorTag2);
+
+  template<typename _RAIter, typename RandomAccess_OIter, typename Predicate>
+  RandomAccess_OIter
+  unique_copy_switch(_RAIter, _RAIter, RandomAccess_OIter, Predicate, random_access_iterator_tag, random_access_iterator_tag);
+} // end namespace __parallel
+} // end namespace std
+
+// NB: cannot use _GLIBCXX_STD_P directly here, as it is both scoped
+// (std::__norm) and unscoped (std::).
+namespace __gnu_sequential
+{ // Imports partition, sort, stable_sort, random_shuffle: from std::__norm if _GLIBCXX_PARALLEL is defined, else from std.
+#ifdef _GLIBCXX_PARALLEL
+  using std::__norm::partition;
+  using std::__norm::sort;
+  using std::__norm::stable_sort;
+  using std::__norm::random_shuffle;
+#else // !_GLIBCXX_PARALLEL
+  using std::partition;
+  using std::sort;
+  using std::stable_sort;
+  using std::random_shuffle;    
+#endif // _GLIBCXX_PARALLEL
+} // end namespace __gnu_sequential
+
+#endif
diff --git a/libstdc++-v3/include/parallel/balanced_quicksort.h b/libstdc++-v3/include/parallel/balanced_quicksort.h
new file mode 100644
index 00000000000..94b0e8cd6c6
--- /dev/null
+++ b/libstdc++-v3/include/parallel/balanced_quicksort.h
@@ -0,0 +1,451 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/balanced_quicksort.h
+ *  @brief Implementation of a dynamically load-balanced parallel quicksort.
+ *
+ *  It works in-place and needs only logarithmic extra memory.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_BAL_QUICKSORT_H
+#define _GLIBCXX_PARALLEL_BAL_QUICKSORT_H 1
+
+#include <parallel/basic_iterator.h>
+#include <bits/stl_algo.h>
+
+#include <parallel/settings.h>
+#include <parallel/partition.h>
+#include <parallel/random_number.h>
+#include <parallel/queue.h>
+#include <functional>
+
+#if _GLIBCXX_ASSERTIONS
+#include <parallel/checkers.h>
+#endif
+
+namespace __gnu_parallel
+{
+  /** @brief Information local to one thread in the parallel quicksort run. */
+  template<typename RandomAccessIterator>
+  struct QSBThreadLocal
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    /** @brief Continuous part of the sequence, described by an iterator pair. */
+    typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
+
+    /** @brief Initial piece to work on. */
+    Piece initial;
+
+    /** @brief Work-stealing queue. */
+    RestrictedBoundedConcurrentQueue<Piece> leftover_parts;
+
+    /** @brief Number of threads involved in this algorithm. */
+    thread_index_t num_threads;
+
+    /** @brief Pointer to a counter of elements left over to sort. */
+    volatile difference_type* elements_leftover;
+
+    /** @brief Constructor; the scalar members start zeroed until the caller fills them in.
+     *  @param queue_size Size of the work-stealing queue. */
+    /** @brief The complete sequence to sort. */
+    Piece global;
+
+    // explicit: an int must not silently convert into a thread-local record.
+    explicit QSBThreadLocal(int queue_size)
+    : leftover_parts(queue_size), num_threads(0), elements_leftover(0) { }
+  };
+
+  /** @brief Create the calling thread's entry in the thread-local storage array.
+   *  @param tls Array of storage pointers; slot omp_get_thread_num() is written.
+   *  @param queue_size Size handed to the QSBThreadLocal constructor. */
+  template<typename RandomAccessIterator>
+  inline void
+  qsb_initialize(QSBThreadLocal<RandomAccessIterator>** tls, int queue_size)
+  {
+    typedef QSBThreadLocal<RandomAccessIterator> storage_type;
+    tls[omp_get_thread_num()] = new storage_type(queue_size);
+  }
+
+
+  /** @brief Balanced quicksort divide step.
+   *  @param begin Begin iterator of subsequence.
+   *  @param end End iterator of subsequence.
+   *  @param comp Comparator.
+   *  @param num_threads Number of threads that are allowed to work on this part.
+   *  @return Offset of the pivot from @c begin after partitioning.
+   *  @pre @c (end-begin)>=1 */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline typename std::iterator_traits<RandomAccessIterator>::difference_type
+  qsb_divide(RandomAccessIterator begin, RandomAccessIterator end,
+	     Comparator comp, int num_threads)
+  {
+    _GLIBCXX_PARALLEL_ASSERT(num_threads > 0);
+
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    RandomAccessIterator pivot_pos = median_of_three_iterators(begin, begin + (end - begin) / 2, end - 1, comp);
+
+#if defined(_GLIBCXX_ASSERTIONS)
+    // The pivot must lie between two of the three sampled elements.
+    difference_type n = end - begin;
+    // The last sample is *(end - 1); end itself is past the range and must not be dereferenced.
+    _GLIBCXX_PARALLEL_ASSERT((!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos))
+	   || (!comp(*pivot_pos, *begin) && !comp(*(end - 1), *pivot_pos))
+	   || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos))
+	   || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*(end - 1), *pivot_pos))
+	   || (!comp(*pivot_pos, *(end - 1)) && !comp(*begin, *pivot_pos))
+	   || (!comp(*pivot_pos, *(end - 1)) && !comp(*(begin + n / 2), *pivot_pos)));
+#endif
+
+    // Swap pivot value to end, so the partition below can skip it.
+    if (pivot_pos != (end - 1))
+      std::swap(*pivot_pos, *(end - 1));
+    pivot_pos = end - 1;
+
+    __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, *pivot_pos);
+
+    // Divide, returning end - begin - 1 in the worst case.
+    difference_type split_pos = parallel_partition(begin, end - 1, pred, num_threads);
+
+    // Swap back pivot to middle.
+    std::swap(*(begin + split_pos), *pivot_pos);
+    pivot_pos = begin + split_pos;
+
+#if _GLIBCXX_ASSERTIONS
+    RandomAccessIterator r;
+    for (r = begin; r != pivot_pos; r++)
+      _GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos));
+    for (; r != end; r++)
+      _GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos));
+#endif
+
+    return split_pos;
+  }
+
+  /** @brief Quicksort conquer step: splits the range with qsb_divide and recurses on both parts in two OpenMP sections, until at most one thread or fewer than two elements remain.
+   *  @param tls Array of thread-local storages.
+   *  @param begin Begin iterator of subsequence.
+   *  @param end End iterator of subsequence.
+   *  @param comp Comparator.
+   *  @param iam Number of the thread processing this function.
+   *  @param num_threads Number of threads that are allowed to work on this part. */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  qsb_conquer(QSBThreadLocal<RandomAccessIterator>** tls,
+	      RandomAccessIterator begin, RandomAccessIterator end,
+	      Comparator comp, thread_index_t iam, thread_index_t num_threads)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    difference_type n = end - begin;
+    // Base case: record the range as thread iam's initial piece and sort it there.
+    if (num_threads <= 1 || n < 2)
+      {
+	tls[iam]->initial.first  = begin;
+	tls[iam]->initial.second = end;
+
+	qsb_local_sort_with_helping(tls, comp, iam);
+
+	return;
+      }
+
+    // Divide step.
+    difference_type split_pos = qsb_divide(begin, end, comp, num_threads);
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin));
+#endif
+    // Thread share of the left part: split_pos * num_threads / n, clamped to [1, num_threads - 1].
+    thread_index_t num_threads_leftside = std::max<thread_index_t>(1, std::min<thread_index_t>(num_threads - 1, split_pos * num_threads / n));
+    // One element fewer is left over; presumably this accounts for the pivot, which neither recursive range covers.
+#pragma omp atomic
+    *tls[iam]->elements_leftover -= (difference_type)1;
+
+    // Conquer step.
+#pragma omp parallel sections num_threads(2)
+    {
+#pragma omp section
+      qsb_conquer(tls, begin, begin + split_pos, comp, iam, num_threads_leftside);
+      // The pivot_pos is left in place, to ensure termination.
+#pragma omp section
+      qsb_conquer(tls, begin + split_pos + 1, end, comp,
+		  iam + num_threads_leftside, num_threads - num_threads_leftside);
+    }
+  }
+
+  /** 
+   *  @brief Quicksort step doing load-balanced local sort: sorts tls[iam]->initial, keeps split-off pieces on the thread's own queue and, once that queue is empty, takes pieces from other threads' queues until *elements_leftover reaches 0.
+   *  @param tls Array of thread-local storages.
+   *  @param comp Comparator.
+   *  @param iam Number of the thread processing this function. 
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  qsb_local_sort_with_helping(QSBThreadLocal<RandomAccessIterator>** tls,
+			      Comparator& comp, int iam)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+    typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
+
+    QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam];
+
+    difference_type base_case_n = Settings::sort_qsb_base_case_maximal_n;
+    if (base_case_n < 2)
+      base_case_n = 2;
+    thread_index_t num_threads = tl.num_threads;
+
+    // Every thread has its own random number generator.
+    random_number rng(iam + 1);
+
+    Piece current = tl.initial;
+    // Elements finished locally but not yet subtracted from *tl.elements_leftover.
+    difference_type elements_done = 0;
+#if _GLIBCXX_ASSERTIONS
+    difference_type total_elements_done = 0;
+#endif
+
+    for (;;)
+      {
+	// Invariant: current must be a valid (maybe empty) range.
+	RandomAccessIterator begin = current.first, end = current.second;
+	difference_type n = end - begin;
+
+	if (n > base_case_n)
+	  {
+	    // Divide.
+	    RandomAccessIterator pivot_pos = begin +  rng(n);
+
+	    // Swap pivot_pos value to end.
+	    if (pivot_pos != (end - 1))
+	      std::swap(*pivot_pos, *(end - 1));
+	    pivot_pos = end - 1;
+
+	    __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, *pivot_pos);
+
+	    // Divide, leave pivot unchanged in last place.
+	    RandomAccessIterator split_pos1, split_pos2;
+	    split_pos1 = __gnu_sequential::partition(begin, end - 1, pred);
+
+	    // Left side: < pivot_pos; right side: >= pivot_pos.
+#if _GLIBCXX_ASSERTIONS
+	    _GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end);
+#endif
+	    // Swap pivot back to middle.
+	    if (split_pos1 != pivot_pos)
+	      std::swap(*split_pos1, *pivot_pos);
+	    pivot_pos = split_pos1;
+
+	    // In case all elements are equal, split_pos1 == begin.
+	    if ((split_pos1 + 1 - begin) < (n >> 7)
+		|| (end - split_pos1) < (n >> 7))
+	      {
+		// Very unequal split, one part smaller than one 128th:
+		// separate the elements not strictly larger than the pivot.
+		__gnu_parallel::unary_negate<__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>, value_type> pred(__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>(comp, *pivot_pos));
+
+		// Find other end of pivot-equal range.
+		split_pos2 = __gnu_sequential::partition(split_pos1 + 1, end, pred);
+	      }
+	    else
+	      {
+		// Only skip the pivot.
+		split_pos2 = split_pos1 + 1;
+	      }
+
+	    // Elements equal to pivot are done.
+	    elements_done += (split_pos2 - split_pos1);
+#if _GLIBCXX_ASSERTIONS
+	    total_elements_done += (split_pos2 - split_pos1);
+#endif
+	    // Always push larger part onto stack.
+	    if (((split_pos1 + 1) - begin) < (end - (split_pos2)))
+	      {
+		// Right side larger.
+		if ((split_pos2) != end)
+		  tl.leftover_parts.push_front(std::make_pair(split_pos2, end));
+
+		//current.first = begin;	//already set anyway
+		current.second = split_pos1;
+		continue;
+	      }
+	    else
+	      {
+		// Left side larger.
+		if (begin != split_pos1)
+		  tl.leftover_parts.push_front(std::make_pair(begin, split_pos1));
+
+		current.first = split_pos2;
+		//current.second = end;	//already set anyway
+		continue;
+	      }
+	  }
+	else
+	  {
+	    __gnu_sequential::sort(begin, end, comp);
+	    elements_done += n;
+#if _GLIBCXX_ASSERTIONS
+	    total_elements_done += n;
+#endif
+
+	    // Prefer own stack, small pieces.
+	    if (tl.leftover_parts.pop_front(current))
+	      continue;
+	    // Own queue is empty: publish the locally finished element count.
+#pragma omp atomic
+	    *tl.elements_leftover -= elements_done;
+	    elements_done = 0;
+
+#if _GLIBCXX_ASSERTIONS
+	    double search_start = omp_get_wtime();
+#endif
+
+	    // Look for new work.
+	    bool success = false;
+	    while (*tl.elements_leftover > 0 && !success
+#if _GLIBCXX_ASSERTIONS
+		   // Possible dead-lock: stop searching after one second.
+		   && (omp_get_wtime() < (search_start + 1.0))
+#endif
+		   )
+	      {
+		thread_index_t victim;
+		victim = rng(num_threads);
+
+		// Large pieces.
+		success = (victim != iam) && tls[victim]->leftover_parts.pop_back(current);
+		if (!success)
+		  yield();
+#if !defined(__ICC) && !defined(__ECC)
+#pragma omp flush
+#endif
+	      }
+	    // NOTE(review): once the search has run for a second, the assertion below cannot hold; this appears to be a deliberate abort.
+#if _GLIBCXX_ASSERTIONS
+	    if (omp_get_wtime() >= (search_start + 1.0))
+	      {
+		sleep(1);
+		_GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() < (search_start + 1.0));
+	      }
+#endif
+	    if (!success)
+	      {
+#if _GLIBCXX_ASSERTIONS
+		_GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0);
+#endif
+		return;
+	      }
+	  }
+      }
+  }
+
+  /** @brief Top-level quicksort routine.
+   *  @param begin Begin iterator of sequence.
+   *  @param end End iterator of sequence.
+   *  @param comp Comparator.
+   *  @param n Length of the sequence to sort.
+   *  @param num_threads Number of threads that are allowed to work on
+   *  this part; reduced to @c n if larger.
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end,
+		    Comparator comp,
+		    typename std::iterator_traits<RandomAccessIterator>::difference_type n, int num_threads)
+  {
+    _GLIBCXX_CALL(end - begin)
+
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+    typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
+
+    typedef QSBThreadLocal<RandomAccessIterator> tls_type;
+
+    if (n <= 1)
+      return;
+
+    // At least one element per processor.
+    if (num_threads > n)
+      num_threads = static_cast<thread_index_t>(n);
+
+    tls_type** tls = new tls_type*[num_threads];
+    // Create every slot from this thread: a parallel region may be given fewer
+    // threads than requested, which would leave some tls[i] uninitialized.
+    const int queue_size = num_threads * (thread_index_t)(log2(n) + 1);
+    for (int i = 0; i < num_threads; i++)
+      tls[i] = new tls_type(queue_size);
+    // There can never be more than ceil(log2(n)) ranges on the stack, because
+    // 1. Only one processor pushes onto the stack
+    // 2. The largest range has at most length n
+    // 3. Each range is larger than half of the range remaining
+    volatile difference_type elements_leftover = n;
+    for (int i = 0; i < num_threads; i++)
+      {
+	tls[i]->elements_leftover = &elements_leftover;
+	tls[i]->num_threads = num_threads;
+	tls[i]->global = std::make_pair(begin, end);
+
+	// Just in case nothing is left to assign.
+	tls[i]->initial = std::make_pair(end, end);
+      }
+
+    // Initial splitting, recursively; needs nested parallelism, restored afterwards.
+    int old_nested = omp_get_nested();
+    omp_set_nested(true);
+
+    // Main recursion call.
+    qsb_conquer(tls, begin, begin + n, comp, 0, num_threads);
+
+    omp_set_nested(old_nested);
+
+#if _GLIBCXX_ASSERTIONS
+    // All stack must be empty.
+    Piece dummy;
+    for (int i = 1; i < num_threads; i++)
+      _GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy));
+#endif
+
+    for (int i = 0; i < num_threads; i++)
+      delete tls[i];
+    delete[] tls;
+  }
+} // namespace __gnu_parallel
+
+#endif
diff --git a/libstdc++-v3/include/parallel/base.h b/libstdc++-v3/include/parallel/base.h
new file mode 100644
index 00000000000..117292ba44b
--- /dev/null
+++ b/libstdc++-v3/include/parallel/base.h
@@ -0,0 +1,358 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/base.h
+ *  @brief Sequential helper functions.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_BASE_H
+#define _GLIBCXX_PARALLEL_BASE_H 1
+
+#include <parallel/features.h>
+#include <functional>
+#include <parallel/basic_iterator.h>
+#include <parallel/parallel.h>
+#include <cstdio>
+
+namespace __gnu_parallel
+{
+  // XXX remove std::duplicates from here if possible,
+  // XXX but keep minimal dependencies.
+
+  /** @brief Calculates the rounded-down logarithm of @c n for base 2.
+   *  @param n Argument.
+   *  @return floor(log2(n)) for n >= 1; 0 for any argument below 1.
+   */
+  template<typename Size>
+    inline Size
+    log2(Size n)
+    {
+      Size k;
+      for (k = 0; n > 1; n >>= 1)	// "> 1" rather than "!= 1", so that n <= 0 terminates
+	++k;
+      return k;
+    }
+
+  /** @brief Pack two integers into a single __gnu_parallel::lcas_t.
+   *  @param a Value placed in the most-significant @c lcas_t_bits/2 bits.
+   *  @param b Value placed in the least-significant @c lcas_t_bits/2 bits.
+   *  @return __gnu_parallel::lcas_t value encoding @c a and @c b.
+   *  @see decode2
+   */
+  inline lcas_t
+  encode2(int a, int b)	// both must be non-negative, actually
+  {
+    const lcas_t high_half = ((lcas_t)a) << (lcas_t_bits / 2);
+    const lcas_t low_half = (lcas_t)b;
+    return high_half | low_half;
+  }
+
+  /** @brief Unpack the two integers stored in one __gnu_parallel::lcas_t.
+   *  @param x __gnu_parallel::lcas_t to decode integers from.
+   *  @param a Receives the first integer, taken from the most-significant
+   *  @c lcas_t_bits/2 bits of @c x.
+   *  @param b Receives the second integer, taken from the least-significant
+   *  @c lcas_t_bits/2 bits of @c x.
+   *  @see encode2
+   */
+  inline void
+  decode2(lcas_t x, int& a, int& b)
+  {
+    a = (int)((x >> (lcas_t_bits / 2)) & lcas_t_mask);
+    b = (int)(x & lcas_t_mask);
+  }
+
+  /** @brief Derives an equality predicate from a strict weak ordering:
+   *  two values compare equal when neither is ordered before the other.
+   */
+  // XXX comparator at the end, as per others
+  template<typename Comparator, typename T1, typename T2>
+  class equal_from_less : public std::binary_function<T1, T2, bool>
+  {
+  private:
+    Comparator& ordering;
+
+  public:
+    equal_from_less(Comparator& less_than) : ordering(less_than) { }
+
+    bool operator()(const T1& a, const T2& b)
+    {
+      // FIXME: wrong in general (T1 != T2)
+      return !(ordering(a, b) || ordering(b, a));
+    }
+  };
+
+
+  /** @brief Equality functor like std::equal_to, except that the two operand types may differ. */
+  template<typename T1, typename T2>
+  struct equal_to : std::binary_function<T1, T2, bool>
+  {
+    bool
+    operator()(const T1& lhs, const T2& rhs) const { return lhs == rhs; }
+  };
+
+  /** @brief Similar to std::unary_negate, but giving the argument type explicitly. */
+  template<typename _Predicate, typename argument_type>
+    class unary_negate
+    : public std::unary_function<argument_type, bool>
+    {
+    protected:
+      _Predicate _M_pred;	// wrapped predicate, stored by value
+
+    public:
+      explicit unary_negate(const _Predicate& __pred) : _M_pred(__pred) { }
+
+      // Not const-qualified: the wrapped predicate is invoked as a non-const object.
+      bool
+      operator()(const argument_type& __arg)
+      { return !_M_pred(__arg); }
+    };
+
+  /** @brief Counterpart of std::binder1st whose argument and result types are given explicitly. */
+  template<typename _Operation, typename first_argument_type, typename second_argument_type, typename result_type>
+    class binder1st
+    : public std::unary_function<second_argument_type, result_type>
+    {
+    protected:
+      _Operation op;			// binary operation being adapted
+      first_argument_type value;	// copy of the bound first argument
+
+    public:
+      binder1st(const _Operation& __op, const first_argument_type& __bound)
+      : op(__op), value(__bound)
+      { }
+
+      result_type
+      operator()(const second_argument_type& __second)
+      { return op(value, __second); }
+
+      // _GLIBCXX_RESOLVE_LIB_DEFECTS
+      // 109.  Missing binders for non-const sequence elements
+      result_type
+      operator()(second_argument_type& __second) const
+      { return op(value, __second); }
+    };
+
+  /** @brief Counterpart of std::binder2nd whose argument and result types are given explicitly. */
+  template<typename _Operation, typename first_argument_type, typename second_argument_type, typename result_type>
+    class binder2nd
+    : public std::unary_function<first_argument_type, result_type>
+    {
+    protected:
+      _Operation op;			// binary operation being adapted
+      second_argument_type value;	// copy of the bound second argument
+
+    public:
+      binder2nd(const _Operation& __op, const second_argument_type& __bound)
+      : op(__op), value(__bound)
+      { }
+
+      result_type
+      operator()(const first_argument_type& __first) const
+      { return op(__first, value); }
+
+      // _GLIBCXX_RESOLVE_LIB_DEFECTS
+      // 109.  Missing binders for non-const sequence elements
+      result_type
+      operator()(first_argument_type& __first)
+      { return op(__first, value); }
+    };
+
+  /** @brief Ordering functor like std::less, except that the two operand types may differ. */
+  template<typename T1, typename T2>
+  struct less : std::binary_function<T1, T2, bool>
+  {
+    bool
+    operator()(const T1& lhs, const T2& rhs) const { return lhs < rhs; }
+  };
+
+
+  template<typename T, typename _DifferenceTp>
+  class pseudo_sequence;
+
+  /** @brief Iterator associated with __gnu_parallel::pseudo_sequence.
+   *  It offers increment, dereference, indexing, comparison and difference;
+   *  every dereference yields the same referenced element.
+   *  @param T Sequence value type.
+   *  @param _DifferenceTp Sequence difference type.
+   */
+  template<typename T, typename _DifferenceTp>
+  class pseudo_sequence_iterator
+  {
+  public:
+    typedef _DifferenceTp difference_type;
+
+  private:
+    typedef pseudo_sequence_iterator<T, _DifferenceTp> type;
+
+    const T& val;		// the single element; held by reference, not copied
+    difference_type pos;	// virtual position within the sequence
+
+  public:
+    pseudo_sequence_iterator(const T& val, difference_type pos)
+    : val(val), pos(pos) { }
+
+    // Pre-increment operator.
+    type&
+    operator++()
+    {
+      ++pos;
+      return *this;
+    }
+
+    // Post-increment operator: the returned copy keeps the old position.
+    const type
+    operator++(int)
+    { return type(val, pos++); }
+
+    const T&
+    operator*() const
+    { return val; }
+
+    const T&
+    operator[](difference_type) const
+    { return val; }
+
+    bool
+    operator==(const type& i2) const
+    { return pos == i2.pos; }
+
+    bool
+    operator!=(const type& i2) const
+    { return pos != i2.pos; }
+
+    difference_type
+    operator-(const type& i2) const
+    { return pos - i2.pos; }
+  };
+
+  /** @brief A virtual sequence made of repeated copies of one element.
+   *  Only a reference to the element and the repetition count are kept;
+   *  no copies are materialized.
+   *  @param T Sequence value type.
+   *  @param _DifferenceTp Sequence difference type.
+   */
+  template<typename T, typename _DifferenceTp>
+  class pseudo_sequence
+  {
+    typedef pseudo_sequence<T, _DifferenceTp> type;
+
+  public:
+    typedef _DifferenceTp difference_type;
+    typedef pseudo_sequence_iterator<T, uint64> iterator;	// better to cast down to uint64 than up to _DifferenceTp
+
+    /** @brief Constructor.
+     *  @param element The element the sequence repeats (kept by reference).
+     *  @param copies Number of (virtual) copies.
+     */
+    pseudo_sequence(const T& element, difference_type copies)
+    : val(element), count(copies) { }
+
+    /** @brief Iterator at position 0. */
+    iterator
+    begin() const
+    { return iterator(val, 0); }
+
+    /** @brief Iterator at position @c count, one past the last copy. */
+    iterator
+    end() const
+    { return iterator(val, count); }
+
+  private:
+    const T& val;
+    difference_type count;
+  };
+
+  /** @brief Functor that does nothing */
+  template<typename _ValueTp>
+  class void_functor
+  {
+  public:	// without this the call operator is private (class default) and unusable
+    inline void operator()(const _ValueTp&) const { }
+  };
+
+  /** @brief Select, among three iterators, the one referring to the median
+   *  element according to @c comp.
+   *
+   *  At most three comparisons are made. When elements compare equivalent,
+   *  the iterator returned is the one the comparison sequence below ends on
+   *  (for three equivalent elements that is @c b).
+   *  @param a First iterator.
+   *  @param b Second iterator.
+   *  @param c Third iterator.
+   *  @param comp Comparator.
+   *  @return One of @c a, @c b, @c c.
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  RandomAccessIterator
+  median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b,
+			    RandomAccessIterator c, Comparator& comp)
+  {
+    if (comp(*a, *b))
+      {
+	// *a < *b: the median is b, c or a, depending on where *c falls.
+	if (comp(*b, *c))
+	  return b;
+	return comp(*a, *c) ? c : a;
+      }
+
+    // Otherwise !(*a < *b): the same decision with the roles of a and b swapped.
+    if (comp(*a, *c))
+      return a;
+
+    // *c is not above *a here, so the median is the larger of *b and *c.
+    return comp(*b, *c) ? c : b;
+  }
+
+  // Stand-in for assert(), so that <cassert> stays out of the include set
+  // (same approach as debug mode): print the failure, then abort.
+  inline void
+  __replacement_assert(const char* __file, int __line,
+		       const char* __function, const char* __condition)
+  {
+    std::printf("%s:%d: %s: Assertion '%s' failed.\n",
+		__file, __line, __function, __condition);
+    __builtin_abort();
+  }
+  // Evaluates _Condition once; if it is false, passes file, line, function and the condition text to __replacement_assert.
+#define _GLIBCXX_PARALLEL_ASSERT(_Condition)                            \
+  do 								        \
+    {									\
+      if (!(_Condition))						\
+	__gnu_parallel::__replacement_assert(__FILE__, __LINE__,	\
+				    __PRETTY_FUNCTION__, #_Condition);	\
+    } while (false)
+  
+} //namespace __gnu_parallel
+
+#endif
+
diff --git a/libstdc++-v3/include/parallel/basic_iterator.h b/libstdc++-v3/include/parallel/basic_iterator.h
new file mode 100644
index 00000000000..4b891be80b2
--- /dev/null
+++ b/libstdc++-v3/include/parallel/basic_iterator.h
@@ -0,0 +1,48 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/basic_iterator.h
+ *  @brief Includes the original header files concerned with iterators
+ *  except for stream iterators.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_BASIC_ITERATOR_H
+#define _GLIBCXX_PARALLEL_BASIC_ITERATOR_H 1
+
+#include <bits/c++config.h>
+#include <cstddef>
+#include <bits/stl_iterator_base_types.h>
+#include <bits/stl_iterator_base_funcs.h>
+#include <bits/stl_iterator.h>
+
+#endif /* _GLIBCXX_PARALLEL_BASIC_ITERATOR_H */
diff --git a/libstdc++-v3/include/parallel/checkers.h b/libstdc++-v3/include/parallel/checkers.h
new file mode 100644
index 00000000000..b34ee051927
--- /dev/null
+++ b/libstdc++-v3/include/parallel/checkers.h
@@ -0,0 +1,148 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/checkers.h
+ *  @brief Routines for checking the correctness of algorithm results.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_CHECKERS
+#define _GLIBCXX_PARALLEL_CHECKERS 1
+
+#include <functional>
+#include <cstdio>
+#include <bits/stl_algobase.h>
+
+namespace __gnu_parallel
+{
+  /**
+   * @brief Check whether @c [begin, @c end) is sorted according to @c comp.
+   * @param begin Begin iterator of sequence.
+   * @param end End iterator of sequence.
+   * @param comp Comparator.
+   * @return @c true if sorted, @c false otherwise (the failing position is printed).
+   */
+  // XXX Comparator default template argument
+  template<typename InputIterator, typename Comparator>
+  bool
+  is_sorted(InputIterator begin, InputIterator end, Comparator comp = std::less<typename std::iterator_traits<InputIterator>::value_type>())
+  {
+    if (begin == end)
+      return true;
+
+    InputIterator current(begin), recent(begin);
+    // position is unsigned long long, hence the %llu conversion below.
+    unsigned long long position = 1;
+    for (current++; current != end; current++)
+      {
+	if (comp(*current, *recent))
+	  {
+	    std::printf("is_sorted: check failed before position %llu.\n", position);
+	    return false;
+	  }
+	recent = current;
+	position++;
+      }
+
+    return true;
+  }
+
+  /**
+   * @brief Check whether @c [begin, @c end) is sorted according to @c comp.
+   * Prints the position in case a misordered pair is found.
+   * @param begin Begin iterator of sequence.
+   * @param end End iterator of sequence.
+   * @param first_failure Receives the first misordered element, or @c end if there is none.
+   * @param comp Comparator.
+   * @return @c true if sorted, @c false otherwise.
+   */
+  // XXX Comparator default template argument
+  template<typename InputIterator, typename Comparator>
+  bool
+  is_sorted_failure(InputIterator begin, InputIterator end, InputIterator& first_failure, Comparator comp = std::less<typename std::iterator_traits<InputIterator>::value_type>())
+  {
+    first_failure = end;	// default result, also set for an empty range
+    if (begin == end)
+      return true;
+
+    InputIterator current(begin), recent(begin);
+
+    unsigned long long position = 1;
+    for (current++; current != end; current++)
+      {
+	if (comp(*current, *recent))
+	  {
+	    first_failure = current;
+	    std::printf("is_sorted: check failed before position %llu.\n", position);
+	    return false;
+	  }
+	recent = current;
+	position++;
+      }
+
+    return true;
+  }
+
+  /**
+   * @brief Check whether @c [begin, @c end) is sorted according to @c comp.
+   * Prints each misordered pair with its two neighbours; a neighbour outside the range is replaced by the adjacent pair element. Needs random access; values are printed with %d.
+   * @param begin Begin iterator of sequence.
+   * @param end End iterator of sequence.
+   * @param comp Comparator.
+   * @return @c true if sorted, @c false otherwise. */
+  template<typename InputIterator, typename Comparator>
+  bool
+  // XXX Comparator default template argument
+  is_sorted_print_failures(InputIterator begin, InputIterator end, Comparator comp = std::less<typename std::iterator_traits<InputIterator>::value_type>())
+  {
+    if (begin == end)
+      return true;
+    InputIterator recent(begin);
+    bool ok = true;
+    for (InputIterator pos(begin + 1); pos != end; pos++)
+      {
+	if (comp(*pos, *recent))
+	  {
+	    // Clamp the outer neighbours so nothing outside [begin, end) is read.
+	    InputIterator before = (pos - begin >= 2) ? pos - 2 : recent;
+	    InputIterator after = (pos + 1 != end) ? pos + 1 : pos;
+	    std::printf("%ld: %d %d %d %d\n", static_cast<long>(pos - begin),
+			*before, *recent, *pos, *after);
+	    ok = false;
+	  }
+	recent = pos;
+      }
+    return ok;
+  }
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/compatibility.h b/libstdc++-v3/include/parallel/compatibility.h
new file mode 100644
index 00000000000..ec0c0531885
--- /dev/null
+++ b/libstdc++-v3/include/parallel/compatibility.h
@@ -0,0 +1,338 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/compatibility.h
+ *  @brief Compatibility layer, mostly concerned with atomic operations.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
+#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1
+
+#include <parallel/types.h>
+#include <parallel/base.h>
+
+#if defined(__SUNPRO_CC) && defined(__sparc)
+#include <sys/atomic.h>
+#endif
+
+#if !defined(_WIN32)
+#include <sched.h>
+#endif
+
+#if defined(_MSC_VER)
+#include <Windows.h>
+#include <intrin.h>
+#undef max
+#undef min
+#endif
+
+namespace __gnu_parallel
+{
+#if defined(__ICC)
+  /** @brief Fetch-and-add helper for the Intel compiler, 32-bit operand.
+   *
+   *  Issues one @c lock @c xadd on @c *x with @c inc as the register
+   *  operand and returns whatever that register holds afterwards.
+   *  The template parameter is not used by the body.
+   *  NOTE(review): fetch_and_add_32() in this file uses
+   *  _InterlockedExchangeAdd for __ICC and does not call this helper.
+   *  @param x Address of the integer to modify.
+   *  @param inc Value to add.
+   *  @return Content of the register operand after the instruction. */
+  template<typename must_be_int = int>
+  int32 faa32(int32* x, int32 inc)
+  {
+    asm volatile("lock xadd %0,%1"
+		 : "=r" (inc), "=m" (*x)
+		 : "0" (inc)
+		 : "memory");
+    return inc;
+  }
+#if defined(__x86_64)
+  /** @brief Fetch-and-add helper for the Intel compiler, 64-bit operand.
+   *
+   *  Same instruction sequence as the 32-bit helper, applied to int64
+   *  operands; only compiled when __x86_64 is defined.  Called by
+   *  fetch_and_add_64() in the __ICC/__x86_64 branch.
+   *  The template parameter is not used by the body.
+   *  @param x Address of the integer to modify.
+   *  @param inc Value to add.
+   *  @return Content of the register operand after the instruction. */
+  template<typename must_be_int = int>
+  int64 faa64(int64* x, int64 inc)
+  {
+    asm volatile("lock xadd %0,%1"
+		 : "=r" (inc), "=m" (*x)
+		 : "0" (inc)
+		 : "memory");
+    return inc;
+  }
+#endif
+#endif
+
+  // atomic functions only work on integers
+
+  /** @brief Add a value to a variable, atomically.
+   *
+   *  Implementation is heavily platform-dependent: the first matching
+   *  branch of the preprocessor ladder below is used.  Intel and
+   *  Microsoft compilers go through _InterlockedExchangeAdd, GCC through
+   *  __sync_fetch_and_add, Sun CC on SPARC retries atomic_cas_32 until
+   *  it succeeds, and everything else falls back to an OpenMP critical
+   *  section (announced with a compile-time message).
+   *  @param ptr Pointer to a 32-bit signed integer.
+   *  @param addend Value to add.
+   *  @return The value @c *ptr had before the addition (this is what the
+   *  CAS loop and the fallback return explicitly).
+   */
+  inline int32
+  fetch_and_add_32(volatile int32* ptr, int32 addend)
+  {
+#if defined(__ICC)	//x86 version
+    return _InterlockedExchangeAdd((void*)ptr, addend);
+#elif defined(__ECC)	//IA-64 version
+    return _InterlockedExchangeAdd((void*)ptr, addend);
+#elif defined(__ICL) || defined(_MSC_VER)
+    return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr), addend);
+#elif defined(__GNUC__)
+    return __sync_fetch_and_add(ptr, addend);
+#elif defined(__SUNPRO_CC) && defined(__sparc)
+    volatile int32 before, after;
+    do
+      {
+	before = *ptr;
+	after = before + addend;
+      } while (atomic_cas_32((volatile unsigned int*)ptr, before, after) != before);	// retry until no other update slipped in between read and CAS
+    return before;
+#else	//fallback, slow
+#pragma message("slow fetch_and_add_32")
+    int32 res;
+#pragma omp critical
+    {
+      res = *ptr;
+      *(ptr) += addend;
+    }
+    return res;
+#endif
+  }
+
+  /** @brief Add a value to a variable, atomically.
+   *
+   *  Implementation is heavily platform-dependent: the first matching
+   *  branch of the preprocessor ladder below is used.  Intel on x86-64
+   *  uses the faa64 inline-assembly helper, IA-64 and 64-bit Windows use
+   *  _InterlockedExchangeAdd64, GCC on x86-64 or on i386 with
+   *  __i686/__pentium4/__athlon defined uses __sync_fetch_and_add, Sun CC
+   *  on SPARC retries atomic_cas_64, and everything else falls back to an
+   *  OpenMP critical section.  On 32-bit Windows builds the operation is
+   *  not available: the function asserts and returns 0.
+   *  @param ptr Pointer to a 64-bit signed integer.
+   *  @param addend Value to add.
+   *  @return The value @c *ptr had before the addition (this is what the
+   *  CAS loop and the fallback return explicitly).
+   */
+  inline int64
+  fetch_and_add_64(volatile int64* ptr, int64 addend)
+  {
+#if defined(__ICC) && defined(__x86_64)	//x86 version
+    return faa64<int>((int64*)ptr, addend);
+#elif defined(__ECC)	//IA-64 version
+    return _InterlockedExchangeAdd64((void*)ptr, addend);
+#elif defined(__ICL) || defined(_MSC_VER)
+#ifndef _WIN64
+    _GLIBCXX_PARALLEL_ASSERT(false);	//not available in this case
+    return 0;
+#else
+    return _InterlockedExchangeAdd64(ptr, addend);
+#endif
+#elif defined(__GNUC__) && defined(__x86_64)
+    return __sync_fetch_and_add(ptr, addend);
+#elif defined(__GNUC__) && defined(__i386) &&			\
+  (defined(__i686) || defined(__pentium4) || defined(__athlon))
+    return __sync_fetch_and_add(ptr, addend);
+#elif defined(__SUNPRO_CC) && defined(__sparc)
+    volatile int64 before, after;
+    do
+      {
+	before = *ptr;
+	after = before + addend;
+      } while (atomic_cas_64((volatile unsigned long long*)ptr, before, after) != before);	// retry until no other update slipped in between read and CAS
+    return before;
+#else	//fallback, slow
+#if defined(__GNUC__) && defined(__i386)
+    // XXX doesn't work with -march=native
+    //#warning "please compile with -march=i686 or better"
+#endif
+#pragma message("slow fetch_and_add_64")
+    int64 res;
+#pragma omp critical
+    {
+      res = *ptr;
+      *(ptr) += addend;
+    }
+    return res;
+#endif
+  }
+
+  /** @brief Add a value to a variable, atomically.
+   *
+   *  Implementation is heavily platform-dependent.  Dispatches on
+   *  @c sizeof(T) to fetch_and_add_32() or fetch_and_add_64(); any other
+   *  operand size is not supported and triggers
+   *  _GLIBCXX_PARALLEL_ASSERT.
+   *  @param ptr Pointer to a signed integer.
+   *  @param addend Value to add.
+   *  @return Result of the size-specific function converted to @c T; a
+   *  value-initialized @c T for unsupported sizes when the assertion
+   *  does not abort.
+   */
+  template<typename T>
+  inline T
+  fetch_and_add(volatile T* ptr, T addend)
+  {
+    if (sizeof(T) == sizeof(int32))
+      return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend);
+    else if (sizeof(T) == sizeof(int64))
+      return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend);
+
+    // Unsupported operand size.  The explicit return keeps control from
+    // running off the end of a non-void function when the assertion
+    // does not stop the program.
+    _GLIBCXX_PARALLEL_ASSERT(false);
+    return T();
+  }
+
+
+#if defined(__ICC)
+
+  /** @brief Compare-and-swap helper for the Intel compiler, 32-bit.
+   *
+   *  Issues a @c lock; @c cmpxchgl with @c old preloaded into the
+   *  accumulator and returns the accumulator afterwards.
+   *  The template parameter is not used by the body.
+   *  NOTE(review): the memory operand is declared through a
+   *  (volatile long long*) cast although the instruction is the 32-bit
+   *  form, and it is listed as an input only -- confirm this is
+   *  intended.  compare_and_swap_32() in this file uses
+   *  _InterlockedCompareExchange for __ICC and does not call this helper.
+   *  @param ptr Address of the integer to modify.
+   *  @param old Expected value.
+   *  @param nw Replacement value.
+   *  @return Content of the accumulator after the instruction. */
+  template<typename must_be_int = int>
+  inline int32
+  cas32(volatile int32* ptr, int32 old, int32 nw)
+  {
+    int32 before;
+    __asm__ __volatile__("lock; cmpxchgl %1,%2"
+			 : "=a"(before)
+			 : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
+			 : "memory");
+    return before;
+  }
+
+#if defined(__x86_64)
+  /** @brief Compare-and-swap helper for the Intel compiler, 64-bit.
+   *
+   *  Issues a @c lock; @c cmpxchgq with @c old preloaded into the
+   *  accumulator and returns the accumulator afterwards; only compiled
+   *  when __x86_64 is defined.  Called by compare_and_swap_64() in the
+   *  __ICC/__x86_64 branch, which compares the result with the
+   *  comparand to decide about success.
+   *  The template parameter is not used by the body.
+   *  @param ptr Address of the integer to modify.
+   *  @param old Expected value.
+   *  @param nw Replacement value.
+   *  @return Content of the accumulator after the instruction. */
+  template<typename must_be_int = int>
+  inline int64
+  cas64(volatile int64 *ptr, int64 old, int64 nw)
+  {
+    int64 before;
+    __asm__ __volatile__("lock; cmpxchgq %1,%2"
+			 : "=a"(before)
+			 : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
+			 : "memory");
+    return before;
+  }
+#endif
+
+#endif
+
+  /** @brief Compare @c *ptr and @c comparand. If equal, let @c
+   * *ptr=replacement and return @c true, return @c false otherwise.
+   *
+   *  Implementation is heavily platform-dependent: the first matching
+   *  branch of the preprocessor ladder below is used.  The intrinsic and
+   *  atomic_cas_32 branches report success by comparing the value handed
+   *  back by the primitive with @c comparand; GCC uses
+   *  __sync_bool_compare_and_swap; the fallback does the comparison and
+   *  the store inside an OpenMP critical section.
+   *  @param ptr Pointer to 32-bit signed integer.
+   *  @param comparand Compare value.
+   *  @param replacement Replacement value.
+   *  @return @c true if the replacement took place, @c false otherwise.
+   */
+  inline bool
+  compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement)
+  {
+#if defined(__ICC)	//x86 version
+    return _InterlockedCompareExchange((void*)ptr, replacement, comparand) == comparand;
+#elif defined(__ECC)	//IA-64 version
+    return _InterlockedCompareExchange((void*)ptr, replacement, comparand) == comparand;
+#elif defined(__ICL) || defined(_MSC_VER)
+    return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), replacement, comparand) == comparand;
+#elif defined(__GNUC__)
+    return __sync_bool_compare_and_swap(ptr, comparand, replacement);
+#elif defined(__SUNPRO_CC) && defined(__sparc)
+    return atomic_cas_32((volatile unsigned int*)ptr, comparand, replacement) == comparand;
+#else
+#pragma message("slow compare_and_swap_32")
+    bool res = false;
+#pragma omp critical
+    {
+      if (*ptr == comparand)
+	{
+	  *ptr = replacement;
+	  res = true;
+	}
+    }
+    return res;
+#endif
+  }
+
+  /** @brief Compare @c *ptr and @c comparand. If equal, let @c
+   * *ptr=replacement and return @c true, return @c false otherwise.
+   *
+   *  Implementation is heavily platform-dependent: the first matching
+   *  branch of the preprocessor ladder below is used.  Intel on x86-64
+   *  uses the cas64 inline-assembly helper, IA-64 and 64-bit Windows use
+   *  _InterlockedCompareExchange64, GCC on x86-64 or on i386 with
+   *  __i686/__pentium4/__athlon defined uses
+   *  __sync_bool_compare_and_swap, Sun CC on SPARC uses atomic_cas_64,
+   *  and everything else falls back to an OpenMP critical section.  On
+   *  32-bit Windows builds the operation is not available: the function
+   *  asserts and returns 0, i. e. @c false.
+   *  @param ptr Pointer to 64-bit signed integer.
+   *  @param comparand Compare value.
+   *  @param replacement Replacement value.
+   *  @return @c true if the replacement took place, @c false otherwise.
+   */
+  inline bool
+  compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement)
+  {
+#if defined(__ICC) && defined(__x86_64)	//x86 version
+    return cas64<int>(ptr, comparand, replacement) == comparand;
+#elif defined(__ECC)	//IA-64 version
+    return _InterlockedCompareExchange64((void*)ptr, replacement, comparand) == comparand;
+#elif defined(__ICL) || defined(_MSC_VER)
+#ifndef _WIN64
+    _GLIBCXX_PARALLEL_ASSERT(false);	//not available in this case
+    return 0;
+#else
+    return _InterlockedCompareExchange64(ptr, replacement, comparand) == comparand;
+#endif
+
+#elif defined(__GNUC__) && defined(__x86_64)
+    return __sync_bool_compare_and_swap(ptr, comparand, replacement);
+#elif defined(__GNUC__) && defined(__i386) &&			\
+  (defined(__i686) || defined(__pentium4) || defined(__athlon))
+    return __sync_bool_compare_and_swap(ptr, comparand, replacement);
+#elif defined(__SUNPRO_CC) && defined(__sparc)
+    return atomic_cas_64((volatile unsigned long long*)ptr, comparand, replacement) == comparand;
+#else
+#if defined(__GNUC__) && defined(__i386)
+    // XXX -march=native
+    //#warning "please compile with -march=i686 or better"
+#endif
+#pragma message("slow compare_and_swap_64")
+    bool res = false;
+#pragma omp critical
+    {
+      if (*ptr == comparand)
+	{
+	  *ptr = replacement;
+	  res = true;
+	}
+    }
+    return res;
+#endif
+  }
+
+  /** @brief Compare @c *ptr and @c comparand. If equal, let @c
+   * *ptr=replacement and return @c true, return @c false otherwise.
+   *
+   *  Implementation is heavily platform-dependent.  Dispatches on
+   *  @c sizeof(T) to compare_and_swap_32() or compare_and_swap_64(); any
+   *  other operand size is not supported and triggers
+   *  _GLIBCXX_PARALLEL_ASSERT.
+   *  @param ptr Pointer to signed integer.
+   *  @param comparand Compare value.
+   *  @param replacement Replacement value.
+   *  @return Result of the size-specific function; @c false for
+   *  unsupported sizes when the assertion does not abort. */
+  template<typename T>
+  inline bool
+  compare_and_swap(volatile T* ptr, T comparand, T replacement)
+  {
+    if (sizeof(T) == sizeof(int32))
+      return compare_and_swap_32((volatile int32*) ptr, (int32)comparand, (int32)replacement);
+    else if (sizeof(T) == sizeof(int64))
+      return compare_and_swap_64((volatile int64*) ptr, (int64)comparand, (int64)replacement);
+
+    // Unsupported operand size.  Nothing was swapped, so report failure
+    // instead of running off the end of a non-void function.
+    _GLIBCXX_PARALLEL_ASSERT(false);
+    return false;
+  }
+
+  /** @brief Give up the processor in favour of another thread instead
+   *  of using up the remainder of the current time slice.
+   *
+   *  Uses sched_yield() everywhere except on Windows, where Sleep(0) is
+   *  called. */
+  inline void
+  yield()
+  {
+#if !defined(_WIN32)
+    sched_yield();
+#else
+    Sleep(0);
+#endif
+  }
+} // end namespace
+
+#endif
diff --git a/libstdc++-v3/include/parallel/compiletime_settings.h b/libstdc++-v3/include/parallel/compiletime_settings.h
new file mode 100644
index 00000000000..6278e44837a
--- /dev/null
+++ b/libstdc++-v3/include/parallel/compiletime_settings.h
@@ -0,0 +1,76 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/compiletime_settings.h
+ *  @brief Defines on options concerning debugging and performance, at
+ *  compile-time.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#include <cstdio>
+
+/** @brief Determine verbosity level of the parallel mode.
+ *  Level 1 prints a message each time when entering a parallel-mode function. */
+#define _GLIBCXX_VERBOSE_LEVEL 0
+
+/** @def _GLIBCXX_CALL
+ *  @brief Macro to produce log message when entering a function.
+ *  @param n Input size.
+ *  @see _GLIBCXX_VERBOSE_LEVEL
+ *
+ *  At verbosity level 0 the macro expands to nothing.  At level 1 it
+ *  expands to a printf statement (including its own trailing
+ *  semicolon) that calls omp_get_thread_num() and get_max_threads(),
+ *  so both must be visible wherever the macro is used; @c n is printed
+ *  with @c %ld.  No expansion is defined here for any other level. */
+#if (_GLIBCXX_VERBOSE_LEVEL == 0)
+#define _GLIBCXX_CALL(n)
+#endif
+#if (_GLIBCXX_VERBOSE_LEVEL == 1)
+#define _GLIBCXX_CALL(n) printf("   %s:\niam = %d, n = %ld, num_threads = %d\n", __PRETTY_FUNCTION__, omp_get_thread_num(), (n), get_max_threads());
+#endif
+
+/** @brief Use floating-point scaling instead of modulo for mapping
+ *  random numbers to a range.  This can be faster on certain CPUs. */
+#define _GLIBCXX_SCALE_DOWN_FPU 0
+
+/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
+ *  Should be switched on only locally. */
+#define _GLIBCXX_ASSERTIONS 0
+
+/** @brief Consider the size of the L1 cache for
+ *  __gnu_parallel::parallel_random_shuffle(). */
+#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
+/** @brief Consider the size of the TLB for
+ *  __gnu_parallel::parallel_random_shuffle(). */
+#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0
+
+/** @brief First copy the data, sort it locally, and merge it back
+ * (0); or copy it back after everything is done (1).
+ *
+ *  Recommendation: 0 */
+#define _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST 0
+
diff --git a/libstdc++-v3/include/parallel/equally_split.h b/libstdc++-v3/include/parallel/equally_split.h
new file mode 100644
index 00000000000..730875d0d52
--- /dev/null
+++ b/libstdc++-v3/include/parallel/equally_split.h
@@ -0,0 +1,68 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/equally_split.h
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H
+#define _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H 1
+
+namespace __gnu_parallel
+{
+  /** @brief Compute the borders for cutting a range into parts of
+   *  nearly identical length.
+   *
+   *  Writes p+1 positions to @c s that divide [0,n) into p consecutive
+   *  parts whose lengths differ by at most one; the longer parts come
+   *  first.  The first position written is 0, the last one is n.  Parts
+   *  may be empty.
+   *  @param n Number of elements
+   *  @param p Number of parts
+   *  @param s Splitters
+   *  @returns End of splitter sequence, i. e. @c s+p+1 */
+  template<typename _DifferenceTp, typename OutputIterator>
+  OutputIterator
+  equally_split(_DifferenceTp n, thread_index_t p, OutputIterator s)
+  {
+    typedef _DifferenceTp difference_type;
+
+    // Every part gets base_length elements; the first `surplus' parts
+    // get one element more.
+    difference_type base_length = n / p;
+    difference_type surplus = n % p;
+    difference_type border = 0;
+
+    for (int part = 0; part < p; part++)
+      {
+	*s++ = border;
+	if (difference_type(part) < surplus)
+	  border += (base_length + 1);
+	else
+	  border += base_length;
+      }
+
+    // Closing border of the last part.
+    *s++ = n;
+    return s;
+  }
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/features.h b/libstdc++-v3/include/parallel/features.h
new file mode 100644
index 00000000000..83771480f69
--- /dev/null
+++ b/libstdc++-v3/include/parallel/features.h
@@ -0,0 +1,170 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/features.h
+ *  @brief Defines on whether to include algorithm variants.
+ *
+ *  Fewer variants reduce executable size and compile time.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_FEATURES_H
+#define _GLIBCXX_PARALLEL_FEATURES_H 1
+
+#ifndef _GLIBCXX_MERGESORT
+/** @def _GLIBCXX_MERGESORT
+ *  @brief Include parallel multi-way mergesort.
+ *  @see __gnu_parallel::Settings::sort_algorithm */
+#define _GLIBCXX_MERGESORT 1
+#endif
+
+#ifndef _GLIBCXX_QUICKSORT
+/** @def _GLIBCXX_QUICKSORT
+ *  @brief Include parallel unbalanced quicksort.
+ *  @see __gnu_parallel::Settings::sort_algorithm */
+#define _GLIBCXX_QUICKSORT 1
+#endif
+
+#ifndef _GLIBCXX_BAL_QUICKSORT
+/** @def _GLIBCXX_BAL_QUICKSORT
+ *  @brief Include parallel dynamically load-balanced quicksort.
+ *  @see __gnu_parallel::Settings::sort_algorithm */
+#define _GLIBCXX_BAL_QUICKSORT 1
+#endif
+
+#ifndef _GLIBCXX_LOSER_TREE
+/** @def _GLIBCXX_LOSER_TREE
+ *  @brief Include guarded (sequences may run empty) loser tree,
+ *  moving objects.
+ *  @see __gnu_parallel::Settings multiway_merge_algorithm */
+#define _GLIBCXX_LOSER_TREE 1
+#endif
+
+#ifndef _GLIBCXX_LOSER_TREE_EXPLICIT
+/** @def _GLIBCXX_LOSER_TREE_EXPLICIT
+ *  @brief Include standard loser tree, storing two flags for infimum
+ *  and supremum.
+ *  @see __gnu_parallel::Settings multiway_merge_algorithm */
+#define _GLIBCXX_LOSER_TREE_EXPLICIT 0
+#endif
+
+#ifndef _GLIBCXX_LOSER_TREE_REFERENCE
+/** @def _GLIBCXX_LOSER_TREE_REFERENCE
+ *  @brief Include some loser tree variant.
+ *  @see __gnu_parallel::Settings multiway_merge_algorithm */
+#define _GLIBCXX_LOSER_TREE_REFERENCE 0
+#endif
+
+#ifndef _GLIBCXX_LOSER_TREE_POINTER
+/** @def _GLIBCXX_LOSER_TREE_POINTER
+ *  @brief Include some loser tree variant.
+ *  @see __gnu_parallel::Settings multiway_merge_algorithm */
+#define _GLIBCXX_LOSER_TREE_POINTER 0
+#endif
+
+#ifndef _GLIBCXX_LOSER_TREE_UNGUARDED
+/** @def _GLIBCXX_LOSER_TREE_UNGUARDED
+ *  @brief Include unguarded (sequences must not run empty) loser
+ *  tree, moving objects.
+ *  @see __gnu_parallel::Settings multiway_merge_algorithm */
+#define _GLIBCXX_LOSER_TREE_UNGUARDED 1
+#endif
+
+#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
+/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
+ *  @brief Include some loser tree variant.
+ *  @see __gnu_parallel::Settings multiway_merge_algorithm */
+#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 0
+#endif
+
+#ifndef _GLIBCXX_LOSER_TREE_COMBINED
+/** @def _GLIBCXX_LOSER_TREE_COMBINED
+ *  @brief Include some loser tree variant.
+ *  @see __gnu_parallel::Settings multiway_merge_algorithm */
+#define _GLIBCXX_LOSER_TREE_COMBINED 0
+#endif
+
+#ifndef _GLIBCXX_LOSER_TREE_SENTINEL
+/** @def _GLIBCXX_LOSER_TREE_SENTINEL
+ *  @brief Include some loser tree variant.
+ *  @see __gnu_parallel::Settings multiway_merge_algorithm */
+#define _GLIBCXX_LOSER_TREE_SENTINEL 0
+#endif
+
+
+#ifndef _GLIBCXX_FIND_GROWING_BLOCKS
+/** @brief Include the growing blocks variant for std::find.
+ *  @see __gnu_parallel::Settings::find_distribution */
+#define _GLIBCXX_FIND_GROWING_BLOCKS 1
+#endif
+
+#ifndef _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
+/** @brief Include the equal-sized blocks variant for std::find.
+ *  @see __gnu_parallel::Settings::find_distribution */
+#define _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS 1
+#endif
+
+#ifndef _GLIBCXX_FIND_EQUAL_SPLIT
+/** @def _GLIBCXX_FIND_EQUAL_SPLIT
+ *  @brief Include the equal splitting variant for std::find.
+ *  @see __gnu_parallel::Settings::find_distribution */
+#define _GLIBCXX_FIND_EQUAL_SPLIT 1
+#endif
+
+
+#ifndef _GLIBCXX_TREE_INITIAL_SPLITTING
+/** @def _GLIBCXX_TREE_INITIAL_SPLITTING
+ *  @brief Include the initial splitting variant for
+ *  _Rb_tree::insert_unique(InputIterator beg, InputIterator end).
+ *  @see __gnu_parallel::_Rb_tree */
+#define _GLIBCXX_TREE_INITIAL_SPLITTING 1
+#endif
+
+#ifndef _GLIBCXX_TREE_DYNAMIC_BALANCING
+/** @def _GLIBCXX_TREE_DYNAMIC_BALANCING
+ *  @brief Include the dynamic balancing variant for
+ *  _Rb_tree::insert_unique(InputIterator beg, InputIterator end).
+ *  @see __gnu_parallel::_Rb_tree */
+#define _GLIBCXX_TREE_DYNAMIC_BALANCING 1
+#endif
+
+#ifndef _GLIBCXX_TREE_FULL_COPY
+/** @def _GLIBCXX_TREE_FULL_COPY
+ *  @brief In order to sort the input sequence of
+ *  _Rb_tree::insert_unique(InputIterator beg, InputIterator end) a
+ *  full copy of the input elements is done.
+ *  @see __gnu_parallel::_Rb_tree */
+#define _GLIBCXX_TREE_FULL_COPY 1
+#endif
+
+
+#endif
diff --git a/libstdc++-v3/include/parallel/find.h b/libstdc++-v3/include/parallel/find.h
new file mode 100644
index 00000000000..42f179fa6c7
--- /dev/null
+++ b/libstdc++-v3/include/parallel/find.h
@@ -0,0 +1,340 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/find.h
+ *  @brief Parallel implementation base for std::find(), std::equal()
+ *  and related functions.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze and Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_FIND_H
+#define _GLIBCXX_PARALLEL_FIND_H 1
+
+#include <bits/stl_algobase.h>
+
+#include <parallel/features.h>
+#include <parallel/parallel.h>
+#include <parallel/compatibility.h>
+#include <parallel/equally_split.h>
+
+namespace __gnu_parallel
+{
+  /**
+   *  @brief Parallel std::find, switch for different algorithms.
+   *
+   *  Forwards to the variant selected by Settings::find_distribution.
+   *  Each case is only compiled if the corresponding variant is enabled
+   *  through its _GLIBCXX_FIND_* feature macro, so that switching a
+   *  variant off does not leave a call to a missing overload behind.
+   *  Selecting a variant that is not compiled in, or an unknown value,
+   *  ends up in the default branch.
+   *  @param begin1 Begin iterator of first sequence.
+   *  @param end1 End iterator of first sequence.
+   *  @param begin2 Begin iterator of second sequence. Must have same
+   *  length as first sequence.
+   *  @param pred Find predicate.
+   *  @param selector Functionality (e. g. std::find_if (), std::equal(),...)
+   *  @return Place of finding in both sequences.  The default branch
+   *  asserts and returns the pair @c (begin1, begin2).
+   */
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred, typename Selector>
+  std::pair<RandomAccessIterator1, RandomAccessIterator2>
+  find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
+		RandomAccessIterator2 begin2, Pred pred, Selector selector)
+  {
+    switch (Settings::find_distribution)
+      {
+#if _GLIBCXX_FIND_GROWING_BLOCKS
+      case Settings::GROWING_BLOCKS:
+	return find_template(begin1, end1, begin2, pred, selector, growing_blocks_tag());
+#endif
+#if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
+      case Settings::CONSTANT_SIZE_BLOCKS:
+	return find_template(begin1, end1, begin2, pred, selector, constant_size_blocks_tag());
+#endif
+#if _GLIBCXX_FIND_EQUAL_SPLIT
+      case Settings::EQUAL_SPLIT:
+	return find_template(begin1, end1, begin2, pred, selector, equal_split_tag());
+#endif
+      default:
+	_GLIBCXX_PARALLEL_ASSERT(false);
+	return std::make_pair(begin1, begin2);
+      }
+  }
+
+#if _GLIBCXX_FIND_EQUAL_SPLIT
+
+  /**
+   *  @brief Parallel std::find, equal splitting variant.
+   *
+   *  The index range [0, end1 - begin1) is cut into num_threads + 1
+   *  borders by equally_split(); inside the parallel region every thread
+   *  scans the slice between its own two borders element by element.
+   *  @c result holds the smallest matching index seen so far and starts
+   *  out as the sequence length, i. e. "not found".
+   *  @param begin1 Begin iterator of first sequence.
+   *  @param end1 End iterator of first sequence.
+   *  @param begin2 Begin iterator of second sequence. Second sequence
+   *  must have same length as first sequence.
+   *  @param pred Find predicate.
+   *  @param selector Functionality (e. g. std::find_if (), std::equal(),...)
+   *  @return Place of finding in both sequences; both iterators are
+   *  advanced by the sequence length if nothing was found.
+   */
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred, typename Selector>
+  std::pair<RandomAccessIterator1, RandomAccessIterator2>
+  find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred, Selector selector, equal_split_tag)
+  {
+    _GLIBCXX_CALL(end1 - begin1)
+
+    typedef std::iterator_traits<RandomAccessIterator1> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+    typedef typename traits_type::value_type value_type;
+
+    difference_type length = end1 - begin1;
+
+    difference_type result = length;
+
+    const thread_index_t num_threads = get_max_threads();
+
+    // XXX VLA error.
+    // NOTE(review): the borders are computed for num_threads parts.  If
+    // the parallel region below ends up with fewer threads than
+    // requested, the slices of the missing threads would not be
+    // searched -- confirm that the request is always honoured.
+    difference_type borders[num_threads + 1];
+    equally_split(length, num_threads, borders);
+
+#pragma omp parallel shared(result) num_threads(num_threads)
+    {
+      int iam = omp_get_thread_num();
+      difference_type pos = borders[iam], limit = borders[iam + 1];
+
+      RandomAccessIterator1 i1 = begin1 + pos;
+      RandomAccessIterator2 i2 = begin2 + pos;
+      for (; pos < limit; pos++)
+	{
+#pragma omp flush(result)
+	  // Result has been set to something lower.
+	  if (result < pos)
+	    break;
+
+	  if (selector(i1, i2, pred))
+	    {
+	      // Only the guarded update below is inside the critical
+	      // section; keep the smaller of the two indices.
+#pragma omp critical (result)
+	      if (result > pos)
+		result = pos;
+	      break;
+	    }
+	  i1++;
+	  i2++;
+	}
+    }
+    return std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, begin2 + result);
+  }
+
+#endif
+
+#if _GLIBCXX_FIND_GROWING_BLOCKS
+
+  /**
+   *  @brief Parallel std::find, growing block size variant.
+   *
+   *  The first Settings::find_sequential_search_size elements (or the
+   *  whole sequence, if shorter) are searched sequentially.  Only if
+   *  nothing is found there, the threads start to grab blocks from the
+   *  shared counter @c next_block_pos via fetch_and_add() and search
+   *  them with the selector's sequential algorithm.  After every block a
+   *  thread multiplies its block size by
+   *  Settings::find_increasing_factor, capped at
+   *  Settings::find_maximum_block_size.
+   *  @param begin1 Begin iterator of first sequence.
+   *  @param end1 End iterator of first sequence.
+   *  @param begin2 Begin iterator of second sequence. Second sequence
+   *  must have same length as first sequence.
+   *  @param pred Find predicate.
+   *  @param selector Functionality (e. g. std::find_if (), std::equal(),...)
+   *  @return Place of finding in both sequences; both iterators are
+   *  advanced by the sequence length if nothing was found in parallel.
+   *  @see __gnu_parallel::Settings::find_sequential_search_size
+   *  @see __gnu_parallel::Settings::find_initial_block_size
+   *  @see __gnu_parallel::Settings::find_maximum_block_size
+   *  @see __gnu_parallel::Settings::find_increasing_factor
+   *
+   *  There are two main differences between the growing blocks and
+   *  the constant-size blocks variants.
+   *  1. For GB, the block size grows; for CSB, the block size is fixed.
+   *
+   *  2. For GB, the blocks are allocated dynamically;
+   *     for CSB, the blocks are allocated in a predetermined manner,
+   *     namely spatial round-robin.
+   */
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred, typename Selector>
+  std::pair<RandomAccessIterator1, RandomAccessIterator2>
+  find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
+		RandomAccessIterator2 begin2, Pred pred, Selector selector,
+		growing_blocks_tag)
+  {
+    _GLIBCXX_CALL(end1 - begin1)
+
+    typedef std::iterator_traits<RandomAccessIterator1> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+    typedef typename traits_type::value_type value_type;
+
+    difference_type length = end1 - begin1;
+
+    difference_type sequential_search_size = std::min<difference_type>(length, Settings::find_sequential_search_size);
+
+    // Try it sequentially first.
+    std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
+      selector.sequential_algorithm(begin1, begin1 + sequential_search_size, begin2, pred);
+
+    if (find_seq_result.first != (begin1 + sequential_search_size))
+      return find_seq_result;
+
+    // Index of beginning of next free block (after sequential find).
+    difference_type next_block_pos = sequential_search_size;
+    difference_type result = length;
+    const thread_index_t num_threads = get_max_threads();
+
+#pragma omp parallel shared(result) num_threads(num_threads)
+    {
+      // Not within first k elements -> start parallel.
+      thread_index_t iam = omp_get_thread_num();	// not used below
+
+      difference_type block_size = Settings::find_initial_block_size;
+      difference_type start = fetch_and_add<difference_type>(&next_block_pos, block_size);
+
+      // Get new block, update pointer to next block.
+      difference_type stop = std::min<difference_type>(length, start + block_size);
+
+      std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
+
+      while (start < length)
+	{
+#pragma omp flush(result)
+	  // Get new value of result.
+	  if (result < start)
+	    {
+	      // No chance to find first element.
+	      break;
+	    }
+
+	  local_result = selector.sequential_algorithm(begin1 + start, begin1 + stop, begin2 + start, pred);
+	  if (local_result.first != (begin1 + stop))
+	    {
+	      // A hit inside this block; the critical section covers the
+	      // whole guarded update that follows.
+#pragma omp critical(result)
+	      if ((local_result.first - begin1) < result)
+		{
+		  result = local_result.first - begin1;
+
+		  // Result cannot be in future blocks, stop algorithm.
+		  // Adding length pushes the counter to length or beyond,
+		  // so blocks fetched from now on fail the loop condition.
+		  fetch_and_add<difference_type>(&next_block_pos, length);
+		}
+	    }
+
+	  block_size = std::min<difference_type>(block_size * Settings::find_increasing_factor, Settings::find_maximum_block_size);
+
+	  // Get new block, update pointer to next block.
+	  start = fetch_and_add<difference_type>(&next_block_pos, block_size);
+	  stop = (length < (start + block_size)) ? length : (start + block_size);
+	}
+    }
+
+    // Return iterator on found element.
+    return std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, begin2 + result);
+  }
+
+#endif
+
+#if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
+
+  /**
+   *  @brief Parallel std::find, constant block size variant.
+   *  @param begin1 Begin iterator of first sequence.
+   *  @param end1 End iterator of first sequence.
+   *  @param begin2 Begin iterator of second sequence. Second sequence
+   *  must have same length as first sequence.
+   *  @param pred Find predicate.
+   *  @param selector Functionality (e. g. std::find_if (), std::equal(),...)
+   *  @return Place of finding in both sequences.
+   *  @see __gnu_parallel::Settings::find_sequential_search_size
+   *  @see __gnu_parallel::Settings::find_initial_block_size
+   *  There are two main differences between the growing blocks and the
+   *  constant-size blocks variants.
+   *  1. For GB, the block size grows; for CSB, the block size is fixed.
+   *  2. For GB, the blocks are allocated dynamically; for CSB, the
+   *  blocks are allocated in a predetermined manner, namely spatial
+   *  round-robin over the threads that actually execute the search.
+   */
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred, typename Selector>
+  std::pair<RandomAccessIterator1, RandomAccessIterator2>
+  find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
+		RandomAccessIterator2 begin2, Pred pred, Selector selector,
+		constant_size_blocks_tag)
+  {
+    _GLIBCXX_CALL(end1 - begin1)
+    typedef std::iterator_traits<RandomAccessIterator1> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    difference_type length = end1 - begin1;
+
+    difference_type sequential_search_size = std::min<difference_type>(length, Settings::find_sequential_search_size);
+
+    // Try it sequentially first.
+    std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
+      selector.sequential_algorithm(begin1, begin1 + sequential_search_size, begin2, pred);
+
+    if (find_seq_result.first != (begin1 + sequential_search_size))
+      return find_seq_result;
+
+    difference_type result = length;
+    const thread_index_t num_threads = get_max_threads();
+
+    // Not within first sequential_search_size elements -> start parallel.
+#pragma omp parallel shared(result) num_threads(num_threads)
+    {
+      // The runtime may grant fewer threads than requested.  Stride by the
+      // real team size, otherwise the blocks of absent threads are skipped.
+      const thread_index_t team_size = omp_get_num_threads();
+      const thread_index_t iam = omp_get_thread_num();
+      const difference_type block_size = Settings::find_initial_block_size;
+
+      // First element of thread's current iteration.
+      difference_type iteration_start = sequential_search_size;
+
+      // Where to work (initialization).
+      difference_type start = iteration_start + iam * block_size;
+      difference_type stop = std::min<difference_type>(length, start + block_size);
+
+      std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
+
+      while (start < length)
+	{
+	  // Get new value of result.
+#pragma omp flush(result)
+	  // No chance to find first element.
+	  if (result < start)
+	    break;
+
+	  local_result = selector.sequential_algorithm(begin1 + start, begin1 + stop, begin2 + start, pred);
+	  if (local_result.first != (begin1 + stop))
+	    {
+#pragma omp critical(result)
+	      if ((local_result.first - begin1) < result)
+		result = local_result.first - begin1;
+
+	      // Will not find better value in its interval.
+	      break;
+	    }
+
+	  iteration_start += team_size * block_size;
+
+	  // Where to work.
+	  start = iteration_start + iam * block_size;
+	  stop = std::min<difference_type>(length, start + block_size);
+	}
+    }
+
+    // Return iterator on found element.
+    return std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, begin2 + result);
+  }
+#endif
+} // end namespace
+
+#endif
+
diff --git a/libstdc++-v3/include/parallel/find_selectors.h b/libstdc++-v3/include/parallel/find_selectors.h
new file mode 100644
index 00000000000..fddd8263c0d
--- /dev/null
+++ b/libstdc++-v3/include/parallel/find_selectors.h
@@ -0,0 +1,184 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/find_selectors.h
+ *  @brief Function objects representing different tasks to be plugged
+ *  into the parallel find algorithm.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_FIND_FUNCTIONS_H
+#define _GLIBCXX_PARALLEL_FIND_FUNCTIONS_H 1
+
+#include <parallel/tags.h>
+#include <parallel/basic_iterator.h>
+#include <bits/stl_pair.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Empty base class of all __gnu_parallel::find_template selectors. */
+  struct generic_find_selector
+  { };
+
+  /** @brief Selector that tests a unary predicate on one element of the
+   *  first sequence.  Used for std::find() and std::find_if ().
+   */
+  struct find_if_selector : public generic_find_selector
+  {
+    /** @brief Evaluate the predicate at a single position.
+     *  @param i1 Iterator on first sequence; the element it refers to is tested.
+     *  @param i2 Iterator on second sequence (unused).
+     *  @param pred Find predicate.
+     *  @return Result of pred(*i1). */
+    template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred>
+    bool
+    operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
+    { return pred(*i1); }
+
+    /** @brief Sequential counterpart working on a whole range.
+     *  @param begin1 Begin iterator of first sequence.
+     *  @param end1 End iterator of first sequence.
+     *  @param begin2 Begin iterator of second sequence; returned unchanged.
+     *  @param pred Find predicate.
+     *  @return Position reported by find_if, paired with begin2. */
+    template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred>
+    std::pair<RandomAccessIterator1, RandomAccessIterator2>
+    sequential_algorithm(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred)
+    {
+      const RandomAccessIterator1 hit = find_if(begin1, end1, pred, sequential_tag());
+      return std::make_pair(hit, begin2);
+    }
+  };
+
+  /** @brief Selector that tests a binary predicate on two adjacent elements. */
+  struct adjacent_find_selector : public generic_find_selector
+  {
+    /** @brief Evaluate the predicate on an element and its successor.
+     *  @param i1 Iterator on first sequence.
+     *  @param i2 Iterator on second sequence (unused).
+     *  @param pred Find predicate.
+     *  @return Result of pred(*i1, *(i1 + 1)). */
+    template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred>
+    bool
+    operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
+    {
+      // Passed end iterator is one short, hence the read of *(i1 + 1).
+      return pred(*i1, *(i1 + 1));
+    }
+
+    /** @brief Sequential counterpart working on a whole range.
+     *  @param begin1 Begin iterator of first sequence.
+     *  @param end1 End iterator of first sequence.
+     *  @param begin2 Begin iterator of second sequence; returned unchanged.
+     *  @param pred Find predicate.
+     *  @return Position found, or end1 if there is none; paired with begin2. */
+    template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred>
+    std::pair<RandomAccessIterator1, RandomAccessIterator2>
+    sequential_algorithm(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred)
+    {
+      // Passed end iterator is one short, so the search runs up to end1 + 1.
+      const RandomAccessIterator1 real_end = end1 + 1;
+      RandomAccessIterator1 spot = adjacent_find(begin1, real_end, pred, sequential_tag());
+      // Map "nothing found" back onto the end iterator that was passed in.
+      return std::make_pair(spot == real_end ? end1 : spot, begin2);
+    }
+  };
+
+  /** @brief Selector that tests the inverted predicate on a pair of elements. */
+  struct mismatch_selector : public generic_find_selector
+  {
+    /**
+     *  @brief Evaluate the negated predicate at one position of both sequences.
+     *  @param i1 Iterator on first sequence.
+     *  @param i2 Iterator on second sequence.
+     *  @param pred Find predicate.
+     *  @return Result of !pred(*i1, *i2). */
+    template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred>
+    bool
+    operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
+    {
+      return !pred(*i1, *i2);
+    }
+
+    /**
+     *  @brief Sequential counterpart working on a whole range.
+     *  @param begin1 Begin iterator of first sequence.
+     *  @param end1 End iterator of first sequence.
+     *  @param begin2 Begin iterator of second sequence.
+     *  @param pred Find predicate.
+     *  @return Whatever the sequential mismatch call yields. */
+    template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred>
+    std::pair<RandomAccessIterator1, RandomAccessIterator2>
+    sequential_algorithm(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred)
+    { return mismatch(begin1, end1, begin2, pred, sequential_tag()); }
+  };
+
+
+  /** @brief Test predicate on several elements (one element vs. a candidate set). */
+  template<typename ForwardIterator>
+  struct find_first_of_selector : public generic_find_selector
+  {
+    ForwardIterator begin;	///< Begin of the candidate sequence.
+    ForwardIterator end;	///< End of the candidate sequence.
+
+    explicit find_first_of_selector(ForwardIterator begin, ForwardIterator end)
+    : begin(begin), end(end) { }
+
+    /** @brief Test on one position: true if pred holds for *i1 and any candidate.
+     *  @param i1 Iterator on first sequence.
+     *  @param i2 Iterator on second sequence (unused).
+     *  @param pred Find predicate; called as pred(*i1, candidate). */
+    template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred>
+    inline bool 
+    operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred)
+    {
+      for (ForwardIterator pos_in_candidates = begin; pos_in_candidates != end; ++pos_in_candidates)
+	if (pred(*i1, *pos_in_candidates))
+	  return true;
+      return false;
+    }
+
+    /** @brief Corresponding sequential algorithm on a sequence.
+     *  @param begin1 Begin iterator of first sequence.
+     *  @param end1 End iterator of first sequence.
+     *  @param begin2 Begin iterator of second sequence; returned unchanged.
+     *  @param pred Find predicate. */
+    template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred>
+    inline std::pair<RandomAccessIterator1, RandomAccessIterator2>
+    sequential_algorithm(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred)
+    {
+      return std::make_pair(find_first_of(begin1, end1, begin, end, pred, sequential_tag()), begin2);
+    }
+  };
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/for_each.h b/libstdc++-v3/include/parallel/for_each.h
new file mode 100644
index 00000000000..eb5e04e84f1
--- /dev/null
+++ b/libstdc++-v3/include/parallel/for_each.h
@@ -0,0 +1,83 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/for_each.h
+ *  @brief Main interface for embarrassingly parallel functions.
+ *
+ *  The explicit implementations are in other header files, like
+ *  workstealing.h, par_loop.h, omp_loop.h, and omp_loop_static.h.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_FOR_EACH_H
+#define _GLIBCXX_PARALLEL_FOR_EACH_H 1
+
+#include <parallel/settings.h>
+#include <parallel/par_loop.h>
+#include <parallel/omp_loop.h>
+#include <parallel/workstealing.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Choose the desired algorithm by evaluating @c parallelism_tag.
+   *  @param begin Begin iterator of input sequence.
+   *  @param end End iterator of input sequence.
+   *  @param user_op A user-specified functor (comparator, predicate,
+   *  associative operator,...).
+   *  @param functionality Functor to "process" an element with user_op
+   *  (depends on desired functionality, e. g. accumulate, for_each,...).
+   *  @param reduction Reduction functor.
+   *  @param reduction_start Initial value for reduction.
+   *  @param output Output, handed by reference to the chosen implementation.
+   *  @param bound Maximum number of elements processed.
+   *  @param parallelism_tag Parallelization method.
+   *  @return Whatever the selected implementation returns. */
+  template<typename InputIterator, typename UserOp, typename Functionality, typename Red, typename Result>
+  UserOp
+  for_each_template_random_access(InputIterator begin, InputIterator end,
+				  UserOp user_op, Functionality& functionality,
+				  Red reduction, Result reduction_start,
+				  Result& output,
+				  typename std::iterator_traits<InputIterator>::difference_type bound, parallelism parallelism_tag)
+  {
+    if (parallelism_tag == parallel_unbalanced)
+      return for_each_template_random_access_ed(begin, end, user_op, functionality, reduction, reduction_start, output, bound);
+    if (parallelism_tag == parallel_omp_loop)
+      return for_each_template_random_access_omp_loop(begin, end, user_op, functionality, reduction, reduction_start, output, bound);
+    // NOTE(review): the static tag is served by the plain omp_loop variant -- confirm intended.
+    if (parallelism_tag == parallel_omp_loop_static)
+      return for_each_template_random_access_omp_loop(begin, end, user_op, functionality, reduction, reduction_start, output, bound);
+    return for_each_template_random_access_workstealing(begin, end, user_op, functionality, reduction, reduction_start, output, bound);	// e. g. parallel_balanced
+  }
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/for_each_selectors.h b/libstdc++-v3/include/parallel/for_each_selectors.h
new file mode 100644
index 00000000000..f1d0abf255b
--- /dev/null
+++ b/libstdc++-v3/include/parallel/for_each_selectors.h
@@ -0,0 +1,343 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/for_each_selectors.h
+ *  @brief Functors representing different tasks to be plugged into the
+ *  generic parallelization methods for embarrassingly parallel functions.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H
+#define _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H 1
+
+#include <parallel/basic_iterator.h>
+
+namespace __gnu_parallel
+{
+
+  /** @brief Generic selector for embarrassingly parallel functions. */
+  template<typename It>
+  struct generic_for_each_selector
+  {
+    /** @brief Iterator on last element processed; needed for some
+     *  algorithms (e. g. std::transform()).  No selector in this file
+     *  assigns it; presumably the loop drivers do -- TODO confirm. */
+    It finish_iterator;
+  };
+
+
+  /** @brief Selector providing the per-element step of std::for_each(). */
+  template<typename It>
+  struct for_each_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Call the operator on the referenced element.
+     *  @param o Operator.
+     *  @param i Iterator referencing object.
+     *  @return Always true.
+     */
+    template<typename Op>
+    bool
+    operator()(Op& o, It i)
+    { o(*i); return true; }
+  };
+
+  /** @brief Selector providing the per-element step of std::generate(). */
+  template<typename It>
+  struct generate_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Assign the operator's result to the referenced element.
+     *  @param o Operator, called without arguments.
+     *  @param i Iterator referencing object.
+     *  @return Always true.
+     */
+    template<typename Op>
+    bool
+    operator()(Op& o, It i)
+    { *i = o(); return true; }
+  };
+
+  /** @brief Selector providing the per-element step of std::fill(). */
+  template<typename It>
+  struct fill_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Assign the value to the referenced element.
+     *  @param v Current value.
+     *  @param i Iterator referencing object.
+     *  @return Always true.
+     */
+    template<typename Val>
+    bool
+    operator()(Val& v, It i)
+    { *i = v; return true; }
+  };
+
+  /** @brief Selector for std::transform() with a single input sequence. */
+  template<typename It>
+  struct transform1_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Store o(*i.first) through i.second.
+     *  @param o Operator.
+     *  @param i Iterator referencing object; first is read, second is written.
+     *  @return Always true.
+     */
+    template<typename Op>
+    bool
+    operator()(Op& o, It i)
+    { *i.second = o(*i.first); return true; }
+  };
+
+  /** @brief Selector for std::transform() with two input sequences. */
+  template<typename It>
+  struct transform2_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Store o(*i.first, *i.second) through i.third.
+     *  @param o Operator.
+     *  @param i Iterator referencing object; first and second are read, third is written.
+     *  @return Always true.
+     */
+    template<typename Op>
+    bool
+    operator()(Op& o, It i)
+    { *i.third = o(*i.first, *i.second); return true; }
+  };
+
+  /** @brief Selector providing the per-element step of std::replace(). */
+  template<typename It, typename T>
+  struct replace_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Value to replace with (stored as a reference). */
+    const T& new_val;
+
+    /** @brief Constructor.
+     *  @param new_val Value to replace with. */
+    explicit replace_selector(const T &new_val) : new_val(new_val) {}
+
+    /** @brief Overwrite the referenced element with new_val if it equals v.
+     *  @param v Value to be replaced.
+     *  @param i Iterator referencing object.
+     *  @return Always true. */
+    bool operator()(T& v, It i)
+    {
+      if (*i == v) *i = new_val;
+      return true;
+    }
+  };
+
+  /** @brief Selector providing the per-element step of std::replace_if(). */
+  template<typename It, typename Op, typename T>
+  struct replace_if_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Value to replace with (stored as a reference). */
+    const T& new_val;
+
+    /** @brief Constructor.
+     *  @param new_val Value to replace with. */
+    explicit replace_if_selector(const T &new_val) : new_val(new_val) { }
+
+    /** @brief Overwrite the referenced element with new_val if o accepts it.
+     *  @param o Operator.
+     *  @param i Iterator referencing object.
+     *  @return Always true. */
+    bool operator()(Op& o, It i)
+    {
+      if (o(*i)) *i = new_val;
+      return true;
+    }
+  };
+
+  /** @brief Selector providing the per-element step of std::count(). */
+  template<typename It, typename Diff>
+  struct count_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Compare the referenced element with a value.
+     *  @param v Value to count.
+     *  @param i Iterator referencing object.
+     *  @return 1 if v == *i holds, 0 otherwise. */
+    template<typename Val>
+    Diff operator()(Val& v, It i)
+    { return (v == *i) ? 1 : 0; }
+  };
+
+  /** @brief Selector providing the per-element step of std::count_if (). */
+  template<typename It, typename Diff>
+  struct count_if_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Test the referenced element with a predicate.
+     *  @param o Operator.
+     *  @param i Iterator referencing object.
+     *  @return 1 if o(*i) holds, 0 otherwise. */
+    template<typename Op>
+    Diff operator()(Op& o, It i)
+    { return (o(*i)) ? 1 : 0; }
+  };
+
+  /** @brief Selector providing the per-element step of std::accumulate(). */
+  template<typename It>
+  struct accumulate_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Read the referenced element.
+     *  @param o Operator (unused).
+     *  @param i Iterator referencing object.
+     *  @return The current value, i. e. a copy of *i. */
+    template<typename Op>
+    typename std::iterator_traits<It>::value_type operator()(Op o, It i)
+    { return *i; }
+  };
+
+  /** @brief Selector providing the per-element step of std::inner_product(). */
+  template<typename It, typename It2, typename T>
+  struct inner_product_selector : public generic_for_each_selector<It>
+  {
+    It begin1_iterator;		///< Begin iterator of first sequence.
+    It2 begin2_iterator;	///< Begin iterator of second sequence.
+
+    /** @brief Constructor.
+     *  @param b1 Begin iterator of first sequence.
+     *  @param b2 Begin iterator of second sequence. */
+    explicit inner_product_selector(It b1, It2 b2)
+    : begin1_iterator(b1), begin2_iterator(b2) { }
+
+    /** @brief Combine the referenced element with its partner in the second sequence.
+     *  @param mult Multiplication functor.
+     *  @param current Iterator referencing object.
+     *  @return Inner product elemental result. */
+    template<typename Op>
+    T
+    operator()(Op mult, It current)
+    {
+      // The partner sits at the same offset from the begin of the second sequence.
+      const typename std::iterator_traits<It>::difference_type offset = current - begin1_iterator;
+      return mult(*current, *(begin2_iterator + offset));
+    }
+  };
+
+  /** @brief Selector that hands back the iterator it was given. */
+  template<typename It>
+  struct identity_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Return the iterator itself rather than the element.
+     *  @param o Operator (unused).
+     *  @param i Iterator referencing object.
+     *  @return Passed iterator. */
+    template<typename Op>
+    It operator()(Op o, It i)
+    { return i; }
+  };
+
+  /** @brief Selector that applies the operator to an element and its
+   *  predecessor, e. g. to form the difference of adjacent elements. */
+  template<typename It>
+  struct adjacent_difference_selector : public generic_for_each_selector<It>
+  {
+    /** @brief Write o(*i.first, element before i.first) through i.second; returns true. */
+    template<typename Op>
+    bool operator()(Op& o, It i)
+    {
+      typename It::first_type predecessor = i.first;
+      --predecessor;
+      *i.second = o(*i.first, *predecessor);
+      return true;
+    }
+  };
+
+  // XXX move into type_traits?
+  /** @brief Functor doing nothing.
+   *
+   *  For some reduction tasks this is passed as a dummy selector
+   *  parameter (the call sites are not part of this file).
+   */
+  struct nothing
+  {
+    /** @brief Functor execution; the body is empty.
+     *  @param i Iterator referencing object (unused). */
+    template<typename It>
+    void operator()(It i)
+    { }
+  };
+
+  /** @brief Reduction that ignores both operands and always yields true. */
+  struct dummy_reduct
+  {
+    bool
+    operator()(bool, bool) const { return true; }
+  };
+
+  /** @brief Reduction for finding the minimum element, using a comparator. */
+  template<typename Comp, typename It>
+  struct min_element_reduct
+  {
+    /** @brief Comparator, stored as a reference. */
+    Comp& comp;
+
+    explicit min_element_reduct(Comp &c) : comp(c)
+    { }
+
+    /** @brief Choose between two iterators.
+     *  @param x First candidate.  @param y Second candidate.
+     *  @return x if comp(*x, *y) holds, otherwise y. */
+    It
+    operator()(It x, It y)
+    { return comp(*x, *y) ? x : y; }
+  };
+
+  /** @brief Reduction for finding the maximum element, using a comparator. */
+  template<typename Comp, typename It>
+  struct max_element_reduct
+  {
+    /** @brief Comparator, stored as a reference. */
+    Comp& comp;
+
+    explicit max_element_reduct(Comp& c) : comp(c)
+    { }
+
+    /** @brief Choose between two iterators.
+     *  @param x First candidate.  @param y Second candidate.
+     *  @return y if comp(*x, *y) holds, otherwise x. */
+    It
+    operator()(It x, It y)
+    { return comp(*x, *y) ? y : x; }
+  };
+
+  /** @brief General reduction, using a binary operator. */
+  template<typename BinOp>
+  struct accumulate_binop_reduct
+  {
+    BinOp& binop;	///< Binary operator, stored as a reference.
+
+    explicit accumulate_binop_reduct(BinOp& b) : binop(b) { }
+    /** @brief Combine two values; the result is binop(x, y). */
+    template<typename T>
+    T operator()(T x, T y) { return binop(x, y); }
+  };
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/iterator.h b/libstdc++-v3/include/parallel/iterator.h
new file mode 100644
index 00000000000..26b7f331a82
--- /dev/null
+++ b/libstdc++-v3/include/parallel/iterator.h
@@ -0,0 +1,203 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/iterator.h
+ * @brief Helper iterator classes for the std::transform() functions.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_ITERATOR_H
+#define _GLIBCXX_PARALLEL_ITERATOR_H 1
+
+#include <parallel/basic_iterator.h>
+#include <bits/stl_pair.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Bundles two iterators into one.  The usual iterator operations
+   *  are forwarded to both child iterators. */
+  template<typename Iterator1, typename Iterator2, typename IteratorCategory>
+  class iterator_pair : public std::pair<Iterator1, Iterator2>
+  {
+  private:
+    typedef iterator_pair<Iterator1, Iterator2, IteratorCategory> type;
+    typedef std::pair<Iterator1, Iterator2> base_type;
+  public:
+    typedef IteratorCategory iterator_category;
+    typedef void value_type;
+    typedef std::iterator_traits<Iterator1> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+    typedef type* pointer;
+    typedef type& reference;
+
+    iterator_pair() { }
+
+    iterator_pair(const Iterator1& first, const Iterator2& second) 
+    : base_type(first, second) { }
+
+    // Pre-increment: advance both children.
+    type&
+    operator++()
+    {
+      ++this->first;
+      ++this->second;
+      return *this;
+    }
+
+    // Post-increment: result is built from the children's own post-increments.
+    const type
+    operator++(int)
+    { return type(this->first++, this->second++); }
+
+    // Pre-decrement: step both children back.
+    type&
+    operator--()
+    {
+      --this->first;
+      --this->second;
+      return *this;
+    }
+
+    // Post-decrement: result is built from the children's own post-decrements.
+    const type
+    operator--(int)
+    { return type(this->first--, this->second--); }
+
+    // Conversion to the second child iterator.
+    operator Iterator2() const
+    { return this->second; }
+
+    // Member-wise assignment of both children.
+    type&
+    operator=(const type& other)
+    {
+      this->first = other.first;
+      this->second = other.second;
+      return *this;
+    }
+
+    // Shift both children by delta.
+    type
+    operator+(difference_type delta) const
+    { return type(this->first + delta, this->second + delta); }
+
+    // Distance, measured on the first child only.
+    difference_type
+    operator-(const type& other) const
+    { return this->first - other.first; }
+  };
+
+
+  /** @brief A triple of iterators. The usual iterator operations are
+   *  applied to all three child iterators. */
+  template<typename Iterator1, typename Iterator2, typename Iterator3, typename IteratorCategory>
+  class iterator_triple
+  {
+  private:
+    typedef iterator_triple<Iterator1, Iterator2, Iterator3, IteratorCategory> type;
+
+  public:
+    typedef IteratorCategory iterator_category;
+    typedef void value_type;
+    // Taken from iterator_traits (not Iterator1 itself) so plain pointers work too.
+    typedef typename std::iterator_traits<Iterator1>::difference_type difference_type;
+    typedef type* pointer;
+    typedef type& reference;
+
+    Iterator1 first;
+    Iterator2 second;
+    Iterator3 third;
+
+    iterator_triple() { }
+
+    iterator_triple(const Iterator1& _first, const Iterator2& _second,
+		    const Iterator3& _third)
+    : first(_first), second(_second), third(_third)
+    { }
+
+    // Pre-increment operator.
+    type&
+    operator++()
+    {
+      ++first;
+      ++second;
+      ++third;
+      return *this;
+    }
+
+    // Post-increment operator.
+    const type
+    operator++(int)
+    { return type(first++, second++, third++); }
+
+    // Pre-decrement operator.
+    type&
+    operator--()
+    {
+      --first;
+      --second;
+      --third;
+      return *this;
+    }
+
+    // Post-decrement operator.
+    const type
+    operator--(int)
+    { return type(first--, second--, third--); }
+
+    // Type conversion to the third child iterator.
+    operator Iterator3() const
+    { return third; }
+
+    // Member-wise assignment.
+    type&
+    operator=(const type& other)
+    {
+      first = other.first;
+      second = other.second;
+      third = other.third;
+      return *this;
+    }
+
+    // Advance all three children by delta.
+    type
+    operator+(difference_type delta) const
+    { return type(first + delta, second + delta, third + delta); }
+
+    // Distance, measured on the first child only.
+    difference_type
+    operator-(const type& other) const
+    { return first - other.first; }
+  };
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/list_partition.h b/libstdc++-v3/include/parallel/list_partition.h
new file mode 100644
index 00000000000..320b1b8286a
--- /dev/null
+++ b/libstdc++-v3/include/parallel/list_partition.h
@@ -0,0 +1,181 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/list_partition.h
+ *  @brief Functionality to split sequence referenced by only input
+ *  iterators.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Leonor Frias Moya and Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_LIST_PARTITION_H
+#define _GLIBCXX_PARALLEL_LIST_PARTITION_H 1
+
+#include <parallel/parallel.h>
+#include <vector>
+
+namespace __gnu_parallel
+{
+  /** @brief Combines two ranges into one and thus halves the number of ranges.
+   *
+   *  Defined ahead of @c shrink_and_double so that the call there finds it
+   *  by ordinary lookup instead of depending on argument-dependent lookup.
+   *  @param os_starts Start positions worked on (oversampled).
+   *  @param count_to_two Counts up to 2 (not used by this function).
+   *  @param range_length Current length of a chunk, doubled on return. */
+  template<typename InputIterator>
+  void
+  shrink(std::vector<InputIterator>& os_starts, size_t& count_to_two,
+	 size_t& range_length)
+  {
+    // Keep every second start; entries behind the middle are left untouched.
+    for (typename std::vector<InputIterator>::size_type i = 0; i <= (os_starts.size() / 2); ++i)
+      os_starts[i] = os_starts[i * 2];
+    range_length *= 2;
+  }
+
+  /** @brief Makes room in @c os_starts, either by merging neighbouring
+   *  ranges (see @c shrink) or by growing the vector.
+   *  @param os_starts Start positions worked on (oversampled).
+   *  @param count_to_two Counts up to 2; reset to 0 when the vector is grown.
+   *  @param range_length Current length of a chunk.
+   *  @param make_twice Whether @c os_starts is allowed to be grown; if so,
+   *  it is grown on every second call and shrunk on the others. */
+  template<typename InputIterator>
+  void
+  shrink_and_double(std::vector<InputIterator>& os_starts, size_t& count_to_two, size_t& range_length, const bool make_twice)
+  {
+    ++count_to_two;
+    if (not make_twice or count_to_two < 2)
+      shrink(os_starts, count_to_two, range_length);
+    else
+      {
+	os_starts.resize((os_starts.size() - 1) * 2 + 1);
+	count_to_two = 0;
+      }
+  }
+
+  /** @brief Splits a sequence given by input iterators into parts of
+   * almost equal size
+   *
+   *  The function needs only one pass over the sequence.
+   *  @param begin Begin iterator of input sequence.
+   *  @param end End iterator of input sequence.
+   *  @param starts Start iterators for the resulting parts, dimension
+   *  @c num_parts+1. For convenience, @c starts @c [num_parts]
+   *  contains the end iterator of the sequence.
+   *  @param lengths Length of the resulting parts.
+   *  @param num_parts Number of parts to split the sequence into.
+   *  @param f Functor to be applied to each element by traversing it
+   *  @param oversampling Oversampling factor. If 0, then the
+   *  partitions will differ in at most @f$ \sqrt{\mathrm{end} -
+   *  \mathrm{begin}} @f$ elements. Otherwise, the ratio between the
+   *  longest and the shortest part is bounded by @f$
+   *  1/(\mathrm{oversampling} \cdot \mathrm{num\_parts}) @f$.
+   *  @return Length of the whole sequence.
+   */
+  template<typename InputIterator, typename FunctorType>
+  size_t
+  list_partition(const InputIterator begin, const InputIterator end,
+		 InputIterator* starts, size_t* lengths, const int num_parts,
+		 FunctorType& f, int oversampling = 0)
+  {
+    // oversampling == 0 selects the strategy in which the table of
+    // start positions may also be grown instead of only being shrunk.
+    const bool make_twice = (oversampling == 0);
+    if (make_twice)
+      oversampling = 1;
+
+    // Oversampled start positions; os_starts[0] is always begin.
+    std::vector<InputIterator> os_starts(2 * oversampling * num_parts + 1);
+    os_starts[0] = begin;
+
+    InputIterator it = begin;
+    size_t dist = 0;		// Elements traversed so far.
+    size_t dist_limit = 0;	// Value of dist at which the current chunk ends.
+    size_t cur = 1;		// Next slot of os_starts to be written.
+    size_t next = 1;		// Slot at which filling resumes after making room.
+    size_t range_length = 1;	// Current number of elements per chunk.
+    size_t count_to_two = 0;	// State counter handed to shrink_and_double.
+
+    while (it != end)
+      {
+	// Fill the free slots, traversing range_length elements for each.
+	for (cur = next; cur < os_starts.size() and it != end; ++cur)
+	  {
+	    dist_limit += range_length;
+	    while (dist < dist_limit and it != end)
+	      {
+		// The functor receives the iterator itself, not the element.
+		f(it);
+		++it;
+		++dist;
+	      }
+	    os_starts[cur] = it;
+	  }
+
+	// Must test for the end of the input and not for
+	// cur < os_starts.size(), because cur may equal the size as well.
+	if (it == end)
+	  break;
+
+	// Table full but input left: make room and continue behind
+	// the slots that are still in use.
+	shrink_and_double(os_starts, count_to_two, range_length, make_twice);
+	next = os_starts.size() / 2 + 1;
+      }
+
+    // cur - 1 chunks have been produced (slot cur - 1 holds the end
+    // position).  Every part gets size_part chunks, and each of the
+    // last size_greater parts gets one chunk more.
+    const size_t size_part = (cur - 1) / num_parts;
+    const int size_greater = static_cast<int>((cur - 1) % num_parts);
+    const int first_greater = num_parts + 1 - size_greater;
+
+    // The chunk counts add up to cur - 1, so starts[num_parts] is the end.
+    starts[0] = os_starts[0];
+    size_t index = 0;
+    for (int i = 1; i <= num_parts; ++i)
+      {
+	const size_t chunks = (i < first_greater) ? size_part : size_part + 1;
+	lengths[i - 1] = chunks * range_length;
+	index += chunks;
+	starts[i] = os_starts[index];
+      }
+
+    // Correction of the end size (the last chunk may be incomplete).
+    lengths[num_parts - 1] -= (dist_limit - dist);
+
+    return dist;
+  }
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/losertree.h b/libstdc++-v3/include/parallel/losertree.h
new file mode 100644
index 00000000000..1823282c9d3
--- /dev/null
+++ b/libstdc++-v3/include/parallel/losertree.h
@@ -0,0 +1,1077 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/losertree.h
+ *  @brief Many generic loser tree variants.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_LOSERTREE_H
+#define _GLIBCXX_PARALLEL_LOSERTREE_H
+
+#include <functional>
+
+#include <bits/stl_algobase.h>
+#include <parallel/features.h>
+#include <parallel/base.h>
+
+namespace __gnu_parallel
+{
+
+#if _GLIBCXX_LOSER_TREE_EXPLICIT
+
+  /** @brief Guarded loser tree, copying the whole element into the
+   * tree structure.
+   *
+   *  Guarding is done explicitly through two flags per element, inf
+   *  and sup.  This is a quite slow variant.
+   */
+  template<typename T, typename Comparator = std::less<T> >
+  class LoserTreeExplicit
+  {
+  private:
+    struct Loser
+    {
+      // The relevant element.
+      T key;
+
+      // Is this an infimum or supremum element?
+      bool inf, sup;
+
+      // Number of the sequence the element comes from.
+      int source;
+    };
+
+    unsigned int size, offset;
+    Loser* losers;
+    Comparator comp;
+
+    // Not copyable: a copy would make two trees delete[] the same array.
+    LoserTreeExplicit(const LoserTreeExplicit&);
+    LoserTreeExplicit& operator=(const LoserTreeExplicit&);
+
+  public:
+    // Allocates _size nodes and marks every one of them as infimum.
+    inline LoserTreeExplicit(unsigned int _size, Comparator _comp = std::less<T>())
+    : size(_size), offset(_size), comp(_comp)
+    {
+      losers = new Loser[size];
+      // key and source of the nodes stay unset here.
+      for (unsigned int l = 0; l < size; l++)
+	{
+	  losers[l].inf = true;
+	  losers[l].sup = false;
+	}
+    }
+
+    inline ~LoserTreeExplicit()
+    { delete[] losers; }
+
+    inline void
+    print() { }  // Intentionally empty for this variant.
+
+    // Returns the source recorded in node 0, where the current winner is kept.
+    inline int
+    get_min_source()
+    { return losers[0].source; }
+
+    // Plays key along the path above leaf `source`; the winner ends up in node 0.
+    inline void
+    insert_start(T key, int source, bool sup)
+    {
+      bool inf = false;
+      for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
+	if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup && comp(losers[pos].key, key)) || losers[pos].inf || sup)
+	  {
+	    // The other one is smaller.
+	    std::swap(losers[pos].key, key);
+	    std::swap(losers[pos].inf, inf);
+	    std::swap(losers[pos].sup, sup);
+	    std::swap(losers[pos].source, source);
+	  }
+
+      losers[0].key = key;
+      losers[0].inf = inf;
+      losers[0].sup = sup;
+      losers[0].source = source;
+    }
+
+    inline void
+    init() { }  // Nothing to do: insert_start already maintains the tree.
+
+    // Replaces the element in node 0 by key (same source) and replays its path.
+    inline void
+    delete_min_insert(T key, bool sup)
+    {
+      bool inf = false;
+      int source = losers[0].source;
+      for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
+	// The smaller one gets promoted.
+	if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup && comp(losers[pos].key, key))
+	    || losers[pos].inf || sup)
+	  {
+	    // The other one is smaller.
+	    std::swap(losers[pos].key, key);
+	    std::swap(losers[pos].inf, inf);
+	    std::swap(losers[pos].sup, sup);
+	    std::swap(losers[pos].source, source);
+	  }
+
+      losers[0].key = key;
+      losers[0].inf = inf;
+      losers[0].sup = sup;
+      losers[0].source = source;
+    }
+
+    // Like insert_start, but equivalent keys are ordered by ascending source.
+    inline void
+    insert_start_stable(T key, int source, bool sup)
+    {
+      bool inf = false;
+      for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
+	if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup &&
+	     ((comp(losers[pos].key, key)) ||
+	      (!comp(key, losers[pos].key) && losers[pos].source < source)))
+	    || losers[pos].inf || sup)
+	  {
+	    // Take next key.
+	    std::swap(losers[pos].key, key);
+	    std::swap(losers[pos].inf, inf);
+	    std::swap(losers[pos].sup, sup);
+	    std::swap(losers[pos].source, source);
+	  }
+
+      losers[0].key = key;
+      losers[0].inf = inf;
+      losers[0].sup = sup;
+      losers[0].source = source;
+    }
+
+    inline void
+    init_stable() { }  // Nothing to do, see init().
+
+    // Like delete_min_insert, but equivalent keys are ordered by ascending source.
+    inline void
+    delete_min_insert_stable(T key, bool sup)
+    {
+      bool inf = false;
+      int source = losers[0].source;
+      for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
+	if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
+	     && ((comp(losers[pos].key, key)) ||
+	      (!comp(key, losers[pos].key) && losers[pos].source < source)))
+	    || losers[pos].inf || sup)
+	  {
+	    std::swap(losers[pos].key, key);
+	    std::swap(losers[pos].inf, inf);
+	    std::swap(losers[pos].sup, sup);
+	    std::swap(losers[pos].source, source);
+	  }
+
+      losers[0].key = key;
+      losers[0].inf = inf;
+      losers[0].sup = sup;
+      losers[0].source = source;
+    }
+  };
+
+#endif
+
+#if _GLIBCXX_LOSER_TREE
+
+  /** @brief Guarded loser tree, copying the whole element into
+   * the tree structure.
+   *
+   *  Guarding is done explicitly through one flag sup per element,
+   *  inf is not needed due to a better initialization routine.  This
+   *  is a well-performing variant.
+   */
+  template<typename T, typename Comparator = std::less<T> >
+  class LoserTree
+  {
+  private:
+    struct Loser
+    {
+      bool sup;
+      int source;
+      T key;
+    };
+
+    unsigned int ik, k, offset;
+    Loser* losers;
+    Comparator comp;
+
+    // Not copyable: a copy would make two trees delete[] the same array.
+    LoserTree(const LoserTree&);
+    LoserTree& operator=(const LoserTree&);
+
+  public:
+    inline LoserTree(unsigned int _k, Comparator _comp = std::less<T>())
+    : comp(_comp)
+    {
+      ik = _k;
+
+      // Next greater power of 2.
+      k = 1 << (log2(ik - 1) + 1);
+      offset = k;
+      losers = new Loser[k * 2];
+      for (unsigned int i = ik - 1; i < k; i++)  // Leaves from ik - 1 on start out as supremum.
+	losers[i + k].sup = true;
+    }
+
+    inline ~LoserTree()
+    { delete[] losers; }
+
+    void
+    print()  // NOTE(review): the "%d" used for key is only right when T is int.
+    {
+      for (unsigned int i = 0; i < (k * 2); i++)
+	printf("%d    %d from %d,  %d\n", i, losers[i].key, losers[i].source, losers[i].sup);
+    }
+
+    inline int
+    get_min_source()
+    { return losers[0].source; }
+
+    inline void
+    insert_start(const T& key, int source, bool sup)
+    {
+      unsigned int pos = k + source;  // Leaf that belongs to this source.
+
+      losers[pos].sup = sup;
+      losers[pos].source = source;
+      losers[pos].key = key;
+    }
+
+    // Plays the tournament below root: every inner node receives the loser
+    // of its match, and the index of the node holding the winner is returned.
+    unsigned int
+    init_winner (unsigned int root)
+    {
+      // A leaf is its own winner.
+      if (root >= k)
+	return root;
+
+      unsigned int left = init_winner (2 * root);
+      unsigned int right = init_winner (2 * root + 1);
+      if (losers[right].sup ||
+	  (!losers[left].sup && !comp(losers[right].key, losers[left].key)))
+	{
+	  // Left one is less or equal.
+	  losers[root] = losers[right];
+	  return left;
+	}
+      else
+	{	// Right one is less.
+	  losers[root] = losers[left];
+	  return right;
+	}
+    }
+
+    inline void
+    init()
+    { losers[0] = losers[init_winner(1)]; }
+
+    // Do not pass const reference since key will be used as local variable.
+    inline void
+    delete_min_insert(T key, bool sup)
+    {
+      int source = losers[0].source;
+      for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+	{
+	  // The smaller one gets promoted.
+	  if (sup || (!losers[pos].sup && comp(losers[pos].key, key)))
+	    {
+	      // The other one is smaller.
+	      std::swap(losers[pos].sup, sup);
+	      std::swap(losers[pos].source, source);
+	      std::swap(losers[pos].key, key);
+	    }
+	}
+
+      losers[0].sup = sup;
+      losers[0].source = source;
+      losers[0].key = key;
+    }
+
+    inline void
+    insert_start_stable(const T& key, int source, bool sup)
+    { return insert_start(key, source, sup); }
+
+    // Same tournament as init_winner; on equivalent keys the left child wins.
+    unsigned int
+    init_winner_stable (unsigned int root)
+    {
+      if (root >= k)
+	return root;
+
+      unsigned int left = init_winner_stable (2 * root);
+      unsigned int right = init_winner_stable (2 * root + 1);
+      if (	losers[right].sup ||
+		(!losers[left].sup && !comp(losers[right].key, losers[left].key)))
+	{
+	  // Left one is less or equal.
+	  losers[root] = losers[right];
+	  return left;
+	}
+      else
+	{
+	  // Right one is less.
+	  losers[root] = losers[left];
+	  return right;
+	}
+    }
+
+    inline void
+    init_stable()
+    { losers[0] = losers[init_winner_stable(1)]; }
+
+    // Do not pass const reference since key will be used as local variable.
+    inline void
+    delete_min_insert_stable(T key, bool sup)
+    {
+      int source = losers[0].source;
+      for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+	{
+	  // The smaller one gets promoted, ties are broken by source.
+	  if (	(sup && (!losers[pos].sup || losers[pos].source < source)) ||
+		(!sup && !losers[pos].sup &&
+		 ((comp(losers[pos].key, key)) ||
+		  (!comp(key, losers[pos].key) && losers[pos].source < source))))
+	    {
+	      // The other one is smaller.
+	      std::swap(losers[pos].sup, sup);
+	      std::swap(losers[pos].source, source);
+	      std::swap(losers[pos].key, key);
+	    }
+	}
+
+      losers[0].sup = sup;
+      losers[0].source = source;
+      losers[0].key = key;
+    }
+  };
+
+#endif
+
+#if _GLIBCXX_LOSER_TREE_REFERENCE
+
+  /** @brief Guarded loser tree, either copying the whole element into
+   * the tree structure, or looking up the element via the index.
+   *
+   *  Guarding is done explicitly through one flag sup per element,
+   *  inf is not needed due to a better initialization routine.  This
+   *  is a well-performing variant.
+   */
+  template<typename T, typename Comparator = std::less<T> >
+  class LoserTreeReference
+  {
+#undef COPY
+#ifdef COPY
+#define KEY(i) losers[i].key
+#define KEY_SOURCE(i) key
+#else
+#define KEY(i) keys[losers[i].source]
+#define KEY_SOURCE(i) keys[i]
+#endif
+  private:
+    struct Loser
+    {
+      bool sup;
+      int source;
+#ifdef COPY
+      T key;
+#endif
+    };
+
+    unsigned int ik, k, offset;
+    Loser* losers;
+#ifndef COPY
+    T* keys;
+#endif
+    Comparator comp;
+    // Not copyable: a copy would make two trees delete[] the same arrays.
+    LoserTreeReference(const LoserTreeReference&);
+    LoserTreeReference& operator=(const LoserTreeReference&);
+
+  public:
+    inline LoserTreeReference(unsigned int _k, Comparator _comp = std::less<T>()) : comp(_comp)
+    {
+      ik = _k;
+
+      // Next greater power of 2.
+      k = 1 << (log2(ik - 1) + 1);
+      offset = k;
+      losers = new Loser[k * 2];
+#ifndef COPY
+      keys = new T[ik];
+#endif
+      for (unsigned int i = ik - 1; i < k; i++)
+	losers[i + k].sup = true;
+    }
+
+    inline ~LoserTreeReference()
+    {
+      delete[] losers;
+#ifndef COPY
+      delete[] keys;
+#endif
+    }
+
+    void
+    print()  // NOTE(review): reads every node, including ones whose source was never set.
+    {
+      for (unsigned int i = 0; i < (k * 2); i++)
+	printf("%d    %d from %d,  %d\n", i, KEY(i), losers[i].source, losers[i].sup);
+    }
+
+    inline int
+    get_min_source()
+    { return losers[0].source; }
+
+    inline void
+    insert_start(T key, int source, bool sup)
+    {
+      unsigned int pos = k + source;
+
+      losers[pos].sup = sup;
+      losers[pos].source = source;
+      KEY(pos) = key;  // source was set just above, so this addresses the right slot.
+    }
+
+    unsigned int
+    init_winner(unsigned int root)
+    {
+      if (root >= k)
+	return root;
+
+      unsigned int left = init_winner (2 * root);
+      unsigned int right = init_winner (2 * root + 1);
+      if (	losers[right].sup ||
+		(!losers[left].sup && !comp(KEY(right), KEY(left))))
+	{
+	  // Left one is less or equal.
+	  losers[root] = losers[right];
+	  return left;
+	}
+      else
+	{
+	  // Right one is less.
+	  losers[root] = losers[left];
+	  return right;
+	}
+    }
+
+    inline void
+    init()
+    { losers[0] = losers[init_winner(1)]; }
+
+    // Replaces the element of the winning source by key and replays its path.
+    inline void
+    delete_min_insert(T key, bool sup)
+    {
+      int source = losers[0].source;
+#ifndef COPY	// Keys live in keys[]: store the new one before it gets compared.
+      keys[source] = key;
+#endif
+      for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+	{
+	  // The smaller one gets promoted.
+	  if (sup || (!losers[pos].sup && comp(KEY(pos), KEY_SOURCE(source))))
+	    {
+	      // The other one is smaller.
+	      std::swap(losers[pos].sup, sup);
+	      std::swap(losers[pos].source, source);
+#ifdef COPY
+	      std::swap(KEY(pos), KEY_SOURCE(source));
+#endif
+	    }
+	}
+
+      losers[0].sup = sup;
+      losers[0].source = source;
+#ifdef COPY
+      KEY(0) = KEY_SOURCE(source);
+#endif
+    }
+
+    inline void
+    insert_start_stable(T key, int source, bool sup)
+    { return insert_start(key, source, sup); }
+
+    unsigned int
+    init_winner_stable(unsigned int root)
+    {
+      if (root >= k)
+	return root;
+
+      unsigned int left = init_winner_stable (2 * root);
+      unsigned int right = init_winner_stable (2 * root + 1);
+      if (losers[right].sup
+	  || (!losers[left].sup && !comp(KEY(right), KEY(left))))
+	{
+	  // Left one is less or equal.
+	  losers[root] = losers[right];
+	  return left;
+	}
+      else
+	{
+	  // Right one is less.
+	  losers[root] = losers[left];
+	  return right;
+	}
+    }
+
+    inline void
+    init_stable()
+    { losers[0] = losers[init_winner_stable(1)]; }
+
+    inline void
+    delete_min_insert_stable(T key, bool sup)
+    {
+      int source = losers[0].source;
+#ifndef COPY	// Keys live in keys[]: store the new one before it gets compared.
+      keys[source] = key;
+#endif
+      for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+	{
+	  // The smaller one gets promoted, ties are broken by source.
+	  if (	(sup && (!losers[pos].sup || losers[pos].source < source)) ||
+		(!sup && !losers[pos].sup &&
+		 ((comp(KEY(pos), KEY_SOURCE(source))) ||
+		  (!comp(KEY_SOURCE(source), KEY(pos)) && losers[pos].source < source))))
+	    {
+	      // The other one is smaller.
+	      std::swap(losers[pos].sup, sup);
+	      std::swap(losers[pos].source, source);
+#ifdef COPY
+	      std::swap(KEY(pos), KEY_SOURCE(source));
+#endif
+	    }
+	}
+
+      losers[0].sup = sup;
+      losers[0].source = source;
+#ifdef COPY
+      KEY(0) = KEY_SOURCE(source);
+#endif
+    }
+  };
+#undef KEY
+#undef KEY_SOURCE
+
+#endif
+
+#if _GLIBCXX_LOSER_TREE_POINTER
+
+  /** @brief Guarded loser tree, keeping only pointers to the elements
+   *  in the tree structure.
+   *  Guarding is done explicitly through one flag sup per element,
+   *  inf is not needed due to a better initialization routine.
+   *  This is a well-performing variant.
+   */
+  template<typename T, typename Comparator = std::less<T> >
+  class LoserTreePointer
+  {
+  private:
+    struct Loser
+    {
+      bool sup;
+      int source;
+      const T* keyp;
+    };
+
+    unsigned int ik, k, offset;
+    Loser* losers;
+    Comparator comp;
+
+    // Not copyable: a copy would make two trees delete[] the same array.
+    LoserTreePointer(const LoserTreePointer&);
+    LoserTreePointer& operator=(const LoserTreePointer&);
+
+  public:
+    inline LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>()) : comp(_comp)
+    {
+      ik = _k;
+
+      // Next greater power of 2.
+      k = 1 << (log2(ik - 1) + 1);
+      offset = k;
+      losers = new Loser[k * 2];
+      for (unsigned int i = ik - 1; i < k; i++)
+	losers[i + k].sup = true;
+    }
+
+    inline ~LoserTreePointer()
+    { delete[] losers; }
+
+    void
+    print()  // Prints the stored address (not the element) of every node.
+    {
+      for (unsigned int i = 0; i < (k * 2); i++)
+	printf("%u    %p from %d,  %d\n", i, static_cast<const void*>(losers[i].keyp), losers[i].source, losers[i].sup);
+    }
+
+    inline int
+    get_min_source()
+    { return losers[0].source; }
+
+    // Only the address of key is stored, so the referenced object has to
+    // stay alive for as long as the tree may still compare it.
+    inline void
+    insert_start(const T& key, int source, bool sup)
+    {
+      unsigned int pos = k + source;
+
+      losers[pos].sup = sup;
+      losers[pos].source = source;
+      losers[pos].keyp = &key;
+    }
+
+    // Stores the loser of each match in its node, returns the winner's index.
+    unsigned int
+    init_winner(unsigned int root)
+    {
+      if (root >= k)
+	return root;
+
+      unsigned int left = init_winner (2 * root);
+      unsigned int right = init_winner (2 * root + 1);
+      if (	losers[right].sup ||
+		(!losers[left].sup && !comp(*losers[right].keyp, *losers[left].keyp)))
+	{
+	  // Left one is less or equal.
+	  losers[root] = losers[right];
+	  return left;
+	}
+      else
+	{
+	  // Right one is less.
+	  losers[root] = losers[left];
+	  return right;
+	}
+    }
+
+    inline void
+    init()
+    { losers[0] = losers[init_winner(1)]; }
+
+    // As with insert_start, only the address of key is kept.
+    inline void delete_min_insert(const T& key, bool sup)
+    {
+      const T* keyp = &key;
+      int source = losers[0].source;
+      for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+	{
+	  // The smaller one gets promoted.
+	  if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp)))
+	    {
+	      // The other one is smaller.
+	      std::swap(losers[pos].sup, sup);
+	      std::swap(losers[pos].source, source);
+	      std::swap(losers[pos].keyp, keyp);
+	    }
+	}
+
+      losers[0].sup = sup;
+      losers[0].source = source;
+      losers[0].keyp = keyp;
+    }
+
+    inline void
+    insert_start_stable(const T& key, int source, bool sup)
+    { return insert_start(key, source, sup); }
+
+    unsigned int
+    init_winner_stable (unsigned int root)
+    {
+      if (root >= k)
+	return root;
+
+      unsigned int left = init_winner_stable (2 * root);
+      unsigned int right = init_winner_stable (2 * root + 1);
+      if (losers[right].sup
+	  || (!losers[left].sup && !comp(*losers[right].keyp, *losers[left].keyp)))
+	{
+	  // Left one is less or equal.
+	  losers[root] = losers[right];
+	  return left;
+	}
+      else
+	{
+	  // Right one is less.
+	  losers[root] = losers[left];
+	  return right;
+	}
+    }
+
+    inline void
+    init_stable()
+    { losers[0] = losers[init_winner_stable(1)]; }
+
+    inline void
+    delete_min_insert_stable(const T& key, bool sup)
+    {
+      const T* keyp = &key;
+      int source = losers[0].source;
+      for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+	{
+	  // The smaller one gets promoted, ties are broken by source.
+	  if (	(sup && (!losers[pos].sup || losers[pos].source < source)) ||
+		(!sup && !losers[pos].sup &&
+		 ((comp(*losers[pos].keyp, *keyp)) ||
+		  (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source))))
+	    {
+	      // The other one is smaller.
+	      std::swap(losers[pos].sup, sup);
+	      std::swap(losers[pos].source, source);
+	      std::swap(losers[pos].keyp, keyp);
+	    }
+	}
+
+      losers[0].sup = sup;
+      losers[0].source = source;
+      losers[0].keyp = keyp;
+    }
+  };
+
+#endif
+
+#if _GLIBCXX_LOSER_TREE_UNGUARDED
+
+  /** @brief Unguarded loser tree, copying the whole element into the
+   * tree structure.
+   *
+   *  No guarding is done, therefore not a single input sequence must
+   *  run empty.  This is a very fast variant.
+   */
+  template<typename T, typename Comparator = std::less<T> >
+  class LoserTreeUnguarded
+  {
+  private:
+    struct Loser
+    {
+      int source;
+      T key;
+    };
+
+    unsigned int ik, k, offset;
+    unsigned int* mapping;
+    Loser* losers;
+    Comparator comp;
+
+    // Not copyable: a copy would make two trees delete[] the same arrays.
+    LoserTreeUnguarded(const LoserTreeUnguarded&);
+    LoserTreeUnguarded& operator=(const LoserTreeUnguarded&);
+
+    // Assigns a tree node to each source in [begin, end) below root.
+    void
+    map(unsigned int root, unsigned int begin, unsigned int end)
+    {
+      if (begin + 1 == end)
+	mapping[begin] = root;
+      else
+	{
+	  // Number of sources handed to the left subtree, a power of 2.
+	  unsigned int left = 1 << (log2(end - begin - 1));
+	  map(root * 2, begin, begin + left);
+	  map(root * 2 + 1, begin + left, end);
+	}
+    }
+
+  public:
+    inline LoserTreeUnguarded(unsigned int _k, Comparator _comp = std::less<T>()) : comp(_comp)
+    {
+      ik = _k;
+      // Next greater or equal power of 2.
+      k = 1 << (log2(ik - 1) + 1);
+      offset = k;
+      losers = new Loser[k + ik];
+      mapping = new unsigned int[ik];
+      map(1, 0, ik);
+    }
+
+    inline ~LoserTreeUnguarded()
+    {
+      delete[] losers;
+      delete[] mapping;
+    }
+
+    void
+    print()  // NOTE(review): the "%d" used for key is only right when T is int.
+    {
+      for (unsigned int i = 0; i < k + ik; i++)
+	printf("%d    %d from %d\n", i, losers[i].key, losers[i].source);
+    }
+
+    inline int
+    get_min_source()
+    { return losers[0].source; }
+
+    inline void
+    insert_start(const T& key, int source, bool)
+    {
+      unsigned int pos = mapping[source];
+      losers[pos].source = source;
+      losers[pos].key = key;
+    }
+
+    // Stores the loser of each match in its node, returns the winner's index.
+    unsigned int
+    init_winner(unsigned int root, unsigned int begin, unsigned int end)
+    {
+      if (begin + 1 == end)
+	return mapping[begin];
+
+      // Same split as in map().
+      unsigned int division = 1 << (log2(end - begin - 1));
+      unsigned int left = init_winner(2 * root, begin, begin + division);
+      unsigned int right = init_winner(2 * root + 1, begin + division, end);
+      if (!comp(losers[right].key, losers[left].key))
+	{
+	  // Left one is less or equal.
+	  losers[root] = losers[right];
+	  return left;
+	}
+      else
+	{
+	  // Right one is less.
+	  losers[root] = losers[left];
+	  return right;
+	}
+    }
+
+    inline void
+    init()
+    { losers[0] = losers[init_winner(1, 0, ik)]; }
+
+    // key is copied into node 0 first; that node then serves as the working element.
+    inline void
+    delete_min_insert(const T& key, bool)
+    {
+      losers[0].key = key;
+      T& keyr = losers[0].key;
+      int& source = losers[0].source;
+      for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
+	// The smaller one gets promoted.
+	if (comp(losers[pos].key, keyr))
+	  {
+	    // The other one is smaller.
+	    std::swap(losers[pos].source, source);
+	    std::swap(losers[pos].key, keyr);
+	  }
+    }
+
+    inline void
+    insert_start_stable(const T& key, int source, bool)
+    { return insert_start(key, source, false); }
+
+    inline void
+    init_stable()
+    { init(); }
+
+    inline void
+    delete_min_insert_stable(const T& key, bool)
+    {
+      losers[0].key = key;
+      T& keyr = losers[0].key;
+      int& source = losers[0].source;
+      for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
+	// The smaller one gets promoted, ties are broken by source.
+	if (comp(losers[pos].key, keyr)
+	    || (!comp(keyr, losers[pos].key) && losers[pos].source < source))
+	  {
+	    // The other one is smaller.
+	    std::swap(losers[pos].source, source);
+	    std::swap(losers[pos].key, keyr);
+	  }
+    }
+  };
+
+#endif
+
+#if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
+
+  /** @brief Unguarded loser tree, keeping only pointers to the
+   * elements in the tree structure.
+   *
+   *  No guarding is done, therefore not a single input sequence must
+   *  run empty.  This is a very fast variant.
+   */
+  template<typename T, typename Comparator = std::less<T> >
+  class LoserTreePointerUnguarded
+  {
+  private:
+    struct Loser
+    {
+      int source;
+      const T* keyp;
+    };
+
+    unsigned int ik, k, offset;
+    unsigned int* mapping;
+    Loser* losers;
+    Comparator comp;
+
+    // Not copyable: a copy would make two trees delete[] the same arrays.
+    LoserTreePointerUnguarded(const LoserTreePointerUnguarded&);
+    LoserTreePointerUnguarded& operator=(const LoserTreePointerUnguarded&);
+
+    // Assigns a tree node to each source in [begin, end) below root.
+    void map(unsigned int root, unsigned int begin, unsigned int end)
+    {
+      if (begin + 1 == end)
+	mapping[begin] = root;
+      else
+	{
+	  // Number of sources handed to the left subtree, a power of 2.
+	  unsigned int left = 1 << (log2(end - begin - 1));
+	  map(root * 2, begin, begin + left);
+	  map(root * 2 + 1, begin + left, end);
+	}
+    }
+
+  public:
+    inline LoserTreePointerUnguarded(unsigned int _k, Comparator _comp = std::less<T>()) : comp(_comp)
+    {
+      ik = _k;
+
+      // Next greater power of 2.
+      k = 1 << (log2(ik - 1) + 1);
+      offset = k;
+      losers = new Loser[k + ik];
+      mapping = new unsigned int[ik];
+      map(1, 0, ik);
+    }
+
+    inline ~LoserTreePointerUnguarded()
+    {
+      delete[] losers;
+      delete[] mapping;
+    }
+
+    void
+    print()  // NOTE(review): dereferences keyp of every node and prints it with "%d".
+    {
+      for (unsigned int i = 0; i < k + ik; i++)
+	printf("%d    %d from %d\n", i, *losers[i].keyp, losers[i].source);
+    }
+
+    inline int
+    get_min_source()
+    { return losers[0].source; }
+
+    // Only the address of key is stored, so the referenced object has to
+    // stay alive for as long as the tree may still compare it.
+    inline void
+    insert_start(const T& key, int source, bool)
+    {
+      unsigned int pos = mapping[source];
+      losers[pos].source = source;
+      losers[pos].keyp = &key;
+    }
+
+    unsigned int
+    init_winner(unsigned int root, unsigned int begin, unsigned int end)
+    {
+      if (begin + 1 == end)
+	return mapping[begin];
+
+      // Same split as in map().
+      unsigned int division = 1 << (log2(end - begin - 1));
+      unsigned int left = init_winner(2 * root, begin, begin + division);
+      unsigned int right = init_winner(2 * root + 1, begin + division, end);
+      if (!comp(*losers[right].keyp, *losers[left].keyp))
+	{
+	  // Left one is less or equal.
+	  losers[root] = losers[right];
+	  return left;
+	}
+      else
+	{
+	  // Right one is less.
+	  losers[root] = losers[left];
+	  return right;
+	}
+    }
+
+    inline void
+    init()
+    { losers[0] = losers[init_winner(1, 0, ik)]; }
+
+    // As with insert_start, only the address of key is kept.
+    inline void
+    delete_min_insert(const T& key, bool)
+    {
+      const T* keyp = &key;
+      int& source = losers[0].source;
+      for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
+	// The smaller one gets promoted.
+	if (comp(*losers[pos].keyp, *keyp))
+	  {
+	    // The other one is smaller.
+	    std::swap(losers[pos].source, source);
+	    std::swap(losers[pos].keyp, keyp);
+	  }
+
+      losers[0].keyp = keyp;
+    }
+
+    inline void
+    insert_start_stable(const T& key, int source, bool)
+    { return insert_start(key, source, false); }
+
+    inline void
+    init_stable()
+    { init(); }
+
+    inline void
+    delete_min_insert_stable(const T& key, bool)
+    {
+      int& source = losers[0].source;
+      const T* keyp = &key;
+      for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
+	// The smaller one gets promoted, ties are broken by source.
+	if (comp(*losers[pos].keyp, *keyp)
+	    || (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source))
+	  {
+	    // The other one is smaller.
+	    std::swap(losers[pos].source, source);
+	    std::swap(losers[pos].keyp, keyp);
+	  }
+      losers[0].keyp = keyp;
+    }
+  };
+#endif
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/merge.h b/libstdc++-v3/include/parallel/merge.h
new file mode 100644
index 00000000000..0bf29497f53
--- /dev/null
+++ b/libstdc++-v3/include/parallel/merge.h
@@ -0,0 +1,230 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/merge.h
+ *  @brief Parallel implementation of std::merge().
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_MERGE_H
+#define _GLIBCXX_PARALLEL_MERGE_H 1
+
+#include <parallel/basic_iterator.h>
+#include <bits/stl_algo.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Merge routine being able to merge only the @c max_length
+   * smallest elements.
+   *
+   * The @c begin iterators are advanced accordingly, they might not
+   * reach @c end, in contrast to the usual variant.
+   * @param begin1 Begin iterator of first sequence.
+   * @param end1 End iterator of first sequence.
+   * @param begin2 Begin iterator of second sequence.
+   * @param end2 End iterator of second sequence.
+   * @param target Target begin iterator.
+   * @param max_length Maximum number of elements to merge.
+   * @param comp Comparator.
+   * @return Output end iterator. */
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename OutputIterator, typename _DifferenceTp, typename Comparator>
+  OutputIterator
+  merge_advance_usual(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, RandomAccessIterator2& begin2, RandomAccessIterator2 end2, OutputIterator target, _DifferenceTp max_length, Comparator comp)
+  {
+    // Interleave while both inputs have elements and budget remains;
+    // on equivalent elements the first sequence is served first.
+    for (; begin1 != end1 && begin2 != end2 && max_length > 0; max_length--)
+      {
+	if (!comp(*begin2, *begin1))
+	  *target++ = *begin1++;
+	else
+	  *target++ = *begin2++;
+      }
+
+    // Spend the rest of the budget on the first sequence unless it is
+    // exhausted, in which case the second one supplies the remainder.
+    if (begin1 == end1)
+      {
+	target = std::copy(begin2, begin2 + max_length, target);
+	begin2 += max_length;
+	return target;
+      }
+
+    target = std::copy(begin1, begin1 + max_length, target);
+    begin1 += max_length;
+    return target;
+  }
+
+  /** @brief Merge routine being able to merge only the @c max_length
+   * smallest elements.
+   *
+   * The @c begin iterators are advanced accordingly, they might not
+   * reach @c end, in contrast to the usual variant.
+   * Specially designed code should allow the compiler to generate
+   * conditional moves instead of branches.
+   * @param begin1 Begin iterator of first sequence.
+   * @param end1 End iterator of first sequence.
+   * @param begin2 Begin iterator of second sequence.
+   * @param end2 End iterator of second sequence.
+   * @param target Target begin iterator.
+   * @param max_length Maximum number of elements to merge.
+   * @param comp Comparator.
+   * @return Output end iterator. */
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename OutputIterator, typename _DifferenceTp, typename Comparator>
+  OutputIterator
+  merge_advance_movc(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, RandomAccessIterator2& begin2, RandomAccessIterator2 end2, OutputIterator target, _DifferenceTp max_length, Comparator comp)
+  {
+    typedef _DifferenceTp difference_type;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type value_type1;
+    typedef typename std::iterator_traits<RandomAccessIterator2>::value_type value_type2;
+    // Each head element is copied into a local of its own sequence's value type.
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(max_length >= 0);
+#endif
+    // Merge while both inputs are non-empty and the budget is not used up.
+    while (begin1 != end1 && begin2 != end2 && max_length > 0)
+      {
+	value_type1 element1;
+	value_type2 element2;
+	RandomAccessIterator1 next1;
+	RandomAccessIterator2 next2;
+	// Compute both successors and copy both heads before comparing.
+	next1 = begin1 + 1;
+	next2 = begin2 + 1;
+	element1 = *begin1;
+	element2 = *begin2;
+	// The second input is taken only if its head is strictly less.
+	if (comp(element2, element1))
+	  {
+	    element1 = element2;
+	    begin2 = next2;
+	  }
+	else
+	  {
+	    begin1 = next1;
+	  }
+	// element1 now holds whichever head was selected.
+	*target = element1;
+
+	target++;
+	max_length--;
+      }
+    if (begin1 != end1)
+      {
+	target = std::copy(begin1, begin1 + max_length, target);
+	begin1 += max_length;
+      }
+    else
+      {
+	target = std::copy(begin2, begin2 + max_length, target);
+	begin2 += max_length;
+      }
+    return target;
+  }
+
+  /** @brief Merge routine being able to merge only the @c max_length
+   * smallest elements.
+   *
+   *  The @c begin iterators are advanced accordingly, they might not
+   *  reach @c end, in contrast to the usual variant.
+   *  Static switch on whether to use the conditional-move variant.
+   *  @param begin1 Begin iterator of first sequence.
+   *  @param end1 End iterator of first sequence.
+   *  @param begin2 Begin iterator of second sequence.
+   *  @param end2 End iterator of second sequence.
+   *  @param target Target begin iterator.
+   *  @param max_length Maximum number of elements to merge.
+   *  @param comp Comparator.
+   *  @return Output end iterator. */
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename OutputIterator, typename _DifferenceTp, typename Comparator>
+  inline OutputIterator
+  merge_advance(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, RandomAccessIterator2& begin2, RandomAccessIterator2 end2, OutputIterator target, _DifferenceTp max_length, Comparator comp)
+  {
+    _GLIBCXX_CALL(max_length)
+    // No selection happens here: the call always goes to merge_advance_movc.
+      return merge_advance_movc(begin1, end1, begin2, end2, target, max_length, comp);
+  }
+
+  /** @brief Merge routine fallback to sequential in case the
+      iterators of the two input sequences are of different type.
+      *  @param begin1 Begin iterator of first sequence.
+      *  @param end1 End iterator of first sequence.
+      *  @param begin2 Begin iterator of second sequence.
+      *  @param end2 End iterator of second sequence.
+      *  @param target Target begin iterator.
+      *  @param max_length Maximum number of elements to merge.
+      *  @param comp Comparator.
+      *  @return Output end iterator. */
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename Comparator>
+  inline RandomAccessIterator3
+  parallel_merge_advance(RandomAccessIterator1& begin1, RandomAccessIterator1 end1,
+			 RandomAccessIterator2& begin2, RandomAccessIterator2 end2,	// Iterator types differ: no parallel implementation, merge_advance is used.
+			 RandomAccessIterator3 target,
+			 typename std::iterator_traits<RandomAccessIterator1>::difference_type max_length, Comparator comp)
+  {
+    return merge_advance(begin1, end1, begin2, end2, target, max_length, comp);
+  }
+
+  /** @brief Parallel merge routine being able to merge only the @c
+   * max_length smallest elements.
+   *
+   *  The @c begin iterators are advanced accordingly, they might not
+   *  reach @c end, in contrast to the usual variant.
+   *  The functionality is projected onto parallel_multiway_merge.
+   *  @param begin1 Begin iterator of first sequence.
+   *  @param end1 End iterator of first sequence.
+   *  @param begin2 Begin iterator of second sequence.
+   *  @param end2 End iterator of second sequence.
+   *  @param target Target begin iterator.
+   *  @param max_length Maximum number of elements to merge.
+   *  @param comp Comparator.
+   *  @return Output end iterator.
+   */
+  template<typename RandomAccessIterator1, typename RandomAccessIterator3, typename Comparator>
+  inline RandomAccessIterator3
+  parallel_merge_advance(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, RandomAccessIterator1& begin2, RandomAccessIterator1 end2, RandomAccessIterator3 target, typename std::iterator_traits<RandomAccessIterator1>::difference_type max_length, Comparator comp)
+  {
+    // Both inputs share one iterator type, so they can be handed to the
+    // multiway merge as a two-element array of (begin, end) pairs.
+    typedef std::pair<RandomAccessIterator1, RandomAccessIterator1>
+      iterator_pair;
+
+    iterator_pair seqs[2] = { iterator_pair(begin1, end1),
+			      iterator_pair(begin2, end2) };
+
+    // The merge works on the copies held in seqs.
+    return parallel_multiway_merge(seqs, seqs + 2, target, comp, max_length,
+				   true, false);
+  }
+}	//namespace __gnu_parallel
+
+#endif
diff --git a/libstdc++-v3/include/parallel/multiseq_selection.h b/libstdc++-v3/include/parallel/multiseq_selection.h
new file mode 100644
index 00000000000..5b34173cff2
--- /dev/null
+++ b/libstdc++-v3/include/parallel/multiseq_selection.h
@@ -0,0 +1,608 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/multiseq_selection.h
+ *  @brief Functions to find elements of a certain global rank in
+ *  multiple sorted sequences.  Also serves for splitting such
+ *  sequence sets.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H
+#define _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H 1
+
+#include <vector>
+#include <queue>
+
+#include <bits/stl_algo.h>
+
+#include <parallel/sort.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Compare a pair of types lexicographically, ascending. */
+  template<typename T1, typename T2, typename Comparator>
+  class lexicographic : public std::binary_function<std::pair<T1, T2>, std::pair<T1, T2>, bool>
+  {
+  private:
+    Comparator& comp;
+
+  public:
+    lexicographic(Comparator& _comp) : comp(_comp) { }
+
+    // Ascending: by comp on .first, then by operator< on .second.
+    inline bool
+    operator()(const std::pair<T1, T2>& p1, const std::pair<T1, T2>& p2) const
+    {
+      bool less;
+      if (comp(p1.first, p2.first))
+	less = true;
+      else if (comp(p2.first, p1.first))
+	less = false;
+      else
+	less = p1.second < p2.second;
+      return less;
+    }
+  };
+
+  /** @brief Compare a pair of types lexicographically, descending. */
+  template<typename T1, typename T2, typename Comparator>
+  class lexicographic_reverse : public std::binary_function<std::pair<T1, T2>, std::pair<T1, T2>, bool>
+  {
+    // The base class names the pair type that operator() really takes,
+    // as class lexicographic does.  comp is held by reference.
+    Comparator& comp;
+
+  public:
+    lexicographic_reverse(Comparator& _comp) : comp(_comp) { }
+
+    inline bool
+    operator()(const std::pair<T1, T2>& p1, const std::pair<T1, T2>& p2) const
+    {
+      if (comp(p2.first, p1.first))
+	return true;
+      if (comp(p1.first, p2.first))
+	return false;
+
+      // Firsts are equivalent under comp: decide by the seconds, descending.
+      return p2.second < p1.second;
+    }
+  };
+
+  /** 
+   *  @brief Splits several sorted sequences at a certain global rank,
+   *  resulting in a splitting point for each sequence.
+   *  The sequences are passed via a sequence of random-access
+   *  iterator pairs, none of the sequences may be empty.  If there
+   *  are several equal elements across the split, the ones on the
+   *  left side will be chosen from sequences with smaller number.
+   *  @param begin_seqs Begin of the sequence of iterator pairs.
+   *  @param end_seqs End of the sequence of iterator pairs.
+   *  @param rank The global rank to partition at.
+   *  @param begin_offsets A random-access sequence begin where the
+   *  result will be stored in. Each element of the sequence is an
+   *  iterator that points to the first element on the greater part of
+   *  the respective sequence.
+   *  @param comp The ordering functor, defaults to std::less<T>. 
+   */
+  template<typename RanSeqs, typename RankType, typename RankIterator, typename Comparator>
+  void
+  multiseq_partition(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank,
+		     RankIterator begin_offsets,
+		     Comparator comp = std::less<
+		     typename std::iterator_traits<typename std::iterator_traits<RanSeqs>::value_type::first_type>::value_type>()) // std::less<T>
+  {
+    _GLIBCXX_CALL(end_seqs - begin_seqs)
+
+    typedef typename std::iterator_traits<RanSeqs>::value_type::first_type It;
+    typedef typename std::iterator_traits<It>::difference_type difference_type;
+    typedef typename std::iterator_traits<It>::value_type T;
+
+    lexicographic<T, int, Comparator> lcomp(comp);
+    lexicographic_reverse<T, int, Comparator> lrcomp(comp);
+
+    // Number of sequences, number of elements in total (possibly
+    // including padding).
+    difference_type m = std::distance(begin_seqs, end_seqs), N = 0, nmax, n, r;
+
+    for (int i = 0; i < m; i++)
+      N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
+    // Splitting at the total size: every offset is the end of its sequence.
+    if (rank == N)
+      {
+	for (int i = 0; i < m; i++)
+	  begin_offsets[i] = begin_seqs[i].second; // Very end.
+	return; // Done; the general algorithm below requires rank < N.
+      }
+
+    _GLIBCXX_PARALLEL_ASSERT(m != 0 && N != 0 && rank >= 0 && rank < N);
+
+    difference_type* ns = new difference_type[m];
+    difference_type* a = new difference_type[m];
+    difference_type* b = new difference_type[m];
+    difference_type l;
+
+    ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
+    nmax = ns[0];
+    for (int i = 0; i < m; i++)
+      {
+	ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
+	nmax = std::max(nmax, ns[i]);
+      }
+
+    r = log2(nmax) + 1;
+
+    // Pad all lists to this length, at least as long as any ns[i],
+    // equality iff nmax = 2^k - 1.
+    l = (1ULL << r) - 1;
+
+    // From now on, including padding.
+    N = l * m;
+
+    for (int i = 0; i < m; i++)
+      {
+	a[i] = 0;
+	b[i] = l;
+      }
+    n = l / 2;
+
+    // Invariants:
+    // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
+
+#define S(i) (begin_seqs[i].first)
+
+    // Initial partition.
+    std::vector<std::pair<T, int> > sample;
+
+    for (int i = 0; i < m; i++)
+      if (n < ns[i])	// Sequence long enough.
+	sample.push_back(std::make_pair(S(i)[n], i));
+    __gnu_sequential::sort(sample.begin(), sample.end(), lcomp);
+
+    for (int i = 0; i < m; i++)	// Conceptual infinity.
+      if (n >= ns[i])	// Sequence too short, conceptual infinity.
+	sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
+
+    difference_type localrank = rank * m / N ;
+
+    int j;
+    for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); j++)
+      a[sample[j].second] += n + 1;
+    for (; j < m; j++)
+      b[sample[j].second] -= n + 1;
+
+    // Further refinement.
+    while (n > 0)
+      {
+	n /= 2;
+
+	int lmax_seq = -1;	// To avoid warning.
+	const T* lmax = NULL;	// Stays NULL while no a[i] is positive.
+	for (int i = 0; i < m; i++)
+	  {
+	    if (a[i] > 0)
+	      {
+		if (!lmax)
+		  {
+		    lmax = &(S(i)[a[i] - 1]);
+		    lmax_seq = i;
+		  }
+		else
+		  {
+		    // Max, favor rear sequences.
+		    if (!comp(S(i)[a[i] - 1], *lmax))
+		      {
+			lmax = &(S(i)[a[i] - 1]);
+			lmax_seq = i;
+		      }
+		  }
+	      }
+	  }
+
+	int i;
+	for (i = 0; i < m; i++)
+	  {
+	    difference_type middle = (b[i] + a[i]) / 2;
+	    if (lmax && middle < ns[i] &&
+		lcomp(std::make_pair(S(i)[middle], i), std::make_pair(*lmax, lmax_seq)))
+	      a[i] = std::min(a[i] + n + 1, ns[i]);
+	    else
+	      b[i] -= n + 1;
+	  }
+
+	difference_type leftsize = 0, total = 0;
+	for (int i = 0; i < m; i++)
+	  {
+	    leftsize += a[i] / (n + 1);
+	    total += l / (n + 1);
+	  }
+
+	difference_type skew = static_cast<difference_type>(static_cast<uint64>(total) * rank / N - leftsize);
+
+	if (skew > 0)
+	  {
+	    // Move to the left, find smallest.
+	    std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic_reverse<T, int, Comparator> > pq(lrcomp);
+
+	    for (int i = 0; i < m; i++)
+	      if (b[i] < ns[i])
+		pq.push(std::make_pair(S(i)[b[i]], i));
+
+	    for (; skew != 0 && !pq.empty(); skew--)
+	      {
+		int source = pq.top().second;
+		pq.pop();
+
+		a[source] = std::min(a[source] + n + 1, ns[source]);
+		b[source] += n + 1;
+
+		if (b[source] < ns[source])
+		  pq.push(std::make_pair(S(source)[b[source]], source));
+	      }
+	  }
+	else if (skew < 0)
+	  {
+	    // Move to the right, find greatest.
+	    std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic<T, int, Comparator> > pq(lcomp);
+
+	    for (int i = 0; i < m; i++)
+	      if (a[i] > 0)
+		pq.push(std::make_pair(S(i)[a[i] - 1], i));
+
+	    for (; skew != 0; skew++)
+	      {
+		int source = pq.top().second;
+		pq.pop();
+
+		a[source] -= n + 1;
+		b[source] -= n + 1;
+
+		if (a[source] > 0)
+		  pq.push(std::make_pair(S(source)[a[source] - 1], source));
+	      }
+	  }
+      }
+
+    // Postconditions:
+    // a[i] == b[i] in most cases, except when a[i] has been clamped
+    // because of having reached the boundary
+
+    // Now return the result, calculate the offset.
+
+    // Compare the keys on both edges of the border.
+    // NOTE: maxleft and minright are computed here but not read afterwards.
+    // Maximum of left edge, minimum of right edge.
+    bool maxleftset = false, minrightset = false;
+    T maxleft, minright;	// Impossible to avoid the warning?
+    for (int i = 0; i < m; i++)
+      {
+	if (a[i] > 0)
+	  {
+	    if (!maxleftset)
+	      {
+		maxleft = S(i)[a[i] - 1];
+		maxleftset = true;
+	      }
+	    else
+	      {
+		// Max, favor rear sequences.
+		if (!comp(S(i)[a[i] - 1], maxleft))
+		  maxleft = S(i)[a[i] - 1];
+	      }
+	  }
+	if (b[i] < ns[i])
+	  {
+	    if (!minrightset)
+	      {
+		minright = S(i)[b[i]];
+		minrightset = true;
+	      }
+	    else
+	      {
+		// Min, favor fore sequences.
+		if (comp(S(i)[b[i]], minright))
+		  minright = S(i)[b[i]];
+	      }
+	  }
+      }
+
+    // Sequence i is split a[i] elements after its begin.
+    for (int i = 0; i < m; i++)
+      begin_offsets[i] = S(i) + a[i];
+
+    delete[] ns;
+    delete[] a;
+    delete[] b;
+  }
+
+
+
+  /** 
+   *  @brief Selects the element at a certain global rank from several
+   *  sorted sequences.
+   *
+   *  The sequences are passed via a sequence of random-access
+   *  iterator pairs, none of the sequences may be empty.
+   *  @param begin_seqs Begin of the sequence of iterator pairs.
+   *  @param end_seqs End of the sequence of iterator pairs.
+   *  @param rank The global rank to partition at.
+   *  @param offset The rank of the selected element in the global
+   *  subsequence of elements equal to the selected element. If the
+   *  selected element is unique, this number is 0.
+   *  @param comp The ordering functor, defaults to std::less. 
+   */
+  template<typename T, typename RanSeqs, typename RankType, typename Comparator>
+  T 
+  multiseq_selection(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank,
+		     RankType& offset, Comparator comp = std::less<T>())
+  {
+    _GLIBCXX_CALL(end_seqs - begin_seqs)
+
+    typedef typename std::iterator_traits<RanSeqs>::value_type::first_type It;
+    typedef typename std::iterator_traits<It>::difference_type difference_type;
+
+    lexicographic<T, int, Comparator> lcomp(comp);
+    lexicographic_reverse<T, int, Comparator> lrcomp(comp);
+
+    // Number of sequences, number of elements in total (possibly
+    // including padding).
+    difference_type m = std::distance(begin_seqs, end_seqs);
+    difference_type N = 0;
+    difference_type nmax, n, r;
+
+    for (int i = 0; i < m; i++)
+      N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
+
+    if (m == 0 || N == 0 || rank < 0 || rank >= N)
+      {
+	// Result undefined when there is no data or rank is outside bounds.
+	throw std::exception();
+      }
+
+    // Per-sequence lengths (ns) and working bounds (a, b); freed at the end.
+    difference_type* ns = new difference_type[m];
+    difference_type* a = new difference_type[m];
+    difference_type* b = new difference_type[m];
+    difference_type l;
+
+    ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
+    nmax = ns[0];
+    for (int i = 0; i < m; i++)
+      {
+	ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
+	nmax = std::max(nmax, ns[i]);
+      }
+
+    r = log2(nmax) + 1;
+
+    // Pad all lists to this length, at least as long as any ns[i],
+    // equality iff nmax = 2^k - 1
+    l = pow2(r) - 1;
+
+    // From now on, including padding.
+    N = l * m;
+
+    for (int i = 0; i < m; i++)
+      {
+	a[i] = 0;
+	b[i] = l;
+      }
+    n = l / 2;
+
+    // Invariants:
+    // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
+
+#define S(i) (begin_seqs[i].first)
+
+    // Initial partition.
+    std::vector<std::pair<T, int> > sample;
+
+    for (int i = 0; i < m; i++)
+      if (n < ns[i])
+	sample.push_back(std::make_pair(S(i)[n], i));
+    __gnu_sequential::sort(sample.begin(), sample.end(), lcomp, sequential_tag());
+
+    // Conceptual infinity.
+    for (int i = 0; i < m; i++)
+      if (n >= ns[i])
+	sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
+
+    difference_type localrank = rank * m / N ;
+
+    int j;
+    for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); j++)
+      a[sample[j].second] += n + 1;
+    for (; j < m; j++)
+      b[sample[j].second] -= n + 1;
+
+    // Further refinement.
+    while (n > 0)
+      {
+	n /= 2;
+
+	const T* lmax = NULL;
+	for (int i = 0; i < m; i++)
+	  {
+	    if (a[i] > 0)
+	      {
+		if (!lmax)
+		  {
+		    lmax = &(S(i)[a[i] - 1]);
+		  }
+		else
+		  {
+		    if (comp(*lmax, S(i)[a[i] - 1]))	// Max.
+		      lmax = &(S(i)[a[i] - 1]);
+		  }
+	      }
+	  }
+
+	int i;
+	for (i = 0; i < m; i++)
+	  {
+	    difference_type middle = (b[i] + a[i]) / 2;
+	    if (lmax && middle < ns[i] && comp(S(i)[middle], *lmax))
+	      a[i] = std::min(a[i] + n + 1, ns[i]);
+	    else
+	      b[i] -= n + 1;
+	  }
+
+	difference_type leftsize = 0, total = 0;
+	for (int i = 0; i < m; i++)
+	  {
+	    leftsize += a[i] / (n + 1);
+	    total += l / (n + 1);
+	  }
+
+	difference_type skew = (unsigned long long)total * rank / N - leftsize;
+
+	if (skew > 0)
+	  {
+	    // Move to the left, find smallest.
+	    std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic_reverse<T, int, Comparator> > pq(lrcomp);
+
+	    for (int i = 0; i < m; i++)
+	      if (b[i] < ns[i])
+		pq.push(std::make_pair(S(i)[b[i]], i));
+
+	    for (; skew != 0 && !pq.empty(); skew--)
+	      {
+		int source = pq.top().second;
+		pq.pop();
+
+		a[source] = std::min(a[source] + n + 1, ns[source]);
+		b[source] += n + 1;
+
+		if (b[source] < ns[source])
+		  pq.push(std::make_pair(S(source)[b[source]], source));
+	      }
+	  }
+	else if (skew < 0)
+	  {
+	    // Move to the right, find greatest.
+	    std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic<T, int, Comparator> > pq(lcomp);
+
+	    for (int i = 0; i < m; i++)
+	      if (a[i] > 0)
+		pq.push(std::make_pair(S(i)[a[i] - 1], i));
+
+	    for (; skew != 0; skew++)
+	      {
+		int source = pq.top().second;
+		pq.pop();
+
+		a[source] -= n + 1;
+		b[source] -= n + 1;
+
+		if (a[source] > 0)
+		  pq.push(std::make_pair(S(source)[a[source] - 1], source));
+	      }
+	  }
+      }
+
+    // Postconditions:
+    // a[i] == b[i] in most cases, except when a[i] has been clamped
+    // because of having reached the boundary
+
+    // Now return the result, calculate the offset.
+
+    // Compare the keys on both edges of the border.
+
+    // Maximum of left edge, minimum of right edge.
+    bool maxleftset = false, minrightset = false;
+
+    // Impossible to avoid the warning?
+    T maxleft, minright;
+    for (int i = 0; i < m; i++)
+      {
+	if (a[i] > 0)
+	  {
+	    if (!maxleftset)
+	      {
+		maxleft = S(i)[a[i] - 1];
+		maxleftset = true;
+	      }
+	    else
+	      {
+		// Max.
+		if (comp(maxleft, S(i)[a[i] - 1]))
+		  maxleft = S(i)[a[i] - 1];
+	      }
+	  }
+	if (b[i] < ns[i])
+	  {
+	    if (!minrightset)
+	      {
+		minright = S(i)[b[i]];
+		minrightset = true;
+	      }
+	    else
+	      {
+		// Min.
+		if (comp(S(i)[b[i]], minright))
+		  minright = S(i)[b[i]];
+	      }
+	  }
+      }
+
+    // Minright is the splitter, in any case.
+
+    if (!maxleftset || comp(minright, maxleft))
+      {
+	// Lucky case: everything is split unambiguously.
+	offset = 0;
+      }
+    else
+      {
+	// We have to calculate an offset.
+	offset = 0;
+
+	for (int i = 0; i < m; i++)
+	  {
+	    difference_type lb = std::lower_bound(S(i), S(i) + ns[i], minright,
+						  comp) - S(i);
+	    offset += a[i] - lb;
+	  }
+      }
+
+    delete[] ns;
+    delete[] a;
+    delete[] b;
+
+    return minright;
+  }
+}
+
+#undef S
+
+#endif
+
diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h
new file mode 100644
index 00000000000..cdafacbd7a8
--- /dev/null
+++ b/libstdc++-v3/include/parallel/multiway_merge.h
@@ -0,0 +1,1639 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/multiway_merge.h
+ *  @brief Implementation of sequential and parallel multiway merge.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
+#define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
+
+#include <vector>
+
+#include <bits/stl_algo.h>
+#include <parallel/features.h>
+#include <parallel/parallel.h>
+#include <parallel/merge.h>
+#include <parallel/losertree.h>
+#include <parallel/timing.h>
+#if _GLIBCXX_ASSERTIONS
+#include <parallel/checkers.h>
+#endif
+
+/** @brief Length of a sequence described by a pair of iterators. */
+#define LENGTH(s) ((s).second - (s).first)
+
+// XXX need iterator typedefs
+namespace __gnu_parallel
+{
+  // Forward declarations: the comparison operators are declared ahead of
+  // the class so that the class can befriend their specializations.
+  template<typename RandomAccessIterator, typename Comparator>
+  class guarded_iterator;
+
+  template<typename RandomAccessIterator, typename Comparator>
+  inline bool
+  operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
+            guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+
+  template<typename RandomAccessIterator, typename Comparator>
+  inline bool
+  operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
+             guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+
+  /** @brief Iterator wrapper that knows where its sequence ends.
+   *
+   *  The associated comparison operators treat a wrapper whose position
+   *  has reached the end as an implicit supremum, so the end position
+   *  itself is never dereferenced by them.
+   *
+   *  The wrapper holds the iterator as a member instead of deriving from
+   *  RandomAccessIterator, because RandomAccessIterator need not be a
+   *  class type.
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  class guarded_iterator
+  {
+  public:
+    /** @brief Constructor. Positions the wrapper at the start of the
+     *  sequence.
+     *  @param begin Begin iterator of sequence.
+     *  @param end End iterator of sequence.
+     *  @param comp Comparator used by the associated overloaded
+     *  comparison operators; held by reference. */
+    guarded_iterator(RandomAccessIterator begin,
+                     RandomAccessIterator end, Comparator& comp)
+    : current(begin), end(end), comp(comp)
+    { }
+
+    /** @brief Pre-increment: advance the wrapped position by one.
+     *  @return This. */
+    guarded_iterator&
+    operator++()
+    {
+      ++current;
+      return *this;
+    }
+
+    /** @brief Dereference the wrapped position.
+     *  @return Copy of the referenced element. */
+    typename std::iterator_traits<RandomAccessIterator>::value_type
+    operator*()
+    { return *current; }
+
+    /** @brief Implicit conversion back to the wrapped iterator.
+     *  @return Current position. */
+    operator RandomAccessIterator()
+    { return current; }
+
+    friend bool
+    operator< <RandomAccessIterator, Comparator>(guarded_iterator& bi1, guarded_iterator& bi2);
+
+    friend bool
+    operator<= <RandomAccessIterator, Comparator>(guarded_iterator& bi1, guarded_iterator& bi2);
+
+  private:
+    /** @brief Current iterator position. */
+    RandomAccessIterator current;
+
+    /** @brief End iterator of the sequence. */
+    RandomAccessIterator end;
+
+    /** @brief Comparator (not owned). */
+    Comparator& comp;
+  };
+
+  /** @brief Strict comparison of the elements under two guarded iterators.
+   *
+   *  An iterator that has reached its end acts as a supremum.  As
+   *  implemented, an exhausted @c bi1 compares "less" only against an
+   *  equally exhausted @c bi2; a live @c bi1 is always less than an
+   *  exhausted @c bi2.  Only when both are live is the comparator used.
+   *  @param bi1 First iterator.
+   *  @param bi2 Second iterator.
+   *  @return @c True if @c bi1 is to be ordered before @c bi2. */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline bool
+  operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
+            guarded_iterator<RandomAccessIterator, Comparator>& bi2)
+  {
+    // Left side exhausted: result depends only on whether the right
+    // side is exhausted as well.
+    if (bi1.current == bi1.end)
+      return bi2.current == bi2.end;
+
+    // Left side live: an exhausted right side dominates, otherwise
+    // fall back to the user comparator on the two head elements.
+    return bi2.current == bi2.end || (bi1.comp)(*bi1, *bi2);
+  }
+
+  /** @brief Non-strict comparison of the elements under two guarded
+   *  iterators.
+   *
+   *  An iterator that has reached its end acts as a supremum.  As
+   *  implemented, the result is the negation of <tt>bi2 < bi1</tt>: with
+   *  an exhausted @c bi2 the result is true exactly when @c bi1 is still
+   *  live; an exhausted @c bi1 against a live @c bi2 yields false.
+   *  @param bi1 First iterator.
+   *  @param bi2 Second iterator.
+   *  @return @c True if less equal. */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline bool
+  operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
+             guarded_iterator<RandomAccessIterator, Comparator>& bi2)
+  {
+    // Right side exhausted: true only while the left side is live.
+    if (bi2.current == bi2.end)
+      return bi1.current != bi1.end;
+
+    // Right side live: an exhausted left side is never less-equal,
+    // otherwise compare the two head elements.
+    return bi1.current != bi1.end && !(bi1.comp)(*bi2, *bi1);
+  }
+
+  // Forward declarations: the comparison operators are declared ahead of
+  // the class so that the class can befriend their specializations.
+  template<typename RandomAccessIterator, typename Comparator>
+  class unguarded_iterator;
+
+  template<typename RandomAccessIterator, typename Comparator>
+  inline bool
+  operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
+	    unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+
+  template<typename RandomAccessIterator, typename Comparator>
+  inline bool
+  operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
+	     unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+
+  /** @brief Iterator wrapper without any end-of-sequence check.
+   *
+   *  Offers the same interface as guarded_iterator, but the associated
+   *  comparison operators always dereference both positions.  The end
+   *  of the sequence is not stored.
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  class unguarded_iterator
+  {
+  private:
+    /** @brief Current iterator position.
+     *
+     *  Held by value.  It must not be a reference: the constructor
+     *  receives @c begin by value, so a reference member bound to it
+     *  would dangle as soon as the constructor returns. */
+    RandomAccessIterator current;
+    /** @brief Comparator (not owned).  A reference member cannot be
+     *  declared @c mutable, and does not need to be. */
+    Comparator& comp;
+
+  public:
+    /** @brief Constructor. Sets iterator to beginning of sequence.
+     *  @param begin Begin iterator of sequence.
+     *  @param end Unused, only for compatibility.
+     *  @param comp Comparator used by the associated overloaded
+     *  comparison operators; held by reference. */
+    inline unguarded_iterator(RandomAccessIterator begin,
+			      RandomAccessIterator end, Comparator& comp)
+    : current(begin), comp(comp)
+    { }
+
+    /** @brief Pre-increment operator.
+     *  @return This. */
+    inline unguarded_iterator<RandomAccessIterator, Comparator>&
+    operator++()
+    {
+      ++current;
+      return *this;
+    }
+
+    /** @brief Dereference operator.
+     *  @return Copy of the referenced element. */
+    inline typename std::iterator_traits<RandomAccessIterator>::value_type
+    operator*()
+    { return *current; }
+
+    /** @brief Convert to wrapped iterator.
+     *  @return Wrapped iterator. */
+    inline
+    operator RandomAccessIterator()
+    { return current; }
+
+    friend bool
+    operator< <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+
+    friend bool
+    operator<= <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+  };
+
+  /** @brief Strict comparison of the elements under two unguarded
+   *  iterators.  Both positions are dereferenced unconditionally.
+   *  @param bi1 First iterator.
+   *  @param bi2 Second iterator.
+   *  @return @c True if less. */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline bool
+  operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
+            unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
+  {
+    // Delegate directly to the comparator held by the first iterator.
+    Comparator& less_than = bi1.comp;
+    return less_than(*bi1, *bi2);
+  }
+
+  /** @brief Non-strict comparison of the elements under two unguarded
+   *  iterators.  Both positions are dereferenced unconditionally.
+   *  @param bi1 First iterator.
+   *  @param bi2 Second iterator.
+   *  @return @c True if less equal. */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline bool
+  operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
+             unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
+  {
+    // "a <= b" is expressed as "not (b < a)" with the stored comparator.
+    Comparator& less_than = bi1.comp;
+    return !less_than(*bi2, *bi1);
+  }
+
+  /** @brief Prepare a set of sequences to be merged without an (end) guard.
+   *
+   *  Finds the smallest among the last elements of all sequences and
+   *  counts, per sequence, how many trailing elements lie beyond it.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param comp Comparator.
+   *  @param min_sequence Output: index of the sequence whose last element
+   *  is the smallest, or of the first empty sequence encountered.
+   *  @param stable If true, sequences up to and including
+   *  @c min_sequence are split with upper_bound instead of lower_bound.
+   *  @return Total number of elements lying beyond the split points, or
+   *  -1 if an empty sequence was found.
+   *  @pre (seqs_end - seqs_begin > 0) */
+  template<typename RandomAccessIteratorIterator, typename Comparator>
+  typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
+  prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
+                    RandomAccessIteratorIterator seqs_end, Comparator comp,
+                    int& min_sequence, bool stable)
+  {
+    _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
+      difference_type;
+
+    RandomAccessIteratorIterator seq = seqs_begin;
+
+    // The very first sequence is empty: report it immediately.
+    if ((*seq).first == (*seq).second)
+      {
+        min_sequence = 0;
+        return -1;
+      }
+
+    // Start with the last element of the first sequence as the candidate.
+    value_type smallest_last = *((*seq).second - 1);
+    min_sequence = 0;
+
+    for (++seq; seq != seqs_end; ++seq)
+      {
+        // Any empty sequence aborts the search.
+        if ((*seq).first == (*seq).second)
+          {
+            min_sequence = static_cast<int>(seq - seqs_begin);
+            return -1;
+          }
+
+        const value_type& last = *((*seq).second - 1);
+
+        // Only a strictly smaller last element replaces the candidate.
+        if (comp(last, smallest_last))
+          {
+            smallest_last = last;
+            min_sequence = static_cast<int>(seq - seqs_begin);
+          }
+      }
+
+    // Sum up, over all sequences, the elements behind the split point.
+    difference_type overhang_size = 0;
+
+    for (int idx = 0; idx < (seqs_end - seqs_begin); ++idx)
+      {
+        // upper_bound is used only in stable mode and only for the
+        // sequences up to and including the minimum one.
+        const bool split_after_equal = stable && idx <= min_sequence;
+
+        RandomAccessIterator1 split = split_after_equal
+          ? std::upper_bound(seqs_begin[idx].first, seqs_begin[idx].second,
+                             smallest_last, comp)
+          : std::lower_bound(seqs_begin[idx].first, seqs_begin[idx].second,
+                             smallest_last, comp);
+
+        overhang_size += seqs_begin[idx].second - split;
+      }
+
+    // So many elements will be left over afterwards.
+    return overhang_size;
+  }
+
+  /** @brief Prepare a set of sequences to be merged with an (end) guard
+   *  (sentinel).
+   *
+   *  Determines the largest among the last elements of all non-empty
+   *  sequences and stores it at the end position (@c second) of every
+   *  sequence.  That position is written to, so it must refer to a
+   *  writable element.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param comp Comparator.
+   *  @return Total number of elements, over all sequences, that are not
+   *  less than the sentinel value. */
+  template<typename RandomAccessIteratorIterator, typename Comparator>
+  typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
+  prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
+                             RandomAccessIteratorIterator seqs_end,
+                             Comparator comp)
+  {
+    _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
+      difference_type;
+
+    // Largest last element seen so far; only meaningful once
+    // have_largest is set.
+    value_type largest_last;
+    bool have_largest = false;
+
+    for (RandomAccessIteratorIterator seq = seqs_begin; seq != seqs_end; ++seq)
+      if ((*seq).first != (*seq).second)
+        {
+          // Last element in sequence.
+          value_type& last = *((*seq).second - 1);
+
+          if (!have_largest)
+            {
+              largest_last = last;
+              have_largest = true;
+            }
+          else if (comp(largest_last, last))
+            // Strictly greater than the current candidate.
+            largest_last = last;
+        }
+
+    difference_type overhang_size = 0;
+
+    for (RandomAccessIteratorIterator seq = seqs_begin; seq != seqs_end; ++seq)
+      {
+        RandomAccessIterator1 split =
+          std::lower_bound((*seq).first, (*seq).second, largest_last, comp);
+        overhang_size += (*seq).second - split;
+
+        // Set sentinel at the end position of this sequence.
+        *((*seq).second) = largest_last;
+      }
+
+    // So many elements will be left over afterwards.
+    return overhang_size;
+  }
+
+  /** @brief Highly efficient 3-way merging procedure.
+   *
+   *  Implemented as a goto-driven state machine: each label @c sABC
+   *  stands for the state in which the heads of the three sequences are
+   *  ordered A, B, C.  Exactly the first three entries of the input
+   *  range are accessed; on return their @c first members are advanced
+   *  to the positions reached.
+   *  @tparam iterator Wrapper template (e.g. guarded or unguarded
+   *  iterator) instantiated for each of the three sequences.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence
+   *  (not read by this function).
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Number of elements to merge.  The only exit from the
+   *  merge loop is this count reaching zero, so the caller presumably
+   *  must not request more than the inputs can deliver -- TODO confirm.
+   *  @param stable Unused, stable anyway.
+   *  @return End iterator of output sequence. */
+  template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  {
+    _GLIBCXX_CALL(length);
+    
+    typedef _DifferenceTp difference_type;
+
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+
+    // Nothing to do; the Merge3Case states below emit an element before
+    // they test the remaining length, so zero must be handled up front.
+    if (length == 0)
+      return target;
+
+    iterator<RandomAccessIterator1, Comparator>
+      seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
+      seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
+      seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
+
+    // Initial decision tree: determine the order of the three heads and
+    // jump to the matching state.
+    if (seq0 <= seq1)
+      {
+	if (seq1 <= seq2)
+	  goto s012;
+	else
+	  if (seq2 <  seq0)
+	    goto s201;
+	  else
+	    goto s021;
+      }
+    else
+      {
+	if (seq1 <= seq2)
+	  {
+	    if (seq0 <= seq2)
+	      goto s102;
+	    else
+	      goto s120;
+	  }
+	else
+	  goto s210;
+      }
+
+    // Merge3Case(a,b,c,c0,c1) defines state s<a><b><c>: emit the head of
+    // sequence a, advance it, stop if enough elements were written, and
+    // otherwise re-position a relative to b (operator c0) and c
+    // (operator c1).  In the instantiations below, "<=" is passed where a
+    // has the lower sequence index and "<" where it has the higher one.
+#define Merge3Case(a,b,c,c0,c1)				\
+    s ## a ## b ## c :					\
+      *target = *seq ## a;				\
+    ++target;						\
+    length--;						\
+    ++seq ## a;						\
+    if (length == 0) goto finish;			\
+    if (seq ## a c0 seq ## b) goto s ## a ## b ## c;	\
+    if (seq ## a c1 seq ## c) goto s ## b ## a ## c;	\
+    goto s ## b ## c ## a;
+
+    Merge3Case(0, 1, 2, <=, <=);
+    Merge3Case(1, 2, 0, <=, < );
+    Merge3Case(2, 0, 1, < , < );
+    Merge3Case(1, 0, 2, < , <=);
+    Merge3Case(0, 2, 1, <=, <=);
+    Merge3Case(2, 1, 0, < , < );
+
+#undef Merge3Case
+
+  finish:
+    ;
+
+    // Write the reached positions back to the caller's sequence
+    // descriptors (uses the wrappers' conversion to the raw iterator).
+    seqs_begin[0].first = seq0;
+    seqs_begin[1].first = seq1;
+    seqs_begin[2].first = seq2;
+
+    return target;
+  }
+
+  /** @brief 3-way merge in two phases: an unguarded 3-way merge for as
+   *  long as that is possible according to prepare_unguarded(), followed
+   *  by a 2-way merge_advance() of the two sequences other than the one
+   *  reported by prepare_unguarded().
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Passed on to the 3-way variant.
+   *  @return End iterator of output sequence. */
+  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  {
+    _GLIBCXX_CALL(length);
+
+    typedef _DifferenceTp difference_type;
+
+    int min_seq;
+    RandomAccessIterator3 target_end;
+
+    // Stable anyway.
+    difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
+
+    // Number of elements currently held by all sequences together.
+    difference_type total_length = 0;
+    for (RandomAccessIteratorIterator seq = seqs_begin; seq != seqs_end; ++seq)
+      total_length += (*seq).second - (*seq).first;
+
+    if (overhang == -1)
+      {
+        // Empty sequence found: skip the unguarded phase entirely.
+        overhang = length;
+        target_end = target;
+      }
+    else
+      {
+        // Phase 1: merge without guards as far as that is safe.
+        const difference_type unguarded_length =
+          std::min(length, total_length - overhang);
+        target_end = multiway_merge_3_variant<unguarded_iterator>
+          (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+        overhang = length - unguarded_length;
+      }
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+#endif
+
+    // Phase 2: 2-way merge of the two sequences other than min_seq.
+    if (min_seq == 0 || min_seq == 1 || min_seq == 2)
+      {
+        // The two remaining indices, in ascending order.
+        const int lower = (min_seq == 0) ? 1 : 0;
+        const int upper = (min_seq == 2) ? 1 : 2;
+
+        // Iterators will be advanced accordingly.
+        target_end = merge_advance(seqs_begin[lower].first,
+                                   seqs_begin[lower].second,
+                                   seqs_begin[upper].first,
+                                   seqs_begin[upper].second,
+                                   target_end, overhang, comp);
+      }
+    else
+      {
+        _GLIBCXX_PARALLEL_ASSERT(false);
+      }
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+#endif
+
+    return target_end;
+  }
+
+  /** @brief Highly efficient 4-way merging procedure.
+   *
+   *  Implemented as a goto-driven state machine: each label @c sABCD
+   *  stands for the state in which the heads of the four sequences are
+   *  ordered A, B, C, D.  Exactly the first four entries of the input
+   *  range are accessed; on return their @c first members are advanced
+   *  to the positions reached.
+   *  @tparam iterator Wrapper template (e.g. guarded or unguarded
+   *  iterator) instantiated for each of the four sequences.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence
+   *  (not read by this function).
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Number of elements to merge.  The only exit from the
+   *  merge loop is this count reaching zero, so the caller presumably
+   *  must not request more than the inputs can deliver -- TODO confirm.
+   *  @param stable Unused, stable anyway.
+   *  @return End iterator of output sequence. */
+  template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  {
+    _GLIBCXX_CALL(length);
+    typedef _DifferenceTp difference_type;
+
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+
+    iterator<RandomAccessIterator1, Comparator>
+      seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
+      seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
+      seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
+      seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
+
+    // Decision(a,b,c,d): the heads of a, b, c are already known to be in
+    // this order; find the slot of d among them with strict comparisons
+    // and jump to the corresponding state.
+#define Decision(a,b,c,d) {					\
+      if (seq ## d < seq ## a) goto s ## d ## a ## b ## c;	\
+      if (seq ## d < seq ## b) goto s ## a ## d ## b ## c;	\
+      if (seq ## d < seq ## c) goto s ## a ## b ## d ## c;	\
+      goto s ## a ## b ## c ## d;  }
+
+    // Initial decision tree: order the heads of sequences 0, 1, 2 first,
+    // then let Decision place sequence 3.
+    if (seq0 <= seq1)
+      {
+	if (seq1 <= seq2)
+	  Decision(0,1,2,3)
+	  else
+	    if (seq2 < seq0)
+	      Decision(2,0,1,3)
+	      else
+		Decision(0,2,1,3)
+		  }
+    else
+      {
+	if (seq1 <= seq2)
+	  {
+	    if (seq0 <= seq2)
+	      Decision(1,0,2,3)
+	      else
+		Decision(1,2,0,3)
+		  }
+	else
+	  Decision(2,1,0,3)
+	    }
+
+    // Merge4Case(a,b,c,d,c0,c1,c2) defines state s<a><b><c><d>: stop if
+    // enough elements were written, otherwise emit the head of sequence
+    // a, advance it, and re-position a relative to b (c0), c (c1) and
+    // d (c2).  In the instantiations below, "<=" is passed where a has
+    // the lower sequence index and "<" where it has the higher one.
+#define Merge4Case(a,b,c,d,c0,c1,c2)				\
+    s ## a ## b ## c ## d:					\
+      if (length == 0) goto finish;				\
+    *target = *seq ## a;					\
+    ++target;							\
+    length--;							\
+    ++seq ## a;							\
+    if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d;	\
+    if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d;	\
+    if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d;	\
+    goto s ## b ## c ## d ## a;
+
+    Merge4Case(0, 1, 2, 3, <=, <=, <=);
+    Merge4Case(0, 1, 3, 2, <=, <=, <=);
+    Merge4Case(0, 2, 1, 3, <=, <=, <=);
+    Merge4Case(0, 2, 3, 1, <=, <=, <=);
+    Merge4Case(0, 3, 1, 2, <=, <=, <=);
+    Merge4Case(0, 3, 2, 1, <=, <=, <=);
+    Merge4Case(1, 0, 2, 3, < , <=, <=);
+    Merge4Case(1, 0, 3, 2, < , <=, <=);
+    Merge4Case(1, 2, 0, 3, <=, < , <=);
+    Merge4Case(1, 2, 3, 0, <=, <=, < );
+    Merge4Case(1, 3, 0, 2, <=, < , <=);
+    Merge4Case(1, 3, 2, 0, <=, <=, < );
+    Merge4Case(2, 0, 1, 3, < , < , <=);
+    Merge4Case(2, 0, 3, 1, < , <=, < );
+    Merge4Case(2, 1, 0, 3, < , < , <=);
+    Merge4Case(2, 1, 3, 0, < , <=, < );
+    Merge4Case(2, 3, 0, 1, <=, < , < );
+    Merge4Case(2, 3, 1, 0, <=, < , < );
+    Merge4Case(3, 0, 1, 2, < , < , < );
+    Merge4Case(3, 0, 2, 1, < , < , < );
+    Merge4Case(3, 1, 0, 2, < , < , < );
+    Merge4Case(3, 1, 2, 0, < , < , < );
+    Merge4Case(3, 2, 0, 1, < , < , < );
+    Merge4Case(3, 2, 1, 0, < , < , < );
+
+#undef Merge4Case
+#undef Decision
+
+  finish:
+    ;
+
+    // Write the reached positions back to the caller's sequence
+    // descriptors (uses the wrappers' conversion to the raw iterator).
+    seqs_begin[0].first = seq0;
+    seqs_begin[1].first = seq1;
+    seqs_begin[2].first = seq2;
+    seqs_begin[3].first = seq3;
+
+    return target;
+  }
+
+  /** @brief 4-way merge in two phases: an unguarded 4-way merge for as
+   *  long as that is possible according to prepare_unguarded(), followed
+   *  by a guarded 3-way merge of the sequences other than the one
+   *  reported by prepare_unguarded().
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Passed on to the merge variants.
+   *  @return End iterator of output sequence. */
+  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  {
+    _GLIBCXX_CALL(length);
+    typedef _DifferenceTp difference_type;
+
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+
+    int min_seq;
+    RandomAccessIterator3 target_end;
+
+    // Stable anyway.
+    difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
+
+    // Number of elements currently held by all sequences together.
+    difference_type total_length = 0;
+    for (RandomAccessIteratorIterator seq = seqs_begin; seq != seqs_end; ++seq)
+      total_length += (*seq).second - (*seq).first;
+
+    if (overhang == -1)
+      {
+        // Empty sequence found: skip the unguarded phase entirely.
+        overhang = length;
+        target_end = target;
+      }
+    else
+      {
+        // Phase 1: merge without guards as far as that is safe.
+        const difference_type unguarded_length =
+          std::min(length, total_length - overhang);
+        target_end = multiway_merge_4_variant<unguarded_iterator>
+          (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+        overhang = length - unguarded_length;
+      }
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+#endif
+
+    // Phase 2: work on a private copy of the descriptors with the
+    // min_seq entry taken out, and merge those with guards.
+    typedef std::pair<RandomAccessIterator1, RandomAccessIterator1> seq_type;
+    std::vector<seq_type> others(seqs_begin, seqs_end);
+    others.erase(others.begin() + min_seq);
+
+    target_end = multiway_merge_3_variant<guarded_iterator>(others.begin(), others.end(), target_end, comp, overhang, stable);
+
+    // Write back modified iterators; the slot of the left-out sequence
+    // keeps its current value.
+    const int num_seqs = static_cast<int>(seqs_end - seqs_begin);
+    for (int dst = 0, src = 0; dst < num_seqs; ++dst)
+      if (dst != min_seq)
+        {
+          seqs_begin[dst] = others[src];
+          ++src;
+        }
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+#endif
+
+    return target_end;
+  }
+
+  /** @brief Basic multi-way merging procedure.
+   *
+   *  The head elements are kept in a sorted array, new heads are
+   *  inserted linearly.  The @c first members of the input pairs are
+   *  advanced past the elements that were consumed.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge; never more than this many
+   *  elements are written, in stable and non-stable mode alike.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @return End iterator of output sequence.
+   */
+  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  {
+    _GLIBCXX_CALL(length)
+
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+
+    // k: number of input sequences; nrp: number of remaining pieces,
+    // i.e. sequences that still have elements.
+    int k = static_cast<int>(seqs_end - seqs_begin), nrp;
+
+    // pl[i] caches the head element of the i-th remaining piece,
+    // source[i] is the index of the sequence it comes from.
+    value_type* pl = new value_type[k];
+    int* source = new int[k];
+
+#define POS(i) seqs_begin[(i)].first
+#define STOPS(i) seqs_begin[(i)].second
+
+    // Write entries into queue, skipping empty sequences.
+    nrp = 0;
+    for (int pi = 0; pi < k; pi++)
+      {
+	if (STOPS(pi) != POS(pi))
+	  {
+	    pl[nrp] = *(POS(pi));
+	    source[nrp] = pi;
+	    nrp++;
+	  }
+      }
+
+    // Bubble-sort the heads.  In stable mode, ties are ordered by
+    // sequence index.
+    if (stable)
+      {
+	for (int pass = 0; pass < nrp - 1; pass++)
+	  for (int pi = nrp - 1; pi > pass; pi--)
+	    if (comp(pl[pi], pl[pi - 1]) ||
+		(!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
+	      {
+		std::swap(pl[pi - 1], pl[pi]);
+		std::swap(source[pi - 1], source[pi]);
+	      }
+      }
+    else
+      {
+	for (int pass = 0; pass < nrp - 1; pass++)
+	  for (int pi = nrp - 1; pi > pass; pi--)
+	    if (comp(pl[pi], pl[pi-1]))
+	      {
+		std::swap(pl[pi-1], pl[pi]);
+		std::swap(source[pi-1], source[pi]);
+	      }
+      }
+
+    // Iterate.
+    if (stable)
+      {
+	int j;
+	while (nrp > 0 && length > 0)
+	  {
+	    // With a single piece left there is no second entry to look
+	    // at: source[1] must not be read (it is stale, uninitialized,
+	    // or out of bounds for k == 1).  Both branches behave the
+	    // same in that case, so simply take the first one.
+	    if (nrp == 1 || source[0] < source[1])
+	      {
+		// pl[0] <= pl[1]
+		while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
+		  {
+		    *target = pl[0];
+		    ++target;
+		    ++POS(source[0]);
+		    length--;
+		    if (POS(source[0]) == STOPS(source[0]))
+		      {
+			// Piece exhausted: move everything to the left.
+			for (int s = 0; s < nrp - 1; s++)
+			  {
+			    pl[s] = pl[s + 1];
+			    source[s] = source[s + 1];
+			  }
+			nrp--;
+			break;
+		      }
+		    else
+		      pl[0] = *(POS(source[0]));
+		  }
+	      }
+	    else
+	      {
+		// pl[0] < pl[1]
+		while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
+		  {
+		    *target = pl[0];
+		    ++target;
+		    ++POS(source[0]);
+		    length--;
+		    if (POS(source[0]) == STOPS(source[0]))
+		      {
+			// Piece exhausted: move everything to the left.
+			for (int s = 0; s < nrp - 1; s++)
+			  {
+			    pl[s] = pl[s + 1];
+			    source[s] = source[s + 1];
+			  }
+			nrp--;
+			break;
+		      }
+		    else
+		      pl[0] = *(POS(source[0]));
+		  }
+	      }
+
+	    // Sink down: re-insert the new head at its sorted position.
+	    j = 1;
+	    while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
+				 (!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1]))))
+	      {
+		std::swap(pl[j - 1], pl[j]);
+		std::swap(source[j - 1], source[j]);
+		j++;
+	      }
+	  }
+      }
+    else
+      {
+	int j;
+	while (nrp > 0 && length > 0)
+	  {
+	    // pl[0] <= pl[1]
+	    // The parentheses matter: "&&" binds tighter than "||", so
+	    // without them the length limit would be ignored as soon as
+	    // only one piece is left, and more than length elements
+	    // would be written.
+	    while ((nrp == 1 || !comp(pl[1], pl[0])) && length > 0)
+	      {
+		*target = pl[0];
+		++target;
+		++POS(source[0]);
+		length--;
+		if (POS(source[0]) == STOPS(source[0]))
+		  {
+		    // Piece exhausted: move everything to the left.
+		    for (int s = 0; s < (nrp - 1); s++)
+		      {
+			pl[s] = pl[s + 1];
+			source[s] = source[s + 1];
+		      }
+		    nrp--;
+		    break;
+		  }
+		else
+		  pl[0] = *(POS(source[0]));
+	      }
+
+	    // Sink down: re-insert the new head at its sorted position.
+	    j = 1;
+	    while ((j < nrp) && comp(pl[j], pl[j - 1]))
+	      {
+		std::swap(pl[j - 1], pl[j]);
+		std::swap(source[j - 1], source[j]);
+		j++;
+	      }
+	  }
+      }
+
+    delete[] pl;
+    delete[] source;
+
+    return target;
+  }
+
+  /** @brief Multi-way merging procedure for a high branching factor,
+   * guarded case.
+   *
+   *  The head elements are kept in a loser tree.  Exhausted sequences
+   *  are represented in the tree by a default-constructed element that
+   *  is flagged as supremum.  The @c first members of the input pairs
+   *  are advanced past the elements that were consumed.
+   *  @tparam LT Loser tree type.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @return End iterator of output sequence.
+   */
+  template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  {
+    _GLIBCXX_CALL(length)
+
+    typedef _DifferenceTp difference_type;
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+
+    int k = static_cast<int>(seqs_end - seqs_begin);
+
+    LT lt(k, comp);
+
+    // Seed the tree with the head of every sequence and count the
+    // elements that are available in total.
+    difference_type total_length = 0;
+
+    for (int t = 0; t < k; ++t)
+      {
+        const bool exhausted = (seqs_begin[t].first == seqs_begin[t].second);
+
+        if (stable)
+          {
+            if (exhausted)
+              lt.insert_start_stable(value_type(), t, true);
+            else
+              lt.insert_start_stable(*seqs_begin[t].first, t, false);
+          }
+        else
+          {
+            if (exhausted)
+              lt.insert_start(value_type(), t, true);
+            else
+              lt.insert_start(*seqs_begin[t].first, t, false);
+          }
+
+        total_length += seqs_begin[t].second - seqs_begin[t].first;
+      }
+
+    if (stable)
+      lt.init_stable();
+    else
+      lt.init();
+
+    // Never emit more than requested, nor more than is available.
+    const difference_type to_emit = std::min(total_length, length);
+
+    for (difference_type i = 0; i < to_emit; ++i)
+      {
+        // Take out the current minimum.
+        const int source = lt.get_min_source();
+
+        *target = *seqs_begin[source].first;
+        ++target;
+        ++seqs_begin[source].first;
+
+        // Feed: replace from the same source, or mark it as exhausted.
+        const bool exhausted =
+          (seqs_begin[source].first == seqs_begin[source].second);
+
+        if (stable)
+          {
+            if (exhausted)
+              lt.delete_min_insert_stable(value_type(), true);
+            else
+              lt.delete_min_insert_stable(*seqs_begin[source].first, false);
+          }
+        else
+          {
+            if (exhausted)
+              lt.delete_min_insert(value_type(), true);
+            else
+              lt.delete_min_insert(*seqs_begin[source].first, false);
+          }
+      }
+
+    return target;
+  }
+
+  /** @brief Multi-way merging procedure for a high branching factor,
+   * unguarded case.
+   *
+   *  The head elements are kept in a loser tree.  No end-of-sequence
+   *  checks are made while merging, see the precondition.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @return End iterator of output sequence.
+   *  @pre No input will run out of elements during the merge.
+   */
+  template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  {
+    _GLIBCXX_CALL(length)
+    typedef _DifferenceTp difference_type;
+
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+
+    int k = seqs_end - seqs_begin;
+
+    LT lt(k, comp);
+
+    difference_type total_length = 0;
+
+    // Seed the tree with the head element of every sequence.
+    for (int t = 0; t < k; t++)
+      {
+#if _GLIBCXX_ASSERTIONS
+	_GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
+#endif
+	if (stable)
+	  lt.insert_start_stable(*seqs_begin[t].first, t, false);
+	else
+	  lt.insert_start(*seqs_begin[t].first, t, false);
+
+	total_length += LENGTH(seqs_begin[t]);
+      }
+
+    if (stable)
+      lt.init_stable();
+    else
+      lt.init();
+
+    // Do not go past end.
+    length = std::min(total_length, length);
+
+    int source;
+
+#if _GLIBCXX_ASSERTIONS
+    // Number of elements written so far, only needed for the checks.
+    difference_type i = 0;
+#endif
+
+    if (stable)
+      {
+	RandomAccessIterator3 target_end = target + length;
+	while (target < target_end)
+	  {
+	    // Take out.
+	    source = lt.get_min_source();
+
+#if _GLIBCXX_ASSERTIONS
+	    // The output must stay sorted.
+	    _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
+#endif
+
+	    *(target++) = *(seqs_begin[source].first++);
+
+#if _GLIBCXX_ASSERTIONS
+	    // Only the very last element merged may exhaust its sequence.
+	    _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1));
+	    i++;
+#endif
+	    // Feed.
+	    // Replace from same source.
+	    lt.delete_min_insert_stable(*seqs_begin[source].first, false);
+
+	  }
+      }
+    else
+      {
+	RandomAccessIterator3 target_end = target + length;
+	while (target < target_end)
+	  {
+	    // Take out.
+	    source = lt.get_min_source();
+
+#if _GLIBCXX_ASSERTIONS
+	    // Diagnostic output before the assertion fires.  The counters
+	    // are cast explicitly because difference_type need not be int.
+	    if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
+	      printf("         %li %li %i\n", static_cast<long>(length), static_cast<long>(i), source);
+	    _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
+#endif
+
+	    *(target++) = *(seqs_begin[source].first++);
+
+#if _GLIBCXX_ASSERTIONS
+	    // Only the very last element merged may exhaust its sequence.
+	    if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)))
+	      printf("         %li %li %i\n", static_cast<long>(length), static_cast<long>(i), source);
+	    _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1));
+	    i++;
+#endif
+	    // Feed.
+	    // Replace from same source.
+	    lt.delete_min_insert(*seqs_begin[source].first, false);
+	  }
+      }
+
+    return target;
+  }
+
+  /** @brief Selects the loser tree type used for guarded merging.
+   *
+   *  The member @c LT names @c LoserTree<_ValueTp, Comparator>.
+   *  @param _ValueTp Type of the elements to merge.
+   *  @param Comparator Type of the comparator.
+   */
+  template<typename _ValueTp, typename Comparator>
+  struct loser_tree_traits
+  {
+    // A pointer-based tree (LoserTreePointer) is not selected here.
+    typedef LoserTree<_ValueTp, Comparator> LT;
+  };
+
+
+  /*#define NO_POINTER(T) \
+    template<typename Comparator> \
+    struct loser_tree_traits<T, Comparator> \
+    { \
+    typedef LoserTreePointer<T, Comparator> LT; \
+    };*/
+  //
+  // NO_POINTER(unsigned char)
+  // NO_POINTER(char)
+  // NO_POINTER(unsigned short)
+  // NO_POINTER(short)
+  // NO_POINTER(unsigned int)
+  // NO_POINTER(int)
+  // NO_POINTER(unsigned long)
+  // NO_POINTER(long)
+  // NO_POINTER(unsigned long long)
+  // NO_POINTER(long long)
+  //
+  // #undef NO_POINTER
+
+  /** @brief Selects the loser tree type used for unguarded merging.
+   *
+   *  The member @c LT names @c LoserTreeUnguarded<_ValueTp, Comparator>.
+   *  @param _ValueTp Type of the elements to merge.
+   *  @param Comparator Type of the comparator.
+   */
+  template<typename _ValueTp, typename Comparator>
+  struct loser_tree_traits_unguarded
+  {
+    typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
+  };
+
+  /*#define NO_POINTER_UNGUARDED(T) \
+    template<typename Comparator> \
+    struct loser_tree_traits_unguarded<T, Comparator> \
+    { \
+    typedef LoserTreePointerUnguarded<T, Comparator> LT; \
+    };*/
+  //
+  // NO_POINTER_UNGUARDED(unsigned char)
+  // NO_POINTER_UNGUARDED(char)
+  // NO_POINTER_UNGUARDED(unsigned short)
+  // NO_POINTER_UNGUARDED(short)
+  // NO_POINTER_UNGUARDED(unsigned int)
+  // NO_POINTER_UNGUARDED(int)
+  // NO_POINTER_UNGUARDED(unsigned long)
+  // NO_POINTER_UNGUARDED(long)
+  // NO_POINTER_UNGUARDED(unsigned long long)
+  // NO_POINTER_UNGUARDED(long long)
+  //
+  // #undef NO_POINTER_UNGUARDED
+
+  /** @brief Multi-way merging with loser trees, combining the unguarded
+   *  and the guarded variant.
+   *
+   *  As many elements as the result of prepare_unguarded() permits are
+   *  merged by multiway_merge_loser_tree_unguarded(), the remaining
+   *  ones by multiway_merge_loser_tree().
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Length to merge.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @return End iterator of output sequence.
+   */
+  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  {
+    _GLIBCXX_CALL(length)
+
+    typedef _DifferenceTp difference_type;
+
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+    typedef typename loser_tree_traits_unguarded<value_type, Comparator>::LT
+      unguarded_tree_type;
+    typedef typename loser_tree_traits<value_type, Comparator>::LT
+      guarded_tree_type;
+
+    int min_seq;
+    difference_type rest = prepare_unguarded(seqs_begin, seqs_end,
+					      comp, min_seq, stable);
+
+    difference_type input_length = 0;
+    for (RandomAccessIteratorIterator seq = seqs_begin; seq != seqs_end; ++seq)
+      input_length += LENGTH(*seq);
+
+    RandomAccessIterator3 merged_end = target;
+
+    if (rest == -1)
+      {
+	// Empty sequence found, everything is left to the guarded merge.
+	rest = length;
+      }
+    else
+      {
+	// First phase, no end-of-sequence checks.
+	const difference_type unguarded_length = std::min(length, input_length - rest);
+	merged_end = multiway_merge_loser_tree_unguarded<unguarded_tree_type>
+	  (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+	rest = length - unguarded_length;
+      }
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(merged_end == target + length - rest);
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, merged_end, comp));
+#endif
+
+    // Second phase, guarded merge of what is left.
+    merged_end = multiway_merge_loser_tree<guarded_tree_type>
+      (seqs_begin, seqs_end, merged_end, comp, rest, stable);
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(merged_end == target + length);
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, merged_end, comp));
+#endif
+
+    return merged_end;
+  }
+
+  /** @brief Multi-way merging with a loser tree, for input sequences
+   *  that are each followed by a sentinel element.
+   *
+   *  The end of every sequence is moved forward by one position for the
+   *  duration of the unguarded merge and moved back afterwards.
+   *  Elements not merged by multiway_merge_loser_tree_unguarded() are
+   *  copied to the output sequence by sequence.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Length to merge.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @return End iterator of output sequence.
+   */
+  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  {
+    _GLIBCXX_CALL(length)
+
+    typedef _DifferenceTp difference_type;
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+
+    RandomAccessIterator3 target_end;
+    difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
+
+    difference_type total_length = 0;
+    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+      {
+	total_length += LENGTH(*s);
+
+	// Sentinel spot.
+	(*s).second++;
+      }
+
+    difference_type unguarded_length = std::min(length, total_length - overhang);
+    target_end = multiway_merge_loser_tree_unguarded
+      <typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
+      (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+    overhang = length - unguarded_length;
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+#endif
+
+    // Every sequence end has been moved above, so every one of them
+    // has to be moved back, whether or not anything is left to copy.
+    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+      {
+	// Restore.
+	(*s).second--;
+
+	// Copy rest stable.
+	if (overhang > 0)
+	  {
+	    difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s));
+	    target_end = std::copy((*s).first, (*s).first + local_length, target_end);
+	    (*s).first += local_length;
+	    overhang -= local_length;
+	  }
+      }
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
+    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+#endif
+
+    return target_end;
+  }
+
+  /** @brief Sequential multi-way merging switch.
+   *
+   *  The decision is based on the branching factor and runtime settings.
+   *  At most as many elements as the input sequences hold are merged.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @param sentinel The sequences have a sentinel element.
+   *  @return End iterator of output sequence. */
+  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag)
+  {
+    _GLIBCXX_CALL(length)
+
+    typedef _DifferenceTp difference_type;
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+
+#if _GLIBCXX_ASSERTIONS
+    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+      _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
+#endif
+
+    // length is only an upper bound.  Do not go past the end of the
+    // input, in particular not in the plain copy done for k == 1.
+    difference_type total_length = 0;
+    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+      total_length += LENGTH(*s);
+    length = std::min(length, total_length);
+
+    RandomAccessIterator3 return_target = target;
+    int k = static_cast<int>(seqs_end - seqs_begin);
+
+    Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm;
+
+    // The sentinel variant cannot be used without sentinels.
+    if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
+      mwma = Settings::LOSER_TREE_COMBINED;
+
+    switch (k)
+      {
+      case 0:
+	break;
+      case 1:
+	return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target);
+	seqs_begin[0].first += length;
+	break;
+      case 2:
+	return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp);
+	break;
+      case 3:
+	switch (mwma)
+	  {
+	  case Settings::LOSER_TREE_COMBINED:
+	    return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable);
+	    break;
+	  case Settings::LOSER_TREE_SENTINEL:
+	    return_target = multiway_merge_3_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
+	    break;
+	  default:
+	    return_target = multiway_merge_3_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
+	    break;
+	  }
+	break;
+      case 4:
+	switch (mwma)
+	  {
+	  case Settings::LOSER_TREE_COMBINED:
+	    return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable);
+	    break;
+	  case Settings::LOSER_TREE_SENTINEL:
+	    return_target = multiway_merge_4_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
+	    break;
+	  default:
+	    return_target = multiway_merge_4_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
+	    break;
+	  }
+	break;
+      default:
+	{
+	  switch (mwma)
+	    {
+	    case Settings::BUBBLE:
+	      return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable);
+	      break;
+#if _GLIBCXX_LOSER_TREE_EXPLICIT
+	    case Settings::LOSER_TREE_EXPLICIT:
+	      return_target = multiway_merge_loser_tree<LoserTreeExplicit<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
+	      break;
+#endif
+#if _GLIBCXX_LOSER_TREE
+	    case Settings::LOSER_TREE:
+	      return_target = multiway_merge_loser_tree<LoserTree<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
+	      break;
+#endif
+#if _GLIBCXX_LOSER_TREE_COMBINED
+	    case Settings::LOSER_TREE_COMBINED:
+	      return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable);
+	      break;
+#endif
+#if _GLIBCXX_LOSER_TREE_SENTINEL
+	    case Settings::LOSER_TREE_SENTINEL:
+	      return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable);
+	      break;
+#endif
+	    default:
+	      // multiway_merge algorithm not implemented.
+	      _GLIBCXX_PARALLEL_ASSERT(0);
+	      break;
+	    }
+	}
+      }
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
+#endif
+
+    return return_target;
+  }
+
+  /** @brief Parallel multi-way merge routine.
+   *
+   *  The input is split into one part per thread, either by sampling
+   *  or exactly, depending on @c Settings::multiway_merge_splitting.
+   *  Each thread then merges its part sequentially.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @param sentinel Ignored.
+   *  @return End iterator of output sequence.
+   */
+  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel)
+  {
+    _GLIBCXX_CALL(length)
+
+    typedef _DifferenceTp difference_type;
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+      RandomAccessIterator1;
+    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+      value_type;
+
+#if _GLIBCXX_ASSERTIONS
+    for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++)
+      _GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp));
+#endif
+
+    // k sequences.
+    int k = static_cast<int>(seqs_end - seqs_begin);
+
+    difference_type total_length = 0;
+    for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++)
+      total_length += LENGTH(*raii);
+
+    _GLIBCXX_CALL(total_length)
+
+    if (total_length == 0 || k == 0)
+      return target;
+
+    // No more threads than elements.
+    thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
+
+    Timing<sequential_tag>* t = new Timing<sequential_tag>[num_threads];
+
+    for (int pr = 0; pr < num_threads; pr++)
+      t[pr].tic();
+
+    // All input elements are to be merged.
+    bool tight = (total_length == length);
+
+    // Thread t will have to merge pieces[iam][0..k - 1]
+    std::vector<std::pair<difference_type, difference_type> >* pieces = new std::vector<std::pair<difference_type, difference_type> >[num_threads];
+    for (int s = 0; s < num_threads; s++)
+      pieces[s].resize(k);
+
+    difference_type num_samples = Settings::merge_oversampling * num_threads;
+
+    if (Settings::multiway_merge_splitting == Settings::SAMPLING)
+      {
+	value_type* samples = new value_type[k * num_samples];
+	// Sample.
+	for (int s = 0; s < k; s++)
+	  for (int i = 0; (difference_type)i < num_samples; i++)
+	    {
+	      difference_type sample_index = static_cast<difference_type>(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length));
+	      samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
+	    }
+
+	if (stable)
+	  __gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp);
+	else
+	  __gnu_sequential::sort(samples, samples + (num_samples * k), comp);
+
+	for (int slab = 0; slab < num_threads; slab++)
+	  // For each slab / processor.
+	  for (int seq = 0; seq < k; seq++)
+	    {
+	      // For each sequence.
+	      if (slab > 0)
+		pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first;
+	      else
+		{
+		  // Absolute beginning.
+		  pieces[slab][seq].first = 0;
+		}
+	      if ((slab + 1) < num_threads)
+		pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first;
+	      else
+		// Absolute ending.
+		pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
+	    }
+	delete[] samples;
+      }
+    else
+      {
+	// (Settings::multiway_merge_splitting == Settings::EXACT).
+	std::vector<RandomAccessIterator1>* offsets = new std::vector<RandomAccessIterator1>[num_threads];
+	std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > se(k);
+
+	copy(seqs_begin, seqs_end, se.begin());
+
+	difference_type borders[num_threads + 1];
+	equally_split(length, num_threads, borders);
+
+	// Inner splitting positions.
+	for (int s = 0; s < (num_threads - 1); s++)
+	  {
+	    offsets[s].resize(k);
+	    multiseq_partition(se.begin(), se.end(), borders[s + 1],
+			       offsets[s].begin(), comp);
+	  }
+
+	// If not everything is merged, the end of the last part has to
+	// be determined as well.  This is done once, outside of the
+	// loop above, so that it also happens for a single thread.
+	if (!tight)
+	  {
+	    offsets[num_threads - 1].resize(k);
+	    multiseq_partition(se.begin(), se.end(), (difference_type)length,
+			       offsets[num_threads - 1].begin(),  comp);
+	  }
+
+	for (int slab = 0; slab < num_threads; slab++)
+	  {
+	    // For each slab / processor.
+	    for (int seq = 0; seq < k; seq++)
+	      {
+		// For each sequence.
+		if (slab == 0)
+		  {
+		    // Absolute beginning.
+		    pieces[slab][seq].first = 0;
+		  }
+		else
+		  pieces[slab][seq].first = pieces[slab - 1][seq].second;
+		if (!tight || slab < (num_threads - 1))
+		  pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first;
+		else
+		  {
+		    // slab == num_threads - 1
+		    pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
+		  }
+	      }
+	  }
+	delete[] offsets;
+      }
+
+    for (int pr = 0; pr < num_threads; pr++)
+      t[pr].tic();
+
+#	pragma omp parallel num_threads(num_threads)
+    {
+      thread_index_t iam = omp_get_thread_num();
+
+      t[iam].tic();
+
+      // The output of this thread starts after everything that comes
+      // before its pieces.
+      difference_type target_position = 0;
+
+      for (int c = 0; c < k; c++)
+	target_position += pieces[iam][c].first;
+
+      if (k != 2)
+	{
+	  // General case.  This includes k == 1, for which the
+	  // sequential routine copies the piece.
+	  std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks = new std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
+
+	  difference_type local_length = 0;
+	  for (int s = 0; s < k; s++)
+	    {
+	      chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second);
+	      local_length += LENGTH(chunks[s]);
+	    }
+
+	  multiway_merge(chunks, chunks + k, target + target_position, comp,
+			 std::min(local_length, length - target_position),
+			 stable, false, sequential_tag());
+
+	  delete[] chunks;
+	}
+      else
+	{
+	  // k == 2, plain two-way merge.
+	  RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first;
+	  merge_advance(begin0,
+			seqs_begin[0].first + pieces[iam][0].second,
+			begin1,
+			seqs_begin[1].first + pieces[iam][1].second,
+			target + target_position,
+			(pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
+			comp);
+	}
+
+      t[iam].tic();
+
+    }
+
+    for (int pr = 0; pr < num_threads; pr++)
+      t[pr].tic();
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
+#endif
+
+    // Update ends of sequences.
+    for (int s = 0; s < k; s++)
+      seqs_begin[s].first += pieces[num_threads - 1][s].second;
+
+    delete[] pieces;
+
+    for (int pr = 0; pr < num_threads; pr++)
+      t[pr].tic();
+    for (int pr = 0; pr < num_threads; pr++)
+      t[pr].print();
+    delete[] t;
+
+    return target + length;
+  }
+
+  /**
+   *  @brief Multi-way merging front-end.
+   *
+   *  Dispatches to parallel_multiway_merge() if the number of
+   *  sequences and the length reach the minima given in Settings,
+   *  otherwise to the sequential multiway_merge().
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @return End iterator of output sequence.
+   */
+  template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
+		 RandomAccessIteratorPairIterator seqs_end,
+		 RandomAccessIterator3 target, Comparator comp,
+		 _DifferenceTp length, bool stable)
+  {
+    typedef _DifferenceTp difference_type;
+    _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+    // No sequences, nothing to merge.
+    if (seqs_begin == seqs_end)
+      return target;
+
+    if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
+      return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false);
+
+    // Too little work, merge sequentially.
+    return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag());
+  }
+
+  /** @brief Multi-way merging front-end, for sequences with sentinels.
+   *
+   *  Dispatches to parallel_multiway_merge() if the number of
+   *  sequences and the length reach the minima given in Settings,
+   *  otherwise to the sequential multiway_merge().
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @return End iterator of output sequence.
+   *  @pre For each @c i, @c seqs_begin[i].second must be the end
+   *  marker of the sequence, and must also reference one additional
+   *  sentinel element. */
+  template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+  RandomAccessIterator3
+  multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
+			  RandomAccessIteratorPairIterator seqs_end,
+			  RandomAccessIterator3 target,
+			  Comparator comp,
+			  _DifferenceTp length,
+			  bool stable)
+  {
+    // The parallel routine is called with the difference type of the
+    // output iterator.
+    typedef typename std::iterator_traits<RandomAccessIterator3>::difference_type
+      target_difference_type;
+
+    // No sequences, nothing to merge.
+    if (seqs_begin == seqs_end)
+      return target;
+
+    _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+    if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
+      return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (target_difference_type)length, stable, true);
+
+    // Too little work, merge sequentially.
+    return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag());
+  }
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h
new file mode 100644
index 00000000000..7f0f3c06922
--- /dev/null
+++ b/libstdc++-v3/include/parallel/multiway_mergesort.h
@@ -0,0 +1,413 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/multiway_mergesort.h
+ *  @brief Parallel multiway merge sort.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_MERGESORT_H
+#define _GLIBCXX_PARALLEL_MERGESORT_H 1
+
+#include <vector>
+
+#include <parallel/basic_iterator.h>
+#include <bits/stl_algo.h>
+#include <parallel/parallel.h>
+#include <parallel/multiway_merge.h>
+#include <parallel/timing.h>
+
+namespace __gnu_parallel
+{
+
+  /** @brief Subsequence description.
+   *
+   *  A pair of positions, @c begin and @c end, in a sequence. */
+  template<typename _DifferenceTp>
+  struct Piece
+  {
+    typedef _DifferenceTp difference_type;
+
+    /** @brief Begin of subsequence. */
+    difference_type begin;
+
+    /** @brief End of subsequence. */
+    difference_type end;
+  };
+
+  /** @brief Data accessed by all threads.
+   *
+   *  PMWMS = parallel multiway mergesort.
+   *  The types of @c sorting_places and @c merging_places depend on
+   *  @c _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST. */
+  template<typename RandomAccessIterator>
+  struct PMWMSSortingData
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    /** @brief Input begin. */
+    RandomAccessIterator source;
+
+    /** @brief Start indices, per thread.
+     *
+     *  The chunk of thread @c i is @c [starts[i], starts[i + 1]). */
+    difference_type* starts;
+
+    /** @brief Temporary arrays for each thread.
+     *
+     *  Indirection allows using the temporary storage in different
+     *  ways, without code duplication.
+     *  @see _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST */
+    value_type** temporaries;
+
+#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
+    /** @brief Storage in which to sort. */
+    RandomAccessIterator* sorting_places;
+
+    /** @brief Storage into which to merge. */
+    value_type** merging_places;
+#else
+    /** @brief Storage in which to sort. */
+    value_type** sorting_places;
+
+    /** @brief Storage into which to merge. */
+    RandomAccessIterator* merging_places;
+#endif
+    /** @brief Samples. */
+    value_type* samples;
+
+    /** @brief Offsets to add to the found positions. */
+    difference_type* offsets;
+
+    /** @brief Pieces of data to merge @c [thread][sequence] */
+    std::vector<Piece<difference_type> >* pieces;
+  };
+
+  /** @brief Thread local data for PMWMS (parallel multiway mergesort). */
+  template<typename RandomAccessIterator>
+  struct PMWMSSorterPU
+  {
+    /** @brief Total number of threads involved. */
+    thread_index_t num_threads;
+    /** @brief Number of owning thread. */
+    thread_index_t iam;
+    /** @brief Stable sorting desired. */
+    bool stable;
+    /** @brief Pointer to global data. */
+    PMWMSSortingData<RandomAccessIterator>* sd;
+  };
+
+  /**
+   *  @brief Select samples from the chunk of the calling thread.
+   *  @param d Pointer to thread-local data. Result will be placed in
+   *  @c d->sd->samples, in the section belonging to thread @c d->iam.
+   *  @param num_samples Set to the number of samples selected per
+   *  thread, which is computed from
+   *  @c Settings::sort_mwms_oversampling and @c d->num_threads.
+   */
+  template<typename RandomAccessIterator, typename _DifferenceTp>
+  inline void
+  determine_samples(PMWMSSorterPU<RandomAccessIterator>* d,
+		    _DifferenceTp& num_samples)
+  {
+    typedef _DifferenceTp difference_type;
+
+    PMWMSSortingData<RandomAccessIterator>* const shared = d->sd;
+    const thread_index_t me = d->iam;
+
+    num_samples = Settings::sort_mwms_oversampling * d->num_threads - 1;
+
+    // Split the local chunk into num_samples + 1 parts.
+    difference_type borders[num_samples + 2];
+    equally_split(shared->starts[me + 1] - shared->starts[me], num_samples + 1, borders);
+
+    // The elements at borders[1..num_samples] become the samples.
+    difference_type i = 0;
+    while (i < num_samples)
+      {
+	shared->samples[me * num_samples + i] = shared->source[shared->starts[me] + borders[i + 1]];
+	++i;
+      }
+  }
+
+  /** @brief PMWMS code executed by each thread.
+   *  @param d Pointer to thread-local data.
+   *  @param comp Comparator. 
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void 
+  parallel_sort_mwms_pu(PMWMSSorterPU<RandomAccessIterator>* d, 
+			Comparator& comp)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    Timing<sequential_tag> t;
+
+    t.tic();
+
+    PMWMSSortingData<RandomAccessIterator>* sd = d->sd;
+    thread_index_t iam = d->iam;
+
+    // Length of this thread's chunk, before merging.
+    difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
+
+#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
+    typedef RandomAccessIterator SortingPlacesIterator;
+
+    // Sort in input storage.
+    sd->sorting_places[iam] = sd->source + sd->starts[iam];
+#else
+    typedef value_type* SortingPlacesIterator;
+
+    // Sort in temporary storage, leave space for sentinel.
+    sd->sorting_places[iam] = sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) *(length_local + 1)));
+
+    // Copy there.
+    std::uninitialized_copy(sd->source + sd->starts[iam], sd->source + sd->starts[iam] + length_local, sd->sorting_places[iam]);
+#endif
+
+    // Sort locally.
+    if (d->stable)
+      __gnu_sequential::stable_sort(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp);
+    else
+      __gnu_sequential::sort(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp);
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp));
+#endif
+
+    // Invariant: locally sorted subsequence in sd->sorting_places[iam],
+    // sd->sorting_places[iam] + length_local.
+    t.tic("local sort");
+
+    if (Settings::sort_splitting == Settings::SAMPLING)
+      {
+	difference_type num_samples;
+	determine_samples(d, num_samples);
+
+#pragma omp barrier
+
+	t.tic("sample/wait");
+
+#pragma omp single
+	__gnu_sequential::sort(sd->samples, sd->samples + (num_samples * d->num_threads), comp);
+
+#pragma omp barrier
+
+	for (int s = 0; s < d->num_threads; s++)
+	  {
+	    // For each sequence.
+	    if (num_samples * iam > 0)
+	      sd->pieces[iam][s].begin = std::lower_bound(sd->sorting_places[s],
+				 sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
+				 sd->samples[num_samples * iam],
+				 comp)
+		- sd->sorting_places[s];
+	    else
+	      // Absolute beginning.
+	      sd->pieces[iam][s].begin = 0;
+
+	    if ((num_samples * (iam + 1)) < (num_samples * d->num_threads))
+	      sd->pieces[iam][s].end = std::lower_bound(sd->sorting_places[s],
+							sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s], sd->samples[num_samples * (iam + 1)], comp)
+		- sd->sorting_places[s];
+	    else
+	      // Absolute end.
+	      sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
+	  }
+
+      }
+    else if (Settings::sort_splitting == Settings::EXACT)
+      {
+#pragma omp barrier
+
+	t.tic("wait");
+
+	std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > seqs(d->num_threads);
+	for (int s = 0; s < d->num_threads; s++)
+	  seqs[s] = std::make_pair(sd->sorting_places[s], sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]);
+
+	std::vector<SortingPlacesIterator> offsets(d->num_threads);
+
+	// If not last thread.
+	if (iam < d->num_threads - 1)
+	  multiseq_partition(seqs.begin(), seqs.end(), sd->starts[iam + 1], offsets.begin(), comp);
+
+	for (int seq = 0; seq < d->num_threads; seq++)
+	  {
+	    // For each sequence.
+	    if (iam < (d->num_threads - 1))
+	      sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
+	    else
+	      // Absolute end of this sequence.
+	      sd->pieces[iam][seq].end = sd->starts[seq + 1] - sd->starts[seq];
+	  }
+
+#pragma omp barrier
+
+	for (int seq = 0; seq < d->num_threads; seq++)
+	  {
+	    // For each sequence.
+	    if (iam > 0)
+	      sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
+	    else
+	      // Absolute beginning.
+	      sd->pieces[iam][seq].begin = 0;
+	  }
+      }
+
+    t.tic("split");
+
+    // Offset from target begin, length after merging.
+    difference_type offset = 0, length_am = 0;
+    for (int s = 0; s < d->num_threads; s++)
+      {
+	length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
+	offset += sd->pieces[iam][s].begin;
+      }
+
+#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
+    // Merge to temporary storage, uninitialized creation not possible
+    // since there is no multiway_merge calling the placement new
+    // instead of the assignment operator.
+    sd->merging_places[iam] = sd->temporaries[iam] = new value_type[length_am];
+#else
+    // Merge directly to target.
+    sd->merging_places[iam] = sd->source + offset;
+#endif
+    std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > seqs(d->num_threads);
+
+    for (int s = 0; s < d->num_threads; s++)
+      {
+	seqs[s] = std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin, sd->sorting_places[s] + sd->pieces[iam][s].end);
+
+#if _GLIBCXX_ASSERTIONS
+	_GLIBCXX_PARALLEL_ASSERT(is_sorted(seqs[s].first, seqs[s].second, comp));
+#endif
+      }
+
+    multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, d->stable, false, sequential_tag());
+
+    t.tic("merge");
+
+#if _GLIBCXX_ASSERTIONS
+    _GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->merging_places[iam], sd->merging_places[iam] + length_am, comp));
+#endif
+
+#	pragma omp barrier
+
+#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
+    // Write back.
+    std::copy(sd->merging_places[iam], sd->merging_places[iam] + length_am, 
+	      sd->source + offset);
+#endif
+
+    delete[] sd->temporaries[iam];
+
+    t.tic("copy back");
+
+    t.print();
+  }
+
+  /** @brief PMWMS (parallel multiway mergesort) main call: sets up the shared data and runs one parallel_sort_mwms_pu per thread.
+   *  @param begin Begin iterator of sequence.
+   *  @param end End iterator of sequence (not read here; the range is given by begin and n).
+   *  @param comp Comparator.
+   *  @param n Length of sequence; nothing is done for n <= 1.
+   *  @param num_threads Number of threads to use; lowered to n if larger.
+   *  @param stable Stable sorting (stored in each PU descriptor).
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, typename std::iterator_traits<RandomAccessIterator>::difference_type n, int num_threads, bool stable)
+  {
+    _GLIBCXX_CALL(n)
+      
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    if (n <= 1)
+      return;
+
+    // At least one element per thread, so never more threads than elements.
+    if (num_threads > n)
+      num_threads = static_cast<thread_index_t>(n);
+
+    PMWMSSortingData<RandomAccessIterator> sd;
+
+    sd.source = begin;
+    sd.temporaries = new value_type*[num_threads];
+
+#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
+    sd.sorting_places = new RandomAccessIterator[num_threads];
+    sd.merging_places = new value_type*[num_threads];
+#else
+    sd.sorting_places = new value_type*[num_threads];
+    sd.merging_places = new RandomAccessIterator[num_threads];
+#endif
+
+    if (Settings::sort_splitting == Settings::SAMPLING)
+      sd.samples = new value_type[num_threads * (Settings::sort_mwms_oversampling * num_threads - 1)];
+    else
+      sd.samples = NULL;
+
+    sd.offsets = new difference_type[num_threads - 1];
+    sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
+    for (int s = 0; s < num_threads; s++)
+      sd.pieces[s].resize(num_threads);
+    PMWMSSorterPU<RandomAccessIterator>* pus = new PMWMSSorterPU<RandomAccessIterator>[num_threads];
+    difference_type* starts = sd.starts = new difference_type[num_threads + 1];
+
+    difference_type chunk_length = n / num_threads, split = n % num_threads, start = 0;
+    for (int i = 0; i < num_threads; i++)
+      {
+	starts[i] = start;
+	start += (i < split) ? (chunk_length + 1) : chunk_length;
+	pus[i].num_threads = num_threads;
+	pus[i].iam = i;
+	pus[i].sd = &sd;
+	pus[i].stable = stable;
+      }
+    starts[num_threads] = start;
+
+    // Now sort in parallel.  NOTE(review): each of the num_threads PUs needs its own thread; confirm the OpenMP team cannot come up smaller.
+#pragma omp parallel num_threads(num_threads)
+    parallel_sort_mwms_pu(&(pus[omp_get_thread_num()]), comp);
+
+    // XXX sd as RAII; as written, these buffers are not released if anything above throws.
+    delete[] starts;
+    delete[] sd.temporaries;
+    delete[] sd.sorting_places;
+    delete[] sd.merging_places;
+
+    if (Settings::sort_splitting == Settings::SAMPLING)
+      delete[] sd.samples;
+
+    delete[] sd.offsets;
+    delete[] sd.pieces;
+
+    delete[] pus;
+  }
+
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/numeric b/libstdc++-v3/include/parallel/numeric
new file mode 100644
index 00000000000..3209a58a3e6
--- /dev/null
+++ b/libstdc++-v3/include/parallel/numeric
@@ -0,0 +1,322 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/**
+ * @file parallel/numeric
+*
+ * @brief Parallel STL function calls corresponding to stl_numeric.h.
+ * The functions defined here mainly do case switches and
+ * call the actual parallelized versions in other files.
+ * Inlining policy: Functions that basically only contain one function call,
+ * are declared inline.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_NUMERIC_H
+#define _GLIBCXX_PARALLEL_NUMERIC_H 1
+
+#include <numeric>
+#include <functional>
+#include <parallel/numericfwd.h>
+#include <parallel/iterator.h>
+#include <parallel/for_each.h>
+#include <parallel/for_each_selectors.h>
+#include <parallel/partial_sum.h>
+
+namespace std
+{
+namespace __parallel
+{
+  // Sequential fallback: forwards unchanged to _GLIBCXX_STD_P::accumulate.
+  template<typename InputIterator, typename T>
+  inline T accumulate(InputIterator begin, InputIterator end, T init,
+                      __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::accumulate(begin, end, init); }
+
+  // Sequential fallback taking a caller-supplied binary operation.
+  template<typename InputIterator, typename T, typename BinaryOperation>
+  inline T accumulate(InputIterator begin, InputIterator end, T init, BinaryOperation binary_op, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::accumulate(begin, end, init, binary_op);
+  }
+
+  // Sequential fallback for input iterator case (variant without a binary operation).
+  template<typename InputIterator, typename T, typename IteratorTag>
+  inline T accumulate_switch(InputIterator begin, InputIterator end, T init,
+                             IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+  { return accumulate(begin, end, init, __gnu_parallel::sequential_tag()); }
+
+  // Public interface.  NOTE(review): std::plus is instantiated for the iterator's value_type, not T -- confirm the implied conversion of init is intended.
+  template<typename InputIterator, typename T>
+  inline T accumulate(InputIterator begin, InputIterator end, T init, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced)
+  {
+    typedef std::iterator_traits<InputIterator> traits_type;
+    return accumulate_switch(begin, end, init, std::plus<typename traits_type::value_type>(), typename traits_type::iterator_category(), parallelism_tag);
+  }
+
+  // Sequential fallback for input iterator case: chosen for every
+  // iterator tag that has no more specialized overload.
+  template<typename InputIterator, typename T, typename BinaryOperation, typename IteratorTag>
+  T accumulate_switch(InputIterator begin, InputIterator end, T init, BinaryOperation binary_op, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    return accumulate(begin, end, init, binary_op, __gnu_parallel::sequential_tag());
+  }
+
+  // Parallel algorithm for random access iterators.
+  template<typename _RandomAccessIterator, typename T, typename BinaryOperation>
+  T accumulate_switch(_RandomAccessIterator begin, _RandomAccessIterator end, T init, BinaryOperation binary_op, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::accumulate_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+      {
+        T total = init;
+        __gnu_parallel::accumulate_selector<_RandomAccessIterator> selector;
+        __gnu_parallel::for_each_template_random_access(begin, end, __gnu_parallel::nothing(), selector, __gnu_parallel::accumulate_binop_reduct<BinaryOperation>(binary_op), total, total, -1, parallelism_tag);
+        return total;
+      }
+
+    // Condition not met: run the sequential version with the same operation.
+    return accumulate(begin, end, init, binary_op, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface: selects the implementation by iterator category.
+  template<typename InputIterator, typename T, typename BinaryOperation>
+  inline T accumulate(InputIterator begin, InputIterator end, T init, BinaryOperation binary_op, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced)
+  {
+    typedef typename std::iterator_traits<InputIterator>::iterator_category category;
+    return accumulate_switch(begin, end, init, binary_op, category(), parallelism_tag);
+  }
+
+
+  // Sequential fallback with caller-supplied functors.
+  template<typename InputIterator1, typename InputIterator2, typename T, typename BinaryFunction1, typename BinaryFunction2>
+  inline T inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init,
+                         BinaryFunction1 binary_op1, BinaryFunction2 binary_op2, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init, binary_op1, binary_op2);
+  }
+
+  // Sequential fallback without functors: forwards to the four-argument _GLIBCXX_STD_P::inner_product.
+  template<typename InputIterator1, typename InputIterator2, typename T>
+  inline T inner_product(InputIterator1 first1, InputIterator1 last1,
+                         InputIterator2 first2, T init, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init);
+  }
+
+  // Parallel algorithm for random access iterators.
+  template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename T, typename BinaryFunction1, typename BinaryFunction2>
+  T
+  inner_product_switch(RandomAccessIterator1 first1, RandomAccessIterator1 last1, RandomAccessIterator2 first2, T init, BinaryFunction1 binary_op1, BinaryFunction2 binary_op2, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION((last1 - first1) >= __gnu_parallel::Settings::accumulate_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+      {
+        T res = init;
+        __gnu_parallel::inner_product_selector<RandomAccessIterator1, RandomAccessIterator2, T> my_selector(first1, first2);
+        __gnu_parallel::for_each_template_random_access(first1, last1, binary_op2, my_selector, binary_op1, res, res, -1, parallelism_tag);
+        return res;
+      }
+    // Sequential fallback: pass binary_op1/binary_op2 on, otherwise custom functors would silently be replaced by the defaults.
+    return inner_product(first1, last1, first2, init, binary_op1, binary_op2, __gnu_parallel::sequential_tag());
+  }
+
+  // No parallelism for input iterators: any other tag combination runs sequentially.
+  template<typename InputIterator1, typename InputIterator2, typename T, typename BinaryFunction1, typename BinaryFunction2, typename IteratorTag1, typename IteratorTag2>
+  inline T
+  inner_product_switch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init,
+                       BinaryFunction1 binary_op1, BinaryFunction2 binary_op2,
+                       IteratorTag1, IteratorTag2, __gnu_parallel::parallelism parallelism_tag)
+  { return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init, binary_op1, binary_op2); }
+
+  // Public interface: dispatches on the iterator categories of both input sequences.
+  template<typename InputIterator1, typename InputIterator2, typename T, typename BinaryFunction1, typename BinaryFunction2>
+  inline T inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryFunction1 binary_op1, BinaryFunction2 binary_op2, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced)
+  {
+    typedef typename iterator_traits<InputIterator1>::iterator_category category1;
+    typedef typename iterator_traits<InputIterator2>::iterator_category category2;
+
+    return inner_product_switch(first1, last1, first2, init,
+                                binary_op1, binary_op2,
+                                category1(), category2(),
+                                parallelism_tag);
+  }
+
+  // Public interface.  NOTE(review): both functors use InputIterator1's value_type regardless of T and InputIterator2 -- confirm this is intended.
+  template<typename InputIterator1, typename InputIterator2, typename T>
+  inline T inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced)
+  {
+    typedef typename iterator_traits<InputIterator1>::value_type value_type;
+    typedef std::plus<value_type> add_type;
+    typedef std::multiplies<value_type> multiply_type;
+
+    return inner_product(first1, last1, first2, init, add_type(), multiply_type(), parallelism_tag);
+  }
+
+  // Sequential fallback: forwards unchanged to _GLIBCXX_STD_P::partial_sum.
+  template<typename InputIterator, typename OutputIterator>
+  inline OutputIterator partial_sum(InputIterator begin, InputIterator end, OutputIterator result, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::partial_sum(begin, end, result);
+  }
+
+  // Sequential fallback taking a caller-supplied binary operation.
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+  inline OutputIterator partial_sum(InputIterator begin, InputIterator end, OutputIterator result, BinaryOperation bin_op, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op);
+  }
+
+  // Sequential fallback for input iterator case: used for every pair of
+  // iterator tags that has no more specialized overload.
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation, typename IteratorTag1, typename IteratorTag2>
+  inline OutputIterator
+  partial_sum_switch(InputIterator begin, InputIterator end, OutputIterator result,
+                     BinaryOperation bin_op, IteratorTag1, IteratorTag2)
+  { return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op); }
+
+  // Parallel algorithm for random access iterators.
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+  OutputIterator
+  partial_sum_switch(InputIterator begin, InputIterator end, OutputIterator result,
+                     BinaryOperation bin_op, random_access_iterator_tag, random_access_iterator_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::partial_sum_minimal_n))
+      return __gnu_parallel::parallel_partial_sum(begin, end, result, bin_op);
+
+    // Condition not met: use the sequential version.
+    return partial_sum(begin, end, result, bin_op, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface: sums with std::plus over the input iterator's value_type.
+  template<typename InputIterator, typename OutputIterator>
+  inline OutputIterator partial_sum(InputIterator begin, InputIterator end, OutputIterator result)
+  {
+    typedef std::plus<typename iterator_traits<InputIterator>::value_type> plus_type;
+
+    return partial_sum(begin, end, result, plus_type());
+  }
+
+  // Public interface: dispatches on the iterator categories of the input
+  // and the output iterator.
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+  inline OutputIterator
+  partial_sum(InputIterator begin, InputIterator end,
+              OutputIterator result, BinaryOperation binary_op)
+  {
+    typedef typename iterator_traits<InputIterator>::iterator_category
+      input_category;
+    typedef typename iterator_traits<OutputIterator>::iterator_category
+      output_category;
+
+    return partial_sum_switch(begin, end, result, binary_op,
+                              input_category(), output_category());
+  }
+
+  // Sequential fallback: forwards unchanged to _GLIBCXX_STD_P::adjacent_difference.
+  template<typename InputIterator, typename OutputIterator>
+  inline OutputIterator adjacent_difference(InputIterator begin, InputIterator end, OutputIterator result, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::adjacent_difference(begin, end, result);
+  }
+
+  // Sequential fallback taking a caller-supplied binary operation.
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+  inline OutputIterator
+  adjacent_difference(InputIterator begin,
+                      InputIterator end,
+                      OutputIterator result,
+                      BinaryOperation bin_op,
+                      __gnu_parallel::sequential_tag)
+  { return _GLIBCXX_STD_P::adjacent_difference(begin, end, result, bin_op); }
+
+  // Sequential fallback for input iterator case; the sequential_tag is required, without it the public overload would dispatch back here forever.
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation, typename IteratorTag1, typename IteratorTag2>
+  inline OutputIterator
+  adjacent_difference_switch(InputIterator begin, InputIterator end,
+                             OutputIterator result, BinaryOperation bin_op,
+                             IteratorTag1, IteratorTag2, __gnu_parallel::parallelism)
+  { return adjacent_difference(begin, end, result, bin_op, __gnu_parallel::sequential_tag()); }
+
+  // Parallel algorithm for random access iterators.
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+  OutputIterator
+  adjacent_difference_switch(InputIterator begin, InputIterator end,
+                             OutputIterator result, BinaryOperation bin_op,
+                             random_access_iterator_tag, random_access_iterator_tag,
+                             __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::adjacent_difference_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+      {
+        typedef __gnu_parallel::iterator_pair<InputIterator, OutputIterator, random_access_iterator_tag> pair_type;
+        *result = *begin;  // First element is copied; the loop starts at the second.
+        pair_type first_pair(begin + 1, result + 1);
+        pair_type last_pair(end, result + (end - begin));
+        bool unused = true;
+        __gnu_parallel::adjacent_difference_selector<pair_type> selector;
+        __gnu_parallel::for_each_template_random_access(first_pair, last_pair, bin_op, selector, __gnu_parallel::dummy_reduct(), unused, unused, -1, parallelism_tag);
+        return selector.finish_iterator;
+      }
+    return adjacent_difference(begin, end, result, bin_op, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface: uses std::minus on the input value_type and hands the caller's parallelism_tag on instead of dropping it.
+  template<typename InputIterator, typename OutputIterator>
+  inline OutputIterator
+  adjacent_difference(InputIterator begin, InputIterator end,
+                      OutputIterator result,
+                      __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    typedef iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    return adjacent_difference(begin, end, result, std::minus<value_type>(), parallelism_tag);
+  }
+
+  // Public interface: dispatches on the iterator categories of the input
+  // and the output iterator.
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+  inline OutputIterator
+  adjacent_difference(InputIterator begin, InputIterator end,
+                      OutputIterator result, BinaryOperation binary_op,
+                      __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+  {
+    typedef typename iterator_traits<InputIterator>::iterator_category
+      input_category;
+    typedef typename iterator_traits<OutputIterator>::iterator_category
+      output_category;
+
+    return adjacent_difference_switch(begin, end, result, binary_op,
+                                      input_category(), output_category(),
+                                      parallelism_tag);
+  }
+} // end namespace
+} // end namespace
+
+#endif /* _GLIBCXX_PARALLEL_NUMERIC_H */
diff --git a/libstdc++-v3/include/parallel/numericfwd.h b/libstdc++-v3/include/parallel/numericfwd.h
new file mode 100644
index 00000000000..75fa3505f97
--- /dev/null
+++ b/libstdc++-v3/include/parallel/numericfwd.h
@@ -0,0 +1,152 @@
+// <numeric> parallel extensions -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/numericfwd.h
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+#ifndef _GLIBCXX_PARALLEL_NUMERICFWD_H
+#define _GLIBCXX_PARALLEL_NUMERICFWD_H 1
+
+#pragma GCC system_header
+
+#include <parallel/tags.h>
+#include <parallel/settings.h>
+
+namespace std
+{
+namespace __parallel
+{
+  template<typename _IIter, typename T>
+  inline T
+  accumulate(_IIter, _IIter, T, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename T, typename _BinaryOper>
+  inline T
+  accumulate(_IIter, _IIter, T, _BinaryOper, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename T>
+  inline T
+  accumulate(_IIter, _IIter, T, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced);
+
+  template<typename _IIter, typename T, typename _BinaryOper>
+  inline T
+  accumulate(_IIter, _IIter, T, _BinaryOper, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced);
+
+  template<typename _IIter, typename T, typename _Tag>
+  inline T
+  accumulate_switch(_IIter, _IIter, T, _Tag, __gnu_parallel::parallelism parallelism_tag);
+
+  template<typename _IIter, typename T, typename _BinaryOper, typename _Tag>
+  T
+  accumulate_switch(_IIter, _IIter, T, _BinaryOper, _Tag, __gnu_parallel::parallelism parallelism_tag);
+
+  template<typename _RAIter, typename T, typename _BinaryOper>
+  T
+  accumulate_switch(_RAIter, _RAIter, T, _BinaryOper, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag);
+
+
+  template<typename _IIter, typename _OIter>
+  inline _OIter
+  adjacent_difference(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename _OIter, typename _BinaryOper>
+  inline _OIter
+  adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename _OIter>
+  inline _OIter
+  adjacent_difference(_IIter, _IIter, _OIter, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+
+  template<typename _IIter, typename _OIter, typename _BinaryOper>
+  inline _OIter
+  adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced);
+
+  template<typename _IIter, typename _OIter, typename _BinaryOper, typename _Tag1, typename _Tag2>
+  inline _OIter
+  adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2, __gnu_parallel::parallelism);
+
+  template<typename _IIter, typename _OIter, typename _BinaryOper>
+  _OIter
+  adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag);
+
+
+  template<typename _IIter1, typename _IIter2, typename T, typename BinaryFunction1, typename BinaryFunction2>
+  inline T
+  inner_product(_IIter1, _IIter1, _IIter2, T, BinaryFunction1, BinaryFunction2, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename T>
+  inline T
+  inner_product(_IIter1, _IIter1, _IIter2, T, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter1, typename _IIter2, typename T, typename BinaryFunction1, typename BinaryFunction2>
+  inline T
+  inner_product(_IIter1, _IIter1, _IIter2, T, BinaryFunction1, BinaryFunction2, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced);
+
+  template<typename _IIter1, typename _IIter2, typename T>
+  inline T
+  inner_product(_IIter1, _IIter1, _IIter2, T, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced);
+
+  template<typename _RAIter1, typename _RAIter2, typename T, typename BinaryFunction1, typename BinaryFunction2>
+  T
+  inner_product_switch(_RAIter1, _RAIter1, _RAIter2, T, BinaryFunction1, BinaryFunction2, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag);
+
+  template<typename _IIter1, typename _IIter2, typename T, typename BinaryFunction1, typename BinaryFunction2, typename _Tag1, typename _Tag2>
+  inline T
+  inner_product_switch(_IIter1, _IIter1, _IIter2, T, BinaryFunction1, BinaryFunction2, _Tag1, _Tag2, __gnu_parallel::parallelism parallelism_tag);
+
+
+  template<typename _IIter, typename _OIter>
+  inline _OIter
+  partial_sum(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename _OIter, typename _BinaryOper>
+  inline _OIter
+  partial_sum(_IIter, _IIter, _OIter, _BinaryOper, __gnu_parallel::sequential_tag);
+
+  template<typename _IIter, typename _OIter>
+  inline _OIter
+  partial_sum(_IIter, _IIter, _OIter result);
+
+  template<typename _IIter, typename _OIter, typename _BinaryOper>
+  inline _OIter
+  partial_sum(_IIter, _IIter, _OIter, _BinaryOper);
+
+  template<typename _IIter, typename _OIter, typename _BinaryOper, typename _Tag1, typename _Tag2>
+  inline _OIter
+  partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2);
+
+  template<typename _IIter, typename _OIter, typename _BinaryOper>
+  _OIter
+  partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, random_access_iterator_tag, random_access_iterator_tag);
+} // end namespace
+} // end namespace
+
+#endif
diff --git a/libstdc++-v3/include/parallel/omp_loop.h b/libstdc++-v3/include/parallel/omp_loop.h
new file mode 100644
index 00000000000..23fe6f4a95f
--- /dev/null
+++ b/libstdc++-v3/include/parallel/omp_loop.h
@@ -0,0 +1,105 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/omp_loop.h
+ *  @brief Parallelization of embarrassingly parallel execution by
+ *  means of an OpenMP for loop.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_OMP_LOOP_H
+#define _GLIBCXX_PARALLEL_OMP_LOOP_H 1
+
+#include <omp.h>
+
+#include <parallel/settings.h>
+#include <parallel/basic_iterator.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Embarrassingly parallel algorithm for random access
+   * iterators, using an OpenMP for loop.  Every element is processed
+   * exactly once; the first num_threads elements seed the partial results.
+   *  @param begin Begin iterator of element sequence.
+   *  @param end End iterator of element sequence.
+   *  @param o User-supplied functor (comparator, predicate, adding
+   *  functor, etc.).
+   *  @param f Functor to "process" an element with op (depends on
+   *  desired functionality, e. g. for std::for_each(), ...).
+   *  @param r Functor to "add" a single result to the already
+   *  processed elements (depends on functionality).
+   *  @param base Base value for reduction (not read by this implementation).
+   *  @param output Reference the partial results are folded into using r.
+   *  @param bound Maximum number of elements processed (e. g. for
+   *  std::count_n()); not read by this implementation.
+   *  @return User-supplied functor (that may contain a part of the result).
+   */
+  template<typename RandomAccessIterator, typename Op, typename Fu, typename Red, typename Result>
+  Op
+  for_each_template_random_access_omp_loop(RandomAccessIterator begin, RandomAccessIterator end, Op o, Fu& f, Red r, Result base, Result& output, typename std::iterator_traits<RandomAccessIterator>::difference_type bound)
+  {
+    typedef typename std::iterator_traits<RandomAccessIterator>::difference_type difference_type;
+    const difference_type length = end - begin;
+    // Never use more threads than there are elements.
+    const thread_index_t num_threads = (get_max_threads() < length) ? get_max_threads() : static_cast<thread_index_t>(length);
+    // Empty range: no team to start and nothing to reduce.
+    if (num_threads > 0)
+      {
+        Result *thread_results = new Result[num_threads];
+        // Seed each slot with one element: no uninitialized Result is
+        // read and no neutral element of r is needed.
+        for (thread_index_t i = 0; i < num_threads; i++)
+          thread_results[i] = f(o, begin + i);
+
+#pragma omp parallel num_threads(num_threads)
+        {
+          const int iam = omp_get_thread_num();
+          // The seed elements are already accounted for; start behind them.
+#pragma omp for schedule(dynamic, Settings::workstealing_chunk_size)
+          for (difference_type pos = num_threads; pos < length; pos++)
+            thread_results[iam] = r(thread_results[iam], f(o, begin + pos));
+        }
+
+        for (thread_index_t i = 0; i < num_threads; i++)
+          output = r(output, thread_results[i]);
+        delete [] thread_results;
+      }
+
+    // Points to last element processed (needed as return value for
+    // some algorithms like transform).
+    f.finish_iterator = begin + length;
+
+    return o;
+  }
+} // end namespace
+
+#endif
diff --git a/libstdc++-v3/include/parallel/omp_loop_static.h b/libstdc++-v3/include/parallel/omp_loop_static.h
new file mode 100644
index 00000000000..22acb2de348
--- /dev/null
+++ b/libstdc++-v3/include/parallel/omp_loop_static.h
@@ -0,0 +1,111 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/omp_loop_static.h
+ *  @brief Parallelization of embarrassingly parallel execution by
+ *  means of an OpenMP for loop with static scheduling.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H
+#define _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H 1
+
+#include <omp.h>
+
+#include <parallel/settings.h>
+#include <parallel/basic_iterator.h>
+
+namespace __gnu_parallel
+{
+
+  /** @brief Embarrassingly parallel algorithm for random access
+   * iterators, using an OpenMP for loop with static scheduling.
+   *
+   *  @param begin Begin iterator of element sequence.
+   *  @param end End iterator of element sequence.
+   *  @param o User-supplied functor (comparator, predicate, adding
+   *  functor, ...).
+   *  @param f Functor to "process" an element with op (depends on
+   *  desired functionality, e. g. for std::for_each(), ...).
+   *  @param r Functor to "add" a single result to the already processed
+   *  elements (depends on functionality).
+   *  @param base Base value for reduction (not used by this variant).
+   *  @param output Accumulator the per-thread results are folded into.
+   *  @param bound Maximum number of elements processed (e. g. for
+   *  std::count_n()); not used by this variant.
+   *  @return User-supplied functor (that may contain a part of the result).
+   */
+  template<typename RandomAccessIterator, typename Op, typename Fu, typename Red, typename Result>
+  Op
+  for_each_template_random_access_omp_loop_static(RandomAccessIterator begin,
+						  RandomAccessIterator end,
+						  Op o, Fu& f, Red r,
+						  Result base, Result& output,
+						  typename std::iterator_traits<RandomAccessIterator>::difference_type bound)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    const difference_type length = end - begin;
+    // Never request more threads than there are elements.
+    const difference_type max_threads = static_cast<difference_type>(get_max_threads());
+    const thread_index_t num_threads = static_cast<thread_index_t>(max_threads < length ? max_threads : length);
+
+    // Empty range: no team is started (num_threads(0) is not a valid request) and output stays untouched.
+    if (length > 0 && num_threads > 0)
+      {
+	// Every partial result starts from Result() (assumed to be neutral for r), so
+	// each element is passed to f exactly once, by the parallel loop below.
+	Result *thread_results = new Result[num_threads];
+	for (thread_index_t i = 0; i < num_threads; i++)
+	  thread_results[i] = Result();
+#pragma omp parallel num_threads(num_threads)
+	{
+	  const thread_index_t iam = omp_get_thread_num();
+#pragma omp for schedule(static, Settings::workstealing_chunk_size)
+	  for (difference_type pos = 0; pos < length; pos++)
+	    thread_results[iam] = r(thread_results[iam], f(o, begin + pos));
+	}
+	// Fold the per-thread partial results into the caller's accumulator.
+	for (thread_index_t i = 0; i < num_threads; i++)
+	  output = r(output, thread_results[i]);
+	delete [] thread_results;
+      }
+
+    // Points to last element processed (needed as return value for
+    // some algorithms like transform).
+    f.finish_iterator = begin + length;
+    return o;
+  }
+} // end namespace
+
+#endif
diff --git a/libstdc++-v3/include/parallel/par_loop.h b/libstdc++-v3/include/parallel/par_loop.h
new file mode 100644
index 00000000000..98604cf1da4
--- /dev/null
+++ b/libstdc++-v3/include/parallel/par_loop.h
@@ -0,0 +1,120 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/par_loop.h
+ *  @brief Parallelization of embarrassingly parallel execution by
+ *  means of equal splitting.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_PAR_LOOP_H
+#define _GLIBCXX_PARALLEL_PAR_LOOP_H 1
+
+#include <omp.h>
+#include <parallel/settings.h>
+
+namespace __gnu_parallel
+{
+
+  /** @brief Embarrassingly parallel algorithm for random access
+   * iterators, using hand-crafted parallelization by splitting the
+   * work into equal shares.
+   *
+   *  @param begin Begin iterator of element sequence.
+   *  @param end End iterator of element sequence.
+   *  @param o User-supplied functor (comparator, predicate, adding
+   *  functor, ...)
+   *  @param f Functor to "process" an element with op (depends on
+   *  desired functionality, e. g. for std::for_each(), ...).
+   *  @param r Functor to "add" a single result to the already
+   *  processed elements (depends on functionality).
+   *  @param base Base value for reduction (not used by this variant).
+   *  @param output Accumulator the per-thread results are folded into.
+   *  @param bound Maximum number of elements processed (e. g. for
+   *  std::count_n()); not used by this variant.
+   *  @return User-supplied functor (that may contain a part of the result).
+   */
+  template<typename RandomAccessIterator, typename Op, typename Fu, typename Red, typename Result>
+  Op
+  for_each_template_random_access_ed(RandomAccessIterator begin,
+				     RandomAccessIterator end, Op o, Fu& f,
+				     Red r, Result base, Result& output,
+				     typename std::iterator_traits<RandomAccessIterator>::difference_type bound)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    const difference_type length = end - begin;
+    const difference_type settings_threads = static_cast<difference_type>(get_max_threads());
+    const difference_type dmin = settings_threads < length ? settings_threads : length;
+    const difference_type dmax = dmin > 1 ? dmin : 1;
+
+    // Upper bound on the team size: one thread per element, but at least one.
+    const thread_index_t num_threads = static_cast<thread_index_t>(dmax);
+    Result *thread_results = new Result[num_threads];
+    // Size of the team that actually ran; the runtime may grant fewer threads than requested.
+    thread_index_t team_size = 0;
+
+#pragma omp parallel num_threads(num_threads)
+    {
+      // Split by the real team size so that no share is assigned to a missing thread.
+      const thread_index_t p = omp_get_num_threads();
+      const thread_index_t iam = omp_get_thread_num();
+      if (iam == 0)
+	team_size = p;
+
+      difference_type start = iam * length / p;
+      const difference_type limit = (iam == p - 1) ? length : (iam + 1) * length / p;
+
+      // Result() only stands in when this thread's share is empty.
+      Result reduct = Result();
+      if (start < limit)
+	reduct = f(o, begin + start++);
+      for (; start < limit; start++)
+	reduct = r(reduct, f(o, begin + start));
+
+      thread_results[iam] = reduct;
+    }
+
+    for (thread_index_t i = 0; i < team_size; i++)
+      output = r(output, thread_results[i]);
+    // The partial results have been folded into output; release them.
+    delete [] thread_results;
+
+    // Points to last element processed (needed as return value by e. g. transform).
+    f.finish_iterator = begin + length;
+    return o;
+  }
+
+} // end namespace
+
+#endif
diff --git a/libstdc++-v3/include/parallel/parallel.h b/libstdc++-v3/include/parallel/parallel.h
new file mode 100644
index 00000000000..63246126b8f
--- /dev/null
+++ b/libstdc++-v3/include/parallel/parallel.h
@@ -0,0 +1,48 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/parallel.h
+ *  @brief End-user include file. Provides advanced settings and
+ *  tuning options.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze and Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_PARALLEL_H
+#define _GLIBCXX_PARALLEL_PARALLEL_H 1
+
+#include <parallel/features.h>
+#include <parallel/compiletime_settings.h>
+#include <parallel/types.h>
+#include <parallel/tags.h>
+#include <parallel/settings.h>
+
+#endif
diff --git a/libstdc++-v3/include/parallel/partial_sum.h b/libstdc++-v3/include/parallel/partial_sum.h
new file mode 100644
index 00000000000..909b171d6f4
--- /dev/null
+++ b/libstdc++-v3/include/parallel/partial_sum.h
@@ -0,0 +1,191 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/partial_sum.h
+ *  @brief Parallel implementation of std::partial_sum(), i. e. prefix
+ *  sums.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
+#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
+
+
+#include <omp.h>
+#include <bits/stl_algobase.h>
+#include <parallel/parallel.h>
+#include <parallel/numericfwd.h>
+
+namespace __gnu_parallel
+{
+  // Problem: there is no 0-element given.
+
+  /** @brief Sequential prefix-sum kernel, used as the base case.
+   *  @param begin Begin iterator of input sequence.
+   *  @param end End iterator of input sequence.
+   *  @param result Begin iterator of output sequence.
+   *  @param bin_op Associative binary function.
+   *  @param value Running value the first element is combined with.
+   *  It must be passed since the neutral element is unknown in general.
+   *  @return End iterator of output sequence. */
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+  inline OutputIterator
+  parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
+				OutputIterator result, BinaryOperation bin_op,
+				typename std::iterator_traits<InputIterator>::value_type value)
+  {
+    // An empty input falls straight through and hands back result unchanged.
+    for (; begin != end; ++begin)
+      {
+	// Fold the next input element into the running value ...
+	value = bin_op(value, *begin);
+	// ... and emit the running value as the next prefix.
+	*result = value;
+	++result;
+      }
+
+    return result;
+  }
+
+  /** @brief Parallel partial sum implementation, two-phase approach,
+      no recursion.
+      *  @param begin Begin iterator of input sequence.
+      *  @param end End iterator of input sequence.
+      *  @param result Begin iterator of output sequence.
+      *  @param bin_op Associative binary function.
+      *  @param n Length of sequence.
+      *  @param num_threads Number of threads to use; capped at n - 1.
+      *  @return End iterator of output sequence.
+      */
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+  OutputIterator
+  parallel_partial_sum_linear(InputIterator begin, InputIterator end,
+			      OutputIterator result, BinaryOperation bin_op,
+			      typename std::iterator_traits<InputIterator>::difference_type n, int num_threads)
+  {
+    typedef std::iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    // Nothing to sum; in particular *begin must not be read.
+    if (n <= 0)
+      return result;
+
+    if (num_threads > (n - 1))
+      num_threads = static_cast<thread_index_t>(n - 1);
+    if (num_threads < 2)
+      {
+	*result = *begin;
+	return parallel_partial_sum_basecase(begin + 1, end, result + 1, bin_op, *begin);
+      }
+
+    // Chunk i is [borders[i], borders[i + 1]); there are num_threads + 1 chunks.
+    difference_type borders[num_threads + 2];
+
+    if (Settings::partial_sum_dilatation == 1.0f)
+      equally_split(n, num_threads + 1, borders);
+    else
+      {
+	// num_threads chunks of equal length; chunk 0 takes whatever is left over.
+	difference_type chunk_length = static_cast<difference_type>(static_cast<double>(n) / (static_cast<double>(num_threads) + Settings::partial_sum_dilatation));
+	difference_type borderstart = n - num_threads * chunk_length;
+	borders[0] = 0;
+	for (int i = 1; i < (num_threads + 1); i++, borderstart += chunk_length)
+	  borders[i] = borderstart;
+	borders[num_threads + 1] = n;
+      }
+
+    value_type* sums = new value_type[num_threads];
+    // NOTE(review): the phases below assume the team really has num_threads threads - confirm.
+#pragma omp parallel num_threads(num_threads)
+    {
+      int id = omp_get_thread_num();
+      if (id == 0)
+	{
+	  // Chunk 0 needs no offset, so it is completed right away.
+	  *result = *begin;
+	  parallel_partial_sum_basecase(begin + 1, begin + borders[1], result + 1, bin_op, *begin);
+	  sums[0] = *(result + borders[1] - 1);
+	}
+      else
+	sums[id] = std::accumulate(begin + borders[id] + 1, begin + borders[id + 1], *(begin + borders[id]), bin_op, __gnu_parallel::sequential_tag());
+
+#pragma omp barrier
+      // Turn the chunk totals into running totals: sums[i] is then the offset for chunk i + 1.
+#pragma omp single
+      parallel_partial_sum_basecase(sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);
+
+#pragma omp barrier
+
+      // Still same team.
+      parallel_partial_sum_basecase(begin + borders[id + 1], begin + borders[id + 2], result + borders[id + 1], bin_op, sums[id]);
+    }
+    delete[] sums;
+    return result + n;
+  }
+
+  /** @brief Parallel partial sum front-end.
+   *  @param begin Begin iterator of input sequence.
+   *  @param end End iterator of input sequence.
+   *  @param result Begin iterator of output sequence.
+   *  @param bin_op Associative binary function.
+   *  @return End iterator of output sequence. */
+  template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+  OutputIterator
+  parallel_partial_sum(InputIterator begin, InputIterator end,
+		       OutputIterator result, BinaryOperation bin_op)
+  {
+    // The call macro gets the sequence length, end - begin, not its negation.
+    _GLIBCXX_CALL(end - begin);
+
+    typedef std::iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    const difference_type n = end - begin;
+    const int num_threads = get_max_threads();
+
+    switch (Settings::partial_sum_algorithm)
+      {
+      case Settings::LINEAR:
+	// Need an initial offset.
+	return parallel_partial_sum_linear(begin, end, result, bin_op,
+					   n, num_threads);
+      default:
+	// Partial_sum algorithm not implemented.  Nothing was written, so hand back
+	// result; the input iterator end need not even convert to OutputIterator.
+	_GLIBCXX_PARALLEL_ASSERT(0);
+	return result;
+      }
+  }
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/partition.h b/libstdc++-v3/include/parallel/partition.h
new file mode 100644
index 00000000000..3c2917f0e2a
--- /dev/null
+++ b/libstdc++-v3/include/parallel/partition.h
@@ -0,0 +1,389 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/partition.h
+ *  @brief Parallel implementation of std::partition(),
+ *  std::nth_element(), and std::partial_sort().
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_PARTITION_H
+#define _GLIBCXX_PARALLEL_PARTITION_H 1
+
+#include <parallel/basic_iterator.h>
+#include <parallel/sort.h>
+#include <bits/stl_algo.h>
+#include <parallel/parallel.h>
+
+/** @brief Decides whether certain shared variables in this file are declared volatile. */
+#define _GLIBCXX_VOLATILE volatile
+
+namespace __gnu_parallel
+{
+  /** @brief Parallel implementation of std::partition.
+   *  @param begin Begin iterator of input sequence to split.
+   *  @param end End iterator of input sequence to split.
+   *  @param pred Partition predicate, possibly including some kind of pivot.
+   *  @param max_num_threads Maximum number of threads to use for this task.
+   *  @return Number of elements fulfilling the predicate, i. e. the index of the first element that does not (0 for an empty sequence). */
+  template<typename RandomAccessIterator, typename Predicate>
+  inline typename std::iterator_traits<RandomAccessIterator>::difference_type
+  parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
+		     Predicate pred, thread_index_t max_num_threads)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    difference_type n = end - begin;
+
+    _GLIBCXX_CALL(n)
+
+    // Shared: [left, right] is the part of the sequence not yet handed out as chunks.
+    _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
+    _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right, leftnew, rightnew;
+    bool* reserved_left, * reserved_right;
+
+    reserved_left = new bool[max_num_threads];
+    reserved_right = new bool[max_num_threads];
+
+    difference_type chunk_size;
+    if (Settings::partition_chunk_share > 0.0)
+      chunk_size = std::max((difference_type)Settings::partition_chunk_size, (difference_type)((double)n * Settings::partition_chunk_share / (double)max_num_threads));
+    else
+      chunk_size = Settings::partition_chunk_size;
+
+    // At least good for two processors.
+    while (right - left + 1 >= 2 * max_num_threads * chunk_size)
+      {
+	difference_type num_chunks = (right - left + 1) / chunk_size;
+	thread_index_t num_threads = (int)std::min((difference_type)max_num_threads, num_chunks / 2);
+
+	for (int r = 0; r < num_threads; r++)
+	  {
+	    reserved_left[r] = false;
+	    reserved_right[r] = false;
+	  }
+	leftover_left = 0;
+	leftover_right = 0;
+
+#pragma omp parallel num_threads(num_threads)
+	{
+	  // Private: the left and the right chunk this thread currently works on.
+	  difference_type thread_left, thread_left_border, thread_right, thread_right_border;
+	  thread_left = left + 1;
+
+	  // Just to satisfy the condition below, i. e. start with both chunks "used up".
+	  thread_left_border = thread_left - 1;
+	  thread_right = n - 1;
+	  thread_right_border = thread_right + 1;
+
+	  bool iam_finished = false;
+	  while (!iam_finished)
+	    {
+	      if (thread_left > thread_left_border)
+#pragma omp critical
+		{
+		  if (left + (chunk_size - 1) > right)
+		    iam_finished = true;
+		  else
+		    {
+		      thread_left = left;
+		      thread_left_border = left + (chunk_size - 1);
+		      left += chunk_size;
+		    }
+		}
+
+	      if (thread_right < thread_right_border)
+#pragma omp critical
+		{
+		  if (left > right - (chunk_size - 1))
+		    iam_finished = true;
+		  else
+		    {
+		      thread_right = right;
+		      thread_right_border = right - (chunk_size - 1);
+		      right -= chunk_size;
+		    }
+		}
+
+	      if (iam_finished)
+		break;
+
+	      // Swap as usual.  The bound is tested first so that pred never sees an element outside the own chunk.
+	      while (thread_left < thread_right)
+		{
+		  while (thread_left <= thread_left_border && pred(begin[thread_left]))
+		    thread_left++;
+		  while (thread_right >= thread_right_border && !pred(begin[thread_right]))
+		    thread_right--;
+
+		  if (thread_left > thread_left_border || thread_right < thread_right_border)
+		    // Fetch new chunk(s).
+		    break;
+
+		  std::swap(begin[thread_left], begin[thread_right]);
+		  thread_left++;
+		  thread_right--;
+		}
+	    }
+
+	  // Now swap the leftover chunks to the right places.
+	  if (thread_left <= thread_left_border)
+#pragma omp atomic
+	    leftover_left++;
+	  if (thread_right >= thread_right_border)
+#pragma omp atomic
+	    leftover_right++;
+
+#pragma omp barrier
+
+#pragma omp single
+	  {
+	    leftnew = left - leftover_left * chunk_size;
+	    rightnew = right + leftover_right * chunk_size;
+	  }
+
+#pragma omp barrier
+
+	  // <=> thread_left_border + (chunk_size - 1) >= leftnew
+	  if (thread_left <= thread_left_border
+	      && thread_left_border >= leftnew)
+	    {
+	      // Chunk already in place, reserve spot.
+	      reserved_left[(left - (thread_left_border + 1)) / chunk_size] = true;
+	    }
+
+	  // <=> thread_right_border - (chunk_size - 1) <= rightnew
+	  if (thread_right >= thread_right_border
+	      && thread_right_border <= rightnew)
+	    {
+	      // Chunk already in place, reserve spot.
+	      reserved_right[((thread_right_border - 1) - right) / chunk_size] = true;
+	    }
+
+#pragma omp barrier
+
+	  if (thread_left <= thread_left_border && thread_left_border < leftnew)
+	    {
+	      // Find spot and swap.
+	      difference_type swapstart = -1;
+#pragma omp critical
+	      {
+		for (int r = 0; r < leftover_left; r++)
+		  if (!reserved_left[r])
+		    {
+		      reserved_left[r] = true;
+		      swapstart = left - (r + 1) * chunk_size;
+		      break;
+		    }
+	      }
+
+#if _GLIBCXX_ASSERTIONS
+	      _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
+#endif
+
+	      std::swap_ranges(begin + thread_left_border - (chunk_size - 1), begin + thread_left_border + 1, begin + swapstart);
+	    }
+
+	  if (thread_right >= thread_right_border
+	      && thread_right_border > rightnew)
+	    {
+	      // Find spot and swap
+	      difference_type swapstart = -1;
+#pragma omp critical
+	      {
+		for (int r = 0; r < leftover_right; r++)
+		  if (!reserved_right[r])
+		    {
+		      reserved_right[r] = true;
+		      swapstart = right + r * chunk_size + 1;
+		      break;
+		    }
+	      }
+
+#if _GLIBCXX_ASSERTIONS
+	      _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
+#endif
+
+	      std::swap_ranges(begin + thread_right_border, begin + thread_right_border + chunk_size, begin + swapstart);
+	    }
+#if _GLIBCXX_ASSERTIONS
+#pragma omp barrier
+
+#pragma omp single
+	  {
+	    for (int r = 0; r < leftover_left; r++)
+	      _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
+	    for (int r = 0; r < leftover_right; r++)
+	      _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
+	  }
+
+#pragma omp barrier
+#endif
+
+#pragma omp barrier
+	  left = leftnew;	// Executed by every thread of the team, all storing the same value.
+	  right = rightnew;
+	}
+      }	// end "recursion"
+
+    difference_type final_left = left, final_right = right;
+
+    while (final_left < final_right)
+      {
+	// Go right until an element not fulfilling the predicate is found.
+	while (pred(begin[final_left]) && final_left < final_right)
+	  final_left++;
+
+	// Go left until an element fulfilling the predicate is found.
+	while (!pred(begin[final_right]) && final_left < final_right)
+	  final_right--;
+
+	if (final_left == final_right)
+	  break;
+	std::swap(begin[final_left], begin[final_right]);
+	final_left++;
+	final_right--;
+      }
+
+    // All elements on the left side fulfil the predicate, all elements
+    // on the right side do not.
+    delete[] reserved_left;
+    delete[] reserved_right;
+
+    // Element "between" final_left and final_right might not have
+    // been regarded yet; an empty sequence has no such element.
+    if (final_left >= n || !pred(begin[final_left]))
+      // Really swapped.
+      return final_left;
+    else
+      return final_left + 1;
+  }
+
+  /**
+   *  @brief Parallel implementation of std::nth_element().
+   *  @param begin Begin iterator of input sequence.
+   *  @param nth Iterator of element that must be in position afterwards.
+   *  @param end End iterator of input sequence.
+   *  @param comp Comparator.
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  void
+  parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end, Comparator comp)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    _GLIBCXX_CALL(end - begin)
+
+    random_number rng;
+
+    difference_type minimum_length = std::max<difference_type>(2, Settings::partition_minimal_n);
+
+    // Break if input range too small.
+    while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
+      {
+	difference_type n = end - begin;
+
+	RandomAccessIterator pivot_pos = begin +  rng(n);
+
+	// Swap pivot_pos value to end.
+	if (pivot_pos != (end - 1))
+	  std::swap(*pivot_pos, *(end - 1));
+	pivot_pos = end - 1;
+
+	// XXX Comparator must have first_value_type, second_value_type, result_type
+	// Comparator == __gnu_parallel::lexicographic<S, int, __gnu_parallel::less<S, S> > 
+	// pivot_pos == std::pair<S, int>*
+	// XXX binder2nd only for RandomAccessIterators??
+	__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, *pivot_pos);
+
+	// Divide, leave pivot unchanged in last place.
+	RandomAccessIterator split_pos1, split_pos2;
+	split_pos1 = begin + parallel_partition(begin, end - 1, pred, get_max_threads());
+
+	// Left side: < pivot_pos; right side: >= pivot_pos
+
+	// Swap pivot back to middle.
+	if (split_pos1 != pivot_pos)
+	  std::swap(*split_pos1, *pivot_pos);
+	pivot_pos = split_pos1;
+
+	// E. g. when all elements are equal, the split ends up at begin.
+	if ((split_pos1 + 1 - begin) < (n >> 7) || (end - split_pos1) < (n >> 7))
+	  {
+	    // Very unequal split, one part smaller than one 128th:
+	    // collect the elements not strictly larger than the pivot.
+	    __gnu_parallel::unary_negate<__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>, value_type> not_above_pivot(__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>(comp, *pivot_pos));
+
+	    // Find other end of pivot-equal range.
+	    split_pos2 = __gnu_sequential::partition(split_pos1 + 1, end, not_above_pivot);
+	  }
+	else
+	  // Only skip the pivot.
+	  split_pos2 = split_pos1 + 1;
+
+	// Compare iterators.
+	if (split_pos2 <= nth)
+	  begin = split_pos2;
+	else if (nth < split_pos1)
+	  end = split_pos1;
+	else
+	  // nth lies within [split_pos1, split_pos2) and is thus already in
+	  // its final position; do not fall through to the sort below.
+	  return;
+      }
+
+    // Fewer than max(2, Settings::partition_minimal_n) elements left.
+    __gnu_sequential::sort(begin, end, comp);
+  }
+
+  /** @brief Parallel implementation of std::partial_sort().
+   *  @param begin Begin iterator of input sequence.
+   *  @param middle End of the prefix that is sorted; passed on as the nth position.
+   *  @param end End iterator of input sequence.
+   *  @param comp Comparator. */
+  template<typename RandomAccessIterator, typename Comparator>
+  void
+  parallel_partial_sort(RandomAccessIterator begin, RandomAccessIterator middle, RandomAccessIterator end, Comparator comp)
+  {
+    parallel_nth_element(begin, middle, end, comp);	// Step 1: selection around middle.
+    std::sort(begin, middle, comp);			// Step 2: order the prefix [begin, middle).
+  }
+
+}	//namespace __gnu_parallel
+
+#undef _GLIBCXX_VOLATILE
+
+#endif
diff --git a/libstdc++-v3/include/parallel/queue.h b/libstdc++-v3/include/parallel/queue.h
new file mode 100644
index 00000000000..9d2143b5787
--- /dev/null
+++ b/libstdc++-v3/include/parallel/queue.h
@@ -0,0 +1,153 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/queue.h
+ *  @brief Lock-free double-ended queue.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_QUEUE_H
+#define _GLIBCXX_PARALLEL_QUEUE_H 1
+
+#include <parallel/types.h>
+#include <parallel/base.h>
+#include <parallel/compatibility.h>
+
+/** @brief Decide whether to declare certain variables volatile in this file. */
+#define _GLIBCXX_VOLATILE volatile
+
+namespace __gnu_parallel
+{
+  /**@brief Double-ended queue of bounded size, allowing lock-free
+   *  atomic access.  push_front() and pop_front() must not be called
+   *  concurrently to each other, while pop_back() can be called
+   *  concurrently at all times.
+   *  @c empty(), @c size(), and @c top() are intentionally not provided.
+   *  Calling them would not make sense in a concurrent setting.
+   *  @param T Contained element type. */
+  template<typename T>
+  class RestrictedBoundedConcurrentQueue
+  {
+  private:
+    /** @brief Array of elements, seen as cyclic buffer. */
+    T* base;
+
+    /** @brief Maximal number of elements contained at the same time. */
+    sequence_index_t max_size;
+
+    /** @brief Cyclic begin and end pointers contained in one
+	atomically changeable value. */
+    _GLIBCXX_VOLATILE lcas_t borders;
+
+    // Not copyable (declared, never defined): a copy would delete base again.
+    RestrictedBoundedConcurrentQueue(const RestrictedBoundedConcurrentQueue&);
+    RestrictedBoundedConcurrentQueue& operator=(const RestrictedBoundedConcurrentQueue&);
+  public:
+    /** @brief Constructor. Not to be called concurrently, of course.
+     *  @param max_size Maximal number of elements to be contained. */
+    RestrictedBoundedConcurrentQueue(sequence_index_t max_size)
+    : base(new T[max_size]), max_size(max_size), borders(encode2(0, 0))
+    {
+#pragma omp flush
+    }
+
+    /** @brief Destructor. Not to be called concurrently, of course. */
+    ~RestrictedBoundedConcurrentQueue() { delete[] base; }
+
+    /** @brief Pushes one element into the queue at the front end.
+     *  Must not be called concurrently with pop_front(). */
+    void push_front(const T& t)
+    {
+      lcas_t former_borders = borders;
+      int former_front, former_back;
+      decode2(former_borders, former_front, former_back);
+#if _GLIBCXX_ASSERTIONS
+      // Checked before storing; otherwise: front - back > max_size eventually.
+      _GLIBCXX_PARALLEL_ASSERT(((former_front + 1) - former_back) <= max_size);
+#endif
+      *(base + former_front % max_size) = t;
+      fetch_and_add(&borders, encode2(1, 0));
+    }
+
+    /** @brief Pops one element from the queue at the front end.
+     *  Must not be called concurrently with push_front().
+     *  @return True if an element was stored in @c t, false if none was left. */
+    bool pop_front(T& t)
+    {
+      int former_front, former_back;
+#pragma omp flush
+      decode2(borders, former_front, former_back);
+      while (former_front > former_back)
+	{
+	  // Chance: try to install the smaller front; on failure re-read, retry.
+	  lcas_t former_borders = encode2(former_front, former_back);
+	  lcas_t new_borders = encode2(former_front - 1, former_back);
+	  if (compare_and_swap(&borders, former_borders, new_borders))
+	    {
+	      t = *(base + (former_front - 1) % max_size);
+	      return true;
+	    }
+#pragma omp flush
+	  decode2(borders, former_front, former_back);
+	}
+      return false;
+    }
+
+    /** @brief Pops one element from the queue at the back end.
+     *  Can be called concurrently at all times.
+     *  @return True if an element was stored in @c t, false if none was left. */
+    bool pop_back(T& t)	//queue behavior
+    {
+      int former_front, former_back;
+#pragma omp flush
+      decode2(borders, former_front, former_back);
+      while (former_front > former_back)
+	{
+	  // Chance: try to install the larger back; on failure re-read, retry.
+	  lcas_t former_borders = encode2(former_front, former_back);
+	  lcas_t new_borders = encode2(former_front, former_back + 1);
+	  if (compare_and_swap(&borders, former_borders, new_borders))
+	    {
+	      t = *(base + former_back % max_size);
+	      return true;
+	    }
+#pragma omp flush
+	  decode2(borders, former_front, former_back);
+	}
+      return false;
+    }
+  };
+}	//namespace __gnu_parallel
+
+#undef _GLIBCXX_VOLATILE
+
+#endif
diff --git a/libstdc++-v3/include/parallel/quicksort.h b/libstdc++-v3/include/parallel/quicksort.h
new file mode 100644
index 00000000000..e3df87a66d6
--- /dev/null
+++ b/libstdc++-v3/include/parallel/quicksort.h
@@ -0,0 +1,172 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/quicksort.h
+ *  @brief Implementation of an unbalanced parallel quicksort (in-place).
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_QUICKSORT_H
+#define _GLIBCXX_PARALLEL_QUICKSORT_H 1
+
+#include <parallel/parallel.h>
+#include <parallel/partition.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Unbalanced quicksort divide step.
+   *  @param begin Begin iterator of subsequence.
+   *  @param end End iterator of subsequence.
+   *  @param comp Comparator.
+   *  @param pivot_rank Desired rank of the pivot, in [0, end - begin).
+   *  @param num_samples Choose pivot from that many samples.
+   *  @param num_threads Number of threads that are allowed to work on
+   *  this part.
+   *  @return Split position reported by parallel_partition(); 0 if the range is empty.
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline typename std::iterator_traits<RandomAccessIterator>::difference_type
+  parallel_sort_qs_divide(RandomAccessIterator begin, RandomAccessIterator end,
+			  Comparator comp,
+			  typename std::iterator_traits<RandomAccessIterator>::difference_type pivot_rank,
+			  typename std::iterator_traits<RandomAccessIterator>::difference_type num_samples, thread_index_t num_threads)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    difference_type n = end - begin;
+    if (n <= 0)
+      return 0;	// Nothing to partition; also avoids dividing by n == 0 below.
+    // Use at least one and at most n samples, so that a pivot always exists.
+    num_samples = std::max<difference_type>(1, std::min(num_samples, n));
+    value_type samples[num_samples];
+    for (difference_type s = 0; s < num_samples; s++)
+      samples[s] = begin[(unsigned long long)s * n / num_samples];
+    __gnu_sequential::sort(samples, samples + num_samples, comp);
+
+    // Widen like in the sampling loop: pivot_rank * num_samples may overflow a narrow difference_type.
+    value_type& pivot = samples[(unsigned long long)pivot_rank * num_samples / n];
+    __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, pivot);
+    return parallel_partition(begin, end, pred, num_threads);
+  }
+
+  /** @brief Unbalanced quicksort conquer step.
+   *  @param begin Begin iterator of subsequence.
+   *  @param end End iterator of subsequence.
+   *  @param comp Comparator.
+   *  @param num_threads Number of threads that are allowed to work on
+   *  this part.
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  parallel_sort_qs_conquer(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, int num_threads)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    // A single thread (or fewer) cannot split the work any further.
+    if (num_threads <= 1)
+      {
+	__gnu_sequential::sort(begin, end, comp);
+	return;
+      }
+
+    const difference_type n = end - begin;
+    if (n <= 1)
+      return;
+
+    // The left part gets the larger half of an odd number of threads,
+    // and the pivot rank is chosen proportionally to that share.
+    const thread_index_t num_processors_left = (num_threads + 1) / 2;
+    const difference_type pivot_rank = n * num_processors_left / num_threads;
+
+    const difference_type split =
+      parallel_sort_qs_divide(begin, end, comp, pivot_rank,
+			      Settings::sort_qs_num_samples_preset, num_threads);
+
+    // Exactly two sections exist, so request exactly two threads here;
+    // otherwise every nesting level would start a team of the default
+    // size, of which all but two threads just wait.  The divide step
+    // hands its own thread count to parallel_partition().
+#pragma omp parallel sections num_threads(2)
+    {
+#pragma omp section
+      parallel_sort_qs_conquer(begin, begin + split, comp, num_processors_left);
+#pragma omp section
+      parallel_sort_qs_conquer(begin + split, end, comp, num_threads - num_processors_left);
+    }
+  }
+
+
+
+  /** @brief Unbalanced quicksort main call.
+   *  @param begin Begin iterator of input sequence.
+   *  @param end End iterator input sequence, ignored.
+   *  @param comp Comparator.
+   *  @param n Length of input sequence.
+   *  @param num_threads Number of threads that are allowed to work on
+   *  this part.
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  parallel_sort_qs(RandomAccessIterator begin, RandomAccessIterator end,
+		   Comparator comp,
+		   typename std::iterator_traits<RandomAccessIterator>::difference_type n, int num_threads)
+  {
+    _GLIBCXX_CALL(n)
+
+    if (n == 0)
+      return;
+
+    // At least one element per processor.
+    if (num_threads > n)
+      num_threads = static_cast<thread_index_t>(n);
+
+    Settings::sort_qs_num_samples_preset = 100;	// NOTE(review): overwrites the global setting on every call.
+
+    // The conquer step relies on nested parallel regions, so the OpenMP
+    // thread count and nesting flag have to be changed globally ("hard to
+    // avoid").  Both are saved here and put back afterwards, so the call
+    // does not permanently change the settings of the calling program.
+    const int old_num_threads = omp_get_max_threads();
+    const bool old_nested = (omp_get_nested() != 0);
+    omp_set_num_threads(num_threads);
+    omp_set_nested(true);
+    parallel_sort_qs_conquer(begin, begin + n, comp, num_threads);
+    omp_set_nested(old_nested);
+    omp_set_num_threads(old_num_threads);
+  }
+
+}	//namespace __gnu_parallel
+
+#endif
diff --git a/libstdc++-v3/include/parallel/random_number.h b/libstdc++-v3/include/parallel/random_number.h
new file mode 100644
index 00000000000..266ed0ab251
--- /dev/null
+++ b/libstdc++-v3/include/parallel/random_number.h
@@ -0,0 +1,386 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/random_number.h
+ *  @brief Random number generator based on the Mersenne twister.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_RANDOM_NUMBER_H
+#define _GLIBCXX_PARALLEL_RANDOM_NUMBER_H 1
+
+#include <parallel/types.h>
+
+namespace __gnu_parallel
+{
+  // Mersenne twister engine (presumably adapted from Boost.Random, cf. the BOOST_* macros below).  XXX use tr1 random number.
+  // http://www.math.keio.ac.jp/matumoto/emt.html
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u,
+	   int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  class mersenne_twister
+  {
+  public:
+    typedef UIntType result_type;
+    static const int word_size = w;
+    static const int state_size = n;
+    static const int shift_size = m;
+    static const int mask_bits = r;
+    static const UIntType parameter_a = a;
+    static const int output_u = u;
+    static const int output_s = s;
+    static const UIntType output_b = b;
+    static const int output_t = t;
+    static const UIntType output_c = c;
+    static const int output_l = l;
+
+    static const bool has_fixed_range = false;
+
+    mersenne_twister() { seed(); }	// Default seed, see seed().
+
+#if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x520)
+    // Work around overload resolution problem (Gennadiy E. Rozental)
+    explicit mersenne_twister(const UIntType& value)
+#else
+      explicit mersenne_twister(UIntType value)
+#endif
+    { seed(value); }
+    template<typename It> mersenne_twister(It& first, It last) { seed(first,last); }	// Advances first.
+
+    template<typename Generator>
+    explicit mersenne_twister(Generator & gen) { seed(gen); }	// Draws n values from gen.
+
+    // compiler-generated copy ctor and assignment operator are fine
+
+    void seed() { seed(UIntType(5489)); }	// The default seed value is 5489.
+
+#if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x520)
+    // Work around overload resolution problem (Gennadiy E. Rozental)
+    void seed(const UIntType& value)
+#else
+      void seed(UIntType value)
+#endif
+    {
+      // New seeding algorithm from
+      // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html
+      // In the previous versions, MSBs of the seed affected only MSBs of the
+      // state x[].
+      const UIntType mask = ~0u;
+      x[0] = value & mask;
+      for (i = 1; i < n; i++) {	// The member i is the loop counter; it ends as n, so the next draw calls twist(0).
+	// See Knuth "The Art of Computer Programming" Vol. 2, 3rd ed., page 106
+	x[i] = (1812433253UL * (x[i-1] ^ (x[i-1] >> (w-2))) + i) & mask;
+      }
+    }
+
+    // For GCC, moving this function out-of-line prevents inlining, which may
+    // reduce overall object code size.  However, MSVC does not grok
+    // out-of-line definitions of member function templates.
+    template<typename Generator>
+    void seed(Generator & gen)
+    {
+      // I could have used std::generate_n, but it takes "gen" by value
+      for (int j = 0; j < n; j++)
+	x[j] = gen();
+      i = n;	// The next draw starts with twist(0).
+    }
+    // Copies up to n values from [first, last) and advances first; a too short range is not diagnosed (the check below is disabled).
+    template<typename It>
+    void seed(It& first, It last)
+    {
+      int j;
+      for (j = 0; j < n && first != last; ++j, ++first)
+	x[j] = *first;
+      i = n;
+      /*    if (first == last && j < n)
+	    throw std::invalid_argument("mersenne_twister::seed");*/
+    }
+
+    result_type min() const { return 0; }
+    result_type max() const
+    {
+      // avoid "left shift count >= width of type" warning
+      result_type res = 0;
+      for (int i = 0; i < w; ++i)
+	res |= (1u << i);	// Sets the w low bits one at a time.
+      return res;
+    }
+
+    result_type operator()();
+    static bool validation(result_type v) { return val == v; }	// Compares v with the template argument val.
+
+#ifndef BOOST_NO_OPERATORS_IN_NAMESPACE
+    // Two engines are equal if compute(j) agrees for every j in [0, state_size).
+    friend bool operator==(const mersenne_twister& x, const mersenne_twister& y)
+    {
+      for (int j = 0; j < state_size; ++j)
+	if (x.compute(j) != y.compute(j))
+	  return false;
+      return true;
+    }
+
+    friend bool operator!=(const mersenne_twister& x, const mersenne_twister& y)
+    { return !(x == y); }
+#else
+    // Use a member function; Streamable concept not supported.
+    bool operator==(const mersenne_twister& rhs) const
+    {
+      for (int j = 0; j < state_size; ++j)
+	if (compute(j) != rhs.compute(j))
+	  return false;
+      return true;
+    }
+
+    bool operator!=(const mersenne_twister& rhs) const
+    { return !(*this == rhs); }
+#endif
+
+  private:
+    // returns x(i-n+index), where index is in 0..n-1
+    UIntType compute(unsigned int index) const
+    {
+      // equivalent to (i-n+index) % 2n, but doesn't produce negative numbers
+      return x[ (i + n + index) % (2*n) ];
+    }
+    void twist(int block);	// block 0 fills x[n..2n-1], block 1 fills x[0..n-1] and resets i.
+
+    // state representation: next output is o(x(i))
+    //   x[0]  ... x[k] x[k+1] ... x[n-1]     x[n]     ... x[2*n-1]   represents
+    //  x(i-k) ... x(i) x(i+1) ... x(i-k+n-1) x(i-k-n) ... x(i-k-1)
+    // The goal is to always have x(i-n) ... x(i-1) available for
+    // operator== and save/restore.
+
+    UIntType x[2*n];	// State words, laid out as described above.
+    int i;	// Index into x of the word that the next draw returns.
+  };
+
+#ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
+  // A definition is required even for integral static constants: without
+  // one, any odr-use (e.g. binding a constant to a const reference) fails
+  // to link.  Every static constant of the class is covered here.
+
+  // Word and state geometry.
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const int mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::word_size;
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const int mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::state_size;
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const int mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::shift_size;
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const int mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::mask_bits;
+
+  // Coefficient used by twist().
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const UIntType mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::parameter_a;
+
+  // Shifts and masks of the output transformation in operator()().
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const int mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::output_u;
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const int mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::output_s;
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const UIntType mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::output_b;
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const int mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::output_t;
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const UIntType mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::output_c;
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const int mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::output_l;
+
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u, int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  const bool mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::has_fixed_range;
+#endif
+
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u,
+	   int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  void mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::twist(int block)
+  {
+    const UIntType upper_mask = (~0u) << r;	// Bit positions r and above (of an unsigned int).
+    const UIntType lower_mask = ~upper_mask;	// Complement of upper_mask.
+    // block 0 fills x[n..2n-1]; block 1 fills x[0..n-1] and resets i to 0; other values do nothing.
+    if (block == 0) {
+      for (int j = n; j < 2*n; j++) {
+	UIntType y = (x[j-n] & upper_mask) | (x[j-(n-1)] & lower_mask);
+	x[j] = x[j-(n-m)] ^ (y >> 1) ^ (y&1 ? a : 0);
+      }
+    } else if (block == 1) {
+      // split loop to avoid costly modulo operations
+      {  // extra scope for MSVC brokenness w.r.t. for scope
+	for (int j = 0; j < n-m; j++) {
+	  UIntType y = (x[j+n] & upper_mask) | (x[j+n+1] & lower_mask);
+	  x[j] = x[j+n+m] ^ (y >> 1) ^ (y&1 ? a : 0);
+	}
+      }
+
+      for (int j = n-m; j < n-1; j++) {
+	UIntType y = (x[j+n] & upper_mask) | (x[j+n+1] & lower_mask);
+	x[j] = x[j-(n-m)] ^ (y >> 1) ^ (y&1 ? a : 0);
+      }
+      // last iteration (j == n-1): the successor of x[2*n-1] wraps around to x[0]
+      UIntType y = (x[2*n-1] & upper_mask) | (x[0] & lower_mask);
+      x[n-1] = x[m-1] ^ (y >> 1) ^ (y&1 ? a : 0);
+      i = 0;
+    }
+  }
+
+  template<typename UIntType, int w, int n, int m, int r, UIntType a, int u,
+	   int s, UIntType b, int t, UIntType c, int l, UIntType val>
+  inline typename mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::result_type
+  mersenne_twister<UIntType,w,n,m,r,a,u,s,b,t,c,l,val>::operator()()
+  {
+    // Refill the half of the state that the index is about to enter.
+    if (i == n)
+      twist(0);
+    else if (i >= 2*n)
+      twist(1);
+    // Step 4: fetch the next state word and apply the output transformation.
+    UIntType word = x[i++];
+    word ^= word >> u;
+    word ^= (word << s) & b;
+    word ^= (word << t) & c;
+    word ^= word >> l;
+    return word;
+  }
+
+
+  typedef mersenne_twister<uint32,32,351,175,19,0xccab8ee7,11,
+			   7,0x31b6ab00,15,0xffe50000,17, 0xa37d3c92> mt11213b;	// 351 state words of 32 bits.
+
+  // validation value (last argument) by experiment from mt19937.c
+  typedef mersenne_twister<uint32,32,624,397,31,0x9908b0df,11,
+			   7,0x9d2c5680,15,0xefc60000,18, 3346425566U> mt19937;	// 624 state words of 32 bits; the engine used by random_number.
+
+  /** @brief Random number generator, based on the Mersenne twister. */
+  class random_number
+  {
+  private:
+    mt19937 mt;
+    uint64 supremum, RAND_SUP;
+    double supremum_reciprocal, RAND_SUP_REC;
+
+    uint64 cache;	/* assumed to be twice as long as the usual random number */
+    int bits_left;	/* bit results */
+
+    /** @brief Map a raw 32-bit draw into @c [0,supremum).
+     *  With _GLIBCXX_SCALE_DOWN_FPU the draw is multiplied by
+     *  @c supremum_reciprocal, otherwise it is reduced modulo @c supremum. */
+    static inline uint32
+    scale_down(uint64 x, uint64 supremum, double supremum_reciprocal)
+    {
+#if _GLIBCXX_SCALE_DOWN_FPU
+      (void) supremum;
+      return (uint32)(x * supremum_reciprocal);
+#else
+      (void) supremum_reciprocal;
+      return static_cast<uint32>(x % supremum);
+#endif
+    }
+
+  public:
+    /** @brief Default constructor. Seed with 0. */
+    random_number() :
+      mt(0),
+      supremum(0x100000000ULL),
+      RAND_SUP(1ULL << (sizeof(uint32) * 8)),
+      supremum_reciprocal((double)supremum / (double)RAND_SUP),
+      RAND_SUP_REC(1.0 / (double)RAND_SUP),
+      cache(0), bits_left(0)
+    {
+    }
+
+    /** @brief Constructor.
+     *  @param seed Random seed.
+     *  @param supremum Generate integer random numbers in the interval @c [0,supremum). */
+    random_number(uint32 seed, uint64 supremum = 0x100000000ULL) :
+      mt(seed),
+      supremum(supremum),
+      RAND_SUP(1ULL << (sizeof(uint32) * 8)),
+      supremum_reciprocal((double)supremum / (double)RAND_SUP),
+      RAND_SUP_REC(1.0 / (double)RAND_SUP),
+      cache(0), bits_left(0)
+    {
+    }
+
+    /** @brief Generate unsigned random 32-bit integer. */
+    uint32 operator()()
+    { return scale_down(mt(), supremum, supremum_reciprocal); }
+
+    /** @brief Generate unsigned random 32-bit integer in the interval @c [0,local_supremum). */
+    uint32 operator()(uint64 local_supremum)
+    { return scale_down(mt(), local_supremum, (double)local_supremum * RAND_SUP_REC); }
+
+    /** @brief Set the random seed.
+     *  @param seed to set. */
+    void set_seed(uint32 seed)
+    {
+      mt.seed(seed);
+      cache = mt();
+      bits_left = 32;
+    }
+
+    /** @brief Generate a number of random bits, compile-time parameter. */
+    template<int bits>
+    unsigned long genrand_bits()
+    { return genrand_bits(bits); }
+
+    /** @brief Generate a number of random bits, run-time parameter.
+     *  @param bits Number of bits to generate, in [0, 32].
+     *  @return The generated bits, in the low end of the result. */
+    unsigned long genrand_bits(int bits)
+    {
+      // A freshly constructed object has an empty cache (only set_seed()
+      // fills it).  Top it up first; extracting right away would return
+      // zeros and then shift by a negative count when refilling.
+      if (bits_left < bits)
+	{
+	  cache |= static_cast<uint64>(mt()) << bits_left;
+	  bits_left += 32;
+	}
+
+      // Build the mask in 64 bits: (1 << bits) overflows int for bits >= 31.
+      const uint64 mask = (static_cast<uint64>(1) << bits) - 1;
+      const unsigned long res = static_cast<unsigned long>(cache & mask);
+      cache >>= bits;
+      bits_left -= bits;
+
+      // Keep at least 32 fresh bits available for the next call.
+      if (bits_left < 32)
+	{
+	  cache |= static_cast<uint64>(mt()) << bits_left;
+	  bits_left += 32;
+	}
+      return res;
+    }
+  };
+
+} // namespace __gnu_parallel
+
+#endif
diff --git a/libstdc++-v3/include/parallel/random_shuffle.h b/libstdc++-v3/include/parallel/random_shuffle.h
new file mode 100644
index 00000000000..f18f7774840
--- /dev/null
+++ b/libstdc++-v3/include/parallel/random_shuffle.h
@@ -0,0 +1,516 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/random_shuffle.h
+ *  @brief Parallel implementation of std::random_shuffle().
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
+#define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
+
+#include <limits>
+
+#include <parallel/basic_iterator.h>
+#include <bits/stl_algo.h>
+
+#include <parallel/parallel.h>
+#include <parallel/base.h>
+#include <parallel/random_number.h>
+#include <parallel/timing.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Type to hold the index of a bin.
+   *
+   *  Since many variables of this type are allocated (the shuffle
+   *  routines in this file keep one bin_index "oracle" per input
+   *  element), it should be chosen as small as possible.  The bin
+   *  counts themselves are also stored in variables of this type.
+   */
+  typedef unsigned short bin_index;
+
+  /** @brief Data known to every thread participating in
+      __gnu_parallel::parallel_random_shuffle().
+   *
+   *  The struct has no destructor: whoever fills in the array pointers
+   *  is responsible for releasing them again.
+   */
+  template<typename RandomAccessIterator>
+  struct DRandomShufflingGlobalData
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    /** @brief Begin iterator of the source.
+     *
+     *  Held by reference, so the iterator object passed to the
+     *  constructor must outlive this record. */
+    RandomAccessIterator& source;
+
+    /** @brief Temporary arrays for each thread. */
+    value_type** temporaries;
+
+    /** @brief Two-dimensional array to hold the thread-bin distribution.
+     *
+     *  Dimensions (num_threads + 1) x (num_bins + 1). */
+    difference_type** dist;
+
+    /** @brief Start indexes of the threads' chunks. */
+    difference_type* starts;
+
+    /** @brief Number of the thread that will further process the
+	corresponding bin. */
+    thread_index_t* bin_proc;
+
+    /** @brief Number of bins to distribute to. */
+    int num_bins;
+
+    /** @brief Number of bits needed to address the bins. */
+    int num_bits;
+
+    /** @brief Constructor.
+     *
+     *  Binds the source iterator and puts every other member into a
+     *  defined "empty" state (null pointers, zero counts) instead of
+     *  leaving them indeterminate until the caller assigns them.
+     *  @param _source Begin iterator of the sequence to shuffle. */
+    DRandomShufflingGlobalData(RandomAccessIterator& _source)
+    : source(_source), temporaries(0), dist(0), starts(0), bin_proc(0),
+      num_bins(0), num_bits(0)
+    { }
+  };
+
+  /** @brief Local data for a thread participating in
+      __gnu_parallel::parallel_random_shuffle().
+   *
+   *  One record is allocated per thread.  The RandomNumberGenerator
+   *  template parameter is not used by any member.
+   */
+  template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  struct DRSSorterPU
+  {
+    /** @brief Number of threads participating in total. */
+    int num_threads;
+
+    /** @brief Number of owning thread. */
+    int iam;
+
+    /** @brief Begin index for bins taken care of by this thread
+     *  (inclusive). */
+    bin_index bins_begin;
+
+    /** @brief End index for bins taken care of by this thread
+     *  (exclusive). */
+    bin_index bins_end;
+
+    /** @brief Random seed for this thread; used to construct the
+     *  thread's own random_number generator. */
+    uint32 seed;
+
+    /** @brief Pointer to global data. */
+    DRandomShufflingGlobalData<RandomAccessIterator>* sd;
+  };
+
+  /** @brief Draw a random integer from @c [0,2^logp).
+   *  @param logp Binary logarithm of the exclusive upper bound, i.e. the
+   *  number of random bits requested.
+   *  @param rng Generator to take the bits from; it must provide a
+   *  @c genrand_bits(int) member.
+   *  @return Whatever @c rng.genrand_bits(logp) yields, converted to int.
+   */
+  template<typename RandomNumberGenerator>
+  inline int random_number_pow2(int logp, RandomNumberGenerator& rng)
+  {
+    const int drawn = rng.genrand_bits(logp);
+    return drawn;
+  }
+
+  /** @brief Random shuffle code executed by each thread.
+   *
+   *  Intended to run inside an OpenMP parallel region: the record to work
+   *  on is selected with omp_get_thread_num(), and the phases are
+   *  separated by barriers.  Each thread (1) draws a random target bin
+   *  (an "oracle") for every element of its chunk and counts them per
+   *  bin, (2) once the per-bin prefix sums are built, scatters its
+   *  elements into the temporary array of the thread owning the target
+   *  bin, and (3) shuffles the bins it owns and copies them back into
+   *  the source sequence.
+   *  @param pus Array of thread-local data records, indexed by OpenMP
+   *  thread number. */
+  template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  inline void parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* pus)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    Timing<sequential_tag> t;
+    t.tic();
+
+    DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[omp_get_thread_num()];
+    DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
+    thread_index_t iam = d->iam;
+
+    // Indexing: sd->dist[bin][processor]; the arrays allocated below are thread-private scratch copies.
+    difference_type length = sd->starts[iam + 1] - sd->starts[iam];
+    bin_index* oracles = new bin_index[length];
+    difference_type* dist = new difference_type[sd->num_bins + 1];
+    bin_index* bin_proc = new bin_index[sd->num_bins];
+    value_type** temporaries = new value_type*[d->num_threads];
+
+    // Compute oracles and count appearances; start from zeroed counters.
+    for (bin_index b = 0; b < sd->num_bins + 1; b++)
+      dist[b] = 0;
+    int num_bits = sd->num_bits;
+
+    random_number rng(d->seed);
+
+    // First main loop: one random bin index per element of this chunk.
+    for (difference_type i = 0; i < length; i++)
+      {
+	bin_index oracle = random_number_pow2(num_bits, rng);
+	oracles[i] = oracle;
+
+	// Counted one slot up (bin b lives in slot b + 1) to allow prefix (partial) sum.
+	dist[oracle + 1]++;
+      }
+
+    for (bin_index b = 0; b < sd->num_bins + 1; b++)
+      sd->dist[b][iam + 1] = dist[b]; // publish this thread's counts in column iam + 1
+
+    t.tic();
+
+#pragma omp barrier
+
+    t.tic();
+
+#pragma omp single
+    {
+      // Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
+      // total number of items in bin s
+      for (bin_index s = 0; s < sd->num_bins; s++)
+	partial_sum(sd->dist[s + 1], sd->dist[s + 1] + d->num_threads + 1, sd->dist[s + 1]);
+    }
+
+#pragma omp barrier
+
+    t.tic();
+
+    sequence_index_t offset = 0, global_offset = 0; // global_offset: items in all bins before bins_begin
+    for (bin_index s = 0; s < d->bins_begin; s++)
+      global_offset += sd->dist[s + 1][d->num_threads];
+
+#pragma omp barrier
+
+    for (bin_index s = d->bins_begin; s < d->bins_end; s++)
+      {
+	for (int t = 0; t < d->num_threads + 1; t++)
+	  sd->dist[s + 1][t] += offset;
+	offset = sd->dist[s + 1][d->num_threads];
+      }
+
+    sd->temporaries[iam] = new value_type[offset]; // room for all bins owned by this thread
+
+    t.tic();
+
+#pragma omp barrier
+
+    t.tic();
+
+    // Draw local copies to avoid false sharing.
+    for (bin_index b = 0; b < sd->num_bins + 1; b++)
+      dist[b] = sd->dist[b][iam];
+    for (bin_index b = 0; b < sd->num_bins; b++)
+      bin_proc[b] = sd->bin_proc[b];
+    for (thread_index_t t = 0; t < d->num_threads; t++)
+      temporaries[t] = sd->temporaries[t];
+
+    RandomAccessIterator source = sd->source;
+    difference_type start = sd->starts[iam];
+
+    // Distribute according to oracles, second main loop.
+    for (difference_type i = 0; i < length; i++)
+      {
+	bin_index target_bin = oracles[i];
+	thread_index_t target_p = bin_proc[target_bin];
+
+	// Last column [d->num_threads] stays unchanged.
+	temporaries[target_p][dist[target_bin + 1]++] = *(source + i + start);
+      }
+
+    delete[] oracles;
+    delete[] dist;
+    delete[] bin_proc;
+    delete[] temporaries;
+
+    t.tic();
+
+#pragma omp barrier
+
+    t.tic();
+
+    // Shuffle bins internally, then copy each one to its final place in the source.
+    for (bin_index b = d->bins_begin; b < d->bins_end; b++)
+      {
+	value_type* begin = sd->temporaries[iam] + ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
+	  * end = sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
+	sequential_random_shuffle(begin, end, rng);
+	std::copy(begin, end, sd->source + global_offset + ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
+      }
+
+    delete[] sd->temporaries[iam];
+
+    t.tic();
+
+    t.print();
+  }
+
+  /** @brief Round up to the next greater power of 2.
+   *
+   *  Relies on the integer @c log2() available to this header
+   *  (presumably the floor logarithm -- confirm against its definition).
+   *  @param x Integer to round up.
+   *  @return @c 1 for @c x<=1, otherwise @c 1<<(log2(x-1)+1). */
+  template<typename T>
+  T round_up_to_pow2(T x)
+  {
+    if (x > 1)
+      return (T)1 << (log2(x - 1) + 1);
+    return 1;
+  }
+
+  /** @brief Main parallel random shuffle step.
+   *
+   *  Picks a power-of-two number of bins from the cache-size settings
+   *  (optionally capped by the TLB size), splits both the input and the
+   *  bins evenly among the threads, and runs
+   *  parallel_random_shuffle_drs_pu() in an OpenMP parallel region.
+   *  Falls back to sequential_random_shuffle() when at most one thread
+   *  would be used.
+   *
+   *  @param begin Begin iterator of sequence.
+   *  @param end End iterator of sequence.
+   *  @param n Length of sequence.
+   *  @param num_threads Number of threads to use; reduced to at most
+   *  @c n and at most the number of bins.
+   *  @param rng Random number generator to use; it supplies one seed
+   *  per thread.
+   */
+  template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  inline void
+  parallel_random_shuffle_drs(RandomAccessIterator begin,
+                              RandomAccessIterator end,
+                              typename std::iterator_traits<RandomAccessIterator>::difference_type n,
+                              int num_threads, RandomNumberGenerator& rng)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    _GLIBCXX_CALL(n)
+
+    if (num_threads > n)
+      num_threads = static_cast<thread_index_t>(n);
+
+    bin_index num_bins, num_bins_cache;
+
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
+    // Try the L1 cache first: a bin must fit into L1.
+    num_bins_cache = static_cast<bin_index>(std::max((difference_type)1, (difference_type)(n / (Settings::L1_cache_size_lb / sizeof(value_type)))));
+    num_bins_cache = round_up_to_pow2(num_bins_cache);
+
+    // At least one element per bin.
+    num_bins = static_cast<bin_index>(std::min(n, (difference_type)num_bins_cache));
+
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
+    // 2 TLB entries needed per bin.  The explicit template argument is
+    // required: the two operands have different types, so std::min
+    // could not deduce a common one.
+    num_bins = static_cast<bin_index>(std::min<difference_type>((difference_type)Settings::TLB_size / 2, num_bins));
+#endif
+    // Power of 2.
+    num_bins = round_up_to_pow2(num_bins);
+
+    if (num_bins < num_bins_cache)
+      {
+#endif
+        // Now try the L2 cache: a bin must fit into L2.
+        num_bins_cache = static_cast<bin_index>(std::max((difference_type)1, (difference_type)(n / (Settings::L2_cache_size / sizeof(value_type)))));
+        num_bins_cache = round_up_to_pow2(num_bins_cache);
+
+        // At least one element per bin.
+        num_bins = static_cast<bin_index>(std::min(n, (difference_type)num_bins_cache));
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
+        // 2 TLB entries needed per bin (explicit template argument, see
+        // the remark on mixed operand types in the L1 branch).
+        num_bins = static_cast<bin_index>(std::min<difference_type>((difference_type)Settings::TLB_size / 2, num_bins));
+#endif
+        // Power of 2.
+        num_bins = round_up_to_pow2(num_bins);
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
+      }
+#endif
+
+    // Every thread needs at least one bin of its own.
+    num_threads = std::min((bin_index)num_threads, (bin_index)num_bins);
+
+    if (num_threads <= 1)
+      return sequential_random_shuffle(begin, end, rng);
+
+    DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
+
+    DRSSorterPU<RandomAccessIterator, random_number >* pus = new DRSSorterPU<RandomAccessIterator, random_number >[num_threads];
+
+    sd.temporaries = new value_type*[num_threads];
+    sd.dist = new difference_type*[num_bins + 1];
+    sd.bin_proc = new thread_index_t[num_bins];
+    for (bin_index b = 0; b < num_bins + 1; b++)
+      {
+        sd.dist[b] = new difference_type[num_threads + 1];
+        // Column 0 is the zero base of the per-bin prefix sums; the
+        // other columns are filled in by the worker threads.
+        sd.dist[b][0] = 0;
+      }
+    difference_type* starts = sd.starts = new difference_type[num_threads + 1];
+    int bin_cursor = 0;
+    sd.num_bins = num_bins;
+    sd.num_bits = log2(num_bins);
+
+    // Split elements and bins as evenly as possible: the first `split`
+    // (resp. `bin_split`) threads get one extra item.
+    difference_type chunk_length = n / num_threads, split = n % num_threads, start = 0;
+    int bin_chunk_length = num_bins / num_threads, bin_split = num_bins % num_threads;
+    for (int i = 0; i < num_threads; i++)
+      {
+        starts[i] = start;
+        start += (i < split) ? (chunk_length + 1) : chunk_length;
+        int j = pus[i].bins_begin = bin_cursor;
+
+        // Range of bins for this processor.
+        bin_cursor += (i < bin_split) ? (bin_chunk_length + 1) : bin_chunk_length;
+        pus[i].bins_end = bin_cursor;
+        for (; j < bin_cursor; j++)
+          sd.bin_proc[j] = i;
+        pus[i].num_threads = num_threads;
+        pus[i].iam = i;
+        pus[i].seed = rng(std::numeric_limits<uint32>::max());
+        pus[i].sd = &sd;
+      }
+    starts[num_threads] = start;
+
+    // Now shuffle in parallel.
+#pragma omp parallel num_threads(num_threads)
+    parallel_random_shuffle_drs_pu(pus);
+
+    delete[] starts;
+    delete[] sd.bin_proc;
+    for (int s = 0; s < (num_bins + 1); s++)
+      delete[] sd.dist[s];
+    delete[] sd.dist;
+    delete[] sd.temporaries;
+
+    delete[] pus;
+  }
+
+  /** @brief Sequential cache-efficient random shuffle.
+   *
+   *  If the settings suggest more than one bin, every element is sent
+   *  to a randomly chosen bin in a temporary array, each bin is shuffled
+   *  recursively, and the result is copied back into the input range.
+   *  Otherwise the range is handed to __gnu_sequential::random_shuffle().
+   *
+   *  @param begin Begin iterator of sequence.
+   *  @param end End iterator of sequence.
+   *  @param rng Random number generator to use.
+   */
+  template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  inline void
+  sequential_random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator& rng)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    difference_type n = end - begin;
+
+    bin_index num_bins, num_bins_cache;
+
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
+    // Try the L1 cache first, must fit into L1.
+    num_bins_cache = static_cast<bin_index>(std::max((difference_type)1, (difference_type)(n / (Settings::L1_cache_size_lb / sizeof(value_type)))));
+    num_bins_cache = round_up_to_pow2(num_bins_cache);
+
+    // At least one element per bin.
+    num_bins = static_cast<bin_index>(std::min(n, (difference_type)num_bins_cache));
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
+    // 2 TLB entries needed per bin.  The explicit template argument is
+    // required: the two operands have different types, so std::min
+    // could not deduce a common one.
+    num_bins = static_cast<bin_index>(std::min<difference_type>((difference_type)Settings::TLB_size / 2, num_bins));
+#endif
+    // Power of 2.
+    num_bins = round_up_to_pow2(num_bins);
+
+    if (num_bins < num_bins_cache)
+      {
+#endif
+        // Now try the L2 cache, must fit into L2.
+        num_bins_cache = static_cast<bin_index>(std::max((difference_type)1, (difference_type)(n / (Settings::L2_cache_size / sizeof(value_type)))));
+        num_bins_cache = round_up_to_pow2(num_bins_cache);
+
+        // At least one element per bin.
+        num_bins = static_cast<bin_index>(std::min(n, (difference_type)num_bins_cache));
+
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
+        // 2 TLB entries needed per bin (explicit template argument, see
+        // the remark on mixed operand types in the L1 branch).
+        num_bins = static_cast<bin_index>(std::min<difference_type>((difference_type)Settings::TLB_size / 2, num_bins));
+#endif
+        // Power of 2.
+        num_bins = round_up_to_pow2(num_bins);
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
+      }
+#endif
+
+    int num_bits = log2(num_bins);
+
+    if (num_bins > 1)
+      {
+        value_type* target = new value_type[n];
+        bin_index* oracles = new bin_index[n];
+        // dist0 is advanced while scattering; dist1 keeps the untouched
+        // bin boundaries for the per-bin shuffles afterwards.
+        difference_type* dist0 = new difference_type[num_bins + 1], * dist1 = new difference_type[num_bins + 1];
+
+        for (int b = 0; b < num_bins + 1; b++)
+          dist0[b] = 0;
+
+        Timing<sequential_tag> t;
+        t.tic();
+
+        random_number bitrng(rng(0xFFFFFFFF));
+
+        for (difference_type i = 0; i < n; i++)
+          {
+            bin_index oracle = random_number_pow2(num_bits, bitrng);
+            oracles[i] = oracle;
+
+            // To allow prefix (partial) sum.
+            dist0[oracle + 1]++;
+          }
+
+        t.tic();
+
+        // Sum up bins: dist0[b] becomes the start offset of bin b.
+        partial_sum(dist0, dist0 + num_bins + 1, dist0);
+
+        for (int b = 0; b < num_bins + 1; b++)
+          dist1[b] = dist0[b];
+
+        t.tic();
+
+        // Distribute according to oracles.
+        for (difference_type i = 0; i < n; i++)
+          target[(dist0[oracles[i]])++] = *(begin + i);
+
+        for (int b = 0; b < num_bins; b++)
+          {
+            sequential_random_shuffle(target + dist1[b], target + dist1[b + 1],
+                                      rng);
+            t.tic();
+          }
+
+        // Copy the shuffled elements back; without this the work done in
+        // the temporary array would be discarded and the caller's range
+        // would stay in its original order.
+        std::copy(target, target + n, begin);
+
+        t.print();
+
+        delete[] dist0;
+        delete[] dist1;
+        delete[] oracles;
+        delete[] target;
+      }
+    else
+      __gnu_sequential::random_shuffle(begin, end, rng);
+  }
+
+  /** @brief Parallel random shuffle, public entry point.
+   *
+   *  Measures the sequence and forwards to parallel_random_shuffle_drs()
+   *  with the thread count reported by get_max_threads().
+   *  @param begin Begin iterator of sequence.
+   *  @param end End iterator of sequence.
+   *  @param rng Random number generator to use.
+   */
+  template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  inline void
+  parallel_random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator rng = random_number())
+  {
+    typedef typename std::iterator_traits<RandomAccessIterator>::difference_type difference_type;
+
+    const difference_type length = end - begin;
+    parallel_random_shuffle_drs(begin, end, length, get_max_threads(), rng);
+  }
+
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/search.h b/libstdc++-v3/include/parallel/search.h
new file mode 100644
index 00000000000..754150ced9d
--- /dev/null
+++ b/libstdc++-v3/include/parallel/search.h
@@ -0,0 +1,157 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/search.h
+ *  @brief Parallel implementation base for std::search() and
+ *  std::search_n().
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_SEARCH_H
+#define _GLIBCXX_PARALLEL_SEARCH_H 1
+
+#include <bits/stl_algobase.h>
+
+#include <parallel/parallel.h>
+#include <parallel/equally_split.h>
+
+
+namespace __gnu_parallel
+{
+  /**
+   *  @brief Precalculate advances for Knuth-Morris-Pratt algorithm.
+   *  @param elements Begin iterator of sequence to search for.
+   *  @param length Length of sequence to search for.
+   *  @param off Returned offsets.  Entry 0 is always written; entry 1
+   *  only if @c length>1; entries 2 to @c length (inclusive) for longer
+   *  patterns, so the array needs room for @c length+1 values then.
+   */
+  template<typename RandomAccessIterator, typename _DifferenceTp>
+  void
+  calc_borders(RandomAccessIterator elements, _DifferenceTp length, _DifferenceTp* off)
+  {
+    typedef _DifferenceTp difference_type;
+
+    off[0] = -1;
+    if (length <= 1)
+      return;
+
+    off[1] = 0;
+    difference_type border = 0;
+    for (difference_type pos = 1; pos < length; ++pos)
+      {
+        // Fall back through shorter borders until one can be extended by
+        // elements[pos] (or none is left, border == -1).
+        while ((border >= 0) && (elements[border] != elements[pos]))
+          border = off[border];
+        ++border;
+        off[pos + 1] = border;
+      }
+  }
+
+  // Generic parallel find algorithm (requires random access iterator).
+
+  /** @brief Parallel std::search.
+   *
+   *  The admissible start positions are split evenly among the threads;
+   *  each thread runs a Knuth-Morris-Pratt scan over its share and the
+   *  smallest hit wins.
+   *  @param begin1 Begin iterator of first sequence.
+   *  @param end1 End iterator of first sequence.
+   *  @param begin2 Begin iterator of second sequence.
+   *  @param end2 End iterator of second sequence.
+   *  @param pred Find predicate.
+   *  @return Place of finding in first sequence: @c begin1 for an empty
+   *  second sequence (as std::search does), @c end1 if there is no
+   *  match. */
+  template<typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename Pred>
+  _RandomAccessIterator1
+  search_template(_RandomAccessIterator1 begin1, _RandomAccessIterator1 end1,
+		  _RandomAccessIterator2 begin2, _RandomAccessIterator2 end2,
+		  Pred pred)
+  {
+    typedef std::iterator_traits<_RandomAccessIterator1> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    _GLIBCXX_CALL((end1 - begin1) + (end2 - begin2));
+
+    difference_type pattern_length = end2 - begin2;
+
+    // An empty pattern matches at the very beginning, as for std::search.
+    if (pattern_length <= 0)
+      return begin1;
+
+    // Last point to start search.
+    difference_type input_length = (end1 - begin1) - pattern_length;
+
+    // Where is first occurrence of pattern? defaults to end.
+    difference_type res = (end1 - begin1);
+
+    // Pattern too long.
+    if (input_length < 0)
+      return end1;
+
+    thread_index_t num_threads = std::max<difference_type>(1, std::min<difference_type>(input_length, __gnu_parallel::get_max_threads()));
+
+    difference_type borders[num_threads + 1];
+    __gnu_parallel::equally_split(input_length, num_threads, borders);
+
+    // calc_borders() fills entries up to and including index
+    // pattern_length, hence one slot more than the pattern has elements.
+    difference_type advances[pattern_length + 1];
+    calc_borders(begin2, pattern_length, advances);
+
+#pragma omp parallel num_threads(num_threads)
+    {
+      thread_index_t iam = omp_get_thread_num();
+
+      difference_type start = borders[iam], stop = borders[iam + 1];
+
+      difference_type pos_in_pattern = 0;
+      bool found_pattern = false;
+
+      while (start <= stop && !found_pattern)
+	{
+	  // Get new value of res.
+#pragma omp flush(res)
+	  // No chance for this thread to find first occurrence.
+	  if (res < start)
+	    break;
+	  while (pred(begin1[start + pos_in_pattern], begin2[pos_in_pattern]))
+	    {
+	      ++pos_in_pattern;
+	      if (pos_in_pattern == pattern_length)
+		{
+		  // Found new candidate for res.
+#pragma omp critical (res)
+		  res = std::min(res, start);
+		  found_pattern = true;
+		  break;
+		}
+	    }
+	  // The first hit in this thread's range is its best one; stop
+	  // here instead of computing a jump that is never used (for a
+	  // one-element pattern that entry of advances is never filled).
+	  if (found_pattern)
+	    break;
+	  // Make safe jump.
+	  start += (pos_in_pattern - advances[pos_in_pattern]);
+	  pos_in_pattern = (advances[pos_in_pattern] < 0) ? 0 : advances[pos_in_pattern];
+	}
+    }
+
+    // Return iterator on found element.
+    return (begin1 + res);
+  }
+} // end namespace
+
+#endif
diff --git a/libstdc++-v3/include/parallel/set_operations.h b/libstdc++-v3/include/parallel/set_operations.h
new file mode 100644
index 00000000000..006176de46f
--- /dev/null
+++ b/libstdc++-v3/include/parallel/set_operations.h
@@ -0,0 +1,529 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/**
+ * @file parallel/set_operations.h
+ * @brief Parallel implementations of set operations for random-access
+ * iterators.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Marius Elvert and Felix Bondarenko.
+
+#ifndef _GLIBCXX_PARALLEL_SET_OPERATIONS_H
+#define _GLIBCXX_PARALLEL_SET_OPERATIONS_H 1
+
+#include <omp.h>
+
+#include <parallel/settings.h>
+#include <parallel/multiseq_selection.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Copy the remainder of exactly one of two sequences.
+   *
+   *  @c b holds the current positions in the first and second sequence,
+   *  @c e the corresponding end positions.  If the first sequence still
+   *  has elements, those are copied and the second one is ignored;
+   *  otherwise whatever is left of the second sequence is copied.
+   *  @param b Pair of begin iterators (first, second sequence).
+   *  @param e Pair of end iterators (first, second sequence).
+   *  @param r Output iterator.
+   *  @return Output iterator past the last element written. */
+  template<typename InputIterator, typename OutputIterator>
+  inline OutputIterator
+  copy_tail(std::pair<InputIterator, InputIterator> b,
+	    std::pair<InputIterator, InputIterator> e, OutputIterator r)
+  {
+    if (b.first == e.first)
+      {
+        // First sequence exhausted: emit the rest of the second one.
+        for (; b.second != e.second; ++b.second, ++r)
+          *r = *b.second;
+        return r;
+      }
+
+    // Otherwise emit the rest of the first sequence only.
+    for (; b.first != e.first; ++b.first, ++r)
+      *r = *b.first;
+    return r;
+  }
+
+  /** @brief Sequential building block for the symmetric difference.
+   *
+   *  Walks two ranges in merge fashion using @c comp (which presumes
+   *  both are ordered by @c comp -- confirm with callers) and keeps the
+   *  elements that have no equivalent partner in the other range. */
+  template<typename InputIterator, typename OutputIterator, typename Comparator>
+  struct symmetric_difference_func
+  {
+    typedef std::iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+    typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
+
+    /** @brief Store a copy of the comparator. */
+    symmetric_difference_func(Comparator c) : comp(c) {}
+
+    /** @brief Strict ordering used to compare elements. */
+    Comparator comp;
+
+    /** @brief Write the symmetric difference of [a,b) and [c,d) to r.
+     *  @return Output iterator past the last element written. */
+    inline OutputIterator invoke(InputIterator a, InputIterator b,
+				 InputIterator c, InputIterator d,
+				 OutputIterator r) const
+    {
+      for (; a != b && c != d; )
+        {
+          if (comp(*a, *c))
+            {
+              // Only in the first range.
+              *r = *a;
+              ++r;
+              ++a;
+              continue;
+            }
+          if (comp(*c, *a))
+            {
+              // Only in the second range.
+              *r = *c;
+              ++r;
+              ++c;
+              continue;
+            }
+          // Equivalent elements cancel each other out.
+          ++a;
+          ++c;
+        }
+      // At most one of the two tails is non-empty here.
+      r = std::copy(a, b, r);
+      return std::copy(c, d, r);
+    }
+
+    /** @brief Number of elements invoke() would write for these ranges. */
+    inline difference_type
+    count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) const
+    {
+      difference_type emitted = 0;
+
+      for (; a != b && c != d; )
+        {
+          if (comp(*a, *c))
+            {
+              ++emitted;
+              ++a;
+              continue;
+            }
+          if (comp(*c, *a))
+            {
+              ++emitted;
+              ++c;
+              continue;
+            }
+          ++a;
+          ++c;
+        }
+
+      return emitted + (b - a) + (d - c);
+    }
+
+    /** @brief Result when the first range is empty: all of [c,d). */
+    inline OutputIterator
+    first_empty(InputIterator c, InputIterator d, OutputIterator out) const
+    { return std::copy(c, d, out); }
+
+    /** @brief Result when the second range is empty: all of [a,b). */
+    inline OutputIterator
+    second_empty(InputIterator a, InputIterator b, OutputIterator out) const
+    { return std::copy(a, b, out); }
+
+  };
+
+
+  /** @brief Sequential building block for the set difference.
+   *
+   *  Walks two ranges in merge fashion using @c comp (which presumes
+   *  both are ordered by @c comp -- confirm with callers) and keeps the
+   *  elements of the first range that have no equivalent partner in the
+   *  second one. */
+  template<typename InputIterator, typename OutputIterator, typename Comparator>
+  struct difference_func
+  {
+    typedef std::iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+    typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
+
+    /** @brief Store a copy of the comparator. */
+    difference_func(Comparator c) : comp(c) {}
+
+    /** @brief Strict ordering used to compare elements. */
+    Comparator comp;
+
+    /** @brief Write [a,b) minus [c,d) to r.
+     *  @return Output iterator past the last element written. */
+    inline OutputIterator
+    invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d,
+	   OutputIterator r) const
+    {
+      for (; a != b && c != d; )
+        {
+          if (comp(*a, *c))
+            {
+              // Only in the first range: keep it.
+              *r = *a;
+              ++r;
+              ++a;
+              continue;
+            }
+          // Not smaller: the element of the second range is consumed;
+          // if it is equivalent, the one of the first range goes too.
+          if (!comp(*c, *a))
+            ++a;
+          ++c;
+        }
+      return std::copy(a, b, r);
+    }
+
+    /** @brief Number of elements invoke() would write for these ranges. */
+    inline difference_type
+    count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) const
+    {
+      difference_type kept = 0;
+
+      for (; a != b && c != d; )
+        {
+          if (comp(*a, *c))
+            {
+              ++kept;
+              ++a;
+              continue;
+            }
+          if (!comp(*c, *a))
+            ++a;
+          ++c;
+        }
+
+      return kept + (b - a);
+    }
+
+    /** @brief Result when the first range is empty: nothing is written. */
+    inline OutputIterator
+    first_empty(InputIterator c, InputIterator d, OutputIterator out) const
+    { return out; }
+
+    /** @brief Result when the second range is empty: all of [a,b). */
+    inline OutputIterator
+    second_empty(InputIterator a, InputIterator b, OutputIterator out) const
+    { return std::copy(a, b, out); }
+  };
+
+
+  /** @brief Sequential building block for the set intersection.
+   *
+   *  Walks two ranges in merge fashion using @c comp (which presumes
+   *  both are ordered by @c comp -- confirm with callers) and keeps the
+   *  elements of the first range that have an equivalent partner in the
+   *  second one. */
+  template<typename InputIterator, typename OutputIterator, typename Comparator>
+  struct intersection_func
+  {
+    typedef std::iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+    typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
+
+    /** @brief Store a copy of the comparator. */
+    intersection_func(Comparator c) : comp(c) {}
+
+    /** @brief Strict ordering used to compare elements. */
+    Comparator comp;
+
+    /** @brief Write the intersection of [a,b) and [c,d) to r.
+     *  @return Output iterator past the last element written. */
+    inline OutputIterator
+    invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d,
+	   OutputIterator r) const
+    {
+      for (; a != b && c != d; )
+        {
+          if (comp(*a, *c))
+            {
+              ++a;
+              continue;
+            }
+          if (comp(*c, *a))
+            {
+              ++c;
+              continue;
+            }
+          // Equivalent in both ranges: emit the one from the first.
+          *r = *a;
+          ++r;
+          ++a;
+          ++c;
+        }
+
+      return r;
+    }
+
+    /** @brief Number of elements invoke() would write for these ranges. */
+    inline difference_type
+    count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) const
+    {
+      difference_type common = 0;
+
+      for (; a != b && c != d; )
+        {
+          if (comp(*a, *c))
+            {
+              ++a;
+              continue;
+            }
+          if (comp(*c, *a))
+            {
+              ++c;
+              continue;
+            }
+          ++common;
+          ++a;
+          ++c;
+        }
+
+      return common;
+    }
+
+    /** @brief Result when the first range is empty: nothing is written. */
+    inline OutputIterator
+    first_empty(InputIterator c, InputIterator d, OutputIterator out) const
+    { return out; }
+
+    /** @brief Result when the second range is empty: nothing is written. */
+    inline OutputIterator
+    second_empty(InputIterator a, InputIterator b, OutputIterator out) const
+    { return out; }
+  };
+
+  /** @brief Sequential building block for the set union.
+   *
+   *  Walks two ranges in merge fashion using @c comp (which presumes
+   *  both are ordered by @c comp -- confirm with callers); of two
+   *  equivalent elements only the one from the first range is kept. */
+  template<class InputIterator, class OutputIterator, class Comparator>
+  struct union_func
+  {
+    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
+
+    /** @brief Store a copy of the comparator. */
+    union_func(Comparator c) : comp(c) {}
+
+    /** @brief Strict ordering used to compare elements. */
+    Comparator comp;
+
+    /** @brief Write the union of [a,b) and [c,d) to r.
+     *  @return Output iterator past the last element written. */
+    inline OutputIterator
+    invoke(InputIterator a, const InputIterator b, InputIterator c,
+	   const InputIterator d, OutputIterator r) const
+    {
+      // Exactly one element is written per round.
+      for (; a != b && c != d; ++r)
+        {
+          if (comp(*a, *c))
+            {
+              *r = *a;
+              ++a;
+              continue;
+            }
+          if (comp(*c, *a))
+            {
+              *r = *c;
+              ++c;
+              continue;
+            }
+          // Equivalent: take the first range's element, drop the other.
+          *r = *a;
+          ++a;
+          ++c;
+        }
+      r = std::copy(a, b, r);
+      return std::copy(c, d, r);
+    }
+
+    /** @brief Number of elements invoke() would write for these ranges. */
+    inline difference_type
+    count(InputIterator a, const InputIterator b, InputIterator c,
+	  const InputIterator d) const
+    {
+      difference_type total = 0;
+
+      // Exactly one element is counted per round.
+      for (; a != b && c != d; ++total)
+        {
+          if (comp(*a, *c))
+            {
+              ++a;
+              continue;
+            }
+          if (comp(*c, *a))
+            {
+              ++c;
+              continue;
+            }
+          ++a;
+          ++c;
+        }
+
+      total += (b - a);
+      total += (d - c);
+      return total;
+    }
+
+    /** @brief Result when the first range is empty: all of [c,d). */
+    inline OutputIterator
+    first_empty(InputIterator c, InputIterator d, OutputIterator out) const
+    { return std::copy(c, d, out); }
+
+    /** @brief Result when the second range is empty: all of [a,b). */
+    inline OutputIterator
+    second_empty(InputIterator a, InputIterator b, OutputIterator out) const
+    { return std::copy(a, b, out); }
+  };
+
+  /** @brief Parallel driver shared by the set operations in this file.
+   *
+   *  The combined input is cut by rank into one block per thread plus
+   *  one final block.  Thread 0 writes the first block immediately
+   *  while the other threads only count their output; after all
+   *  lengths are known, every thread writes its block at its final
+   *  offset and thread 0 additionally processes the final block.
+   *
+   *  @param begin1 Begin iterator of the first input sequence.
+   *  @param end1 End iterator of the first input sequence.
+   *  @param begin2 Begin iterator of the second input sequence.
+   *  @param end2 End iterator of the second input sequence.
+   *  @param result Begin iterator of the output sequence.
+   *  @param op Operation object; must provide comp, invoke(), count(),
+   *  first_empty() and second_empty().
+   *  @return Iterator returned by the invoke() call for the final block
+   *  (or by first_empty()/second_empty() if one input is empty).
+   */
+  template<typename InputIterator, typename OutputIterator, typename Operation>
+  OutputIterator
+  parallel_set_operation(InputIterator begin1, InputIterator end1,
+			 InputIterator begin2, InputIterator end2,
+			 OutputIterator result, Operation op)
+  {
+    _GLIBCXX_CALL((end1 - begin1) + (end2 - begin2))
+
+    typedef std::iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+    typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
+
+
+    if (begin1 == end1)
+      return op.first_empty(begin2, end2, result);
+
+    if (begin2 == end2)
+      return op.second_empty(begin1, end1, result);
+
+    const difference_type size = (end1 - begin1) + (end2 - begin2);
+
+    // Requested team size: never more threads than there are elements
+    // in the shorter input.
+    thread_index_t num_threads = std::min<difference_type>(std::min(end1 - begin1, end2 - begin2), get_max_threads());
+
+    // The arrays are sized for the requested team.  The runtime may
+    // grant fewer threads but never more, so they are large enough for
+    // whatever team actually runs.
+    difference_type borders[num_threads + 2];
+
+    const iterator_pair sequence[ 2 ] = { std::make_pair(begin1, end1), std::make_pair(begin2, end2) } ;
+
+    iterator_pair block_begins[num_threads + 1];
+    difference_type length[num_threads];
+
+    OutputIterator return_value = result;
+
+#pragma omp parallel num_threads(num_threads)
+    {
+      Timing<sequential_tag> t;
+
+      t.tic();
+
+      // num_threads() is only a request.  Split the work for the team
+      // that was really created; otherwise the block_begins[] and
+      // length[] entries belonging to missing threads would be read
+      // uninitialized below.  The single construct ends with an
+      // implicit barrier, so every thread sees the updated values.
+#pragma omp single
+      {
+	num_threads = omp_get_num_threads();
+	equally_split(size, num_threads + 1, borders);
+
+	// Very start.
+	block_begins[0] = std::make_pair(begin1, begin2);
+      }
+
+      // Result from multiseq_partition.
+      InputIterator offset[2];
+      const int iam = omp_get_thread_num();
+
+      // Rank at which this thread's block ends.
+      const difference_type rank = borders[iam + 1];
+
+      multiseq_partition(sequence, sequence + 2, rank, offset, op.comp);
+
+      // allowed to read?
+      // together
+      // *(offset[ 0 ] - 1) == *offset[ 1 ]
+      if (offset[ 0 ] != begin1 && offset[ 1 ] != end2
+	   && !op.comp(*(offset[ 0 ] - 1), *offset[ 1 ])
+	   && !op.comp(*offset[ 1 ], *(offset[ 0 ] - 1)))
+	{
+	  // Avoid split between globally equal elements: move one to
+	  // front in first sequence.
+	  --offset[ 0 ];
+	}
+
+      // The end of this block is the beginning of the next one.
+      iterator_pair block_end = block_begins[ iam + 1 ] = iterator_pair(offset[ 0 ], offset[ 1 ]);
+
+      t.tic();
+
+      // Make sure all threads have their block_begin result written out.
+#pragma omp barrier
+
+      t.tic();
+
+      iterator_pair block_begin = block_begins[ iam ];
+
+      // Begin working for the first block, while the others except
+      // the last start to count.
+      if (iam == 0)
+	{
+	  // The first thread can copy already.
+	  length[ iam ] = op.invoke(block_begin.first, block_end.first, block_begin.second, block_end.second, result) - result;
+	}
+      else
+	{
+	  length[ iam ] = op.count(block_begin.first, block_end.first,
+				   block_begin.second, block_end.second);
+	}
+
+      t.tic();
+
+      // Make sure everyone wrote their lengths.
+#pragma omp barrier
+
+      t.tic();
+      OutputIterator r = result;
+
+      if (iam == 0)
+	{
+	  // Do the last block: it starts after the output of all
+	  // per-thread blocks.
+	  for (int i = 0; i < num_threads; ++i)
+	    r += length[i];
+
+	  block_begin = block_begins[num_threads];
+
+	  // Return the result iterator of the last block.
+	  return_value = op.invoke(block_begin.first, end1, block_begin.second, end2, r);
+
+	}
+      else
+	{
+	  // Output offset is the total length of all preceding blocks.
+	  for (int i = 0; i < iam; ++i)
+	    r += length[ i ];
+
+	  // Reset begins for copy pass.
+	  op.invoke(block_begin.first, block_end.first,
+		    block_begin.second, block_end.second, r);
+	}
+
+      t.tic();
+      t.print();
+    }
+    return return_value;
+  }
+
+
+  // Runs parallel_set_operation with a union_func built from comp.
+  template<typename InputIterator, typename OutputIterator, typename Comparator>
+  OutputIterator
+  parallel_set_union(InputIterator begin1, InputIterator end1,
+		     InputIterator begin2, InputIterator end2,
+		     OutputIterator result, Comparator comp)
+  {
+    typedef union_func<InputIterator, OutputIterator, Comparator> operation_type;
+
+    return parallel_set_operation(begin1, end1, begin2, end2, result,
+				  operation_type(comp));
+  }
+
+  // Runs parallel_set_operation with an intersection_func built from comp.
+  template<typename InputIterator, typename OutputIterator, typename Comparator>
+  OutputIterator
+  parallel_set_intersection(InputIterator begin1, InputIterator end1,
+			    InputIterator begin2, InputIterator end2,
+			    OutputIterator result, Comparator comp)
+  {
+    typedef intersection_func<InputIterator, OutputIterator, Comparator> operation_type;
+
+    return parallel_set_operation(begin1, end1, begin2, end2, result,
+				  operation_type(comp));
+  }
+
+
+  // Overload without comparator: forwards to the six-argument
+  // set_intersection, comparing with std::less on the value type of
+  // InputIterator.
+  template<typename InputIterator, typename OutputIterator>
+  OutputIterator
+  set_intersection(InputIterator begin1, InputIterator end1,
+		   InputIterator begin2, InputIterator end2,
+		   OutputIterator result)
+  {
+    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
+
+    return set_intersection(begin1, end1, begin2, end2, result,
+			    std::less<value_type>());
+  }
+
+  // Runs parallel_set_operation with a difference_func built from comp.
+  template<typename InputIterator, typename OutputIterator, typename Comparator>
+  OutputIterator
+  parallel_set_difference(InputIterator begin1, InputIterator end1,
+			  InputIterator begin2, InputIterator end2,
+			  OutputIterator result, Comparator comp)
+  {
+    typedef difference_func<InputIterator, OutputIterator, Comparator> operation_type;
+
+    return parallel_set_operation(begin1, end1, begin2, end2, result,
+				  operation_type(comp));
+  }
+
+  // Runs parallel_set_operation with a symmetric_difference_func built
+  // from comp.
+  template<typename InputIterator, typename OutputIterator, typename Comparator>
+  OutputIterator
+  parallel_set_symmetric_difference(InputIterator begin1, InputIterator end1,
+				    InputIterator begin2, InputIterator end2,
+				    OutputIterator result, Comparator comp)
+  {
+    typedef symmetric_difference_func<InputIterator, OutputIterator, Comparator> operation_type;
+
+    return parallel_set_operation(begin1, end1, begin2, end2, result,
+				  operation_type(comp));
+  }
+
+}
+
+#endif // _GLIBCXX_SET_ALGORITHM_
+
+
+
+
+
+
+
+
diff --git a/libstdc++-v3/include/parallel/settings.h b/libstdc++-v3/include/parallel/settings.h
new file mode 100644
index 00000000000..cec9d8225c9
--- /dev/null
+++ b/libstdc++-v3/include/parallel/settings.h
@@ -0,0 +1,388 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/settings.h
+ *  @brief Settings and tuning parameters, heuristics to decide
+ *  whether to use parallelized algorithms.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ *
+ *  @section parallelization_decision  The decision whether to run an algorithm in parallel.
+ *
+ *  There are several ways the user can switch on and off the 
+ *  parallel execution of an algorithm, both at compile- and 
+ *  run-time.
+ *
+ *  Only sequential execution can be forced at compile-time.
+ *  This reduces code size and protects code parts that have 
+ *  non-thread-safe side effects.
+ *
+ *  Ultimately forcing parallel execution at compile-time does
+ *  not make much sense.
+ *  Often, the sequential algorithm implementation is used as
+ *  a subroutine, so no reduction in code size can be achieved.
+ *  Also, the machine the program is run on might have only one
+ *  processor core, so to avoid overhead, the algorithm is
+ *  executed sequentially.
+ *
+ *  To force sequential execution of an algorithm ultimately
+ *  at compile-time, the user must add the tag 
+ *  __gnu_parallel::sequential_tag() to the end of the
+ *  parameter list, e. g.
+ *
+ *  \code
+ *  std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
+ *  \endcode
+ *
+ *  This is compatible with all overloaded algorithm variants.
+ *  No additional code will be instantiated, at all.
+ *  The same holds for most algorithm calls with iterators 
+ *  not providing random access.
+ *
+ *  If the algorithm call is not forced to be executed sequentially
+ *  at compile-time, the decision is made at run-time, for each call.
+ *  First, the two (conceptually) global variables 
+ *  __gnu_parallel::Settings::force_sequential and 
+ *  __gnu_parallel::Settings::force_parallel are evaluated.
+ *  If the former one is true, the sequential algorithm is executed.
+ *  If the latter one is true and the former one is false, 
+ *  the algorithm is executed in parallel.
+ *
+ *  If none of these conditions has fired so far, a heuristic is used.
+ *  The parallel algorithm implementation is called only if the
+ *  input size is sufficiently large.
+ *  For most algorithms, the input size is the (combined) length of 
+ *  the input sequence(s).
+ *  The threshold can be set by the user, individually for each
+ *  algorithm.
+ *  The according variables are called 
+ *  __gnu_parallel::Settings::[algorithm]_minimal_n .
+ *
+ *  For some of the algorithms, there are even more tuning options,
+ *  e. g. the ability to choose from multiple algorithm variants.
+ *  See the __gnu_parallel::Settings class for details.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_SETTINGS_H
+#define _GLIBCXX_PARALLEL_SETTINGS_H 1
+
+#include <omp.h>
+#include <parallel/types.h>
+
+/** 
+  * @brief The extensible condition on whether the parallel variant of
+  * an algorithm should be called.
+  *
+  * Evaluates to false whenever __gnu_parallel::Settings::force_sequential
+  * is set.  Otherwise it is true if __gnu_parallel::Settings::force_parallel
+  * is set, or if __gnu_parallel::get_max_threads() > 1 and @c c holds.
+  * @param c A condition that is overruled by
+  * __gnu_parallel::Settings::force_parallel, i. e. usually a decision based on
+  * the input size.
+  */
+#define _GLIBCXX_PARALLEL_CONDITION(c) (!(__gnu_parallel::Settings::force_sequential) && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::Settings::force_parallel))
+
+namespace __gnu_parallel
+{
+  // NB: Including this file cannot produce (unresolved) symbols from
+  // the OpenMP runtime unless the parallel mode is actually invoked
+  // and active, which implies that the OpenMP runtime is actually
+  // going to be linked in.
+  /** @brief Returns omp_get_max_threads(), clamped to a minimum of 1. */
+  inline int
+  get_max_threads() 
+  {
+    // Query the runtime once and clamp the answer.
+    const int max_threads = omp_get_max_threads();
+    return max_threads > 1 ? max_threads : 1;
+  }
+
+namespace 
+{
+  // XXX look at _Tune in mt_allocator.h
+  /** @brief Run-time settings for the parallel mode.
+   *
+   *  All members are static; their initial values are given by the
+   *  definitions that follow the struct.
+   *
+   *  NOTE(review): the struct is declared inside an unnamed namespace
+   *  in a header, so every translation unit that includes this file
+   *  gets its own independent set of these variables. */
+  struct Settings
+  {
+    /** @brief Different parallel sorting algorithms to choose
+	from: multi-way mergesort, quicksort, load-balanced
+	quicksort. */
+    enum SortAlgorithm 
+    { MWMS, QS, QS_BALANCED };
+
+    /** @brief Different merging algorithms: bubblesort-alike,
+	loser-tree variants, enum sentinel */
+    enum MultiwayMergeAlgorithm
+    { BUBBLE, LOSER_TREE_EXPLICIT, LOSER_TREE, LOSER_TREE_COMBINED, LOSER_TREE_SENTINEL, MWM_ALGORITHM_LAST };
+
+    /** @brief Different splitting strategies for sorting/merging:
+	by sampling, exact */
+    enum Splitting 
+    { SAMPLING, EXACT };
+
+    /** @brief Different partial sum algorithms: recursive, linear */
+    enum PartialSumAlgorithm 
+    { RECURSIVE, LINEAR };
+
+    /** @brief Different find distribution strategies: growing
+	blocks, equal-sized blocks, equal splitting. */
+    enum FindDistribution 
+    { GROWING_BLOCKS, CONSTANT_SIZE_BLOCKS, EQUAL_SPLIT };
+
+    /** @brief Force all algorithms to be executed sequentially.
+     * This setting cannot be overridden. */
+    static volatile bool force_sequential;
+
+    /** @brief Force all algorithms to be executed in parallel.
+     * This setting can be overridden by __gnu_parallel::sequential_tag
+     * (compile-time), and force_sequential (run-time). */
+    static volatile bool force_parallel;
+
+    /** @brief Algorithm to use for sorting. */
+    static volatile SortAlgorithm sort_algorithm;
+
+    /** @brief Strategy to use for splitting the input when
+	sorting (MWMS). */
+    static volatile Splitting sort_splitting;
+
+    /** @brief Minimal input size for parallel sorting. */
+    static volatile sequence_index_t sort_minimal_n;
+    /** @brief Oversampling factor for parallel std::sort (MWMS). */
+    static volatile unsigned int sort_mwms_oversampling;
+    /** @brief Such many samples to take to find a good pivot
+	(quicksort). */
+    static volatile unsigned int sort_qs_num_samples_preset;
+
+    /** @brief Maximal subsequence length to switch to unbalanced
+     * base case.  Applies to std::sort with dynamically
+     * load-balanced quicksort. */
+    static volatile sequence_index_t sort_qsb_base_case_maximal_n;
+
+    /** @brief Minimal input size for parallel std::partition. */
+    static volatile sequence_index_t partition_minimal_n;
+
+    /** @brief Chunk size for parallel std::partition. */
+    static volatile sequence_index_t partition_chunk_size;
+
+    /** @brief Chunk size for parallel std::partition, relative to
+     * input size.  If >0.0, this value overrides
+     * partition_chunk_size. */
+    static volatile double partition_chunk_share;
+
+    /** @brief Minimal input size for parallel std::nth_element. */
+    static volatile sequence_index_t nth_element_minimal_n;
+
+    /** @brief Minimal input size for parallel std::partial_sort. */
+    static volatile sequence_index_t partial_sort_minimal_n;
+
+    /** @brief Minimal input size for parallel std::adjacent_difference. */
+    static volatile unsigned int adjacent_difference_minimal_n;
+
+    /** @brief Minimal input size for parallel std::partial_sum. */
+    static volatile unsigned int partial_sum_minimal_n;
+
+    /** @brief Algorithm to use for std::partial_sum. */
+    static volatile PartialSumAlgorithm partial_sum_algorithm;
+
+    /** @brief Assume "sum and write result" to be that factor
+     *  slower than just "sum".  This value is used for
+     *  std::partial_sum. */
+    static volatile float partial_sum_dilatation;
+
+    /** @brief Minimal input size for parallel std::random_shuffle. */
+    static volatile unsigned int random_shuffle_minimal_n;
+
+    /** @brief Minimal input size for parallel std::merge. */
+    static volatile sequence_index_t merge_minimal_n;
+
+    /** @brief Splitting strategy for parallel std::merge. */
+    static volatile Splitting merge_splitting;
+
+    /** @brief Oversampling factor for parallel std::merge.
+     * Such many samples per thread are collected. */
+    static volatile unsigned int merge_oversampling;
+
+    /** @brief Algorithm to use for parallel
+	__gnu_parallel::multiway_merge. */
+    static volatile MultiwayMergeAlgorithm multiway_merge_algorithm;
+
+    /** @brief Splitting strategy to use for parallel
+	__gnu_parallel::multiway_merge. */
+    static volatile Splitting multiway_merge_splitting;
+
+    /** @brief Oversampling factor for parallel
+	__gnu_parallel::multiway_merge. */
+    static volatile unsigned int multiway_merge_oversampling;
+
+    /** @brief Minimal input size for parallel
+	__gnu_parallel::multiway_merge. */
+    static volatile sequence_index_t multiway_merge_minimal_n;
+
+    /** @brief Presumably the minimal number of input sequences (k)
+     * for parallel __gnu_parallel::multiway_merge; the previous
+     * comment here was a copy of the oversampling one.
+     * TODO confirm against the code that reads this setting. */
+    static volatile int multiway_merge_minimal_k;
+
+    /** @brief Minimal input size for parallel std::unique_copy. */
+    static volatile sequence_index_t unique_copy_minimal_n;
+
+    /** @brief Presumably the chunk size used by the work-stealing
+     * load balancer; TODO confirm against the code that reads this
+     * setting. */
+    static volatile sequence_index_t workstealing_chunk_size;
+
+    /** @brief Minimal input size for parallel std::for_each. */
+    static volatile sequence_index_t for_each_minimal_n;
+
+    /** @brief Minimal input size for parallel std::count and
+	std::count_if. */
+    static volatile sequence_index_t count_minimal_n;
+
+    /** @brief Minimal input size for parallel std::transform. */
+    static volatile sequence_index_t transform_minimal_n;
+
+    /** @brief Minimal input size for parallel std::replace and
+	std::replace_if. */
+    static volatile sequence_index_t replace_minimal_n;
+
+    /** @brief Minimal input size for parallel std::generate. */
+    static volatile sequence_index_t generate_minimal_n;
+
+    /** @brief Minimal input size for parallel std::fill. */
+    static volatile sequence_index_t fill_minimal_n;
+
+    /** @brief Minimal input size for parallel std::min_element. */
+    static volatile sequence_index_t min_element_minimal_n;
+
+    /** @brief Minimal input size for parallel std::max_element. */
+    static volatile sequence_index_t max_element_minimal_n;
+
+    /** @brief Minimal input size for parallel std::accumulate. */
+    static volatile sequence_index_t accumulate_minimal_n;
+
+    /** @brief Distribution strategy for parallel std::find. */
+    static volatile FindDistribution find_distribution;
+
+    /** @brief Start with looking for that many elements
+	sequentially, for std::find. */
+    static volatile sequence_index_t find_sequential_search_size;
+
+    /** @brief Initial block size for parallel std::find. */
+    static volatile sequence_index_t find_initial_block_size;
+
+    /** @brief Maximal block size for parallel std::find. */
+    static volatile sequence_index_t find_maximum_block_size;
+
+    /** @brief Block size increase factor for parallel std::find. */
+    static volatile double find_increasing_factor;
+
+    //set operations
+    /** @brief Minimal input size for parallel std::set_union. */
+    static volatile sequence_index_t set_union_minimal_n;
+
+    /** @brief Minimal input size for parallel
+	std::set_symmetric_difference. */
+    static volatile sequence_index_t set_symmetric_difference_minimal_n;
+
+    /** @brief Minimal input size for parallel std::set_difference. */
+    static volatile sequence_index_t set_difference_minimal_n;
+
+    /** @brief Minimal input size for parallel std::set_intersection. */
+    static volatile sequence_index_t set_intersection_minimal_n;
+
+    //hardware dependent tuning parameters
+    /** @brief Size of the L1 cache in bytes (underestimation). */
+    static volatile unsigned long long L1_cache_size;
+
+    /** @brief Size of the L2 cache in bytes (underestimation). */
+    static volatile unsigned long long L2_cache_size;
+
+    /** @brief Size of the Translation Lookaside Buffer
+	(underestimation). */
+    static volatile unsigned int TLB_size;
+
+    /** @brief Overestimation of cache line size.  Used to avoid
+     * false sharing, i. e. elements of different threads are at
+     * least this amount apart.  Unlike the other settings, this
+     * member is not declared volatile. */
+    static unsigned int cache_line_size;
+
+    //statistics
+    /** @brief Statistic on the number of stolen ranges in
+	load-balanced quicksort.*/
+    static volatile sequence_index_t qsb_steals;
+  };
+
+  // Definitions and default values of the Settings members, listed in
+  // the order in which the struct declares them.
+
+  // Global switches.
+  volatile bool Settings::force_sequential = false;
+  volatile bool Settings::force_parallel = false;
+
+  // Sorting.
+  volatile Settings::SortAlgorithm Settings::sort_algorithm = Settings::MWMS;
+  volatile Settings::Splitting Settings::sort_splitting = Settings::EXACT;
+  volatile sequence_index_t Settings::sort_minimal_n = 1000;
+  volatile unsigned int Settings::sort_mwms_oversampling = 10;
+  volatile unsigned int Settings::sort_qs_num_samples_preset = 100;
+  volatile sequence_index_t Settings::sort_qsb_base_case_maximal_n = 100;
+
+  // Partitioning and selection.
+  volatile sequence_index_t Settings::partition_minimal_n = 1000;
+  volatile sequence_index_t Settings::partition_chunk_size = 1000;
+  volatile double Settings::partition_chunk_share = 0.0;
+  volatile sequence_index_t Settings::nth_element_minimal_n = 1000;
+  volatile sequence_index_t Settings::partial_sort_minimal_n = 1000;
+
+  // Numeric algorithms.
+  volatile unsigned int Settings::adjacent_difference_minimal_n = 1000;
+  volatile unsigned int Settings::partial_sum_minimal_n = 1000;
+  volatile Settings::PartialSumAlgorithm Settings::partial_sum_algorithm = Settings::LINEAR;
+  volatile float Settings::partial_sum_dilatation = 1.0f;
+
+  // Random shuffle.
+  volatile unsigned int Settings::random_shuffle_minimal_n = 1000;
+
+  // Merging.
+  volatile sequence_index_t Settings::merge_minimal_n = 1000;
+  volatile Settings::Splitting Settings::merge_splitting = Settings::EXACT;
+  volatile unsigned int Settings::merge_oversampling = 10;
+  volatile Settings::MultiwayMergeAlgorithm Settings::multiway_merge_algorithm = Settings::LOSER_TREE;
+  volatile Settings::Splitting Settings::multiway_merge_splitting = Settings::EXACT;
+  volatile unsigned int Settings::multiway_merge_oversampling = 10;
+  volatile sequence_index_t Settings::multiway_merge_minimal_n = 1000;
+  volatile int Settings::multiway_merge_minimal_k = 2;
+
+  // Unique copy and work stealing.
+  volatile sequence_index_t Settings::unique_copy_minimal_n = 10000;
+  volatile sequence_index_t Settings::workstealing_chunk_size = 100;
+
+  // Element-wise algorithms and reductions.
+  volatile sequence_index_t Settings::for_each_minimal_n = 1000;
+  volatile sequence_index_t Settings::count_minimal_n = 1000;
+  volatile sequence_index_t Settings::transform_minimal_n = 1000;
+  volatile sequence_index_t Settings::replace_minimal_n = 1000;
+  volatile sequence_index_t Settings::generate_minimal_n = 1000;
+  volatile sequence_index_t Settings::fill_minimal_n = 1000;
+  volatile sequence_index_t Settings::min_element_minimal_n = 1000;
+  volatile sequence_index_t Settings::max_element_minimal_n = 1000;
+  volatile sequence_index_t Settings::accumulate_minimal_n = 1000;
+
+  // Find.
+  volatile Settings::FindDistribution Settings::find_distribution = Settings::CONSTANT_SIZE_BLOCKS;
+  volatile sequence_index_t Settings::find_sequential_search_size = 256;
+  volatile sequence_index_t Settings::find_initial_block_size = 256;
+  volatile sequence_index_t Settings::find_maximum_block_size = 8192;
+  volatile double Settings::find_increasing_factor = 2.0;
+
+  // Set operations.
+  volatile sequence_index_t Settings::set_union_minimal_n = 1000;
+  volatile sequence_index_t Settings::set_symmetric_difference_minimal_n = 1000;
+  volatile sequence_index_t Settings::set_difference_minimal_n = 1000;
+  volatile sequence_index_t Settings::set_intersection_minimal_n = 1000;
+
+  // Hardware dependent tuning parameters.
+  volatile unsigned long long Settings::L1_cache_size = 16 << 10;
+  volatile unsigned long long Settings::L2_cache_size = 256 << 10;
+  volatile unsigned int Settings::TLB_size = 128;
+  unsigned int Settings::cache_line_size = 64;
+
+  // Statistics.
+  volatile sequence_index_t Settings::qsb_steals = 0;
+} // end anonymous namespace
+
+}
+
+#endif /* _GLIBCXX_PARALLEL_SETTINGS_H */
diff --git a/libstdc++-v3/include/parallel/sort.h b/libstdc++-v3/include/parallel/sort.h
new file mode 100644
index 00000000000..6b20edd9fab
--- /dev/null
+++ b/libstdc++-v3/include/parallel/sort.h
@@ -0,0 +1,104 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/sort.h
+ *  @brief Parallel sorting algorithm switch.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_SORT_H
+#define _GLIBCXX_PARALLEL_SORT_H 1
+
+#include <parallel/basic_iterator.h>
+#include <parallel/features.h>
+#include <parallel/parallel.h>
+
+#if _GLIBCXX_ASSERTIONS
+#include <parallel/checkers.h>
+#endif
+
+#if _GLIBCXX_MERGESORT
+#include <parallel/multiway_mergesort.h>
+#endif
+
+#if _GLIBCXX_QUICKSORT
+#include <parallel/quicksort.h>
+#endif
+
+#if _GLIBCXX_BAL_QUICKSORT
+#include <parallel/balanced_quicksort.h>
+#endif
+
+namespace __gnu_parallel
+{
+  /** 
+   *  @brief Choose a parallel sorting algorithm.
+   *
+   *  Dispatches on Settings::sort_algorithm and @c stable to one of the
+   *  variants compiled in (_GLIBCXX_MERGESORT, _GLIBCXX_QUICKSORT,
+   *  _GLIBCXX_BAL_QUICKSORT), each run with get_max_threads() threads.
+   *  If no compiled-in variant matches, __gnu_sequential::sort is used.
+   *  An empty input range is left untouched.
+   *  @param begin Begin iterator of input sequence.
+   *  @param end End iterator of input sequence.
+   *  @param comp Comparator.
+   *  @param stable Sort stable.
+   *  @callgraph 
+   */
+  template<typename RandomAccessIterator, typename Comparator>
+  inline void
+  parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
+		Comparator comp, bool stable)
+  {
+    _GLIBCXX_CALL(end - begin)
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    if (begin != end)
+      {
+	difference_type n = end - begin;
+
+	// Dummy head of the chain, so that each conditionally compiled
+	// branch below can start with "else if" no matter which of the
+	// others are present.
+	if (false) ;
+#if _GLIBCXX_MERGESORT
+	// Multi-way mergesort also takes every stable request.
+	else if (Settings::sort_algorithm == Settings::MWMS || stable)
+	  parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable);
+#endif
+#if _GLIBCXX_QUICKSORT
+	else if (Settings::sort_algorithm == Settings::QS && !stable)
+	  parallel_sort_qs(begin, end, comp, n, get_max_threads());
+#endif
+#if _GLIBCXX_BAL_QUICKSORT
+	else if (Settings::sort_algorithm == Settings::QS_BALANCED && !stable)
+	  parallel_sort_qsb(begin, end, comp, n, get_max_threads());
+#endif
+	else
+	  // NOTE(review): this fallback ignores `stable`; it is reached
+	  // with stable == true when _GLIBCXX_MERGESORT is disabled.
+	  // Confirm that __gnu_sequential::sort gives the ordering
+	  // guarantee callers expect in that case.
+	  __gnu_sequential::sort(begin, end, comp);
+      }
+  }
+} // end namespace __gnu_parallel
+
+#endif
diff --git a/libstdc++-v3/include/parallel/tags.h b/libstdc++-v3/include/parallel/tags.h
new file mode 100644
index 00000000000..80926b44384
--- /dev/null
+++ b/libstdc++-v3/include/parallel/tags.h
@@ -0,0 +1,124 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/**
+ * @file parallel/tags.h
+ * @brief Tags for compile-time options.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_TAGS_H
+#define _GLIBCXX_PARALLEL_TAGS_H 1
+
+// Parallel mode namespaces.
+// std::__parallel is opened (empty) here so that the using-directive
+// inside namespace __gnu_parallel further down in this file has a
+// namespace to nominate.
+namespace std 
+{ 
+  namespace __parallel { } 
+}
+
+/**
+ * @namespace __gnu_sequential
+ * @brief GNU sequential classes for public use.
+ *
+ * Only opened here; this header adds no members to it.
+ */
+namespace __gnu_sequential { }
+
+/**
+ * @namespace __gnu_parallel
+ * @brief GNU parallel classes for public use.
+ */
+namespace __gnu_parallel
+{
+  // Make the parallel versions of the components in namespace std
+  // visible in this namespace.
+  using namespace std::__parallel;
+
+  /** @brief Run-time selection of the parallelization strategy. */
+  enum parallelism
+    {
+      /// Not parallel.
+      sequential,
+
+      /// Parallel unbalanced (equal-sized chunks).
+      parallel_unbalanced,
+
+      /// Parallel balanced (work-stealing).
+      parallel_balanced,
+
+      /// Parallel with OpenMP dynamic load-balancing.
+      parallel_omp_loop,
+
+      /// Parallel with OpenMP static load-balancing.
+      parallel_omp_loop_static,
+
+      /// Parallel with OpenMP taskqueue construct.
+      parallel_taskqueue
+    };
+
+  /** @brief True for every strategy except @c sequential. */
+  inline bool
+  is_parallel(const parallelism __p)
+  { return !(__p == sequential); }
+
+
+  /** @brief Compile-time tag: force sequential execution. */
+  struct sequential_tag { };
+
+  /** @brief Compile-time tag: recommend parallel execution. */
+  struct parallel_tag { };
+
+  /** @brief Compile-time tag: recommend parallel execution with
+      dynamic load-balancing. */
+  struct balanced_tag : public parallel_tag { };
+
+  /** @brief Compile-time tag: recommend parallel execution with
+      static load-balancing. */
+  struct unbalanced_tag : public parallel_tag { };
+
+  /** @brief Compile-time tag: recommend parallel execution with
+      OpenMP dynamic load-balancing. */
+  struct omp_loop_tag : public parallel_tag { };
+
+  /** @brief Compile-time tag: recommend parallel execution with
+      OpenMP static load-balancing. */
+  struct omp_loop_static_tag : public parallel_tag { };
+
+
+  /** @brief Tag selecting the growing block size variant of
+      std::find().
+      @see _GLIBCXX_FIND_GROWING_BLOCKS */
+  struct growing_blocks_tag { };
+
+  /** @brief Tag selecting the constant block size variant of
+      std::find().
+      @see _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS */
+  struct constant_size_blocks_tag { };
+
+  /** @brief Tag selecting the equal splitting variant of
+      std::find().
+      @see _GLIBCXX_FIND_EQUAL_SPLIT */
+  struct equal_split_tag { };
+}
+
+#endif /* _GLIBCXX_PARALLEL_TAGS_H */
diff --git a/libstdc++-v3/include/parallel/timing.h b/libstdc++-v3/include/parallel/timing.h
new file mode 100644
index 00000000000..f1f75225c15
--- /dev/null
+++ b/libstdc++-v3/include/parallel/timing.h
@@ -0,0 +1,217 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/timing.h
+ *  @brief Provides a simple tool to do performance debugging, also in
+ *  parallel code.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_TIMING_H
+#define _GLIBCXX_PARALLEL_TIMING_H 1
+
+#include <omp.h>
+#include <cstdio>
+#include <cstring>
+#include <parallel/tags.h>
+
+namespace __gnu_parallel
+{
+  // XXX integrate with existing performance testing infrastructure.
+  /** @brief Type of a point in time, used for the Timing classes. */
+  typedef double point_in_time;
+
+  template<typename tag, typename must_be_int = int>
+  class Timing;
+
+  /** @brief A class that provides simple run time measurements, also
+      for parallel code.
+   *
+   *  Two independent facilities are offered.  tic() records labelled
+   *  time stamps (at most max_points_in_time of them) whose successive
+   *  differences are reported by print() in milliseconds.  start() and
+   *  stop() bracket regions whose durations are summed up and returned
+   *  by active_time(), in the unit of omp_get_wtime() (seconds).
+   *
+   *  Copies take over the recorded measurements and tag pointers, but
+   *  never the report buffer, which every object builds and owns for
+   *  itself.
+   *  @param tag If parallel_tag, then the measurements are actually done.
+   *  With sequential_tag, all member functions are empty. */
+  template<typename must_be_int>
+  class Timing<parallel_tag, must_be_int>
+  {
+  private:
+    /** @brief Maximum number of time stamps tic() can record. */
+    static const int max_points_in_time = 100;
+
+    /** @brief Time stamps recorded by tic(). */
+    point_in_time points_in_time[max_points_in_time];
+
+    /** @brief Time accumulated by start()/stop() pairs, and the time
+     *  of the pending start() (-1.0 if none is pending). */
+    point_in_time active, last_start;
+
+    /** @brief Number of time stamps recorded so far. */
+    int pos;
+
+    /** @brief Report buffer, owned by this object and rebuilt by
+     *  every call to c_str(). */
+    char* str;
+
+    /** @brief Descriptions passed to tic().  Only the pointers are
+     *  stored, so the strings must stay valid until the last print(). */
+    const char* tags[max_points_in_time];
+
+    /** @brief Take over the measurements of @c other, leaving the
+     *  report buffer of this object untouched. */
+    void
+    copy_measurements(const Timing& other)
+    {
+      active = other.active;
+      last_start = other.last_start;
+      pos = other.pos;
+      for (int i = 0; i < pos; ++i)
+        {
+          points_in_time[i] = other.points_in_time[i];
+          tags[i] = other.tags[i];
+        }
+    }
+
+  public:
+    /** @brief Start with no time stamps, no accumulated time and no
+     *  pending start(). */
+    Timing() : active(0.0), last_start(-1.0), pos(0), str(NULL)
+    { }
+
+    /** @brief Copy the measurements.  The report buffer is not shared,
+     *  otherwise both objects would delete it. */
+    Timing(const Timing& other) : str(NULL)
+    { copy_measurements(other); }
+
+    /** @brief Assign the measurements; the own report buffer is kept
+     *  and simply rebuilt by the next c_str(). */
+    Timing&
+    operator=(const Timing& other)
+    {
+      if (this != &other)
+        copy_measurements(other);
+      return *this;
+    }
+
+    ~Timing()
+    {
+      delete[] str;
+    }
+
+    /** @brief Take a running time measurement.
+     *  @param tag Optional description that will be output again with
+     *  the timings.
+     *  It should describe the operation before the tic(). To time a
+     *  series of @c n operations, there should be @c n+1 calls to
+     *  tic(), and one call to print().
+     *
+     *  At most max_points_in_time measurements are kept; any further
+     *  call is ignored instead of writing past the internal arrays. */
+    inline void
+    tic(const char* tag = NULL)
+    {
+      _GLIBCXX_PARALLEL_ASSERT(pos < max_points_in_time);
+      if (pos >= max_points_in_time)
+        return;
+
+      points_in_time[pos] = omp_get_wtime();
+      tags[pos] = tag;
+      pos++;
+    }
+
+    /** @brief Start the running time measurement.
+     *
+     *  Should be paired with stop(). */
+    inline void
+    start()
+    {
+      _GLIBCXX_PARALLEL_ASSERT(last_start == -1.0);
+      last_start = omp_get_wtime();
+    }
+
+    /** @brief Stop the running time measurement.
+     *
+     *  Should be paired with start(). */
+    inline void
+    stop()
+    {
+      _GLIBCXX_PARALLEL_ASSERT(last_start != -1.0);
+      active += (omp_get_wtime() - last_start);
+      last_start = -1.0;
+    }
+
+    /** @brief Reset running time accumulation.
+     *
+     *  Only the start()/stop() state is cleared; the time stamps
+     *  recorded by tic() are kept. */
+    inline void
+    reset()
+    {
+      active = 0.0;
+      last_start = -1.0;
+    }
+
+    /** @brief Accumulate the time between all pairs of start() and
+        stop() so far.
+     *  @return Sum of the bracketed durations, in the unit of
+     *  omp_get_wtime() (seconds, not milliseconds). */
+    inline point_in_time
+    active_time()
+    { return active; }
+
+    /** @brief Total time between first and last tic().
+     *  @return Elapsed time in milliseconds, 0 if tic() has not been
+     *  called yet. */
+    inline point_in_time
+    total_time()
+    {
+      if (pos == 0)
+        return 0.0;
+      return (points_in_time[pos - 1] - points_in_time[0]) * 1000.0;
+    }
+
+  private:
+    /** @brief Construct string to print out, presenting the timings.
+     *  @return Pointer to the internal report buffer.  It stays valid
+     *  until the next call to c_str() or the destruction of the
+     *  object. */
+    const char*
+    c_str()
+    {
+      // Avoid stream library here, to avoid cyclic dependencies in
+      // header files.
+
+      // Space reserved per line on top of the label: separators,
+      // the formatted duration and the newline.  This is also the
+      // budget of the header line.
+      const std::size_t line_overhead = 64;
+      // Labels are padded to this width by the "%20s" conversion.
+      const std::size_t min_label_width = 20;
+
+      // The buffer is sized for the measurements present right now.
+      // A buffer kept from an earlier call may be too small, because
+      // more tic()s may have been recorded in the meantime.
+      std::size_t capacity = line_overhead;
+      for (int i = 1; i < pos; ++i)
+        {
+          std::size_t label_width = min_label_width;
+          if (tags[i] != NULL && strlen(tags[i]) > label_width)
+            label_width = strlen(tags[i]);
+          capacity += label_width + line_overhead;
+        }
+
+      // Reset the pointer first, so that the destructor does not
+      // delete the old buffer again if the allocation throws.
+      delete[] str;
+      str = NULL;
+      str = new char[capacity];
+
+      // Write every piece at the current end of the report instead of
+      // searching for the end again for each append.
+      int len = sprintf(str, "t %2d      T[ms]\n", omp_get_thread_num());
+
+      // Line i reports the interval that ended with the i-th tic().
+      for (int i = 1; i < pos; ++i)
+        {
+          if (tags[i] == NULL)
+            len += sprintf(str + len, "%2d:     ", i - 1);
+          else
+            len += sprintf(str + len, "%20s:     ", tags[i]);
+
+          len += sprintf(str + len, "%7.2f     \n",
+                         (points_in_time[i] - points_in_time[i - 1]) * 1000.0);
+        }
+
+      return str;
+    }
+
+  public:
+    /** @brief Print the running times between the tic()s.
+     *
+     *  Contains an OpenMP barrier, a master section emitting two empty
+     *  lines, and a critical section in which the report of the
+     *  calling thread is printed. */
+    void
+    print()
+    {
+      printf("print\n");
+#pragma omp barrier
+#pragma omp master
+      printf("\n\n");
+#pragma omp critical
+      printf("%s\n", c_str());
+    }
+  };
+
+  /** @brief Inactive counterpart of the measuring Timing class.
+   *
+   *  Offers the member functions of the parallel_tag specialization,
+   *  but none of them records anything: the bodies are empty or
+   *  return fixed dummy values.
+   *  @param tag sequential_tag selects this specialization.
+   */
+  template<typename must_be_int>
+  class Timing<sequential_tag, must_be_int>
+  {
+  private:
+    /** @brief Empty report handed out by c_str(). */
+    static const char* empty_string;
+
+  public:
+    /** @brief Does nothing, the description is ignored. */
+    void
+    tic(const char* /*tag*/ = NULL)
+    { }
+
+    /** @brief Does nothing. */
+    void
+    start()
+    { }
+
+    /** @brief Does nothing. */
+    void
+    stop()
+    { }
+
+    /** @brief Does nothing. */
+    void
+    reset()
+    { }
+
+    /** @brief No time is accumulated.
+     *  @return Always -1.0. */
+    point_in_time
+    active_time()
+    { return -1.0; }
+
+    /** @brief No time stamps are recorded.
+     *  @return Always -1.0. */
+    point_in_time
+    total_time()
+    { return -1.0; }
+
+    /** @brief Report of the (non-existing) measurements.
+     *  @return Always the empty string. */
+    const char*
+    c_str()
+    { return empty_string; }
+
+    /** @brief Prints nothing. */
+    void
+    print()
+    { }
+  };
+
+  /** @brief Definition of the empty report shared by all objects. */
+  template<typename must_be_int>
+  const char*
+  Timing<sequential_tag, must_be_int>::empty_string = "";
+
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/tree.h b/libstdc++-v3/include/parallel/tree.h
new file mode 100644
index 00000000000..8aa9269394d
--- /dev/null
+++ b/libstdc++-v3/include/parallel/tree.h
@@ -0,0 +1,3574 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/tree.h
+ *  @brief Parallel red-black tree operations.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Leonor Frias Moya, Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_TREE_H
+#define _GLIBCXX_PARALLEL_TREE_H 1
+
+#include <parallel/parallel.h>
+#include <functional>
+#include <cmath>
+#include <algorithm>
+#include <iterator>
+#include <functional>
+#include <iostream>
+//#include <ext/malloc_allocator.h>
+#include <bits/stl_tree.h>
+
+#include <parallel/list_partition.h>
+
+//#define _GLIBCXX_TIMING
+#ifdef _GLIBCXX_TIMING
+#define _timing_tag parallel_tag
+#else
+#define _timing_tag sequential_tag
+#endif
+
+namespace std
+{
+  // XXX Declaration should go to stl_tree.h.
+
+  // Forward declarations only; no definition is provided in this
+  // header.  NOTE(review): presumably these name the rotation helpers
+  // of the sequential red-black tree implementation -- confirm against
+  // the library sources.
+
+  /** @brief Left rotation around @c __x; @c __root is passed by
+   *  reference so that the callee can update it. */
+  void
+  _Rb_tree_rotate_left(_Rb_tree_node_base* const __x, _Rb_tree_node_base*& __root);
+
+  /** @brief Right rotation around @c __x; @c __root is passed by
+   *  reference so that the callee can update it. */
+  void
+  _Rb_tree_rotate_right(_Rb_tree_node_base* const __x, _Rb_tree_node_base*& __root);
+}
+
+
+namespace __gnu_parallel
+{
+  // XXX move into parallel/type_traits.h if <type_traits> doesn't work.
+  /** @brief Type transformation: strip the const qualifier from the
+      first component of a pair type.  Generic case, used for element
+      types that are not such a pair (e.g. the elements of a set).
+   *  @param _Tp Any type; it is passed through unchanged. */
+  template<typename _Tp>
+  struct unconst_first_component
+  {
+    /** @brief Resulting type, identical to @c _Tp */
+    typedef _Tp type;
+  };
+
+  /** @brief Type transformation: strip the const qualifier from the
+      first component of a pair type.  Case of a pair whose first
+      component is const (e.g. the elements of a map).
+   *  @param _First First component, given without its const qualifier
+   *  @param _Second Second component, taken over as it is
+   *  @sa unconst_first_component */
+  template<typename _First, typename _Second>
+  struct unconst_first_component<std::pair<const _First, _Second> >
+  {
+    /** @brief Resulting pair type, with a non-const first component */
+    typedef std::pair<_First, _Second> type;
+  };
+
+  /** @brief Comparator wrapper for dictionaries with unique keys.
+   *
+   *  StrictlyLess and LessEqual share one interface, which lets the
+   *  tree code deal with repeated keys the same way whatever the
+   *  actual policy is.  This wrapper forwards to the wrapped
+   *  comparator without any change.
+   *  @param _Key Keys to compare
+   *  @param _Compare Comparator equal to conceptual < */
+  template<typename _Key, typename _Compare>
+  struct StrictlyLess : public std::binary_function<_Key, _Key, bool>
+  {
+    /** @brief Wrapped comparator, equal to conceptual < */
+    _Compare c;
+
+    /** @brief Wrap a copy of the given comparator */
+    StrictlyLess(const _Compare& __comp) : c(__comp) { }
+
+    /** @brief Copy the wrapped comparator of another wrapper */
+    StrictlyLess(const StrictlyLess<_Key, _Compare>& __other)
+    : c(__other.c) { }
+
+    /** @brief Compare two keys
+     *  @return Result of the wrapped comparator for (k1, k2) */
+    bool
+    operator()(const _Key& k1, const _Key& k2) const
+    {
+      const bool __first_is_less = c(k1, k2);
+      return __first_is_less;
+    }
+  };
+
+  /** @brief Comparator wrapper for dictionaries with repeated keys.
+   *
+   *  StrictlyLess and LessEqual share one interface, which lets the
+   *  tree code deal with repeated keys the same way whatever the
+   *  actual policy is.  This wrapper turns the wrapped conceptual <
+   *  into a conceptual <= by swapping the arguments and negating the
+   *  result.
+   *  @param _Key Keys to compare
+   *  @param _Compare Comparator equal to conceptual < */
+  template<typename _Key, typename _Compare>
+  struct LessEqual : public std::binary_function<_Key, _Key, bool>
+  {
+    /** @brief Wrapped comparator, equal to conceptual < */
+    _Compare c;
+
+    /** @brief Wrap a copy of the given comparator */
+    LessEqual(const _Compare& __comp) : c(__comp) { }
+
+    /** @brief Copy the wrapped comparator of another wrapper */
+    LessEqual(const LessEqual<_Key, _Compare>& __other)
+    : c(__other.c) { }
+
+    /** @brief Compare two keys
+     *  @return True unless the wrapped comparator puts @c k2 strictly
+     *  before @c k1 */
+    bool
+    operator()(const _Key& k1, const _Key& k2) const
+    {
+      const bool __second_is_less = c(k2, k1);
+      return !__second_is_less;
+    }
+  };
+
+
+  /** @brief Parallel red-black tree.
+   *
+   *  Extension of the sequential red-black tree. Specifically,
+   *  parallel bulk insertion operations are provided.
+   *  @param _Key Keys to compare
+   *  @param _Val Elements to store in the tree
+   *  @param _KeyOfValue Obtains the key from an element <
+   *  @param _Compare Comparator equal to conceptual <
+   *  @param _Alloc Allocator for the elements */
+  template<typename _Key, typename _Val, typename _KeyOfValue,
+	   typename _Compare, typename _Alloc = std::allocator<_Val> >
+  class _Rb_tree : public std::_Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>
+  {
+  private:
+    /** @brief Sequential tree */
+    typedef std::_Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc> base_type;
+
+    /** @brief Renaming of base node type */
+    typedef typename std::_Rb_tree_node<_Val> _Rb_tree_node;
+
+    /** @brief Renaming of libstdc++ node type */
+    typedef typename std::_Rb_tree_node_base _Rb_tree_node_base;
+
+    /** @brief Renaming of base key_type */
+    typedef typename base_type::key_type key_type;
+
+    /** @brief Renaming of base value_type */
+    typedef typename base_type::value_type value_type;
+
+    /** @brief Helper class to unconst the first component of
+     * value_type if exists.
+     *
+     * This helper class is needed for map, but may discard qualifiers
+     * for set; however, a set with a const element type is not useful
+     * and should fail in some other place anyway.
+     */
+    typedef typename unconst_first_component<value_type>::type nc_value_type;
+
+    /** @brief Pointer to a node */
+    typedef _Rb_tree_node* _Rb_tree_node_ptr;
+
+    /** @brief Wrapper comparator class to deal with repetitions
+	transparently according to dictionary type with key _Key and
+	comparator _Compare. Unique dictionaries object
+    */
+    StrictlyLess<_Key, _Compare> strictly_less;
+
+    /** @brief Wrapper comparator class to deal with repetitions
+	transparently according to dictionary type with key _Key and
+	comparator _Compare. Non-unique dictionaries object
+    */
+    LessEqual<_Key, _Compare> less_equal;
+
+  public:
+    /** @brief Renaming of base size_type */
+    typedef typename base_type::size_type size_type;
+
+    /** @brief Constructor with a given comparator and allocator.
+     *
+     * The sequential base class is initialized first; both helper
+     * comparators are then built from the comparator stored in the
+     * base class.
+     * @param c Comparator object with which to initialize the class
+     * comparator and the helper comparators
+     * @param a Allocator object with which to initialize the class
+     * allocator
+     */
+    _Rb_tree(const _Compare& c, const _Alloc& a)
+    : base_type(c, a),
+      strictly_less(base_type::_M_impl._M_key_compare),
+      less_equal(base_type::_M_impl._M_key_compare)
+    { }
+
+    /** @brief Copy constructor.
+     *
+     * The sequential base class performs the copy; both helper
+     * comparators are then built from the comparator stored in the
+     * freshly copied base class.
+     * @param __x Parallel red-black tree instance to copy
+     */
+    _Rb_tree(const _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>& __x)
+    : base_type(__x),
+      strictly_less(base_type::_M_impl._M_key_compare),
+      less_equal(base_type::_M_impl._M_key_compare)
+    { }
+
+    /** @brief Parallel replacement of the sequential
+     * std::_Rb_tree::_M_insert_unique()
+     *
+     * Parallel bulk insertion and construction, using the comparator
+     * for unique keys. If the container is empty, bulk construction
+     * is performed. Otherwise, bulk insertion is performed. If
+     * _GLIBCXX_PARALLEL_CONDITION does not hold, the sequential
+     * base class implementation is called instead.
+     * @param __first Begin of the input range
+     * @param __last End of the input range; an empty range
+     * (@c __first == @c __last) is a no-op
+     */
+    template<typename _InputIterator>
+    void
+    _M_insert_unique(_InputIterator __first, _InputIterator __last)
+    {
+      // Nothing to insert.
+      if (__first == __last)
+        return;
+
+      if (_GLIBCXX_PARALLEL_CONDITION(true))
+        {
+          // The third argument selects construction from scratch
+          // (true) or insertion into the existing tree (false).
+          if (base_type::_M_impl._M_node_count == 0)
+            _M_bulk_insertion_construction(__first, __last, true, strictly_less);
+          else
+            _M_bulk_insertion_construction(__first, __last, false, strictly_less);
+
+          // Checked after either kind of parallel operation.
+          _GLIBCXX_PARALLEL_ASSERT(rb_verify());
+        }
+      else
+        {
+          base_type::_M_insert_unique(__first, __last);
+        }
+    }
+
+    /** @brief Parallel replacement of the sequential
+     * std::_Rb_tree::_M_insert_equal()
+     *
+     * Parallel bulk insertion and construction, using the comparator
+     * for repeated keys. If the container is empty, bulk construction
+     * is performed. Otherwise, bulk insertion is performed. If
+     * _GLIBCXX_PARALLEL_CONDITION does not hold, the sequential
+     * base class implementation is called instead.
+     * @param __first Begin of the input range
+     * @param __last End of the input range; an empty range
+     * (@c __first == @c __last) is a no-op */
+    template<typename _InputIterator>
+    void
+    _M_insert_equal(_InputIterator __first, _InputIterator __last)
+    {
+      // Nothing to insert.
+      if (__first == __last)
+        return;
+
+      if (_GLIBCXX_PARALLEL_CONDITION(true))
+        {
+          // The third argument selects construction from scratch
+          // (true) or insertion into the existing tree (false).
+          if (base_type::_M_impl._M_node_count == 0)
+            _M_bulk_insertion_construction(__first, __last, true, less_equal);
+          else
+            _M_bulk_insertion_construction(__first, __last, false, less_equal);
+        }
+      else
+        {
+          base_type::_M_insert_equal(__first, __last);
+        }
+
+      // Checked whichever path has been taken.
+      _GLIBCXX_PARALLEL_ASSERT(rb_verify());
+    }
+
+  private:
+
+    /** @brief Helper class of _Rb_tree: node linking.
+     *
+     * Links the nodes of an array so that they form an almost
+     * complete tree. The last level is coloured red, the rest are
+     * black. Ranks are computed by pure index arithmetic: the rank a
+     * node would have in a complete tree is derived first, and
+     * converted to the rank in the actual tree wherever it lies
+     * beyond the splitting point.
+     * @param ranker Calculates the position of a node in an array of nodes
+     */
+    template<typename ranker>
+    class nodes_initializer
+    {
+      /** @brief Renaming of tree size_type */
+
+      typedef typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type size_type;
+    public:
+
+      /** @brief mask[%i]= 0..01..1, where the number of 1s is %i+1 */
+      size_type mask[sizeof(size_type)*8];
+
+      /** @brief Array of nodes (initial address). The array is not
+	  owned by this object.	 */
+      const _Rb_tree_node_ptr* r_init;
+
+      /** @brief Total number of (used) nodes */
+      size_type n;
+
+      /** @brief Rank of the last tree node that can be calculated
+	  taking into account a complete tree
+      */
+      size_type splitting_point;
+
+      /** @brief Rank of the tree root */
+      size_type rank_root;
+
+      /** @brief Height of the tree, set to log2(n) by the constructor */
+      int height;
+
+      /** @brief Number of threads into which divide the work */
+      const thread_index_t num_threads;
+
+      /** @brief Helper object to mind potential gaps in r_init. Held
+	  by reference, so it must outlive this object. */
+      const ranker& rank;
+
+      /** @brief Constructor
+       *
+       * Derives the height, the splitting point and the rank of the
+       * root from @c _n, and fills the mask table.
+       * @param r Array of nodes
+       * @param _n Total number of (used) nodes
+       * @param _num_threads Number of threads into which divide the work
+       * @param _rank Helper object to mind potential gaps in @c r_init */
+      nodes_initializer(const _Rb_tree_node_ptr* r, const size_type _n, const thread_index_t _num_threads, const ranker& _rank):
+	r_init(r),
+	n(_n),
+	num_threads(_num_threads),
+	rank(_rank)
+      {
+	// NOTE(review): the literal 1 in the shifts below is an int, so
+	// they are carried out in int arithmetic whatever the width of
+	// size_type is -- confirm that n can never get large enough
+	// for height + 1 to reach the width of int.
+	height = log2(n);
+	splitting_point = 2 * (n - ((1 << height) - 1)) -1;
+
+	// Rank root.
+	size_type max = 1 << (height + 1);
+	rank_root= (max-2) >> 1;
+	if (rank_root > splitting_point)
+	  rank_root = complete_to_original(rank_root);
+
+	// mask[i] has its i+1 lowest bits set.
+	mask[0] = 0x1;
+	for (unsigned int i = 1; i < sizeof(size_type)*8; ++i)
+	  {
+	    mask[i] = (mask[i-1] << 1) + 1;
+	  }
+      }
+
+      /** @brief Query for tree height
+       * @return Tree height */
+      int get_height() const
+      {
+	return height;
+      }
+
+      /** @brief Query for the splitting point
+       * @return Splitting point, converted by the ranker to a
+       * position in the array of nodes (partition hint 0) */
+      size_type get_shifted_splitting_point() const
+      {
+	return rank.get_shifted_rank(splitting_point, 0);
+      }
+
+      /** @brief Query for the tree root node
+       * @return Tree root node, looked up with the middle partition
+       * (@c num_threads / 2) as a hint for the ranker */
+      _Rb_tree_node_ptr get_root() const
+      {
+	return  r_init[rank.get_shifted_rank(rank_root,num_threads/2)];
+      }
+
+      /** @brief Calculation of the parent position in the array of
+       * nodes. Expects @c p_s, @c r and @c iam to be in scope at the
+       * point of use and declares the local @c s_r there. Only the
+       * conversion of @c p_s is conditional; the parent link is
+       * always set.
+       * @hideinitializer */
+#define CALCULATE_PARENT						\
+      if (p_s> splitting_point)						\
+	p_s = complete_to_original(p_s);				\
+	    int s_r = rank.get_shifted_rank(p_s,iam);			\
+	    r->_M_parent = r_init[s_r];					\
+									\
+      /** @brief Link a node with its parent and children taking into
+	  account that its rank (without gaps) is different to that in
+	  a complete tree. The node is always coloured black.
+       * @param r Pointer to the node. It must be a reference to an
+       * element of the array of nodes, as its position is obtained
+       * from the address of the reference.
+       * @param iam Partition of the array in which the node is, where
+       * iam is in [0..num_threads)
+       * @sa link_complete */
+      void link_incomplete(const _Rb_tree_node_ptr& r, const int iam) const
+      {
+	size_type real_pos = rank.get_real_rank(&r-r_init, iam);
+	size_type l_s, r_s, p_s;
+	int mod_pos= original_to_complete(real_pos);
+	int zero= first_0_right(mod_pos);
+
+	// 1. Convert n to n', where n' will be its rank if the tree
+	//    was complete
+	// 2. Calculate neighbours for n'
+	// 3. Convert the neighbours n1', n2' and n3' to their
+	//    appropriate values n1, n2, n3. Note that it must be
+	//    checked that these neighbours really exist.
+	calculate_shifts_pos_level(mod_pos, zero, l_s, r_s, p_s);
+	if (l_s > splitting_point)
+	  {
+	    _GLIBCXX_PARALLEL_ASSERT(r_s > splitting_point);
+	    // Both children lie beyond the splitting point: on the
+	    // lowest level (zero == 1) the node has no children at all.
+	    if (zero == 1)
+	      {
+		r->_M_left = 0;
+		r->_M_right = 0;
+	      }
+	    else
+	      {
+		r->_M_left= r_init[rank.get_shifted_rank(complete_to_original(l_s),iam)];
+		r->_M_right= r_init[rank.get_shifted_rank(complete_to_original(r_s),iam)];
+	      }
+
+	  }
+	else{
+	  // The left child keeps its complete-tree rank; the right
+	  // child is either converted or (zero == 1) does not exist.
+	  r->_M_left= r_init[rank.get_shifted_rank(l_s,iam)];
+	  if (zero != 1)
+	    {
+	      r->_M_right= r_init[rank.get_shifted_rank(complete_to_original(r_s),iam)];
+	    }
+	  else
+	    {
+	      r->_M_right = 0;
+	    }
+	}
+	r->_M_color = std::_S_black;
+	CALCULATE_PARENT;
+      }
+
+      /** @brief Link a node with its parent and children taking into
+	  account that its rank (without gaps) is the same as that in
+	  a complete tree. Nodes of even rank become red leaves, all
+	  others become black inner nodes.
+       * @param r Pointer to the node. It must be a reference to an
+       * element of the array of nodes, as its position is obtained
+       * from the address of the reference.
+       * @param iam Partition of the array in which the node is, where
+       * iam is in [0..@c num_threads)
+       * @sa link_incomplete
+       */
+      void link_complete(const _Rb_tree_node_ptr& r, const int iam) const
+      {
+	size_type real_pos = rank.get_real_rank(&r-r_init, iam);
+	size_type p_s;
+
+	// Test if it is a leaf on the last not necessarily full level
+	if ((real_pos & mask[0]) == 0)
+	  {
+	    // The parent is the neighbouring odd rank; bit 1 tells on
+	    // which side it is.
+	    if ((real_pos & 0x2) == 0)
+	      p_s = real_pos + 1;
+	    else
+	      p_s = real_pos - 1;
+	    r->_M_color = std::_S_red;
+	    r->_M_left = 0;
+	    r->_M_right = 0;
+	  }
+	else
+	  {
+	    size_type l_s, r_s;
+	    int zero = first_0_right(real_pos);
+	    calculate_shifts_pos_level(real_pos, zero, l_s, r_s, p_s);
+	    r->_M_color = std::_S_black;
+
+	    r->_M_left = r_init[rank.get_shifted_rank(l_s,iam)];
+	    // Only the right child may lie beyond the splitting point.
+	    if (r_s > splitting_point)
+	      r_s = complete_to_original(r_s);
+	    r->_M_right = r_init[rank.get_shifted_rank(r_s,iam)];
+	  }
+	CALCULATE_PARENT;
+      }
+
+#undef CALCULATE_PARENT
+
+    private:
+      /** @brief Change of "base": Convert the rank in the actual tree
+	  into the corresponding rank if the tree was complete
+       * @param pos Rank in the actual incomplete tree
+       * @return Rank in the corresponding complete tree, i.e.
+       * 2 * @c pos - @c splitting_point, narrowed to int
+       * @sa complete_to_original  */
+      int original_to_complete(const int pos) const
+      {
+	return (pos << 1) - splitting_point;
+      }
+
+      /** @brief Change of "base": Convert the rank if the tree was
+	  complete into the corresponding rank in the actual tree
+       * @param pos Rank in the complete tree
+       * @return Rank in the actual incomplete tree, i.e.
+       * (@c pos + @c splitting_point) / 2, narrowed to int
+       * @sa original_to_complete */
+      int complete_to_original(const int pos) const
+      {
+	return (pos + splitting_point) >> 1;
+      }
+
+
+      /** @brief Calculate the rank in the complete tree of the parent
+	  and children of a node
+       *
+       * The children are half a stride (2^(level-1)) to the left and
+       * right of the node; the parent is a full stride away, on the
+       * side selected by bit @c level + 1 of @c pos.
+       * @param pos Rank in the complete tree of the node whose parent
+       * and children rank must be calculated
+       * @param level Tree level in which the node at pos is in
+       * (starting to count at leaves). @pre @c level > 1
+       * @param left_shift Rank in the complete tree of the left child
+       * of pos (out parameter)
+       * @param right_shift Rank in the complete tree of the right
+       * child of pos (out parameter)
+       * @param parent_shift Rank in the complete tree of the parent
+       * of pos (out parameter)
+       */
+      void calculate_shifts_pos_level(const size_type pos, const int level, size_type& left_shift, size_type& right_shift, size_type& parent_shift) const
+      {
+	int stride =  1 << (level -1);
+	left_shift = pos - stride;
+	right_shift = pos + stride;
+	if (((pos >> (level + 1)) & 0x1) == 0)
+	  parent_shift = pos + 2*stride;
+	else
+	  parent_shift = pos - 2*stride;
+      }
+
+      /** @brief Search for the first 0 bit (growing the weight)
+       *
+       * The lowest bit is not examined: the search starts at the
+       * bit of weight 2, which is handled here as a shortcut.
+       * @param x Binary number (corresponding to a rank in the tree)
+       * whose first 0 bit must be calculated
+       * @return Position of the first 0 bit in @c x (starting to
+       * count with 1)
+       */
+      int first_0_right(const size_type x) const
+      {
+	if ((x & 0x2) == 0)
+	  return 1;
+	else
+	  return first_0_right_bs(x);
+      }
+
+      /** @brief Search for the first 0 bit (growing the weight) using
+       * binary search
+       *
+       * Binary search can be used instead of a naive loop using the
+       * masks in mask array
+       * @param x Binary number (corresponding to a rank in the tree)
+       * whose first 0 bit must be calculated
+       * @param k_beg Position in which to start searching. By default is 2.
+       * @return Position of the first 0 bit in x (starting to count with 1) */
+      int first_0_right_bs(const size_type x, int k_beg=2) const
+      {
+	int k_end = sizeof(size_type)*8;
+	// Complement of x: its 1 bits mark the 0 bits of x.
+	size_type not_x = x ^ mask[k_end-1];
+	while ((k_end-k_beg) > 1)
+	  {
+	    int k = k_beg + (k_end-k_beg)/2;
+	    // A 0 bit of x among the k lowest bits: search the lower half.
+	    if ((not_x & mask[k-1]) != 0)
+	      k_end = k;
+	    else
+	      k_beg = k;
+	  }
+	return k_beg;
+      }
+    };
+
+    /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/
+    /** @brief Helper class of nodes_initializer: mind the gaps of an
+	array of nodes.
+     *
+     * Get absolute positions in an array of nodes taking into account
+     * the gaps in it. The object owns the beg_partition array it
+     * computes; the two arrays passed to the constructor are only
+     * referenced and must outlive it. @sa ranker_no_gaps
+     */
+    class ranker_gaps
+    {
+      /** @brief Renaming of tree's size_type */
+      typedef typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type size_type;
+
+      /** @brief Array containing the beginning ranks of all the
+	  num_threads partitions just considering the valid nodes, not
+	  the gaps. Has num_threads+1 entries and is owned by this
+	  object. */
+      size_type* beg_partition;
+
+      /** @brief Array containing the beginning ranks of all the
+	  num_threads partitions considering the valid nodes and the
+	  gaps */
+      const size_type* beg_shift_partition;
+
+      /** @brief Array containing the number of accumulated gaps at
+	  the beginning of each partition */
+      const size_type* rank_shift;
+
+      /** @brief Number of partitions (and threads that work on it) */
+      const thread_index_t num_threads;
+
+    public:
+      /** @brief Constructor
+       * @param size_p Pointer to the array containing the beginning
+       * ranks of all the @c _num_threads partitions considering the
+       * valid nodes and the gaps
+       * @param shift_r Array containing the number of accumulated
+       * gaps at the beginning of each partition
+       * @param _num_threads Number of partitions (and threads that
+       * work on it) */
+      ranker_gaps(const size_type* size_p, const size_type* shift_r, const thread_index_t _num_threads) :
+        beg_shift_partition(size_p),
+        rank_shift(shift_r),
+        num_threads(_num_threads)
+      {
+        beg_partition = new size_type[num_threads+1];
+        beg_partition[0] = 0;
+        // Size of a partition without gaps = size with gaps minus
+        // the gaps that have been accumulated inside it.
+        for (int i = 1; i <= num_threads; ++i)
+          {
+            beg_partition[i] = beg_partition[i-1] + (beg_shift_partition[i] - beg_shift_partition[i-1]) - (rank_shift[i] - rank_shift[i-1]);
+          }
+
+        // Ghost element, strictly larger than any index requested.
+        ++beg_partition[num_threads];
+      }
+
+      /** @brief Copy constructor
+       *
+       * Makes a private copy of the beg_partition array. With the
+       * implicitly generated copy, both objects would delete the
+       * same array in their destructors.
+       * @param other Object to copy */
+      ranker_gaps(const ranker_gaps& other) :
+        beg_partition(new size_type[other.num_threads+1]),
+        beg_shift_partition(other.beg_shift_partition),
+        rank_shift(other.rank_shift),
+        num_threads(other.num_threads)
+      {
+        for (int i = 0; i <= num_threads; ++i)
+          beg_partition[i] = other.beg_partition[i];
+      }
+
+      /** @brief Destructor
+       * Needs to be defined to deallocate the dynamic memory that has
+       * been allocated for beg_partition array
+       */
+      ~ranker_gaps()
+      {
+        delete[] beg_partition;
+      }
+
+      /** @brief Convert a rank in the array of nodes considering
+	  valid nodes and gaps, to the corresponding considering only
+	  the valid nodes
+       * @param pos Rank in the array of nodes considering valid nodes and gaps
+       * @param index Partition which the rank belongs to
+       * @return Rank in the array of nodes considering only the valid nodes
+       * @sa get_shifted_rank
+       */
+      size_type get_real_rank(const size_type pos, const int index) const
+      {
+        return pos - rank_shift[index];
+      }
+
+      /** @brief Inverse of get_real_rank: Convert a rank in the array
+	  of nodes considering only valid nodes, to the corresponding
+	  considering valid nodes and gaps
+       * @param pos Rank in the array of nodes considering only valid nodes
+       * @param index Partition which the rank is most likely to
+       * belong to (ie. the corresponding if there were no gaps)
+       * @pre 0 <= @c pos <= number_of_distinct_elements
+       * @return Rank in the array of nodes considering valid nodes and gaps
+       * @post 0 <= @c return <= number_of_elements
+       * @sa get_real_rank()
+       */
+      size_type get_shifted_rank(const size_type pos, const int index) const
+      {
+        // Heuristic: the hinted partition is tried first.
+        if (beg_partition[index] <= pos and pos < beg_partition[index+1])
+          return pos + rank_shift[index];
+        else
+          // Called rarely, do not hinder inlining.
+          return get_shifted_rank_loop(pos,index);
+      }
+
+      /** @brief Helper method of get_shifted_rank: in case the given
+	  index in get_shifted_rank is not correct, look for it and
+	  then calculate the rank
+       *
+       * The search walks upwards, then downwards from @c index. It is
+       * stopped by the ghost element at the upper end and by
+       * beg_partition[0] == 0 at the lower end.
+       * @param pos Rank in the array of nodes considering only valid nodes
+       * @param index Partition which the rank should have belong to
+       * if there were no gaps
+       * @return Rank in the array of nodes considering valid nodes and gaps
+       */
+      size_type get_shifted_rank_loop(const size_type pos, int index) const
+      {
+        while (pos >= beg_partition[index+1])
+          ++index;
+        while (pos < beg_partition[index])
+          --index;
+        _GLIBCXX_PARALLEL_ASSERT(0 <= index && index < num_threads);
+        return pos + rank_shift[index];
+      }
+    };
+
+    /** @brief Helper class of nodes_initializer: access an array of
+     * nodes with no gaps
+     *
+     * Offers the interface of ranker_gaps for an array of nodes
+     * without gaps, where both conversions are the identity.
+     * @sa ranker_gaps */
+    class ranker_no_gaps
+    {
+      /** @brief Renaming of tree's size_type */
+      typedef typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type size_type;
+
+    public:
+      /** @brief Convert a rank in the array of nodes considering
+       * valid nodes and gaps, to the corresponding considering only
+       * the valid nodes
+       *
+       * Without gaps both kinds of rank coincide, so @c pos is
+       * returned as it is.
+       * @param pos Rank in the array of nodes considering valid nodes and gaps
+       * @param index Partition which the rank belongs to, unused here
+       * @return @c pos, unchanged */
+      size_type
+      get_real_rank(const size_type pos, const int /* index */) const
+      { return pos; }
+
+      /** @brief Inverse of get_real_rank: Convert a rank in the array
+       * of nodes considering only valid nodes, to the corresponding
+       * considering valid nodes and gaps
+       *
+       * Without gaps both kinds of rank coincide, so @c pos is
+       * returned as it is.
+       * @param pos Rank in the array of nodes considering only valid nodes
+       * @param index Partition which the rank belongs to, unused here
+       * @return @c pos, unchanged
+       */
+      size_type
+      get_shifted_rank(const size_type pos, const int /* index */) const
+      { return pos; }
+    };
+
+
+    /** @brief Helper comparator class: logical negation of a key
+     * comparator, applied to values
+     *
+     * The keys of both values are extracted with _KeyOfValue and the
+     * result is true exactly when the wrapped comparator answers
+     * false for those keys (argument order is preserved).
+     * @param _Comp Comparator to invert
+     * @param _Iterator Iterator to the elements to compare */
+    template<typename _Comp, typename _Iterator>
+    class gr_or_eq
+    {
+      /** @brief Value type the iterator refers to */
+      typedef typename std::iterator_traits<_Iterator>::value_type value_type;
+
+      /** @brief Copy of the comparator whose answer is negated */
+      const _Comp comp;
+
+    public:
+      /** @brief Constructor
+       * @param c Comparator to be inverted (a copy is stored) */
+      gr_or_eq(const _Comp& c) : comp(c) { }
+
+      /** @brief Operator()
+       * @param a First value to compare
+       * @param b Second value to compare
+       * @return true iff the wrapped comparator returns false for the
+       * keys of @c a and @c b */
+      bool
+      operator()(const value_type& a, const value_type& b) const
+      { return not comp(_KeyOfValue()(a), _KeyOfValue()(b)); }
+    };
+
+    /** @brief Helper functor passed to list_partition to find out,
+     * while a sequence is traversed, whether it is sorted
+     *
+     * Each visited element is compared (by key) with the element
+     * visited just before it.
+     * @param _InputIterator Iterator to the elements to compare
+     * @param _CompIsSorted  Comparator to check for sortedness */
+    template<typename _InputIterator, typename _CompIsSorted>
+    class is_sorted_functor
+    {
+      /** @brief Previously visited element (second argument of comp) */
+      _InputIterator prev;
+
+      /** @brief Comparator to check for sortedness */
+      const _CompIsSorted comp;
+
+      /** @brief History of the calls to operator().  It stays true as
+       * long as no visited element has compared, through comp, before
+       * its predecessor; once false it never becomes true again */
+      bool sorted;
+
+    public:
+      /** @brief Constructor
+       *
+       * The sequence is assumed sorted until proven otherwise.
+       * @param first Element to compare with the first time the
+       * operator() is called
+       * @param c  Comparator to check for sortedness */
+      is_sorted_functor(const _InputIterator first, const _CompIsSorted c)
+      : prev(first), comp(c), sorted(true)
+      { }
+
+      /** @brief Visit one element: update @c sorted and remember the
+       * element as the new @c prev
+       * @param it Iterator to the element which must be compared to
+       * the element pointed to by the class member @c prev */
+      void
+      operator()(const _InputIterator it)
+      {
+	if (sorted)
+	  {
+	    // An element visited twice in a row (e.g. the very first
+	    // one) is not compared with itself.
+	    if (it != prev)
+	      {
+		if (comp(_KeyOfValue()(*it), _KeyOfValue()(*prev)))
+		  sorted = false;
+	      }
+	  }
+	prev = it;
+      }
+
+      /** @brief Query method for sorted
+       * @return Current value of sorted */
+      bool
+      is_sorted() const
+      { return sorted; }
+    };
+
+    /** @brief Helper functor: lift a comparator of keys to a
+     * comparator of values, so that the input can be sorted based
+     * upon whole elements
+     * @param KeyComparator Comparator for the key of values */
+    template<typename KeyComparator>
+    class ValueCompare
+    : public std::binary_function<value_type, value_type, bool>
+    {
+      /** @brief Copy of the comparator for the key of values */
+      const KeyComparator comp;
+
+    public:
+      /** @brief Constructor
+       * @param c Comparator for the key of values */
+      ValueCompare(const KeyComparator& c)
+      : comp(c)
+      { }
+
+      /** @brief Operator(): Analogous to comp but for values and not keys
+       * @param v1 First value to compare
+       * @param v2 Second value to compare
+       * @return Result of comparing the key of @c v1 with the key of
+       * @c v2 */
+      bool
+      operator()(const value_type& v1, const value_type& v2) const
+      {
+	_KeyOfValue key_of;
+	return comp(key_of(v1), key_of(v2));
+      }
+    };
+
+    /** @brief Helper comparator: compare a key with the key stored in
+     * a tree node, in either argument order
+     *
+     * The key comparator is held by reference, not copied.
+     * @param _Comparator Comparator for keys */
+    template<typename _Comparator>
+    struct compare_node_key
+    {
+      /** @brief Comparator for keys (referenced, not owned) */
+      const _Comparator& c;
+
+      /** @brief Constructor
+       * @param _c Comparator for keys */
+      compare_node_key(const _Comparator& _c)
+      : c(_c)
+      { }
+
+      /** @brief Operator() with the first parameter being a node
+       * @param r Node whose key is to be compared
+       * @param k Key to be compared
+       * @return Result of comparing the key of @c r with @c k */
+      bool
+      operator()(const _Rb_tree_node_ptr r, const key_type& k) const
+      {
+	return c(base_type::_S_key(r), k);
+      }
+
+      /** @brief Operator() with the second parameter being a node
+       * @param k Key to be compared
+       * @param r Node whose key is to be compared
+       * @return Result of comparing @c k with the key of @c r */
+      bool
+      operator()(const key_type& k, const _Rb_tree_node_ptr r) const
+      {
+	return c(k, base_type::_S_key(r));
+      }
+    };
+
+    /** @brief Helper comparator: compare a key with the key of the
+     * value an iterator points to, in either argument order
+     *
+     * The key comparator is held by reference, not copied.
+     * @param _Iterator Iterator to the values whose keys are compared
+     * @param _Comparator Comparator for keys */
+    template<typename _Iterator, typename _Comparator>
+    struct compare_value_key
+    {
+      /** @brief Comparator for keys (referenced, not owned) */
+      const _Comparator& c;
+
+      /** @brief Constructor
+       * @param _c Comparator for keys */
+      compare_value_key(const _Comparator& _c)
+      : c(_c)
+      { }
+
+      /** @brief Operator() with the first parameter being an iterator
+       * @param v Iterator to the value whose key is to be compared
+       * @param k Key to be compared
+       * @return Result of comparing the key of @c *v with @c k */
+      bool
+      operator()(const _Iterator& v, const key_type& k) const
+      {
+	return c(_KeyOfValue()(*v), k);
+      }
+
+      /** @brief Operator() with the second parameter being an iterator
+       * @param k Key to be compared
+       * @param v Iterator to the value whose key is to be compared
+       * @return Result of comparing @c k with the key of @c *v */
+      bool
+      operator()(const key_type& k, const _Iterator& v) const
+      {
+	return c(k, _KeyOfValue()(*v));
+      }
+    };
+
+    /** @brief Helper class of _Rb_tree to avoid some symmetric code
+     * in tree operations: the natural orientation, where "left" is
+     * @c _M_left and "right" is @c _M_right */
+    struct LeftRight
+    {
+      /** @brief Obtain the conceptual left child of a node
+       * @param parent Node whose child must be obtained
+       * @return Reference to the @c _M_left link of @c parent */
+      static _Rb_tree_node_base*&
+      left(_Rb_tree_node_base* parent)
+      {
+	return parent->_M_left;
+      }
+
+      /** @brief Obtain the conceptual right child of a node
+       * @param parent Node whose child must be obtained
+       * @return Reference to the @c _M_right link of @c parent */
+      static _Rb_tree_node_base*&
+      right(_Rb_tree_node_base* parent)
+      {
+	return parent->_M_right;
+      }
+    };
+
+    /** @brief Helper class of _Rb_tree to avoid some symmetric code
+     * in tree operations: mirror a symmetry, so that its "left"
+     * becomes "right" and vice versa
+     * @param S Symmetry to inverse
+     * @sa LeftRight */
+    template<typename S>
+    struct Opposite
+    {
+      /** @brief Obtain the conceptual left child of a node, inversing
+       * the symmetry
+       * @param parent Node whose child must be obtained
+       * @return Reference to what @c S considers the right child */
+      static _Rb_tree_node_base*&
+      left(_Rb_tree_node_base* parent)
+      {
+	return S::right(parent);
+      }
+
+      /** @brief Obtain the conceptual right child of a node,
+       * inversing the symmetry
+       * @param parent Node whose child must be obtained
+       * @return Reference to what @c S considers the left child */
+      static _Rb_tree_node_base*&
+      right(_Rb_tree_node_base* parent)
+      {
+	return S::left(parent);
+      }
+    };
+
+    /** @brief Inverse symmetry of LeftRight */
+    typedef Opposite<LeftRight> RightLeft;
+
+    /** @brief Helper comparator for pointers to values: the pointers
+     * are dereferenced and the values they point to are compared
+     * @param Comparator Comparator for values
+     * @param _ValuePtr Pointer to values */
+    template<typename Comparator, typename _ValuePtr>
+    class PtrComparator : public std::binary_function<_ValuePtr, _ValuePtr, bool>
+    {
+      /** @brief Copy of the comparator for values */
+      Comparator comp;
+
+    public:
+      /** @brief Constructor
+       * @param c Comparator for values */
+      PtrComparator(Comparator c)
+      : comp(c)
+      { }
+
+      /** @brief Operator(): compare the values instead of the pointers
+       * @param v1 Pointer to the first element to compare
+       * @param v2 Pointer to the second element to compare
+       * @return Result of comparing @c *v1 with @c *v2 */
+      bool
+      operator()(const _ValuePtr& v1, const _ValuePtr& v2) const
+      {
+	return comp(*v1, *v2);
+      }
+    };
+
+    /** @brief Random access iterator over an array of pointers that
+     * dereferences directly to the pointed-to elements
+     *
+     * Moving the iterator moves along the array of pointers, whereas
+     * @c operator*, @c operator-> and @c operator[] give access to the
+     * element that a pointer of the array refers to.  The pointers of
+     * the array are never modified through the iterator.
+     * @param _ValueTp Type pointed to by the pointers */
+    template<typename _ValueTp>
+    class PtrIterator
+    {
+    public:
+      /** @brief The iterator category is random access iterator */
+      typedef typename std::random_access_iterator_tag iterator_category;
+      typedef _ValueTp  value_type;
+      /** @brief Signed, so that distances between iterators can be
+       * negative as the iterator requirements demand */
+      typedef std::ptrdiff_t difference_type;
+      typedef value_type* ValuePtr;
+      /** @brief Result of dereferencing: the element itself, not the
+       * pointer stored in the array */
+      typedef value_type& reference;
+      /** @brief Type of the position in the array of pointers */
+      typedef value_type** pointer;
+
+      /** @brief Position in the array of pointers */
+      value_type** ptr;
+
+      /** @brief Trivial constructor: a null position */
+      PtrIterator() : ptr(0) { }
+
+      /** @brief Constructor from an element
+       *
+       * The iterator refers to the pointer object @c __i itself, which
+       * must therefore outlive the iterator.  A modifiable lvalue is
+       * required so that a temporary can never be referred to.
+       * @param __i Pointer object to refer to */
+      PtrIterator(ValuePtr& __i) : ptr(&__i) { }
+
+      /** @brief Constructor from a pointer
+       * @param __i Position in an array of pointers */
+      PtrIterator(const pointer& __i) : ptr(__i) { }
+
+      /** @brief Copy constructor */
+      PtrIterator(const PtrIterator<value_type>& __i) : ptr(__i.ptr) { }
+
+      /** @brief Access the element referred to by the current pointer */
+      reference
+      operator*() const
+      { return **ptr; }
+
+      /** @brief Member access on the element referred to by the
+       * current pointer */
+      ValuePtr
+      operator->() const
+      { return *ptr; }
+
+      /** @brief Bidirectional iterator requirement */
+      PtrIterator&
+      operator++()
+      {
+	++ptr;
+	return *this;
+      }
+
+      /** @brief Bidirectional iterator requirement */
+      PtrIterator
+      operator++(int)
+      { return PtrIterator(ptr++); }
+
+      /** @brief Bidirectional iterator requirement */
+      PtrIterator&
+      operator--()
+      {
+	--ptr;
+	return *this;
+      }
+
+      /** @brief Bidirectional iterator requirement */
+      PtrIterator
+      operator--(int)
+      { return PtrIterator(ptr--); }
+
+      /** @brief Random access iterator requirement
+       * @return The element referred to by the pointer @c __n
+       * positions away */
+      reference
+      operator[](const difference_type& __n) const
+      { return *ptr[__n]; }
+
+      /** @brief Random access iterator requirement */
+      PtrIterator&
+      operator+=(const difference_type& __n)
+      {
+	ptr += __n;
+	return *this;
+      }
+
+      /** @brief Random access iterator requirement */
+      PtrIterator
+      operator+(const difference_type& __n) const
+      { return PtrIterator(ptr + __n); }
+
+      /** @brief Random access iterator requirement: @c n + @c it
+       *
+       * Two iterators cannot be added; the only sum with the iterator
+       * on the right-hand side is an offset plus an iterator. */
+      friend PtrIterator
+      operator+(const difference_type& __n, const PtrIterator& __it)
+      { return PtrIterator(__it.ptr + __n); }
+
+      /** @brief Random access iterator requirement */
+      PtrIterator&
+      operator-=(const difference_type& __n)
+      {
+	ptr -= __n;
+	return *this;
+      }
+
+      /** @brief Random access iterator requirement */
+      PtrIterator
+      operator-(const difference_type& __n) const
+      { return PtrIterator(ptr - __n); }
+
+      /** @brief Random access iterator requirement
+       * @return Signed number of positions from @c iter to this
+       * iterator */
+      difference_type
+      operator-(const PtrIterator<value_type>& iter) const
+      { return ptr - iter.ptr; }
+
+      /** @brief Make the iterator refer to the pointer object @c sptr
+       *
+       * As for the constructor from an element, @c sptr itself is
+       * referred to and must outlive the iterator. */
+      PtrIterator<value_type>& operator=(ValuePtr& sptr)
+      {
+	ptr = &sptr;
+	return *this;
+      }
+
+      PtrIterator<value_type>& operator=(const PtrIterator<value_type>& piter)
+      {
+	ptr = piter.ptr;
+	return *this;
+      }
+
+      bool operator==(const PtrIterator<value_type>& piter) const
+      { return ptr == piter.ptr; }
+
+      bool operator!=(const PtrIterator<value_type>& piter) const
+      { return ptr != piter.ptr; }
+
+      /** @brief Random access iterator requirement: ordering by
+       * position in the array of pointers */
+      bool operator<(const PtrIterator<value_type>& piter) const
+      { return ptr < piter.ptr; }
+
+      bool operator>(const PtrIterator<value_type>& piter) const
+      { return piter.ptr < ptr; }
+
+      bool operator<=(const PtrIterator<value_type>& piter) const
+      { return not (piter.ptr < ptr); }
+
+      bool operator>=(const PtrIterator<value_type>& piter) const
+      { return not (ptr < piter.ptr); }
+
+    };
+
+
+    /** @brief Bulk insertion helper: synchronization and construction
+     * of the tree bottom up
+     *
+     * Every member that is not given a value by the caller starts
+     * out as NULL (pointers) or 0 (black height), so that a freshly
+     * constructed problem never holds indeterminate values. */
+    struct concat_problem
+    {
+      /** @brief Root of a tree.
+       *
+       * Input: Middle node to concatenate two subtrees. Out: Root of
+       * the resulting concatenated tree. */
+      _Rb_tree_node_ptr t;
+
+      /** @brief Black height of @c t */
+      int black_h;
+
+      /** @brief Synchronization variable.
+       *
+       * \li READY_YES: the root of the tree can be concatenated with
+       * the result of the children concatenation problems (both of
+       * them have finished).
+       * \li READY_NO: at least one of the children
+       * concatenation problems has not finished */
+      int is_ready;
+
+      /** @brief Parent concatenation problem to solve when @c
+       * is_ready = READY_YES */
+      concat_problem* par_problem;
+
+      /** @brief Left concatenation problem */
+      concat_problem* left_problem;
+
+      /** @brief Right concatenation problem */
+      concat_problem* right_problem;
+
+      /** @brief Value NO for the synchronization variable. */
+      static const int READY_NO = 0;
+
+      /** @brief Value YES for the synchronization variable. */
+      static const int READY_YES = 1;
+
+      /** @brief Trivial constructor.
+       *
+       * Initialize the synchronization variable to not ready, the
+       * tree and the related problems to NULL and the black height
+       * to 0. */
+      concat_problem()
+      : t(NULL),
+	black_h(0),
+	is_ready(READY_NO),
+	par_problem(NULL),
+	left_problem(NULL),
+	right_problem(NULL)
+      { }
+
+      /** @brief Constructor.
+       *
+       * Initialize the synchronization variable to not ready and the
+       * children problems to NULL.  If @c _t is a red node it is
+       * recolored black, and the stored black height is one more
+       * than @c _black_h.
+       * @param _t Root of a tree.
+       * @param _black_h Black height of @c _t
+       * @param _par_problem Parent concatenation problem to solve
+       * when @c is_ready = READY_YES
+       */
+      concat_problem(const _Rb_tree_node_ptr _t, const int _black_h, concat_problem* _par_problem):
+	t(_t),
+	black_h(_black_h),
+	is_ready(READY_NO),
+	par_problem(_par_problem),
+	left_problem(NULL),
+	right_problem(NULL)
+      {
+	// The root of an insertion problem must be black.
+	if (t != NULL and t->_M_color == std::_S_red)
+	  {
+	    t->_M_color = std::_S_black;
+	    ++black_h;
+	  }
+      }
+    };
+
+
+    /** @brief Bulk insertion helper: insertion of a range of the
+     * array of nodes into a subtree
+     * @invariant t, pos_beg and pos_end will not change after initialization
+     */
+    struct insertion_problem
+    {
+      /** @brief Renaming of _Rb_tree @c size_type */
+      typedef typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type size_type;
+
+      /** @brief Root of the tree where the elements are to be inserted */
+      _Rb_tree_node_ptr t;
+
+      /** @brief Position of the first node in the array of nodes to
+       * be inserted into @c t */
+      size_type pos_beg;
+
+      /** @brief Position of the first node in the array of nodes
+       * that won't be inserted into @c t */
+      size_type pos_end;
+
+      /** @brief Partition in the array of nodes of @c pos_beg and @c
+       * pos_end (must be the same for both, and so gaps are
+       * avoided) */
+      int array_partition;
+
+      /** @brief Concatenation problem to solve once the insertion
+       * problem is finished */
+      concat_problem* conc;
+
+      /** @brief Trivial constructor: no member is given a value. */
+      insertion_problem()
+      { }
+
+      /** @brief Constructor.
+       *
+       * The tree to insert into is taken from @c _conc->t, so @c _conc
+       * is dereferenced here.
+       * @param b Position of the first node in the array of nodes to
+       * be inserted into @c _conc->t
+       * @param e Position of the first node in the array of nodes
+       * that won't be inserted into @c _conc->t
+       * @param array_p Partition in the array of nodes of @c b and @c e
+       * @param _conc Concatenation problem to solve once the
+       * insertion problem is finished
+       */
+      insertion_problem(const size_type b, const size_type e, const int array_p, concat_problem* _conc)
+      {
+	conc = _conc;
+	t = _conc->t;
+	pos_beg = b;
+	pos_end = e;
+	array_partition = array_p;
+
+	_GLIBCXX_PARALLEL_ASSERT(pos_beg <= pos_end);
+
+	//The root of an insertion problem must be black!!
+	_GLIBCXX_PARALLEL_ASSERT(t == NULL or t->_M_color != std::_S_red);
+      }
+    };
+
+
+    /** @brief Main bulk construction and insertion helper method
+     *
+     * is_sorted_distance_accessors() is asked whether the input is
+     * sorted; the sequence length, the per-thread accessors and the
+     * partition boundaries used afterwards are the ones it was handed
+     * to fill in.  A sorted input goes to
+     * _M_sorted_bulk_construction() or _M_sorted_bulk_insertion(),
+     * any other input to _M_not_sorted_bulk_insertion_construction().
+     * The elapsed time of each phase is recorded and printed through
+     * the Timing helper.
+     * @param __first First element in a sequence to be added into the tree
+     * @param __last End of the sequence of elements to be added into the tree
+     * @param is_construction If true, the tree was empty and so, this
+     * is constructed. Otherwise, the elements are added to an
+     * existing tree.
+     * @param strictly_less_or_less_equal Comparator to deal
+     * transparently with repetitions with respect to the uniqueness
+     * of the wrapping container */
+    template<typename _InputIterator, typename StrictlyLessOrLessEqual>
+    void
+    _M_bulk_insertion_construction(const _InputIterator __first, const _InputIterator __last, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      Timing<_timing_tag> timer;
+
+      timer.tic();
+
+      thread_index_t num_threads = get_max_threads();
+      size_type n;
+      size_type beg_partition[num_threads+1];
+      _InputIterator access[num_threads+1];
+      beg_partition[0] = 0;
+      const bool input_sorted = is_sorted_distance_accessors(__first, __last, access, beg_partition,n, num_threads, std::__iterator_category(__first));
+
+      timer.tic("is_sorted");
+
+      if (input_sorted)
+	{
+	  // With fewer elements than threads some ranges are empty:
+	  // slide the non-empty ones to the front so that every range
+	  // in use has at least one element, and use one thread per
+	  // element.
+	  if (static_cast<size_type>(num_threads) > n)
+	    {
+	      int kept = 1;
+	      for (int p = 1; p <= num_threads; ++p)
+		{
+		  if (beg_partition[kept-1] == beg_partition[p])
+		    continue;
+		  beg_partition[kept] = beg_partition[p];
+		  access[kept] = access[p];
+		  ++kept;
+		}
+	      num_threads = static_cast<thread_index_t>(n);
+	    }
+
+	  if (is_construction)
+	    _M_sorted_bulk_construction(access, beg_partition, n, num_threads, strictly_less_or_less_equal);
+	  else
+	    _M_sorted_bulk_insertion(access, beg_partition, n, num_threads, strictly_less_or_less_equal);
+	}
+      else
+	{
+	  _M_not_sorted_bulk_insertion_construction(access, beg_partition, n, num_threads, is_construction, strictly_less_or_less_equal);
+	}
+
+      timer.tic("main work");
+
+      timer.print();
+    }
+
+    /** @brief Bulk construction and insertion helper method on an
+     * input sequence which is not sorted
+     *
+     * A buffer of @c n + 1 entries is obtained with @c ::operator @c
+     * new and filled from the accessors: with whole elements when
+     * @c _GLIBCXX_TREE_FULL_COPY is set, with pointers to elements
+     * otherwise.  The buffer is then handed, together with a matching
+     * comparator, to the sorting overload of
+     * _M_not_sorted_bulk_insertion_construction(); it is not released
+     * in this method.
+     * @param access Array of iterators of size @c num_threads +
+     * 1. Each position contains the first element in each subsequence
+     * to be added into the tree.
+     * @param beg_partition Array of positions of size @c num_threads
+     * + 1. Each position contains the rank of the first element in
+     * each subsequence to be added into the tree.
+     * @param n Size of the sequence to be inserted
+     * @param num_threads Number of threads and corresponding
+     * subsequences in which the insertion work is going to be shared
+     * @param is_construction If true, the tree was empty and so, this
+     * is constructed. Otherwise, the elements are added to an
+     * existing tree.
+     * @param strictly_less_or_less_equal Comparator to deal
+     * transparently with repetitions with respect to the uniqueness
+     * of the wrapping container */
+    template<typename _InputIterator, typename StrictlyLessOrLessEqual>
+    void
+    _M_not_sorted_bulk_insertion_construction(_InputIterator* access,
+					      size_type* beg_partition,
+					      const size_type n,
+					      const thread_index_t num_threads,
+					      const bool is_construction,
+					      StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      // Copy entire elements. In the case of a map, we would be
+      // copying the pair. Therefore, the copy should be reconsidered
+      // when objects are big. Essentially two cases:
+      // - The key is small: make that the pair, is a pointer to data
+      //   instead of a copy to it
+      // - The key is big: we simply have a pointer to the iterator
+#if _GLIBCXX_TREE_FULL_COPY
+      typedef ValueCompare<_Compare> this_value_comparator;
+
+      void* const storage = ::operator new(sizeof(nc_value_type)*(n+1));
+      nc_value_type* elements = static_cast<nc_value_type*> (storage);
+
+      uninitialized_copy_from_accessors(access, beg_partition, elements, num_threads);
+
+      _M_not_sorted_bulk_insertion_construction<nc_value_type, nc_value_type*, this_value_comparator>
+	(beg_partition, elements, this_value_comparator(base_type::_M_impl._M_key_compare), n, num_threads, is_construction, strictly_less_or_less_equal);
+#else
+      // For sorting, we cannot use the new PtrIterator because we
+      // want the pointers to be exchanged and not the elements.
+      typedef PtrComparator<ValueCompare<_Compare>, nc_value_type*>  this_ptr_comparator;
+
+      void* const storage = ::operator new(sizeof(nc_value_type*)*(n+1));
+      nc_value_type** element_ptrs = static_cast<nc_value_type**> (storage);
+
+      uninitialized_ptr_copy_from_accessors(access, beg_partition, element_ptrs, num_threads);
+
+      _M_not_sorted_bulk_insertion_construction<nc_value_type*, PtrIterator<nc_value_type>, this_ptr_comparator>
+	(beg_partition, element_ptrs, this_ptr_comparator(ValueCompare<_Compare>(base_type::_M_impl._M_key_compare)), n, num_threads, is_construction, strictly_less_or_less_equal);
+#endif
+    }
+
+    /** @brief Bulk construction and insertion helper method on an
+     * input sequence which is not sorted
+     *
+     * The elements are sorted and its accessors calculated. Then,
+     * _M_sorted_bulk_construction() or _M_sorted_bulk_insertion() is
+     * called.  Finally the buffer @c v is destroyed and released.
+     * @param beg_partition Array of positions of size @c num_threads
+     * + 1. Each position contains the rank of the first element in
+     * each subsequence to be added into the tree.
+     * @param v Array of elements to be sorted (copy of the original
+     * sequence).  Ownership is taken: the storage must come from
+     * @c ::operator @c new and hold @c n constructed elements, as
+     * prepared by the accessor overload of this method.
+     * @param comp Comparator to be used for sorting the elements
+     * @param n Size of the sequence to be inserted
+     * @param num_threads Number of threads and corresponding
+     * subsequences in which the insertion work is going to be shared
+     * @param is_construction If true, _M_sorted_bulk_construction()
+     * is called. Otherwise, _M_sorted_bulk_insertion() is called.
+     * @param strictly_less_or_less_equal Comparator to deal
+     * transparently with repetitions with respect to the uniqueness
+     * of the wrapping container
+     */
+    template<typename ElementsToSort, typename IteratorSortedElements, typename Comparator, typename StrictlyLessOrLessEqual>
+    void
+    _M_not_sorted_bulk_insertion_construction(size_type* beg_partition, ElementsToSort* v, Comparator comp, const size_type n, thread_index_t num_threads, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      // The accessors have been calculated for the non sorted.
+      Timing<_timing_tag> t;
+
+      t.tic();
+
+      // Never use more threads than there are elements.
+      num_threads = static_cast<thread_index_t>(std::min<size_type>(num_threads, n));
+
+      std::stable_sort(v, v+n, comp);
+
+      t.tic("sort");
+
+      // Recompute the accessors, now on the sorted copy.
+      IteratorSortedElements sorted_access[num_threads+1];
+      range_accessors(IteratorSortedElements(v), IteratorSortedElements(v+n), sorted_access, beg_partition, n, num_threads, std::__iterator_category(v));
+
+      t.tic("range_accessors");
+
+      // Partial template specialization not available.
+      if (is_construction)
+	_M_sorted_bulk_construction(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
+      else
+	_M_sorted_bulk_insertion(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
+
+      // The buffer is raw storage from ::operator new holding n
+      // constructed elements: a plain delete expression would destroy
+      // only the first one and would not match the allocation.  Destroy
+      // every element, then release the storage the way it was obtained.
+      for (size_type i = 0; i < n; ++i)
+	v[i].~ElementsToSort();
+      ::operator delete(v);
+
+      t.tic("actual construction or insertion");
+
+      t.print();
+    }
+
+    /** @brief Construct a tree sequentially using the parallel routine
+     *
+     * An empty range gives no tree and a single node becomes a leaf;
+     * any longer range is linked with a nodes_initializer working on
+     * one partition without gaps.
+     * @param r_array Array of nodes from which to take the nodes to
+     * build the tree
+     * @param pos_beg Position of the first node in the array of nodes
+     * to be part of the tree
+     * @param pos_end Position of the first node in the array of nodes
+     * that will not be part of the tree
+     * @param black_h Black height of the resulting tree (out)
+     * @return Root of the constructed tree, NULL if the range is empty
+     */
+    static _Rb_tree_node_ptr
+    simple_tree_construct(_Rb_tree_node_ptr* r_array, const size_type pos_beg, const size_type pos_end, int& black_h)
+    {
+      size_type length = pos_end - pos_beg;
+
+      if (length == 0)
+	{
+	  black_h = 0;
+	  return NULL;
+	}
+      if (length == 1)
+	{
+	  // It is needed, not only for efficiency but because the
+	  // last level in our tree construction is red.
+	  make_leaf(r_array[pos_beg], black_h);
+	  return r_array[pos_beg];
+	}
+
+      // Work on the sub-array that starts at pos_beg.
+      _Rb_tree_node_ptr* r = r_array + pos_beg;
+
+      ranker_no_gaps rank;
+      nodes_initializer<ranker_no_gaps> nodes_init(r, length, 1, rank);
+
+      black_h = nodes_init.get_height();
+
+      // Nodes before the splitting point are linked with
+      // link_complete(), the remaining ones with link_incomplete().
+      const size_type split = nodes_init.get_shifted_splitting_point();
+      for (size_type i = 0; i < split; ++i)
+	nodes_init.link_complete(r[i],0);
+
+      for (size_type i = split; i < length; ++i)
+	nodes_init.link_incomplete(r[i],0);
+
+      _Rb_tree_node_ptr t = nodes_init.get_root();
+      _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(t));
+      _GLIBCXX_PARALLEL_ASSERT(t->_M_color == std::_S_black);
+      return t;
+    }
+
+
+    /** @brief Allocation of an array of nodes and initialization of
+     * their value fields from an input sequence. Done in parallel.
+     *
+     * Storage for @c n + 1 node pointers is obtained with
+     * @c ::operator @c new.  Each thread creates one node per element
+     * of its own subsequence and stores it at the rank of that
+     * element.  No check for uniqueness is made, because the sequence
+     * is not necessarily sorted.
+     * @param access Array of iterators of size @c num_threads +
+     * 1. Each position contains the first value in the subsequence to
+     * be copied into the corresponding tree node.
+     * @param beg_partition Array of positions of size @c num_threads
+     * + 1. Each position contains the rank of the first element in
+     * the subsequence from which to copy the data to initialize the
+     * nodes.
+     * @param n Size of the sequence and the array of nodes to be allocated.
+     * @param num_threads Number of threads and corresponding
+     * subsequences in which the allocation and initialization work is
+     * going to be shared
+     * @return The array of node pointers
+     */
+    template<typename _Iterator>
+    _Rb_tree_node_ptr* _M_unsorted_bulk_allocation_and_initialization(const _Iterator* access, const size_type* beg_partition, const size_type n, const thread_index_t num_threads)
+    {
+      void* const storage = ::operator new (sizeof(_Rb_tree_node_ptr)*(n+1));
+      _Rb_tree_node_ptr* r = static_cast<_Rb_tree_node_ptr*> (storage);
+
+      // Each thread fills the slots of its own subsequence.
+#pragma omp parallel num_threads(num_threads)
+      {
+#if USE_PAPI
+	PAPI_register_thread();
+#endif
+
+	const int iam = omp_get_thread_num();
+	size_type slot = beg_partition[iam];
+	for (_Iterator it = access[iam]; it != access[iam+1]; ++it)
+	  {
+	    r[slot] = base_type::_M_create_node(*it);
+	    ++slot;
+	  }
+      }
+      return r;
+    }
+
+
+    /** @brief Allocation of an array of nodes and initialization of
+     * their value fields from an input sequence. Done in
+     * parallel. Besides, the sequence is checked for uniqueness while
+     * copying the elements, and if there are repetitions, gaps within
+     * the partitions are created.
+     *
+     * An extra ghost node pointer is reserved in the array to ease
+     * comparisons later while linking the nodes
+     *
+     * Within a partition the created nodes are stored contiguously
+     * from the first slot of the partition, so the slots left unused
+     * by skipped elements are the last ones of the partition.  After
+     * the parallel phase, partitions in which no node was created are
+     * merged into the preceding one; @c access, @c beg_partition,
+     * @c rank_shift and @c num_threads are updated accordingly.
+     * @pre The sequence is sorted.
+     * @param access Array of iterators of size @c num_threads +
+     * 1. Each position contains the first value in the subsequence to
+     * be copied into the corresponding tree node.  (in/out)
+     * @param beg_partition Array of positions of size @c num_threads
+     * + 1. Each position contains the rank of the first element in
+     * the subsequence from which to copy the data to initialize the
+     * nodes.  (in/out)
+     * @param rank_shift Array of size @c num_threads + 1 containing
+     * the number of accumulated gaps at the beginning of each
+     * partition  (out)
+     * @param n Size of the sequence and the array of nodes (-1) to be
+     * allocated.
+     * @param num_threads Number of threads and corresponding
+     * subsequences in which the allocation and initialization work is
+     * going to be shared  (in/out: lowered when partitions are merged)
+     * @param strictly_less_or_less_equal Comparator to deal
+     * transparently with repetitions with respect to the uniqueness
+     * of the wrapping container
+     * @return Array of @c n + 1 node pointers obtained with
+     * @c ::operator @c new, whose last entry is NULL
+     */
+    template<typename _Iterator, typename StrictlyLessOrLessEqual>
+    _Rb_tree_node_ptr* _M_sorted_bulk_allocation_and_initialization(_Iterator* access, size_type*  beg_partition, size_type* rank_shift, const size_type n, thread_index_t& num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      // Ghost node at the end to avoid extra comparisons in nodes_initializer.
+      _Rb_tree_node_ptr* r = static_cast<_Rb_tree_node_ptr*> (::operator new (sizeof(_Rb_tree_node_ptr)*(n+1)));
+      r[n] = NULL;
+
+      // Dealing with repetitions (EFFICIENCY ISSUE).
+      // Each thread overwrites access[iam] below, so the end of every
+      // range is read from this private snapshot instead.
+      _Iterator access_copy[num_threads+1];
+      for (int i = 0; i <= num_threads; ++i)
+	access_copy[i] = access[i];
+      // Allocate and initialize the nodes
+#pragma omp parallel num_threads(num_threads)
+      {
+#if USE_PAPI
+	PAPI_register_thread();
+#endif
+	thread_index_t iam = omp_get_thread_num();
+	_Iterator prev = access[iam];
+	size_type i = beg_partition[iam];
+	_Iterator it = prev;
+	if (iam != 0)
+	  {
+	    // Compare against the last element of the preceding range.
+	    --prev;
+	    // Dealing with repetitions (CORRECTNESS ISSUE).
+	    // Skip the leading elements the comparator rejects with
+	    // respect to that element.
+	    while (it!= access_copy[iam+1] and not strictly_less_or_less_equal(_KeyOfValue()(*prev), _KeyOfValue()(*it)))
+	      {
+		_GLIBCXX_PARALLEL_ASSERT(not base_type::_M_impl._M_key_compare(_KeyOfValue()(*it),_KeyOfValue()(*prev)));
+		++it;
+	      }
+	    // The range now begins at the first element that is kept
+	    // (or at its end, if none is).
+	    access[iam] = it;
+	    if (it != access_copy[iam+1]){
+	      r[i] = base_type::_M_create_node(*it);
+	      ++i;
+	      prev=it;
+	      ++it;
+	    }
+	    //}
+	  }
+	else
+	  {
+	    // The first element of the whole sequence is always kept.
+	    // NOTE(review): dereferences access[0] unconditionally, so
+	    // this presumably relies on a non-empty sequence -- confirm
+	    // against the callers.
+	    r[i] = base_type::_M_create_node(*prev);
+	    ++i;
+	    ++it;
+	  }
+	while (it!= access_copy[iam+1])
+	  {
+	    /*****	Dealing with repetitions (CORRECTNESS ISSUE) *****/
+	    // A node is created only when the comparator accepts the
+	    // element with respect to the last kept one.
+	    if (strictly_less_or_less_equal(_KeyOfValue()(*prev),_KeyOfValue()(*it)))
+	      {
+		r[i] = base_type::_M_create_node(*it);
+		++i;
+		prev=it;
+	      }
+	    else{
+	      _GLIBCXX_PARALLEL_ASSERT(not base_type::_M_impl._M_key_compare(_KeyOfValue()(*it),_KeyOfValue()(*prev)));
+	    }
+	    ++it;
+	  }
+	/*****	Dealing with repetitions (EFFICIENCY ISSUE) *****/
+	// Number of slots of this partition left without a node.
+	rank_shift[iam+1] =  beg_partition[iam+1] - i;
+      }
+      /*****	Dealing with repetitions (EFFICIENCY ISSUE) *****/
+      rank_shift[0] = 0;
+      /* Guarantee that there are no empty intervals.
+      - If an empty interval is found, is joined with the previous one
+	 (the rank_shift of the previous is augmented with all the new
+	 repetitions)
+      */
+      // Accumulate the per-partition gap counts into running totals,
+      // stopping at the first partition whose slots are all unused.
+      thread_index_t i = 1;
+      while (i <= num_threads and rank_shift[i] != (beg_partition[i] - beg_partition[i-1]))
+	{
+	  rank_shift[i] += rank_shift[i-1];
+	  ++i;
+	}
+      if (i <= num_threads)
+	{
+	  // From here on i reads the original entries and j writes the
+	  // compacted ones.
+	  thread_index_t j = i - 1;
+	  while (true)
+	    {
+	      // Add the gaps of the partition read at i and of every
+	      // completely unused partition that follows it.
+	      do
+		{
+		  rank_shift[j] += rank_shift[i];
+		  ++i;
+		} while (i <= num_threads and rank_shift[i] == (beg_partition[i] - beg_partition[i-1]));
+
+	      // Entry j takes over the boundary and the accessor
+	      // reached by the read index.
+	      beg_partition[j] = beg_partition[i-1];
+	      access[j] = access[i-1];
+	      if (i > num_threads) break;
+	      ++j;
+
+	      // Initialize with the previous.
+	      rank_shift[j] = rank_shift[j-1];
+	    }
+	  // Report the number of partitions that remain.
+	  num_threads = j;
+	}
+      return r;
+
+    }
+
+    /** @brief Allocation of an array of nodes and initialization of
+     * their value fields from an input sequence.
+     *
+     * The allocation and initialization is done in parallel. Besides,
+     * the sequence is checked for uniqueness while copying the
+     * elements. However, in contrast to
+     * _M_sorted_bulk_allocation_and_initialization(), if there are
+     * repetitions, no gaps within the partitions are created. To do
+     * so efficiently, some extra memory is needed to compute a prefix
+     * sum.
+     * @pre The sequence is sorted.
+     * @param access Array of iterators of size @c num_threads +
+     * 1. Each position contains the first value in the subsequence to
+     * be copied into the corresponding tree node.
+     * @param beg_partition Array of positions of size @c num_threads
+     * + 1. Each position contains the rank of the first element in
+     * the subsequence from which to copy the data to initialize the
+     * nodes.
+     * @param n Size of the sequence and the array of nodes (-1) to be
+     * allocated.
+     * @param num_threads Number of threads and corresponding
+     * subsequences in which the allocation and initialization work is
+     * going to be shared
+     * @param strictly_less_or_less_equal Comparator to deal
+     * transparently with repetitions with respect to the uniqueness
+     * of the wrapping container
+     */
+    template<typename _Iterator, typename StrictlyLessOrLessEqual>
+    _Rb_tree_node_ptr* _M_sorted_no_gapped_bulk_allocation_and_initialization(_Iterator* access, size_type* beg_partition, size_type& n, const thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      // sums[i] first receives a 0/1 flag per input position (1 means
+      // "repetition, no node will be created") and, after the prefix
+      // sum further down, the number of repetitions in positions [0, i].
+      // NOTE(review): sums[n-1] is read below, so this presumably
+      // requires n > 0 -- confirm against callers.
+      size_type* sums = static_cast<size_type*> (::operator new (sizeof(size_type)*n));
+      // First pass: flag the repetitions.  No node is allocated yet.
+      /*		try
+	{*/
+#pragma omp parallel num_threads(num_threads)
+      {
+#if USE_PAPI
+	PAPI_register_thread();
+#endif
+	// Each thread scans its own subsequence
+	// [access[iam], access[iam+1]) and writes sums[] starting at
+	// beg_partition[iam].
+	int iam = omp_get_thread_num();
+	_Iterator prev = access[iam];
+	size_type i = beg_partition[iam];
+	_Iterator it = prev;
+	// NOTE(review): the first element is processed unconditionally,
+	// which looks like it assumes no subsequence is empty -- confirm.
+	if (iam !=0)
+	  {
+	    // Step back to the element just before this subsequence so
+	    // the first element is compared across the partition border.
+	    --prev;
+
+	    // First iteration here, to update accessor in case was
+	    // equal to the last element of the previous range
+
+	    // Dealing with repetitions (CORRECTNESS ISSUE).
+	    if (strictly_less_or_less_equal(_KeyOfValue()(*prev),_KeyOfValue()(*it)))
+	      {
+		sums[i] = 0;
+		prev=it;
+	      }
+	    else
+	      {
+		sums[i] = 1;
+	      }
+	    ++i;
+	    ++it;
+	  }
+	else
+	  {
+	    // The very first element of the sequence has no predecessor
+	    // and is therefore never flagged.
+	    sums[i] = 0;
+	    ++i;
+	    ++it;
+	  }
+	while (it!= access[iam+1])
+	  {
+	    // prev only advances on elements that are kept, so every
+	    // element is compared against the last kept one.
+	    // Dealing with repetitions (CORRECTNESS ISSUE).
+	    if (strictly_less_or_less_equal(_KeyOfValue()(*prev),_KeyOfValue()(*it)))
+	      {
+		sums[i] = 0;
+		prev=it;
+	      }
+	    else
+	      sums[i] = 1;
+	    ++i;
+	    ++it;
+	  }
+      }
+      // Turn the flags into running totals of repetitions.
+      // Should be done in parallel.
+      partial_sum(sums,sums + n, sums);
+
+      // n is an in/out parameter: from here on it is the number of
+      // nodes actually created (input size minus all repetitions).
+      n -= sums[n-1];
+      // One extra slot is allocated and set to 0 as a terminator.
+      _Rb_tree_node_ptr* r = static_cast<_Rb_tree_node_ptr*> (::operator new (sizeof(_Rb_tree_node_ptr)*(n+1)));
+      r[n]=0;
+
+      // Second pass: create the nodes, packed without gaps.
+      // NOTE(review): if _M_create_node throws, sums and r are not
+      // released here -- confirm whether callers rely on that.
+#pragma omp parallel num_threads(num_threads)
+      {
+#if USE_PAPI
+	PAPI_register_thread();
+#endif
+	int iam = omp_get_thread_num();
+	_Iterator it = access[iam];
+	// i indexes the input/sums, j indexes the compacted node array.
+	size_type i = beg_partition[iam];
+	size_type j = i;
+	// Running total seen at the previous input position; a change
+	// in sums[] relative to it marks a repetition.
+	size_type before = 0;
+	if (iam > 0)
+	  {
+	    before = sums[i-1];
+	    // Shift the output start left by the number of repetitions
+	    // found in all preceding positions.
+	    j -= sums[i-1];
+	  }
+	// Publish the compacted start of this partition.  Each thread
+	// reads and writes only its own beg_partition[iam] entry.
+	beg_partition[iam] = j;
+	while (it!= access[iam+1])
+	  {
+	    // Skip a run of repetitions.
+	    while (it!= access[iam+1] and sums[i]!=before)
+	      {
+		before = sums[i];
+		++i;
+		++it;
+	      }
+	    // Here *it (if any) is not a repetition: create its node.
+	    if (it!= access[iam+1])
+	      {
+		r[j] = base_type::_M_create_node(*it);
+		++j;
+		++i;
+		++it;
+	      }
+	  }
+
+      }
+      // Close the last partition with the compacted total.
+      beg_partition[num_threads] = n;
+
+      // Release the auxiliary prefix-sum array.
+      ::operator delete(sums);
+      return r;
+    }
+
+    /** @brief Main bulk construction method: perform the actual
+	initialization, allocation and finally node linking once the
+	input sequence has already been preprocessed.
+     * @param access Array of iterators of size @c num_threads +
+     * 1. Each position contains the first value in the subsequence to
+     * be copied into the corresponding tree node.
+     * @param beg_partition Array of positions of size @c num_threads
+     * + 1. Each position contains the rank of the first element in
+     * the subsequence from which to copy the data to initialize the
+     * nodes.
+     * @param n Size of the sequence and the array of nodes (-1) to be
+     * allocated.
+     * @param num_threads Number of threads and corresponding
+     * subsequences in which the work is going to be shared
+     * @param strictly_less_or_less_equal Comparator to deal
+     * transparently with repetitions with respect to the uniqueness
+     * of the wrapping container
+     */
+    template<typename _Iterator, typename StrictlyLessOrLessEqual>
+    void
+    _M_sorted_bulk_construction(_Iterator* access, size_type* beg_partition, const size_type n, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      Timing<_timing_tag> t;
+
+      // Per-partition accumulated count of dropped repetitions; it is
+      // filled in by the allocation step below.
+      size_type rank_shift[num_threads+1];
+
+      t.tic();
+
+      // Step 1: allocate the nodes and copy the values into them.
+      _Rb_tree_node_ptr* nodes = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, n, num_threads, strictly_less_or_less_equal);
+
+      t.tic("bulk allocation and initialization");
+
+      // Step 2: link the nodes into a tree.  The ranker accounts for
+      // the gaps left in the array by repeated elements.
+      ranker_gaps gap_ranker(beg_partition, rank_shift, num_threads);
+      nodes_initializer<ranker_gaps> linker(nodes, n - rank_shift[num_threads], num_threads, gap_ranker);
+      size_type split = linker.get_shifted_splitting_point();
+
+#pragma omp parallel num_threads(num_threads)
+      {
+#if USE_PAPI
+	PAPI_register_thread();
+#endif
+	int iam = omp_get_thread_num();
+	size_type first = beg_partition[iam];
+	// The partition ends before the gaps created by its repetitions.
+	size_type past_last = beg_partition[iam+1] - (rank_shift[iam+1] - rank_shift[iam]);
+
+	// Clamp the global splitting point into [first, past_last]:
+	// positions below it are linked as "complete", the remaining
+	// ones as "incomplete".
+	size_type boundary = split;
+	if (boundary < first)
+	  boundary = first;
+	if (boundary > past_last)
+	  boundary = past_last;
+
+	for (size_type pos = first; pos < boundary; ++pos)
+	  linker.link_complete(nodes[pos], iam);
+	for (size_type pos = boundary; pos < past_last; ++pos)
+	  linker.link_incomplete(nodes[pos], iam);
+      }
+      // Reaching this point means no exception was thrown, so the
+      // tree structure itself can now be initialized.
+
+      // Step 3: attach the tree laid out on the pointer array to the
+      // header node.
+      base_type::_M_impl._M_node_count = n - rank_shift[num_threads];
+      base_type::_M_impl._M_header._M_left = nodes[0];
+
+      // Walk backwards to the last partition that still holds a node,
+      // i.e. whose extent is not made up entirely of gaps.
+      thread_index_t last_nonempty = num_threads;
+      for (;;)
+	{
+	  const size_type extent = beg_partition[last_nonempty] - beg_partition[last_nonempty-1];
+	  const size_type gaps = rank_shift[last_nonempty] - rank_shift[last_nonempty-1];
+	  if (extent != gaps)
+	    break;
+	  --last_nonempty;
+	}
+      base_type::_M_impl._M_header._M_right = nodes[beg_partition[last_nonempty] - (rank_shift[last_nonempty] - rank_shift[last_nonempty-1]) - 1];
+      base_type::_M_impl._M_header._M_parent = linker.get_root();
+      linker.get_root()->_M_parent= &base_type::_M_impl._M_header;
+
+      t.tic("linking nodes");
+
+      // Only the pointer array is released; the nodes now belong to
+      // the tree.
+      ::operator delete(nodes);
+
+      t.tic("delete array of pointers");
+      t.print();
+    }
+
+
+    /** @brief Main bulk insertion method: perform the actual
+	initialization, allocation and finally insertion once the
+	input sequence has already been preprocessed.
+     * @param access Array of iterators of size @c num_threads +
+     * 1. Each position contains the first value in the subsequence to
+     * be copied into the corresponding tree node.
+     * @param beg_partition Array of positions of size @c num_threads
+     * + 1. Each position contains the rank of the first element in
+     * the subsequence from which to copy the data to initialize the
+     * nodes.
+     * @param k Size of the sequence to be inserted (including the
+     * possible repeated elements among the sequence itself and
+     * against those elements already in the tree)
+     * @param num_threads Number of threads and corresponding
+     * subsequences in which the work is going to be shared
+     * @param strictly_less_or_less_equal Comparator to deal
+     * transparently with repetitions with respect to the uniqueness
+     * of the wrapping container
+     */
+    template<typename _Iterator, typename StrictlyLessOrLessEqual>
+    void
+    _M_sorted_bulk_insertion(_Iterator* access, size_type* beg_partition, size_type k, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      _GLIBCXX_PARALLEL_ASSERT((size_type)num_threads <= k);
+      Timing<_timing_tag> t;
+
+      t.tic();
+
+      // num_thr-1 problems in the upper part of the tree
+      // num_thr problems to further parallelize
+      // existing[i] is the counter handed to thread i as the
+      // "existing" out-parameter of the split/merge routines; the
+      // totals are subtracted from the node count at the end.
+      std::vector<size_type> existing(num_threads,0);
+#if _GLIBCXX_TREE_INITIAL_SPLITTING
+      /*****	Dealing with repetitions (EFFICIENCY ISSUE) *****/
+      size_type rank_shift[num_threads+1];
+
+      // Need to create them dynamically because they are so erased
+      concat_problem* conc[2*num_threads-1];
+#endif
+      _Rb_tree_node_ptr* r;
+      /*****	Dealing with repetitions (EFFICIENCY ISSUE) *****/
+      // Comparing the root key with itself tells which comparator was
+      // passed: "false" means a strict one, i.e. a unique container.
+      // NOTE(review): this dereferences the root, so the tree is
+      // presumably required to be non-empty -- confirm.
+      if (not strictly_less_or_less_equal(base_type::_S_key(base_type::_M_root()),base_type::_S_key(base_type::_M_root()) ))
+	{
+	  // Unique container
+	  // Set 1 and 2 could be done in parallel ...
+	  // 1. Construct the nodes with their corresponding data
+#if _GLIBCXX_TREE_INITIAL_SPLITTING
+	  r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, k, num_threads, strictly_less_or_less_equal);
+	  t.tic("bulk allocation and initialization");
+#else
+	  // k is passed by reference here and comes back reduced by the
+	  // number of repetitions inside the sequence.
+	  r = _M_sorted_no_gapped_bulk_allocation_and_initialization(access, beg_partition, k, num_threads, strictly_less_or_less_equal);
+#endif
+	}
+      else
+	{
+	  // Not unique container.
+	  r = _M_unsorted_bulk_allocation_and_initialization(access, beg_partition, k, num_threads);
+#if _GLIBCXX_TREE_INITIAL_SPLITTING
+	  // Trivial initialization of rank_shift.
+	  for (int i=0; i <= num_threads; ++i)
+	    rank_shift[i] = 0;
+#endif
+	}
+#if _GLIBCXX_TREE_INITIAL_SPLITTING
+      // Calculate position of last element to be inserted: must be
+      // done now, or otherwise becomes messy.
+
+      /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/
+      size_type last = beg_partition[num_threads] - (rank_shift[num_threads] - rank_shift[num_threads - 1]);
+
+      t.tic("last element to be inserted");
+
+      //2. Split the tree according to access in num_threads parts
+      //Initialize upper concat_problems
+      //Allocate them dynamically because they are afterwards so erased
+      for (int i=0; i < (2*num_threads-1); ++i)
+	{
+	  conc[i] = new concat_problem ();
+	}
+      concat_problem* root_problem = _M_bulk_insertion_initialize_upper_problems(conc, 0, num_threads, NULL);
+
+      // The first position of access and the last are ignored, so we
+      // have exactly num_threads subtrees.
+      // Nested OpenMP parallelism is switched on for the split only;
+      // the previous setting is restored right afterwards.
+      bool before = omp_get_nested();
+      omp_set_nested(true);
+      _M_bulk_insertion_split_tree_by_pivot(static_cast<_Rb_tree_node_ptr>(base_type::_M_root()), r, access, beg_partition, rank_shift, 0, num_threads-1, conc, num_threads, strictly_less_or_less_equal);
+      omp_set_nested(before);
+
+      // Construct upper tree with the first elements of ranges if
+      // they are NULL We cannot do this by default because they could
+      // be repeated and would not be checked.
+      // Even indices of conc hold the subtrees, odd index pos*2-1
+      // holds the single node between subtree pos-1 and subtree pos.
+      // r_s counts how many partitions gave up their first element.
+      size_type r_s = 0;
+      for (int pos = 1; pos < num_threads; ++pos)
+	{
+	  _GLIBCXX_PARALLEL_ASSERT(conc[(pos-1)*2]->t == NULL or conc[pos*2-1]->t == NULL or strictly_less_or_less_equal(base_type::_S_key(base_type::_S_maximum(conc[(pos-1)*2]->t)), base_type::_S_key(conc[pos*2-1]->t)));
+	  _GLIBCXX_PARALLEL_ASSERT(conc[pos*2]->t == NULL  or conc[pos*2-1]->t == NULL or strictly_less_or_less_equal( base_type::_S_key(conc[pos*2-1]->t), base_type::_S_key(base_type::_S_minimum(conc[pos*2]->t))));
+	  /*****	Dealing with repetitions (CORRECTNESS ISSUE) *****/
+
+	  // The first element of the range is the root.
+	  if (conc[pos*2-1]->t == NULL or (not(strictly_less_or_less_equal(base_type::_S_key(static_cast<_Rb_tree_node_ptr>(conc[pos*2-1]->t)), _KeyOfValue()(*access[pos])))))
+	    {
+	      // There was not a candidate element
+	      // or
+	      // Exists an initialized position in the array which
+	      // corresponds to conc[pos*2-1]->t */
+	      if (conc[pos*2-1]->t == NULL)
+		{
+		  // No separating node came out of the tree: use the
+		  // first new node of this partition, coloured black,
+		  // and count it as inserted right away.
+		  size_t np = beg_partition[pos];
+		  _GLIBCXX_PARALLEL_ASSERT(conc[(pos-1)*2]->t == NULL or strictly_less_or_less_equal(base_type::_S_key(base_type::_S_maximum(conc[(pos-1)*2]->t)), base_type::_S_key(r[np])));
+		  _GLIBCXX_PARALLEL_ASSERT(conc[pos*2]->t == NULL  or strictly_less_or_less_equal( base_type::_S_key(r[np]), base_type::_S_key(base_type::_S_minimum(conc[pos*2]->t))));
+		  conc[pos*2-1]->t = r[np];
+		  r[np]->_M_color = std::_S_black;
+		  ++base_type::_M_impl._M_node_count;
+		}
+	      else
+		{
+		  // The comparator rejected the first new element
+		  // against the separating tree node: discard it.
+		  base_type::_M_destroy_node(r[beg_partition[pos]]);
+		}
+	      // Either way the first element of this partition is
+	      // consumed, so the partition now starts one further on.
+	      ++(access[pos]);
+	      ++(beg_partition[pos]);
+	      ++r_s;
+	    }
+	  _GLIBCXX_PARALLEL_ASSERT(conc[(pos-1)*2]->t == NULL or conc[(pos-1)*2]->t->_M_color == std::_S_black);
+	  /*****	Dealing with repetitions (EFFICIENCY ISSUE) *****/
+	  rank_shift[pos] += r_s;
+	}
+      /*****	Dealing with repetitions (EFFICIENCY ISSUE) *****/
+      rank_shift[num_threads] += r_s;
+#else
+      // Without initial splitting there is a single concatenation
+      // problem, rooted at the current tree root and kept on the stack.
+      concat_problem root_problem_on_stack(static_cast<_Rb_tree_node_ptr>(base_type::_M_root()), black_height(static_cast<_Rb_tree_node_ptr>(base_type::_M_root())), NULL);
+      concat_problem * root_problem = &root_problem_on_stack;
+      size_type last = k;
+#endif
+
+      t.tic("sorted_no_gapped...");
+
+      // 3. Split the range according to tree and create
+      // 3. insertion/concatenation problems to be solved in parallel
+      // min_problem is the size threshold at or below which
+      // _M_bulk_insertion_split_sequence stops dividing.
+#if _GLIBCXX_TREE_DYNAMIC_BALANCING
+      size_type min_problem = (k/num_threads) / (log2(k/num_threads + 1)+1);
+#else
+      // Larger than any possible problem: never divide further.
+      size_type min_problem = base_type::size() + k;
+#endif
+
+      // One queue of pending insertion problems per thread.
+      RestrictedBoundedConcurrentQueue<insertion_problem>* ins_problems[num_threads];
+
+#pragma omp parallel num_threads(num_threads)
+      {
+	int num_thread = omp_get_thread_num();
+	ins_problems[num_thread] = new RestrictedBoundedConcurrentQueue<insertion_problem>(2*(log2(base_type::size())+1));
+#if _GLIBCXX_TREE_INITIAL_SPLITTING
+	/*****	Dealing with repetitions (EFFICIENCY ISSUE) *****/
+	// Every thread starts with its own partition of the node
+	// array and the matching subtree conc[num_thread*2].
+	size_type end_k_thread = beg_partition[num_thread+1]  - (rank_shift[num_thread+1] - rank_shift[num_thread]);
+	ins_problems[num_thread]->push_front(insertion_problem(beg_partition[num_thread], end_k_thread, num_thread, conc[num_thread*2]));
+#else
+	// size_type end_k_thread = beg_partition[num_thread+1];
+#endif
+	insertion_problem ip_to_solve;
+	bool change;
+
+	// All queues must exist before any thread starts stealing.
+#if _GLIBCXX_TREE_INITIAL_SPLITTING
+#pragma omp barrier
+#else
+	// Exactly one thread seeds its queue with the whole problem.
+#pragma omp single
+	ins_problems[num_thread]->push_front(insertion_problem(0, k, num_thread, root_problem));
+#endif
+
+	do
+	  {
+	    // First do own work.
+	    while (ins_problems[num_thread]->pop_front(ip_to_solve))
+	      {
+		_GLIBCXX_PARALLEL_ASSERT(ip_to_solve.pos_beg <= ip_to_solve.pos_end);
+		_M_bulk_insertion_split_sequence(r, ins_problems[num_thread], ip_to_solve, existing[num_thread], min_problem, strictly_less_or_less_equal);
+
+	      }
+	    yield();
+	    change = false;
+
+	    //Then, try to steal from others (and become own).
+	    // Stealing takes from the back of the victim's queue, and
+	    // any sub-problems produced go to this thread's own queue.
+	    for (int i=1; i<num_threads; ++i)
+	      {
+		if (ins_problems[(num_thread+i)%num_threads]->pop_back(ip_to_solve))
+		  {
+		    change = true;
+		    _M_bulk_insertion_split_sequence(r, ins_problems[num_thread], ip_to_solve, existing[num_thread], min_problem, strictly_less_or_less_equal);
+		    break;
+		  }
+	      }
+	    // A thread leaves once its own queue is empty and one
+	    // round over all other queues found nothing to steal.
+	  } while (change);
+      }
+
+      t.tic("merging");
+
+      // Update root and sizes.
+      base_type::_M_root() = root_problem->t;
+      root_problem->t->_M_parent = &(base_type::_M_impl._M_header);
+      /*****	Dealing with repetitions (EFFICIENCY ISSUE) *****/
+
+      // Add the k elements that wanted to be inserted, minus the ones
+      // that were repeated.
+#if _GLIBCXX_TREE_INITIAL_SPLITTING
+      base_type::_M_impl._M_node_count += (k - (rank_shift[num_threads]));
+#else
+      base_type::_M_impl._M_node_count += k;
+#endif
+      // Also then, take out the ones that were already existing in the tree.
+      for (int i = 0; i< num_threads; ++i)
+	{
+	  base_type::_M_impl._M_node_count -= existing[i];
+	}
+      // Update leftmost and rightmost.
+      // Note that --access[num_threads] below modifies the caller's
+      // access array so that its last entry refers to the last input
+      // element.
+      /*****	Dealing with repetitions (EFFICIENCY ISSUE) *****/
+      if (not strictly_less_or_less_equal(base_type::_S_key(base_type::_M_root()), base_type::_S_key(base_type::_M_root()))){
+	// Unique container.
+	// A strictly smaller (larger) input key cannot have been
+	// dropped as "existing", so its node is the new extreme.
+	if (base_type::_M_impl._M_key_compare(_KeyOfValue()(*(access[0])), base_type::_S_key(base_type::_M_leftmost())))
+	  base_type::_M_leftmost() = r[0];
+	if (base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_M_rightmost()), _KeyOfValue()(*(--access[num_threads]))))
+	  base_type::_M_rightmost() = r[last - 1];
+      }
+      else{
+	// Not unique container: the extremes are looked up again in
+	// the tree instead of being read from the node array.
+	if (strictly_less_or_less_equal(_KeyOfValue()(*(access[0])), base_type::_S_key(base_type::_M_leftmost())))
+	  base_type::_M_leftmost() = base_type::_S_minimum(base_type::_M_root());
+	if (strictly_less_or_less_equal(base_type::_S_key(base_type::_M_rightmost()), _KeyOfValue()(*(--access[num_threads]))))
+	  base_type::_M_rightmost() =  base_type::_S_maximum(base_type::_M_root());
+      }
+
+
+
+
+#if _GLIBCXX_TREE_INITIAL_SPLITTING
+      // Delete root problem
+      // (the child problems are deleted by
+      // _M_bulk_insertion_merge_concatenate as they get concatenated)
+      delete root_problem;
+#endif
+
+      // Delete queues
+      for (int pos = 0; pos < num_threads; ++pos)
+	{
+	  delete ins_problems[pos];
+	}
+
+      // Delete array of pointers
+      ::operator delete(r);
+
+      t.tic();
+      t.print();
+    }
+
+
+    /** @brief Divide a tree according to the splitter elements of a
+     * given sequence.
+     *
+     * The tree of the initial recursive call is divided in exactly
+     * num_threads partitions, some of which may be empty. Besides,
+     * some nodes may be extracted from it to afterwards concatenate
+     * the subtrees resulting from inserting the elements into it.
+     * This is done sequentially. It could be done in parallel but the
+     * performance is much worse.
+     * @param t Root of the tree to be split
+     * @param r Array of nodes to be inserted into the tree (here only
+     * used to look up its elements)
+     * @param access Array of iterators of size @c num_threads +
+     * 1. Each position contains the first value in the subsequence
+     * that has been copied into the corresponding tree node.
+     * @param beg_partition Array of positions of size @c num_threads
+     * + 1. Each position contains the rank of the first element in
+     * the array of nodes to be inserted.
+     * @param rank_shift Array of size @c num_threads + 1 containing
+     * the number of accumulated gaps at the beginning of each
+     * partition
+     * @param pos_beg First position in the access array to be
+     * considered to split @c t
+     * @param pos_end Last position (included) in the access array to
+     * be considered to split @c t
+     * @param conc Array of concatenation problems to be initialized
+     * @param num_threads Number of threads and corresponding
+     * subsequences in which the original sequence has been
+     * partitioned
+     * @param strictly_less_or_less_equal Comparator to deal
+     * transparently with repetitions with respect to the uniqueness
+     * of the wrapping container
+     */
+    template<typename _Iterator, typename StrictlyLessOrLessEqual>
+    void
+    _M_bulk_insertion_split_tree_by_pivot(_Rb_tree_node_ptr t, _Rb_tree_node_ptr* r, _Iterator* access,  size_type* beg_partition, size_type* rank_shift, const size_type pos_beg, const size_type pos_end, concat_problem** conc, const thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      // Layout of conc: entry 2*p receives the subtree for partition
+      // p, entry 2*p-1 the single node separating partitions p-1 and p.
+
+      // Base case 1: only one partition left, it gets the whole tree.
+      if (pos_beg == pos_end)
+	{
+	  //Elements are in [pos_beg, pos_end]
+	  conc[pos_beg*2]->t = t;
+	  conc[pos_beg*2]->black_h = black_height(t);
+	  force_black_root (conc[pos_beg*2]->t, conc[pos_beg*2]->black_h);
+	  return;
+	}
+      // Base case 2: nothing left to split, so every partition in the
+      // range gets an empty subtree and no separating node.
+      if (t == 0)
+	{
+	  for (size_type i = pos_beg; i < pos_end; ++i)
+	    {
+	      conc[i*2]->t = NULL;
+	      conc[i*2]->black_h = 0;
+	      conc[i*2+1]->t = NULL;
+	    }
+	  conc[pos_end*2]->t = NULL;
+	  conc[pos_end*2]->black_h = 0;
+	  return;
+	}
+
+      // Locate the partition the root key falls into: take the upper
+      // bound of key(t) among the splitters access[pos_beg..pos_end]
+      // and step back by one, unless that is already pos_beg.
+      // Search in the range [pos_beg, pos_end]
+      size_type pos = std::upper_bound(access + pos_beg, access + pos_end + 1, base_type::_S_key(t), compare_value_key<_Iterator, _Compare>(base_type::_M_impl._M_key_compare)) - access;
+      if (pos != pos_beg)
+	{
+	  --pos;
+	}
+      _GLIBCXX_PARALLEL_ASSERT(pos == 0 or not base_type::_M_impl._M_key_compare(base_type::_S_key(t), _KeyOfValue()(*access[pos])));
+
+
+      // ll/lr and rl/rr: the two parts each child subtree is split
+      // into.  Only lr and rl (with their black heights) are used
+      // below; they are set on both paths of the respective if/else.
+      _Rb_tree_node_ptr ll, lr;
+      int black_h_ll, black_h_lr;
+      _Rb_tree_node_ptr rl, rr;
+      int black_h_rl, black_h_rr;
+
+      if (pos != pos_beg)
+	{
+	  // Partitions exist to the left of pos: split the left child
+	  // at splitter access[pos].  prev is the last node stored for
+	  // partition pos-1 in the node array (gaps discounted).
+	  _Rb_tree_node_ptr prev = r[beg_partition[pos] - 1 - (rank_shift[pos] - rank_shift[pos - 1])];
+
+	  _GLIBCXX_PARALLEL_ASSERT(strictly_less_or_less_equal(base_type::_S_key(prev), _KeyOfValue()(*access[pos])));
+
+	  split(static_cast<_Rb_tree_node_ptr>(t->_M_left),
+		static_cast<const key_type&>(_KeyOfValue()(*access[pos])),
+		static_cast<const key_type&>(base_type::_S_key(prev)),
+		conc[pos*2-1]->t, ll, lr, black_h_ll, black_h_lr,
+		strictly_less_or_less_equal);
+
+	  // The left part is distributed over partitions pos_beg..pos-1.
+	  _M_bulk_insertion_split_tree_by_pivot(ll, r, access, beg_partition, rank_shift, pos_beg, pos-1, conc,num_threads, strictly_less_or_less_equal);
+	}
+      else
+	{
+	  // No partition to the left: the whole left child stays with
+	  // partition pos.
+	  lr = static_cast<_Rb_tree_node_ptr>(t->_M_left);
+	  black_h_lr = black_height (lr);
+	  force_black_root (lr, black_h_lr);
+	}
+
+      if (pos != pos_end)
+	{
+	  // Partitions exist to the right of pos: split the right child
+	  // at splitter access[pos+1], symmetrically to the left side.
+	  _Rb_tree_node_ptr prev = r[beg_partition[pos+1] - 1 - (rank_shift[pos+1] - rank_shift[pos])];
+
+	  _GLIBCXX_PARALLEL_ASSERT(not base_type::_M_impl._M_key_compare(_KeyOfValue()(*access[pos+1]), base_type::_S_key(prev)));
+	  _GLIBCXX_PARALLEL_ASSERT(strictly_less_or_less_equal(base_type::_S_key(prev), _KeyOfValue()(*access[pos+1])));
+
+	  split(static_cast<_Rb_tree_node_ptr>(t->_M_right),
+		static_cast<const key_type&>(_KeyOfValue()(*access[pos+1])),
+		static_cast<const key_type&>(base_type::_S_key(prev)),
+		conc[pos*2+1]->t, rl, rr, black_h_rl, black_h_rr,
+		strictly_less_or_less_equal);
+
+	  // The right part is distributed over partitions pos+1..pos_end.
+	  _M_bulk_insertion_split_tree_by_pivot(rr, r, access, beg_partition, rank_shift, pos+1, pos_end, conc,num_threads, strictly_less_or_less_equal);
+	}
+      else
+	{
+	  // No partition to the right: the whole right child stays with
+	  // partition pos.
+	  rl = static_cast<_Rb_tree_node_ptr>(t->_M_right);
+	  black_h_rl = black_height (rl);
+	  force_black_root (rl, black_h_rl);
+	}
+
+      // When key(t) is equal to key(access[pos]) and no other key in
+      // the left tree satisfies the criteria to be conc[pos*2-1]->t,
+      // key(t) must be assigned to it to avoid repetitions.
+      // Therefore, we do not have a root parameter for the
+      // concatenate function and a new concatenate function must be
+      // provided.
+      if (pos != pos_beg and conc[pos*2-1]->t == NULL and not strictly_less_or_less_equal(_KeyOfValue()(*access[pos]), base_type::_S_key(t)))
+	{
+	  conc[pos*2-1]->t = t;
+	  t = NULL;
+	}
+      // Rebuild the subtree for partition pos from the two inner
+      // parts, joined through t (or without a joining node if t was
+      // handed over as separator above).
+      concatenate(t, lr, rl, black_h_lr, black_h_rl, conc[pos*2]->t, conc[pos*2]->black_h);
+    }
+
+    /** @brief Divide the insertion problem until a leaf is reached or
+     * the problem is small.
+     *
+     *  During the recursion, the right subproblem is queued, so that
+     *  it can be handled by any thread.  The left subproblem is
+     *  divided recursively, and finally, solved right away
+     *  sequentially.
+     * @param r Array of nodes containing the nodes to be added into the tree
+     * @param ins_problems Pointer to a queue of insertion
+     * problems. The calling thread owns this queue, i.e. it is the
+     * only one to push elements, but other threads could pop elements
+     * from it in other methods.
+     * @param ip Current insertion problem to be solved
+     * @param existing Number of existing elements found when solving
+     * the insertion problem (out)
+     * @param min_problem Threshold size on the size of the insertion
+     * problem in which to stop recursion
+     * @param strictly_less_or_less_equal Comparator to deal
+     * transparently with repetitions with respect to the uniqueness
+     * of the wrapping container
+     */
+    template<typename StrictlyLessOrLessEqual>
+    void
+    _M_bulk_insertion_split_sequence(_Rb_tree_node_ptr* r, RestrictedBoundedConcurrentQueue<insertion_problem>* ins_problems,  insertion_problem& ip, size_type& existing, const size_type min_problem, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      _GLIBCXX_PARALLEL_ASSERT(ip.t == ip.conc->t);
+
+      // Stop dividing when a leaf has been reached or the range of
+      // nodes is not larger than the threshold.
+      const bool solve_now = (ip.t == NULL) or ((ip.pos_end - ip.pos_beg) <= min_problem);
+      if (solve_now)
+	{
+	  // Solve the problem sequentially, right away.
+	  _GLIBCXX_PARALLEL_ASSERT(ip.pos_beg <= ip.pos_end);
+	  _M_bulk_insertion_merge_concatenate(r, ip, existing, strictly_less_or_less_equal);
+	  return;
+	}
+
+      // Partition the node range around the key of the subtree root.
+      size_type right_first;
+      const size_type left_past_last = divide(r, ip.pos_beg, ip.pos_end, base_type::_S_key(ip.t), right_first, existing, strictly_less_or_less_equal);
+
+      // Both children share one black height: one less than the
+      // parent's if the root is black, the same if it is red.
+      concat_problem* const parent = ip.conc;
+      const int child_black_h = (ip.t->_M_color == std::_S_black) ? parent->black_h - 1 : parent->black_h;
+
+      // Create the two child concatenation problems (right one first).
+      parent->right_problem = new concat_problem(static_cast<_Rb_tree_node_ptr>(ip.t->_M_right), child_black_h, parent);
+      parent->left_problem = new concat_problem(static_cast<_Rb_tree_node_ptr>(ip.t->_M_left), child_black_h, parent);
+
+      // The right half is queued, so it can be picked up later by
+      // this thread or taken by another one.
+      ins_problems->push_front(insertion_problem(right_first, ip.pos_end, ip.array_partition, parent->right_problem));
+
+      // The left half is divided further immediately.
+      insertion_problem left_half(ip.pos_beg, left_past_last, ip.array_partition, parent->left_problem);
+      _M_bulk_insertion_split_sequence(r, ins_problems, left_half, existing, min_problem, strictly_less_or_less_equal);
+    }
+
+
+    /** @brief Insert a sequence of elements into a tree using a
+     * divide-and-conquer scheme.
+     *
+     * The problem is solved recursively and sequentially dividing the
+     * sequence to be inserted according to the root of the tree. This
+     * is done until a leaf is reached or the proportion of elements
+     * to be inserted is small. Finally, the two resulting trees are
+     * concatenated.
+     *  @param r_array Array of nodes containing the nodes to be added
+     *  into the tree (among others)
+     *  @param t Root of the tree
+     *  @param pos_beg Position of the first node in the array of
+     *  nodes to be inserted into the tree
+     *  @param pos_end Position of the first node in the array of
+     *  nodes that will not be inserted into the tree
+     *  @param existing Number of existing elements found while
+     *  inserting the range [@c pos_beg, @c pos_end) (out)
+     *  @param black_h Height of the tree @c t and of the resulting
+     *  tree after the recursive calls (in and out)
+     *  @param strictly_less_or_less_equal Comparator to deal
+     *  transparently with repetitions with respect to the uniqueness
+     *  of the wrapping container
+     *  @return Resulting tree after the elements have been inserted
+     */
+    template<typename StrictlyLessOrLessEqual>
+    _Rb_tree_node_ptr _M_bulk_insertion_merge(_Rb_tree_node_ptr* r_array, _Rb_tree_node_ptr t, const size_type pos_beg, const size_type pos_end,  size_type& existing, int& black_h, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+#ifndef NDEBUG
+      // Only needed as out-argument of rb_verify_tree in the assertions.
+      int count;
+#endif
+      _GLIBCXX_PARALLEL_ASSERT(pos_beg<=pos_end);
+
+      const bool nothing_to_insert = (pos_end == pos_beg);
+
+      // Empty subtree: build a fresh tree out of the whole range.
+      // simple_tree_construct also reports its black height through
+      // black_h.  With an empty range there is simply no tree.
+      if (t == NULL)
+	{
+	  if (nothing_to_insert)
+	    return NULL;
+	  t = simple_tree_construct(r_array, pos_beg, pos_end, black_h);
+	  _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(t,count));
+	  return t;
+	}
+
+      // Existing subtree but no nodes to add: leave it untouched.
+      if (nothing_to_insert)
+	return t;
+
+      // Few nodes compared with the height of the tree: insert them
+      // one by one instead of dividing any further.
+      const size_type num_new = pos_end - pos_beg;
+      if (num_new <= static_cast<size_type>(black_h))
+	{
+	  size_type p = pos_beg;
+	  while (p < pos_end)
+	    {
+	      t = _M_insert_local(t, r_array[p], existing, black_h, strictly_less_or_less_equal);
+	      ++p;
+	    }
+	  _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(t,count));
+	  return t;
+	}
+
+      // General case: partition the range around the root key ...
+      size_type right_first;
+      const size_type left_past_last = divide(r_array, pos_beg, pos_end, base_type::_S_key(t), right_first, existing, strictly_less_or_less_equal);
+
+      // ... work out the black height of the children (one less than
+      // the parent's below a black root, the same below a red one) ...
+      int left_black_h = (t->_M_color == std::_S_black) ? black_h - 1 : black_h;
+      int right_black_h = left_black_h;
+
+      // ... merge recursively into each child, whose root is forced
+      // to black beforehand ...
+      force_black_root(t->_M_left, left_black_h);
+      _Rb_tree_node_ptr left_tree = _M_bulk_insertion_merge(r_array, static_cast<_Rb_tree_node_ptr>(t->_M_left), pos_beg, left_past_last, existing, left_black_h, strictly_less_or_less_equal);
+      force_black_root(t->_M_right, right_black_h);
+      _Rb_tree_node_ptr right_tree = _M_bulk_insertion_merge(r_array, static_cast<_Rb_tree_node_ptr>(t->_M_right), right_first, pos_end, existing, right_black_h, strictly_less_or_less_equal);
+
+      // ... and join both results again through the old root.
+      concatenate(t, left_tree, right_tree, left_black_h, right_black_h, t, black_h);
+
+      return t;
+    }
+
+    /** @brief Solve a given insertion problem and all the parent
+     * concatenation problems that are ready to be solved.
+     *
+     *  First, solve an insertion problem.
+
+     *  Then, check if it is possible to solve the parent
+     *  concatenation problem. If this is the case, solve it and go
+     *  up recursively, as far as possible. Quit otherwise.
+     *
+     *  @param r Array of nodes containing the nodes to be added into
+     *  the tree (among others)
+     *  @param ip Insertion problem to solve initially.
+     *  @param existing Number of existing elements found while
+     *  inserting the range defined by the insertion problem (out)
+     *  @param strictly_less_or_less_equal Comparator to deal
+     *  transparently with repetitions with respect to the uniqueness
+     *  of the wrapping container
+     */
+    template<typename StrictlyLessOrLessEqual>
+    void _M_bulk_insertion_merge_concatenate(_Rb_tree_node_ptr* r, insertion_problem& ip, size_type& existing, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      _GLIBCXX_PARALLEL_ASSERT(ip.pos_beg <= ip.pos_end);
+
+      // Solve the insertion problem itself; the resulting tree and its
+      // black height are stored in the associated concatenation problem.
+      concat_problem* current = ip.conc;
+      current->t = _M_bulk_insertion_merge(r, ip.t, ip.pos_beg, ip.pos_end, existing, current->black_h, strictly_less_or_less_equal);
+      _GLIBCXX_PARALLEL_ASSERT(current->t == NULL or current->t->_M_color == std::_S_black);
+
+      // Climb towards the root for as long as the parent problem can
+      // be solved by this thread.
+      while (current->par_problem != NULL)
+	{
+	  // Pre: exists left and right problem, so there is not a deadlock
+	  // If the flag could still be switched from READY_NO to
+	  // READY_YES, this thread is the first of the two children to
+	  // arrive; the sibling is still pending, so stop here.
+	  // NOTE(review): presumes compare_and_swap returns true when
+	  // the swap took place -- confirm.
+	  if (compare_and_swap(&current->par_problem->is_ready, concat_problem::READY_NO,  concat_problem::READY_YES))
+	    break;
+
+	  concat_problem* const parent = current->par_problem;
+	  _GLIBCXX_PARALLEL_ASSERT(parent->left_problem!=NULL and parent->right_problem!=NULL);
+	  _GLIBCXX_PARALLEL_ASSERT (parent->left_problem->black_h >=0 and parent->right_problem->black_h>=0);
+
+	  // Both children are solved: join them through the parent's node.
+	  concatenate(parent->t, parent->left_problem->t, parent->right_problem->t, parent->left_problem->black_h,  parent->right_problem->black_h, parent->t, parent->black_h);
+
+	  // The child problems are no longer needed.
+	  delete parent->left_problem;
+	  delete parent->right_problem;
+
+	  current = parent;
+	}
+    }
+
+    // Beginning of the sorting, searching and related comparison-based helper methods.
+
+    /** @brief Check whether a random-access sequence is sorted, and
+     * calculate its size.
+     *
+     *  Each element is compared with its predecessor through a
+     *  @c gr_or_eq functor built on the key comparator of the tree.
+     *  An empty sequence is reported as sorted, with size 0.
+     *  @param __first Begin iterator of sequence.
+     *  @param __last End iterator of sequence.
+     *  @param dist Size of the sequence (out)
+     *  @return sequence is sorted. */
+    template<typename _RandomAccessIterator>
+    bool
+    is_sorted_distance(const _RandomAccessIterator __first, const _RandomAccessIterator __last, size_type& dist, std::random_access_iterator_tag) const
+    {
+      dist = __last - __first;
+
+      // Guard the empty range: __first + 1 would step past __last and
+      // hand an invalid range to equal().
+      if (__first == __last)
+	return true;
+
+      gr_or_eq<_Compare, _RandomAccessIterator> geq(base_type::_M_impl._M_key_compare);
+
+      // In parallel: pair every element [__first + 1, __last) with the
+      // element right before it.
+      return equal(__first + 1, __last, __first, geq);
+    }
+
+    /** @brief Check whether an input sequence is sorted, and
+     * calculate its size.
+     *
+     *  The sequence is traversed exactly once. Keys of neighbouring
+     *  elements are compared until the first inversion is found;
+     *  afterwards the remaining elements are only counted.
+     *  An empty sequence is reported as sorted, with size 0.
+     *  @param __first Begin iterator of sequence.
+     *  @param __last End iterator of sequence.
+     *  @param dist Size of the sequence (out)
+     *  @return sequence is sorted. */
+    template<typename _InputIterator>
+    bool
+    is_sorted_distance(const _InputIterator __first, const _InputIterator __last, size_type& dist, std::input_iterator_tag) const
+    {
+      // Guard the empty range: the code below increments the begin
+      // iterator unconditionally and starts counting at one.
+      dist = 0;
+      if (__first == __last)
+	return true;
+
+      bool is_sorted = true;
+      _InputIterator it = __first;
+      _InputIterator prev = it++;
+      dist = 1;
+      for (; it != __last; ++it)
+	{
+	  ++dist;
+	  // Once an inversion has been seen no more comparisons are made.
+	  if (is_sorted and base_type::_M_impl._M_key_compare(_KeyOfValue()(*it),_KeyOfValue()(*prev)))
+	    is_sorted = false;
+	  prev = it;
+	}
+      return is_sorted;
+    }
+
+    /** @brief Sortedness check, size computation and partitioning of
+     * a random-access sequence in one call.
+     *
+     *  The work is delegated: @c is_sorted_distance yields the result
+     *  and the size, @c range_accessors fills the two arrays. If the
+     *  sequence has fewer elements than @c num_pieces, @c num_pieces
+     *  is lowered to the number of elements first.
+     *  @param __first Begin iterator of sequence.
+     *  @param __last End iterator of sequence.
+     *  @param access Array of size @c num_pieces + 1 that defines @c
+     *  num_pieces subsequences of the original sequence (out). Each
+     *  position @c i will contain an iterator to the first element in
+     *  the subsequence @c i.
+     *  @param beg_partition Array of size @c num_pieces + 1 that
+     *  defines @c num_pieces subsequences of the original sequence
+     *  (out). Each position @c i will contain the rank of the first
+     *  element in the subsequence @c i.
+     *  @param dist Size of the sequence (out)
+     *  @param num_pieces Number of pieces to generate (in); number of
+     *  pieces actually generated (out).
+     *  @return Sequence is sorted. */
+    template<typename _RandomAccessIterator>
+    bool
+    is_sorted_distance_accessors(const _RandomAccessIterator __first, const _RandomAccessIterator __last,  _RandomAccessIterator* access, size_type* beg_partition, size_type& dist, thread_index_t& num_pieces, std::random_access_iterator_tag) const
+    {
+      const bool sorted = is_sorted_distance(__first, __last, dist, std::__iterator_category(__first));
+
+      // Never generate more pieces than there are elements.
+      if (dist < static_cast<unsigned int>(num_pieces))
+	num_pieces = dist;
+
+      // Open question kept from the original author: do it the
+      // opposite way, to use the accessors in the equal function???
+      range_accessors(__first, __last, access, beg_partition, dist, num_pieces, std::__iterator_category(__first));
+
+      return sorted;
+    }
+
+    /** @brief Sortedness check, size computation and partitioning of
+     * an input sequence in one call.
+     *
+     *  Everything is obtained from a single @c list_partition call:
+     *  its return value is stored as the size, it is handed an
+     *  @c is_sorted_functor that is queried for the result afterwards,
+     *  and it fills @c access and the piece sizes.
+     *  @param __first Begin iterator of sequence.
+     *  @param __last End iterator of sequence.
+     *  @param access Array of size @c num_pieces + 1 that defines @c
+     *  num_pieces subsequences of the original sequence (out). Each
+     *  position @c i will contain an iterator to the first element in
+     *  the subsequence @c i.
+     *  @param beg_partition Array of size @c num_pieces + 1 that
+     *  defines @c num_pieces subsequences of the original sequence
+     *  (out). Each position @c i will contain the rank of the first
+     *  element in the subsequence @c i.
+     *  @param dist Size of the sequence (out)
+     *  @param num_pieces Number of pieces to generate.
+     *  @return Sequence is sorted. */
+    template<typename _InputIterator>
+    bool
+    is_sorted_distance_accessors(const _InputIterator __first, const _InputIterator __last, _InputIterator* access,  size_type* beg_partition, size_type& dist, thread_index_t& num_pieces, std::input_iterator_tag) const
+    {
+      is_sorted_functor<_InputIterator, _Compare> sorted(__first, base_type::_M_impl._M_key_compare);
+
+      // The piece sizes are written one slot to the right, so that
+      // slot 0 stays free for the rank of the very first element.
+      size_type* const piece_sizes = beg_partition + 1;
+      dist = list_partition(__first, __last, access, piece_sizes, num_pieces, sorted, 0);
+
+      // Turn the sizes into ranks with a running sum: the rank of the
+      // beginning of a piece is the total size of all pieces before it.
+      beg_partition[0] = 0;
+      for (int i = 1; i <= num_pieces; ++i)
+	beg_partition[i] += beg_partition[i-1];
+
+      return sorted.is_sorted();
+    }
+
+    /** @brief Make a full copy of the elements of a sequence
+     *
+     *  The uninitialized_copy method from the stl is called once per
+     *  partition, using the access array to point to the beginning
+     *  of each partition. The partitions are distributed over an
+     *  OpenMP team with a work-sharing loop, so every partition is
+     *  copied even if the team ends up with fewer threads than
+     *  requested.
+     *  @param access Array of size @c num_threads + 1 that defines @c
+     *  num_threads subsequences. Each position @c i contains an
+     *  iterator to the first element in the subsequence @c i.
+     *  @param beg_partition Array of size @c num_threads + 1 that
+     *  defines @c num_threads subsequences. Each position @c i
+     *  contains the rank of the first element in the subsequence @c
+     *  i.
+     *  @param out Begin iterator of output sequence.
+     *  @param num_threads Number of partitions, and number of threads
+     *  requested. */
+    template<typename _InputIterator, typename _OutputIterator>
+    static void
+    uninitialized_copy_from_accessors(_InputIterator* access, size_type* beg_partition, _OutputIterator out, const thread_index_t num_threads)
+    {
+      // schedule(static, 1) hands partition i to thread i when the
+      // team has the requested size, and wraps around otherwise.
+#pragma omp parallel for num_threads(num_threads) schedule(static, 1)
+      for (int piece = 0; piece < num_threads; ++piece)
+	uninitialized_copy(access[piece], access[piece+1], out+beg_partition[piece]);
+    }
+
+    /** @brief Make a copy of the pointers of the elements of a sequence
+     *
+     *  For every element of every partition, its address is assigned
+     *  to the corresponding position of the output sequence. The
+     *  partitions are distributed over an OpenMP team with a
+     *  work-sharing loop, so every partition is processed even if the
+     *  team ends up with fewer threads than requested.
+     *  @param access Array of size @c num_threads + 1 that defines @c
+     *  num_threads subsequences. Each position @c i contains an
+     *  iterator to the first element in the subsequence @c i.
+     *  @param beg_partition Array of size @c num_threads + 1 that
+     *  defines @c num_threads subsequences. Each position @c i
+     *  contains the rank of the first element in the subsequence @c
+     *  i.
+     *  @param out Begin iterator of output sequence.
+     *  @param num_threads Number of partitions, and number of threads
+     *  requested. */
+    template<typename _InputIterator, typename _OutputIterator>
+    static void
+    uninitialized_ptr_copy_from_accessors(_InputIterator* access, size_type* beg_partition, _OutputIterator out, const thread_index_t num_threads)
+    {
+      // schedule(static, 1) hands partition i to thread i when the
+      // team has the requested size, and wraps around otherwise.
+#pragma omp parallel for num_threads(num_threads) schedule(static, 1)
+      for (int piece = 0; piece < num_threads; ++piece)
+	{
+	  _OutputIterator itout = out + beg_partition[piece];
+	  for (_InputIterator it = access[piece]; it != access[piece+1]; ++it)
+	    {
+	      *itout = &(*it);
+	      ++itout;
+	    }
+	}
+    }
+
+    /** @brief Split a sorted node array in two parts according to a key.
+     *
+     *  The split position is found with a binary search
+     *  (@c std::lower_bound). If the node at that position is not
+     *  accepted by @c strictly_less_or_less_equal with respect to the
+     *  key (for unique containers: the key is already in the array),
+     *  that node is destroyed, its slot is set to NULL and it belongs
+     *  to neither part.
+     *  @param r Array of nodes containing the nodes to split (among others)
+     *  @param pos_beg Position of the first node in the array of
+     *  nodes to be considered
+     *  @param pos_end Position of the first node in the array of
+     *  nodes to be not considered
+     *  @param key Splitting key
+     *  @param pos_beg_right Position of the first node in the
+     *  resulting right partition (out)
+     *  @param existing Number of existing elements before dividing
+     *  (in) and after (out). Specifically, the counter is
+     *  incremented by one if the node at the split position was
+     *  destroyed.
+     *  @param strictly_less_or_less_equal Comparator to deal
+     *  transparently with repetitions with respect to the uniqueness
+     *  of the wrapping container
+     *  @return Position of the last node (not included) in the
+     *  resulting left partition
+     */
+    template<typename StrictlyLessOrLessEqual>
+    size_type
+    divide(_Rb_tree_node_ptr* r, const size_type pos_beg, const size_type pos_end, const key_type& key, size_type& pos_beg_right, size_type& existing, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      // First position in [pos_beg, pos_end) whose key is not less
+      // than the splitting key.
+      _Rb_tree_node_ptr* const split_point = std::lower_bound(r + pos_beg, r + pos_end, key, compare_node_key<_Compare>(base_type::_M_impl._M_key_compare));
+      pos_beg_right = split_point - r;
+
+      // The left part always ends where the search stopped.
+      const size_type left_end = pos_beg_right;
+
+      /*****	Dealing with repetitions (CORRECTNESS ISSUE) *****/
+      _GLIBCXX_PARALLEL_ASSERT((pos_beg_right == pos_end) or not base_type::_M_impl._M_key_compare(base_type::_S_key(r[pos_beg_right]),key));
+      _GLIBCXX_PARALLEL_ASSERT((pos_beg_right + 1 >= pos_end) or strictly_less_or_less_equal(key, base_type::_S_key(r[pos_beg_right + 1])));
+
+      // If r[pos_beg_right] is equal to key, it must be erased.
+      const bool drop_node = pos_beg_right != pos_end and not strictly_less_or_less_equal(key, base_type::_S_key(r[pos_beg_right]));
+      if (drop_node)
+	{
+	  _M_destroy_node(r[pos_beg_right]);
+	  r[pos_beg_right] = NULL;
+	  ++existing;
+	  ++pos_beg_right;
+	}
+
+      _GLIBCXX_PARALLEL_ASSERT(left_end <= pos_beg_right and pos_beg_right <= pos_end and left_end >= pos_beg);
+      return left_end;
+    }
+
+
+    /** @brief Parallelization helper method: Given a random-access
+	sequence of known size, divide it into pieces of almost the
+	same size.
+     *
+     *  Every piece but the last one gets
+     *  (@c __last - @c __first) / @c num_pieces elements; the last
+     *  piece takes whatever remains. Rank 0 is written by this method,
+     *  so @c beg_partition needs no initialization by the caller. If
+     *  @c num_pieces is zero, only position 0 of both arrays is set.
+     *  @param __first Begin iterator of sequence.
+     *  @param __last End iterator of sequence.
+     *  @param access Array of size @c num_pieces + 1 that defines @c
+     *  num_pieces subsequences (out). Each position @c i contains an
+     *  iterator to the first element in the subsequence @c i.
+     *  @param beg_partition Array of size @c num_pieces + 1 that
+     *  defines @c num_pieces subsequences (out). Each position @c i
+     *  contains the rank of the first element in the subsequence @c
+     *  i.
+     *  @param n Sequence size (not used by this overload, which
+     *  relies on iterator differences instead)
+     *  @param num_pieces Number of pieces. */
+    template<typename _RandomAccessIterator>
+    static void
+    range_accessors(const _RandomAccessIterator __first, const _RandomAccessIterator __last,  _RandomAccessIterator* access, size_type* beg_partition, const size_type n, const thread_index_t num_pieces, std::random_access_iterator_tag)
+    {
+      access[0] = __first;
+      // The ranks below are accumulated from this slot, so it must
+      // hold a defined value.
+      beg_partition[0] = 0;
+
+      // Without pieces there is no last piece to close; indexing with
+      // num_pieces - 1 would leave the arrays.
+      if (num_pieces < 1)
+	return;
+
+      const typename std::iterator_traits<_RandomAccessIterator>::difference_type chunk = (__last - __first) / num_pieces;
+      for (int i = 1; i < num_pieces; ++i)
+	{
+	  access[i] = access[i-1] + chunk;
+	  beg_partition[i] = beg_partition[i-1] + chunk;
+	}
+
+      // The last piece absorbs the remainder of the division.
+      beg_partition[num_pieces] = __last - access[num_pieces-1] + beg_partition[num_pieces-1];
+      access[num_pieces] = __last;
+    }
+
+    /** @brief Parallelization helper method: Given an input-access
+	sequence of known size, divide it into pieces of almost the
+	same size.
+     *
+     *  Every piece but the last one gets @c n / @c num_pieces
+     *  elements; the last piece takes whatever remains. The sequence
+     *  is walked element by element up to the beginning of the last
+     *  piece. Rank 0 is written by this method, so @c beg_partition
+     *  needs no initialization by the caller. If @c num_pieces is
+     *  zero, only position 0 of both arrays is set.
+     *  @param __first Begin iterator of sequence.
+     *  @param __last End iterator of sequence.
+     *  @param access Array of size @c num_pieces + 1 that defines @c
+     *  num_pieces subsequences (out). Each position @c i contains an
+     *  iterator to the first element in the subsequence @c i.
+     *  @param beg_partition Array of size @c num_pieces + 1 that
+     *  defines @c num_pieces subsequences (out). Each position @c i
+     *  contains the rank of the first element in the subsequence @c
+     *  i.
+     *  @param n Sequence size
+     *  @param num_pieces Number of pieces. */
+    template<typename _InputIterator>
+    static void
+    range_accessors(const _InputIterator __first, const _InputIterator __last, _InputIterator* access,  size_type* beg_partition, const size_type n, const thread_index_t num_pieces, std::input_iterator_tag)
+    {
+      access[0] = __first;
+      // The ranks below are accumulated from this slot, so it must
+      // hold a defined value.
+      beg_partition[0] = 0;
+
+      // Without pieces the chunk size would be a division by zero.
+      if (num_pieces < 1)
+	return;
+
+      const size_type chunk = n / num_pieces;
+      _InputIterator it = __first;
+      for (int i = 1; i < num_pieces; ++i)
+	{
+	  // Step over one full piece; the counter has the type of the
+	  // chunk size so that large sequences cannot overflow it.
+	  for (size_type j = 0; j < chunk; ++j)
+	    ++it;
+	  access[i] = it;
+	  beg_partition[i] = beg_partition[i-1] + chunk;
+	}
+
+      // The last piece absorbs the remainder of the division.
+      access[num_pieces] = __last;
+      beg_partition[num_pieces] = n - (num_pieces-1)*chunk + beg_partition[num_pieces-1];
+    }
+
+    /** @brief Link an array of concatenation problems as a binary
+	tree with (end - beg) leaves, for bulk insertion.
+     *
+     *  Leaf @c i is stored at the even position @c 2*i; the inner
+     *  problem separating the leaves below @c mid from the leaves
+     *  starting at @c mid is stored at the odd position @c 2*mid-1.
+     *  @param conc Array of concatenation problems pointers to initialize.
+     *  @param beg Rank of the first leaf to initialize
+     *  @param end Rank of the last (not included) leaf to initialize
+     *  @param parent Pointer to the parent concatenation problem.
+     *  @pre @c beg < @c end
+     *  @return Root of the problem tree built for the leaves
+     *  [@c beg, @c end).
+     */
+    static concat_problem*
+    _M_bulk_insertion_initialize_upper_problems(concat_problem** conc, const int beg, const int end, concat_problem* parent)
+    {
+      if (beg + 1 != end)
+	{
+	  // Several leaves: split them in two halves below an inner
+	  // problem and build both halves recursively.
+	  const int mid = beg + (end - beg)/2;
+	  concat_problem* const inner = conc[2*mid-1];
+	  inner->par_problem = parent;
+	  inner->left_problem = _M_bulk_insertion_initialize_upper_problems(conc, beg, mid, inner);
+	  inner->right_problem = _M_bulk_insertion_initialize_upper_problems(conc, mid, end, inner);
+	  return inner;
+	}
+
+      // Exactly one leaf: just hook it below the parent.
+      concat_problem* const leaf = conc[2*beg];
+      leaf->par_problem = parent;
+      return leaf;
+    }
+
+
+    /** @brief Determine the black height of a node.
+     *
+     *  Counts the black nodes on the path that starts at @c t and
+     *  always follows the left child until it runs out of nodes.
+     *  @param t Node (may be NULL).
+     *  @return Black height of the node; 0 for NULL. */
+    static int
+    black_height(const _Rb_tree_node_ptr t)
+    {
+      int blacks = 0;
+      for (_Rb_tree_node_ptr node = t; node != NULL; node = static_cast<_Rb_tree_node_ptr>(node->_M_left))
+	{
+	  if (node->_M_color == std::_S_black)
+	    ++blacks;
+	}
+      return blacks;
+    }
+
+    /** @brief Color a leaf black and report its black height.
+     *  @param t Leaf pointer; may be NULL, in which case nothing is
+     *  colored.
+     *  @param black_h Black height of @c t (out): 1 for a leaf, 0 for
+     *  NULL. */
+    static void
+    make_black_leaf(const _Rb_tree_node_ptr t, int& black_h)
+    {
+      if (t == NULL)
+	{
+	  black_h = 0;
+	  return;
+	}
+
+      // A leaf has no children at all.
+      _GLIBCXX_PARALLEL_ASSERT(t->_M_left == NULL and t->_M_right == NULL);
+      t->_M_color = std::_S_black;
+      black_h = 1;
+    }
+
+    /** @brief Turn a node into a black leaf.
+     *
+     *  Besides coloring the node black, both of its child pointers
+     *  are reset to NULL.
+     *  @param t Node to turn into a leaf, not NULL.
+     *  @param black_h Black height of @c t (out), always 1. */
+    static void
+    make_leaf(const _Rb_tree_node_ptr t, int& black_h)
+    {
+      _GLIBCXX_PARALLEL_ASSERT(t != NULL);
+
+      // Detach whatever used to hang below the node.
+      t->_M_left = NULL;
+      t->_M_right = NULL;
+
+      t->_M_color = std::_S_black;
+      black_h = 1;
+    }
+
+    /** @brief Construct a tree from a root, a left subtree and a
+	right subtree.
+     *
+     *  @c S is the symmetry parameter: the children are attached
+     *  through @c S::left and @c S::right. The parent pointers of the
+     *  non-NULL subtrees are updated, and the root is colored red.
+     *  The parent pointer of the root itself is left untouched.
+     *  @param root Root of constructed tree.
+     *  @param l Root of left subtree.
+     *  @param r Root of right subtree.
+     *  @pre @c l, @c r are black.
+     *  @return @c root
+     */
+    template<typename S>
+    static _Rb_tree_node_ptr
+    plant(const _Rb_tree_node_ptr root, const _Rb_tree_node_ptr l, const _Rb_tree_node_ptr r)
+    {
+      root->_M_color = std::_S_red;
+
+      // Hook in one side ...
+      S::left(root) = l;
+      if (l != NULL)
+	l->_M_parent = root;
+
+      // ... and then the other one.
+      S::right(root) = r;
+      if (r != NULL)
+	r->_M_parent = root;
+
+      return root;
+    }
+
+    /** @brief Concatenate two red-black subtrees using an
+	intermediate node, which might be NULL
+     *
+     *  This is the dispatcher: it compares the black heights and
+     *  calls the symmetric implementation with the taller tree first
+     *  (@c LeftRight if the left tree is strictly taller,
+     *  @c RightLeft otherwise). If no intermediate node is given and
+     *  both trees are non-empty, one is extracted from the lower
+     *  tree: its minimum if that is the right tree, its maximum if
+     *  that is the left tree. If no intermediate node is given and
+     *  the lower tree is empty, the other tree is the result.
+     *  @param root Intermediate node.
+     *  @param l Left subtree.
+     *  @param r Right subtree.
+     *  @param black_h_l Black height of left subtree.
+     *  @param black_h_r Black height of right subtree.
+     *  @param t Tree resulting of the concatenation
+     *  @param black_h Black height of the resulting tree
+     *  @pre @c l and @c r are NULL (with black height 0) or have a
+     *  black root (with black height > 0).
+     *  @post @c t is correct red-black tree with height @c black_h.
+     */
+    void
+    concatenate(_Rb_tree_node_ptr root, _Rb_tree_node_ptr l, _Rb_tree_node_ptr r,  int black_h_l, int black_h_r, _Rb_tree_node_ptr& t, int& black_h) const
+    {
+#ifndef NDEBUG
+      int count = 0, count1 = 0, count2 = 0;
+      // Only a node supplied by the caller is an additional node. A
+      // node extracted from l or r below has already been counted as
+      // part of that subtree.
+      const bool root_supplied = (root != NULL);
+#endif
+      _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(l, count1));
+      _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(r, count2));
+
+      _GLIBCXX_PARALLEL_ASSERT(l != NULL ? l->_M_color != std::_S_red and black_h_l > 0 : black_h_l == 0);
+      _GLIBCXX_PARALLEL_ASSERT(r != NULL ? r->_M_color != std::_S_red and black_h_r > 0 : black_h_r == 0);
+
+      if (black_h_l > black_h_r)
+	{
+	  // The left tree is strictly taller.
+	  if (root != NULL)
+	    concatenate<LeftRight>(root, l, r, black_h_l, black_h_r, t, black_h);
+	  else if (r == NULL)
+	    {
+	      // Nothing to append.
+	      t = l;
+	      black_h = black_h_l;
+	    }
+	  else
+	    {
+	      // XXX SHOULD BE the same as extract_min but slower.
+	      /*
+		 root = static_cast<_Rb_tree_node_ptr>(_Rb_tree_node_base::_S_minimum(r));
+		 split(r, _S_key(_Rb_tree_increment(root)), _S_key(root), root, t, r, black_h, black_h_r);
+	      */
+	      extract_min(r, root, r, black_h_r);
+	      _GLIBCXX_PARALLEL_ASSERT(root != NULL);
+	      concatenate<LeftRight>(root, l, r, black_h_l, black_h_r, t, black_h);
+	    }
+	}
+      else
+	{
+	  // The right tree is at least as tall: mirror image, the
+	  // right tree takes the role of the taller one.
+	  if (root != NULL)
+	    concatenate<RightLeft>(root, r, l, black_h_r, black_h_l, t, black_h);
+	  else if (l == NULL)
+	    {
+	      // Nothing to prepend.
+	      t = r;
+	      black_h = black_h_r;
+	    }
+	  else
+	    {
+	      // XXX SHOULD BE the same as extract_max but slower
+	      /*
+		 root = static_cast<_Rb_tree_node_ptr>(_Rb_tree_node_base::_S_maximum(l));
+		 split(l, _S_key(root), _S_key(_Rb_tree_decrement(root)), root, l, t, black_h_l, black_h);
+	      */
+	      extract_max(l, root, l, black_h_l);
+	      _GLIBCXX_PARALLEL_ASSERT(root != NULL);
+	      concatenate<RightLeft>(root, r, l, black_h_r, black_h_l, t, black_h);
+	    }
+	}
+#ifndef NDEBUG
+      if (root_supplied) ++count1;
+      _GLIBCXX_PARALLEL_ASSERT(t == NULL or t->_M_color == std::_S_black);
+      bool b = rb_verify_tree(t, count);
+      if (not b){
+	_GLIBCXX_PARALLEL_ASSERT(false);
+      }
+      // The result holds exactly the nodes that went in.
+      _GLIBCXX_PARALLEL_ASSERT(count1+count2 == count);
+#endif
+    }
+
+    /** @brief Concatenate two red-black subtrees using a non-NULL
+     * intermediate node.
+     *
+     *  @c S is the symmetry parameter: @c l is always the taller
+     *  tree, and @c S::right is the direction leading from @c l
+     *  towards @c r.
+     *
+     *  The method descends from the root of @c l along @c S::right
+     *  until the remaining black height equals @c black_h_r, skips
+     *  one more node if it is red, and plants @c rt at that place
+     *  with the reached subtree and @c r as children. If the descent
+     *  left at least one node behind, @c rt is attached below the
+     *  last one and the tree is rebalanced; otherwise @c rt becomes
+     *  the black root of the result.
+     *  @param rt Intermediate node.
+     *  @param l Taller subtree.
+     *  @param r Lower subtree.
+     *  @param black_h_l Black height of @c l.
+     *  @param black_h_r Black height of @c r.
+     *  @param t Tree resulting of the concatenation
+     *  @param black_h Black height of the resulting tree
+     *  @pre @c black_h_l >= @c black_h_r. @c rt != NULL
+     *  @post @c t is correct red-black tree with height @c black_h.
+     */
+    template<typename S>
+    static void
+    concatenate(const _Rb_tree_node_ptr rt, _Rb_tree_node_ptr l, _Rb_tree_node_ptr r, int black_h_l, int black_h_r, _Rb_tree_node_ptr& t, int& black_h)
+    {
+      _Rb_tree_node_base* root = l; // root of the taller tree, handed to the rebalancing below
+      _Rb_tree_node_ptr parent = NULL; // last node left behind by the descent, if any
+      black_h = black_h_l;
+      _GLIBCXX_PARALLEL_ASSERT(black_h_l >= black_h_r);
+      while (black_h_l != black_h_r)
+	{
+	  if (l->_M_color == std::_S_black)
+	    --black_h_l;
+	  parent = l;
+	  l = static_cast<_Rb_tree_node_ptr>(S::right(l));
+	  _GLIBCXX_PARALLEL_ASSERT((black_h_l == 0 and (l == NULL or l->_M_color == std::_S_red)) or (black_h_l != 0 and l != NULL));
+	  _GLIBCXX_PARALLEL_ASSERT((black_h_r == 0 and (r == NULL or r->_M_color == std::_S_red)) or (black_h_r != 0 and r != NULL));
+	}
+      if (l != NULL and l->_M_color == std::_S_red)
+	{
+	  /* The subtree planted below rt needs a black root, so step
+	   * over a red node. */
+	  parent = l;
+	  l = static_cast<_Rb_tree_node_ptr>(S::right(l));
+	}
+      _GLIBCXX_PARALLEL_ASSERT(l != NULL ? l->_M_color == std::_S_black : true);
+      _GLIBCXX_PARALLEL_ASSERT(r != NULL ? r->_M_color == std::_S_black : true);
+      t = plant<S>(rt, l, r); // rt comes back colored red
+      t->_M_parent = parent;
+      if (parent != NULL)
+	{
+	  S::right(parent) = t;
+	  black_h += _Rb_tree_rebalance(t, root); // the return value is added to the black height
+	  t = static_cast<_Rb_tree_node_ptr> (root);
+	}
+      else
+	{
+	  ++black_h; // rt is the new root: blackening it adds one black level
+	  t->_M_color = std::_S_black;
+	}
+      _GLIBCXX_PARALLEL_ASSERT(t->_M_color == std::_S_black);
+    }
+
+    /** @brief Split a tree according to key in three parts: a left
+     * child, a right child and an intermediate node.
+     *
+     *  This is the entry point: an empty tree is answered directly,
+     *  anything else is forwarded to @c split_not_null. There, trees
+     *  are concatenated once the recursive call returns. That is,
+     *  from bottom to top (ie. smaller to larger), so the cost
+     *  bounds for split hold.
+     *  @param t Root of the tree to split.
+     *  @param key Key to split according to.
+     *  @param prev_k Key to split the intermediate node
+     *  @param root Out parameter. If a node exists whose key is
+     *  smaller or equal than @c key, but strictly larger than @c
+     *  prev_k, this is returned. Otherwise, it is null.
+     *  @param l Root of left subtree returned, nodes less than @c key.
+     *  @param r Root of right subtree returned, nodes greater or
+     *  equal than @c key.
+     *  @param black_h_l Black height of the left subtree.
+     *  @param black_h_r Black height of the right subtree.
+     *  @param strictly_less_or_less_equal Comparator to deal
+     *  transparently with repetitions with respect to the uniqueness
+     *  of the wrapping container
+     *  @return Black height of t */
+    template<typename StrictlyLessOrEqual>
+    int
+    split(_Rb_tree_node_ptr t, const key_type& key, const key_type& prev_k, _Rb_tree_node_ptr& root, _Rb_tree_node_ptr& l,  _Rb_tree_node_ptr& r, int& black_h_l, int& black_h_r, StrictlyLessOrEqual strictly_less_or_less_equal) const
+    {
+      if (t == NULL)
+	{
+	  // Nothing to split: all three parts are empty.
+	  l = NULL;
+	  root = NULL;
+	  r = NULL;
+	  black_h_l = 0;
+	  black_h_r = 0;
+	  return 0;
+	}
+
+      // Must be initialized here: split_not_null assigns the
+      // intermediate node only on some of its paths.
+      root = NULL;
+      const int tree_black_h = split_not_null(t, key, prev_k, root, l, r, black_h_l, black_h_r, strictly_less_or_less_equal);
+#ifndef NDEBUG
+      // Everything on the left is less than the key, nothing on the
+      // right is.
+      _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key));
+      _GLIBCXX_PARALLEL_ASSERT(r == NULL or not base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_minimum(r)),key));
+      int count1, count2;
+      _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(l, count1));
+      _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(r, count2));
+      // The intermediate node, if any, lies in (prev_k, key].
+      _GLIBCXX_PARALLEL_ASSERT(root == NULL or base_type::_M_impl._M_key_compare(prev_k, base_type::_S_key(root)) and not base_type::_M_impl._M_key_compare(key, base_type::_S_key(root)));
+      _GLIBCXX_PARALLEL_ASSERT(root != NULL or l==NULL or  not base_type::_M_impl._M_key_compare(prev_k, base_type::_S_key(base_type::_S_maximum(l))));
+#endif
+      return tree_black_h;
+    }
+
+    /** @brief Split a tree according to key in three parts: a left
+     * child, a right child and an intermediate node.
+     *
+     *  Recursive worker behind the three-way @c split. At every node
+     *  the comparator decides the direction: if it accepts
+     *  (@c key, key of @c t) the search continues to the left and
+     *  @c t, together with its right subtree, is joined to the right
+     *  part once the recursion returns; otherwise the search
+     *  continues to the right and @c t, together with its left
+     *  subtree, goes to the left part -- unless @c t was chosen as
+     *  the intermediate node, in which case only its left subtree
+     *  does.
+     *
+     *  @c root is only ever assigned on the way to the right, and
+     *  only if the comparator accepts (@c prev_k, key of @c t). The
+     *  caller must therefore initialize it.
+     *  @param t Root of the tree to split.
+     *  @param key Key to split according to.
+     *  @param prev_k Key to split the intermediate node
+     *  @param root In/out parameter. If a node exists whose key is
+     *  smaller or equal than @c key, but strictly larger than @c
+     *  prev_k, this is returned. Otherwise, it is left as passed in.
+     *  @param l Root of left subtree returned, nodes less than @c key.
+     *  @param r Root of right subtree returned, nodes greater or
+     *  equal than @c key.
+     *  @param black_h_l Black height of the left subtree.
+     *  @param black_h_r Black height of the right subtree.
+     *  @param strictly_less_or_equal Comparator to deal transparently
+     *  with repetitions with respect to the uniqueness of the
+     *  wrapping container
+     *  @pre t != NULL
+     *  @return Black height of t */
+    template<typename StrictlyLessOrEqual>
+    int
+    split_not_null(const _Rb_tree_node_ptr t, const key_type& key, const key_type& prev_k, _Rb_tree_node_ptr& root, _Rb_tree_node_ptr& l, _Rb_tree_node_ptr& r, int& black_h_l, int& black_h_r, StrictlyLessOrEqual strictly_less_or_equal) const
+    {
+      _GLIBCXX_PARALLEL_ASSERT (t != NULL);
+      int black_h, b_h;
+      int black_node = 0; // 1 if t itself is black; read before t is recolored below
+      if (t->_M_color == std::_S_black)
+	++black_node;
+      if (strictly_less_or_equal(key, base_type::_S_key(t)))
+	{
+	  if (t->_M_left != NULL )
+	    {
+	      // Go to the left; the recursion fills root, l and r.
+	      // b_h starts as the black height returned for the left subtree.
+	      b_h = black_h = split_not_null( static_cast<_Rb_tree_node_ptr>(t->_M_left), key, prev_k, root, l, r, black_h_l, black_h_r, strictly_less_or_equal);
+	      // Join root and right subtree to already existing right
+	      // half, leave left subtree.
+	      force_black_root(t->_M_right, b_h);
+	      concatenate(t, r, static_cast<_Rb_tree_node_ptr>(t->_M_right), black_h_r, b_h, r, black_h_r);
+	    }
+	  else
+	    {
+	      // t->M_right is at most one node
+	      r = t;
+	      black_h_r = black_node;
+	      force_black_root(r, black_h_r);
+
+	      black_h = 0;
+	      l = NULL;
+	      black_h_l = 0;
+	    }
+	  _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key));
+	  _GLIBCXX_PARALLEL_ASSERT(r == NULL or not base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_minimum(r)),key));
+	}
+      else
+	{
+	  if (t->_M_right != NULL )
+	    {
+	      /* Go to the right. t is recorded as the intermediate node
+	       * first; a node found deeper on this path replaces it. */
+	      if (strictly_less_or_equal(prev_k, base_type::_S_key(t)))
+		root = t;
+	      b_h = black_h = split_not_null(static_cast<_Rb_tree_node_ptr>(t->_M_right), key, prev_k, root, l, r, black_h_l, black_h_r, strictly_less_or_equal);
+	      // Join root and left subtree to already existing left
+	      // half, leave right subtree.
+	      force_black_root(t->_M_left, b_h);
+	      if (root != t)
+		{
+		  // t is not the intermediate node, so it joins the left half.
+		  concatenate(t, static_cast<_Rb_tree_node_ptr>(t->_M_left), l, b_h, black_h_l, l, black_h_l);
+		}
+	      else
+		{
+		  l = static_cast<_Rb_tree_node_ptr>(t->_M_left);
+		  black_h_l = b_h;
+		}
+	      _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key));
+	      _GLIBCXX_PARALLEL_ASSERT(r == NULL or not base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_minimum(r)),key));
+	    }
+	  else
+	    {
+	      if (strictly_less_or_equal(prev_k, base_type::_S_key(t)))
+		{
+		  root = t;
+		  l= static_cast<_Rb_tree_node_ptr>(t->_M_left);
+		  make_black_leaf(l, black_h_l);
+		  _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key));
+		}
+	      else
+		{
+		  l= t;
+		  black_h_l = black_node;
+		  force_black_root(l, black_h_l);
+		  _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key));
+		}
+
+	      r = NULL;
+	      black_h = 0;
+	      black_h_r = 0;
+	    }
+	}
+      return black_h + black_node;
+    }
+
+    /** @brief Color the root black and update the black height accordingly.
+     *
+     *  Only a red root is touched; for a NULL tree or a root that is
+     *  not red, neither the tree nor @c black_h changes.
+     * @param t Root of the tree.
+     * @param black_h Black height of the tree @c t (in/out);
+     * incremented by one if the root was recolored. */
+    static void force_black_root(_Rb_tree_node_base* t, int& black_h)
+    {
+      if (t == NULL or t->_M_color != std::_S_red)
+	return;
+
+      t->_M_color = std::_S_black;
+      ++black_h;
+    }
+
+    /** @brief Split the tree in two parts: the minimum element from a
+	tree (i.e. leftmost) and the rest (right subtree)
+     *
+     *  The method descends along the left children. The node without
+     *  a left child is the minimum; on the way back up, every node
+     *  passed and its right subtree are concatenated to the rest
+     *  collected so far.
+     *  @param t Root of the tree
+     *  @param root Minimum element (out)
+     *  @param r Right subtree: @c t - {@c root}
+     *  @param black_h_r Black height of the right subtree.
+     *  @pre t != NULL
+     *  @return Black height of the original tree  */
+    int
+    extract_min(const _Rb_tree_node_ptr t, _Rb_tree_node_ptr& root, _Rb_tree_node_ptr& r, int& black_h_r) const
+    {
+      _GLIBCXX_PARALLEL_ASSERT (t != NULL);
+
+      // Contribution of t itself to the black height; taken before
+      // any recoloring happens.
+      const int own_black = (t->_M_color == std::_S_black) ? 1 : 0;
+
+      if (t->_M_left == NULL)
+	{
+	  // t is the minimum. t->M_right is at most one node, which
+	  // becomes the (black) rest.
+	  root = t;
+	  r = static_cast<_Rb_tree_node_ptr>(t->_M_right);
+	  if (r == NULL)
+	    black_h_r = 0;
+	  else
+	    {
+	      r->_M_color = std::_S_black;
+	      black_h_r = 1;
+	    }
+	  return own_black;
+	}
+
+      // Go to the left.
+      const int below_black_h = extract_min(static_cast<_Rb_tree_node_ptr>(t->_M_left), root, r, black_h_r);
+
+      // Join t and its right subtree to the already existing rest;
+      // the left subtree has been dealt with by the recursion.
+      int right_black_h = below_black_h;
+      force_black_root(t->_M_right, right_black_h);
+      concatenate(t, r, static_cast<_Rb_tree_node_ptr>(t->_M_right), black_h_r, right_black_h, r, black_h_r);
+
+      return below_black_h + own_black;
+    }
+
+
+    /** @brief Split the tree in two parts: the greatest element from
+	a tree (i.e. rightmost) and the rest (left subtree)
+     *
+     *  The method descends along the right children. The node
+     *  without a right child is the maximum; on the way back up,
+     *  every node passed and its left subtree are concatenated to
+     *  the rest collected so far.
+     *  @param t Root of the tree
+     *  @param root Maximum element (out)
+     *  @param l Left subtree: @c t - {@c root}
+     *  @param black_h_l Black height of the left subtree.
+     *  @pre t != NULL
+     *  @return Black height of the original tree  */
+    int
+    extract_max(const _Rb_tree_node_ptr t, _Rb_tree_node_ptr& root, _Rb_tree_node_ptr& l, int& black_h_l) const
+    {
+      _GLIBCXX_PARALLEL_ASSERT (t != NULL);
+
+      // Contribution of t itself to the black height; taken before
+      // any recoloring happens.
+      const int own_black = (t->_M_color == std::_S_black) ? 1 : 0;
+
+      if (t->_M_right == NULL)
+	{
+	  // t is the maximum; its left child, if there is one, becomes
+	  // the (black) rest.
+	  root = t;
+	  l = static_cast<_Rb_tree_node_ptr>(t->_M_left);
+	  if (l == NULL)
+	    black_h_l = 0;
+	  else
+	    {
+	      l->_M_color = std::_S_black;
+	      black_h_l = 1;
+	    }
+	  return own_black;
+	}
+
+      // Go to the right.
+      const int below_black_h = extract_max(static_cast<_Rb_tree_node_ptr>(t->_M_right), root, l,  black_h_l);
+
+      // Join t and its left subtree to the already existing rest;
+      // the right subtree has been dealt with by the recursion.
+      int left_black_h = below_black_h;
+      force_black_root(t->_M_left, left_black_h);
+      concatenate(t, static_cast<_Rb_tree_node_ptr>(t->_M_left), l, left_black_h, black_h_l, l, black_h_l);
+
+      return below_black_h + own_black;
+    }
+
+    /** @brief Split tree according to key in two parts: a left tree
+     * and a right subtree
+     *
+     *  Trees are concatenated once the recursive call returns. That
+     *  is, from bottom to top (ie. smaller to larger), so the cost
+     *  bounds for split hold.
+     *  @param t Root of the tree to split; may be NULL.
+     *  @param key Key to split according to.
+     *  @param l Root of left subtree returned, nodes less than @c key.
+     *  @param r Root of right subtree returned, nodes not less than
+     *  @c key.
+     *  @param black_h_l Black height of the left subtree.
+     *  @param black_h_r Black height of the right subtree.
+     *  @return Black height of the original tree */
+    int
+    split(const _Rb_tree_node_ptr t, const key_type& key, _Rb_tree_node_ptr& l, _Rb_tree_node_ptr& r, int& black_h_l, int& black_h_r) const
+    {
+      if (t == NULL)
+	{
+	  // End of the search path: both parts start out empty.
+	  l = NULL;
+	  r = NULL;
+	  black_h_l = 0;
+	  black_h_r = 0;
+	  return 0;
+	}
+
+      // Contribution of t itself to the black height; taken before
+      // any recoloring happens.
+      const int own_black = (t->_M_color == std::_S_black) ? 1 : 0;
+
+      int below_black_h;
+      if (base_type::_M_impl._M_key_compare(base_type::_S_key(t), key))
+	{
+	  // t is less than the key: go to the right.
+	  below_black_h = split(static_cast<_Rb_tree_node_ptr>(t->_M_right), key, l, r, black_h_l, black_h_r);
+
+	  // Join t and its left subtree to the already existing left
+	  // half, leave right subtree.
+	  int left_black_h = below_black_h;
+	  force_black_root(t->_M_left, left_black_h);
+	  concatenate(t, static_cast<_Rb_tree_node_ptr>(t->_M_left), l, left_black_h, black_h_l, l, black_h_l);
+	}
+      else
+	{
+	  // t is not less than the key: go to the left.
+	  below_black_h = split(static_cast<_Rb_tree_node_ptr>(t->_M_left), key, l, r, black_h_l, black_h_r);
+
+	  // Join t and its right subtree to the already existing right
+	  // half, leave left subtree.
+	  int right_black_h = below_black_h;
+	  force_black_root(t->_M_right, right_black_h);
+	  concatenate(t, r, static_cast<_Rb_tree_node_ptr>(t->_M_right), black_h_r, right_black_h, r, black_h_r);
+	}
+
+      return below_black_h + own_black;
+    }
+
+    /** @brief Insert an existing node in tree and rebalance it, if
+     *  the node was actually linked.
+     *
+     *  "Local" means that no attributes of the red-black tree object
+     *  are changed, so the global data structure does not see the
+     *  insertion yet.
+     *  @param t Root of tree to insert into, must not be NULL.
+     *  @param new_t Existing node to insert; destroyed if rejected.
+     *  @param existing Number of existing elements (in/out),
+     *  incremented by one when @c new_t is rejected as a repetition.
+     *  @param black_h Black height of the tree (in/out).
+     *  @param strictly_less_or_less_equal Comparator to deal
+     *  transparently with repetitions with respect to the uniqueness
+     *  of the wrapping container
+     *  @return Resulting tree after insertion */
+    template<typename StrictlyLessOrLessEqual>
+    _Rb_tree_node_ptr
+    _M_insert_local(_Rb_tree_node_base* t, const _Rb_tree_node_ptr new_t, size_type& existing, int& black_h, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+    {
+      _GLIBCXX_PARALLEL_ASSERT(t != NULL);
+      const bool linked = _M_insert_local_top_down(t, new_t, NULL, NULL, true, strictly_less_or_less_equal);
+      if (not linked)
+	{
+	  // Rejected: drop the node and count the repetition.
+	  base_type::_M_destroy_node(new_t);
+	  ++existing;
+	  force_black_root(t, black_h);
+	}
+      else
+	{
+	  // Linked as a red leaf: detach the root, then rebalance.
+	  t->_M_parent = NULL;
+	  black_h += _Rb_tree_rebalance(new_t, t);
+	  _GLIBCXX_PARALLEL_ASSERT(t->_M_color == std::_S_black);
+	}
+      return static_cast<_Rb_tree_node_ptr>(t);
+    }
+
+    /*****	Dealing with repetitions (CORRECTNESS ISSUE) *****/
+    /** @brief Insert an existing node in tree, do no rebalancing.
+     *
+     *  Walks down from @c t to the empty position where @c new_t
+     *  belongs and links it there as a red leaf, unless the
+     *  candidate @c eq_t shows that the key is already present.
+     *  @param t Root of tree to insert into.
+     *  @param new_t Existing node to insert.
+     *  @param eq_t Node candidate to be equal to new_t, only
+     *  relevant for unique containers
+     *  @param parent Parent node of @c t
+     *  @param is_left True if @c t is a left child of @c
+     *  parent. False otherwise.
+     *  @param strictly_less_or_less_equal Comparator to deal
+     *  transparently with repetitions with respect to the uniqueness
+     *  of the wrapping container
+     *  @return True if @c new_t was linked, false if it was rejected.
+     */
+    template<typename StrictlyLessOrLessEqual>
+    bool
+    _M_insert_local_top_down(_Rb_tree_node_base* t, const _Rb_tree_node_ptr new_t, _Rb_tree_node_base* eq_t, _Rb_tree_node_base* parent, const bool is_left, StrictlyLessOrLessEqual strictly_less_or_less_equal) const
+    {
+      // Descend to the empty position, remembering the way taken.
+      bool went_left = is_left;
+      while (t != NULL)
+	{
+	  parent = t;
+	  went_left = strictly_less_or_less_equal(_S_key(new_t), _S_key(static_cast<_Rb_tree_node_ptr>(t)));
+	  if (went_left)
+	    t = t->_M_left;
+	  else
+	    {
+	      // Last node left behind on the right: equality candidate.
+	      eq_t = t;
+	      t = t->_M_right;
+	    }
+	}
+
+      _GLIBCXX_PARALLEL_ASSERT(parent != NULL);
+
+      // A candidate that does not compare before new_t is a repetition.
+      const bool repeated = eq_t != NULL and not strictly_less_or_less_equal(_S_key(static_cast<_Rb_tree_node_ptr>(eq_t)), _S_key(new_t));
+      if (repeated)
+	return false;
+
+      // The element did not exist: hook it in as a red leaf.
+      if (went_left)
+	parent->_M_left = new_t;
+      else
+	parent->_M_right = new_t;
+      new_t->_M_parent = parent;
+      new_t->_M_left = NULL;
+      new_t->_M_right = NULL;
+      new_t->_M_color = std::_S_red;
+      return true;
+    }
+
+    /** @brief Rebalance a tree locally.
+     *
+     *  Applies recolouring and rotation steps starting at @c __x and
+     *  moving towards the root. No node is linked here and no
+     *  attributes of the tree object are used.
+     *  @param __x Root of the current subtree to rebalance.
+     *  @param __root Root of tree where @c __x is in (rebalancing
+     *  stops when root is reached); handed to the rotation helpers.
+     *  @return 1 if the root ended up red and was recoloured black, 0 otherwise
+     */
+    static int
+    _Rb_tree_rebalance(_Rb_tree_node_base* __x, _Rb_tree_node_base*& __root)
+    {
+      _GLIBCXX_PARALLEL_ASSERT(__root->_M_color == std::_S_black);
+      // Rebalance while the parent of __x is red and neither __x nor its parent is the root.
+      while (__x != __root and __x->_M_parent != __root and
+	     __x->_M_parent->_M_color == std::_S_red)
+	{
+	  _Rb_tree_node_base* const __xpp = __x->_M_parent->_M_parent; // Grandparent of __x.
+
+	  if (__x->_M_parent == __xpp->_M_left)
+	    {
+	      _Rb_tree_node_base* const __y = __xpp->_M_right; // Sibling of the parent.
+	      if (__y && __y->_M_color == std::_S_red)
+		{
+		  // Parent and its sibling are red: recolour and continue at the grandparent.
+		  __x->_M_parent->_M_color = std::_S_black;
+		  __y->_M_color = std::_S_black;
+		  __xpp->_M_color = std::_S_red;
+		  __x = __xpp;
+		}
+	      else
+		{
+		  if (__x == __x->_M_parent->_M_right)
+		    {
+		      __x = __x->_M_parent;
+		      std::_Rb_tree_rotate_left(__x, __root);
+		    }
+		  __x->_M_parent->_M_color = std::_S_black;
+		  __xpp->_M_color = std::_S_red;
+		  std::_Rb_tree_rotate_right(__xpp, __root);
+		}
+	    }
+	  else
+	    {
+	      // Mirror image of the branch above, left and right exchanged.
+	      _Rb_tree_node_base* const __y = __xpp->_M_left;
+	      if (__y && __y->_M_color == std::_S_red)
+		{
+		  __x->_M_parent->_M_color = std::_S_black;
+		  __y->_M_color = std::_S_black;
+		  __xpp->_M_color = std::_S_red;
+		  __x = __xpp;
+		}
+	      else
+		{
+		  if (__x == __x->_M_parent->_M_left)
+		    {
+		      __x = __x->_M_parent;
+		      std::_Rb_tree_rotate_right(__x, __root);
+		    }
+		  __x->_M_parent->_M_color = std::_S_black;
+		  __xpp->_M_color = std::_S_red;
+		  std::_Rb_tree_rotate_left(__xpp, __root);
+		}
+	    }
+	}
+      if (__root->_M_color == std::_S_red)
+	{
+	  __root->_M_color = std::_S_black; // Blackening the root adds one to the black height.
+	  _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(static_cast<typename base_type::_Const_Link_type>(__root)));
+	  return 1;
+	}
+      _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(static_cast<typename base_type::_Const_Link_type>(__root)));
+      return 0;
+    }
+
+    /** @brief Check a whole subtree: first every node on its own,
+     *  then the black heights (cf. class method rb_verify()).
+     *  @param __x Pointer to root of subtree to check.
+     *  @param count Returned number of nodes; untouched if a node fails.
+     *  @return Tree correct. */
+    bool
+    rb_verify_tree(const typename base_type::_Const_Link_type __x, int& count) const
+    {
+      int black_height;
+      return not rb_verify_tree_node(__x) ? false : rb_verify_tree(__x, count, black_height);
+    }
+
+    /** @brief Verify that a subtree is binary search tree by
+     *  checking each of its nodes with rb_verify_node().
+     *  @param __x Pointer to root of subtree to check.
+     *  @return Tree correct; an empty subtree is correct.
+     */
+    bool
+    rb_verify_tree_node(const typename base_type::_Const_Link_type __x) const
+    {
+      if (__x == NULL)
+	return true;
+      // The node itself first, then its left and right subtrees.
+      if (not rb_verify_node(__x))
+	return false;
+      if (not rb_verify_tree_node(base_type::_S_left(__x)))
+	return false;
+      return rb_verify_tree_node(base_type::_S_right(__x));
+    }
+
+    /** @brief Check the black heights of a subtree; node count and
+     *  black height are computed but discarded. Keys are not examined.
+     *  @param __x Pointer to (subtree) root node.
+     *  @return Tree correct.
+     */
+    static bool
+    rb_verify_tree(const typename base_type::_Const_Link_type __x)
+    {
+      int unused_count, unused_black_h;
+      return rb_verify_tree(__x, unused_count, unused_black_h);
+    }
+
+    /** @brief Count the nodes of a subtree and check that sibling
+     *  subtrees have equal black heights (keys are not examined).
+     *  @param __x Pointer to (subtree) root node.
+     *  @param count Number of nodes of @c __x (out).
+     *  @param black_h Black height of @c __x (out).
+     *  @return Tree correct. */
+    static bool
+    rb_verify_tree(const typename base_type::_Const_Link_type __x, int& count, int& black_h)
+    {
+      if (__x == NULL)
+	{
+	  black_h = count = 0;
+	  return true;
+	}
+      int left_count, right_count = 0, left_bh, right_bh;
+      // The right subtree is only visited if the left one is correct.
+      bool balanced = rb_verify_tree(base_type::_S_left(__x), left_count, left_bh)
+	and rb_verify_tree(base_type::_S_right(__x), right_count, right_bh);
+      if (balanced)
+	balanced = left_bh == right_bh;
+      count = left_count + right_count + 1;
+      black_h = left_bh;
+      if (__x->_M_color != std::_S_red)
+	++black_h;
+      return balanced;
+    }
+
+    /** @brief Verify a single node: a red node has no red child, and
+     *  its key is ordered with respect to the largest key on its left
+     *  and the smallest key on its right.
+     *  @param __x Pointer to node.
+     *  @return Node correct. */
+    bool
+    rb_verify_node(const typename base_type::_Const_Link_type __x) const
+    {
+      typename base_type::_Const_Link_type left_child = base_type::_S_left(__x);
+      typename base_type::_Const_Link_type right_child = base_type::_S_right(__x);
+
+      // A red node must not have a red child.
+      const bool red_left = left_child != NULL and left_child->_M_color == std::_S_red;
+      const bool red_right = right_child != NULL and right_child->_M_color == std::_S_red;
+      if (__x->_M_color == std::_S_red and (red_left or red_right))
+	return false;
+
+      // This key must not compare before the largest key on the left.
+      if (left_child != NULL)
+	{
+	  typename base_type::_Const_Link_type biggest = static_cast<typename base_type::_Const_Link_type>(base_type::_S_maximum(left_child));
+	  if (base_type::_M_impl._M_key_compare(base_type::_S_key(__x), base_type::_S_key(biggest)))
+	    return false;
+	}
+
+      // The smallest key on the right must not compare before this key.
+      if (right_child != NULL)
+	{
+	  typename base_type::_Const_Link_type smallest = static_cast<typename base_type::_Const_Link_type>(base_type::_S_minimum(right_child));
+	  if (base_type::_M_impl._M_key_compare(base_type::_S_key(smallest), base_type::_S_key(__x)))
+	    return false;
+	}
+
+      return true;
+    }
+
+    /** @brief Debug helper meant to print the key of the root. Currently a no-op: the body is commented out.
+     *  @param t Root of the tree (unused while the body is disabled).
+     */
+    static void
+    print_root(_Rb_tree_node_base* t)
+    {
+      /*
+       if (t != NULL)
+       std::cout<< base_type::_S_key(t) << std::endl;
+       else
+       std::cout<< "NULL" << std::endl;
+      */
+    }
+
+    /** @brief Debug helper meant to print the keys of the tree in order. Currently a no-op: the body is commented out.
+     *  @param t Root of the tree (unused while the body is disabled).
+     */
+    static void
+    print_tree(_Rb_tree_node_base* t)
+    {
+      /*
+       if (t != NULL)
+       {
+       print_tree(t->_M_left);
+       std::cout<< base_type::_S_key(t) << std::endl;
+       print_tree(t->_M_right);
+       }
+      */
+    }
+
+    /** @brief Build a string of blanks, used to indent debug output.
+     *
+     *  A value is always returned: a function declared to return
+     *  std::string must not run off its end without a return
+     *  statement.
+     *  @param b Number of blanks; values below 1 give an empty string.
+     *  @return A string with @c b blanks */
+    inline static std::string
+    blanks(int b)
+    {
+      // Guard the count: a negative int would wrap to a huge size_type.
+      return b > 0 ? std::string(static_cast<std::string::size_type>(b), ' ') : std::string();
+    }
+
+    /** @brief Debug helper meant to draw the tree sideways with keys and colours. Currently a no-op: the body is commented out.
+     *  @param t Root of the tree (unused while the body is disabled).
+     *  @param c Width of a printed key (unused while the body is disabled).
+     */
+    template<typename Pointer>
+    static void
+    draw_tree(Pointer t, const int c)
+    {
+      /*
+       if (t == NULL)
+       {
+       std::cout << blanks(c) << "NULL" << std::endl;
+       return;
+       }
+       draw_tree(static_cast<Pointer>(t->_M_right), c + 8);
+       std::cout << blanks(c) << "" << base_type::_S_key(t) << " ";
+       if (t->_M_color == std::_S_black)
+       std::cout << "B" << std::endl;
+       else
+       std::cout << "R" << std::endl;
+       draw_tree(static_cast<Pointer>(t->_M_left), c + 8);
+      */
+    }
+
+  public:
+    /** @brief Verify that all the red-black tree properties hold for
+	the stored tree, as well as the additional properties that the
+	STL implementation imposes (leftmost/rightmost links, node count).
+	@return True if the tree is consistent, false otherwise.
+     */
+    bool
+    rb_verify()
+    {
+      if (base_type::_M_impl._M_node_count == 0 || base_type::begin() == base_type::end())
+	{
+	  bool res = base_type::_M_impl._M_node_count == 0 && base_type::begin() == base_type::end()
+	    && base_type::_M_impl._M_header._M_left ==base_type::_M_end()
+	    && base_type::_M_impl._M_header._M_right == base_type::_M_end();
+	  _GLIBCXX_PARALLEL_ASSERT(res);
+	  return res;
+	}
+      size_type i=0;
+      unsigned int __len = _Rb_tree_black_count(base_type::_M_leftmost(), base_type::_M_root());
+      for (typename base_type::const_iterator __it =base_type::begin(); __it != base_type::end(); ++__it)
+	{
+	  typename base_type::_Const_Link_type __x = static_cast<typename base_type::_Const_Link_type>(__it._M_node);
+	  if (not rb_verify_node(__x)) return false;
+	  if (!base_type::_S_left(__x)&& !base_type::_S_right(__x) && _Rb_tree_black_count(__x,base_type::_M_root()) != __len)
+	    {
+	      _GLIBCXX_PARALLEL_ASSERT(false);
+	      return false;
+	    }
+	  ++i;
+	}
+
+      if (i != base_type::_M_impl._M_node_count)
+	{
+	  printf("%lu != %lu\n", static_cast<unsigned long>(i), static_cast<unsigned long>(base_type::_M_impl._M_node_count));
+	  _GLIBCXX_PARALLEL_ASSERT(false);
+	  return false;
+	}
+
+      if (base_type::_M_leftmost() != std::_Rb_tree_node_base::_S_minimum(base_type::_M_root())
+	  or base_type::_M_rightmost() != std::_Rb_tree_node_base::_S_maximum(base_type::_M_root()))
+	{
+	  _GLIBCXX_PARALLEL_ASSERT(false);
+	  return false;
+	}
+      return true;
+    }
+  };
+
+}
+
+#endif
diff --git a/libstdc++-v3/include/parallel/types.h b/libstdc++-v3/include/parallel/types.h
new file mode 100644
index 00000000000..5f8014f5c71
--- /dev/null
+++ b/libstdc++-v3/include/parallel/types.h
@@ -0,0 +1,98 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/types.h
+ *  @brief Basic typedefs.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_TYPES_H
+#define _GLIBCXX_PARALLEL_TYPES_H 1
+
+#include <cstdlib>
+
+namespace __gnu_parallel
+{
+  // XXX need to use <cstdint>
+  /** @brief 8-bit signed integer (explicitly signed: plain char may be unsigned). */
+  typedef signed char int8;
+
+  /** @brief 8-bit unsigned integer. */
+  typedef unsigned char uint8;
+
+  /** @brief 16-bit signed integer. */
+  typedef short int16;
+
+  /** @brief 16-bit unsigned integer. */
+  typedef unsigned short uint16;
+
+  /** @brief 32-bit signed integer. */
+  typedef int int32;
+
+  /** @brief 32-bit unsigned integer. */
+  typedef unsigned int uint32;
+
+  /** @brief 64-bit signed integer. */
+  typedef long long int64;
+
+  /** @brief 64-bit unsigned integer. */
+  typedef unsigned long long uint64;
+
+  /**
+   * @brief Unsigned integer to index elements.
+   * The total number of elements for each algorithm must fit into this type.
+   */
+  typedef uint64 sequence_index_t;
+
+  /**
+   * @brief Unsigned integer to index a thread number.
+   * The maximum thread number (for each processor) must fit into this type.
+   */
+  typedef uint16 thread_index_t;
+
+  /**
+   * @brief Longest compare-and-swappable integer type on this platform.
+   */
+  typedef int64 lcas_t;
+
+  /**
+   * @brief Number of bits of ::lcas_t.
+   */
+  static const int lcas_t_bits = sizeof(lcas_t) * 8;
+
+  /**
+   * @brief ::lcas_t with the right half of bits set to 1.
+   */
+  static const lcas_t lcas_t_mask = ((static_cast<lcas_t>(1) << (lcas_t_bits / 2)) - 1);
+}
+
+#endif /* _GLIBCXX_PARALLEL_TYPES_H */
diff --git a/libstdc++-v3/include/parallel/unique_copy.h b/libstdc++-v3/include/parallel/unique_copy.h
new file mode 100644
index 00000000000..93a030429eb
--- /dev/null
+++ b/libstdc++-v3/include/parallel/unique_copy.h
@@ -0,0 +1,193 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/unique_copy.h
+ *  @brief Parallel implementations of std::unique_copy().
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Robert Geisberger and Robin Dapp.
+
+#ifndef _GLIBCXX_PARALLEL_UNIQUE_H
+#define _GLIBCXX_PARALLEL_UNIQUE_H 1
+
+#include <parallel/parallel.h>
+#include <parallel/multiseq_selection.h>
+
+namespace __gnu_parallel
+{
+
+  /** @brief Parallel std::unique_copy(), with explicit equality predicate.
+   *  @param first Begin iterator of input sequence.
+   *  @param last End iterator of input sequence.
+   *  @param result Begin iterator of result sequence; elements are assigned.
+   *  @param binary_pred Equality predicate.
+   *  @return End iterator of result sequence. */
+  template<typename InputIterator, class OutputIterator, class BinaryPredicate>
+  inline OutputIterator
+  parallel_unique_copy(InputIterator first, InputIterator last,
+		       OutputIterator result, BinaryPredicate binary_pred)
+  {
+    _GLIBCXX_CALL(last - first)
+
+    typedef std::iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    difference_type size = last - first;
+    if (size == 0)
+      return result;
+
+    // Sized inside the parallel region, once the team size is known.
+    difference_type* counter = 0;
+    difference_type* borders = 0;
+    int num_threads = __gnu_parallel::get_max_threads();
+
+    // First part contains at least one element.
+#pragma omp parallel num_threads(num_threads)
+    {
+#pragma omp single
+      {
+	// The runtime may grant fewer threads than requested; use the
+	// real team size. Thread 0 takes two parts, hence the + 1.
+	num_threads = omp_get_num_threads();
+	counter = new difference_type[num_threads + 1];
+	borders = new difference_type[num_threads + 2];
+	__gnu_parallel::equally_split(size, num_threads + 1, borders);
+      } // Implicit barrier: both arrays are ready for all threads.
+
+      int iam = omp_get_thread_num();
+      difference_type begin, end;
+
+      // Check for length without duplicates
+      // Needed for position in output
+      difference_type i = 0;
+      OutputIterator out = result;
+      if (iam == 0)
+	{
+	  begin = borders[0] + 1;	// == 1
+	  end = borders[iam + 1];
+
+	  // Assign like std::unique_copy: the output holds live objects.
+	  i++;
+	  *out++ = *first;
+
+	  for (InputIterator iter = first + begin; iter < first + end; ++iter)
+	    {
+	      if (!binary_pred(*iter, *(iter-1)))
+		{
+		  i++;
+		  *out++ = *iter;
+		}
+	    }
+	}
+      else
+	{
+	  begin = borders[iam]; //one part
+	  end = borders[iam + 1];
+
+	  for (InputIterator iter = first + begin; iter < first + end; ++iter)
+	    {
+	      if (!binary_pred(*iter, *(iter-1)))
+		i++;
+	    }
+	}
+      counter[iam] = i;
+
+      // Last part still untouched.
+      difference_type begin_output;
+
+#pragma omp barrier
+
+      // Store result in output on calculated positions.
+      begin_output = 0;
+
+      if (iam == 0)
+	{
+	  for (int t = 0; t < num_threads; t++)
+	    begin_output += counter[t];
+
+	  i = 0;
+	  OutputIterator iter_out = result + begin_output;
+
+	  begin = borders[num_threads];
+	  end = size;
+
+	  for (InputIterator iter = first + begin; iter < first + end; ++iter)
+	    {
+	      if (iter == first || !binary_pred(*iter, *(iter-1)))
+		{
+		  i++;
+		  *iter_out++ = *iter;
+		}
+	    }
+
+	  counter[num_threads] = i;
+	}
+      else
+	{
+	  for (int t = 0; t < iam; t++)
+	    begin_output += counter[t];
+
+	  OutputIterator iter_out = result + begin_output;
+	  for (InputIterator iter = first + begin; iter < first + end; ++iter)
+	    {
+	      if (!binary_pred(*iter, *(iter-1)))
+		*iter_out++ = *iter;
+	    }
+	}
+    }
+
+    difference_type end_output = 0;
+    for (int t = 0; t < num_threads + 1; t++)
+      end_output += counter[t];
+
+    delete[] borders;
+    delete[] counter;
+    return result + end_output;
+  }
+
+  /** @brief Parallel std::unique_copy() comparing elements with
+   *  std::equal_to, i.e. without explicit equality predicate.
+   *  @param first Begin iterator of input sequence.
+   *  @param last End iterator of input sequence.
+   *  @param result Begin iterator of result sequence.
+   *  @return End iterator of result sequence. */
+  template<typename InputIterator, class OutputIterator>
+  inline OutputIterator
+  parallel_unique_copy(InputIterator first, InputIterator last,
+		       OutputIterator result)
+  {
+    typedef std::equal_to<typename std::iterator_traits<InputIterator>::value_type> equality;
+    return parallel_unique_copy(first, last, result, equality());
+  }
+
+}//namespace __gnu_parallel
+
+#endif
diff --git a/libstdc++-v3/include/parallel/workstealing.h b/libstdc++-v3/include/parallel/workstealing.h
new file mode 100644
index 00000000000..cc8f37e8d09
--- /dev/null
+++ b/libstdc++-v3/include/parallel/workstealing.h
@@ -0,0 +1,289 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING.  If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction.  Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License.  This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/workstealing.h
+ *  @brief Parallelization of embarrassingly parallel execution by
+ *  means of work-stealing.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_WORKSTEALING_H
+#define _GLIBCXX_PARALLEL_WORKSTEALING_H 1
+
+#include <parallel/parallel.h>
+#include <parallel/random_number.h>
+#include <parallel/compatibility.h>
+
+namespace __gnu_parallel
+{
+
+#define _GLIBCXX_JOB_VOLATILE volatile
+
+  /** @brief One job for a certain thread: an index range and its length. */
+  template<typename _DifferenceTp>
+  struct Job
+  {
+    typedef _DifferenceTp difference_type;
+
+    /** @brief First element.
+     *
+     *  Changed by owning and stealing thread. By stealing thread,
+     *  always incremented. The job is empty once @c first > @c last. */
+    _GLIBCXX_JOB_VOLATILE difference_type first;
+
+    /** @brief Last element (inclusive).
+     *
+     *  Changed by owning thread only. */
+    _GLIBCXX_JOB_VOLATILE difference_type last;
+
+    /** @brief Number of elements, i. e. @c last-first+1.
+     *
+     *  Changed by owning thread only; thieves read it as a consistency check. */
+    _GLIBCXX_JOB_VOLATILE difference_type load;
+  };
+
+  /** @brief Work stealing algorithm for random access iterators.
+   *
+   *  Uses one job descriptor per thread as additional memory.
+   *  Synchronization at job lists is done with atomic operations.
+   *  @param begin Begin iterator of element sequence.
+   *  @param end End iterator of element sequence.
+   *  @param op User-supplied functor (comparator, predicate, adding
+   *  functor, ...).
+   *  @param f Functor to "process" an element with op (depends on
+   *  desired functionality, e. g. for std::for_each(), ...).
+   *  @param r Functor to "add" a single result to the already
+   *  processed elements (depends on functionality).
+   *  @param base Base value for reduction.
+   *  @param output Pointer to position where final result is written to
+   *  @param bound Maximum number of elements processed (e. g. for
+   *  std::count_n()); a negative value means the whole sequence.
+   *  @return User-supplied functor (that may contain a part of the result).
+   */
+  template<typename RandomAccessIterator, typename Op, typename Fu, typename Red, typename Result>
+  Op
+  for_each_template_random_access_workstealing(RandomAccessIterator begin,
+					       RandomAccessIterator end,
+					       Op op, Fu& f, Red r,
+					       Result base, Result& output,
+					       typename std::iterator_traits<RandomAccessIterator>::difference_type bound)
+  {
+    _GLIBCXX_CALL(end - begin)
+
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+
+
+    difference_type chunk_size = static_cast<difference_type>(Settings::workstealing_chunk_size);
+
+    // How many jobs?
+    difference_type length = (bound < 0) ? (end - begin) : bound;
+
+    // To avoid false sharing in a cache line.
+    const int stride = Settings::cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
+
+    // Total number of threads currently working.
+    thread_index_t busy = 0;
+    thread_index_t num_threads = get_max_threads();
+    difference_type num_threads_min = num_threads < length ? num_threads : length;
+
+    // No more threads than jobs, at least one thread.
+    difference_type num_threads_max = num_threads_min > 1 ? num_threads_min : 1;
+    num_threads = static_cast<thread_index_t>(num_threads_max);
+
+    // Job description array, allocated once the team size is known.
+    Job<difference_type> *job = 0;
+
+    // Write base value to output.
+    output = base;
+
+#pragma omp parallel shared(busy) num_threads(num_threads)
+    {
+#pragma omp single
+      {
+	// The runtime may provide fewer threads than requested.  Use
+	// the real team size for the job array, the victim selection
+	// and the initial partitioning, so that no part of the input
+	// is left without an owner.
+	num_threads = static_cast<thread_index_t>(omp_get_num_threads());
+	job = new Job<difference_type>[num_threads * stride];
+      }
+
+      // Initialization phase.
+
+      // Flags for every thread if it is doing productive work.
+      bool iam_working = false;
+
+      // Thread id.
+      thread_index_t iam = omp_get_thread_num();
+
+      // This job.
+      Job<difference_type>& my_job = job[iam * stride];
+
+      // Random number (for work stealing).
+      thread_index_t victim;
+
+      // Local value for reduction.
+      Result result = Result();
+
+      // Number of elements to steal in one attempt.
+      difference_type steal;
+
+      // Every thread has its own random number generator (modulo num_threads).
+      random_number rand_gen(iam, num_threads);
+
+#pragma omp atomic
+      // This thread is currently working.
+      busy++;
+
+      iam_working = true;
+
+      // How many jobs per thread? last thread gets the rest.
+      my_job.first = static_cast<difference_type>(iam * (length / num_threads));
+
+      my_job.last = (iam == (num_threads - 1)) ? (length - 1) : ((iam + 1) * (length / num_threads) - 1);
+      my_job.load = my_job.last - my_job.first + 1;
+
+      // Init result with first value (to have a base value for reduction).
+      if (my_job.first <= my_job.last)
+	{
+	  // Cannot use volatile variable directly.
+	  difference_type my_first = my_job.first;
+	  result = f(op, begin + my_first);
+	  my_job.first++;
+	  my_job.load--;
+	}
+
+      RandomAccessIterator current;
+
+#pragma omp barrier
+
+      // Actual work phase
+      // Work on own or stolen start
+      while (busy > 0)
+	{
+	  // Work until no productive thread left.
+#pragma omp flush(busy)
+
+	  // Thread has own work to do
+	  while (my_job.first <= my_job.last)
+	    {
+	      // fetch-and-add call
+	      // Reserve current job block (size chunk_size) in my queue.
+	      difference_type current_job = fetch_and_add<difference_type>(&(my_job.first), chunk_size);
+
+	      // Update load, to make the three values consistent,
+	      // first might have been changed in the meantime
+	      my_job.load = my_job.last - my_job.first + 1;
+	      for (difference_type job_counter = 0; job_counter < chunk_size && current_job <= my_job.last; job_counter++)
+		{
+		  // Yes: process it!
+		  current = begin + current_job;
+		  current_job++;
+
+		  // Do actual work.
+		  result = r(result, f(op, current));
+		}
+
+#pragma omp flush(busy)
+
+	    }
+
+	  // After reaching this point, a thread's job list is empty.
+	  if (iam_working)
+	    {
+#pragma omp atomic
+	      // This thread no longer has work.
+	      busy--;
+
+	      iam_working = false;
+	    }
+
+	  difference_type supposed_first, supposed_last, supposed_load;
+	  do
+	    {
+	      // Find random nonempty deque (not own) and do consistency check.
+	      yield();
+#pragma omp flush(busy)
+	      victim = rand_gen();
+	      supposed_first = job[victim * stride].first;
+	      supposed_last = job[victim * stride].last;
+	      supposed_load = job[victim * stride].load;
+	    }
+	  while (busy > 0
+		 && ((supposed_load <= 0) || ((supposed_first + supposed_load - 1) != supposed_last)));
+
+	  if (busy == 0)
+	    break;
+
+	  if (supposed_load > 0)
+	    {
+	      // Has work and work to do.
+	      // Number of elements to steal (at least one).
+	      steal = (supposed_load < 2) ? 1 : supposed_load / 2;
+
+	      // Push victim's start forward.
+	      difference_type stolen_first = fetch_and_add<difference_type>(&(job[victim * stride].first), steal);
+	      difference_type stolen_try = stolen_first + steal - difference_type(1);
+
+	      my_job.first = stolen_first;
+
+	      // Avoid std::min dependencies.
+	      my_job.last = stolen_try < supposed_last ? stolen_try : supposed_last;
+
+	      my_job.load = my_job.last - my_job.first + 1;
+
+#pragma omp atomic
+	      // Has potential work again.
+	      busy++;
+	      iam_working = true;
+
+#pragma omp flush(busy)
+	    }
+#pragma omp flush(busy)
+	} // end while busy > 0
+#pragma omp critical(writeOutput)
+      // Add accumulated result to output.
+      output = r(output, result);
+    }
+
+    delete[] job;
+
+    // Points to last element processed (needed as return value for
+    // some algorithms like transform)
+    f.finish_iterator = begin + length;
+
+    return op;
+  }
+} // end namespace
+
+#endif