diff options
Diffstat (limited to 'libstdc++-v3/include/parallel')
43 files changed, 16063 insertions, 0 deletions
diff --git a/libstdc++-v3/include/parallel/algo.h b/libstdc++-v3/include/parallel/algo.h new file mode 100644 index 000000000..b27de9b37 --- /dev/null +++ b/libstdc++-v3/include/parallel/algo.h @@ -0,0 +1,2363 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/algo.h + * @brief Parallel STL function calls corresponding to the stl_algo.h header. + * + * The functions defined here mainly do case switches and + * call the actual parallelized versions in other files. + * Inlining policy: Functions that basically only contain one function call, + * are declared inline. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_ALGO_H +#define _GLIBCXX_PARALLEL_ALGO_H 1 + +#include <parallel/algorithmfwd.h> +#include <bits/stl_algobase.h> +#include <bits/stl_algo.h> +#include <parallel/iterator.h> +#include <parallel/base.h> +#include <parallel/sort.h> +#include <parallel/workstealing.h> +#include <parallel/par_loop.h> +#include <parallel/omp_loop.h> +#include <parallel/omp_loop_static.h> +#include <parallel/for_each_selectors.h> +#include <parallel/for_each.h> +#include <parallel/find.h> +#include <parallel/find_selectors.h> +#include <parallel/search.h> +#include <parallel/random_shuffle.h> +#include <parallel/partition.h> +#include <parallel/merge.h> +#include <parallel/unique_copy.h> +#include <parallel/set_operations.h> + +namespace std _GLIBCXX_VISIBILITY(default) +{ +namespace __parallel +{ + // Sequential fallback + template<typename _IIter, typename _Function> + inline _Function + for_each(_IIter __begin, _IIter __end, _Function __f, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::for_each(__begin, __end, __f); } + + + // Sequential fallback for input iterator case + template<typename _IIter, typename _Function, typename _IteratorTag> + inline _Function + __for_each_switch(_IIter __begin, _IIter __end, _Function __f, + _IteratorTag) + { return for_each(__begin, __end, __f, __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename _RAIter, typename _Function> + _Function + __for_each_switch(_RAIter __begin, _RAIter __end, + _Function __f, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().for_each_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + bool __dummy; + __gnu_parallel::__for_each_selector<_RAIter> __functionality; + + return __gnu_parallel:: + 
__for_each_template_random_access( + __begin, __end, __f, __functionality, + __gnu_parallel::_DummyReduct(), true, __dummy, -1, + __parallelism_tag); + } + else + return for_each(__begin, __end, __f, __gnu_parallel::sequential_tag()); + } + + // Public interface + template<typename _Iterator, typename _Function> + inline _Function + for_each(_Iterator __begin, _Iterator __end, _Function __f, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef std::iterator_traits<_Iterator> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + return __for_each_switch(__begin, __end, __f, _IteratorCategory(), + __parallelism_tag); + } + + template<typename _Iterator, typename _Function> + inline _Function + for_each(_Iterator __begin, _Iterator __end, _Function __f) + { + typedef std::iterator_traits<_Iterator> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + return __for_each_switch(__begin, __end, __f, _IteratorCategory()); + } + + + // Sequential fallback + template<typename _IIter, typename _Tp> + inline _IIter + find(_IIter __begin, _IIter __end, const _Tp& __val, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::find(__begin, __end, __val); } + + // Sequential fallback for input iterator case + template<typename _IIter, typename _Tp, typename _IteratorTag> + inline _IIter + __find_switch(_IIter __begin, _IIter __end, const _Tp& __val, + _IteratorTag) + { return _GLIBCXX_STD_A::find(__begin, __end, __val); } + + // Parallel find for random access iterators + template<typename _RAIter, typename _Tp> + _RAIter + __find_switch(_RAIter __begin, _RAIter __end, + const _Tp& __val, random_access_iterator_tag) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + std::binder2nd<__gnu_parallel::_EqualTo<_ValueType, const _Tp&> > + __comp(__gnu_parallel::_EqualTo<_ValueType, const 
_Tp&>(), __val); + return __gnu_parallel::__find_template( + __begin, __end, __begin, __comp, + __gnu_parallel::__find_if_selector()).first; + } + else + return _GLIBCXX_STD_A::find(__begin, __end, __val); + } + + // Public interface + template<typename _IIter, typename _Tp> + inline _IIter + find(_IIter __begin, _IIter __end, const _Tp& __val) + { + typedef std::iterator_traits<_IIter> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + return __find_switch(__begin, __end, __val, _IteratorCategory()); + } + + // Sequential fallback + template<typename _IIter, typename _Predicate> + inline _IIter + find_if(_IIter __begin, _IIter __end, _Predicate __pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::find_if(__begin, __end, __pred); } + + // Sequential fallback for input iterator case + template<typename _IIter, typename _Predicate, typename _IteratorTag> + inline _IIter + __find_if_switch(_IIter __begin, _IIter __end, _Predicate __pred, + _IteratorTag) + { return _GLIBCXX_STD_A::find_if(__begin, __end, __pred); } + + // Parallel find_if for random access iterators + template<typename _RAIter, typename _Predicate> + _RAIter + __find_if_switch(_RAIter __begin, _RAIter __end, + _Predicate __pred, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + return __gnu_parallel::__find_template(__begin, __end, __begin, __pred, + __gnu_parallel:: + __find_if_selector()).first; + else + return _GLIBCXX_STD_A::find_if(__begin, __end, __pred); + } + + // Public interface + template<typename _IIter, typename _Predicate> + inline _IIter + find_if(_IIter __begin, _IIter __end, _Predicate __pred) + { + typedef std::iterator_traits<_IIter> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + return __find_if_switch(__begin, __end, __pred, _IteratorCategory()); + } + + // Sequential fallback + template<typename _IIter, typename _FIterator> + inline _IIter + 
find_first_of(_IIter __begin1, _IIter __end1, + _FIterator __begin2, _FIterator __end2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::find_first_of(__begin1, __end1, __begin2, __end2); + } + + // Sequential fallback + template<typename _IIter, typename _FIterator, + typename _BinaryPredicate> + inline _IIter + find_first_of(_IIter __begin1, _IIter __end1, + _FIterator __begin2, _FIterator __end2, + _BinaryPredicate __comp, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::find_first_of( + __begin1, __end1, __begin2, __end2, __comp); } + + // Sequential fallback for input iterator type + template<typename _IIter, typename _FIterator, + typename _IteratorTag1, typename _IteratorTag2> + inline _IIter + __find_first_of_switch(_IIter __begin1, _IIter __end1, + _FIterator __begin2, _FIterator __end2, + _IteratorTag1, _IteratorTag2) + { return find_first_of(__begin1, __end1, __begin2, __end2, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename _RAIter, typename _FIterator, + typename _BinaryPredicate, typename _IteratorTag> + inline _RAIter + __find_first_of_switch(_RAIter __begin1, + _RAIter __end1, + _FIterator __begin2, _FIterator __end2, + _BinaryPredicate __comp, random_access_iterator_tag, + _IteratorTag) + { + return __gnu_parallel:: + __find_template(__begin1, __end1, __begin1, __comp, + __gnu_parallel::__find_first_of_selector + <_FIterator>(__begin2, __end2)).first; + } + + // Sequential fallback for input iterator type + template<typename _IIter, typename _FIterator, + typename _BinaryPredicate, typename _IteratorTag1, + typename _IteratorTag2> + inline _IIter + __find_first_of_switch(_IIter __begin1, _IIter __end1, + _FIterator __begin2, _FIterator __end2, + _BinaryPredicate __comp, _IteratorTag1, _IteratorTag2) + { return find_first_of(__begin1, __end1, __begin2, __end2, __comp, + __gnu_parallel::sequential_tag()); } + + // Public interface + template<typename _IIter, 
typename _FIterator, + typename _BinaryPredicate> + inline _IIter + find_first_of(_IIter __begin1, _IIter __end1, + _FIterator __begin2, _FIterator __end2, + _BinaryPredicate __comp) + { + typedef std::iterator_traits<_IIter> _IIterTraits; + typedef std::iterator_traits<_FIterator> _FIterTraits; + typedef typename _IIterTraits::iterator_category _IIteratorCategory; + typedef typename _FIterTraits::iterator_category _FIteratorCategory; + + return __find_first_of_switch(__begin1, __end1, __begin2, __end2, __comp, + _IIteratorCategory(), _FIteratorCategory()); + } + + // Public interface, insert default comparator + template<typename _IIter, typename _FIterator> + inline _IIter + find_first_of(_IIter __begin1, _IIter __end1, + _FIterator __begin2, _FIterator __end2) + { + typedef std::iterator_traits<_IIter> _IIterTraits; + typedef std::iterator_traits<_FIterator> _FIterTraits; + typedef typename _IIterTraits::value_type _IValueType; + typedef typename _FIterTraits::value_type _FValueType; + + return __gnu_parallel::find_first_of(__begin1, __end1, __begin2, __end2, + __gnu_parallel::_EqualTo<_IValueType, _FValueType>()); + } + + // Sequential fallback + template<typename _IIter, typename _OutputIterator> + inline _OutputIterator + unique_copy(_IIter __begin1, _IIter __end1, _OutputIterator __out, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::unique_copy(__begin1, __end1, __out); } + + // Sequential fallback + template<typename _IIter, typename _OutputIterator, + typename _Predicate> + inline _OutputIterator + unique_copy(_IIter __begin1, _IIter __end1, _OutputIterator __out, + _Predicate __pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::unique_copy(__begin1, __end1, __out, __pred); } + + // Sequential fallback for input iterator case + template<typename _IIter, typename _OutputIterator, + typename _Predicate, typename _IteratorTag1, typename _IteratorTag2> + inline _OutputIterator + __unique_copy_switch(_IIter __begin, _IIter __last, + 
_OutputIterator __out, _Predicate __pred, + _IteratorTag1, _IteratorTag2) + { return _GLIBCXX_STD_A::unique_copy(__begin, __last, __out, __pred); } + + // Parallel unique_copy for random access iterators + template<typename _RAIter, typename RandomAccessOutputIterator, + typename _Predicate> + RandomAccessOutputIterator + __unique_copy_switch(_RAIter __begin, _RAIter __last, + RandomAccessOutputIterator __out, _Predicate __pred, + random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__last - __begin) + > __gnu_parallel::_Settings::get().unique_copy_minimal_n)) + return __gnu_parallel::__parallel_unique_copy( + __begin, __last, __out, __pred); + else + return _GLIBCXX_STD_A::unique_copy(__begin, __last, __out, __pred); + } + + // Public interface + template<typename _IIter, typename _OutputIterator> + inline _OutputIterator + unique_copy(_IIter __begin1, _IIter __end1, _OutputIterator __out) + { + typedef std::iterator_traits<_IIter> _IIterTraits; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits::iterator_category _IIteratorCategory; + typedef typename _IIterTraits::value_type _ValueType; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __unique_copy_switch( + __begin1, __end1, __out, equal_to<_ValueType>(), + _IIteratorCategory(), _OIterCategory()); + } + + // Public interface + template<typename _IIter, typename _OutputIterator, typename _Predicate> + inline _OutputIterator + unique_copy(_IIter __begin1, _IIter __end1, _OutputIterator __out, + _Predicate __pred) + { + typedef std::iterator_traits<_IIter> _IIterTraits; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits::iterator_category _IIteratorCategory; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __unique_copy_switch( + __begin1, __end1, __out, __pred, + _IIteratorCategory(), 
_OIterCategory()); + } + + // Sequential fallback + template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + set_union(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::set_union( + __begin1, __end1, __begin2, __end2, __out); } + + // Sequential fallback + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Predicate> + inline _OutputIterator + set_union(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, _Predicate __pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::set_union(__begin1, __end1, + __begin2, __end2, __out, __pred); } + + // Sequential fallback for input iterator case + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OutputIterator, typename _IteratorTag1, + typename _IteratorTag2, typename _IteratorTag3> + inline _OutputIterator + __set_union_switch( + _IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __result, _Predicate __pred, + _IteratorTag1, _IteratorTag2, _IteratorTag3) + { return _GLIBCXX_STD_A::set_union(__begin1, __end1, + __begin2, __end2, __result, __pred); } + + // Parallel set_union for random access iterators + template<typename _RAIter1, typename _RAIter2, + typename _Output_RAIter, typename _Predicate> + _Output_RAIter + __set_union_switch(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _RAIter2 __end2, + _Output_RAIter __result, _Predicate __pred, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end1 - __begin1) + >= __gnu_parallel::_Settings::get().set_union_minimal_n + || static_cast<__gnu_parallel::_SequenceIndex>(__end2 - __begin2) + >= __gnu_parallel::_Settings::get().set_union_minimal_n)) + return 
__gnu_parallel::__parallel_set_union( + __begin1, __end1, __begin2, __end2, __result, __pred); + else + return _GLIBCXX_STD_A::set_union(__begin1, __end1, + __begin2, __end2, __result, __pred); + } + + // Public interface + template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + set_union(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, _OutputIterator __out) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef typename _OIterTraits::iterator_category _OIterCategory; + typedef typename _IIterTraits1::value_type _ValueType1; + typedef typename _IIterTraits2::value_type _ValueType2; + + return __set_union_switch( + __begin1, __end1, __begin2, __end2, __out, + __gnu_parallel::_Less<_ValueType1, _ValueType2>(), + _IIterCategory1(), _IIterCategory2(), _OIterCategory()); + } + + // Public interface + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Predicate> + inline _OutputIterator + set_union(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, _Predicate __pred) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __set_union_switch( + __begin1, __end1, __begin2, __end2, __out, __pred, + _IIterCategory1(), _IIterCategory2(), _OIterCategory()); + } + + // Sequential fallback. 
+ template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + set_intersection(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::set_intersection(__begin1, __end1, + __begin2, __end2, __out); } + + // Sequential fallback. + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Predicate> + inline _OutputIterator + set_intersection(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, _Predicate __pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::set_intersection( + __begin1, __end1, __begin2, __end2, __out, __pred); } + + // Sequential fallback for input iterator case + template<typename _IIter1, typename _IIter2, + typename _Predicate, typename _OutputIterator, + typename _IteratorTag1, typename _IteratorTag2, + typename _IteratorTag3> + inline _OutputIterator + __set_intersection_switch(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __result, _Predicate __pred, + _IteratorTag1, _IteratorTag2, _IteratorTag3) + { return _GLIBCXX_STD_A::set_intersection(__begin1, __end1, __begin2, + __end2, __result, __pred); } + + // Parallel set_intersection for random access iterators + template<typename _RAIter1, typename _RAIter2, + typename _Output_RAIter, typename _Predicate> + _Output_RAIter + __set_intersection_switch(_RAIter1 __begin1, + _RAIter1 __end1, + _RAIter2 __begin2, + _RAIter2 __end2, + _Output_RAIter __result, + _Predicate __pred, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end1 - __begin1) + >= __gnu_parallel::_Settings::get().set_union_minimal_n + || static_cast<__gnu_parallel::_SequenceIndex>(__end2 - __begin2) + >= 
__gnu_parallel::_Settings::get().set_union_minimal_n)) + return __gnu_parallel::__parallel_set_intersection( + __begin1, __end1, __begin2, __end2, __result, __pred); + else + return _GLIBCXX_STD_A::set_intersection( + __begin1, __end1, __begin2, __end2, __result, __pred); + } + + // Public interface + template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + set_intersection(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef typename _OIterTraits::iterator_category _OIterCategory; + typedef typename _IIterTraits1::value_type _ValueType1; + typedef typename _IIterTraits2::value_type _ValueType2; + + return __set_intersection_switch( + __begin1, __end1, __begin2, __end2, __out, + __gnu_parallel::_Less<_ValueType1, _ValueType2>(), + _IIterCategory1(), _IIterCategory2(), _OIterCategory()); + } + + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Predicate> + inline _OutputIterator + set_intersection(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, _Predicate __pred) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __set_intersection_switch( + __begin1, __end1, __begin2, __end2, __out, __pred, + _IIterCategory1(), _IIterCategory2(), 
_OIterCategory()); + } + + // Sequential fallback + template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + set_symmetric_difference(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::set_symmetric_difference( + __begin1, __end1, __begin2, __end2, __out); } + + // Sequential fallback + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Predicate> + inline _OutputIterator + set_symmetric_difference(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, _Predicate __pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::set_symmetric_difference( + __begin1, __end1, __begin2, __end2, __out, __pred); } + + // Sequential fallback for input iterator case + template<typename _IIter1, typename _IIter2, + typename _Predicate, typename _OutputIterator, + typename _IteratorTag1, typename _IteratorTag2, + typename _IteratorTag3> + inline _OutputIterator + __set_symmetric_difference_switch( + _IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __result, _Predicate __pred, + _IteratorTag1, _IteratorTag2, _IteratorTag3) + { return _GLIBCXX_STD_A::set_symmetric_difference( + __begin1, __end1, __begin2, __end2, __result, __pred); } + + // Parallel set_symmetric_difference for random access iterators + template<typename _RAIter1, typename _RAIter2, + typename _Output_RAIter, typename _Predicate> + _Output_RAIter + __set_symmetric_difference_switch(_RAIter1 __begin1, + _RAIter1 __end1, + _RAIter2 __begin2, + _RAIter2 __end2, + _Output_RAIter __result, + _Predicate __pred, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end1 - __begin1) + >= 
__gnu_parallel::_Settings::get().set_symmetric_difference_minimal_n + || static_cast<__gnu_parallel::_SequenceIndex>(__end2 - __begin2) + >= __gnu_parallel::_Settings::get().set_symmetric_difference_minimal_n)) + return __gnu_parallel::__parallel_set_symmetric_difference( + __begin1, __end1, __begin2, __end2, __result, __pred); + else + return _GLIBCXX_STD_A::set_symmetric_difference( + __begin1, __end1, __begin2, __end2, __result, __pred); + } + + // Public interface. + template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + set_symmetric_difference(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef typename _OIterTraits::iterator_category _OIterCategory; + typedef typename _IIterTraits1::value_type _ValueType1; + typedef typename _IIterTraits2::value_type _ValueType2; + + return __set_symmetric_difference_switch( + __begin1, __end1, __begin2, __end2, __out, + __gnu_parallel::_Less<_ValueType1, _ValueType2>(), + _IIterCategory1(), _IIterCategory2(), _OIterCategory()); + } + + // Public interface. 
+ template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Predicate> + inline _OutputIterator + set_symmetric_difference(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, _Predicate __pred) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __set_symmetric_difference_switch( + __begin1, __end1, __begin2, __end2, __out, __pred, + _IIterCategory1(), _IIterCategory2(), _OIterCategory()); + } + + // Sequential fallback. + template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + set_difference(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::set_difference( + __begin1,__end1, __begin2, __end2, __out); } + + // Sequential fallback. + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Predicate> + inline _OutputIterator + set_difference(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, _Predicate __pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::set_difference(__begin1, __end1, + __begin2, __end2, __out, __pred); } + + // Sequential fallback for input iterator case. 
+ template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OutputIterator, typename _IteratorTag1, + typename _IteratorTag2, typename _IteratorTag3> + inline _OutputIterator + __set_difference_switch(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __result, _Predicate __pred, + _IteratorTag1, _IteratorTag2, _IteratorTag3) + { return _GLIBCXX_STD_A::set_difference( + __begin1, __end1, __begin2, __end2, __result, __pred); } + + // Parallel set_difference for random access iterators + template<typename _RAIter1, typename _RAIter2, + typename _Output_RAIter, typename _Predicate> + _Output_RAIter + __set_difference_switch(_RAIter1 __begin1, + _RAIter1 __end1, + _RAIter2 __begin2, + _RAIter2 __end2, + _Output_RAIter __result, _Predicate __pred, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end1 - __begin1) + >= __gnu_parallel::_Settings::get().set_difference_minimal_n + || static_cast<__gnu_parallel::_SequenceIndex>(__end2 - __begin2) + >= __gnu_parallel::_Settings::get().set_difference_minimal_n)) + return __gnu_parallel::__parallel_set_difference( + __begin1, __end1, __begin2, __end2, __result, __pred); + else + return _GLIBCXX_STD_A::set_difference( + __begin1, __end1, __begin2, __end2, __result, __pred); + } + + // Public interface + template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + set_difference(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef typename 
_OIterTraits::iterator_category _OIterCategory; + typedef typename _IIterTraits1::value_type _ValueType1; + typedef typename _IIterTraits2::value_type _ValueType2; + + return __set_difference_switch( + __begin1, __end1, __begin2, __end2, __out, + __gnu_parallel::_Less<_ValueType1, _ValueType2>(), + _IIterCategory1(), _IIterCategory2(), _OIterCategory()); + } + + // Public interface + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Predicate> + inline _OutputIterator + set_difference(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __out, _Predicate __pred) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __set_difference_switch( + __begin1, __end1, __begin2, __end2, __out, __pred, + _IIterCategory1(), _IIterCategory2(), _OIterCategory()); + } + + // Sequential fallback + template<typename _FIterator> + inline _FIterator + adjacent_find(_FIterator __begin, _FIterator __end, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::adjacent_find(__begin, __end); } + + // Sequential fallback + template<typename _FIterator, typename _BinaryPredicate> + inline _FIterator + adjacent_find(_FIterator __begin, _FIterator __end, + _BinaryPredicate __binary_pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::adjacent_find(__begin, __end, __binary_pred); } + + // Parallel algorithm for random access iterators + template<typename _RAIter> + _RAIter + __adjacent_find_switch(_RAIter __begin, _RAIter __end, + random_access_iterator_tag) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + + if 
(_GLIBCXX_PARALLEL_CONDITION(true)) + { + _RAIter __spot = __gnu_parallel:: + __find_template( + __begin, __end - 1, __begin, equal_to<_ValueType>(), + __gnu_parallel::__adjacent_find_selector()) + .first; + if (__spot == (__end - 1)) + return __end; + else + return __spot; + } + else + return adjacent_find(__begin, __end, __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case + template<typename _FIterator, typename _IteratorTag> + inline _FIterator + __adjacent_find_switch(_FIterator __begin, _FIterator __end, + _IteratorTag) + { return adjacent_find(__begin, __end, __gnu_parallel::sequential_tag()); } + + // Public interface + template<typename _FIterator> + inline _FIterator + adjacent_find(_FIterator __begin, _FIterator __end) + { + typedef iterator_traits<_FIterator> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __adjacent_find_switch(__begin, __end, _IteratorCategory()); + } + + // Sequential fallback for input iterator case + template<typename _FIterator, typename _BinaryPredicate, + typename _IteratorTag> + inline _FIterator + __adjacent_find_switch(_FIterator __begin, _FIterator __end, + _BinaryPredicate __pred, _IteratorTag) + { return adjacent_find(__begin, __end, __pred, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename _RAIter, typename _BinaryPredicate> + _RAIter + __adjacent_find_switch(_RAIter __begin, _RAIter __end, + _BinaryPredicate __pred, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + return __gnu_parallel::__find_template(__begin, __end, __begin, __pred, + __gnu_parallel:: + __adjacent_find_selector()).first; + else + return adjacent_find(__begin, __end, __pred, + __gnu_parallel::sequential_tag()); + } + + // Public interface + template<typename _FIterator, typename _BinaryPredicate> + inline _FIterator + adjacent_find(_FIterator __begin, _FIterator __end, + _BinaryPredicate 
__pred) + { + typedef iterator_traits<_FIterator> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __adjacent_find_switch(__begin, __end, __pred, + _IteratorCategory()); + } + + // Sequential fallback + template<typename _IIter, typename _Tp> + inline typename iterator_traits<_IIter>::difference_type + count(_IIter __begin, _IIter __end, const _Tp& __value, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::count(__begin, __end, __value); } + + // Parallel code for random access iterators + template<typename _RAIter, typename _Tp> + typename iterator_traits<_RAIter>::difference_type + __count_switch(_RAIter __begin, _RAIter __end, + const _Tp& __value, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_unbalanced) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef __gnu_parallel::_SequenceIndex _SequenceIndex; + + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().count_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + __gnu_parallel::__count_selector<_RAIter, _DifferenceType> + __functionality; + _DifferenceType __res = 0; + __gnu_parallel:: + __for_each_template_random_access( + __begin, __end, __value, __functionality, + std::plus<_SequenceIndex>(), __res, __res, -1, + __parallelism_tag); + return __res; + } + else + return count(__begin, __end, __value, + __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case. 
+ template<typename _IIter, typename _Tp, typename _IteratorTag> + inline typename iterator_traits<_IIter>::difference_type + __count_switch(_IIter __begin, _IIter __end, const _Tp& __value, + _IteratorTag) + { return count(__begin, __end, __value, __gnu_parallel::sequential_tag()); + } + + // Public interface. + template<typename _IIter, typename _Tp> + inline typename iterator_traits<_IIter>::difference_type + count(_IIter __begin, _IIter __end, const _Tp& __value, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __count_switch(__begin, __end, __value, _IteratorCategory(), + __parallelism_tag); + } + + template<typename _IIter, typename _Tp> + inline typename iterator_traits<_IIter>::difference_type + count(_IIter __begin, _IIter __end, const _Tp& __value) + { + typedef iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __count_switch(__begin, __end, __value, _IteratorCategory()); + } + + + // Sequential fallback. 
+ template<typename _IIter, typename _Predicate> + inline typename iterator_traits<_IIter>::difference_type + count_if(_IIter __begin, _IIter __end, _Predicate __pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::count_if(__begin, __end, __pred); } + + // Parallel count_if for random access iterators + template<typename _RAIter, typename _Predicate> + typename iterator_traits<_RAIter>::difference_type + __count_if_switch(_RAIter __begin, _RAIter __end, + _Predicate __pred, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_unbalanced) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef __gnu_parallel::_SequenceIndex _SequenceIndex; + + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().count_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + _DifferenceType __res = 0; + __gnu_parallel:: + __count_if_selector<_RAIter, _DifferenceType> + __functionality; + __gnu_parallel:: + __for_each_template_random_access( + __begin, __end, __pred, __functionality, + std::plus<_SequenceIndex>(), __res, __res, -1, + __parallelism_tag); + return __res; + } + else + return count_if(__begin, __end, __pred, + __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case. + template<typename _IIter, typename _Predicate, typename _IteratorTag> + inline typename iterator_traits<_IIter>::difference_type + __count_if_switch(_IIter __begin, _IIter __end, _Predicate __pred, + _IteratorTag) + { return count_if(__begin, __end, __pred, + __gnu_parallel::sequential_tag()); } + + // Public interface. 
+ template<typename _IIter, typename _Predicate> + inline typename iterator_traits<_IIter>::difference_type + count_if(_IIter __begin, _IIter __end, _Predicate __pred, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __count_if_switch(__begin, __end, __pred, _IteratorCategory(), + __parallelism_tag); + } + + template<typename _IIter, typename _Predicate> + inline typename iterator_traits<_IIter>::difference_type + count_if(_IIter __begin, _IIter __end, _Predicate __pred) + { + typedef iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __count_if_switch(__begin, __end, __pred, _IteratorCategory()); + } + + + // Sequential fallback. + template<typename _FIterator1, typename _FIterator2> + inline _FIterator1 + search(_FIterator1 __begin1, _FIterator1 __end1, + _FIterator2 __begin2, _FIterator2 __end2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::search(__begin1, __end1, __begin2, __end2); } + + // Parallel algorithm for random access iterator + template<typename _RAIter1, typename _RAIter2> + _RAIter1 + __search_switch(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _RAIter2 __end2, + random_access_iterator_tag, random_access_iterator_tag) + { + typedef std::iterator_traits<_RAIter1> _Iterator1Traits; + typedef typename _Iterator1Traits::value_type _ValueType1; + typedef std::iterator_traits<_RAIter2> _Iterator2Traits; + typedef typename _Iterator2Traits::value_type _ValueType2; + + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end1 - __begin1) + >= __gnu_parallel::_Settings::get().search_minimal_n)) + return __gnu_parallel:: + __search_template( + __begin1, __end1, __begin2, __end2, + __gnu_parallel::_EqualTo<_ValueType1, _ValueType2>()); + else + return search(__begin1, __end1, __begin2, __end2, + 
__gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case + template<typename _FIterator1, typename _FIterator2, + typename _IteratorTag1, typename _IteratorTag2> + inline _FIterator1 + __search_switch(_FIterator1 __begin1, _FIterator1 __end1, + _FIterator2 __begin2, _FIterator2 __end2, + _IteratorTag1, _IteratorTag2) + { return search(__begin1, __end1, __begin2, __end2, + __gnu_parallel::sequential_tag()); } + + // Public interface. + template<typename _FIterator1, typename _FIterator2> + inline _FIterator1 + search(_FIterator1 __begin1, _FIterator1 __end1, + _FIterator2 __begin2, _FIterator2 __end2) + { + typedef std::iterator_traits<_FIterator1> _Iterator1Traits; + typedef typename _Iterator1Traits::iterator_category _IteratorCategory1; + typedef std::iterator_traits<_FIterator2> _Iterator2Traits; + typedef typename _Iterator2Traits::iterator_category _IteratorCategory2; + + return __search_switch(__begin1, __end1, __begin2, __end2, + _IteratorCategory1(), _IteratorCategory2()); + } + + // Public interface. + template<typename _FIterator1, typename _FIterator2, + typename _BinaryPredicate> + inline _FIterator1 + search(_FIterator1 __begin1, _FIterator1 __end1, + _FIterator2 __begin2, _FIterator2 __end2, + _BinaryPredicate __pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::search( + __begin1, __end1, __begin2, __end2, __pred); } + + // Parallel algorithm for random access iterator. 
+ template<typename _RAIter1, typename _RAIter2, + typename _BinaryPredicate> + _RAIter1 + __search_switch(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _RAIter2 __end2, + _BinaryPredicate __pred, + random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end1 - __begin1) + >= __gnu_parallel::_Settings::get().search_minimal_n)) + return __gnu_parallel::__search_template(__begin1, __end1, + __begin2, __end2, __pred); + else + return search(__begin1, __end1, __begin2, __end2, __pred, + __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case + template<typename _FIterator1, typename _FIterator2, + typename _BinaryPredicate, typename _IteratorTag1, + typename _IteratorTag2> + inline _FIterator1 + __search_switch(_FIterator1 __begin1, _FIterator1 __end1, + _FIterator2 __begin2, _FIterator2 __end2, + _BinaryPredicate __pred, _IteratorTag1, _IteratorTag2) + { return search(__begin1, __end1, __begin2, __end2, __pred, + __gnu_parallel::sequential_tag()); } + + // Public interface + template<typename _FIterator1, typename _FIterator2, + typename _BinaryPredicate> + inline _FIterator1 + search(_FIterator1 __begin1, _FIterator1 __end1, + _FIterator2 __begin2, _FIterator2 __end2, + _BinaryPredicate __pred) + { + typedef std::iterator_traits<_FIterator1> _Iterator1Traits; + typedef typename _Iterator1Traits::iterator_category _IteratorCategory1; + typedef std::iterator_traits<_FIterator2> _Iterator2Traits; + typedef typename _Iterator2Traits::iterator_category _IteratorCategory2; + return __search_switch(__begin1, __end1, __begin2, __end2, __pred, + _IteratorCategory1(), _IteratorCategory2()); + } + + // Sequential fallback + template<typename _FIterator, typename _Integer, typename _Tp> + inline _FIterator + search_n(_FIterator __begin, _FIterator __end, _Integer __count, + const _Tp& __val, __gnu_parallel::sequential_tag) + { return 
_GLIBCXX_STD_A::search_n(__begin, __end, __count, __val); } + + // Sequential fallback + template<typename _FIterator, typename _Integer, typename _Tp, + typename _BinaryPredicate> + inline _FIterator + search_n(_FIterator __begin, _FIterator __end, _Integer __count, + const _Tp& __val, _BinaryPredicate __binary_pred, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::search_n( + __begin, __end, __count, __val, __binary_pred); } + + // Public interface. + template<typename _FIterator, typename _Integer, typename _Tp> + inline _FIterator + search_n(_FIterator __begin, _FIterator __end, _Integer __count, + const _Tp& __val) + { + typedef typename iterator_traits<_FIterator>::value_type _ValueType; + return __gnu_parallel::search_n(__begin, __end, __count, __val, + __gnu_parallel::_EqualTo<_ValueType, _Tp>()); + } + + // Parallel algorithm for random access iterators. + template<typename _RAIter, typename _Integer, + typename _Tp, typename _BinaryPredicate> + _RAIter + __search_n_switch(_RAIter __begin, _RAIter __end, _Integer __count, + const _Tp& __val, _BinaryPredicate __binary_pred, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().search_minimal_n)) + { + __gnu_parallel::_PseudoSequence<_Tp, _Integer> __ps(__val, __count); + return __gnu_parallel::__search_template( + __begin, __end, __ps.begin(), __ps.end(), __binary_pred); + } + else + return _GLIBCXX_STD_A::search_n(__begin, __end, __count, __val, + __binary_pred); + } + + // Sequential fallback for input iterator case. 
+ template<typename _FIterator, typename _Integer, typename _Tp, + typename _BinaryPredicate, typename _IteratorTag> + inline _FIterator + __search_n_switch(_FIterator __begin, _FIterator __end, _Integer __count, + const _Tp& __val, _BinaryPredicate __binary_pred, + _IteratorTag) + { return _GLIBCXX_STD_A::search_n(__begin, __end, __count, __val, + __binary_pred); } + + // Public interface. + template<typename _FIterator, typename _Integer, typename _Tp, + typename _BinaryPredicate> + inline _FIterator + search_n(_FIterator __begin, _FIterator __end, _Integer __count, + const _Tp& __val, _BinaryPredicate __binary_pred) + { + return __search_n_switch(__begin, __end, __count, __val, __binary_pred, + typename std::iterator_traits<_FIterator>:: + iterator_category()); + } + + + // Sequential fallback. + template<typename _IIter, typename _OutputIterator, + typename _UnaryOperation> + inline _OutputIterator + transform(_IIter __begin, _IIter __end, _OutputIterator __result, + _UnaryOperation __unary_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::transform(__begin, __end, __result, __unary_op); } + + // Parallel unary transform for random access iterators. 
+ template<typename _RAIter1, typename _RAIter2, + typename _UnaryOperation> + _RAIter2 + __transform1_switch(_RAIter1 __begin, _RAIter1 __end, + _RAIter2 __result, _UnaryOperation __unary_op, + random_access_iterator_tag, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().transform_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + bool __dummy = true; + typedef __gnu_parallel::_IteratorPair<_RAIter1, + _RAIter2, random_access_iterator_tag> _ItTrip; + _ItTrip __begin_pair(__begin, __result), + __end_pair(__end, __result + (__end - __begin)); + __gnu_parallel::__transform1_selector<_ItTrip> __functionality; + __gnu_parallel:: + __for_each_template_random_access( + __begin_pair, __end_pair, __unary_op, __functionality, + __gnu_parallel::_DummyReduct(), + __dummy, __dummy, -1, __parallelism_tag); + return __functionality._M_finish_iterator; + } + else + return transform(__begin, __end, __result, __unary_op, + __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case. + template<typename _RAIter1, typename _RAIter2, + typename _UnaryOperation, typename _IteratorTag1, + typename _IteratorTag2> + inline _RAIter2 + __transform1_switch(_RAIter1 __begin, _RAIter1 __end, + _RAIter2 __result, _UnaryOperation __unary_op, + _IteratorTag1, _IteratorTag2) + { return transform(__begin, __end, __result, __unary_op, + __gnu_parallel::sequential_tag()); } + + // Public interface. 
+ template<typename _IIter, typename _OutputIterator, + typename _UnaryOperation> + inline _OutputIterator + transform(_IIter __begin, _IIter __end, _OutputIterator __result, + _UnaryOperation __unary_op, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef std::iterator_traits<_IIter> _IIterTraits; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits::iterator_category _IIteratorCategory; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __transform1_switch(__begin, __end, __result, __unary_op, + _IIteratorCategory(), _OIterCategory(), + __parallelism_tag); + } + + template<typename _IIter, typename _OutputIterator, + typename _UnaryOperation> + inline _OutputIterator + transform(_IIter __begin, _IIter __end, _OutputIterator __result, + _UnaryOperation __unary_op) + { + typedef std::iterator_traits<_IIter> _IIterTraits; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits::iterator_category _IIteratorCategory; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __transform1_switch(__begin, __end, __result, __unary_op, + _IIteratorCategory(), _OIterCategory()); + } + + + // Sequential fallback + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _BinaryOperation> + inline _OutputIterator + transform(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _OutputIterator __result, + _BinaryOperation __binary_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::transform(__begin1, __end1, + __begin2, __result, __binary_op); } + + // Parallel binary transform for random access iterators. 
+ template<typename _RAIter1, typename _RAIter2, + typename _RAIter3, typename _BinaryOperation> + _RAIter3 + __transform2_switch(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, + _RAIter3 __result, _BinaryOperation __binary_op, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + (__end1 - __begin1) >= + __gnu_parallel::_Settings::get().transform_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + bool __dummy = true; + typedef __gnu_parallel::_IteratorTriple<_RAIter1, + _RAIter2, _RAIter3, + random_access_iterator_tag> _ItTrip; + _ItTrip __begin_triple(__begin1, __begin2, __result), + __end_triple(__end1, __begin2 + (__end1 - __begin1), + __result + (__end1 - __begin1)); + __gnu_parallel::__transform2_selector<_ItTrip> __functionality; + __gnu_parallel:: + __for_each_template_random_access(__begin_triple, __end_triple, + __binary_op, __functionality, + __gnu_parallel::_DummyReduct(), + __dummy, __dummy, -1, + __parallelism_tag); + return __functionality._M_finish_iterator; + } + else + return transform(__begin1, __end1, __begin2, __result, __binary_op, + __gnu_parallel::sequential_tag()); + } + + // Sequential fallback for input iterator case. + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _BinaryOperation, + typename _Tag1, typename _Tag2, typename _Tag3> + inline _OutputIterator + __transform2_switch(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _OutputIterator __result, + _BinaryOperation __binary_op, _Tag1, _Tag2, _Tag3) + { return transform(__begin1, __end1, __begin2, __result, __binary_op, + __gnu_parallel::sequential_tag()); } + + // Public interface. 
+ template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _BinaryOperation> + inline _OutputIterator + transform(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _OutputIterator __result, + _BinaryOperation __binary_op, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __transform2_switch( + __begin1, __end1, __begin2, __result, __binary_op, + _IIterCategory1(), _IIterCategory2(), _OIterCategory(), + __parallelism_tag); + } + + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _BinaryOperation> + inline _OutputIterator + transform(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _OutputIterator __result, + _BinaryOperation __binary_op) + { + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __transform2_switch( + __begin1, __end1, __begin2, __result, __binary_op, + _IIterCategory1(), _IIterCategory2(), _OIterCategory()); + } + + // Sequential fallback + template<typename _FIterator, typename _Tp> + inline void + replace(_FIterator __begin, _FIterator __end, const _Tp& __old_value, + const _Tp& __new_value, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::replace(__begin, __end, __old_value, __new_value); } + + // Sequential fallback for input iterator case + 
template<typename _FIterator, typename _Tp, typename _IteratorTag> + inline void + __replace_switch(_FIterator __begin, _FIterator __end, + const _Tp& __old_value, const _Tp& __new_value, + _IteratorTag) + { replace(__begin, __end, __old_value, __new_value, + __gnu_parallel::sequential_tag()); } + + // Parallel replace for random access iterators + template<typename _RAIter, typename _Tp> + inline void + __replace_switch(_RAIter __begin, _RAIter __end, + const _Tp& __old_value, const _Tp& __new_value, + random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + // XXX parallel version is where? + replace(__begin, __end, __old_value, __new_value, + __gnu_parallel::sequential_tag()); + } + + // Public interface + template<typename _FIterator, typename _Tp> + inline void + replace(_FIterator __begin, _FIterator __end, const _Tp& __old_value, + const _Tp& __new_value, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_FIterator> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + __replace_switch(__begin, __end, __old_value, __new_value, + _IteratorCategory(), + __parallelism_tag); + } + + template<typename _FIterator, typename _Tp> + inline void + replace(_FIterator __begin, _FIterator __end, const _Tp& __old_value, + const _Tp& __new_value) + { + typedef iterator_traits<_FIterator> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + __replace_switch(__begin, __end, __old_value, __new_value, + _IteratorCategory()); + } + + + // Sequential fallback + template<typename _FIterator, typename _Predicate, typename _Tp> + inline void + replace_if(_FIterator __begin, _FIterator __end, _Predicate __pred, + const _Tp& __new_value, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::replace_if(__begin, __end, __pred, __new_value); } + + // Sequential fallback for input iterator case + template<typename _FIterator, typename 
_Predicate, typename _Tp, + typename _IteratorTag> + inline void + __replace_if_switch(_FIterator __begin, _FIterator __end, + _Predicate __pred, const _Tp& __new_value, _IteratorTag) + { replace_if(__begin, __end, __pred, __new_value, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. + template<typename _RAIter, typename _Predicate, typename _Tp> + void + __replace_if_switch(_RAIter __begin, _RAIter __end, + _Predicate __pred, const _Tp& __new_value, + random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().replace_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + bool __dummy; + __gnu_parallel:: + __replace_if_selector<_RAIter, _Predicate, _Tp> + __functionality(__new_value); + __gnu_parallel:: + __for_each_template_random_access( + __begin, __end, __pred, __functionality, + __gnu_parallel::_DummyReduct(), + true, __dummy, -1, __parallelism_tag); + } + else + replace_if(__begin, __end, __pred, __new_value, + __gnu_parallel::sequential_tag()); + } + + // Public interface. 
+ template<typename _FIterator, typename _Predicate, typename _Tp> + inline void + replace_if(_FIterator __begin, _FIterator __end, + _Predicate __pred, const _Tp& __new_value, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef std::iterator_traits<_FIterator> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + __replace_if_switch(__begin, __end, __pred, __new_value, + _IteratorCategory(), __parallelism_tag); + } + + template<typename _FIterator, typename _Predicate, typename _Tp> + inline void + replace_if(_FIterator __begin, _FIterator __end, + _Predicate __pred, const _Tp& __new_value) + { + typedef std::iterator_traits<_FIterator> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + __replace_if_switch(__begin, __end, __pred, __new_value, + _IteratorCategory()); + } + + // Sequential fallback + template<typename _FIterator, typename _Generator> + inline void + generate(_FIterator __begin, _FIterator __end, _Generator __gen, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::generate(__begin, __end, __gen); } + + // Sequential fallback for input iterator case. + template<typename _FIterator, typename _Generator, typename _IteratorTag> + inline void + __generate_switch(_FIterator __begin, _FIterator __end, _Generator __gen, + _IteratorTag) + { generate(__begin, __end, __gen, __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. 
+ template<typename _RAIter, typename _Generator> + void + __generate_switch(_RAIter __begin, _RAIter __end, + _Generator __gen, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().generate_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + bool __dummy; + __gnu_parallel::__generate_selector<_RAIter> + __functionality; + __gnu_parallel:: + __for_each_template_random_access( + __begin, __end, __gen, __functionality, + __gnu_parallel::_DummyReduct(), + true, __dummy, -1, __parallelism_tag); + } + else + generate(__begin, __end, __gen, __gnu_parallel::sequential_tag()); + } + + // Public interface. + template<typename _FIterator, typename _Generator> + inline void + generate(_FIterator __begin, _FIterator __end, + _Generator __gen, __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef std::iterator_traits<_FIterator> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + __generate_switch(__begin, __end, __gen, _IteratorCategory(), + __parallelism_tag); + } + + template<typename _FIterator, typename _Generator> + inline void + generate(_FIterator __begin, _FIterator __end, _Generator __gen) + { + typedef std::iterator_traits<_FIterator> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + __generate_switch(__begin, __end, __gen, _IteratorCategory()); + } + + + // Sequential fallback. + template<typename _OutputIterator, typename _Size, typename _Generator> + inline _OutputIterator + generate_n(_OutputIterator __begin, _Size __n, _Generator __gen, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::generate_n(__begin, __n, __gen); } + + // Sequential fallback for input iterator case. 
+ template<typename _OutputIterator, typename _Size, typename _Generator, + typename _IteratorTag> + inline _OutputIterator + __generate_n_switch(_OutputIterator __begin, _Size __n, _Generator __gen, + _IteratorTag) + { return generate_n(__begin, __n, __gen, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. + template<typename _RAIter, typename _Size, typename _Generator> + inline _RAIter + __generate_n_switch(_RAIter __begin, _Size __n, _Generator __gen, + random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + // XXX parallel version is where? + return generate_n(__begin, __n, __gen, __gnu_parallel::sequential_tag()); + } + + // Public interface. + template<typename _OutputIterator, typename _Size, typename _Generator> + inline _OutputIterator + generate_n(_OutputIterator __begin, _Size __n, _Generator __gen, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef std::iterator_traits<_OutputIterator> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + return __generate_n_switch(__begin, __n, __gen, _IteratorCategory(), + __parallelism_tag); + } + + template<typename _OutputIterator, typename _Size, typename _Generator> + inline _OutputIterator + generate_n(_OutputIterator __begin, _Size __n, _Generator __gen) + { + typedef std::iterator_traits<_OutputIterator> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + return __generate_n_switch(__begin, __n, __gen, _IteratorCategory()); + } + + + // Sequential fallback. + template<typename _RAIter> + inline void + random_shuffle(_RAIter __begin, _RAIter __end, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::random_shuffle(__begin, __end); } + + // Sequential fallback. 
+ template<typename _RAIter, typename _RandomNumberGenerator> + inline void + random_shuffle(_RAIter __begin, _RAIter __end, + _RandomNumberGenerator& __rand, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::random_shuffle(__begin, __end, __rand); } + + + /** @brief Functor wrapper for std::rand(). */ + template<typename _MustBeInt = int> + struct _CRandNumber + { + int + operator()(int __limit) + { return rand() % __limit; } + }; + + // Fill in random number generator. + template<typename _RAIter> + inline void + random_shuffle(_RAIter __begin, _RAIter __end) + { + _CRandNumber<> __r; + // Parallelization still possible. + __gnu_parallel::random_shuffle(__begin, __end, __r); + } + + // Parallel algorithm for random access iterators. + template<typename _RAIter, typename _RandomNumberGenerator> + void + random_shuffle(_RAIter __begin, _RAIter __end, +#ifdef __GXX_EXPERIMENTAL_CXX0X__ + _RandomNumberGenerator&& __rand) +#else + _RandomNumberGenerator& __rand) +#endif + { + if (__begin == __end) + return; + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().random_shuffle_minimal_n)) + __gnu_parallel::__parallel_random_shuffle(__begin, __end, __rand); + else + __gnu_parallel::__sequential_random_shuffle(__begin, __end, __rand); + } + + // Sequential fallback. + template<typename _FIterator, typename _Predicate> + inline _FIterator + partition(_FIterator __begin, _FIterator __end, + _Predicate __pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::partition(__begin, __end, __pred); } + + // Sequential fallback for input iterator case. + template<typename _FIterator, typename _Predicate, typename _IteratorTag> + inline _FIterator + __partition_switch(_FIterator __begin, _FIterator __end, + _Predicate __pred, _IteratorTag) + { return partition(__begin, __end, __pred, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. 
+ template<typename _RAIter, typename _Predicate> + _RAIter + __partition_switch(_RAIter __begin, _RAIter __end, + _Predicate __pred, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().partition_minimal_n)) + { + typedef typename std::iterator_traits<_RAIter>:: + difference_type _DifferenceType; + _DifferenceType __middle = __gnu_parallel:: + __parallel_partition(__begin, __end, __pred, + __gnu_parallel::__get_max_threads()); + return __begin + __middle; + } + else + return partition(__begin, __end, __pred, + __gnu_parallel::sequential_tag()); + } + + // Public interface. + template<typename _FIterator, typename _Predicate> + inline _FIterator + partition(_FIterator __begin, _FIterator __end, _Predicate __pred) + { + typedef iterator_traits<_FIterator> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __partition_switch(__begin, __end, __pred, _IteratorCategory()); + } + + // sort interface + + // Sequential fallback + template<typename _RAIter> + inline void + sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::sort(__begin, __end); } + + // Sequential fallback + template<typename _RAIter, typename _Compare> + inline void + sort(_RAIter __begin, _RAIter __end, _Compare __comp, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::sort<_RAIter, _Compare>(__begin, __end, + __comp); } + + // Public interface + template<typename _RAIter, typename _Compare, + typename _Parallelism> + void + sort(_RAIter __begin, _RAIter __end, _Compare __comp, + _Parallelism __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + + if (__begin != __end) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) >= + __gnu_parallel::_Settings::get().sort_minimal_n)) + 
__gnu_parallel::__parallel_sort<false>( + __begin, __end, __comp, __parallelism); + else + sort(__begin, __end, __comp, __gnu_parallel::sequential_tag()); + } + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + sort(_RAIter __begin, _RAIter __end) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + sort(__begin, __end, std::less<_ValueType>(), + __gnu_parallel::default_parallel_tag()); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::default_parallel_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::parallel_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::multiway_mergesort_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::multiway_mergesort_sampling_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface, insert default 
comparator + template<typename _RAIter> + inline void + sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::multiway_mergesort_exact_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::quicksort_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::balanced_quicksort_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface + template<typename _RAIter, typename _Compare> + void + sort(_RAIter __begin, _RAIter __end, _Compare __comp) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + sort(__begin, __end, __comp, __gnu_parallel::default_parallel_tag()); + } + + + // stable_sort interface + + + // Sequential fallback + template<typename _RAIter> + inline void + stable_sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::stable_sort(__begin, __end); } + + // Sequential fallback + template<typename _RAIter, typename _Compare> + inline void + stable_sort(_RAIter __begin, _RAIter __end, + _Compare __comp, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::stable_sort<_RAIter, _Compare>( + __begin, __end, __comp); } + + // Public interface + template<typename _RAIter, typename _Compare, + typename _Parallelism> + void + stable_sort(_RAIter 
__begin, _RAIter __end, + _Compare __comp, _Parallelism __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + + if (__begin != __end) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) >= + __gnu_parallel::_Settings::get().sort_minimal_n)) + __gnu_parallel::__parallel_sort<true>( + __begin, __end, __comp, __parallelism); + else + stable_sort(__begin, __end, __comp, + __gnu_parallel::sequential_tag()); + } + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + stable_sort(_RAIter __begin, _RAIter __end) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + stable_sort(__begin, __end, std::less<_ValueType>(), + __gnu_parallel::default_parallel_tag()); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + stable_sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::default_parallel_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + stable_sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + stable_sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::parallel_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + stable_sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + stable_sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::multiway_mergesort_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + stable_sort(__begin, __end, std::less<_ValueType>(), 
__parallelism); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + stable_sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::quicksort_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + stable_sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + stable_sort(_RAIter __begin, _RAIter __end, + __gnu_parallel::balanced_quicksort_tag __parallelism) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + stable_sort(__begin, __end, std::less<_ValueType>(), __parallelism); + } + + // Public interface + template<typename _RAIter, typename _Compare> + void + stable_sort(_RAIter __begin, _RAIter __end, + _Compare __comp) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + stable_sort( + __begin, __end, __comp, __gnu_parallel::default_parallel_tag()); + } + + // Sequential fallback + template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + merge(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + _IIter2 __end2, _OutputIterator __result, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::merge( + __begin1, __end1, __begin2, __end2, __result); } + + // Sequential fallback + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Compare> + inline _OutputIterator + merge(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + _IIter2 __end2, _OutputIterator __result, _Compare __comp, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::merge( + __begin1, __end1, __begin2, __end2, __result, __comp); } + + // Sequential fallback for input iterator case + template<typename _IIter1, typename _IIter2, typename _OutputIterator, + typename _Compare, 
typename _IteratorTag1, + typename _IteratorTag2, typename _IteratorTag3> + inline _OutputIterator + __merge_switch(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __result, _Compare __comp, + _IteratorTag1, _IteratorTag2, _IteratorTag3) + { return _GLIBCXX_STD_A::merge(__begin1, __end1, __begin2, __end2, + __result, __comp); } + + // Parallel algorithm for random access iterators + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Compare> + _OutputIterator + __merge_switch(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _OutputIterator __result, _Compare __comp, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + (static_cast<__gnu_parallel::_SequenceIndex>(__end1 - __begin1) + >= __gnu_parallel::_Settings::get().merge_minimal_n + || static_cast<__gnu_parallel::_SequenceIndex>(__end2 - __begin2) + >= __gnu_parallel::_Settings::get().merge_minimal_n))) + return __gnu_parallel::__parallel_merge_advance( + __begin1, __end1, __begin2, __end2, __result, + (__end1 - __begin1) + (__end2 - __begin2), __comp); + else + return __gnu_parallel::__merge_advance( + __begin1, __end1, __begin2, __end2, __result, + (__end1 - __begin1) + (__end2 - __begin2), __comp); + } + + // Public interface + template<typename _IIter1, typename _IIter2, + typename _OutputIterator, typename _Compare> + inline _OutputIterator + merge(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + _IIter2 __end2, _OutputIterator __result, _Compare __comp) + { + typedef typename iterator_traits<_IIter1>::value_type _ValueType; + + typedef std::iterator_traits<_IIter1> _IIterTraits1; + typedef std::iterator_traits<_IIter2> _IIterTraits2; + typedef std::iterator_traits<_OutputIterator> _OIterTraits; + typedef typename _IIterTraits1::iterator_category + _IIterCategory1; + typedef typename _IIterTraits2::iterator_category + _IIterCategory2; 
+ typedef typename _OIterTraits::iterator_category _OIterCategory; + + return __merge_switch( + __begin1, __end1, __begin2, __end2, __result, __comp, + _IIterCategory1(), _IIterCategory2(), _OIterCategory()); + } + + + // Public interface, insert default comparator + template<typename _IIter1, typename _IIter2, + typename _OutputIterator> + inline _OutputIterator + merge(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + _IIter2 __end2, _OutputIterator __result) + { + typedef std::iterator_traits<_IIter1> _Iterator1Traits; + typedef std::iterator_traits<_IIter2> _Iterator2Traits; + typedef typename _Iterator1Traits::value_type _ValueType1; + typedef typename _Iterator2Traits::value_type _ValueType2; + + return __gnu_parallel::merge(__begin1, __end1, __begin2, __end2, + __result, __gnu_parallel::_Less<_ValueType1, _ValueType2>()); + } + + // Sequential fallback + template<typename _RAIter> + inline void + nth_element(_RAIter __begin, _RAIter __nth, + _RAIter __end, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::nth_element(__begin, __nth, __end); } + + // Sequential fallback + template<typename _RAIter, typename _Compare> + inline void + nth_element(_RAIter __begin, _RAIter __nth, + _RAIter __end, _Compare __comp, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::nth_element(__begin, __nth, __end, __comp); } + + // Public interface + template<typename _RAIter, typename _Compare> + inline void + nth_element(_RAIter __begin, _RAIter __nth, + _RAIter __end, _Compare __comp) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().nth_element_minimal_n)) + __gnu_parallel::__parallel_nth_element(__begin, __nth, __end, __comp); + else + nth_element(__begin, __nth, __end, __comp, + __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + nth_element(_RAIter __begin, _RAIter __nth, + 
_RAIter __end) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + __gnu_parallel::nth_element(__begin, __nth, __end, + std::less<_ValueType>()); + } + + // Sequential fallback + template<typename _RAIter, typename _Compare> + inline void + partial_sort(_RAIter __begin, _RAIter __middle, + _RAIter __end, _Compare __comp, + __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::partial_sort(__begin, __middle, __end, __comp); } + + // Sequential fallback + template<typename _RAIter> + inline void + partial_sort(_RAIter __begin, _RAIter __middle, + _RAIter __end, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_A::partial_sort(__begin, __middle, __end); } + + // Public interface, parallel algorithm for random access iterators + template<typename _RAIter, typename _Compare> + void + partial_sort(_RAIter __begin, _RAIter __middle, + _RAIter __end, _Compare __comp) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().partial_sort_minimal_n)) + __gnu_parallel:: + __parallel_partial_sort(__begin, __middle, __end, __comp); + else + partial_sort(__begin, __middle, __end, __comp, + __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template<typename _RAIter> + inline void + partial_sort(_RAIter __begin, _RAIter __middle, + _RAIter __end) + { + typedef iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + __gnu_parallel::partial_sort(__begin, __middle, __end, + std::less<_ValueType>()); + } + + // Sequential fallback + template<typename _FIterator> + inline _FIterator + max_element(_FIterator __begin, _FIterator __end, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::max_element(__begin, __end); } + + // Sequential fallback + template<typename _FIterator, typename _Compare> + inline _FIterator + max_element(_FIterator __begin, _FIterator __end, 
_Compare __comp, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::max_element(__begin, __end, __comp); } + + // Sequential fallback for input iterator case + template<typename _FIterator, typename _Compare, typename _IteratorTag> + inline _FIterator + __max_element_switch(_FIterator __begin, _FIterator __end, + _Compare __comp, _IteratorTag) + { return max_element(__begin, __end, __comp, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename _RAIter, typename _Compare> + _RAIter + __max_element_switch(_RAIter __begin, _RAIter __end, + _Compare __comp, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().max_element_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + _RAIter __res(__begin); + __gnu_parallel::__identity_selector<_RAIter> + __functionality; + __gnu_parallel:: + __for_each_template_random_access( + __begin, __end, __gnu_parallel::_Nothing(), __functionality, + __gnu_parallel::__max_element_reduct<_Compare, _RAIter>(__comp), + __res, __res, -1, __parallelism_tag); + return __res; + } + else + return max_element(__begin, __end, __comp, + __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template<typename _FIterator> + inline _FIterator + max_element(_FIterator __begin, _FIterator __end, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef typename iterator_traits<_FIterator>::value_type _ValueType; + return max_element(__begin, __end, std::less<_ValueType>(), + __parallelism_tag); + } + + template<typename _FIterator> + inline _FIterator + max_element(_FIterator __begin, _FIterator __end) + { + typedef typename iterator_traits<_FIterator>::value_type _ValueType; + return __gnu_parallel::max_element(__begin, __end, 
+ std::less<_ValueType>()); + } + + // Public interface + template<typename _FIterator, typename _Compare> + inline _FIterator + max_element(_FIterator __begin, _FIterator __end, _Compare __comp, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_FIterator> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __max_element_switch(__begin, __end, __comp, _IteratorCategory(), + __parallelism_tag); + } + + template<typename _FIterator, typename _Compare> + inline _FIterator + max_element(_FIterator __begin, _FIterator __end, _Compare __comp) + { + typedef iterator_traits<_FIterator> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __max_element_switch(__begin, __end, __comp, _IteratorCategory()); + } + + + // Sequential fallback + template<typename _FIterator> + inline _FIterator + min_element(_FIterator __begin, _FIterator __end, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::min_element(__begin, __end); } + + // Sequential fallback + template<typename _FIterator, typename _Compare> + inline _FIterator + min_element(_FIterator __begin, _FIterator __end, _Compare __comp, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::min_element(__begin, __end, __comp); } + + // Sequential fallback for input iterator case + template<typename _FIterator, typename _Compare, typename _IteratorTag> + inline _FIterator + __min_element_switch(_FIterator __begin, _FIterator __end, + _Compare __comp, _IteratorTag) + { return min_element(__begin, __end, __comp, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template<typename _RAIter, typename _Compare> + _RAIter + __min_element_switch(_RAIter __begin, _RAIter __end, + _Compare __comp, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + 
static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().min_element_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + _RAIter __res(__begin); + __gnu_parallel::__identity_selector<_RAIter> + __functionality; + __gnu_parallel:: + __for_each_template_random_access( + __begin, __end, __gnu_parallel::_Nothing(), __functionality, + __gnu_parallel::__min_element_reduct<_Compare, _RAIter>(__comp), + __res, __res, -1, __parallelism_tag); + return __res; + } + else + return min_element(__begin, __end, __comp, + __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template<typename _FIterator> + inline _FIterator + min_element(_FIterator __begin, _FIterator __end, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef typename iterator_traits<_FIterator>::value_type _ValueType; + return min_element(__begin, __end, std::less<_ValueType>(), + __parallelism_tag); + } + + template<typename _FIterator> + inline _FIterator + min_element(_FIterator __begin, _FIterator __end) + { + typedef typename iterator_traits<_FIterator>::value_type _ValueType; + return __gnu_parallel::min_element(__begin, __end, + std::less<_ValueType>()); + } + + // Public interface + template<typename _FIterator, typename _Compare> + inline _FIterator + min_element(_FIterator __begin, _FIterator __end, _Compare __comp, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_FIterator> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __min_element_switch(__begin, __end, __comp, _IteratorCategory(), + __parallelism_tag); + } + + template<typename _FIterator, typename _Compare> + inline _FIterator + min_element(_FIterator __begin, _FIterator __end, _Compare __comp) + { + typedef iterator_traits<_FIterator> _TraitsType; + typedef typename _TraitsType::iterator_category _IteratorCategory; + return __min_element_switch(__begin, __end, 
__comp, _IteratorCategory()); + } +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_ALGO_H */ diff --git a/libstdc++-v3/include/parallel/algobase.h b/libstdc++-v3/include/parallel/algobase.h new file mode 100644 index 000000000..20456b57b --- /dev/null +++ b/libstdc++-v3/include/parallel/algobase.h @@ -0,0 +1,279 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/algobase.h + * @brief Parallel STL function calls corresponding to the + * stl_algobase.h header. The functions defined here mainly do case + * switches and call the actual parallelized versions in other files. + * Inlining policy: Functions that basically only contain one + * function call, are declared inline. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_ALGOBASE_H +#define _GLIBCXX_PARALLEL_ALGOBASE_H 1 + +#include <bits/stl_algobase.h> +#include <parallel/base.h> +#include <parallel/tags.h> +#include <parallel/settings.h> +#include <parallel/find.h> +#include <parallel/find_selectors.h> + +namespace std _GLIBCXX_VISIBILITY(default) +{ +namespace __parallel +{ + // NB: equal and lexicographical_compare require mismatch. + + // Sequential fallback + template<typename _IIter1, typename _IIter2> + inline pair<_IIter1, _IIter2> + mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::mismatch(__begin1, __end1, __begin2); } + + // Sequential fallback + template<typename _IIter1, typename _IIter2, typename _Predicate> + inline pair<_IIter1, _IIter2> + mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + _Predicate __pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::mismatch(__begin1, __end1, __begin2, __pred); } + + // Sequential fallback for input iterator case + template<typename _IIter1, typename _IIter2, + typename _Predicate, typename _IteratorTag1, typename _IteratorTag2> + inline pair<_IIter1, _IIter2> + __mismatch_switch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + _Predicate __pred, _IteratorTag1, _IteratorTag2) + { return _GLIBCXX_STD_A::mismatch(__begin1, __end1, __begin2, __pred); } + + // Parallel mismatch for random access iterators + template<typename _RAIter1, typename _RAIter2, typename _Predicate> + pair<_RAIter1, _RAIter2> + __mismatch_switch(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _Predicate __pred, + random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + _RAIter1 __res = + __gnu_parallel::__find_template(__begin1, __end1, __begin2, __pred, + __gnu_parallel:: + __mismatch_selector()).first; + return make_pair(__res , __begin2 + (__res - __begin1)); + } + else + return _GLIBCXX_STD_A::mismatch(__begin1, 
__end1, __begin2, __pred); + } + + // Public interface + template<typename _IIter1, typename _IIter2> + inline pair<_IIter1, _IIter2> + mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2) + { + typedef std::iterator_traits<_IIter1> _Iterator1Traits; + typedef std::iterator_traits<_IIter2> _Iterator2Traits; + typedef typename _Iterator1Traits::value_type _ValueType1; + typedef typename _Iterator2Traits::value_type _ValueType2; + typedef typename _Iterator1Traits::iterator_category _IteratorCategory1; + typedef typename _Iterator2Traits::iterator_category _IteratorCategory2; + + typedef __gnu_parallel::_EqualTo<_ValueType1, _ValueType2> _EqualTo; + + return __mismatch_switch(__begin1, __end1, __begin2, _EqualTo(), + _IteratorCategory1(), _IteratorCategory2()); + } + + // Public interface + template<typename _IIter1, typename _IIter2, typename _Predicate> + inline pair<_IIter1, _IIter2> + mismatch(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + _Predicate __pred) + { + typedef std::iterator_traits<_IIter1> _Iterator1Traits; + typedef std::iterator_traits<_IIter2> _Iterator2Traits; + typedef typename _Iterator1Traits::iterator_category _IteratorCategory1; + typedef typename _Iterator2Traits::iterator_category _IteratorCategory2; + + return __mismatch_switch(__begin1, __end1, __begin2, __pred, + _IteratorCategory1(), _IteratorCategory2()); + } + + // Sequential fallback + template<typename _IIter1, typename _IIter2> + inline bool + equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::equal(__begin1, __end1, __begin2); } + + // Sequential fallback + template<typename _IIter1, typename _IIter2, typename _Predicate> + inline bool + equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + _Predicate __pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::equal(__begin1, __end1, __begin2, __pred); } + + // Public interface + template<typename _IIter1, typename _IIter2> + inline 
bool + equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2) + { + return __gnu_parallel::mismatch(__begin1, __end1, __begin2).first + == __end1; + } + + // Public interface + template<typename _IIter1, typename _IIter2, typename _Predicate> + inline bool + equal(_IIter1 __begin1, _IIter1 __end1, _IIter2 __begin2, + _Predicate __pred) + { + return __gnu_parallel::mismatch(__begin1, __end1, __begin2, __pred).first + == __end1; + } + + // Sequential fallback + template<typename _IIter1, typename _IIter2> + inline bool + lexicographical_compare(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::lexicographical_compare(__begin1, __end1, + __begin2, __end2); } + + // Sequential fallback + template<typename _IIter1, typename _IIter2, typename _Predicate> + inline bool + lexicographical_compare(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _Predicate __pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::lexicographical_compare( + __begin1, __end1, __begin2, __end2, __pred); } + + // Sequential fallback for input iterator case + template<typename _IIter1, typename _IIter2, + typename _Predicate, typename _IteratorTag1, typename _IteratorTag2> + inline bool + __lexicographical_compare_switch(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _Predicate __pred, + _IteratorTag1, _IteratorTag2) + { return _GLIBCXX_STD_A::lexicographical_compare( + __begin1, __end1, __begin2, __end2, __pred); } + + // Parallel lexicographical_compare for random access iterators + // Limitation: Both valuetypes must be the same + template<typename _RAIter1, typename _RAIter2, typename _Predicate> + bool + __lexicographical_compare_switch(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _RAIter2 __end2, + _Predicate __pred, + random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + typedef 
iterator_traits<_RAIter1> _TraitsType1; + typedef typename _TraitsType1::value_type _ValueType1; + + typedef iterator_traits<_RAIter2> _TraitsType2; + typedef typename _TraitsType2::value_type _ValueType2; + + typedef __gnu_parallel:: + _EqualFromLess<_ValueType1, _ValueType2, _Predicate> + _EqualFromLessCompare; + + // Longer sequence in first place. + if ((__end1 - __begin1) < (__end2 - __begin2)) + { + typedef pair<_RAIter1, _RAIter2> _SpotType; + _SpotType __mm = __mismatch_switch(__begin1, __end1, __begin2, + _EqualFromLessCompare(__pred), + random_access_iterator_tag(), + random_access_iterator_tag()); + + return (__mm.first == __end1) + || bool(__pred(*__mm.first, *__mm.second)); + } + else + { + typedef pair<_RAIter2, _RAIter1> _SpotType; + _SpotType __mm = __mismatch_switch(__begin2, __end2, __begin1, + _EqualFromLessCompare(__pred), + random_access_iterator_tag(), + random_access_iterator_tag()); + + return (__mm.first != __end2) + && bool(__pred(*__mm.second, *__mm.first)); + } + } + else + return _GLIBCXX_STD_A::lexicographical_compare( + __begin1, __end1, __begin2, __end2, __pred); + } + + // Public interface + template<typename _IIter1, typename _IIter2> + inline bool + lexicographical_compare(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2) + { + typedef iterator_traits<_IIter1> _TraitsType1; + typedef typename _TraitsType1::value_type _ValueType1; + typedef typename _TraitsType1::iterator_category _IteratorCategory1; + + typedef iterator_traits<_IIter2> _TraitsType2; + typedef typename _TraitsType2::value_type _ValueType2; + typedef typename _TraitsType2::iterator_category _IteratorCategory2; + typedef __gnu_parallel::_Less<_ValueType1, _ValueType2> _LessType; + + return __lexicographical_compare_switch( + __begin1, __end1, __begin2, __end2, _LessType(), + _IteratorCategory1(), _IteratorCategory2()); + } + + // Public interface + template<typename _IIter1, typename _IIter2, typename _Predicate> + inline bool + 
lexicographical_compare(_IIter1 __begin1, _IIter1 __end1, + _IIter2 __begin2, _IIter2 __end2, + _Predicate __pred) + { + typedef iterator_traits<_IIter1> _TraitsType1; + typedef typename _TraitsType1::iterator_category _IteratorCategory1; + + typedef iterator_traits<_IIter2> _TraitsType2; + typedef typename _TraitsType2::iterator_category _IteratorCategory2; + + return __lexicographical_compare_switch( + __begin1, __end1, __begin2, __end2, __pred, + _IteratorCategory1(), _IteratorCategory2()); + } +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_ALGOBASE_H */ diff --git a/libstdc++-v3/include/parallel/algorithm b/libstdc++-v3/include/parallel/algorithm new file mode 100644 index 000000000..3c93d5b68 --- /dev/null +++ b/libstdc++-v3/include/parallel/algorithm @@ -0,0 +1,40 @@ +// Algorithm extensions -*- C++ -*- + +// Copyright (C) 2007, 2009 +// Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. 
+ +/** @file parallel/algorithm + * This file is a GNU extension to the Standard C++ Library. + */ + +#ifndef _PARALLEL_ALGORITHM +#define _PARALLEL_ALGORITHM 1 + +#pragma GCC system_header + +#include <algorithm> +#include <parallel/algorithmfwd.h> +#include <parallel/algobase.h> +#include <parallel/algo.h> + +#endif diff --git a/libstdc++-v3/include/parallel/algorithmfwd.h b/libstdc++-v3/include/parallel/algorithmfwd.h new file mode 100644 index 000000000..7d4a792cb --- /dev/null +++ b/libstdc++-v3/include/parallel/algorithmfwd.h @@ -0,0 +1,908 @@ +// <algorithm> parallel extensions -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/algorithmfwd.h + * This file is a GNU parallel extension to the Standard C++ Library. 
+ */ + +#ifndef _GLIBCXX_PARALLEL_ALGORITHMFWD_H +#define _GLIBCXX_PARALLEL_ALGORITHMFWD_H 1 + +#pragma GCC system_header + +#include <parallel/tags.h> +#include <parallel/settings.h> + +namespace std _GLIBCXX_VISIBILITY(default) +{ +namespace __parallel +{ + template<typename _FIter> + _FIter + adjacent_find(_FIter, _FIter); + + template<typename _FIter> + _FIter + adjacent_find(_FIter, _FIter, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _IterTag> + _FIter + __adjacent_find_switch(_FIter, _FIter, _IterTag); + + template<typename _RAIter> + _RAIter + __adjacent_find_switch(_RAIter, _RAIter, random_access_iterator_tag); + + + template<typename _FIter, typename _BiPredicate> + _FIter + adjacent_find(_FIter, _FIter, _BiPredicate); + + template<typename _FIter, typename _BiPredicate> + _FIter + adjacent_find(_FIter, _FIter, _BiPredicate, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _BiPredicate, typename _IterTag> + _FIter + __adjacent_find_switch(_FIter, _FIter, _BiPredicate, _IterTag); + + template<typename _RAIter, typename _BiPredicate> + _RAIter + __adjacent_find_switch(_RAIter, _RAIter, _BiPredicate, + random_access_iterator_tag); + + + template<typename _IIter, typename _Tp> + typename iterator_traits<_IIter>::difference_type + count(_IIter, _IIter, const _Tp&); + + template<typename _IIter, typename _Tp> + typename iterator_traits<_IIter>::difference_type + count(_IIter, _IIter, const _Tp&, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Tp> + typename iterator_traits<_IIter>::difference_type + count(_IIter, _IIter, const _Tp&, __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Tp, typename _IterTag> + typename iterator_traits<_IIter>::difference_type + __count_switch(_IIter, _IIter, const _Tp&, _IterTag); + + template<typename _RAIter, typename _Tp> + typename iterator_traits<_RAIter>::difference_type + __count_switch(_RAIter, _RAIter, const _Tp&, 
random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_unbalanced); + + + template<typename _IIter, typename _Predicate> + typename iterator_traits<_IIter>::difference_type + count_if(_IIter, _IIter, _Predicate); + + template<typename _IIter, typename _Predicate> + typename iterator_traits<_IIter>::difference_type + count_if(_IIter, _IIter, _Predicate, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Predicate> + typename iterator_traits<_IIter>::difference_type + count_if(_IIter, _IIter, _Predicate, __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Predicate, typename _IterTag> + typename iterator_traits<_IIter>::difference_type + __count_if_switch(_IIter, _IIter, _Predicate, _IterTag); + + template<typename _RAIter, typename _Predicate> + typename iterator_traits<_RAIter>::difference_type + __count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_unbalanced); + + // algobase.h + template<typename _IIter1, typename _IIter2> + bool + equal(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _Predicate> + bool + equal(_IIter1, _IIter1, _IIter2, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2> + bool + equal(_IIter1, _IIter1, _IIter2); + + template<typename _IIter1, typename _IIter2, typename _Predicate> + bool + equal(_IIter1, _IIter1, _IIter2, _Predicate); + + template<typename _IIter, typename _Tp> + _IIter + find(_IIter, _IIter, const _Tp&, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Tp> + _IIter + find(_IIter, _IIter, const _Tp& __val); + + template<typename _IIter, typename _Tp, typename _IterTag> + _IIter + __find_switch(_IIter, _IIter, const _Tp&, _IterTag); + + template<typename _RAIter, typename _Tp> + _RAIter + __find_switch(_RAIter, 
_RAIter, const _Tp&, random_access_iterator_tag); + + template<typename _IIter, typename _Predicate> + _IIter + find_if(_IIter, _IIter, _Predicate, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Predicate> + _IIter + find_if(_IIter, _IIter, _Predicate); + + template<typename _IIter, typename _Predicate, typename _IterTag> + _IIter + __find_if_switch(_IIter, _IIter, _Predicate, _IterTag); + + template<typename _RAIter, typename _Predicate> + _RAIter + __find_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag); + + template<typename _IIter, typename _FIter> + _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _FIter, typename _BiPredicate> + _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter, _BiPredicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _FIter, typename _BiPredicate> + _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter, _BiPredicate); + + template<typename _IIter, typename _FIter> + _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter); + + template<typename _IIter, typename _FIter, + typename _IterTag1, typename _IterTag2> + _IIter + __find_first_of_switch( + _IIter, _IIter, _FIter, _FIter, _IterTag1, _IterTag2); + + template<typename _RAIter, typename _FIter, typename _BiPredicate, + typename _IterTag> + _RAIter + __find_first_of_switch(_RAIter, _RAIter, _FIter, _FIter, _BiPredicate, + random_access_iterator_tag, _IterTag); + + template<typename _IIter, typename _FIter, typename _BiPredicate, + typename _IterTag1, typename _IterTag2> + _IIter + __find_first_of_switch(_IIter, _IIter, _FIter, _FIter, _BiPredicate, + _IterTag1, _IterTag2); + + + template<typename _IIter, typename _Function> + _Function + for_each(_IIter, _IIter, _Function); + + template<typename _IIter, typename _Function> + _Function + for_each(_IIter, _IIter, _Function, __gnu_parallel::sequential_tag); + + template<typename 
_Iterator, typename _Function> + _Function + for_each(_Iterator, _Iterator, _Function, __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Function, typename _IterTag> + _Function + __for_each_switch(_IIter, _IIter, _Function, _IterTag); + + template<typename _RAIter, typename _Function> + _Function + __for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_balanced); + + + template<typename _FIter, typename _Generator> + void + generate(_FIter, _FIter, _Generator); + + template<typename _FIter, typename _Generator> + void + generate(_FIter, _FIter, _Generator, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Generator> + void + generate(_FIter, _FIter, _Generator, __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Generator, typename _IterTag> + void + __generate_switch(_FIter, _FIter, _Generator, _IterTag); + + template<typename _RAIter, typename _Generator> + void + __generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_balanced); + + template<typename _OIter, typename _Size, typename _Generator> + _OIter + generate_n(_OIter, _Size, _Generator); + + template<typename _OIter, typename _Size, typename _Generator> + _OIter + generate_n(_OIter, _Size, _Generator, __gnu_parallel::sequential_tag); + + template<typename _OIter, typename _Size, typename _Generator> + _OIter + generate_n(_OIter, _Size, _Generator, __gnu_parallel::_Parallelism); + + template<typename _OIter, typename _Size, typename _Generator, + typename _IterTag> + _OIter + __generate_n_switch(_OIter, _Size, _Generator, _IterTag); + + template<typename _RAIter, typename _Size, typename _Generator> + _RAIter + __generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = 
__gnu_parallel::parallel_balanced); + + template<typename _IIter1, typename _IIter2> + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _Predicate> + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2> + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2); + + template<typename _IIter1, typename _IIter2, typename _Predicate> + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, _Predicate); + + template<typename _IIter1, typename _IIter2, + typename _Predicate, typename _IterTag1, typename _IterTag2> + bool + __lexicographical_compare_switch(_IIter1, _IIter1, _IIter2, _IIter2, + _Predicate, _IterTag1, _IterTag2); + + template<typename _RAIter1, typename _RAIter2, typename _Predicate> + bool + __lexicographical_compare_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + _Predicate, random_access_iterator_tag, + random_access_iterator_tag); + + // algo.h + template<typename _IIter1, typename _IIter2> + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _Predicate> + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2> + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2); + + template<typename _IIter1, typename _IIter2, typename _Predicate> + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _IterTag1, typename _IterTag2> + pair<_IIter1, _IIter2> + __mismatch_switch(_IIter1, _IIter1, _IIter2, _Predicate, + _IterTag1, _IterTag2); + + template<typename _RAIter1, typename _RAIter2, typename _Predicate> + 
pair<_RAIter1, _RAIter2> + __mismatch_switch(_RAIter1, _RAIter1, _RAIter2, _Predicate, + random_access_iterator_tag, random_access_iterator_tag); + + template<typename _FIter1, typename _FIter2> + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, __gnu_parallel::sequential_tag); + + template<typename _FIter1, typename _FIter2> + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2); + + template<typename _FIter1, typename _FIter2, typename _BiPredicate> + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate, + __gnu_parallel::sequential_tag); + + template<typename _FIter1, typename _FIter2, typename _BiPredicate> + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate); + + template<typename _RAIter1, typename _RAIter2> + _RAIter1 + __search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + random_access_iterator_tag, random_access_iterator_tag); + + template<typename _FIter1, typename _FIter2, typename _IterTag1, + typename _IterTag2> + _FIter1 + __search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _IterTag1, _IterTag2); + + template<typename _RAIter1, typename _RAIter2, typename _BiPredicate> + _RAIter1 + __search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _BiPredicate, + random_access_iterator_tag, random_access_iterator_tag); + + template<typename _FIter1, typename _FIter2, typename _BiPredicate, + typename _IterTag1, typename _IterTag2> + _FIter1 + __search_switch(_FIter1, _FIter1, _FIter2, _FIter2, _BiPredicate, + _IterTag1, _IterTag2); + + template<typename _FIter, typename _Integer, typename _Tp> + _FIter + search_n(_FIter, _FIter, _Integer, const _Tp&, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Integer, typename _Tp, + typename _BiPredicate> + _FIter + search_n(_FIter, _FIter, _Integer, const _Tp&, _BiPredicate, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Integer, typename _Tp> + _FIter + search_n(_FIter, _FIter, _Integer, const _Tp&); + + template<typename _FIter, 
typename _Integer, typename _Tp, + typename _BiPredicate> + _FIter + search_n(_FIter, _FIter, _Integer, const _Tp&, _BiPredicate); + + template<typename _RAIter, typename _Integer, typename _Tp, + typename _BiPredicate> + _RAIter + __search_n_switch(_RAIter, _RAIter, _Integer, const _Tp&, + _BiPredicate, random_access_iterator_tag); + + template<typename _FIter, typename _Integer, typename _Tp, + typename _BiPredicate, typename _IterTag> + _FIter + __search_n_switch(_FIter, _FIter, _Integer, const _Tp&, + _BiPredicate, _IterTag); + + + template<typename _IIter, typename _OIter, typename _UnaryOperation> + _OIter + transform(_IIter, _IIter, _OIter, _UnaryOperation); + + template<typename _IIter, typename _OIter, typename _UnaryOperation> + _OIter + transform(_IIter, _IIter, _OIter, _UnaryOperation, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter, typename _UnaryOperation> + _OIter + transform(_IIter, _IIter, _OIter, _UnaryOperation, + __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _OIter, typename _UnaryOperation, + typename _IterTag1, typename _IterTag2> + _OIter + __transform1_switch(_IIter, _IIter, _OIter, _UnaryOperation, + _IterTag1, _IterTag2); + + + template<typename _RAIIter, typename _RAOIter, typename _UnaryOperation> + _RAOIter + __transform1_switch(_RAIIter, _RAIIter, _RAOIter, _UnaryOperation, + random_access_iterator_tag, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_balanced); + + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _BiOperation> + _OIter + transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _BiOperation> + _OIter + transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _BiOperation> + _OIter + 
transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation, + __gnu_parallel::_Parallelism); + + template<typename _RAIter1, typename _RAIter2, typename _RAIter3, + typename _BiOperation> + _RAIter3 + __transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_balanced); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _BiOperation, typename _Tag1, + typename _Tag2, typename _Tag3> + _OIter + __transform2_switch(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation, + _Tag1, _Tag2, _Tag3); + + + template<typename _FIter, typename _Tp> + void + replace(_FIter, _FIter, const _Tp&, const _Tp&); + + template<typename _FIter, typename _Tp> + void + replace(_FIter, _FIter, const _Tp&, const _Tp&, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Tp> + void + replace(_FIter, _FIter, const _Tp&, const _Tp&, + __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Tp, typename _IterTag> + void + __replace_switch(_FIter, _FIter, const _Tp&, const _Tp&, _IterTag); + + template<typename _RAIter, typename _Tp> + void + __replace_switch(_RAIter, _RAIter, const _Tp&, const _Tp&, + random_access_iterator_tag, __gnu_parallel::_Parallelism); + + + template<typename _FIter, typename _Predicate, typename _Tp> + void + replace_if(_FIter, _FIter, _Predicate, const _Tp&); + + template<typename _FIter, typename _Predicate, typename _Tp> + void + replace_if(_FIter, _FIter, _Predicate, const _Tp&, + __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Predicate, typename _Tp> + void + replace_if(_FIter, _FIter, _Predicate, const _Tp&, + __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Predicate, typename _Tp, + typename _IterTag> + void + __replace_if_switch(_FIter, _FIter, _Predicate, const _Tp&, _IterTag); + + 
template<typename _RAIter, typename _Predicate, typename _Tp> + void + __replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&, + random_access_iterator_tag, + __gnu_parallel::_Parallelism); + + + template<typename _FIter> + _FIter + max_element(_FIter, _FIter); + + template<typename _FIter> + _FIter + max_element(_FIter, _FIter, __gnu_parallel::sequential_tag); + + template<typename _FIter> + _FIter + max_element(_FIter, _FIter, __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Compare> + _FIter + max_element(_FIter, _FIter, _Compare); + + template<typename _FIter, typename _Compare> + _FIter + max_element(_FIter, _FIter, _Compare, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Compare> + _FIter + max_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Compare, typename _IterTag> + _FIter + __max_element_switch(_FIter, _FIter, _Compare, _IterTag); + + template<typename _RAIter, typename _Compare> + _RAIter + __max_element_switch( + _RAIter, _RAIter, _Compare, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_balanced); + + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Compare> + _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Compare> + _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Compare, typename _IterTag1, typename _IterTag2, + typename _IterTag3> + _OIter + 
__merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, + _IterTag1, _IterTag2, _IterTag3); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Compare> + _OIter + __merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, + random_access_iterator_tag, random_access_iterator_tag, + random_access_iterator_tag); + + + template<typename _FIter> + _FIter + min_element(_FIter, _FIter); + + template<typename _FIter> + _FIter + min_element(_FIter, _FIter, __gnu_parallel::sequential_tag); + + template<typename _FIter> + _FIter + min_element(_FIter, _FIter, + __gnu_parallel::_Parallelism __parallelism_tag); + + template<typename _FIter, typename _Compare> + _FIter + min_element(_FIter, _FIter, _Compare); + + template<typename _FIter, typename _Compare> + _FIter + min_element(_FIter, _FIter, _Compare, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Compare> + _FIter + min_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism); + + template<typename _FIter, typename _Compare, typename _IterTag> + _FIter + __min_element_switch(_FIter, _FIter, _Compare, _IterTag); + + template<typename _RAIter, typename _Compare> + _RAIter + __min_element_switch( + _RAIter, _RAIter, _Compare, random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_balanced); + + template<typename _RAIter> + void + nth_element(_RAIter, _RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _Compare> + void + nth_element(_RAIter, _RAIter, _RAIter, _Compare, + __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _Compare> + void + nth_element(_RAIter, _RAIter, _RAIter, _Compare); + + template<typename _RAIter> + void + nth_element(_RAIter, _RAIter, _RAIter); + + template<typename _RAIter, typename _Compare> + void + partial_sort(_RAIter, _RAIter, _RAIter, _Compare, + __gnu_parallel::sequential_tag); + + template<typename _RAIter> + 
void + partial_sort(_RAIter, _RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _Compare> + void + partial_sort(_RAIter, _RAIter, _RAIter, _Compare); + + template<typename _RAIter> + void + partial_sort(_RAIter, _RAIter, _RAIter); + + template<typename _FIter, typename _Predicate> + _FIter + partition(_FIter, _FIter, _Predicate, __gnu_parallel::sequential_tag); + + template<typename _FIter, typename _Predicate> + _FIter + partition(_FIter, _FIter, _Predicate); + + template<typename _FIter, typename _Predicate, typename _IterTag> + _FIter + __partition_switch(_FIter, _FIter, _Predicate, _IterTag); + + template<typename _RAIter, typename _Predicate> + _RAIter + __partition_switch( + _RAIter, _RAIter, _Predicate, random_access_iterator_tag); + + template<typename _RAIter> + void + random_shuffle(_RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _RandomNumberGenerator> + void + random_shuffle(_RAIter, _RAIter, _RandomNumberGenerator&, + __gnu_parallel::sequential_tag); + + template<typename _RAIter> + void + random_shuffle(_RAIter, _RAIter); + + template<typename _RAIter, typename _RandomNumberGenerator> + void + random_shuffle(_RAIter, _RAIter, +#ifdef __GXX_EXPERIMENTAL_CXX0X__ + _RandomNumberGenerator&&); +#else + _RandomNumberGenerator&); +#endif + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, 
_OIter, _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OIter, typename _IterTag1, typename _IterTag2, + typename _IterTag3> + _OIter + __set_union_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate, _IterTag1, _IterTag2, _IterTag3); + + template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter, + typename _Predicate> + _Output_RAIter + __set_union_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, _Output_RAIter, + _Predicate, random_access_iterator_tag, + random_access_iterator_tag, random_access_iterator_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OIter, typename _IterTag1, typename _IterTag2, + typename _IterTag3> + _OIter + __set_intersection_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate, _IterTag1, _IterTag2, _IterTag3); + + template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter, + typename _Predicate> + _Output_RAIter + __set_intersection_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + _Output_RAIter, _Predicate, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, 
_IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate, __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OIter, typename _IterTag1, typename _IterTag2, + typename _IterTag3> + _OIter + __set_symmetric_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, + _OIter, _Predicate, _IterTag1, _IterTag2, + _IterTag3); + + template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter, + typename _Predicate> + _Output_RAIter + __set_symmetric_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + _Output_RAIter, _Predicate, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag); + + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _OIter> + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template<typename _IIter1, typename _IIter2, typename _OIter, + typename _Predicate> + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Predicate); + + template<typename _IIter1, typename _IIter2, typename _Predicate, + typename _OIter, typename _IterTag1, 
typename _IterTag2, + typename _IterTag3> + _OIter + __set_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, + _Predicate, _IterTag1, _IterTag2, _IterTag3); + + template<typename _RAIter1, typename _RAIter2, typename _Output_RAIter, + typename _Predicate> + _Output_RAIter + __set_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, + _Output_RAIter, _Predicate, + random_access_iterator_tag, + random_access_iterator_tag, + random_access_iterator_tag); + + + template<typename _RAIter> + void + sort(_RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _Compare> + void + sort(_RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag); + + template<typename _RAIter> + void + sort(_RAIter, _RAIter); + + template<typename _RAIter, typename _Compare> + void + sort(_RAIter, _RAIter, _Compare); + + template<typename _RAIter> + void + stable_sort(_RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template<typename _RAIter, typename _Compare> + void + stable_sort(_RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag); + + template<typename _RAIter> + void + stable_sort(_RAIter, _RAIter); + + template<typename _RAIter, typename _Compare> + void + stable_sort(_RAIter, _RAIter, _Compare); + + template<typename _IIter, typename _OIter> + _OIter + unique_copy(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter, typename _Predicate> + _OIter + unique_copy(_IIter, _IIter, _OIter, _Predicate, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter> + _OIter + unique_copy(_IIter, _IIter, _OIter); + + template<typename _IIter, typename _OIter, typename _Predicate> + _OIter + unique_copy(_IIter, _IIter, _OIter, _Predicate); + + template<typename _IIter, typename _OIter, typename _Predicate, + typename _IterTag1, typename _IterTag2> + _OIter + __unique_copy_switch(_IIter, _IIter, _OIter, _Predicate, + _IterTag1, _IterTag2); + + template<typename 
_RAIter, typename _RandomAccess_OIter, typename _Predicate> + _RandomAccess_OIter + __unique_copy_switch(_RAIter, _RAIter, _RandomAccess_OIter, _Predicate, + random_access_iterator_tag, random_access_iterator_tag); +} // end namespace __parallel +} // end namespace std + +#endif /* _GLIBCXX_PARALLEL_ALGORITHMFWD_H */ diff --git a/libstdc++-v3/include/parallel/balanced_quicksort.h b/libstdc++-v3/include/parallel/balanced_quicksort.h new file mode 100644 index 000000000..441e7b44a --- /dev/null +++ b/libstdc++-v3/include/parallel/balanced_quicksort.h @@ -0,0 +1,492 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/balanced_quicksort.h + * @brief Implementation of a dynamically load-balanced parallel quicksort. + * + * It works in-place and needs only logarithmic extra memory. + * The algorithm is similar to the one proposed in + * + * P. Tsigas and Y. Zhang. 
+ * A simple, fast parallel implementation of quicksort and + * its performance evaluation on SUN enterprise 10000. + * In 11th Euromicro Conference on Parallel, Distributed and + * Network-Based Processing, page 372, 2003. + * + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H +#define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1 + +#include <parallel/basic_iterator.h> +#include <bits/stl_algo.h> +#include <bits/stl_function.h> + +#include <parallel/settings.h> +#include <parallel/partition.h> +#include <parallel/random_number.h> +#include <parallel/queue.h> + +#if _GLIBCXX_ASSERTIONS +#include <parallel/checkers.h> +#endif + +namespace __gnu_parallel +{ + /** @brief Information local to one thread in the parallel quicksort run. + * __qsb_conquer() receives an array of pointers to these objects and + * indexes it by thread number. */ + template<typename _RAIter> + struct _QSBThreadLocal + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + + /** @brief Contiguous part of the sequence, described by an + iterator pair (first is the begin, second is the end). */ + typedef std::pair<_RAIter, _RAIter> _Piece; + + /** @brief Initial piece to work on. */ + _Piece _M_initial; + + /** @brief Work-stealing queue. */ + _RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts; + + /** @brief Number of threads involved in this algorithm. */ + _ThreadIndex _M_num_threads; + + /** @brief Pointer to a counter of elements left over to sort. + * __qsb_conquer() decrements the counter under an OpenMP atomic. */ + volatile _DifferenceType* _M_elements_leftover; + + /** @brief The complete sequence to sort. */ + _Piece _M_global; + + /** @brief Constructor. + * @param __queue_size Size handed to the constructor of + * @c _M_leftover_parts, the work-stealing queue. */ + _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { } + }; + + /** @brief Balanced quicksort divide step. + * @param __begin Begin iterator of subsequence. + * @param __end End iterator of subsequence. + * @param __comp Comparator. 
+ * @param __num_threads Number of threads that are allowed to work on + * this part. + * @pre @c (__end-__begin)>=1 */ + template<typename _RAIter, typename _Compare> + typename std::iterator_traits<_RAIter>::difference_type + __qsb_divide(_RAIter __begin, _RAIter __end, + _Compare __comp, _ThreadIndex __num_threads) + { /* Returns the offset from __begin at which the pivot element ends up. */ + _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0); + + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + /* Pivot: picked by __median_of_three_iterators from the first, middle and last element. */ + _RAIter __pivot_pos = + __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2, + __end - 1, __comp); + +#if defined(_GLIBCXX_ASSERTIONS) /* NOTE(review): this guard tests defined(), the other assertion guards in this file test the macro value. */ + // Must be in between somewhere. + _DifferenceType __n = __end - __begin; + + _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin) + && !__comp(*(__begin + __n / 2), + *__pivot_pos)) + || (!__comp(*__pivot_pos, *__begin) + && !__comp(*(__end - 1), *__pivot_pos)) + || (!__comp(*__pivot_pos, *(__begin + __n / 2)) + && !__comp(*__begin, *__pivot_pos)) + || (!__comp(*__pivot_pos, *(__begin + __n / 2)) + && !__comp(*(__end - 1), *__pivot_pos)) + || (!__comp(*__pivot_pos, *(__end - 1)) + && !__comp(*__begin, *__pivot_pos)) + || (!__comp(*__pivot_pos, *(__end - 1)) + && !__comp(*(__begin + __n / 2), + *__pivot_pos))); +#endif + + // Swap pivot value to end. + if (__pivot_pos != (__end - 1)) + std::iter_swap(__pivot_pos, __end - 1); + __pivot_pos = __end - 1; + /* __pred: __comp with the pivot value bound through __binder2nd (presumably as the second argument; TODO confirm). */ + __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool> + __pred(__comp, *__pivot_pos); + + // Divide, returning __end - __begin - 1 in the worst case. + _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1, + __pred, + __num_threads); + + // Swap back pivot to middle. 
+ std::iter_swap(__begin + __split_pos, __pivot_pos); + __pivot_pos = __begin + __split_pos; + +#if _GLIBCXX_ASSERTIONS + _RAIter __r; + for (__r = __begin; __r != __pivot_pos; ++__r) + _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos)); + for (; __r != __end; ++__r) + _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos)); +#endif + + return __split_pos; + } + + /** @brief Quicksort conquer step. + * @param __tls Array of thread-local storages. + * @param __begin Begin iterator of subsequence. + * @param __end End iterator of subsequence. + * @param __comp Comparator. + * @param __iam Number of the thread processing this function. + * @param __num_threads + * Number of threads that are allowed to work on this part. */ + template<typename _RAIter, typename _Compare> + void + __qsb_conquer(_QSBThreadLocal<_RAIter>** __tls, + _RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __iam, _ThreadIndex __num_threads, + bool __parent_wait) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + + if (__num_threads <= 1 || __n <= 1) + { + __tls[__iam]->_M_initial.first = __begin; + __tls[__iam]->_M_initial.second = __end; + + __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait); + + return; + } + + // Divide step. + _DifferenceType __split_pos = + __qsb_divide(__begin, __end, __comp, __num_threads); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && + __split_pos < (__end - __begin)); +#endif + + _ThreadIndex + __num_threads_leftside = std::max<_ThreadIndex> + (1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos + * __num_threads / __n)); + +# pragma omp atomic + *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1; + + // Conquer step. 
+# pragma omp parallel num_threads(2) + { + bool __wait; + if(omp_get_num_threads() < 2) + __wait = false; + else + __wait = __parent_wait; + +# pragma omp sections + { +# pragma omp section + { + __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp, + __iam, __num_threads_leftside, __wait); + __wait = __parent_wait; + } + // The pivot_pos is left in place, to ensure termination. +# pragma omp section + { + __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp, + __iam + __num_threads_leftside, + __num_threads - __num_threads_leftside, __wait); + __wait = __parent_wait; + } + } + } + } + + /** + * @brief Quicksort step doing load-balanced local sort. + * @param __tls Array of thread-local storages. + * @param __comp Comparator. + * @param __iam Number of the thread processing this function. + */ + template<typename _RAIter, typename _Compare> + void + __qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls, + _Compare& __comp, _ThreadIndex __iam, + bool __wait) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef std::pair<_RAIter, _RAIter> _Piece; + + _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam]; + + _DifferenceType + __base_case_n = _Settings::get().sort_qsb_base_case_maximal_n; + if (__base_case_n < 2) + __base_case_n = 2; + _ThreadIndex __num_threads = __tl._M_num_threads; + + // Every thread has its own random number generator. + _RandomNumber __rng(__iam + 1); + + _Piece __current = __tl._M_initial; + + _DifferenceType __elements_done = 0; +#if _GLIBCXX_ASSERTIONS + _DifferenceType __total_elements_done = 0; +#endif + + for (;;) + { + // Invariant: __current must be a valid (maybe empty) range. + _RAIter __begin = __current.first, __end = __current.second; + _DifferenceType __n = __end - __begin; + + if (__n > __base_case_n) + { + // Divide. 
+ _RAIter __pivot_pos = __begin + __rng(__n); + + // Swap __pivot_pos value to end. + if (__pivot_pos != (__end - 1)) + std::iter_swap(__pivot_pos, __end - 1); + __pivot_pos = __end - 1; + + __gnu_parallel::__binder2nd + <_Compare, _ValueType, _ValueType, bool> + __pred(__comp, *__pivot_pos); + + // Divide, leave pivot unchanged in last place. + _RAIter __split_pos1, __split_pos2; + __split_pos1 = __gnu_sequential::partition(__begin, __end - 1, + __pred); + + // Left side: < __pivot_pos; __right side: >= __pivot_pos. +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 + && __split_pos1 < __end); +#endif + // Swap pivot back to middle. + if (__split_pos1 != __pivot_pos) + std::iter_swap(__split_pos1, __pivot_pos); + __pivot_pos = __split_pos1; + + // In case all elements are equal, __split_pos1 == 0. + if ((__split_pos1 + 1 - __begin) < (__n >> 7) + || (__end - __split_pos1) < (__n >> 7)) + { + // Very unequal split, one part smaller than one 128th + // elements not strictly larger than the pivot. + __gnu_parallel::__unary_negate<__gnu_parallel::__binder1st + <_Compare, _ValueType, _ValueType, bool>, _ValueType> + __pred(__gnu_parallel::__binder1st + <_Compare, _ValueType, _ValueType, bool> + (__comp, *__pivot_pos)); + + // Find other end of pivot-equal range. + __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, + __end, __pred); + } + else + // Only skip the pivot. + __split_pos2 = __split_pos1 + 1; + + // Elements equal to pivot are done. + __elements_done += (__split_pos2 - __split_pos1); +#if _GLIBCXX_ASSERTIONS + __total_elements_done += (__split_pos2 - __split_pos1); +#endif + // Always push larger part onto stack. + if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2))) + { + // Right side larger. 
+ if ((__split_pos2) != __end) + __tl._M_leftover_parts.push_front + (std::make_pair(__split_pos2, __end)); + + //__current.first = __begin; //already set anyway + __current.second = __split_pos1; + continue; + } + else + { + // Left side larger. + if (__begin != __split_pos1) + __tl._M_leftover_parts.push_front(std::make_pair + (__begin, __split_pos1)); + + __current.first = __split_pos2; + //__current.second = __end; //already set anyway + continue; + } + } + else + { + __gnu_sequential::sort(__begin, __end, __comp); + __elements_done += __n; +#if _GLIBCXX_ASSERTIONS + __total_elements_done += __n; +#endif + + // Prefer own stack, small pieces. + if (__tl._M_leftover_parts.pop_front(__current)) + continue; + +# pragma omp atomic + *__tl._M_elements_leftover -= __elements_done; + + __elements_done = 0; + +#if _GLIBCXX_ASSERTIONS + double __search_start = omp_get_wtime(); +#endif + + // Look for new work. + bool __successfully_stolen = false; + while (__wait && *__tl._M_elements_leftover > 0 + && !__successfully_stolen +#if _GLIBCXX_ASSERTIONS + // Possible dead-lock. + && (omp_get_wtime() < (__search_start + 1.0)) +#endif + ) + { + _ThreadIndex __victim; + __victim = __rng(__num_threads); + + // Large pieces. + __successfully_stolen = (__victim != __iam) + && __tls[__victim]->_M_leftover_parts.pop_back(__current); + if (!__successfully_stolen) + __yield(); +#if !defined(__ICC) && !defined(__ECC) +# pragma omp flush +#endif + } + +#if _GLIBCXX_ASSERTIONS + if (omp_get_wtime() >= (__search_start + 1.0)) + { + sleep(1); + _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() + < (__search_start + 1.0)); + } +#endif + if (!__successfully_stolen) + { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0); +#endif + return; + } + } + } + } + + /** @brief Top-level quicksort routine. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __comp Comparator. 
+ * @param __num_threads Number of threads that are allowed to work on + * this part. + */ + template<typename _RAIter, typename _Compare> + void + __parallel_sort_qsb(_RAIter __begin, _RAIter __end, + _Compare __comp, _ThreadIndex __num_threads) + { + _GLIBCXX_CALL(__end - __begin) + + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef std::pair<_RAIter, _RAIter> _Piece; + + typedef _QSBThreadLocal<_RAIter> _TLSType; + + _DifferenceType __n = __end - __begin; + + if (__n <= 1) + return; + + // At least one element per processor. + if (__num_threads > __n) + __num_threads = static_cast<_ThreadIndex>(__n); + + // Initialize thread local storage + _TLSType** __tls = new _TLSType*[__num_threads]; + _DifferenceType __queue_size = (__num_threads + * (_ThreadIndex)(__rd_log2(__n) + 1)); + for (_ThreadIndex __t = 0; __t < __num_threads; ++__t) + __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size); + + // There can never be more than ceil(__rd_log2(__n)) ranges on the + // stack, because + // 1. Only one processor pushes onto the stack + // 2. The largest range has at most length __n + // 3. Each range is larger than half of the range remaining + volatile _DifferenceType __elements_leftover = __n; + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + { + __tls[__i]->_M_elements_leftover = &__elements_leftover; + __tls[__i]->_M_num_threads = __num_threads; + __tls[__i]->_M_global = std::make_pair(__begin, __end); + + // Just in case nothing is left to assign. + __tls[__i]->_M_initial = std::make_pair(__end, __end); + } + + // Main recursion call. + __qsb_conquer(__tls, __begin, __begin + __n, __comp, 0, + __num_threads, true); + +#if _GLIBCXX_ASSERTIONS + // All stack must be empty. 
+ _Piece __dummy; + for (_ThreadIndex __i = 1; __i < __num_threads; ++__i) + _GLIBCXX_PARALLEL_ASSERT( + !__tls[__i]->_M_leftover_parts.pop_back(__dummy)); +#endif + + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + delete __tls[__i]; + delete[] __tls; + } +} // namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */ diff --git a/libstdc++-v3/include/parallel/base.h b/libstdc++-v3/include/parallel/base.h new file mode 100644 index 000000000..86adea816 --- /dev/null +++ b/libstdc++-v3/include/parallel/base.h @@ -0,0 +1,426 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/base.h + * @brief Sequential helper functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. 
+ +#ifndef _GLIBCXX_PARALLEL_BASE_H +#define _GLIBCXX_PARALLEL_BASE_H 1 + +#include <bits/c++config.h> +#include <bits/stl_function.h> +#include <omp.h> +#include <parallel/features.h> +#include <parallel/basic_iterator.h> +#include <parallel/parallel.h> + +// Parallel mode namespaces. + +/** + * @namespace std::__parallel + * @brief GNU parallel code, replaces standard behavior with parallel behavior. + */ +namespace std _GLIBCXX_VISIBILITY(default) +{ + namespace __parallel { } +} + +/** + * @namespace __gnu_parallel + * @brief GNU parallel code for public use. + */ +namespace __gnu_parallel +{ + // Import all the parallel versions of components in namespace std. + using namespace std::__parallel; +} + +/** + * @namespace __gnu_sequential + * @brief GNU sequential classes for public use. + */ +namespace __gnu_sequential +{ + // Import whatever is the serial version. +#ifdef _GLIBCXX_PARALLEL + using namespace std::_GLIBCXX_STD_A; +#else + using namespace std; +#endif +} + + +namespace __gnu_parallel +{ + // NB: Including this file cannot produce (unresolved) symbols from + // the OpenMP runtime unless the parallel mode is actually invoked + // and active, which implies that the OpenMP runtime is actually + // going to be linked in. + + /** @brief Returns @c omp_get_max_threads(), but never less than 1. */ + inline _ThreadIndex + __get_max_threads() + { + _ThreadIndex __i = omp_get_max_threads(); + return __i > 1 ? __i : 1; + } + + + /** @brief Returns @c true unless @c __p equals @c sequential. */ + inline bool + __is_parallel(const _Parallelism __p) { return __p != sequential; } + + + /** @brief Calculates the rounded-down logarithm of @c __n for base 2. + * @param __n Argument. + * @return floor(log2(__n)) for @c __n >= 1; 0 for any argument <1. + */ + template<typename _Size> + inline _Size + __rd_log2(_Size __n) + { + _Size __k; + for (__k = 0; __n > 1; __n >>= 1) + ++__k; + return __k; + } + + /** @brief Encode two integers into one gnu_parallel::_CASable. + * @param __a First integer, to be encoded in the most-significant @c + * _CASable_bits/2 bits.
+ * @param __b Second integer, to be encoded in the least-significant + * @c _CASable_bits/2 bits. + * @return value encoding @c __a and @c __b. + * @see __decode2 + */ + inline _CASable + __encode2(int __a, int __b) // both arguments must be non-negative + { + return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0); + } + + /** @brief Decode two integers from one gnu_parallel::_CASable. + * @param __x __gnu_parallel::_CASable to decode integers from. + * @param __a First integer, to be decoded from the most-significant + * @c _CASable_bits/2 bits of @c __x. + * @param __b Second integer, to be decoded from the least-significant + * @c _CASable_bits/2 bits of @c __x. + * @see __encode2 + */ + inline void + __decode2(_CASable __x, int& __a, int& __b) + { + __a = (int)((__x >> (_CASable_bits / 2)) & _CASable_mask); + __b = (int)((__x >> 0 ) & _CASable_mask); + } + + //needed for parallel "numeric", even if "algorithm" not included + + /** @brief Like std::min, but returns @c __b when neither argument + * is less than the other. */ + template<typename _Tp> + inline const _Tp& + min(const _Tp& __a, const _Tp& __b) + { return (__a < __b) ? __a : __b; } + + /** @brief Like std::max, but compares with @c operator> and returns + * @c __b when @c __a is not greater. */ + template<typename _Tp> + inline const _Tp& + max(const _Tp& __a, const _Tp& __b) + { return (__a > __b) ? __a : __b; } + + /** @brief Constructs predicate for equality from strict weak + * ordering predicate. + * + * The comparator is stored by reference, so it must outlive this + * object. + */ + template<typename _T1, typename _T2, typename _Compare> + class _EqualFromLess : public std::binary_function<_T1, _T2, bool> + { + private: + _Compare& _M_comp; + + public: + _EqualFromLess(_Compare& __comp) : _M_comp(__comp) { } + + // Equal means: neither argument is ordered before the other. + bool operator()(const _T1& __a, const _T2& __b) + { return !_M_comp(__a, __b) && !_M_comp(__b, __a); } + }; + + + /** @brief Similar to std::unary_negate, + * but giving the argument types explicitly.
*/ + template<typename _Predicate, typename argument_type> + class __unary_negate + : public std::unary_function<argument_type, bool> + { + protected: + _Predicate _M_pred; + + public: + explicit + __unary_negate(const _Predicate& __x) : _M_pred(__x) { } + + // NOTE(review): not const-qualified, so a const __unary_negate + // object cannot be invoked. + bool + operator()(const argument_type& __x) + { return !_M_pred(__x); } + }; + + /** @brief Similar to std::binder1st, + * but giving the argument types explicitly. */ + template<typename _Operation, typename _FirstArgumentType, + typename _SecondArgumentType, typename _ResultType> + class __binder1st + : public std::unary_function<_SecondArgumentType, _ResultType> + { + protected: + _Operation _M_op; + _FirstArgumentType _M_value; + + public: + // Stores copies of the operation and of the bound first argument. + __binder1st(const _Operation& __x, const _FirstArgumentType& __y) + : _M_op(__x), _M_value(__y) { } + + // NOTE(review): this overload is non-const while the overload + // below is const; __binder2nd has the qualifiers the other way + // round. + _ResultType + operator()(const _SecondArgumentType& __x) + { return _M_op(_M_value, __x); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + _ResultType + operator()(_SecondArgumentType& __x) const + { return _M_op(_M_value, __x); } + }; + + /** + * @brief Similar to std::binder2nd, but giving the argument types + * explicitly. + */ + template<typename _Operation, typename _FirstArgumentType, + typename _SecondArgumentType, typename _ResultType> + class __binder2nd + : public std::unary_function<_FirstArgumentType, _ResultType> + { + protected: + _Operation _M_op; + _SecondArgumentType _M_value; + + public: + // Stores copies of the operation and of the bound second argument. + __binder2nd(const _Operation& __x, const _SecondArgumentType& __y) + : _M_op(__x), _M_value(__y) { } + + _ResultType + operator()(const _FirstArgumentType& __x) const + { return _M_op(__x, _M_value); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + _ResultType + operator()(_FirstArgumentType& __x) + { return _M_op(__x, _M_value); } + }; + + /** @brief Similar to std::equal_to, but allows two different types.
*/ + template<typename _T1, typename _T2> + struct _EqualTo : std::binary_function<_T1, _T2, bool> + { + bool operator()(const _T1& __t1, const _T2& __t2) const + { return __t1 == __t2; } + }; + + /** @brief Similar to std::less, but allows two different types. + * Both argument orders are accepted. */ + template<typename _T1, typename _T2> + struct _Less : std::binary_function<_T1, _T2, bool> + { + bool + operator()(const _T1& __t1, const _T2& __t2) const + { return __t1 < __t2; } + + bool + operator()(const _T2& __t2, const _T1& __t1) const + { return __t2 < __t1; } + }; + + // Partial specialization for one type. Same as std::less. + template<typename _Tp> + struct _Less<_Tp, _Tp> + : public std::less<_Tp> { }; + + /** @brief Similar to std::plus, but allows two different types. + * The result type defaults to the type of the sum expression. */ + template<typename _Tp1, typename _Tp2, typename _Result + = __typeof__(*static_cast<_Tp1*>(0) + + *static_cast<_Tp2*>(0))> + struct _Plus : public std::binary_function<_Tp1, _Tp2, _Result> + { + _Result + operator()(const _Tp1& __x, const _Tp2& __y) const + { return __x + __y; } + }; + + // Partial specialization for one type. Same as std::plus. + template<typename _Tp> + struct _Plus<_Tp, _Tp, _Tp> + : public std::plus<_Tp> { }; + + /** @brief Similar to std::multiplies, but allows two different types. + * The result type defaults to the type of the product expression. */ + template<typename _Tp1, typename _Tp2, typename _Result + = __typeof__(*static_cast<_Tp1*>(0) + * *static_cast<_Tp2*>(0))> + struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Result> + { + _Result + operator()(const _Tp1& __x, const _Tp2& __y) const + { return __x * __y; } + }; + + // Partial specialization for one type. Same as std::multiplies. + template<typename _Tp> + struct _Multiplies<_Tp, _Tp, _Tp> + : public std::multiplies<_Tp> { }; + + /** @brief _Iterator associated with __gnu_parallel::_PseudoSequence. + * It features part of the usual random-access iterator functionality. + * @param _Tp Sequence value type. + * @param _DifferenceTp Sequence difference type.
+ */ + template<typename _Tp, typename _DifferenceTp> + class _PseudoSequenceIterator + { + public: + typedef _DifferenceTp _DifferenceType; + + // The iterator only refers to __val; it does not copy it. + _PseudoSequenceIterator(const _Tp& __val, _DifferenceType __pos) + : _M_val(__val), _M_pos(__pos) { } + + // Pre-increment operator. + _PseudoSequenceIterator& + operator++() + { + ++_M_pos; + return *this; + } + + // Post-increment operator. The returned copy refers to the same + // value and carries the position from before the increment. + _PseudoSequenceIterator + operator++(int) + { return _PseudoSequenceIterator(_M_val, _M_pos++); } + + // Every position yields the same element. + const _Tp& + operator*() const + { return _M_val; } + + const _Tp& + operator[](_DifferenceType) const + { return _M_val; } + + // Comparisons and distance look at the position only. + bool + operator==(const _PseudoSequenceIterator& __i2) const + { return _M_pos == __i2._M_pos; } + + bool + operator!=(const _PseudoSequenceIterator& __i2) const + { return _M_pos != __i2._M_pos; } + + _DifferenceType + operator-(const _PseudoSequenceIterator& __i2) const + { return _M_pos - __i2._M_pos; } + + private: + const _Tp& _M_val; + _DifferenceType _M_pos; + }; + + /** @brief Sequence that conceptually consists of multiple copies of + the same element. + * The copies are not stored explicitly, of course. + * @param _Tp Sequence value type. + * @param _DifferenceTp Sequence difference type. + */ + template<typename _Tp, typename _DifferenceTp> + class _PseudoSequence + { + public: + typedef _DifferenceTp _DifferenceType; + + // Better cast down to uint64_t, than up to _DifferenceTp. + typedef _PseudoSequenceIterator<_Tp, uint64_t> iterator; + + /** @brief Constructor. + * @param __val Element of the sequence; held by reference. + * @param __count Number of (virtual) copies. + */ + _PseudoSequence(const _Tp& __val, _DifferenceType __count) + : _M_val(__val), _M_count(__count) { } + + /** @brief Begin iterator. */ + iterator + begin() const + { return iterator(_M_val, 0); } + + /** @brief End iterator.
*/ + iterator + end() const + { return iterator(_M_val, _M_count); } + + private: + const _Tp& _M_val; + _DifferenceType _M_count; + }; + + /** @brief Compute the median of three referenced elements, + according to @c __comp. + * @param __a First iterator. + * @param __b Second iterator. + * @param __c Third iterator. + * @param __comp Comparator. + */ + template<typename _RAIter, typename _Compare> + _RAIter + __median_of_three_iterators(_RAIter __a, _RAIter __b, + _RAIter __c, _Compare __comp) + { + if (__comp(*__a, *__b)) + if (__comp(*__b, *__c)) + return __b; + else + if (__comp(*__a, *__c)) + return __c; + else + return __a; + else + { + // Just swap __a and __b. + if (__comp(*__a, *__c)) + return __a; + else + if (__comp(*__b, *__c)) + return __c; + else + return __b; + } + } + +#define _GLIBCXX_PARALLEL_ASSERT(_Condition) __glibcxx_assert(_Condition) + +} //namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_BASE_H */ diff --git a/libstdc++-v3/include/parallel/basic_iterator.h b/libstdc++-v3/include/parallel/basic_iterator.h new file mode 100644 index 000000000..a624edc1a --- /dev/null +++ b/libstdc++-v3/include/parallel/basic_iterator.h @@ -0,0 +1,41 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+ +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/basic_iterator.h + * @brief Includes the original header files concerned with iterators + * except for stream iterators. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_BASIC_ITERATOR_H +#define _GLIBCXX_PARALLEL_BASIC_ITERATOR_H 1 + +#include <bits/c++config.h> +#include <bits/stl_iterator_base_types.h> +#include <bits/stl_iterator_base_funcs.h> +#include <bits/stl_iterator.h> + +#endif /* _GLIBCXX_PARALLEL_BASIC_ITERATOR_H */ diff --git a/libstdc++-v3/include/parallel/checkers.h b/libstdc++-v3/include/parallel/checkers.h new file mode 100644 index 000000000..32507fd8c --- /dev/null +++ b/libstdc++-v3/include/parallel/checkers.h @@ -0,0 +1,73 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+ +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/checkers.h + * @brief Routines for checking the correctness of algorithm results. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_CHECKERS_H +#define _GLIBCXX_PARALLEL_CHECKERS_H 1 + +#include <cstdio> +#include <bits/stl_algobase.h> +#include <bits/stl_function.h> + +namespace __gnu_parallel +{ + /** + * @brief Check whether @c [__begin, @c __end) is sorted according + * to @c __comp. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __comp Comparator. + * @return @c true if sorted, @c false otherwise. + * + * The previous position is kept and dereferenced again on the next + * step, so the iterators must allow multiple passes. + */ + template<typename _IIter, typename _Compare> + bool + __is_sorted(_IIter __begin, _IIter __end, _Compare __comp) + { + if (__begin == __end) + return true; + + _IIter __current(__begin), __recent(__begin); + + for (++__current; __current != __end; ++__current) + { + // An element ordered before its predecessor breaks sortedness. + if (__comp(*__current, *__recent)) + return false; + __recent = __current; + } + + return true; + } +} + +#endif /* _GLIBCXX_PARALLEL_CHECKERS_H */ diff --git a/libstdc++-v3/include/parallel/compatibility.h b/libstdc++-v3/include/parallel/compatibility.h new file mode 100644 index 000000000..9fffd8e01 --- /dev/null +++ b/libstdc++-v3/include/parallel/compatibility.h @@ -0,0 +1,364 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library.
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/compatibility.h + * @brief Compatibility layer, mostly concerned with atomic operations. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H +#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1 + +#include <parallel/types.h> +#include <parallel/base.h> + +#if defined(__SUNPRO_CC) && defined(__sparc) +#include <sys/atomic.h> +#endif + +#if !defined(_WIN32) || defined (__CYGWIN__) +#include <sched.h> +#endif + +#if defined(_MSC_VER) +#include <Windows.h> +#include <intrin.h> +#undef max +#undef min +#endif + +#ifdef __MINGW32__ +// Including <windows.h> will drag in all the windows32 names. Since +// that can cause user code portability problems, we just declare the +// one needed function here. 
+extern "C" +__attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long); +#endif + +namespace __gnu_parallel +{ +#if defined(__ICC) + template<typename _MustBeInt = int> + int32_t __faa32(int32_t* __x, int32_t __inc) + { + asm volatile("lock xadd %0,%1" + : "=__r" (__inc), "=__m" (*__x) + : "0" (__inc) + : "memory"); + return __inc; + } +#if defined(__x86_64) + template<typename _MustBeInt = int> + int64_t __faa64(int64_t* __x, int64_t __inc) + { + asm volatile("lock xadd %0,%1" + : "=__r" (__inc), "=__m" (*__x) + : "0" (__inc) + : "memory"); + return __inc; + } +#endif +#endif + + // atomic functions only work on integers + + /** @brief Add a value to a variable, atomically. + * + * Implementation is heavily platform-dependent. + * @param __ptr Pointer to a 32-bit signed integer. + * @param __addend Value to add. + */ + inline int32_t + __fetch_and_add_32(volatile int32_t* __ptr, int32_t __addend) + { +#if defined(__ICC) //x86 version + return _InterlockedExchangeAdd((void*)__ptr, __addend); +#elif defined(__ECC) //IA-64 version + return _InterlockedExchangeAdd((void*)__ptr, __addend); +#elif defined(__ICL) || defined(_MSC_VER) + return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr), + __addend); +#elif defined(__GNUC__) + return __sync_fetch_and_add(__ptr, __addend); +#elif defined(__SUNPRO_CC) && defined(__sparc) + volatile int32_t __before, __after; + do + { + __before = *__ptr; + __after = __before + __addend; + } while (atomic_cas_32((volatile unsigned int*)__ptr, __before, + __after) != __before); + return __before; +#else //fallback, slow +#pragma message("slow __fetch_and_add_32") + int32_t __res; +#pragma omp critical + { + __res = *__ptr; + *(__ptr) += __addend; + } + return __res; +#endif + } + + /** @brief Add a value to a variable, atomically. + * + * Implementation is heavily platform-dependent. + * @param __ptr Pointer to a 64-bit signed integer. + * @param __addend Value to add. 
+ * @return The value of @c *__ptr before the addition. + */ + inline int64_t + __fetch_and_add_64(volatile int64_t* __ptr, int64_t __addend) + { +#if defined(__ICC) && defined(__x86_64) //x86 version + return __faa64<int>((int64_t*)__ptr, __addend); +#elif defined(__ECC) //IA-64 version + return _InterlockedExchangeAdd64((void*)__ptr, __addend); +#elif defined(__ICL) || defined(_MSC_VER) +#ifndef _WIN64 + _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case + return 0; +#else + return _InterlockedExchangeAdd64(__ptr, __addend); +#endif +#elif defined(__GNUC__) && defined(__x86_64) + return __sync_fetch_and_add(__ptr, __addend); +#elif defined(__GNUC__) && defined(__i386) && \ + (defined(__i686) || defined(__pentium4) || defined(__athlon) \ + || defined(__k8) || defined(__core2)) + return __sync_fetch_and_add(__ptr, __addend); +#elif defined(__SUNPRO_CC) && defined(__sparc) + // Retry the compare-and-swap until no other thread interfered. + volatile int64_t __before, __after; + do + { + __before = *__ptr; + __after = __before + __addend; + } while (atomic_cas_64((volatile unsigned long long*)__ptr, __before, + __after) != __before); + return __before; +#else //fallback, slow +#if defined(__GNUC__) && defined(__i386) + // XXX doesn't work with -march=native + //#warning "please compile with -march=i686 or better" +#endif +#pragma message("slow __fetch_and_add_64") + int64_t __res; +#pragma omp critical + { + __res = *__ptr; + *(__ptr) += __addend; + } + return __res; +#endif + } + + /** @brief Add a value to a variable, atomically. + * + * Implementation is heavily platform-dependent. + * @param __ptr Pointer to a signed integer. + * @param __addend Value to add.
+ */ + template<typename _Tp> + inline _Tp + __fetch_and_add(volatile _Tp* __ptr, _Tp __addend) + { + if (sizeof(_Tp) == sizeof(int32_t)) + return + (_Tp)__fetch_and_add_32((volatile int32_t*) __ptr, (int32_t)__addend); + else if (sizeof(_Tp) == sizeof(int64_t)) + return + (_Tp)__fetch_and_add_64((volatile int64_t*) __ptr, (int64_t)__addend); + else + _GLIBCXX_PARALLEL_ASSERT(false); + } + + +#if defined(__ICC) + + template<typename _MustBeInt = int> + inline int32_t + __cas32(volatile int32_t* __ptr, int32_t __old, int32_t __nw) + { + int32_t __before; + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(__before) + : "q"(__nw), "__m"(*(volatile long long*)(__ptr)), + "0"(__old) + : "memory"); + return __before; + } + +#if defined(__x86_64) + template<typename _MustBeInt = int> + inline int64_t + __cas64(volatile int64_t *__ptr, int64_t __old, int64_t __nw) + { + int64_t __before; + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a"(__before) + : "q"(__nw), "__m"(*(volatile long long*)(__ptr)), + "0"(__old) + : "memory"); + return __before; + } +#endif + +#endif + + /** @brief Compare @c *__ptr and @c __comparand. If equal, let @c + * *__ptr=__replacement and return @c true, return @c false otherwise. + * + * Implementation is heavily platform-dependent. + * @param __ptr Pointer to 32-bit signed integer. + * @param __comparand Compare value. + * @param __replacement Replacement value. 
+ */ + inline bool + __compare_and_swap_32(volatile int32_t* __ptr, int32_t __comparand, + int32_t __replacement) + { +#if defined(__ICC) //x86 version + return _InterlockedCompareExchange((void*)__ptr, __replacement, + __comparand) == __comparand; +#elif defined(__ECC) //IA-64 version + return _InterlockedCompareExchange((void*)__ptr, __replacement, + __comparand) == __comparand; +#elif defined(__ICL) || defined(_MSC_VER) + return _InterlockedCompareExchange( + reinterpret_cast<volatile long*>(__ptr), + __replacement, __comparand) + == __comparand; +#elif defined(__GNUC__) + return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); +#elif defined(__SUNPRO_CC) && defined(__sparc) + return atomic_cas_32((volatile unsigned int*)__ptr, __comparand, + __replacement) == __comparand; +#else +#pragma message("slow __compare_and_swap_32") + bool __res = false; +#pragma omp critical + { + if (*__ptr == __comparand) + { + *__ptr = __replacement; + __res = true; + } + } + return __res; +#endif + } + + /** @brief Compare @c *__ptr and @c __comparand. If equal, let @c + * *__ptr=__replacement and return @c true, return @c false otherwise. + * + * Implementation is heavily platform-dependent. + * @param __ptr Pointer to 64-bit signed integer. + * @param __comparand Compare value. + * @param __replacement Replacement value. 
+ */ + inline bool + __compare_and_swap_64(volatile int64_t* __ptr, int64_t __comparand, + int64_t __replacement) + { +#if defined(__ICC) && defined(__x86_64) //x86 version + return __cas64<int>(__ptr, __comparand, __replacement) == __comparand; +#elif defined(__ECC) //IA-64 version + return _InterlockedCompareExchange64((void*)__ptr, __replacement, + __comparand) == __comparand; +#elif defined(__ICL) || defined(_MSC_VER) +#ifndef _WIN64 + _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case + return 0; +#else + return _InterlockedCompareExchange64(__ptr, __replacement, + __comparand) == __comparand; +#endif + +#elif defined(__GNUC__) && defined(__x86_64) + return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); +#elif defined(__GNUC__) && defined(__i386) && \ + (defined(__i686) || defined(__pentium4) || defined(__athlon) \ + || defined(__k8) || defined(__core2)) + return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); +#elif defined(__SUNPRO_CC) && defined(__sparc) + return atomic_cas_64((volatile unsigned long long*)__ptr, + __comparand, __replacement) == __comparand; +#else +#if defined(__GNUC__) && defined(__i386) + // XXX -march=native + //#warning "please compile with -march=i686 or better" +#endif +#pragma message("slow __compare_and_swap_64") + bool __res = false; +#pragma omp critical + { + if (*__ptr == __comparand) + { + *__ptr = __replacement; + __res = true; + } + } + return __res; +#endif + } + + /** @brief Compare @c *__ptr and @c __comparand. If equal, let @c + * *__ptr=__replacement and return @c true, return @c false otherwise. + * + * Implementation is heavily platform-dependent. + * @param __ptr Pointer to signed integer. + * @param __comparand Compare value. + * @param __replacement Replacement value. 
*/ + template<typename _Tp> + inline bool + __compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement) + { + if (sizeof(_Tp) == sizeof(int32_t)) + return __compare_and_swap_32((volatile int32_t*) __ptr, + (int32_t)__comparand, + (int32_t)__replacement); + else if (sizeof(_Tp) == sizeof(int64_t)) + return __compare_and_swap_64((volatile int64_t*) __ptr, + (int64_t)__comparand, + (int64_t)__replacement); + else + _GLIBCXX_PARALLEL_ASSERT(false); + } + + /** @brief Yield the control to another thread, without waiting for + the end to the time slice. */ + inline void + __yield() + { +#if defined (_WIN32) && !defined (__CYGWIN__) + Sleep(0); +#else + sched_yield(); +#endif + } +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */ diff --git a/libstdc++-v3/include/parallel/compiletime_settings.h b/libstdc++-v3/include/parallel/compiletime_settings.h new file mode 100644 index 000000000..e375a94a9 --- /dev/null +++ b/libstdc++-v3/include/parallel/compiletime_settings.h @@ -0,0 +1,75 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. 

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/compiletime_settings.h
 *  @brief Defines on options concerning debugging and performance, at
 *  compile-time.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Johannes Singler.

#include <cstdio>

/** @brief Determine verbosity level of the parallel mode.
 *  Level 1 prints a message each time a parallel-mode function is entered. */
#define _GLIBCXX_VERBOSE_LEVEL 0

/** @def _GLIBCXX_CALL
 *  @brief Macro to produce log message when entering a function.
 *  @param __n Input size.
 *  @see _GLIBCXX_VERBOSE_LEVEL */
#if (_GLIBCXX_VERBOSE_LEVEL == 0)
#define _GLIBCXX_CALL(__n)
#endif
#if (_GLIBCXX_VERBOSE_LEVEL == 1)
// The first conversion must be %s: it prints __PRETTY_FUNCTION__.
#define _GLIBCXX_CALL(__n) \
  printf(" %s:\niam = %d, __n = %ld, __num_threads = %d\n", \
  __PRETTY_FUNCTION__, omp_get_thread_num(), (__n), __get_max_threads());
#endif

#ifndef _GLIBCXX_SCALE_DOWN_FPU
/** @brief Use floating-point scaling instead of modulo for mapping
 *  random numbers to a range.  This can be faster on certain CPUs. */
#define _GLIBCXX_SCALE_DOWN_FPU 0
#endif

#ifndef _GLIBCXX_ASSERTIONS
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
 *  Should be switched on only locally. */
#define _GLIBCXX_ASSERTIONS 0
#endif

#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
/** @brief Consider the size of the L1 cache for
 *  __gnu_parallel::__parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
#endif
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
/** @brief Consider the size of the TLB for
 *  __gnu_parallel::__parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0
#endif
diff --git a/libstdc++-v3/include/parallel/equally_split.h b/libstdc++-v3/include/parallel/equally_split.h new file mode 100644 index 000000000..481c98868 --- /dev/null +++ b/libstdc++-v3/include/parallel/equally_split.h @@ -0,0 +1,88 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/equally_split.h + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H +#define _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H 1 + +namespace __gnu_parallel +{ + /** @brief function to split a sequence into parts of almost equal size. + * + * The resulting sequence __s of length __num_threads+1 contains the + * splitting positions when splitting the range [0,__n) into parts of + * almost equal size (plus minus 1).
The first entry is 0, the last + * one n. There may result empty parts. + * @param __n Number of elements + * @param __num_threads Number of parts + * @param __s Splitters + * @returns End of __splitter sequence, i.e. @c __s+__num_threads+1 */ + template<typename _DifferenceType, typename _OutputIterator> + _OutputIterator + equally_split(_DifferenceType __n, _ThreadIndex __num_threads, + _OutputIterator __s) + { + _DifferenceType __chunk_length = __n / __num_threads; + _DifferenceType __num_longer_chunks = __n % __num_threads; + _DifferenceType __pos = 0; + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + { + *__s++ = __pos; + __pos += ((__i < __num_longer_chunks) + ? (__chunk_length + 1) : __chunk_length); + } + *__s++ = __n; + return __s; + } + + /** @brief function to split a sequence into parts of almost equal size. + * + * Returns the position of the splitting point between + * thread number __thread_no (included) and + * thread number __thread_no+1 (excluded). + * @param __n Number of elements + * @param __num_threads Number of parts + * @returns splitting point */ + template<typename _DifferenceType> + _DifferenceType + equally_split_point(_DifferenceType __n, + _ThreadIndex __num_threads, + _ThreadIndex __thread_no) + { + _DifferenceType __chunk_length = __n / __num_threads; + _DifferenceType __num_longer_chunks = __n % __num_threads; + if (__thread_no < __num_longer_chunks) + return __thread_no * (__chunk_length + 1); + else + return __num_longer_chunks * (__chunk_length + 1) + + (__thread_no - __num_longer_chunks) * __chunk_length; + } +} + +#endif /* _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H */ diff --git a/libstdc++-v3/include/parallel/features.h b/libstdc++-v3/include/parallel/features.h new file mode 100644 index 000000000..077429f16 --- /dev/null +++ b/libstdc++-v3/include/parallel/features.h @@ -0,0 +1,104 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/features.h + * @brief Defines on whether to include algorithm variants. + * + * Fewer variants reduce executable size and compile time. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_FEATURES_H +#define _GLIBCXX_PARALLEL_FEATURES_H 1 + +#ifndef _GLIBCXX_MERGESORT +/** @def _GLIBCXX_MERGESORT + * @brief Include parallel multi-way mergesort. + * @see __gnu_parallel::_Settings::sort_algorithm */ +#define _GLIBCXX_MERGESORT 1 +#endif + +#ifndef _GLIBCXX_QUICKSORT +/** @def _GLIBCXX_QUICKSORT + * @brief Include parallel unbalanced quicksort. + * @see __gnu_parallel::_Settings::sort_algorithm */ +#define _GLIBCXX_QUICKSORT 1 +#endif + +#ifndef _GLIBCXX_BAL_QUICKSORT +/** @def _GLIBCXX_BAL_QUICKSORT + * @brief Include parallel dynamically load-balanced quicksort.
+ * @see __gnu_parallel::_Settings::sort_algorithm */ +#define _GLIBCXX_BAL_QUICKSORT 1 +#endif + +#ifndef _GLIBCXX_FIND_GROWING_BLOCKS +/** @brief Include the growing blocks variant for std::find. + * @see __gnu_parallel::_Settings::find_algorithm */ +#define _GLIBCXX_FIND_GROWING_BLOCKS 1 +#endif + +#ifndef _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS +/** @brief Include the equal-sized blocks variant for std::find. + * @see __gnu_parallel::_Settings::find_algorithm */ +#define _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS 1 +#endif + +#ifndef _GLIBCXX_FIND_EQUAL_SPLIT +/** @def _GLIBCXX_FIND_EQUAL_SPLIT + * @brief Include the equal splitting variant for std::find. + * @see __gnu_parallel::_Settings::find_algorithm */ +#define _GLIBCXX_FIND_EQUAL_SPLIT 1 +#endif + + +#ifndef _GLIBCXX_TREE_INITIAL_SPLITTING +/** @def _GLIBCXX_TREE_INITIAL_SPLITTING + * @brief Include the initial splitting variant for + * _Rb_tree::insert_unique(_IIter beg, _IIter __end). + * @see __gnu_parallel::_Rb_tree */ +#define _GLIBCXX_TREE_INITIAL_SPLITTING 1 +#endif + +#ifndef _GLIBCXX_TREE_DYNAMIC_BALANCING +/** @def _GLIBCXX_TREE_DYNAMIC_BALANCING + * @brief Include the dynamic balancing variant for + * _Rb_tree::insert_unique(_IIter beg, _IIter __end). + * @see __gnu_parallel::_Rb_tree */ +#define _GLIBCXX_TREE_DYNAMIC_BALANCING 1 +#endif + +#ifndef _GLIBCXX_TREE_FULL_COPY +/** @def _GLIBCXX_TREE_FULL_COPY + * @brief In order to sort the input sequence of + * _Rb_tree::insert_unique(_IIter beg, _IIter __end) a + * full copy of the input elements is done. + * @see __gnu_parallel::_Rb_tree */ +#define _GLIBCXX_TREE_FULL_COPY 1 +#endif + + +#endif /* _GLIBCXX_PARALLEL_FEATURES_H */ diff --git a/libstdc++-v3/include/parallel/find.h b/libstdc++-v3/include/parallel/find.h new file mode 100644 index 000000000..28bc703e5 --- /dev/null +++ b/libstdc++-v3/include/parallel/find.h @@ -0,0 +1,405 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 
+// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/find.h + * @brief Parallel implementation base for std::find(), std::equal() + * and related functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze and Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_FIND_H +#define _GLIBCXX_PARALLEL_FIND_H 1 + +#include <bits/stl_algobase.h> + +#include <parallel/features.h> +#include <parallel/parallel.h> +#include <parallel/compatibility.h> +#include <parallel/equally_split.h> + +namespace __gnu_parallel +{ + /** + * @brief Parallel std::find, switch for different algorithms. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. Must have same + * length as first sequence. + * @param __pred Find predicate. + * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) + * @return Place of finding in both sequences. 
+ */ + template<typename _RAIter1, + typename _RAIter2, + typename _Pred, + typename _Selector> + inline std::pair<_RAIter1, _RAIter2> + __find_template(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred, _Selector __selector) + { + switch (_Settings::get().find_algorithm) + { + case GROWING_BLOCKS: + return __find_template(__begin1, __end1, __begin2, __pred, + __selector, growing_blocks_tag()); + case CONSTANT_SIZE_BLOCKS: + return __find_template(__begin1, __end1, __begin2, __pred, + __selector, constant_size_blocks_tag()); + case EQUAL_SPLIT: + return __find_template(__begin1, __end1, __begin2, __pred, + __selector, equal_split_tag()); + default: + _GLIBCXX_PARALLEL_ASSERT(false); + return std::make_pair(__begin1, __begin2); + } + } + +#if _GLIBCXX_FIND_EQUAL_SPLIT + + /** + * @brief Parallel std::find, equal splitting variant. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. Second __sequence + * must have same length as first sequence. + * @param __pred Find predicate. + * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) + * @return Place of finding in both sequences. 
+ */ + template<typename _RAIter1, + typename _RAIter2, + typename _Pred, + typename _Selector> + std::pair<_RAIter1, _RAIter2> + __find_template(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred, + _Selector __selector, equal_split_tag) + { + _GLIBCXX_CALL(__end1 - __begin1) + + typedef std::iterator_traits<_RAIter1> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename _TraitsType::value_type _ValueType; + + _DifferenceType __length = __end1 - __begin1; + _DifferenceType __result = __length; + _DifferenceType* __borders; + + omp_lock_t __result_lock; + omp_init_lock(&__result_lock); + + _ThreadIndex __num_threads = __get_max_threads(); +# pragma omp parallel num_threads(__num_threads) + { +# pragma omp single + { + __num_threads = omp_get_num_threads(); + __borders = new _DifferenceType[__num_threads + 1]; + equally_split(__length, __num_threads, __borders); + } //single + + _ThreadIndex __iam = omp_get_thread_num(); + _DifferenceType __start = __borders[__iam], + __stop = __borders[__iam + 1]; + + _RAIter1 __i1 = __begin1 + __start; + _RAIter2 __i2 = __begin2 + __start; + for (_DifferenceType __pos = __start; __pos < __stop; ++__pos) + { +# pragma omp flush(__result) + // Result has been set to something lower. + if (__result < __pos) + break; + + if (__selector(__i1, __i2, __pred)) + { + omp_set_lock(&__result_lock); + if (__pos < __result) + __result = __pos; + omp_unset_lock(&__result_lock); + break; + } + ++__i1; + ++__i2; + } + } //parallel + + omp_destroy_lock(&__result_lock); + delete[] __borders; + + return std::pair<_RAIter1, _RAIter2>(__begin1 + __result, + __begin2 + __result); + } + +#endif + +#if _GLIBCXX_FIND_GROWING_BLOCKS + + /** + * @brief Parallel std::find, growing block size variant. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. 
Second __sequence + * must have same length as first sequence. + * @param __pred Find predicate. + * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) + * @return Place of finding in both sequences. + * @see __gnu_parallel::_Settings::find_sequential_search_size + * @see __gnu_parallel::_Settings::find_scale_factor + * + * There are two main differences between the growing blocks and + * the constant-size blocks variants. + * 1. For GB, the block size grows; for CSB, the block size is fixed. + * 2. For GB, the blocks are allocated dynamically; + * for CSB, the blocks are allocated in a predetermined manner, + * namely spacial round-robin. + */ + template<typename _RAIter1, + typename _RAIter2, + typename _Pred, + typename _Selector> + std::pair<_RAIter1, _RAIter2> + __find_template(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred, _Selector __selector, + growing_blocks_tag) + { + _GLIBCXX_CALL(__end1 - __begin1) + + typedef std::iterator_traits<_RAIter1> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename _TraitsType::value_type _ValueType; + + const _Settings& __s = _Settings::get(); + + _DifferenceType __length = __end1 - __begin1; + + _DifferenceType + __sequential_search_size = std::min<_DifferenceType> + (__length, __s.find_sequential_search_size); + + // Try it sequentially first. + std::pair<_RAIter1, _RAIter2> + __find_seq_result = __selector._M_sequential_algorithm + (__begin1, __begin1 + __sequential_search_size, + __begin2, __pred); + + if (__find_seq_result.first != (__begin1 + __sequential_search_size)) + return __find_seq_result; + + // Index of beginning of next free block (after sequential find). 
+ _DifferenceType __next_block_start = __sequential_search_size; + _DifferenceType __result = __length; + + omp_lock_t __result_lock; + omp_init_lock(&__result_lock); + + const float __scale_factor = __s.find_scale_factor; + + _ThreadIndex __num_threads = __get_max_threads(); +# pragma omp parallel shared(__result) num_threads(__num_threads) + { +# pragma omp single + __num_threads = omp_get_num_threads(); + + // Not within first __k elements -> start parallel. + _ThreadIndex __iam = omp_get_thread_num(); + + _DifferenceType __block_size = + std::max<_DifferenceType>(1, __scale_factor * __next_block_start); + _DifferenceType __start = __fetch_and_add<_DifferenceType> + (&__next_block_start, __block_size); + + // Get new block, update pointer to next block. + _DifferenceType __stop = + std::min<_DifferenceType>(__length, __start + __block_size); + + std::pair<_RAIter1, _RAIter2> __local_result; + + while (__start < __length) + { +# pragma omp flush(__result) + // Get new value of result. + if (__result < __start) + { + // No chance to find first element. + break; + } + + __local_result = __selector._M_sequential_algorithm + (__begin1 + __start, __begin1 + __stop, + __begin2 + __start, __pred); + + if (__local_result.first != (__begin1 + __stop)) + { + omp_set_lock(&__result_lock); + if ((__local_result.first - __begin1) < __result) + { + __result = __local_result.first - __begin1; + + // Result cannot be in future blocks, stop algorithm. + __fetch_and_add<_DifferenceType>(&__next_block_start, + __length); + } + omp_unset_lock(&__result_lock); + } + + _DifferenceType __block_size = + std::max<_DifferenceType>(1, __scale_factor * __next_block_start); + + // Get new block, update pointer to next block. + __start = __fetch_and_add<_DifferenceType>(&__next_block_start, + __block_size); + __stop = + std::min<_DifferenceType>(__length, __start + __block_size); + } + } //parallel + + omp_destroy_lock(&__result_lock); + + // Return iterator on found element. 
+ return + std::pair<_RAIter1, _RAIter2>(__begin1 + __result, + __begin2 + __result); + } + +#endif + +#if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS + + /** + * @brief Parallel std::find, constant block size variant. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. Second __sequence + * must have same length as first sequence. + * @param __pred Find predicate. + * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...) + * @return Place of finding in both sequences. + * @see __gnu_parallel::_Settings::find_sequential_search_size + * @see __gnu_parallel::_Settings::find_block_size + * There are two main differences between the growing blocks and the + * constant-size blocks variants. + * 1. For GB, the block size grows; for CSB, the block size is fixed. + * 2. For GB, the blocks are allocated dynamically; for CSB, the + * blocks are allocated in a predetermined manner, namely spacial + * round-robin. + */ + template<typename _RAIter1, + typename _RAIter2, + typename _Pred, + typename _Selector> + std::pair<_RAIter1, _RAIter2> + __find_template(_RAIter1 __begin1, _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred, _Selector __selector, + constant_size_blocks_tag) + { + _GLIBCXX_CALL(__end1 - __begin1) + typedef std::iterator_traits<_RAIter1> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + typedef typename _TraitsType::value_type _ValueType; + + const _Settings& __s = _Settings::get(); + + _DifferenceType __length = __end1 - __begin1; + + _DifferenceType __sequential_search_size = std::min<_DifferenceType> + (__length, __s.find_sequential_search_size); + + // Try it sequentially first. 
+ std::pair<_RAIter1, _RAIter2> + __find_seq_result = __selector._M_sequential_algorithm + (__begin1, __begin1 + __sequential_search_size, __begin2, __pred); + + if (__find_seq_result.first != (__begin1 + __sequential_search_size)) + return __find_seq_result; + + _DifferenceType __result = __length; + omp_lock_t __result_lock; + omp_init_lock(&__result_lock); + + // Not within first __sequential_search_size elements -> start parallel. + + _ThreadIndex __num_threads = __get_max_threads(); +# pragma omp parallel shared(__result) num_threads(__num_threads) + { +# pragma omp single + __num_threads = omp_get_num_threads(); + + _ThreadIndex __iam = omp_get_thread_num(); + _DifferenceType __block_size = __s.find_initial_block_size; + + // First element of thread's current iteration. + _DifferenceType __iteration_start = __sequential_search_size; + + // Where to work (initialization). + _DifferenceType __start = __iteration_start + __iam * __block_size; + _DifferenceType __stop = std::min<_DifferenceType>(__length, + __start + + __block_size); + + std::pair<_RAIter1, _RAIter2> __local_result; + + while (__start < __length) + { + // Get new value of result. +# pragma omp flush(__result) + // No chance to find first element. + if (__result < __start) + break; + + __local_result = __selector._M_sequential_algorithm + (__begin1 + __start, __begin1 + __stop, + __begin2 + __start, __pred); + + if (__local_result.first != (__begin1 + __stop)) + { + omp_set_lock(&__result_lock); + if ((__local_result.first - __begin1) < __result) + __result = __local_result.first - __begin1; + omp_unset_lock(&__result_lock); + // Will not find better value in its interval. + break; + } + + __iteration_start += __num_threads * __block_size; + + // Where to work. + __start = __iteration_start + __iam * __block_size; + __stop = std::min<_DifferenceType>(__length, + __start + __block_size); + } + } //parallel + + omp_destroy_lock(&__result_lock); + + // Return iterator on found element. 
+ return std::pair<_RAIter1, _RAIter2>(__begin1 + __result, + __begin2 + __result); + } +#endif +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_FIND_H */ diff --git a/libstdc++-v3/include/parallel/find_selectors.h b/libstdc++-v3/include/parallel/find_selectors.h new file mode 100644 index 000000000..df77978a9 --- /dev/null +++ b/libstdc++-v3/include/parallel/find_selectors.h @@ -0,0 +1,197 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/find_selectors.h + * @brief _Function objects representing different tasks to be plugged + * into the parallel find algorithm. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_FIND_SELECTORS_H +#define _GLIBCXX_PARALLEL_FIND_SELECTORS_H 1 + +#include <parallel/tags.h> +#include <parallel/basic_iterator.h> +#include <bits/stl_pair.h> + +namespace __gnu_parallel +{ + /** @brief Base class of all __gnu_parallel::__find_template selectors. */ + struct __generic_find_selector + { }; + + /** + * @brief Test predicate on a single element, used for std::find() + * and std::find_if(). + */ + struct __find_if_selector : public __generic_find_selector + { + /** @brief Test on one position. + * @param __i1 _Iterator on first sequence. + * @param __i2 _Iterator on second sequence (unused). + * @param __pred Find predicate. + * @return Result of @c __pred applied to the element at @c __i1. + */ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + bool + operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) + { return __pred(*__i1); } + + /** @brief Corresponding sequential algorithm on a sequence. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence (returned unchanged). + * @param __pred Find predicate. + * @return Pair of the position found in the first sequence and + * @c __begin2. + */ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + std::pair<_RAIter1, _RAIter2> + _M_sequential_algorithm(_RAIter1 __begin1, + _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred) + { return std::make_pair(find_if(__begin1, __end1, __pred, + sequential_tag()), __begin2); } + }; + + /** @brief Test predicate on two adjacent elements. */ + struct __adjacent_find_selector : public __generic_find_selector + { + /** @brief Test on one position. + * @param __i1 _Iterator on first sequence. + * @param __i2 _Iterator on second sequence (unused). + * @param __pred Find predicate. + * @return Result of @c __pred applied to the element at @c __i1 and + * the element following it. + */ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + bool + operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) + { + // Passed end iterator is one short. + return __pred(*__i1, *(__i1 + 1)); + } + + /** @brief Corresponding sequential algorithm on a sequence.
+ * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence (returned unchanged). + * @param __pred Find predicate. + * @return Pair of the position found (or @c __end1 if none) and + * @c __begin2. + */ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + std::pair<_RAIter1, _RAIter2> + _M_sequential_algorithm(_RAIter1 __begin1, + _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred) + { + // Passed end iterator is one short: search up to __end1 + 1 and map a not-found result back to __end1. + _RAIter1 __spot = adjacent_find(__begin1, __end1 + 1, + __pred, sequential_tag()); + if (__spot == (__end1 + 1)) + __spot = __end1; + return std::make_pair(__spot, __begin2); + } + }; + + /** @brief Test inverted predicate on a pair of corresponding elements. */ + struct __mismatch_selector : public __generic_find_selector + { + /** + * @brief Test on one position. + * @param __i1 _Iterator on first sequence. + * @param __i2 _Iterator on second sequence. + * @param __pred Binary predicate; the position is selected when it + * returns false for the two referenced elements. + */ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + bool + operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) + { return !__pred(*__i1, *__i2); } + + /** + * @brief Corresponding sequential algorithm on a sequence. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. + * @param __pred Binary predicate. + * @return Result of the sequential mismatch() call. + */ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + std::pair<_RAIter1, _RAIter2> + _M_sequential_algorithm(_RAIter1 __begin1, + _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred) + { return mismatch(__begin1, __end1, __begin2, + __pred, sequential_tag()); } + }; + + + /** @brief Test predicate on several elements.
*/ + template<typename _FIterator> + struct __find_first_of_selector : public __generic_find_selector + { + /** @brief Begin iterator of the candidate sequence. */ _FIterator _M_begin; + /** @brief End iterator of the candidate sequence. */ _FIterator _M_end; + + explicit __find_first_of_selector(_FIterator __begin, + _FIterator __end) + : _M_begin(__begin), _M_end(__end) { } + + /** @brief Test on one position. + * @param __i1 _Iterator on first sequence. + * @param __i2 _Iterator on second sequence (unused). + * @param __pred Binary predicate comparing the element at @c __i1 + * with each candidate. + * @return true if @c __pred holds for at least one candidate. */ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + bool + operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) + { + for (_FIterator __pos_in_candidates = _M_begin; + __pos_in_candidates != _M_end; ++__pos_in_candidates) + if (__pred(*__i1, *__pos_in_candidates)) + return true; + return false; + } + + /** @brief Corresponding sequential algorithm on a sequence. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence (returned unchanged). + * @param __pred Find predicate. + * @return Pair of the position found in the first sequence and + * @c __begin2. */ + template<typename _RAIter1, typename _RAIter2, + typename _Pred> + std::pair<_RAIter1, _RAIter2> + _M_sequential_algorithm(_RAIter1 __begin1, + _RAIter1 __end1, + _RAIter2 __begin2, _Pred __pred) + { + return std::make_pair(find_first_of(__begin1, __end1, + _M_begin, _M_end, __pred, + sequential_tag()), __begin2); + } + }; +} + +#endif /* _GLIBCXX_PARALLEL_FIND_SELECTORS_H */ diff --git a/libstdc++-v3/include/parallel/for_each.h b/libstdc++-v3/include/parallel/for_each.h new file mode 100644 index 000000000..013259315 --- /dev/null +++ b/libstdc++-v3/include/parallel/for_each.h @@ -0,0 +1,90 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library.
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/for_each.h + * @brief Main interface for embarrassingly parallel functions. + * + * The explicit implementation are in other header files, like + * workstealing.h, par_loop.h, omp_loop.h, and omp_loop_static.h. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_FOR_EACH_H +#define _GLIBCXX_PARALLEL_FOR_EACH_H 1 + +#include <parallel/settings.h> +#include <parallel/par_loop.h> +#include <parallel/omp_loop.h> +#include <parallel/workstealing.h> + +namespace __gnu_parallel +{ + /** @brief Chose the desired algorithm by evaluating @c __parallelism_tag. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. + * @param __user_op A user-specified functor (comparator, predicate, + * associative operator,...) + * @param __functionality functor to @a process an element with + * __user_op (depends on desired functionality, e. g. accumulate, + * for_each,... + * @param __reduction Reduction functor. 
+ * @param __reduction_start Initial value for reduction. + * @param __output Output iterator. + * @param __bound Maximum number of elements processed. + * @param __parallelism_tag Parallelization method */ + template<typename _IIter, typename _UserOp, + typename _Functionality, typename _Red, typename _Result> + _UserOp + __for_each_template_random_access(_IIter __begin, _IIter __end, + _UserOp __user_op, + _Functionality& __functionality, + _Red __reduction, + _Result __reduction_start, + _Result& __output, typename + std::iterator_traits<_IIter>:: + difference_type __bound, + _Parallelism __parallelism_tag) + { + if (__parallelism_tag == parallel_unbalanced) + return __for_each_template_random_access_ed + (__begin, __end, __user_op, __functionality, __reduction, + __reduction_start, __output, __bound); + else if (__parallelism_tag == parallel_omp_loop) + return __for_each_template_random_access_omp_loop + (__begin, __end, __user_op, __functionality, __reduction, + __reduction_start, __output, __bound); + else if (__parallelism_tag == parallel_omp_loop_static) + return __for_each_template_random_access_omp_loop + (__begin, __end, __user_op, __functionality, __reduction, + __reduction_start, __output, __bound); + else //e. g. parallel_balanced + return __for_each_template_random_access_workstealing + (__begin, __end, __user_op, __functionality, __reduction, + __reduction_start, __output, __bound); + } +} + +#endif /* _GLIBCXX_PARALLEL_FOR_EACH_H */ diff --git a/libstdc++-v3/include/parallel/for_each_selectors.h b/libstdc++-v3/include/parallel/for_each_selectors.h new file mode 100644 index 000000000..3a37e5ca6 --- /dev/null +++ b/libstdc++-v3/include/parallel/for_each_selectors.h @@ -0,0 +1,349 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/for_each_selectors.h + * @brief Functors representing different tasks to be plugged into the + * generic parallelization methods for embarrassingly parallel functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H +#define _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H 1 + +#include <parallel/basic_iterator.h> + +namespace __gnu_parallel +{ + /** @brief Generic selector for embarrassingly parallel functions. */ + template<typename _It> + struct __generic_for_each_selector + { + /** @brief _Iterator on last element processed; needed for some + * algorithms (e. g. std::transform()). + */ + _It _M_finish_iterator; + }; + + /** @brief std::for_each() selector. */ + template<typename _It> + struct __for_each_selector : public __generic_for_each_selector<_It> + { + /** @brief Functor execution. + * @param __o Operator. + * @param __i iterator referencing object.
* @return Always true. */ + template<typename _Op> + bool + operator()(_Op& __o, _It __i) + { + __o(*__i); + return true; + } + }; + + /** @brief std::generate() selector. */ + template<typename _It> + struct __generate_selector : public __generic_for_each_selector<_It> + { + /** @brief Functor execution. + * @param __o Operator; called without arguments, its result is stored. + * @param __i iterator referencing object. + * @return Always true. */ + template<typename _Op> + bool + operator()(_Op& __o, _It __i) + { + *__i = __o(); + return true; + } + }; + + /** @brief std::fill() selector. */ + template<typename _It> + struct __fill_selector : public __generic_for_each_selector<_It> + { + /** @brief Functor execution. + * @param __v Value to assign. + * @param __i iterator referencing object. + * @return Always true. */ + template<typename _ValueType> + bool + operator()(_ValueType& __v, _It __i) + { + *__i = __v; + return true; + } + }; + + /** @brief std::transform() selector, one input sequence variant. */ + template<typename _It> + struct __transform1_selector : public __generic_for_each_selector<_It> + { + /** @brief Functor execution. + * @param __o Operator. + * @param __i Iterator pair: @c first is read, @c second is written. + * @return Always true. */ + template<typename _Op> + bool + operator()(_Op& __o, _It __i) + { + *__i.second = __o(*__i.first); + return true; + } + }; + + /** @brief std::transform() selector, two input sequences variant. */ + template<typename _It> + struct __transform2_selector : public __generic_for_each_selector<_It> + { + /** @brief Functor execution. + * @param __o Operator. + * @param __i Iterator triple: @c _M_first and @c _M_second are read, + * @c _M_third is written. + * @return Always true. */ + template<typename _Op> + bool + operator()(_Op& __o, _It __i) + { + *__i._M_third = __o(*__i._M_first, *__i._M_second); + return true; + } + }; + + /** @brief std::replace() selector. */ + template<typename _It, typename _Tp> + struct __replace_selector : public __generic_for_each_selector<_It> + { + /** @brief Value to replace with (held by reference). */ + const _Tp& __new_val; + + /** @brief Constructor + * @param __new_val Value to replace with.
*/ + explicit + __replace_selector(const _Tp &__new_val) : __new_val(__new_val) {} + + /** @brief Functor execution. + * @param __v Value to be replaced. + * @param __i iterator referencing object. + * @return Always true. */ + bool + operator()(_Tp& __v, _It __i) + { + if (*__i == __v) + *__i = __new_val; + return true; + } + }; + + /** @brief std::replace_if() selector. */ + template<typename _It, typename _Op, typename _Tp> + struct __replace_if_selector : public __generic_for_each_selector<_It> + { + /** @brief Value to replace with (held by reference). */ + const _Tp& __new_val; + + /** @brief Constructor. + * @param __new_val Value to replace with. */ + explicit + __replace_if_selector(const _Tp &__new_val) : __new_val(__new_val) { } + + /** @brief Functor execution. + * @param __o Predicate; the element is replaced when it returns true. + * @param __i iterator referencing object. + * @return Always true. */ + bool + operator()(_Op& __o, _It __i) + { + if (__o(*__i)) + *__i = __new_val; + return true; + } + }; + + /** @brief std::count() selector. */ + template<typename _It, typename _Diff> + struct __count_selector : public __generic_for_each_selector<_It> + { + /** @brief Functor execution. + * @param __v Value to compare against. + * @param __i iterator referencing object. + * @return 1 if count, 0 if does not count. */ + template<typename _ValueType> + _Diff + operator()(_ValueType& __v, _It __i) + { return (__v == *__i) ? 1 : 0; } + }; + + /** @brief std::count_if() selector. */ + template<typename _It, typename _Diff> + struct __count_if_selector : public __generic_for_each_selector<_It> + { + /** @brief Functor execution. + * @param __o Operator. + * @param __i iterator referencing object. + * @return 1 if count, 0 if does not count. */ + template<typename _Op> + _Diff + operator()(_Op& __o, _It __i) + { return (__o(*__i)) ? 1 : 0; } + }; + + /** @brief std::accumulate() selector. */ + template<typename _It> + struct __accumulate_selector : public __generic_for_each_selector<_It> + { + /** @brief Functor execution. + * @param __o Operator (unused).
+ * @param __i iterator referencing object. + * @return The current value. */ + template<typename _Op> + typename std::iterator_traits<_It>::value_type + operator()(_Op __o, _It __i) + { return *__i; } + }; + + /** @brief std::inner_product() selector. */ + template<typename _It, typename _It2, typename _Tp> + struct __inner_product_selector : public __generic_for_each_selector<_It> + { + /** @brief Begin iterator of first sequence. */ + _It __begin1_iterator; + + /** @brief Begin iterator of second sequence. */ + _It2 __begin2_iterator; + + /** @brief Constructor. + * @param __b1 Begin iterator of first sequence. + * @param __b2 Begin iterator of second sequence. */ + explicit + __inner_product_selector(_It __b1, _It2 __b2) + : __begin1_iterator(__b1), __begin2_iterator(__b2) { } + + /** @brief Functor execution. + * @param __mult Multiplication functor. + * @param __current iterator referencing object. + * @return Inner product elemental result: @c __mult applied to the + * current element and the element at the same offset in the second + * sequence. */ + template<typename _Op> + _Tp + operator()(_Op __mult, _It __current) + { + typename std::iterator_traits<_It>::difference_type __position + = __current - __begin1_iterator; + return __mult(*__current, *(__begin2_iterator + __position)); + } + }; + + /** @brief Selector that just returns the passed iterator. */ + template<typename _It> + struct __identity_selector : public __generic_for_each_selector<_It> + { + /** @brief Functor execution. + * @param __o Operator (unused). + * @param __i iterator referencing object. + * @return Passed iterator. */ + template<typename _Op> + _It + operator()(_Op __o, _It __i) + { return __i; } + }; + + /** @brief Selector that returns the difference between two adjacent + * elements.
+ * + * The operator is applied to the element at @c __i.first and its + * predecessor; the result is written through @c __i.second. + */ + template<typename _It> + struct __adjacent_difference_selector + : public __generic_for_each_selector<_It> + { + template<typename _Op> + bool + operator()(_Op& __o, _It __i) + { + typename _It::first_type __go_back_one = __i.first; + --__go_back_one; + *__i.second = __o(*__i.first, *__go_back_one); + return true; + } + }; + + /** @brief Functor doing nothing + * + * For some reduction tasks (this is not a function object, but is + * passed as a dummy selector parameter). + */ + struct _Nothing + { + /** @brief Functor execution. + * @param __i iterator referencing object. */ + template<typename _It> + void + operator()(_It) { } + }; + + /** @brief Reduction function doing nothing. */ + struct _DummyReduct + { + bool + operator()(bool, bool) const + { return true; } + }; + + /** @brief Reduction for finding the minimum element, using a comparator. + * + * Returns @c __x when @c __comp(*__x, *__y) holds, otherwise @c __y. */ + template<typename _Compare, typename _It> + struct __min_element_reduct + { + _Compare& __comp; + + explicit + __min_element_reduct(_Compare &__c) : __comp(__c) { } + + _It + operator()(_It __x, _It __y) + { return (__comp(*__x, *__y)) ? __x : __y; } + }; + + /** @brief Reduction for finding the maximum element, using a comparator. + * + * Returns @c __y when @c __comp(*__x, *__y) holds, otherwise @c __x. */ + template<typename _Compare, typename _It> + struct __max_element_reduct + { + _Compare& __comp; + + explicit + __max_element_reduct(_Compare& __c) : __comp(__c) { } + + _It + operator()(_It __x, _It __y) + { return (__comp(*__x, *__y)) ? __y : __x; } + }; + + /** @brief General reduction, using a binary operator.
*/ + template<typename _BinOp> + struct __accumulate_binop_reduct + { + /** @brief Binary operator, held by reference. */ _BinOp& __binop; + + explicit + __accumulate_binop_reduct(_BinOp& __b) : __binop(__b) { } + + template<typename _Result, typename _Addend> + _Result + operator()(const _Result& __x, const _Addend& __y) + { return __binop(__x, __y); } + }; +} + +#endif /* _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H */ diff --git a/libstdc++-v3/include/parallel/iterator.h b/libstdc++-v3/include/parallel/iterator.h new file mode 100644 index 000000000..013855076 --- /dev/null +++ b/libstdc++-v3/include/parallel/iterator.h @@ -0,0 +1,198 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/iterator.h + * @brief Helper iterator classes for the std::transform() functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler.
+ +#ifndef _GLIBCXX_PARALLEL_ITERATOR_H +#define _GLIBCXX_PARALLEL_ITERATOR_H 1 + +#include <parallel/basic_iterator.h> +#include <bits/stl_pair.h> + +namespace __gnu_parallel +{ + /** @brief A pair of iterators. The usual iterator operations are + * applied to both child iterators. + */ + template<typename _Iterator1, typename _Iterator2, + typename _IteratorCategory> + class _IteratorPair : public std::pair<_Iterator1, _Iterator2> + { + private: + typedef std::pair<_Iterator1, _Iterator2> _Base; + + public: + typedef _IteratorCategory iterator_category; + typedef void value_type; + + typedef std::iterator_traits<_Iterator1> _TraitsType; + typedef typename _TraitsType::difference_type difference_type; + typedef _IteratorPair* pointer; + typedef _IteratorPair& reference; + + _IteratorPair() { } + + _IteratorPair(const _Iterator1& __first, const _Iterator2& __second) + : _Base(__first, __second) { } + + // Pre-increment operator. + _IteratorPair& + operator++() + { + ++_Base::first; + ++_Base::second; + return *this; + } + + // Post-increment operator. + const _IteratorPair + operator++(int) + { return _IteratorPair(_Base::first++, _Base::second++); } + + // Pre-decrement operator. + _IteratorPair& + operator--() + { + --_Base::first; + --_Base::second; + return *this; + } + + // Post-decrement operator. + const _IteratorPair + operator--(int) + { return _IteratorPair(_Base::first--, _Base::second--); } + + // Implicit conversion to the second iterator. + operator _Iterator2() const + { return _Base::second; } + + _IteratorPair& + operator=(const _IteratorPair& __other) + { + _Base::first = __other.first; + _Base::second = __other.second; + return *this; + } + + /* Advance both child iterators by __delta. */ _IteratorPair + operator+(difference_type __delta) const + { return _IteratorPair(_Base::first + __delta, _Base::second + __delta); + } + + /* Distance is computed from the first child iterators only. */ difference_type + operator-(const _IteratorPair& __other) const + { return _Base::first - __other.first; } + }; + + + /** @brief A triple of iterators.
The usual iterator operations are + applied to all three child iterators. + */ + template<typename _Iterator1, typename _Iterator2, typename _Iterator3, + typename _IteratorCategory> + class _IteratorTriple + { + public: + typedef _IteratorCategory iterator_category; + typedef void value_type; + typedef typename std::iterator_traits<_Iterator1>::difference_type + difference_type; + typedef _IteratorTriple* pointer; + typedef _IteratorTriple& reference; + + _Iterator1 _M_first; + _Iterator2 _M_second; + _Iterator3 _M_third; + + _IteratorTriple() { } + + _IteratorTriple(const _Iterator1& __first, const _Iterator2& __second, + const _Iterator3& __third) + { + _M_first = __first; + _M_second = __second; + _M_third = __third; + } + + // Pre-increment operator. + _IteratorTriple& + operator++() + { + ++_M_first; + ++_M_second; + ++_M_third; + return *this; + } + + // Post-increment operator. + const _IteratorTriple + operator++(int) + { return _IteratorTriple(_M_first++, _M_second++, _M_third++); } + + // Pre-decrement operator. + _IteratorTriple& + operator--() + { + --_M_first; + --_M_second; + --_M_third; + return *this; + } + + // Post-decrement operator. + const _IteratorTriple + operator--(int) + { return _IteratorTriple(_M_first--, _M_second--, _M_third--); } + + // Implicit conversion to the third iterator.
+ operator _Iterator3() const + { return _M_third; } + + _IteratorTriple& + operator=(const _IteratorTriple& __other) + { + _M_first = __other._M_first; + _M_second = __other._M_second; + _M_third = __other._M_third; + return *this; + } + + /* Advance all three child iterators by __delta. */ _IteratorTriple + operator+(difference_type __delta) const + { return _IteratorTriple(_M_first + __delta, _M_second + __delta, + _M_third + __delta); } + + /* Distance is computed from the first child iterators only. */ difference_type + operator-(const _IteratorTriple& __other) const + { return _M_first - __other._M_first; } + }; +} + +#endif /* _GLIBCXX_PARALLEL_ITERATOR_H */ diff --git a/libstdc++-v3/include/parallel/list_partition.h b/libstdc++-v3/include/parallel/list_partition.h new file mode 100644 index 000000000..1f1bfa22e --- /dev/null +++ b/libstdc++-v3/include/parallel/list_partition.h @@ -0,0 +1,179 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>.
+ +/** @file parallel/list_partition.h + * @brief _Functionality to split __sequence referenced by only input + * iterators. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Leonor Frias Moya and Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_LIST_PARTITION_H +#define _GLIBCXX_PARALLEL_LIST_PARTITION_H 1 + +#include <parallel/parallel.h> +#include <vector> + +namespace __gnu_parallel +{ + /** @brief Shrinks and doubles the ranges. + * @param __os_starts Start positions worked on (oversampled). + * @param __count_to_two Counts up to 2. + * @param __range_length Current length of a chunk. + * @param __make_twice Whether the @c __os_starts is allowed to be + * grown or not + */ + template<typename _IIter> + void + __shrink_and_double(std::vector<_IIter>& __os_starts, + size_t& __count_to_two, size_t& __range_length, + const bool __make_twice) + { + ++__count_to_two; + if (!__make_twice || __count_to_two < 2) + __shrink(__os_starts, __count_to_two, __range_length); + else + { + __os_starts.resize((__os_starts.size() - 1) * 2 + 1); + __count_to_two = 0; + } + } + + /** @brief Combines two ranges into one and thus halves the number of ranges. + * @param __os_starts Start positions worked on (oversampled). + * @param __count_to_two Counts up to 2. + * @param __range_length Current length of a chunk. */ + template<typename _IIter> + void + __shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two, + size_t& __range_length) + { + for (typename std::vector<_IIter>::size_type __i = 0; + __i <= (__os_starts.size() / 2); ++__i) + __os_starts[__i] = __os_starts[__i * 2]; + __range_length *= 2; + } + + /** @brief Splits a sequence given by input iterators into parts of + * almost equal size + * + * The function needs only one pass over the sequence. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. 
+ * @param __starts Start iterators for the resulting parts, dimension + * @c __num_parts+1. For convenience, @c __starts @c [__num_parts] + * contains the end iterator of the sequence. + * @param __lengths Length of the resulting parts. + * @param __num_parts Number of parts to split the sequence into. + * @param __f Functor to be applied to each element by traversing __it + * @param __oversampling Oversampling factor. If 0, then the + * partitions will differ in at most + * \sqrt{\mathrm{__end} - \mathrm{__begin}} + * __elements. Otherwise, the ratio between the + * longest and the shortest part is bounded by + * 1/(\mathrm{__oversampling} \cdot \mathrm{num\_parts}) + * @return Length of the whole sequence. + */ + template<typename _IIter, typename _FunctorType> + size_t + list_partition(const _IIter __begin, const _IIter __end, + _IIter* __starts, size_t* __lengths, const int __num_parts, + _FunctorType& __f, int __oversampling = 0) + { + bool __make_twice = false; + + // The resizing algorithm is chosen according to the oversampling factor. 
+ if (__oversampling == 0) + { + __make_twice = true; + __oversampling = 1; + } + + std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1); + + __os_starts[0] = __begin; + _IIter __prev = __begin, __it = __begin; + size_t __dist_limit = 0, __dist = 0; + size_t __cur = 1, __next = 1; + size_t __range_length = 1; + size_t __count_to_two = 0; + while (__it != __end) + { + __cur = __next; + for (; __cur < __os_starts.size() and __it != __end; ++__cur) + { + for (__dist_limit += __range_length; + __dist < __dist_limit and __it != __end; ++__dist) + { + __f(__it); + ++__it; + } + __os_starts[__cur] = __it; + } + + // Must compare for end and not __cur < __os_starts.size() , because + // __cur could be == __os_starts.size() as well + if (__it == __end) + break; + + __shrink_and_double(__os_starts, __count_to_two, __range_length, + __make_twice); + __next = __os_starts.size() / 2 + 1; + } + + // Calculation of the parts (one must be extracted from __current + // because the partition beginning at end, consists only of + // itself). + size_t __size_part = (__cur - 1) / __num_parts; + int __size_greater = static_cast<int>((__cur - 1) % __num_parts); + __starts[0] = __os_starts[0]; + + size_t __index = 0; + + // Smallest partitions. + for (int __i = 1; __i < (__num_parts + 1 - __size_greater); ++__i) + { + __lengths[__i - 1] = __size_part * __range_length; + __index += __size_part; + __starts[__i] = __os_starts[__index]; + } + + // Biggest partitions. + for (int __i = __num_parts + 1 - __size_greater; __i <= __num_parts; + ++__i) + { + __lengths[__i - 1] = (__size_part+1) * __range_length; + __index += (__size_part+1); + __starts[__i] = __os_starts[__index]; + } + + // Correction of the end size (the end iteration has not finished). 
+ __lengths[__num_parts - 1] -= (__dist_limit - __dist); + + return __dist; + } +} + +#endif /* _GLIBCXX_PARALLEL_LIST_PARTITION_H */ diff --git a/libstdc++-v3/include/parallel/losertree.h b/libstdc++-v3/include/parallel/losertree.h new file mode 100644 index 000000000..cb24f7f67 --- /dev/null +++ b/libstdc++-v3/include/parallel/losertree.h @@ -0,0 +1,1055 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/losertree.h +* @brief Many generic loser tree variants. +* This file is a GNU parallel extension to the Standard C++ Library. +*/ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_LOSERTREE_H +#define _GLIBCXX_PARALLEL_LOSERTREE_H 1 + +#include <bits/stl_algobase.h> +#include <bits/stl_function.h> +#include <parallel/features.h> +#include <parallel/base.h> + +namespace __gnu_parallel +{ + /** + * @brief Guarded loser/tournament tree. + * + * The smallest element is at the top. 
+   *
+   * Guarding is done explicitly through one flag _M_sup per element,
+   * inf is not needed due to a better initialization routine. This
+   * is a well-performing variant.
+   *
+   * @param _Tp the element type
+   * @param _Compare the comparator to use, defaults to std::less<_Tp>
+   */
+  template<typename _Tp, typename _Compare>
+    class _LoserTreeBase
+    {
+    protected:
+      /** @brief Internal representation of a _LoserTree element. */
+      struct _Loser
+      {
+        /** @brief flag, true iff this is a "maximum" sentinel. */
+        bool _M_sup;
+        /** @brief index of the source sequence. */
+        int _M_source;
+        /** @brief key of the element in the _LoserTree. */
+        _Tp _M_key;
+      };
+
+      unsigned int _M_ik, _M_k, _M_offset;
+
+      /** log_2{_M_k} */
+      unsigned int _M_log_k;
+
+      /** @brief _LoserTree elements. */
+      _Loser* _M_losers;
+
+      /** @brief _Compare to use. */
+      _Compare _M_comp;
+
+      /**
+       * @brief State flag that determines whether the _LoserTree is empty.
+       *
+       * Only used for building the _LoserTree. While it is true, none
+       * of the keys in _M_losers has been constructed yet.
+       */
+      bool _M_first_insert;
+
+    public:
+      /**
+       * @brief The constructor.
+       *
+       * Allocates raw storage for 2 * _M_k elements; the keys are
+       * constructed later by the first call to __insert_start().
+       *
+       * @param __k The number of sequences to merge.
+       * @param __comp The comparator to use.
+       */
+      _LoserTreeBase(unsigned int __k, _Compare __comp)
+      : _M_comp(__comp)
+      {
+        _M_ik = __k;
+
+        // Compute log_2{_M_k} for the _Loser Tree
+        _M_log_k = __rd_log2(_M_ik - 1) + 1;
+
+        // Next greater power of 2.
+        _M_k = 1 << _M_log_k;
+        _M_offset = _M_k;
+
+        // Avoid default-constructing _M_losers[]._M_key
+        _M_losers = static_cast<_Loser*>(::operator new(2 * _M_k
+                                                        * sizeof(_Loser)));
+        for (unsigned int __i = _M_ik - 1; __i < _M_k; ++__i)
+          _M_losers[__i + _M_k]._M_sup = true;
+
+        _M_first_insert = true;
+      }
+
+      /**
+       * @brief The destructor.
+       *
+       * The keys are constructed by the first call to __insert_start()
+       * only, so they are destroyed only if that call has happened.
+       * The raw storage is released in any case.
+       */
+      ~_LoserTreeBase()
+      {
+        if (!_M_first_insert)
+          for (unsigned int __i = 0; __i < (2 * _M_k); ++__i)
+            _M_losers[__i].~_Loser();
+        ::operator delete(_M_losers);
+      }
+
+      /**
+       * @brief Initializes the sequence "_M_source" with the element "__key".
+       *
+       * @param __key the element to insert
+       * @param __source index of the source sequence
+       * @param __sup flag that determines whether the value to insert is an
+       *   explicit supremum.
+       */
+      void
+      __insert_start(const _Tp& __key, int __source, bool __sup)
+      {
+        unsigned int __pos = _M_k + __source;
+
+        if (_M_first_insert)
+          {
+            // Construct all keys, so we can easily destruct them.
+            for (unsigned int __i = 0; __i < (2 * _M_k); ++__i)
+              ::new(&(_M_losers[__i]._M_key)) _Tp(__key);
+            _M_first_insert = false;
+          }
+        else
+          _M_losers[__pos]._M_key = __key;
+
+        _M_losers[__pos]._M_sup = __sup;
+        _M_losers[__pos]._M_source = __source;
+      }
+
+      /**
+       * @return the index of the sequence with the smallest element.
+       */
+      int __get_min_source()
+      { return _M_losers[0]._M_source; }
+    };
+
+  /**
+   * @brief Stable _LoserTree variant.
+   *
+   * Provides the stable implementations of __init_winner,
+   * __init and __delete_min_insert.
+   *
+   * Unstable variant is done using partial specialisation below.
+   */
+  template<bool __stable/* default == true */, typename _Tp,
+           typename _Compare>
+    class _LoserTree
+    : public _LoserTreeBase<_Tp, _Compare>
+    {
+      typedef _LoserTreeBase<_Tp, _Compare> _Base;
+      using _Base::_M_k;
+      // _M_comp is a member of a dependent base class; it has to be
+      // named here to be found by two-phase name lookup.
+      using _Base::_M_comp;
+      using _Base::_M_losers;
+      using _Base::_M_first_insert;
+
+    public:
+      _LoserTree(unsigned int __k, _Compare __comp)
+      : _Base::_LoserTreeBase(__k, __comp)
+      { }
+
+      /**
+       * Plays the game at node "__root" (recursively playing the two
+       * sub-games first), stores the loser there and returns the
+       * position of the winner. On ties the left player wins.
+       *
+       * @param __root index of the game to play.
+       */
+      unsigned int
+      __init_winner(unsigned int __root)
+      {
+        if (__root >= _M_k)
+          return __root;
+        else
+          {
+            unsigned int __left = __init_winner(2 * __root);
+            unsigned int __right = __init_winner(2 * __root + 1);
+            if (_M_losers[__right]._M_sup
+                || (!_M_losers[__left]._M_sup
+                    && !_M_comp(_M_losers[__right]._M_key,
+                                _M_losers[__left]._M_key)))
+              {
+                // Left one is less or equal.
+                _M_losers[__root] = _M_losers[__right];
+                return __left;
+              }
+            else
+              {
+                // Right one is less.
+                _M_losers[__root] = _M_losers[__left];
+                return __right;
+              }
+          }
+      }
+
+      /** Builds the tree and stores the overall winner in element 0. */
+      void __init()
+      { _M_losers[0] = _M_losers[__init_winner(1)]; }
+
+      /**
+       * @brief Delete the smallest element and insert a new element from
+       *   the previously smallest element's sequence.
+       *
+       * This implementation is stable.
+       *
+       * @param __key the key to insert
+       * @param __sup true iff __key is an explicitly marked supremum
+       */
+      // Do not pass a const reference since __key will be used as
+      // local variable.
+      void
+      __delete_min_insert(_Tp __key, bool __sup)
+      {
+        using std::swap;
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+
+        int __source = _M_losers[0]._M_source;
+        // Replay the games on the path from the winner's leaf to the root.
+        for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+             __pos /= 2)
+          {
+            // The smaller one gets promoted, ties are broken by _M_source.
+            if ((__sup && (!_M_losers[__pos]._M_sup
+                           || _M_losers[__pos]._M_source < __source))
+                || (!__sup && !_M_losers[__pos]._M_sup
+                    && ((_M_comp(_M_losers[__pos]._M_key, __key))
+                        || (!_M_comp(__key, _M_losers[__pos]._M_key)
+                            && _M_losers[__pos]._M_source < __source))))
+              {
+                // The other one is smaller.
+                std::swap(_M_losers[__pos]._M_sup, __sup);
+                std::swap(_M_losers[__pos]._M_source, __source);
+                swap(_M_losers[__pos]._M_key, __key);
+              }
+          }
+
+        _M_losers[0]._M_sup = __sup;
+        _M_losers[0]._M_source = __source;
+        _M_losers[0]._M_key = __key;
+      }
+    };
+
+  /**
+   * @brief Unstable _LoserTree variant.
+   *
+   * Stability (non-stable here) is selected with partial specialization.
+   */
+  template<typename _Tp, typename _Compare>
+    class _LoserTree</* __stable == */false, _Tp, _Compare>
+    : public _LoserTreeBase<_Tp, _Compare>
+    {
+      typedef _LoserTreeBase<_Tp, _Compare> _Base;
+      using _Base::_M_log_k;
+      using _Base::_M_k;
+      // _M_comp is a member of a dependent base class; it has to be
+      // named here to be found by two-phase name lookup.
+      using _Base::_M_comp;
+      using _Base::_M_losers;
+      using _Base::_M_first_insert;
+
+    public:
+      _LoserTree(unsigned int __k, _Compare __comp)
+      : _Base::_LoserTreeBase(__k, __comp)
+      { }
+
+      /**
+       * Computes the winner of the competition at position "__root".
+       *
+       * Called recursively (starting at 1, see __init()) to build the
+       * initial tree.
+       *
+       * @param __root index of the "game" to start.
+       */
+      unsigned int
+      __init_winner(unsigned int __root)
+      {
+        if (__root >= _M_k)
+          return __root;
+        else
+          {
+            unsigned int __left = __init_winner(2 * __root);
+            unsigned int __right = __init_winner(2 * __root + 1);
+            if (_M_losers[__right]._M_sup
+                || (!_M_losers[__left]._M_sup
+                    && !_M_comp(_M_losers[__right]._M_key,
+                                _M_losers[__left]._M_key)))
+              {
+                // Left one is less or equal.
+                _M_losers[__root] = _M_losers[__right];
+                return __left;
+              }
+            else
+              {
+                // Right one is less.
+                _M_losers[__root] = _M_losers[__left];
+                return __right;
+              }
+          }
+      }
+
+      /** Builds the tree and stores the overall winner in element 0. */
+      void
+      __init()
+      { _M_losers[0] = _M_losers[__init_winner(1)]; }
+
+      /**
+       * Delete the smallest element and insert the element __key
+       * instead.
+       *
+       * @param __key the key to insert
+       * @param __sup true iff __key is an explicitly marked supremum
+       */
+      // Do not pass a const reference since __key will be used as local
+      // variable.
+      void
+      __delete_min_insert(_Tp __key, bool __sup)
+      {
+        using std::swap;
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+
+        int __source = _M_losers[0]._M_source;
+        // Replay the games on the path from the winner's leaf to the root.
+        for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+             __pos /= 2)
+          {
+            // The smaller one gets promoted.
+            if (__sup || (!_M_losers[__pos]._M_sup
+                          && _M_comp(_M_losers[__pos]._M_key, __key)))
+              {
+                // The other one is smaller.
+                std::swap(_M_losers[__pos]._M_sup, __sup);
+                std::swap(_M_losers[__pos]._M_source, __source);
+                swap(_M_losers[__pos]._M_key, __key);
+              }
+          }
+
+        _M_losers[0]._M_sup = __sup;
+        _M_losers[0]._M_source = __source;
+        _M_losers[0]._M_key = __key;
+      }
+    };
+
+  /**
+   * @brief Base class of _Loser Tree implementation using pointers.
+   */
+  template<typename _Tp, typename _Compare>
+    class _LoserTreePointerBase
+    {
+    protected:
+      /** @brief Internal representation of _LoserTree elements. */
+      struct _Loser
+      {
+        /** @brief flag, true iff this element is a supremum. */
+        bool _M_sup;
+        /** @brief index of the source sequence. */
+        int _M_source;
+        /** @brief address of the key; the key itself is not copied. */
+        const _Tp* _M_keyp;
+      };
+
+      unsigned int _M_ik, _M_k, _M_offset;
+      _Loser* _M_losers;
+      _Compare _M_comp;
+
+    public:
+      /**
+       * @param __k The number of sequences to merge.
+       * @param __comp The comparator to use.
+       */
+      _LoserTreePointerBase(unsigned int __k,
+                            _Compare __comp = std::less<_Tp>())
+      : _M_comp(__comp)
+      {
+        _M_ik = __k;
+
+        // Next greater power of 2.
+        _M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
+        _M_offset = _M_k;
+        _M_losers = new _Loser[_M_k * 2];
+        for (unsigned int __i = _M_ik - 1; __i < _M_k; __i++)
+          _M_losers[__i + _M_k]._M_sup = true;
+      }
+
+      ~_LoserTreePointerBase()
+      { delete[] _M_losers; }
+
+      /** @return the index of the sequence with the smallest element. */
+      int __get_min_source()
+      { return _M_losers[0]._M_source; }
+
+      /**
+       * Registers the first element of sequence "__source".
+       *
+       * Only the address of __key is stored, so the referenced object
+       * has to stay alive for as long as the tree may look at it.
+       */
+      void __insert_start(const _Tp& __key, int __source, bool __sup)
+      {
+        unsigned int __pos = _M_k + __source;
+
+        _M_losers[__pos]._M_sup = __sup;
+        _M_losers[__pos]._M_source = __source;
+        _M_losers[__pos]._M_keyp = &__key;
+      }
+    };
+
+  /**
+   * @brief Stable _LoserTree implementation.
+   *
+   * The unstable variant is implemented using partial specialization below.
+   */
+  template<bool __stable/* default == true */, typename _Tp, typename _Compare>
+    class _LoserTreePointer
+    : public _LoserTreePointerBase<_Tp, _Compare>
+    {
+      typedef _LoserTreePointerBase<_Tp, _Compare> _Base;
+      using _Base::_M_k;
+      // _M_comp is a member of a dependent base class; it has to be
+      // named here to be found by two-phase name lookup.
+      using _Base::_M_comp;
+      using _Base::_M_losers;
+
+    public:
+      _LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>())
+      : _Base::_LoserTreePointerBase(__k, __comp)
+      { }
+
+      /**
+       * Plays the game at node "__root", stores the loser there and
+       * returns the position of the winner. On ties the left player wins.
+       */
+      unsigned int
+      __init_winner(unsigned int __root)
+      {
+        if (__root >= _M_k)
+          return __root;
+        else
+          {
+            unsigned int __left = __init_winner(2 * __root);
+            unsigned int __right = __init_winner(2 * __root + 1);
+            if (_M_losers[__right]._M_sup
+                || (!_M_losers[__left]._M_sup
+                    && !_M_comp(*_M_losers[__right]._M_keyp,
+                                *_M_losers[__left]._M_keyp)))
+              {
+                // Left one is less or equal.
+                _M_losers[__root] = _M_losers[__right];
+                return __left;
+              }
+            else
+              {
+                // Right one is less.
+                _M_losers[__root] = _M_losers[__left];
+                return __right;
+              }
+          }
+      }
+
+      /** Builds the tree and stores the overall winner in element 0. */
+      void __init()
+      { _M_losers[0] = _M_losers[__init_winner(1)]; }
+
+      /**
+       * Replaces the current minimum by __key, which comes from the
+       * same sequence, and replays the games up to the root.
+       * Only the address of __key is kept.
+       */
+      void __delete_min_insert(const _Tp& __key, bool __sup)
+      {
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+
+        const _Tp* __keyp = &__key;
+        int __source = _M_losers[0]._M_source;
+        for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+             __pos /= 2)
+          {
+            // The smaller one gets promoted, ties are broken by __source.
+            if ((__sup && (!_M_losers[__pos]._M_sup
+                           || _M_losers[__pos]._M_source < __source))
+                || (!__sup && !_M_losers[__pos]._M_sup &&
+                    ((_M_comp(*_M_losers[__pos]._M_keyp, *__keyp))
+                     || (!_M_comp(*__keyp, *_M_losers[__pos]._M_keyp)
+                         && _M_losers[__pos]._M_source < __source))))
+              {
+                // The other one is smaller.
+                std::swap(_M_losers[__pos]._M_sup, __sup);
+                std::swap(_M_losers[__pos]._M_source, __source);
+                std::swap(_M_losers[__pos]._M_keyp, __keyp);
+              }
+          }
+
+        _M_losers[0]._M_sup = __sup;
+        _M_losers[0]._M_source = __source;
+        _M_losers[0]._M_keyp = __keyp;
+      }
+    };
+
+  /**
+   * @brief Unstable _LoserTree implementation.
+   *
+   * The stable variant is above.
+   */
+  template<typename _Tp, typename _Compare>
+    class _LoserTreePointer</* __stable == */false, _Tp, _Compare>
+    : public _LoserTreePointerBase<_Tp, _Compare>
+    {
+      typedef _LoserTreePointerBase<_Tp, _Compare> _Base;
+      using _Base::_M_k;
+      // _M_comp is a member of a dependent base class; it has to be
+      // named here to be found by two-phase name lookup.
+      using _Base::_M_comp;
+      using _Base::_M_losers;
+
+    public:
+      _LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>())
+      : _Base::_LoserTreePointerBase(__k, __comp)
+      { }
+
+      /**
+       * Plays the game at node "__root", stores the loser there and
+       * returns the position of the winner.
+       */
+      unsigned int
+      __init_winner(unsigned int __root)
+      {
+        if (__root >= _M_k)
+          return __root;
+        else
+          {
+            unsigned int __left = __init_winner(2 * __root);
+            unsigned int __right = __init_winner(2 * __root + 1);
+            if (_M_losers[__right]._M_sup
+                || (!_M_losers[__left]._M_sup
+                    && !_M_comp(*_M_losers[__right]._M_keyp,
+                                *_M_losers[__left]._M_keyp)))
+              {
+                // Left one is less or equal.
+                _M_losers[__root] = _M_losers[__right];
+                return __left;
+              }
+            else
+              {
+                // Right one is less.
+                _M_losers[__root] = _M_losers[__left];
+                return __right;
+              }
+          }
+      }
+
+      /** Builds the tree and stores the overall winner in element 0. */
+      void __init()
+      { _M_losers[0] = _M_losers[__init_winner(1)]; }
+
+      /**
+       * Replaces the current minimum by __key, which comes from the
+       * same sequence, and replays the games up to the root.
+       * Only the address of __key is kept.
+       */
+      void __delete_min_insert(const _Tp& __key, bool __sup)
+      {
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+
+        const _Tp* __keyp = &__key;
+        int __source = _M_losers[0]._M_source;
+        for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+             __pos /= 2)
+          {
+            // The smaller one gets promoted.
+            if (__sup || (!_M_losers[__pos]._M_sup
+                          && _M_comp(*_M_losers[__pos]._M_keyp, *__keyp)))
+              {
+                // The other one is smaller.
+                std::swap(_M_losers[__pos]._M_sup, __sup);
+                std::swap(_M_losers[__pos]._M_source, __source);
+                std::swap(_M_losers[__pos]._M_keyp, __keyp);
+              }
+          }
+
+        _M_losers[0]._M_sup = __sup;
+        _M_losers[0]._M_source = __source;
+        _M_losers[0]._M_keyp = __keyp;
+      }
+    };
+
+  /** @brief Base class for unguarded _LoserTree implementation.
+   *
+   * The whole element is copied into the tree structure.
+   *
+   * No guarding is done, therefore not a single input sequence must
+   * run empty. Unused sequence heads are marked with a sentinel which
+   * is > all elements that are to be merged.
+   *
+   * This is a very fast variant.
+   */
+  template<typename _Tp, typename _Compare>
+    class _LoserTreeUnguardedBase
+    {
+    protected:
+      struct _Loser
+      {
+        /** @brief index of the source sequence, -1 for a sentinel. */
+        int _M_source;
+        /** @brief copy of the key. */
+        _Tp _M_key;
+      };
+
+      unsigned int _M_ik, _M_k, _M_offset;
+      _Loser* _M_losers;
+      _Compare _M_comp;
+
+    public:
+      /**
+       * Allocates raw storage for 2 * _M_k elements and constructs a
+       * copy of the sentinel in all inner nodes and in the leaves from
+       * _M_k + _M_ik - 1 upwards. The remaining leaves are constructed
+       * by __insert_start().
+       *
+       * @param __k The number of sequences to merge.
+       * @param __sentinel The sentinel value.
+       * @param __comp The comparator to use.
+       */
+      _LoserTreeUnguardedBase(unsigned int __k, const _Tp& __sentinel,
+                              _Compare __comp = std::less<_Tp>())
+      : _M_comp(__comp)
+      {
+        _M_ik = __k;
+
+        // Next greater power of 2.
+        _M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
+        _M_offset = _M_k;
+        // Avoid default-constructing _M_losers[]._M_key
+        _M_losers = static_cast<_Loser*>(::operator new(2 * _M_k
+                                                        * sizeof(_Loser)));
+
+        for (unsigned int __i = 0; __i < _M_k; ++__i)
+          {
+            ::new(&(_M_losers[__i]._M_key)) _Tp(__sentinel);
+            _M_losers[__i]._M_source = -1;
+          }
+        for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
+          {
+            ::new(&(_M_losers[__i]._M_key)) _Tp(__sentinel);
+            _M_losers[__i]._M_source = -1;
+          }
+      }
+
+      /**
+       * Destroys all 2 * _M_k elements. The leaves below
+       * _M_k + _M_ik - 1 only hold an object once __insert_start()
+       * has been called for their sequence.
+       */
+      ~_LoserTreeUnguardedBase()
+      {
+        for (unsigned int __i = 0; __i < (2 * _M_k); ++__i)
+          _M_losers[__i].~_Loser();
+        ::operator delete(_M_losers);
+      }
+
+      /** @return the index of the sequence with the smallest element. */
+      int
+      __get_min_source()
+      {
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+        return _M_losers[0]._M_source;
+      }
+
+      /**
+       * Stores a copy of the first element of sequence "__source" in
+       * the corresponding leaf.
+       *
+       * @param __key the element to insert
+       * @param __source index of the source sequence
+       */
+      void
+      __insert_start(const _Tp& __key, int __source, bool)
+      {
+        unsigned int __pos = _M_k + __source;
+
+        // Leaves from _M_k + _M_ik - 1 upwards already hold a sentinel
+        // copy made by the constructor. Assign over it instead of
+        // constructing a second object in the same storage, which
+        // would never destroy the first one.
+        if (__pos >= _M_k + _M_ik - 1)
+          _M_losers[__pos]._M_key = __key;
+        else
+          ::new(&(_M_losers[__pos]._M_key)) _Tp(__key);
+        _M_losers[__pos]._M_source = __source;
+      }
+    };
+
+  /**
+   * @brief Stable implementation of unguarded _LoserTree.
+   *
+   * Unstable variant is selected below with partial specialization.
+   */
+  template<bool __stable/* default == true */, typename _Tp, typename _Compare>
+    class _LoserTreeUnguarded
+    : public _LoserTreeUnguardedBase<_Tp, _Compare>
+    {
+      typedef _LoserTreeUnguardedBase<_Tp, _Compare> _Base;
+      using _Base::_M_k;
+      // _M_comp is a member of a dependent base class; it has to be
+      // named here to be found by two-phase name lookup.
+      using _Base::_M_comp;
+      using _Base::_M_losers;
+
+    public:
+      _LoserTreeUnguarded(unsigned int __k, const _Tp& __sentinel,
+                          _Compare __comp = std::less<_Tp>())
+      : _Base::_LoserTreeUnguardedBase(__k, __sentinel, __comp)
+      { }
+
+      /**
+       * Plays the game at node "__root", stores the loser there and
+       * returns the position of the winner. On ties the left player wins.
+       */
+      unsigned int
+      __init_winner(unsigned int __root)
+      {
+        if (__root >= _M_k)
+          return __root;
+        else
+          {
+            unsigned int __left = __init_winner(2 * __root);
+            unsigned int __right = __init_winner(2 * __root + 1);
+            if (!_M_comp(_M_losers[__right]._M_key,
+                         _M_losers[__left]._M_key))
+              {
+                // Left one is less or equal.
+                _M_losers[__root] = _M_losers[__right];
+                return __left;
+              }
+            else
+              {
+                // Right one is less.
+                _M_losers[__root] = _M_losers[__left];
+                return __right;
+              }
+          }
+      }
+
+      /** Builds the tree and stores the overall winner in element 0. */
+      void
+      __init()
+      {
+        _M_losers[0] = _M_losers[__init_winner(1)];
+
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top at the beginning
+        // (0 sequences!)
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+      }
+
+      /**
+       * Replaces the current minimum by __key, which comes from the
+       * same sequence, and replays the games up to the root.
+       */
+      // Do not pass a const reference since __key will be used as
+      // local variable.
+      void
+      __delete_min_insert(_Tp __key, bool)
+      {
+        using std::swap;
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+
+        int __source = _M_losers[0]._M_source;
+        for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+             __pos /= 2)
+          {
+            // The smaller one gets promoted, ties are broken by _M_source.
+            if (_M_comp(_M_losers[__pos]._M_key, __key)
+                || (!_M_comp(__key, _M_losers[__pos]._M_key)
+                    && _M_losers[__pos]._M_source < __source))
+              {
+                // The other one is smaller.
+                std::swap(_M_losers[__pos]._M_source, __source);
+                swap(_M_losers[__pos]._M_key, __key);
+              }
+          }
+
+        _M_losers[0]._M_source = __source;
+        _M_losers[0]._M_key = __key;
+      }
+    };
+
+  /**
+   * @brief Non-Stable implementation of unguarded _LoserTree.
+   *
+   * Stable implementation is above.
+   */
+  template<typename _Tp, typename _Compare>
+    class _LoserTreeUnguarded</* __stable == */false, _Tp, _Compare>
+    : public _LoserTreeUnguardedBase<_Tp, _Compare>
+    {
+      typedef _LoserTreeUnguardedBase<_Tp, _Compare> _Base;
+      using _Base::_M_k;
+      // _M_comp is a member of a dependent base class; it has to be
+      // named here to be found by two-phase name lookup.
+      using _Base::_M_comp;
+      using _Base::_M_losers;
+
+    public:
+      _LoserTreeUnguarded(unsigned int __k, const _Tp& __sentinel,
+                          _Compare __comp = std::less<_Tp>())
+      : _Base::_LoserTreeUnguardedBase(__k, __sentinel, __comp)
+      { }
+
+      /**
+       * Plays the game at node "__root", stores the loser there and
+       * returns the position of the winner.
+       */
+      unsigned int
+      __init_winner(unsigned int __root)
+      {
+        if (__root >= _M_k)
+          return __root;
+        else
+          {
+            unsigned int __left = __init_winner(2 * __root);
+            unsigned int __right = __init_winner(2 * __root + 1);
+
+#if _GLIBCXX_ASSERTIONS
+            // If __left one is sentinel then __right one must be, too.
+            if (_M_losers[__left]._M_source == -1)
+              _GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1);
+#endif
+
+            if (!_M_comp(_M_losers[__right]._M_key,
+                         _M_losers[__left]._M_key))
+              {
+                // Left one is less or equal.
+                _M_losers[__root] = _M_losers[__right];
+                return __left;
+              }
+            else
+              {
+                // Right one is less.
+                _M_losers[__root] = _M_losers[__left];
+                return __right;
+              }
+          }
+      }
+
+      /** Builds the tree and stores the overall winner in element 0. */
+      void
+      __init()
+      {
+        _M_losers[0] = _M_losers[__init_winner(1)];
+
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top at the beginning
+        // (0 sequences!)
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+      }
+
+      /**
+       * Replaces the current minimum by __key, which comes from the
+       * same sequence, and replays the games up to the root.
+       */
+      // Do not pass a const reference since __key will be used as
+      // local variable.
+      void
+      __delete_min_insert(_Tp __key, bool)
+      {
+        using std::swap;
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+
+        int __source = _M_losers[0]._M_source;
+        for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+             __pos /= 2)
+          {
+            // The smaller one gets promoted.
+            if (_M_comp(_M_losers[__pos]._M_key, __key))
+              {
+                // The other one is smaller.
+                std::swap(_M_losers[__pos]._M_source, __source);
+                swap(_M_losers[__pos]._M_key, __key);
+              }
+          }
+
+        _M_losers[0]._M_source = __source;
+        _M_losers[0]._M_key = __key;
+      }
+    };
+
+  /** @brief Unguarded loser tree, keeping only pointers to the
+   * elements in the tree structure.
+   *
+   * No guarding is done, therefore not a single input sequence must
+   * run empty. This is a very fast variant.
+   */
+  template<typename _Tp, typename _Compare>
+    class _LoserTreePointerUnguardedBase
+    {
+    protected:
+      struct _Loser
+      {
+        /** @brief index of the source sequence, -1 for a sentinel. */
+        int _M_source;
+        /** @brief address of the key; the key itself is not copied. */
+        const _Tp* _M_keyp;
+      };
+
+      unsigned int _M_ik, _M_k, _M_offset;
+      _Loser* _M_losers;
+      _Compare _M_comp;
+
+    public:
+
+      /**
+       * @param __k The number of sequences to merge.
+       * @param __sentinel The sentinel value. Only its address is
+       *   stored, so the object has to outlive the tree.
+       * @param __comp The comparator to use.
+       */
+      _LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& __sentinel,
+                                     _Compare __comp = std::less<_Tp>())
+      : _M_comp(__comp)
+      {
+        _M_ik = __k;
+
+        // Next greater power of 2.
+        _M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
+        _M_offset = _M_k;
+        // Elements only hold a pointer and an index. Inner nodes are
+        // left unset here and get their values from __init_winner().
+        _M_losers = new _Loser[2 * _M_k];
+
+        for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
+          {
+            _M_losers[__i]._M_keyp = &__sentinel;
+            _M_losers[__i]._M_source = -1;
+          }
+      }
+
+      ~_LoserTreePointerUnguardedBase()
+      { delete[] _M_losers; }
+
+      /** @return the index of the sequence with the smallest element. */
+      int
+      __get_min_source()
+      {
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+        return _M_losers[0]._M_source;
+      }
+
+      /**
+       * Registers the first element of sequence "__source". Only the
+       * address of __key is stored.
+       */
+      void
+      __insert_start(const _Tp& __key, int __source, bool)
+      {
+        unsigned int __pos = _M_k + __source;
+
+        _M_losers[__pos]._M_keyp = &__key;
+        _M_losers[__pos]._M_source = __source;
+      }
+    };
+
+  /**
+   * @brief Stable unguarded _LoserTree variant storing pointers.
+   *
+   * Unstable variant is implemented below using partial specialization.
+   */
+  template<bool __stable/* default == true */, typename _Tp, typename _Compare>
+    class _LoserTreePointerUnguarded
+    : public _LoserTreePointerUnguardedBase<_Tp, _Compare>
+    {
+      typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
+      using _Base::_M_k;
+      // _M_comp is a member of a dependent base class; it has to be
+      // named here to be found by two-phase name lookup.
+      using _Base::_M_comp;
+      using _Base::_M_losers;
+
+    public:
+      _LoserTreePointerUnguarded(unsigned int __k, const _Tp& __sentinel,
+                                 _Compare __comp = std::less<_Tp>())
+      : _Base::_LoserTreePointerUnguardedBase(__k, __sentinel, __comp)
+      { }
+
+      /**
+       * Plays the game at node "__root", stores the loser there and
+       * returns the position of the winner. On ties the left player wins.
+       */
+      unsigned int
+      __init_winner(unsigned int __root)
+      {
+        if (__root >= _M_k)
+          return __root;
+        else
+          {
+            unsigned int __left = __init_winner(2 * __root);
+            unsigned int __right = __init_winner(2 * __root + 1);
+            if (!_M_comp(*_M_losers[__right]._M_keyp,
+                         *_M_losers[__left]._M_keyp))
+              {
+                // Left one is less or equal.
+                _M_losers[__root] = _M_losers[__right];
+                return __left;
+              }
+            else
+              {
+                // Right one is less.
+                _M_losers[__root] = _M_losers[__left];
+                return __right;
+              }
+          }
+      }
+
+      /** Builds the tree and stores the overall winner in element 0. */
+      void
+      __init()
+      {
+        _M_losers[0] = _M_losers[__init_winner(1)];
+
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top at the beginning
+        // (0 sequences!)
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+      }
+
+      /**
+       * Replaces the current minimum by __key, which comes from the
+       * same sequence, and replays the games up to the root.
+       * Only the address of __key is kept; __sup is not used.
+       */
+      void
+      __delete_min_insert(const _Tp& __key, bool __sup)
+      {
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+
+        const _Tp* __keyp = &__key;
+        int __source = _M_losers[0]._M_source;
+        for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+             __pos /= 2)
+          {
+            // The smaller one gets promoted, ties are broken by _M_source.
+            if (_M_comp(*_M_losers[__pos]._M_keyp, *__keyp)
+                || (!_M_comp(*__keyp, *_M_losers[__pos]._M_keyp)
+                    && _M_losers[__pos]._M_source < __source))
+              {
+                // The other one is smaller.
+                std::swap(_M_losers[__pos]._M_source, __source);
+                std::swap(_M_losers[__pos]._M_keyp, __keyp);
+              }
+          }
+
+        _M_losers[0]._M_source = __source;
+        _M_losers[0]._M_keyp = __keyp;
+      }
+    };
+
+  /**
+   * @brief Unstable unguarded _LoserTree variant storing pointers.
+   *
+   * Stable variant is above.
+   */
+  template<typename _Tp, typename _Compare>
+    class _LoserTreePointerUnguarded</* __stable == */false, _Tp, _Compare>
+    : public _LoserTreePointerUnguardedBase<_Tp, _Compare>
+    {
+      typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
+      using _Base::_M_k;
+      // _M_comp is a member of a dependent base class; it has to be
+      // named here to be found by two-phase name lookup.
+      using _Base::_M_comp;
+      using _Base::_M_losers;
+
+    public:
+      _LoserTreePointerUnguarded(unsigned int __k, const _Tp& __sentinel,
+                                 _Compare __comp = std::less<_Tp>())
+      : _Base::_LoserTreePointerUnguardedBase(__k, __sentinel, __comp)
+      { }
+
+      /**
+       * Plays the game at node "__root", stores the loser there and
+       * returns the position of the winner.
+       */
+      unsigned int
+      __init_winner(unsigned int __root)
+      {
+        if (__root >= _M_k)
+          return __root;
+        else
+          {
+            unsigned int __left = __init_winner(2 * __root);
+            unsigned int __right = __init_winner(2 * __root + 1);
+
+#if _GLIBCXX_ASSERTIONS
+            // If __left one is sentinel then __right one must be, too.
+            if (_M_losers[__left]._M_source == -1)
+              _GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1);
+#endif
+
+            if (!_M_comp(*_M_losers[__right]._M_keyp,
+                         *_M_losers[__left]._M_keyp))
+              {
+                // Left one is less or equal.
+                _M_losers[__root] = _M_losers[__right];
+                return __left;
+              }
+            else
+              {
+                // Right one is less.
+                _M_losers[__root] = _M_losers[__left];
+                return __right;
+              }
+          }
+      }
+
+      /** Builds the tree and stores the overall winner in element 0. */
+      void
+      __init()
+      {
+        _M_losers[0] = _M_losers[__init_winner(1)];
+
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top at the beginning
+        // (0 sequences!)
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+      }
+
+      /**
+       * Replaces the current minimum by __key, which comes from the
+       * same sequence, and replays the games up to the root.
+       * Only the address of __key is kept; __sup is not used.
+       */
+      void
+      __delete_min_insert(const _Tp& __key, bool __sup)
+      {
+#if _GLIBCXX_ASSERTIONS
+        // no dummy sequence can ever be at the top!
+        _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
+#endif
+
+        const _Tp* __keyp = &__key;
+        int __source = _M_losers[0]._M_source;
+        for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
+             __pos /= 2)
+          {
+            // The smaller one gets promoted.
+            if (_M_comp(*(_M_losers[__pos]._M_keyp), *__keyp))
+              {
+                // The other one is smaller.
+                std::swap(_M_losers[__pos]._M_source, __source);
+                std::swap(_M_losers[__pos]._M_keyp, __keyp);
+              }
+          }
+
+        _M_losers[0]._M_source = __source;
+        _M_losers[0]._M_keyp = __keyp;
+      }
+    };
+} // namespace __gnu_parallel
+
+#endif /* _GLIBCXX_PARALLEL_LOSERTREE_H */
diff --git a/libstdc++-v3/include/parallel/merge.h b/libstdc++-v3/include/parallel/merge.h
new file mode 100644
index 000000000..a19e996a0
--- /dev/null
+++ b/libstdc++-v3/include/parallel/merge.h
@@ -0,0 +1,251 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file parallel/merge.h
+ * @brief Parallel implementation of std::merge().
+ * This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_MERGE_H
+#define _GLIBCXX_PARALLEL_MERGE_H 1
+
+#include <parallel/basic_iterator.h>
+#include <bits/stl_algo.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Merge routine being able to merge only the @c __max_length
+   * smallest elements.
+   *
+   * The @c __begin iterators are advanced accordingly, they might not
+   * reach @c __end, in contrast to the usual variant.
+   * If fewer than @c __max_length elements are available, only the
+   * available ones are written.
+   * @param __begin1 Begin iterator of first sequence.
+   * @param __end1 End iterator of first sequence.
+   * @param __begin2 Begin iterator of second sequence.
+   * @param __end2 End iterator of second sequence.
+   * @param __target Target begin iterator.
+   * @param __max_length Maximum number of elements to merge.
+   * @param __comp Comparator.
+   * @return Output end iterator. */
+  template<typename _RAIter1, typename _RAIter2,
+           typename _OutputIterator, typename _DifferenceTp,
+           typename _Compare>
+    _OutputIterator
+    __merge_advance_usual(_RAIter1& __begin1, _RAIter1 __end1,
+                          _RAIter2& __begin2, _RAIter2 __end2,
+                          _OutputIterator __target,
+                          _DifferenceTp __max_length, _Compare __comp)
+    {
+      while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
+        {
+          // Take from the second sequence only if its head is strictly
+          // smaller, so that the first sequence wins ties.
+          if (__comp(*__begin2, *__begin1))
+            *__target++ = *__begin2++;
+          else
+            *__target++ = *__begin1++;
+          --__max_length;
+        }
+
+      // At most one sequence has elements left that are still wanted;
+      // copy them, but never read past its end.
+      if (__begin1 != __end1)
+        {
+          if ((__end1 - __begin1) < __max_length)
+            __max_length = static_cast<_DifferenceTp>(__end1 - __begin1);
+          __target = std::copy(__begin1, __begin1 + __max_length, __target);
+          __begin1 += __max_length;
+        }
+      else
+        {
+          if ((__end2 - __begin2) < __max_length)
+            __max_length = static_cast<_DifferenceTp>(__end2 - __begin2);
+          __target = std::copy(__begin2, __begin2 + __max_length, __target);
+          __begin2 += __max_length;
+        }
+      return __target;
+    }
+
+  /** @brief Merge routine being able to merge only the @c __max_length
+   * smallest elements.
+   *
+   * The @c __begin iterators are advanced accordingly, they might not
+   * reach @c __end, in contrast to the usual variant.
+   * Specially designed code should allow the compiler to generate
+   * conditional moves instead of branches.
+   * If fewer than @c __max_length elements are available, only the
+   * available ones are written.
+   * @param __begin1 Begin iterator of first sequence.
+   * @param __end1 End iterator of first sequence.
+   * @param __begin2 Begin iterator of second sequence.
+   * @param __end2 End iterator of second sequence.
+   * @param __target Target begin iterator.
+   * @param __max_length Maximum number of elements to merge.
+   * @param __comp Comparator.
+   * @return Output end iterator. */
+  template<typename _RAIter1, typename _RAIter2,
+           typename _OutputIterator, typename _DifferenceTp,
+           typename _Compare>
+    _OutputIterator
+    __merge_advance_movc(_RAIter1& __begin1, _RAIter1 __end1,
+                         _RAIter2& __begin2, _RAIter2 __end2,
+                         _OutputIterator __target,
+                         _DifferenceTp __max_length, _Compare __comp)
+    {
+      typedef typename std::iterator_traits<_RAIter1>::value_type
+        _ValueType1;
+      typedef typename std::iterator_traits<_RAIter2>::value_type
+        _ValueType2;
+
+#if _GLIBCXX_ASSERTIONS
+      _GLIBCXX_PARALLEL_ASSERT(__max_length >= 0);
+#endif
+
+      while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
+        {
+          // Both heads and both successor positions are fetched up
+          // front; the comparison then only selects between them.
+          _RAIter1 __next1 = __begin1 + 1;
+          _RAIter2 __next2 = __begin2 + 1;
+          _ValueType1 __element1 = *__begin1;
+          _ValueType2 __element2 = *__begin2;
+
+          if (__comp(__element2, __element1))
+            {
+              __element1 = __element2;
+              __begin2 = __next2;
+            }
+          else
+            __begin1 = __next1;
+
+          *__target = __element1;
+
+          ++__target;
+          --__max_length;
+        }
+
+      // At most one sequence has elements left that are still wanted;
+      // copy them, but never read past its end.
+      if (__begin1 != __end1)
+        {
+          if ((__end1 - __begin1) < __max_length)
+            __max_length = static_cast<_DifferenceTp>(__end1 - __begin1);
+          __target = std::copy(__begin1, __begin1 + __max_length, __target);
+          __begin1 += __max_length;
+        }
+      else
+        {
+          if ((__end2 - __begin2) < __max_length)
+            __max_length = static_cast<_DifferenceTp>(__end2 - __begin2);
+          __target = std::copy(__begin2, __begin2 + __max_length, __target);
+          __begin2 += __max_length;
+        }
+      return __target;
+    }
+
+  /** @brief Merge routine being able to merge only the @c __max_length
+   * smallest elements.
+   *
+   * The @c __begin iterators are advanced accordingly, they might not
+   * reach @c __end, in contrast to the usual variant.
+   * Static switch on whether to use the conditional-move variant.
+   * @param __begin1 Begin iterator of first sequence.
+   * @param __end1 End iterator of first sequence.
+   * @param __begin2 Begin iterator of second sequence.
+   * @param __end2 End iterator of second sequence.
+   * @param __target Target begin iterator.
+   * @param __max_length Maximum number of elements to merge.
+   * @param __comp Comparator.
+   * @return Output end iterator. */
+  template<typename _RAIter1, typename _RAIter2,
+           typename _OutputIterator, typename _DifferenceTp,
+           typename _Compare>
+    inline _OutputIterator
+    __merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
+                    _RAIter2& __begin2, _RAIter2 __end2,
+                    _OutputIterator __target, _DifferenceTp __max_length,
+                    _Compare __comp)
+    {
+      _GLIBCXX_CALL(__max_length)
+
+      // The conditional-move variant is the one selected here.
+      _OutputIterator __target_end
+        = __merge_advance_movc(__begin1, __end1, __begin2, __end2,
+                               __target, __max_length, __comp);
+      return __target_end;
+    }
+
+  /** @brief Merge routine fallback to sequential in case the
+      iterators of the two input sequences are of different type.
+   * @param __begin1 Begin iterator of first sequence.
+   * @param __end1 End iterator of first sequence.
+   * @param __begin2 Begin iterator of second sequence.
+   * @param __end2 End iterator of second sequence.
+   * @param __target Target begin iterator.
+   * @param __max_length Maximum number of elements to merge.
+   * @param __comp Comparator.
+   * @return Output end iterator. */
+  template<typename _RAIter1, typename _RAIter2,
+           typename _RAIter3, typename _Compare>
+    inline _RAIter3
+    __parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
+                             _RAIter2& __begin2, _RAIter2 __end2,
+                             _RAIter3 __target,
+                             typename std::iterator_traits<_RAIter1>::
+                             difference_type __max_length,
+                             _Compare __comp)
+    {
+      // different iterators, parallel implementation not available
+      _RAIter3 __target_end
+        = __merge_advance(__begin1, __end1, __begin2, __end2, __target,
+                          __max_length, __comp);
+      return __target_end;
+    }
+
+  /** @brief Parallel merge routine being able to merge only the @c
+   * __max_length smallest elements.
+   *
+   * The @c __begin iterators are advanced accordingly, they might not
+   * reach @c __end, in contrast to the usual variant.
+   * The functionality is projected onto parallel_multiway_merge.
+   * @param __begin1 Begin iterator of first sequence.
+   * @param __end1 End iterator of first sequence.
+   * @param __begin2 Begin iterator of second sequence.
+   * @param __end2 End iterator of second sequence.
+   * @param __target Target begin iterator.
+   * @param __max_length Maximum number of elements to merge.
+   * @param __comp Comparator.
+   * @return Output end iterator.
+   */
+  template<typename _RAIter1, typename _RAIter3,
+           typename _Compare>
+    inline _RAIter3
+    __parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
+                             _RAIter1& __begin2, _RAIter1 __end2,
+                             _RAIter3 __target, typename
+                             std::iterator_traits<_RAIter1>::
+                             difference_type __max_length, _Compare __comp)
+    {
+      typedef typename std::iterator_traits<_RAIter1>::
+        difference_type _DifferenceType1 /* == difference_type2 */;
+      typedef typename std::pair<_RAIter1, _RAIter1>
+        _IteratorPair;
+
+      // NOTE: the multiway merge works on these copies of the begin
+      // iterators; __begin1 and __begin2 themselves are not modified
+      // by this overload.
+      _IteratorPair __seqs[2] = { std::make_pair(__begin1, __end1),
+                                  std::make_pair(__begin2, __end2) };
+      _RAIter3 __target_end = parallel_multiway_merge
+        < /* __stable = */ true, /* __sentinels = */ false>
+          (__seqs, __seqs + 2, __target, multiway_merge_exact_splitting
+           < /* __stable = */ true, _IteratorPair*,
+             _Compare, _DifferenceType1>, __max_length, __comp,
+           omp_get_max_threads());
+
+      return __target_end;
+    }
+} //namespace __gnu_parallel
+
+#endif /* _GLIBCXX_PARALLEL_MERGE_H */
diff --git a/libstdc++-v3/include/parallel/multiseq_selection.h b/libstdc++-v3/include/parallel/multiseq_selection.h
new file mode 100644
index 000000000..e77653aee
--- /dev/null
+++ b/libstdc++-v3/include/parallel/multiseq_selection.h
@@ -0,0 +1,646 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file parallel/multiseq_selection.h
+ * @brief Functions to find elements of a certain global rank in
+ * multiple sorted sequences. Also serves for splitting such
+ * sequence sets.
+ *
+ * The algorithm description can be found in
+ *
+ * P. J. Varman, S. D. Scheufler, B. R. Iyer, and G. R. Ricard.
+ * Merging Multiple Lists on Hierarchical-Memory Multiprocessors.
+ * Journal of Parallel and Distributed Computing, 12(2):171–177, 1991.
+ *
+ * This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H
+#define _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H 1
+
+#include <vector>
+#include <queue>
+
+#include <bits/stl_algo.h>
+
+#include <parallel/sort.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Compare a pair of types lexicographically, ascending.
+   *
+   * The first components are ordered by the wrapped comparator, the
+   * second components by operator<. The comparator is held by
+   * reference and must outlive this object. */
+  template<typename _T1, typename _T2, typename _Compare>
+    class _Lexicographic
+    : public std::binary_function<std::pair<_T1, _T2>,
+                                  std::pair<_T1, _T2>, bool>
+    {
+    private:
+      _Compare& _M_comp;
+
+    public:
+      _Lexicographic(_Compare& __comp) : _M_comp(__comp) { }
+
+      bool
+      operator()(const std::pair<_T1, _T2>& __p1,
+                 const std::pair<_T1, _T2>& __p2) const
+      {
+        if (_M_comp(__p1.first, __p2.first))
+          return true;
+
+        if (_M_comp(__p2.first, __p1.first))
+          return false;
+
+        // Firsts are equal.
+        return __p1.second < __p2.second;
+      }
+    };
+
+  /** @brief Compare a pair of types lexicographically, descending.
+   *
+   * Yields the result of _Lexicographic with the two arguments
+   * exchanged. The comparator is held by reference and must outlive
+   * this object. */
+  template<typename _T1, typename _T2, typename _Compare>
+    class _LexicographicReverse
+    // The argument types are pairs, exactly as for _Lexicographic.
+    : public std::binary_function<std::pair<_T1, _T2>,
+                                  std::pair<_T1, _T2>, bool>
+    {
+    private:
+      _Compare& _M_comp;
+
+    public:
+      _LexicographicReverse(_Compare& __comp) : _M_comp(__comp) { }
+
+      bool
+      operator()(const std::pair<_T1, _T2>& __p1,
+                 const std::pair<_T1, _T2>& __p2) const
+      {
+        if (_M_comp(__p2.first, __p1.first))
+          return true;
+
+        if (_M_comp(__p1.first, __p2.first))
+          return false;
+
+        // Firsts are equal.
+        return __p2.second < __p1.second;
+      }
+    };
+
+  /**
+   * @brief Splits several sorted sequences at a certain global __rank,
+   * resulting in a splitting point for each sequence.
+   * The sequences are passed via a sequence of random-access
+   * iterator pairs, none of the sequences may be empty. If there
+   * are several equal elements across the split, the ones on the
+   * __left side will be chosen from sequences with smaller number.
+   * @param __begin_seqs Begin of the sequence of iterator pairs.
+   * @param __end_seqs End of the sequence of iterator pairs.
+   * @param __rank The global rank to partition at.
+   * @param __begin_offsets A random-access __sequence __begin where the
+   * __result will be stored in. Each element of the sequence is an
+   * iterator that points to the first element on the greater part of
+   * the respective __sequence.
+   * @param __comp The ordering functor, defaults to std::less<_Tp>.
+ */ + template<typename _RanSeqs, typename _RankType, typename _RankIterator, + typename _Compare> + void + multiseq_partition(_RanSeqs __begin_seqs, _RanSeqs __end_seqs, + _RankType __rank, + _RankIterator __begin_offsets, + _Compare __comp = std::less< + typename std::iterator_traits<typename + std::iterator_traits<_RanSeqs>::value_type:: + first_type>::value_type>()) // std::less<_Tp> + { + _GLIBCXX_CALL(__end_seqs - __begin_seqs) + + typedef typename std::iterator_traits<_RanSeqs>::value_type::first_type + _It; + typedef typename std::iterator_traits<_RanSeqs>::difference_type + _SeqNumber; + typedef typename std::iterator_traits<_It>::difference_type + _DifferenceType; + typedef typename std::iterator_traits<_It>::value_type _ValueType; + + _Lexicographic<_ValueType, _SeqNumber, _Compare> __lcomp(__comp); + _LexicographicReverse<_ValueType, _SeqNumber, _Compare> __lrcomp(__comp); + + // Number of sequences, number of elements in total (possibly + // including padding). + _DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __nn = 0, + __nmax, __n, __r; + + for (_SeqNumber __i = 0; __i < __m; __i++) + { + __nn += std::distance(__begin_seqs[__i].first, + __begin_seqs[__i].second); + _GLIBCXX_PARALLEL_ASSERT( + std::distance(__begin_seqs[__i].first, + __begin_seqs[__i].second) > 0); + } + + if (__rank == __nn) + { + for (_SeqNumber __i = 0; __i < __m; __i++) + __begin_offsets[__i] = __begin_seqs[__i].second; // Very end. 
+ // Return __m - 1; + return; + } + + _GLIBCXX_PARALLEL_ASSERT(__m != 0); + _GLIBCXX_PARALLEL_ASSERT(__nn != 0); + _GLIBCXX_PARALLEL_ASSERT(__rank >= 0); + _GLIBCXX_PARALLEL_ASSERT(__rank < __nn); + + _DifferenceType* __ns = new _DifferenceType[__m]; + _DifferenceType* __a = new _DifferenceType[__m]; + _DifferenceType* __b = new _DifferenceType[__m]; + _DifferenceType __l; + + __ns[0] = std::distance(__begin_seqs[0].first, __begin_seqs[0].second); + __nmax = __ns[0]; + for (_SeqNumber __i = 0; __i < __m; __i++) + { + __ns[__i] = std::distance(__begin_seqs[__i].first, + __begin_seqs[__i].second); + __nmax = std::max(__nmax, __ns[__i]); + } + + __r = __rd_log2(__nmax) + 1; + + // Pad all lists to this length, at least as long as any ns[__i], + // equality iff __nmax = 2^__k - 1. + __l = (1ULL << __r) - 1; + + for (_SeqNumber __i = 0; __i < __m; __i++) + { + __a[__i] = 0; + __b[__i] = __l; + } + __n = __l / 2; + + // Invariants: + // 0 <= __a[__i] <= __ns[__i], 0 <= __b[__i] <= __l + +#define __S(__i) (__begin_seqs[__i].first) + + // Initial partition. + std::vector<std::pair<_ValueType, _SeqNumber> > __sample; + + for (_SeqNumber __i = 0; __i < __m; __i++) + if (__n < __ns[__i]) //__sequence long enough + __sample.push_back(std::make_pair(__S(__i)[__n], __i)); + __gnu_sequential::sort(__sample.begin(), __sample.end(), __lcomp); + + for (_SeqNumber __i = 0; __i < __m; __i++) //conceptual infinity + if (__n >= __ns[__i]) //__sequence too short, conceptual infinity + __sample.push_back( + std::make_pair(__S(__i)[0] /*__dummy element*/, __i)); + + _DifferenceType __localrank = __rank / __l; + + _SeqNumber __j; + for (__j = 0; + __j < __localrank && ((__n + 1) <= __ns[__sample[__j].second]); + ++__j) + __a[__sample[__j].second] += __n + 1; + for (; __j < __m; __j++) + __b[__sample[__j].second] -= __n + 1; + + // Further refinement. 
+ while (__n > 0) + { + __n /= 2; + + _SeqNumber __lmax_seq = -1; // to avoid warning + const _ValueType* __lmax = 0; // impossible to avoid the warning? + for (_SeqNumber __i = 0; __i < __m; __i++) + { + if (__a[__i] > 0) + { + if (!__lmax) + { + __lmax = &(__S(__i)[__a[__i] - 1]); + __lmax_seq = __i; + } + else + { + // Max, favor rear sequences. + if (!__comp(__S(__i)[__a[__i] - 1], *__lmax)) + { + __lmax = &(__S(__i)[__a[__i] - 1]); + __lmax_seq = __i; + } + } + } + } + + _SeqNumber __i; + for (__i = 0; __i < __m; __i++) + { + _DifferenceType __middle = (__b[__i] + __a[__i]) / 2; + if (__lmax && __middle < __ns[__i] && + __lcomp(std::make_pair(__S(__i)[__middle], __i), + std::make_pair(*__lmax, __lmax_seq))) + __a[__i] = std::min(__a[__i] + __n + 1, __ns[__i]); + else + __b[__i] -= __n + 1; + } + + _DifferenceType __leftsize = 0; + for (_SeqNumber __i = 0; __i < __m; __i++) + __leftsize += __a[__i] / (__n + 1); + + _DifferenceType __skew = __rank / (__n + 1) - __leftsize; + + if (__skew > 0) + { + // Move to the left, find smallest. + std::priority_queue<std::pair<_ValueType, _SeqNumber>, + std::vector<std::pair<_ValueType, _SeqNumber> >, + _LexicographicReverse<_ValueType, _SeqNumber, _Compare> > + __pq(__lrcomp); + + for (_SeqNumber __i = 0; __i < __m; __i++) + if (__b[__i] < __ns[__i]) + __pq.push(std::make_pair(__S(__i)[__b[__i]], __i)); + + for (; __skew != 0 && !__pq.empty(); --__skew) + { + _SeqNumber __source = __pq.top().second; + __pq.pop(); + + __a[__source] + = std::min(__a[__source] + __n + 1, __ns[__source]); + __b[__source] += __n + 1; + + if (__b[__source] < __ns[__source]) + __pq.push( + std::make_pair(__S(__source)[__b[__source]], __source)); + } + } + else if (__skew < 0) + { + // Move to the right, find greatest. 
+ std::priority_queue<std::pair<_ValueType, _SeqNumber>, + std::vector<std::pair<_ValueType, _SeqNumber> >, + _Lexicographic<_ValueType, _SeqNumber, _Compare> > + __pq(__lcomp); + + for (_SeqNumber __i = 0; __i < __m; __i++) + if (__a[__i] > 0) + __pq.push(std::make_pair(__S(__i)[__a[__i] - 1], __i)); + + for (; __skew != 0; ++__skew) + { + _SeqNumber __source = __pq.top().second; + __pq.pop(); + + __a[__source] -= __n + 1; + __b[__source] -= __n + 1; + + if (__a[__source] > 0) + __pq.push(std::make_pair( + __S(__source)[__a[__source] - 1], __source)); + } + } + } + + // Postconditions: + // __a[__i] == __b[__i] in most cases, except when __a[__i] has been + // clamped because of having reached the boundary + + // Now return the result, calculate the offset. + + // Compare the keys on both edges of the border. + + // Maximum of left edge, minimum of right edge. + _ValueType* __maxleft = 0; + _ValueType* __minright = 0; + for (_SeqNumber __i = 0; __i < __m; __i++) + { + if (__a[__i] > 0) + { + if (!__maxleft) + __maxleft = &(__S(__i)[__a[__i] - 1]); + else + { + // Max, favor rear sequences. + if (!__comp(__S(__i)[__a[__i] - 1], *__maxleft)) + __maxleft = &(__S(__i)[__a[__i] - 1]); + } + } + if (__b[__i] < __ns[__i]) + { + if (!__minright) + __minright = &(__S(__i)[__b[__i]]); + else + { + // Min, favor fore sequences. + if (__comp(__S(__i)[__b[__i]], *__minright)) + __minright = &(__S(__i)[__b[__i]]); + } + } + } + + _SeqNumber __seq = 0; + for (_SeqNumber __i = 0; __i < __m; __i++) + __begin_offsets[__i] = __S(__i) + __a[__i]; + + delete[] __ns; + delete[] __a; + delete[] __b; + } + + + /** + * @brief Selects the element at a certain global __rank from several + * sorted sequences. + * + * The sequences are passed via a sequence of random-access + * iterator pairs, none of the sequences may be empty. + * @param __begin_seqs Begin of the sequence of iterator pairs. + * @param __end_seqs End of the sequence of iterator pairs. 
+ * @param __rank The global rank to partition at. + * @param __offset The rank of the selected element in the global + * subsequence of elements equal to the selected element. If the + * selected element is unique, this number is 0. + * @param __comp The ordering functor, defaults to std::less. + */ + template<typename _Tp, typename _RanSeqs, typename _RankType, + typename _Compare> + _Tp + multiseq_selection(_RanSeqs __begin_seqs, _RanSeqs __end_seqs, + _RankType __rank, + _RankType& __offset, _Compare __comp = std::less<_Tp>()) + { + _GLIBCXX_CALL(__end_seqs - __begin_seqs) + + typedef typename std::iterator_traits<_RanSeqs>::value_type::first_type + _It; + typedef typename std::iterator_traits<_RanSeqs>::difference_type + _SeqNumber; + typedef typename std::iterator_traits<_It>::difference_type + _DifferenceType; + + _Lexicographic<_Tp, _SeqNumber, _Compare> __lcomp(__comp); + _LexicographicReverse<_Tp, _SeqNumber, _Compare> __lrcomp(__comp); + + // Number of sequences, number of elements in total (possibly + // including padding). 
+ _DifferenceType __m = std::distance(__begin_seqs, __end_seqs); + _DifferenceType __nn = 0; + _DifferenceType __nmax, __n, __r; + + for (_SeqNumber __i = 0; __i < __m; __i++) + __nn += std::distance(__begin_seqs[__i].first, + __begin_seqs[__i].second); + + if (__m == 0 || __nn == 0 || __rank < 0 || __rank >= __nn) + { + // result undefined if there is no data or __rank is outside bounds + throw std::exception(); + } + + + _DifferenceType* __ns = new _DifferenceType[__m]; + _DifferenceType* __a = new _DifferenceType[__m]; + _DifferenceType* __b = new _DifferenceType[__m]; + _DifferenceType __l; + + __ns[0] = std::distance(__begin_seqs[0].first, __begin_seqs[0].second); + __nmax = __ns[0]; + for (_SeqNumber __i = 0; __i < __m; ++__i) + { + __ns[__i] = std::distance(__begin_seqs[__i].first, + __begin_seqs[__i].second); + __nmax = std::max(__nmax, __ns[__i]); + } + + __r = __rd_log2(__nmax) + 1; + + // Pad all lists to this length, at least as long as any ns[__i], + // equality iff __nmax = 2^__k - 1 + __l = __round_up_to_pow2(__r) - 1; + + for (_SeqNumber __i = 0; __i < __m; ++__i) + { + __a[__i] = 0; + __b[__i] = __l; + } + __n = __l / 2; + + // Invariants: + // 0 <= __a[__i] <= __ns[__i], 0 <= __b[__i] <= __l + +#define __S(__i) (__begin_seqs[__i].first) + + // Initial partition. + std::vector<std::pair<_Tp, _SeqNumber> > __sample; + + for (_SeqNumber __i = 0; __i < __m; __i++) + if (__n < __ns[__i]) + __sample.push_back(std::make_pair(__S(__i)[__n], __i)); + __gnu_sequential::sort(__sample.begin(), __sample.end(), + __lcomp, sequential_tag()); + + // Conceptual infinity. 
+ for (_SeqNumber __i = 0; __i < __m; __i++) + if (__n >= __ns[__i]) + __sample.push_back( + std::make_pair(__S(__i)[0] /*__dummy element*/, __i)); + + _DifferenceType __localrank = __rank / __l; + + _SeqNumber __j; + for (__j = 0; + __j < __localrank && ((__n + 1) <= __ns[__sample[__j].second]); + ++__j) + __a[__sample[__j].second] += __n + 1; + for (; __j < __m; ++__j) + __b[__sample[__j].second] -= __n + 1; + + // Further refinement. + while (__n > 0) + { + __n /= 2; + + const _Tp* __lmax = 0; + for (_SeqNumber __i = 0; __i < __m; ++__i) + { + if (__a[__i] > 0) + { + if (!__lmax) + __lmax = &(__S(__i)[__a[__i] - 1]); + else + { + if (__comp(*__lmax, __S(__i)[__a[__i] - 1])) //max + __lmax = &(__S(__i)[__a[__i] - 1]); + } + } + } + + _SeqNumber __i; + for (__i = 0; __i < __m; __i++) + { + _DifferenceType __middle = (__b[__i] + __a[__i]) / 2; + if (__lmax && __middle < __ns[__i] + && __comp(__S(__i)[__middle], *__lmax)) + __a[__i] = std::min(__a[__i] + __n + 1, __ns[__i]); + else + __b[__i] -= __n + 1; + } + + _DifferenceType __leftsize = 0; + for (_SeqNumber __i = 0; __i < __m; ++__i) + __leftsize += __a[__i] / (__n + 1); + + _DifferenceType __skew = __rank / (__n + 1) - __leftsize; + + if (__skew > 0) + { + // Move to the left, find smallest. + std::priority_queue<std::pair<_Tp, _SeqNumber>, + std::vector<std::pair<_Tp, _SeqNumber> >, + _LexicographicReverse<_Tp, _SeqNumber, _Compare> > + __pq(__lrcomp); + + for (_SeqNumber __i = 0; __i < __m; ++__i) + if (__b[__i] < __ns[__i]) + __pq.push(std::make_pair(__S(__i)[__b[__i]], __i)); + + for (; __skew != 0 && !__pq.empty(); --__skew) + { + _SeqNumber __source = __pq.top().second; + __pq.pop(); + + __a[__source] + = std::min(__a[__source] + __n + 1, __ns[__source]); + __b[__source] += __n + 1; + + if (__b[__source] < __ns[__source]) + __pq.push( + std::make_pair(__S(__source)[__b[__source]], __source)); + } + } + else if (__skew < 0) + { + // Move to the right, find greatest. 
+ std::priority_queue<std::pair<_Tp, _SeqNumber>, + std::vector<std::pair<_Tp, _SeqNumber> >, + _Lexicographic<_Tp, _SeqNumber, _Compare> > __pq(__lcomp); + + for (_SeqNumber __i = 0; __i < __m; ++__i) + if (__a[__i] > 0) + __pq.push(std::make_pair(__S(__i)[__a[__i] - 1], __i)); + + for (; __skew != 0; ++__skew) + { + _SeqNumber __source = __pq.top().second; + __pq.pop(); + + __a[__source] -= __n + 1; + __b[__source] -= __n + 1; + + if (__a[__source] > 0) + __pq.push(std::make_pair( + __S(__source)[__a[__source] - 1], __source)); + } + } + } + + // Postconditions: + // __a[__i] == __b[__i] in most cases, except when __a[__i] has been + // clamped because of having reached the boundary + + // Now return the result, calculate the offset. + + // Compare the keys on both edges of the border. + + // Maximum of left edge, minimum of right edge. + bool __maxleftset = false, __minrightset = false; + + // Impossible to avoid the warning? + _Tp __maxleft, __minright; + for (_SeqNumber __i = 0; __i < __m; ++__i) + { + if (__a[__i] > 0) + { + if (!__maxleftset) + { + __maxleft = __S(__i)[__a[__i] - 1]; + __maxleftset = true; + } + else + { + // Max. + if (__comp(__maxleft, __S(__i)[__a[__i] - 1])) + __maxleft = __S(__i)[__a[__i] - 1]; + } + } + if (__b[__i] < __ns[__i]) + { + if (!__minrightset) + { + __minright = __S(__i)[__b[__i]]; + __minrightset = true; + } + else + { + // Min. + if (__comp(__S(__i)[__b[__i]], __minright)) + __minright = __S(__i)[__b[__i]]; + } + } + } + + // Minright is the __splitter, in any case. + + if (!__maxleftset || __comp(__minright, __maxleft)) + { + // Good luck, everything is split unambiguously. + __offset = 0; + } + else + { + // We have to calculate an offset. 
+ __offset = 0; + + for (_SeqNumber __i = 0; __i < __m; ++__i) + { + _DifferenceType lb + = std::lower_bound(__S(__i), __S(__i) + __ns[__i], + __minright, + __comp) - __S(__i); + __offset += __a[__i] - lb; + } + } + + delete[] __ns; + delete[] __a; + delete[] __b; + + return __minright; + } +} + +#undef __S + +#endif /* _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H */ diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h new file mode 100644 index 000000000..00df31c13 --- /dev/null +++ b/libstdc++-v3/include/parallel/multiway_merge.h @@ -0,0 +1,2069 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/multiway_merge.h +* @brief Implementation of sequential and parallel multiway merge. +* +* Explanations on the high-speed merging routines in the appendix of +* +* P. Sanders. +* Fast priority queues for cached memory. +* ACM Journal of Experimental Algorithmics, 5, 2000. 
+* +* This file is a GNU parallel extension to the Standard C++ Library. +*/ + +// Written by Johannes Singler and Manuel Holtgrewe. + +#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H +#define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H + +#include <vector> + +#include <bits/stl_algo.h> +#include <parallel/features.h> +#include <parallel/parallel.h> +#include <parallel/losertree.h> +#if _GLIBCXX_ASSERTIONS +#include <parallel/checkers.h> +#endif + +/** @brief Length of a sequence described by a pair of iterators. */ +#define _GLIBCXX_PARALLEL_LENGTH(__s) ((__s).second - (__s).first) + +namespace __gnu_parallel +{ + /** @brief _Iterator wrapper supporting an implicit supremum at the end + * of the sequence, dominating all comparisons. + * + * The implicit supremum comes with a performance cost. + * + * Deriving from _RAIter is not possible since + * _RAIter need not be a class. + */ + template<typename _RAIter, typename _Compare> + class _GuardedIterator + { + private: + /** @brief Current iterator __position. */ + _RAIter _M_current; + + /** @brief End iterator of the sequence. */ + _RAIter _M_end; + + /** @brief _Compare. */ + _Compare& __comp; + + public: + /** @brief Constructor. Sets iterator to beginning of sequence. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __comp Comparator provided for associated overloaded + * compare operators. */ + _GuardedIterator(_RAIter __begin, _RAIter __end, _Compare& __comp) + : _M_current(__begin), _M_end(__end), __comp(__comp) + { } + + /** @brief Pre-increment operator. + * @return This. */ + _GuardedIterator<_RAIter, _Compare>& + operator++() + { + ++_M_current; + return *this; + } + + /** @brief Dereference operator. + * @return Referenced element. */ + typename std::iterator_traits<_RAIter>::value_type& + operator*() + { return *_M_current; } + + /** @brief Convert to wrapped iterator. + * @return Wrapped iterator. 
*/ + operator _RAIter() + { return _M_current; } + + /** @brief Compare two elements referenced by guarded iterators. + * @param __bi1 First iterator. + * @param __bi2 Second iterator. + * @return @c true if less. */ + friend bool + operator<(_GuardedIterator<_RAIter, _Compare>& __bi1, + _GuardedIterator<_RAIter, _Compare>& __bi2) + { + if (__bi1._M_current == __bi1._M_end) // __bi1 is sup + return __bi2._M_current == __bi2._M_end; // __bi2 is not sup + if (__bi2._M_current == __bi2._M_end) // __bi2 is sup + return true; + return (__bi1.__comp)(*__bi1, *__bi2); // normal compare + } + + /** @brief Compare two elements referenced by guarded iterators. + * @param __bi1 First iterator. + * @param __bi2 Second iterator. + * @return @c True if less equal. */ + friend bool + operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1, + _GuardedIterator<_RAIter, _Compare>& __bi2) + { + if (__bi2._M_current == __bi2._M_end) // __bi1 is sup + return __bi1._M_current != __bi1._M_end; // __bi2 is not sup + if (__bi1._M_current == __bi1._M_end) // __bi2 is sup + return false; + return !(__bi1.__comp)(*__bi2, *__bi1); // normal compare + } + }; + + template<typename _RAIter, typename _Compare> + class _UnguardedIterator + { + private: + /** @brief Current iterator __position. */ + _RAIter _M_current; + /** @brief _Compare. */ + _Compare& __comp; + + public: + /** @brief Constructor. Sets iterator to beginning of sequence. + * @param __begin Begin iterator of sequence. + * @param __end Unused, only for compatibility. + * @param __comp Unused, only for compatibility. */ + _UnguardedIterator(_RAIter __begin, + _RAIter /* __end */, _Compare& __comp) + : _M_current(__begin), __comp(__comp) + { } + + /** @brief Pre-increment operator. + * @return This. */ + _UnguardedIterator<_RAIter, _Compare>& + operator++() + { + ++_M_current; + return *this; + } + + /** @brief Dereference operator. + * @return Referenced element. 
*/ + typename std::iterator_traits<_RAIter>::value_type& + operator*() + { return *_M_current; } + + /** @brief Convert to wrapped iterator. + * @return Wrapped iterator. */ + operator _RAIter() + { return _M_current; } + + /** @brief Compare two elements referenced by unguarded iterators. + * @param __bi1 First iterator. + * @param __bi2 Second iterator. + * @return @c true if less. */ + friend bool + operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1, + _UnguardedIterator<_RAIter, _Compare>& __bi2) + { + // Normal compare. + return (__bi1.__comp)(*__bi1, *__bi2); + } + + /** @brief Compare two elements referenced by unguarded iterators. + * @param __bi1 First iterator. + * @param __bi2 Second iterator. + * @return @c True if less equal. */ + friend bool + operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1, + _UnguardedIterator<_RAIter, _Compare>& __bi2) + { + // Normal compare. + return !(__bi1.__comp)(*__bi2, *__bi1); + } + }; + + /** @brief Highly efficient 3-way merging procedure. + * + * Merging is done with the algorithm implementation described by Peter + * Sanders. Basically, the idea is to minimize the number of necessary + * comparison after merging an element. The implementation trick + * that makes this fast is that the order of the sequences is stored + * in the instruction pointer (translated into labels in C++). + * + * This works well for merging up to 4 sequences. + * + * Note that making the merging stable does @a not come at a + * performance hit. + * + * Whether the merging is done guarded or unguarded is selected by the + * used iterator class. + * + * @param __seqs_begin Begin iterator of iterator pair input sequence. + * @param __seqs_end End iterator of iterator pair input sequence. + * @param __target Begin iterator of output sequence. + * @param __comp Comparator. + * @param __length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence. 
+ */ + template<template<typename RAI, typename C> class iterator, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + _RAIter3 + multiway_merge_3_variant(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) + { + _GLIBCXX_CALL(__length); + + typedef _DifferenceTp _DifferenceType; + + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + /* Nothing to merge: return before any sequence is touched. */ if (__length == 0) + return __target; + +#if _GLIBCXX_ASSERTIONS + _DifferenceTp __orig_length = __length; +#endif + + /* Wrap the three input sequences at indices 0..2; __seqs_end is not read in this function. */ iterator<_RAIter1, _Compare> + __seq0(__seqs_begin[0].first, __seqs_begin[0].second, __comp), + __seq1(__seqs_begin[1].first, __seqs_begin[1].second, __comp), + __seq2(__seqs_begin[2].first, __seqs_begin[2].second, __comp); + + /* Establish the initial order of the three heads and jump to the label encoding it: __sXYZ lists the sequences from smallest to largest head. */ if (__seq0 <= __seq1) + { + if (__seq1 <= __seq2) + goto __s012; + else + if (__seq2 < __seq0) + goto __s201; + else + goto __s021; + } + else + { + if (__seq1 <= __seq2) + { + if (__seq0 <= __seq2) + goto __s102; + else + goto __s120; + } + else + goto __s210; + } /* Each case below emits the head of sequence __a, advances it, stops once __length reaches 0, and otherwise re-inserts __a into the order by comparing it with __b (operator __c0) and then __c (operator __c1). */ +#define _GLIBCXX_PARALLEL_MERGE_3_CASE(__a, __b, __c, __c0, __c1) \ + __s ## __a ## __b ## __c : \ + *__target = *__seq ## __a; \ + ++__target; \ + --__length; \ + ++__seq ## __a; \ + if (__length == 0) goto __finish; \ + if (__seq ## __a __c0 __seq ## __b) goto __s ## __a ## __b ## __c; \ + if (__seq ## __a __c1 __seq ## __c) goto __s ## __b ## __a ## __c; \ + goto __s ## __b ## __c ## __a; + + _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=); + _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < ); + _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < ); + _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=); + _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=); + _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < ); + +#undef _GLIBCXX_PARALLEL_MERGE_3_CASE + + __finish: + ; + +#if _GLIBCXX_ASSERTIONS + 
/* The three sequences together must have advanced by exactly the requested length. */ _GLIBCXX_PARALLEL_ASSERT( + ((_RAIter1)__seq0 - __seqs_begin[0].first) + + ((_RAIter1)__seq1 - __seqs_begin[1].first) + + ((_RAIter1)__seq2 - __seqs_begin[2].first) + == __orig_length); +#endif + + /* Write the advanced positions back into the caller's iterator pairs. */ __seqs_begin[0].first = __seq0; + __seqs_begin[1].first = __seq1; + __seqs_begin[2].first = __seq2; + + return __target; + } + + /** + * @brief Highly efficient 4-way merging procedure. + * + * Merging is done with the algorithm implementation described by Peter + * Sanders. Basically, the idea is to minimize the number of necessary + * comparisons after merging an element. The implementation trick + * that makes this fast is that the order of the sequences is stored + * in the instruction pointer (translated into goto labels in C++). + * + * This works well for merging up to 4 sequences. + * + * Note that making the merging stable does @a not come at a + * performance hit. + * + * Whether the merging is done guarded or unguarded is selected by the + * used iterator class. + * + * @param __seqs_begin Begin iterator of iterator pair input sequence. + * @param __seqs_end End iterator of iterator pair input sequence. + * @param __target Begin iterator of output sequence. + * @param __comp Comparator. + * @param __length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence.
+ */ + template<template<typename RAI, typename C> class iterator, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + _RAIter3 + multiway_merge_4_variant(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) + { + _GLIBCXX_CALL(__length); + typedef _DifferenceTp _DifferenceType; + + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + /* Wrap the four input sequences at indices 0..3; __seqs_end is not read in this function. */ iterator<_RAIter1, _Compare> + __seq0(__seqs_begin[0].first, __seqs_begin[0].second, __comp), + __seq1(__seqs_begin[1].first, __seqs_begin[1].second, __comp), + __seq2(__seqs_begin[2].first, __seqs_begin[2].second, __comp), + __seq3(__seqs_begin[3].first, __seqs_begin[3].second, __comp); + /* Given the order __a, __b, __c of three sequences, find the place of the fourth sequence __d among them and jump to the label that encodes the resulting order of all four. */ +#define _GLIBCXX_PARALLEL_DECISION(__a, __b, __c, __d) { \ + if (__seq ## __d < __seq ## __a) \ + goto __s ## __d ## __a ## __b ## __c; \ + if (__seq ## __d < __seq ## __b) \ + goto __s ## __a ## __d ## __b ## __c; \ + if (__seq ## __d < __seq ## __c) \ + goto __s ## __a ## __b ## __d ## __c; \ + goto __s ## __a ## __b ## __c ## __d; } + + /* First order sequences 0..2 exactly as the 3-way variant does, then insert sequence 3. */ if (__seq0 <= __seq1) + { + if (__seq1 <= __seq2) + _GLIBCXX_PARALLEL_DECISION(0,1,2,3) + else + if (__seq2 < __seq0) + _GLIBCXX_PARALLEL_DECISION(2,0,1,3) + else + _GLIBCXX_PARALLEL_DECISION(0,2,1,3) + } + else + { + if (__seq1 <= __seq2) + { + if (__seq0 <= __seq2) + _GLIBCXX_PARALLEL_DECISION(1,0,2,3) + else + _GLIBCXX_PARALLEL_DECISION(1,2,0,3) + } + else + _GLIBCXX_PARALLEL_DECISION(2,1,0,3) + } + /* Each case below first stops if __length is 0 (so a zero-length request writes nothing), then emits the head of sequence __a, advances it and re-inserts __a into the order using the operators __c0, __c1, __c2. */ +#define _GLIBCXX_PARALLEL_MERGE_4_CASE(__a, __b, __c, __d, \ + __c0, __c1, __c2) \ + __s ## __a ## __b ## __c ## __d: \ + if (__length == 0) goto __finish; \ + *__target = *__seq ## __a; \ + ++__target; \ + --__length; \ + ++__seq ## __a; \ + if (__seq ## __a __c0 __seq ## __b) \ + goto __s ## __a ## __b ## __c ## __d; \ + if (__seq ## __a __c1 __seq ## __c) \ + goto __s ## __b ## 
__a ## __c ## __d; \ + if (__seq ## __a __c2 __seq ## __d) \ + goto __s ## __b ## __c ## __a ## __d; \ + goto __s ## __b ## __c ## __d ## __a; + + _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=, <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < ); + _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < ); + +#undef _GLIBCXX_PARALLEL_MERGE_4_CASE +#undef _GLIBCXX_PARALLEL_DECISION + + __finish: + ; + + /* Write the advanced positions back into the caller's iterator pairs. */ __seqs_begin[0].first = __seq0; + __seqs_begin[1].first = __seq1; + __seqs_begin[2].first = __seq2; + __seqs_begin[3].first = __seq3; + + return __target; + } + + /** @brief Multi-way merging procedure for a high branching factor, + * guarded case. + * + * This merging variant uses a LoserTree class as selected by <tt>_LT</tt>.
+ * + * Stability is selected through the used LoserTree class <tt>_LT</tt>. + * + * At least one non-empty sequence is required. + * + * @param __seqs_begin Begin iterator of iterator pair input sequence. + * @param __seqs_end End iterator of iterator pair input sequence. + * @param __target Begin iterator of output sequence. + * @param __comp Comparator. + * @param __length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence. + */ + template<typename _LT, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + _RAIter3 + multiway_merge_loser_tree(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) + { + _GLIBCXX_CALL(__length) + + typedef _DifferenceTp _DifferenceType; + typedef typename std::iterator_traits<_RAIterIterator> + ::difference_type _SeqNumber; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + _SeqNumber __k = static_cast<_SeqNumber>(__seqs_end - __seqs_begin); + + _LT __lt(__k, __comp); + + // Default value for potentially non-default-constructible types. + _ValueType* __arbitrary_element = 0; + + for (_SeqNumber __t = 0; __t < __k; ++__t) + { + if(!__arbitrary_element + && _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__t]) > 0) + __arbitrary_element = &(*__seqs_begin[__t].first); + } + + for (_SeqNumber __t = 0; __t < __k; ++__t) + { + if (__seqs_begin[__t].first == __seqs_begin[__t].second) + __lt.__insert_start(*__arbitrary_element, __t, true); + else + __lt.__insert_start(*__seqs_begin[__t].first, __t, false); + } + + __lt.__init(); + + _SeqNumber __source; + + for (_DifferenceType __i = 0; __i < __length; ++__i) + { + //take out + __source = __lt.__get_min_source(); + + *(__target++) = *(__seqs_begin[__source].first++); + + // Feed. 
+ if (__seqs_begin[__source].first == __seqs_begin[__source].second) + __lt.__delete_min_insert(*__arbitrary_element, true); + else + // Replace from same __source. + __lt.__delete_min_insert(*__seqs_begin[__source].first, false); + } + + return __target; + } + + /** @brief Multi-way merging procedure for a high branching factor, + * unguarded case. + * + * Merging is done using the LoserTree class <tt>_LT</tt>. + * + * Stability is selected by the used LoserTrees. + * + * @pre No input will run out of elements during the merge. + * + * @param __seqs_begin Begin iterator of iterator pair input sequence. + * @param __seqs_end End iterator of iterator pair input sequence. + * @param __target Begin iterator of output sequence. + * @param __comp Comparator. + * @param __length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence. + */ + template<typename _LT, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, typename _Compare> + _RAIter3 + multiway_merge_loser_tree_unguarded(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits<typename std::iterator_traits< + _RAIterIterator>::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, + _Compare __comp) + { + _GLIBCXX_CALL(__length) + typedef _DifferenceTp _DifferenceType; + + typedef typename std::iterator_traits<_RAIterIterator> + ::difference_type _SeqNumber; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + _SeqNumber __k = __seqs_end - __seqs_begin; + + _LT __lt(__k, __sentinel, __comp); + + for (_SeqNumber __t = 0; __t < __k; ++__t) + { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(__seqs_begin[__t].first + != __seqs_begin[__t].second); +#endif + __lt.__insert_start(*__seqs_begin[__t].first, __t, 
false); + } + + __lt.__init(); + + _SeqNumber __source; + +#if _GLIBCXX_ASSERTIONS + _DifferenceType __i = 0; +#endif + + _RAIter3 __target_end = __target + __length; + while (__target < __target_end) + { + // Take out. + __source = __lt.__get_min_source(); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(0 <= __source && __source < __k); + _GLIBCXX_PARALLEL_ASSERT(__i == 0 + || !__comp(*(__seqs_begin[__source].first), *(__target - 1))); +#endif + + // Feed. + *(__target++) = *(__seqs_begin[__source].first++); + +#if _GLIBCXX_ASSERTIONS + ++__i; +#endif + // Replace from same __source. + __lt.__delete_min_insert(*__seqs_begin[__source].first, false); + } + + return __target; + } + + + /** @brief Multi-way merging procedure for a high branching factor, + * requiring sentinels to exist. + * + * @param __stable The value must the same as for the used LoserTrees. + * @param UnguardedLoserTree _Loser Tree variant to use for the unguarded + * merging. + * @param GuardedLoserTree _Loser Tree variant to use for the guarded + * merging. + * + * @param __seqs_begin Begin iterator of iterator pair input sequence. + * @param __seqs_end End iterator of iterator pair input sequence. + * @param __target Begin iterator of output sequence. + * @param __comp Comparator. + * @param __length Maximum length to merge, less equal than the + * total number of elements available. + * + * @return End iterator of output sequence. 
+ */ + template<typename UnguardedLoserTree, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + _RAIter3 + multiway_merge_loser_tree_sentinel(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits<typename std::iterator_traits< + _RAIterIterator>::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, + _Compare __comp) + { + _GLIBCXX_CALL(__length) + + typedef _DifferenceTp _DifferenceType; + typedef std::iterator_traits<_RAIterIterator> _TraitsType; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + _RAIter3 __target_end; + + for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) + // Move the sequence ends to the sentinel. This has the + // effect that the sentinel appears to be within the sequence. Then, + // we can use the unguarded variant if we merge out as many + // non-sentinel elements as we have. + ++((*__s).second); + + __target_end = multiway_merge_loser_tree_unguarded<UnguardedLoserTree> + (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(__target_end == __target + __length); + _GLIBCXX_PARALLEL_ASSERT(__is_sorted(__target, __target_end, __comp)); +#endif + + // Restore the sequence ends so the sentinels are not contained in the + // sequence any more (see comment in loop above). + for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) + --((*__s).second); + + return __target_end; + } + + /** + * @brief Traits for determining whether the loser tree should + * use pointers or copies. + * + * The field "_M_use_pointer" is used to determine whether to use pointers + * in he loser trees or whether to copy the values into the loser tree. 
+ * + * The default behavior is to use pointers if the data type is 4 times as + * big as the pointer to it. + * + * Specialize for your data type to customize the behavior. + * + * Example: + * + * template<> + * struct _LoserTreeTraits<int> + * { static const bool _M_use_pointer = false; }; + * + * template<> + * struct _LoserTreeTraits<heavyweight_type> + * { static const bool _M_use_pointer = true; }; + * + * @param _Tp type to give the loser tree traits for. + */ + template <typename _Tp> + struct _LoserTreeTraits + { + /** + * @brief True iff to use pointers instead of values in loser trees. + * + * The default behavior is to use pointers if the data type is four + * times as big as the pointer to it. + */ + static const bool _M_use_pointer = (sizeof(_Tp) > 4 * sizeof(_Tp*)); + }; + + /** + * @brief Switch for 3-way merging with __sentinels turned off. + * + * Note that 3-way merging is always stable! + */ + template<bool __sentinels /*default == false*/, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + struct __multiway_merge_3_variant_sentinel_switch + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) + { return multiway_merge_3_variant<_GuardedIterator> + (__seqs_begin, __seqs_end, __target, __length, __comp); } + }; + + /** + * @brief Switch for 3-way merging with __sentinels turned on. + * + * Note that 3-way merging is always stable! 
+ */ + template<typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + struct __multiway_merge_3_variant_sentinel_switch<true, _RAIterIterator, + _RAIter3, _DifferenceTp, + _Compare> + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) + { return multiway_merge_3_variant<_UnguardedIterator> + (__seqs_begin, __seqs_end, __target, __length, __comp); } + }; + + /** + * @brief Switch for 4-way merging with __sentinels turned off. + * + * Note that 4-way merging is always stable! + */ + template<bool __sentinels /*default == false*/, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + struct __multiway_merge_4_variant_sentinel_switch + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) + { return multiway_merge_4_variant<_GuardedIterator> + (__seqs_begin, __seqs_end, __target, __length, __comp); } + }; + + /** + * @brief Switch for 4-way merging with __sentinels turned on. + * + * Note that 4-way merging is always stable! + */ + template<typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + struct __multiway_merge_4_variant_sentinel_switch<true, _RAIterIterator, + _RAIter3, _DifferenceTp, + _Compare> + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) + { return multiway_merge_4_variant<_UnguardedIterator> + (__seqs_begin, __seqs_end, __target, __length, __comp); } + }; + + /** + * @brief Switch for k-way merging with __sentinels turned on. 
+ */ + template<bool __sentinels, + bool __stable, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + struct __multiway_merge_k_variant_sentinel_switch + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits<typename std::iterator_traits< + _RAIterIterator>::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, _Compare __comp) + { + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + return multiway_merge_loser_tree_sentinel< + typename __gnu_cxx::__conditional_type< + _LoserTreeTraits<_ValueType>::_M_use_pointer, + _LoserTreePointerUnguarded<__stable, _ValueType, _Compare>, + _LoserTreeUnguarded<__stable, _ValueType, _Compare> + >::__type> + (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp); + } + }; + + /** + * @brief Switch for k-way merging with __sentinels turned off. 
+ */ + template<bool __stable, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + struct __multiway_merge_k_variant_sentinel_switch<false, __stable, + _RAIterIterator, + _RAIter3, _DifferenceTp, + _Compare> + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits<typename std::iterator_traits< + _RAIterIterator>::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, _Compare __comp) + { + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + return multiway_merge_loser_tree< + typename __gnu_cxx::__conditional_type< + _LoserTreeTraits<_ValueType>::_M_use_pointer, + _LoserTreePointer<__stable, _ValueType, _Compare>, + _LoserTree<__stable, _ValueType, _Compare> + >::__type >(__seqs_begin, __seqs_end, __target, __length, __comp); + } + }; + + /** @brief Sequential multi-way merging switch. + * + * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and + * runtime settings. + * @param __seqs_begin Begin iterator of iterator pair input sequence. + * @param __seqs_end End iterator of iterator pair input sequence. + * @param __target Begin iterator of output sequence. + * @param __comp Comparator. + * @param __length Maximum length to merge, possibly larger than the + * number of elements available. + * @param __stable Stable merging incurs a performance penalty. + * @param __sentinel The sequences have __a __sentinel element. + * @return End iterator of output sequence. 
*/ + template<bool __stable, + bool __sentinels, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Compare> + _RAIter3 + __sequential_multiway_merge(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits<typename std::iterator_traits< + _RAIterIterator>::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, _Compare __comp) + { + _GLIBCXX_CALL(__length) + + typedef _DifferenceTp _DifferenceType; + typedef typename std::iterator_traits<_RAIterIterator> + ::difference_type _SeqNumber; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + +#if _GLIBCXX_ASSERTIONS + for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) + { + _GLIBCXX_PARALLEL_ASSERT(__is_sorted((*__s).first, + (*__s).second, __comp)); + } +#endif + + _DifferenceTp __total_length = 0; + for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s) + __total_length += _GLIBCXX_PARALLEL_LENGTH(*__s); + + __length = std::min<_DifferenceTp>(__length, __total_length); + + if(__length == 0) + return __target; + + _RAIter3 __return_target = __target; + _SeqNumber __k = static_cast<_SeqNumber>(__seqs_end - __seqs_begin); + + switch (__k) + { + case 0: + break; + case 1: + __return_target = std::copy(__seqs_begin[0].first, + __seqs_begin[0].first + __length, + __target); + __seqs_begin[0].first += __length; + break; + case 2: + __return_target = __merge_advance(__seqs_begin[0].first, + __seqs_begin[0].second, + __seqs_begin[1].first, + __seqs_begin[1].second, + __target, __length, __comp); + break; + case 3: + __return_target = __multiway_merge_3_variant_sentinel_switch + <__sentinels, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>() + (__seqs_begin, __seqs_end, __target, __length, __comp); + break; + case 4: + __return_target = 
__multiway_merge_4_variant_sentinel_switch + <__sentinels, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>() + (__seqs_begin, __seqs_end, __target, __length, __comp); + break; + default: + __return_target = __multiway_merge_k_variant_sentinel_switch + <__sentinels, __stable, _RAIterIterator, _RAIter3, _DifferenceTp, + _Compare>() + (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp); + break; + } +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT( + __is_sorted(__target, __target + __length, __comp)); +#endif + + return __return_target; + } + + /** + * @brief Stable sorting functor. + * + * Used to reduce code instanciation in multiway_merge_sampling_splitting. + */ + template<bool __stable, class _RAIter, class _StrictWeakOrdering> + struct _SamplingSorter + { + void + operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp) + { __gnu_sequential::stable_sort(__first, __last, __comp); } + }; + + /** + * @brief Non-__stable sorting functor. + * + * Used to reduce code instantiation in multiway_merge_sampling_splitting. + */ + template<class _RAIter, class _StrictWeakOrdering> + struct _SamplingSorter<false, _RAIter, _StrictWeakOrdering> + { + void + operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp) + { __gnu_sequential::sort(__first, __last, __comp); } + }; + + /** + * @brief Sampling based splitting for parallel multiway-merge routine. 
+ */ + template<bool __stable, + typename _RAIterIterator, + typename _Compare, + typename _DifferenceType> + void + multiway_merge_sampling_splitting(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _DifferenceType __length, + _DifferenceType __total_length, + _Compare __comp, + std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces) + { + typedef typename std::iterator_traits<_RAIterIterator> + ::difference_type _SeqNumber; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + // __k sequences. + const _SeqNumber __k + = static_cast<_SeqNumber>(__seqs_end - __seqs_begin); + + const _ThreadIndex __num_threads = omp_get_num_threads(); + + const _DifferenceType __num_samples = + __gnu_parallel::_Settings::get().merge_oversampling * __num_threads; + + _ValueType* __samples = static_cast<_ValueType*> + (::operator new(sizeof(_ValueType) * __k * __num_samples)); + // Sample. + for (_SeqNumber __s = 0; __s < __k; ++__s) + for (_DifferenceType __i = 0; __i < __num_samples; ++__i) + { + _DifferenceType sample_index = static_cast<_DifferenceType> + (_GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__s]) + * (double(__i + 1) / (__num_samples + 1)) + * (double(__length) / __total_length)); + new(&(__samples[__s * __num_samples + __i])) + _ValueType(__seqs_begin[__s].first[sample_index]); + } + + // Sort stable or non-stable, depending on value of template parameter + // "__stable". + _SamplingSorter<__stable, _ValueType*, _Compare>() + (__samples, __samples + (__num_samples * __k), __comp); + + for (_ThreadIndex __slab = 0; __slab < __num_threads; ++__slab) + // For each slab / processor. + for (_SeqNumber __seq = 0; __seq < __k; ++__seq) + { + // For each sequence. 
+ if (__slab > 0) + __pieces[__slab][__seq].first = std::upper_bound + (__seqs_begin[__seq].first, __seqs_begin[__seq].second, + __samples[__num_samples * __k * __slab / __num_threads], + __comp) + - __seqs_begin[__seq].first; + else + // Absolute beginning. + __pieces[__slab][__seq].first = 0; + if ((__slab + 1) < __num_threads) + __pieces[__slab][__seq].second = std::upper_bound + (__seqs_begin[__seq].first, __seqs_begin[__seq].second, + __samples[__num_samples * __k * (__slab + 1) / __num_threads], + __comp) + - __seqs_begin[__seq].first; + else + // Absolute end. + __pieces[__slab][__seq].second = + _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]); + } + + for (_SeqNumber __s = 0; __s < __k; ++__s) + for (_DifferenceType __i = 0; __i < __num_samples; ++__i) + __samples[__s * __num_samples + __i].~_ValueType(); + ::operator delete(__samples); + } + + /** + * @brief Exact splitting for parallel multiway-merge routine. + * + * None of the passed sequences may be empty. + */ + template<bool __stable, + typename _RAIterIterator, + typename _Compare, + typename _DifferenceType> + void + multiway_merge_exact_splitting(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _DifferenceType __length, + _DifferenceType __total_length, + _Compare __comp, + std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces) + { + typedef typename std::iterator_traits<_RAIterIterator> + ::difference_type _SeqNumber; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + + const bool __tight = (__total_length == __length); + + // __k sequences. + const _SeqNumber __k = __seqs_end - __seqs_begin; + + const _ThreadIndex __num_threads = omp_get_num_threads(); + + // (Settings::multiway_merge_splitting + // == __gnu_parallel::_Settings::EXACT). 
+ std::vector<_RAIter1>* __offsets = + new std::vector<_RAIter1>[__num_threads]; + std::vector<std::pair<_RAIter1, _RAIter1> > __se(__k); + + copy(__seqs_begin, __seqs_end, __se.begin()); + + _DifferenceType* __borders = + new _DifferenceType[__num_threads + 1]; + equally_split(__length, __num_threads, __borders); + + for (_ThreadIndex __s = 0; __s < (__num_threads - 1); ++__s) + { + __offsets[__s].resize(__k); + multiseq_partition(__se.begin(), __se.end(), __borders[__s + 1], + __offsets[__s].begin(), __comp); + + // Last one also needed and available. + if (!__tight) + { + __offsets[__num_threads - 1].resize(__k); + multiseq_partition(__se.begin(), __se.end(), + _DifferenceType(__length), + __offsets[__num_threads - 1].begin(), + __comp); + } + } + delete[] __borders; + + for (_ThreadIndex __slab = 0; __slab < __num_threads; ++__slab) + { + // For each slab / processor. + for (_SeqNumber __seq = 0; __seq < __k; ++__seq) + { + // For each sequence. + if (__slab == 0) + { + // Absolute beginning. + __pieces[__slab][__seq].first = 0; + } + else + __pieces[__slab][__seq].first = + __pieces[__slab - 1][__seq].second; + if (!__tight || __slab < (__num_threads - 1)) + __pieces[__slab][__seq].second = + __offsets[__slab][__seq] - __seqs_begin[__seq].first; + else + { + // __slab == __num_threads - 1 + __pieces[__slab][__seq].second = + _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]); + } + } + } + delete[] __offsets; + } + + /** @brief Parallel multi-way merge routine. + * + * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor + * and runtime settings. + * + * Must not be called if the number of sequences is 1. + * + * @param _Splitter functor to split input (either __exact or sampling based) + * + * @param __seqs_begin Begin iterator of iterator pair input sequence. + * @param __seqs_end End iterator of iterator pair input sequence. + * @param __target Begin iterator of output sequence. + * @param __comp Comparator. 
+ * @param __length Maximum length to merge, possibly larger than the + * number of elements available. + * @param __stable Stable merging incurs a performance penalty. + * @param __sentinel Ignored. + * @return End iterator of output sequence. + */ + template<bool __stable, + bool __sentinels, + typename _RAIterIterator, + typename _RAIter3, + typename _DifferenceTp, + typename _Splitter, + typename _Compare> + _RAIter3 + parallel_multiway_merge(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _Splitter __splitter, + _DifferenceTp __length, + _Compare __comp, + _ThreadIndex __num_threads) + { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(__seqs_end - __seqs_begin > 1); +#endif + + _GLIBCXX_CALL(__length) + + typedef _DifferenceTp _DifferenceType; + typedef typename std::iterator_traits<_RAIterIterator> + ::difference_type _SeqNumber; + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename + std::iterator_traits<_RAIter1>::value_type _ValueType; + + // Leave only non-empty sequences. 
+ typedef std::pair<_RAIter1, _RAIter1> seq_type; + seq_type* __ne_seqs = new seq_type[__seqs_end - __seqs_begin]; + _SeqNumber __k = 0; + _DifferenceType __total_length = 0; + for (_RAIterIterator __raii = __seqs_begin; + __raii != __seqs_end; ++__raii) + { + _DifferenceTp __seq_length = _GLIBCXX_PARALLEL_LENGTH(*__raii); + if(__seq_length > 0) + { + __total_length += __seq_length; + __ne_seqs[__k++] = *__raii; + } + } + + _GLIBCXX_CALL(__total_length) + + __length = std::min<_DifferenceTp>(__length, __total_length); + + if (__total_length == 0 || __k == 0) + { + delete[] __ne_seqs; + return __target; + } + + std::vector<std::pair<_DifferenceType, _DifferenceType> >* __pieces; + + __num_threads = static_cast<_ThreadIndex> + (std::min<_DifferenceType>(__num_threads, __total_length)); + +# pragma omp parallel num_threads (__num_threads) + { +# pragma omp single + { + __num_threads = omp_get_num_threads(); + // Thread __t will have to merge pieces[__iam][0..__k - 1] + __pieces = new std::vector< + std::pair<_DifferenceType, _DifferenceType> >[__num_threads]; + for (_ThreadIndex __s = 0; __s < __num_threads; ++__s) + __pieces[__s].resize(__k); + + _DifferenceType __num_samples = + __gnu_parallel::_Settings::get().merge_oversampling + * __num_threads; + + __splitter(__ne_seqs, __ne_seqs + __k, __length, __total_length, + __comp, __pieces); + } //single + + _ThreadIndex __iam = omp_get_thread_num(); + + _DifferenceType __target_position = 0; + + for (_SeqNumber __c = 0; __c < __k; ++__c) + __target_position += __pieces[__iam][__c].first; + + seq_type* __chunks = new seq_type[__k]; + + for (_SeqNumber __s = 0; __s < __k; ++__s) + __chunks[__s] = std::make_pair(__ne_seqs[__s].first + + __pieces[__iam][__s].first, + __ne_seqs[__s].first + + __pieces[__iam][__s].second); + + if(__length > __target_position) + __sequential_multiway_merge<__stable, __sentinels> + (__chunks, __chunks + __k, __target + __target_position, + *(__seqs_begin->second), __length - __target_position, 
__comp); + + delete[] __chunks; + } // parallel + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT( + __is_sorted(__target, __target + __length, __comp)); +#endif + + __k = 0; + // Update ends of sequences. + for (_RAIterIterator __raii = __seqs_begin; + __raii != __seqs_end; ++__raii) + { + _DifferenceTp __length = _GLIBCXX_PARALLEL_LENGTH(*__raii); + if(__length > 0) + (*__raii).first += __pieces[__num_threads - 1][__k++].second; + } + + delete[] __pieces; + delete[] __ne_seqs; + + return __target + __length; + } + + /** + * @brief Multiway Merge Frontend. + * + * Merge the sequences specified by seqs_begin and __seqs_end into + * __target. __seqs_begin and __seqs_end must point to a sequence of + * pairs. These pairs must contain an iterator to the beginning + * of a sequence in their first entry and an iterator the _M_end of + * the same sequence in their second entry. + * + * Ties are broken arbitrarily. See stable_multiway_merge for a variant + * that breaks ties by sequence number but is slower. + * + * The first entries of the pairs (i.e. the begin iterators) will be moved + * forward. + * + * The output sequence has to provide enough space for all elements + * that are written to it. + * + * This function will merge the input sequences: + * + * - not stable + * - parallel, depending on the input size and Settings + * - using sampling for splitting + * - not using sentinels + * + * Example: + * + * <pre> + * int sequences[10][10]; + * for (int __i = 0; __i < 10; ++__i) + * for (int __j = 0; __i < 10; ++__j) + * sequences[__i][__j] = __j; + * + * int __out[33]; + * std::vector<std::pair<int*> > seqs; + * for (int __i = 0; __i < 10; ++__i) + * { seqs.push(std::make_pair<int*>(sequences[__i], + * sequences[__i] + 10)) } + * + * multiway_merge(seqs.begin(), seqs.end(), __target, std::less<int>(), 33); + * </pre> + * + * @see stable_multiway_merge + * + * @pre All input sequences must be sorted. 
+ * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * + * @post [__target, return __value) contains merged __elements from the + * input sequences. + * @post return __value - __target = min(__length, number of elements in all + * sequences). + * + * @param _RAIterPairIterator iterator over sequence + * of pairs of iterators + * @param _RAIterOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param _Compare strict weak ordering type to compare elements + * in sequences + * + * @param __seqs_begin __begin of sequence __sequence + * @param __seqs_end _M_end of sequence __sequence + * @param __target target sequence to merge to. + * @param __comp strict weak ordering to use for element comparison. + * @param __length Maximum length to merge, possibly larger than the + * number of elements available. + * + * @return _M_end iterator of output sequence + */ + // multiway_merge + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sequential_tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute multiway merge *sequentially*. 
+ return __sequential_multiway_merge + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::exact_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ false, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sampling_tag __tag) + { 
+ typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ false, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ false, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + parallel_tag __tag = parallel_tag(0)) + { return multiway_merge(__seqs_begin, __seqs_end, __target, __length, + __comp, exact_tag(__tag.__get_num_threads())); } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + default_parallel_tag __tag) + { return multiway_merge(__seqs_begin, __seqs_end, __target, 
__length, + __comp, exact_tag(__tag.__get_num_threads())); } + + // stable_multiway_merge + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sequential_tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute multiway merge *sequentially*. + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::exact_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ true, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + sampling_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + multiway_merge_sampling_splitting</* __stable = */ true, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ false> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + parallel_tag __tag = parallel_tag(0)) + { + return stable_multiway_merge + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + default_parallel_tag __tag) + { + return stable_multiway_merge + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } + + /** + * @brief Multiway Merge Frontend. + * + * Merge the sequences specified by seqs_begin and __seqs_end into + * __target. __seqs_begin and __seqs_end must point to a sequence of + * pairs. 
These pairs must contain an iterator to the beginning + * of a sequence in their first entry and an iterator to the end of + * the same sequence in their second entry. + * + * Ties are broken arbitrarily. See stable_multiway_merge for a variant + * that breaks ties by sequence number but is slower. + * + * The first entries of the pairs (i.e. the begin iterators) will be moved + * forward accordingly. + * + * The output sequence has to provide enough space for all elements + * that are written to it. + * + * This function will merge the input sequences: + * + * - not stable + * - parallel, depending on the input size and Settings + * - using sampling for splitting + * - using sentinels + * + * You have to take care that the element the end iterator points to is + * readable and contains a value that is greater than any other non-sentinel + * value in all sequences. + * + * Example: + * + * <pre> + * int sequences[10][11]; + * for (int __i = 0; __i < 10; ++__i) + * for (int __j = 0; __j < 11; ++__j) + * sequences[__i][__j] = __j; // last one is sentinel! + * + * int __out[33]; + * std::vector<std::pair<int*, int*> > seqs; + * for (int __i = 0; __i < 10; ++__i) + * { seqs.push_back(std::make_pair(sequences[__i], + * sequences[__i] + 10)); } + * + * multiway_merge_sentinels(seqs.begin(), seqs.end(), __out, 33, + * std::less<int>()); + * </pre> + * + * @pre All input sequences must be sorted. + * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * @pre For each @c __i, @c __seqs_begin[__i].second must be the end + * marker of the sequence, but also reference the one more sentinel + * element. + * + * @post [__target, return value) contains merged elements from the + * input sequences. + * @post return value - __target = min(__length, number of elements in all + * sequences). 
+ * + * @see stable_multiway_merge_sentinels + * + * @param _RAIterPairIterator iterator over sequence + * of pairs of iterators + * @param _RAIterOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param _Compare strict weak ordering type to compare elements + * in sequences + * + * @param __seqs_begin __begin of sequence __sequence + * @param __seqs_end _M_end of sequence __sequence + * @param __target target sequence to merge to. + * @param __comp strict weak ordering to use for element comparison. + * @param __length Maximum length to merge, possibly larger than the + * number of elements available. + * + * @return _M_end iterator of output sequence + */ + // multiway_merge_sentinels + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sequential_tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute multiway merge *sequentially*. 
+ return __sequential_multiway_merge + </* __stable = */ false, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, + __target, *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::exact_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ false, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ false, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ false, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + sampling_tag __tag) + 
{ + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ false, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + multiway_merge_sampling_splitting</* __stable = */ false, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */false, /* __sentinels = */ true>( + __seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + parallel_tag __tag = parallel_tag(0)) + { + return multiway_merge_sentinels + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + default_parallel_tag __tag) + { + return 
multiway_merge_sentinels + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } + + // stable_multiway_merge_sentinels + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::sequential_tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute multiway merge *sequentially*. + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + __gnu_parallel::exact_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + multiway_merge_exact_splitting</* __stable = */ true, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, + _Compare __comp, + sampling_tag __tag) + { + typedef _DifferenceTp _DifferenceType; + _GLIBCXX_CALL(__seqs_end - __seqs_begin) + + // catch special case: no sequences + if (__seqs_begin == __seqs_end) + return __target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ if ((__seqs_end - __seqs_begin > 1) + && _GLIBCXX_PARALLEL_CONDITION( + ((__seqs_end - __seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((_SequenceIndex)__length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + return parallel_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + multiway_merge_sampling_splitting</* __stable = */ true, + typename std::iterator_traits<_RAIterPairIterator> + ::value_type*, _Compare, _DifferenceTp>, + static_cast<_DifferenceType>(__length), __comp, + __tag.__get_num_threads()); + else + return __sequential_multiway_merge + </* __stable = */ true, /* __sentinels = */ true> + (__seqs_begin, __seqs_end, __target, + *(__seqs_begin->second), __length, __comp); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, + _Compare __comp, + parallel_tag __tag = parallel_tag(0)) + { + return stable_multiway_merge_sentinels + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } + + // public interface + template<typename _RAIterPairIterator, + typename _RAIterOut, + typename _DifferenceTp, + typename _Compare> + _RAIterOut + stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin, + _RAIterPairIterator __seqs_end, + _RAIterOut __target, + _DifferenceTp __length, _Compare __comp, + default_parallel_tag __tag) + { + return stable_multiway_merge_sentinels + (__seqs_begin, __seqs_end, __target, __length, __comp, + exact_tag(__tag.__get_num_threads())); + } +}; // namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H */ diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h 
b/libstdc++-v3/include/parallel/multiway_mergesort.h new file mode 100644 index 000000000..af7c10ac1 --- /dev/null +++ b/libstdc++-v3/include/parallel/multiway_mergesort.h @@ -0,0 +1,481 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/multiway_mergesort.h + * @brief Parallel multiway merge sort. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H +#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1 + +#include <vector> + +#include <parallel/basic_iterator.h> +#include <bits/stl_algo.h> +#include <parallel/parallel.h> +#include <parallel/multiway_merge.h> + +namespace __gnu_parallel +{ + /** @brief Subsequence description. */ + template<typename _DifferenceTp> + struct _Piece + { + typedef _DifferenceTp _DifferenceType; + + /** @brief Begin of subsequence. 
*/ + _DifferenceType _M_begin; + + /** @brief End of subsequence. */ + _DifferenceType _M_end; + }; + + /** @brief Data accessed by all threads. + * + * PMWMS = parallel multiway mergesort */ + template<typename _RAIter> + struct _PMWMSSortingData + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + /** @brief Number of threads involved. */ + _ThreadIndex _M_num_threads; + + /** @brief Input __begin. */ + _RAIter _M_source; + + /** @brief Start indices, per thread. */ + _DifferenceType* _M_starts; + + /** @brief Storage in which to sort. */ + _ValueType** _M_temporary; + + /** @brief Samples. */ + _ValueType* _M_samples; + + /** @brief Offsets to add to the found positions. */ + _DifferenceType* _M_offsets; + + /** @brief Pieces of data to merge @c [thread][__sequence] */ + std::vector<_Piece<_DifferenceType> >* _M_pieces; + }; + + /** + * @brief Select _M_samples from a sequence. + * @param __sd Pointer to algorithm data. _Result will be placed in + * @c __sd->_M_samples. + * @param __num_samples Number of _M_samples to select. + */ + template<typename _RAIter, typename _DifferenceTp> + void + __determine_samples(_PMWMSSortingData<_RAIter>* __sd, + _DifferenceTp __num_samples) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef _DifferenceTp _DifferenceType; + + _ThreadIndex __iam = omp_get_thread_num(); + + _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; + + equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], + __num_samples + 1, __es); + + for (_DifferenceType __i = 0; __i < __num_samples; ++__i) + ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) + _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + + __es[__i + 1]]); + + delete[] __es; + } + + /** @brief Split consistently. 
*/ + template<bool __exact, typename _RAIter, + typename _Compare, typename _SortingPlacesIterator> + struct _SplitConsistently + { }; + + /** @brief Split by exact splitting. */ + template<typename _RAIter, typename _Compare, + typename _SortingPlacesIterator> + struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator> + { + void + operator()(const _ThreadIndex __iam, + _PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp, + const typename + std::iterator_traits<_RAIter>::difference_type + __num_samples) const + { +# pragma omp barrier + + std::vector<std::pair<_SortingPlacesIterator, + _SortingPlacesIterator> > + __seqs(__sd->_M_num_threads); + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) + __seqs[__s] = std::make_pair(__sd->_M_temporary[__s], + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s])); + + std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads); + + // if not last thread + if (__iam < __sd->_M_num_threads - 1) + multiseq_partition(__seqs.begin(), __seqs.end(), + __sd->_M_starts[__iam + 1], __offsets.begin(), + __comp); + + for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) + { + // for each sequence + if (__iam < (__sd->_M_num_threads - 1)) + __sd->_M_pieces[__iam][__seq]._M_end + = __offsets[__seq] - __seqs[__seq].first; + else + // very end of this sequence + __sd->_M_pieces[__iam][__seq]._M_end = + __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; + } + +# pragma omp barrier + + for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) + { + // For each sequence. + if (__iam > 0) + __sd->_M_pieces[__iam][__seq]._M_begin = + __sd->_M_pieces[__iam - 1][__seq]._M_end; + else + // Absolute beginning. + __sd->_M_pieces[__iam][__seq]._M_begin = 0; + } + } + }; + + /** @brief Split by sampling. 
*/ + template<typename _RAIter, typename _Compare, + typename _SortingPlacesIterator> + struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator> + { + void + operator()(const _ThreadIndex __iam, + _PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp, + const typename + std::iterator_traits<_RAIter>::difference_type + __num_samples) const + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + __determine_samples(__sd, __num_samples); + +# pragma omp barrier + +# pragma omp single + __gnu_sequential::sort(__sd->_M_samples, + __sd->_M_samples + + (__num_samples * __sd->_M_num_threads), + __comp); + +# pragma omp barrier + + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) + { + // For each sequence. + if (__num_samples * __iam > 0) + __sd->_M_pieces[__iam][__s]._M_begin = + std::lower_bound(__sd->_M_temporary[__s], + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]), + __sd->_M_samples[__num_samples * __iam], + __comp) + - __sd->_M_temporary[__s]; + else + // Absolute beginning. + __sd->_M_pieces[__iam][__s]._M_begin = 0; + + if ((__num_samples * (__iam + 1)) < + (__num_samples * __sd->_M_num_threads)) + __sd->_M_pieces[__iam][__s]._M_end = + std::lower_bound(__sd->_M_temporary[__s], + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]), + __sd->_M_samples[__num_samples * (__iam + 1)], + __comp) + - __sd->_M_temporary[__s]; + else + // Absolute end. 
+ __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]); + } + } + }; + + template<bool __stable, typename _RAIter, typename _Compare> + struct __possibly_stable_sort + { }; + + template<typename _RAIter, typename _Compare> + struct __possibly_stable_sort<true, _RAIter, _Compare> + { + void operator()(const _RAIter& __begin, + const _RAIter& __end, _Compare& __comp) const + { __gnu_sequential::stable_sort(__begin, __end, __comp); } + }; + + template<typename _RAIter, typename _Compare> + struct __possibly_stable_sort<false, _RAIter, _Compare> + { + void operator()(const _RAIter __begin, + const _RAIter __end, _Compare& __comp) const + { __gnu_sequential::sort(__begin, __end, __comp); } + }; + + template<bool __stable, typename Seq_RAIter, + typename _RAIter, typename _Compare, + typename DiffType> + struct __possibly_stable_multiway_merge + { }; + + template<typename Seq_RAIter, typename _RAIter, + typename _Compare, typename _DiffType> + struct __possibly_stable_multiway_merge<true, Seq_RAIter, + _RAIter, _Compare, _DiffType> + { + void operator()(const Seq_RAIter& __seqs_begin, + const Seq_RAIter& __seqs_end, + const _RAIter& __target, + _Compare& __comp, + _DiffType __length_am) const + { stable_multiway_merge(__seqs_begin, __seqs_end, __target, + __length_am, __comp, sequential_tag()); } + }; + + template<typename Seq_RAIter, typename _RAIter, + typename _Compare, typename _DiffType> + struct __possibly_stable_multiway_merge<false, Seq_RAIter, + _RAIter, _Compare, _DiffType> + { + void operator()(const Seq_RAIter& __seqs_begin, + const Seq_RAIter& __seqs_end, + const _RAIter& __target, + _Compare& __comp, + _DiffType __length_am) const + { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, + __comp, sequential_tag()); } + }; + + /** @brief PMWMS code executed by each thread. + * @param __sd Pointer to algorithm data. + * @param __comp Comparator. 
+ */ + template<bool __stable, bool __exact, typename _RAIter, + typename _Compare> + void + parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _ThreadIndex __iam = omp_get_thread_num(); + + // Length of this thread's chunk, before merging. + _DifferenceType __length_local = + __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; + + // Sort in temporary storage, leave space for sentinel. + + typedef _ValueType* _SortingPlacesIterator; + + __sd->_M_temporary[__iam] = + static_cast<_ValueType*>(::operator new(sizeof(_ValueType) + * (__length_local + 1))); + + // Copy there. + std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam], + __sd->_M_source + __sd->_M_starts[__iam] + + __length_local, + __sd->_M_temporary[__iam]); + + __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() + (__sd->_M_temporary[__iam], + __sd->_M_temporary[__iam] + __length_local, + __comp); + + // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], + // __sd->_M_temporary[__iam] + __length_local. + + // No barrier here: Synchronization is done by the splitting routine. + + _DifferenceType __num_samples = + _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1; + _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>() + (__iam, __sd, __comp, __num_samples); + + // Offset from __target __begin, __length after merging. 
+ _DifferenceType __offset = 0, __length_am = 0; + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) + { + __length_am += (__sd->_M_pieces[__iam][__s]._M_end + - __sd->_M_pieces[__iam][__s]._M_begin); + __offset += __sd->_M_pieces[__iam][__s]._M_begin; + } + + typedef std::vector< + std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > + _SeqVector; + _SeqVector __seqs(__sd->_M_num_threads); + + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) + { + __seqs[__s] = + std::make_pair(__sd->_M_temporary[__s] + + __sd->_M_pieces[__iam][__s]._M_begin, + __sd->_M_temporary[__s] + + __sd->_M_pieces[__iam][__s]._M_end); + } + + __possibly_stable_multiway_merge< + __stable, typename _SeqVector::iterator, + _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(), + __sd->_M_source + __offset, __comp, + __length_am); + +# pragma omp barrier + + for (_DifferenceType __i = 0; __i < __length_local; ++__i) + __sd->_M_temporary[__iam][__i].~_ValueType(); + ::operator delete(__sd->_M_temporary[__iam]); + } + + /** @brief PMWMS main call. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __comp Comparator. + * @param __n Length of sequence. + * @param __num_threads Number of threads to use. 
+ */ + template<bool __stable, bool __exact, typename _RAIter, + typename _Compare> + void + parallel_sort_mwms(_RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __num_threads) + { + _GLIBCXX_CALL(__end - __begin) + + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + + if (__n <= 1) + return; + + // at least one element per thread + if (__num_threads > __n) + __num_threads = static_cast<_ThreadIndex>(__n); + + // shared variables + _PMWMSSortingData<_RAIter> __sd; + _DifferenceType* __starts; + _DifferenceType __size; + +# pragma omp parallel num_threads(__num_threads) + { + __num_threads = omp_get_num_threads(); //no more threads than requested + +# pragma omp single + { + __sd._M_num_threads = __num_threads; + __sd._M_source = __begin; + + __sd._M_temporary = new _ValueType*[__num_threads]; + + if (!__exact) + { + __size = + (_Settings::get().sort_mwms_oversampling * __num_threads - 1) + * __num_threads; + __sd._M_samples = static_cast<_ValueType*> + (::operator new(__size * sizeof(_ValueType))); + } + else + __sd._M_samples = 0; + + __sd._M_offsets = new _DifferenceType[__num_threads - 1]; + __sd._M_pieces + = new std::vector<_Piece<_DifferenceType> >[__num_threads]; + for (_ThreadIndex __s = 0; __s < __num_threads; ++__s) + __sd._M_pieces[__s].resize(__num_threads); + __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; + + _DifferenceType __chunk_length = __n / __num_threads; + _DifferenceType __split = __n % __num_threads; + _DifferenceType __pos = 0; + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + { + __starts[__i] = __pos; + __pos += ((__i < __split) + ? (__chunk_length + 1) : __chunk_length); + } + __starts[__num_threads] = __pos; + } //single + + // Now sort in parallel. 
+ parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp); + } //parallel + + delete[] __starts; + delete[] __sd._M_temporary; + + if (!__exact) + { + for (_DifferenceType __i = 0; __i < __size; ++__i) + __sd._M_samples[__i].~_ValueType(); + ::operator delete(__sd._M_samples); + } + + delete[] __sd._M_offsets; + delete[] __sd._M_pieces; + } + +} //namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */ diff --git a/libstdc++-v3/include/parallel/numeric b/libstdc++-v3/include/parallel/numeric new file mode 100644 index 000000000..64624a742 --- /dev/null +++ b/libstdc++-v3/include/parallel/numeric @@ -0,0 +1,514 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** + * @file parallel/numeric +* + * @brief Parallel STL function calls corresponding to stl_numeric.h. + * The functions defined here mainly do case switches and + * call the actual parallelized versions in other files. 
+ * Inlining policy: Functions that basically only contain one function call, + * are declared inline. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_NUMERIC_H +#define _GLIBCXX_PARALLEL_NUMERIC_H 1 + +#include <numeric> +#include <bits/stl_function.h> +#include <parallel/numericfwd.h> +#include <parallel/iterator.h> +#include <parallel/for_each.h> +#include <parallel/for_each_selectors.h> +#include <parallel/partial_sum.h> + +namespace std _GLIBCXX_VISIBILITY(default) +{ +namespace __parallel +{ + // Sequential fallback. + template<typename _IIter, typename _Tp> + inline _Tp + accumulate(_IIter __begin, _IIter __end, _Tp __init, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::accumulate(__begin, __end, __init); } + + template<typename _IIter, typename _Tp, typename _BinaryOperation> + inline _Tp + accumulate(_IIter __begin, _IIter __end, _Tp __init, + _BinaryOperation __binary_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::accumulate(__begin, __end, __init, __binary_op); } + + // Sequential fallback for input iterator case. + template<typename _IIter, typename _Tp, typename _IteratorTag> + inline _Tp + __accumulate_switch(_IIter __begin, _IIter __end, + _Tp __init, _IteratorTag) + { return accumulate(__begin, __end, __init, + __gnu_parallel::sequential_tag()); } + + template<typename _IIter, typename _Tp, typename _BinaryOperation, + typename _IteratorTag> + inline _Tp + __accumulate_switch(_IIter __begin, _IIter __end, _Tp __init, + _BinaryOperation __binary_op, _IteratorTag) + { return accumulate(__begin, __end, __init, __binary_op, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. 
+ template<typename __RAIter, typename _Tp, typename _BinaryOperation> + _Tp + __accumulate_switch(__RAIter __begin, __RAIter __end, + _Tp __init, _BinaryOperation __binary_op, + random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_unbalanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().accumulate_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + _Tp __res = __init; + __gnu_parallel::__accumulate_selector<__RAIter> + __my_selector; + __gnu_parallel:: + __for_each_template_random_access_ed(__begin, __end, + __gnu_parallel::_Nothing(), + __my_selector, + __gnu_parallel:: + __accumulate_binop_reduct + <_BinaryOperation>(__binary_op), + __res, __res, -1); + return __res; + } + else + return accumulate(__begin, __end, __init, __binary_op, + __gnu_parallel::sequential_tag()); + } + + // Public interface. + template<typename _IIter, typename _Tp> + inline _Tp + accumulate(_IIter __begin, _IIter __end, _Tp __init, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef std::iterator_traits<_IIter> _IteratorTraits; + typedef typename _IteratorTraits::value_type _ValueType; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + + return __accumulate_switch(__begin, __end, __init, + __gnu_parallel::_Plus<_Tp, _ValueType>(), + _IteratorCategory(), __parallelism_tag); + } + + template<typename _IIter, typename _Tp> + inline _Tp + accumulate(_IIter __begin, _IIter __end, _Tp __init) + { + typedef std::iterator_traits<_IIter> _IteratorTraits; + typedef typename _IteratorTraits::value_type _ValueType; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + + return __accumulate_switch(__begin, __end, __init, + __gnu_parallel::_Plus<_Tp, _ValueType>(), + _IteratorCategory()); + } + + template<typename _IIter, typename _Tp, typename _BinaryOperation> + inline _Tp + 
accumulate(_IIter __begin, _IIter __end, _Tp __init, + _BinaryOperation __binary_op, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_IIter> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + return __accumulate_switch(__begin, __end, __init, __binary_op, + _IteratorCategory(), __parallelism_tag); + } + + template<typename _IIter, typename _Tp, typename _BinaryOperation> + inline _Tp + accumulate(_IIter __begin, _IIter __end, _Tp __init, + _BinaryOperation __binary_op) + { + typedef iterator_traits<_IIter> _IteratorTraits; + typedef typename _IteratorTraits::iterator_category _IteratorCategory; + return __accumulate_switch(__begin, __end, __init, __binary_op, + _IteratorCategory()); + } + + + // Sequential fallback. + template<typename _IIter1, typename _IIter2, typename _Tp> + inline _Tp + inner_product(_IIter1 __first1, _IIter1 __last1, + _IIter2 __first2, _Tp __init, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::inner_product( + __first1, __last1, __first2, __init); } + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2> + inline _Tp + inner_product(_IIter1 __first1, _IIter1 __last1, + _IIter2 __first2, _Tp __init, _BinaryFunction1 __binary_op1, + _BinaryFunction2 __binary_op2, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::inner_product(__first1, __last1, __first2, __init, + __binary_op1, __binary_op2); } + + // Parallel algorithm for random access iterators. 
+ template<typename _RAIter1, typename _RAIter2, + typename _Tp, typename _BinaryFunction1, typename _BinaryFunction2> + _Tp + __inner_product_switch(_RAIter1 __first1, + _RAIter1 __last1, + _RAIter2 __first2, _Tp __init, + _BinaryFunction1 __binary_op1, + _BinaryFunction2 __binary_op2, + random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism_tag + = __gnu_parallel::parallel_unbalanced) + { + if (_GLIBCXX_PARALLEL_CONDITION((__last1 - __first1) + >= __gnu_parallel::_Settings::get(). + accumulate_minimal_n + && __gnu_parallel:: + __is_parallel(__parallelism_tag))) + { + _Tp __res = __init; + __gnu_parallel:: + __inner_product_selector<_RAIter1, + _RAIter2, _Tp> __my_selector(__first1, __first2); + __gnu_parallel:: + __for_each_template_random_access_ed( + __first1, __last1, __binary_op2, __my_selector, __binary_op1, + __res, __res, -1); + return __res; + } + else + return inner_product(__first1, __last1, __first2, __init, + __gnu_parallel::sequential_tag()); + } + + // No parallelism for input iterators. 
+ template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2, + typename _IteratorTag1, typename _IteratorTag2> + inline _Tp + __inner_product_switch(_IIter1 __first1, _IIter1 __last1, + _IIter2 __first2, _Tp __init, + _BinaryFunction1 __binary_op1, + _BinaryFunction2 __binary_op2, + _IteratorTag1, _IteratorTag2) + { return inner_product(__first1, __last1, __first2, __init, __binary_op1, + __binary_op2, __gnu_parallel::sequential_tag()); } + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2> + inline _Tp + inner_product(_IIter1 __first1, _IIter1 __last1, + _IIter2 __first2, _Tp __init, _BinaryFunction1 __binary_op1, + _BinaryFunction2 __binary_op2, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_IIter1> _TraitsType1; + typedef typename _TraitsType1::iterator_category _IteratorCategory1; + + typedef iterator_traits<_IIter2> _TraitsType2; + typedef typename _TraitsType2::iterator_category _IteratorCategory2; + + return __inner_product_switch(__first1, __last1, __first2, __init, + __binary_op1, __binary_op2, + _IteratorCategory1(), _IteratorCategory2(), + __parallelism_tag); + } + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2> + inline _Tp + inner_product(_IIter1 __first1, _IIter1 __last1, + _IIter2 __first2, _Tp __init, _BinaryFunction1 __binary_op1, + _BinaryFunction2 __binary_op2) + { + typedef iterator_traits<_IIter1> _TraitsType1; + typedef typename _TraitsType1::iterator_category _IteratorCategory1; + + typedef iterator_traits<_IIter2> _TraitsType2; + typedef typename _TraitsType2::iterator_category _IteratorCategory2; + + return __inner_product_switch(__first1, __last1, __first2, __init, + __binary_op1, __binary_op2, + _IteratorCategory1(), + _IteratorCategory2()); + } + + template<typename _IIter1, typename _IIter2, typename _Tp> + inline 
_Tp + inner_product(_IIter1 __first1, _IIter1 __last1, + _IIter2 __first2, _Tp __init, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_IIter1> _TraitsType1; + typedef typename _TraitsType1::value_type _ValueType1; + typedef iterator_traits<_IIter2> _TraitsType2; + typedef typename _TraitsType2::value_type _ValueType2; + + typedef typename + __gnu_parallel::_Multiplies<_ValueType1, _ValueType2>::result_type + _MultipliesResultType; + return __gnu_parallel::inner_product(__first1, __last1, __first2, __init, + __gnu_parallel::_Plus<_Tp, _MultipliesResultType>(), + __gnu_parallel:: + _Multiplies<_ValueType1, _ValueType2>(), + __parallelism_tag); + } + + template<typename _IIter1, typename _IIter2, typename _Tp> + inline _Tp + inner_product(_IIter1 __first1, _IIter1 __last1, + _IIter2 __first2, _Tp __init) + { + typedef iterator_traits<_IIter1> _TraitsType1; + typedef typename _TraitsType1::value_type _ValueType1; + typedef iterator_traits<_IIter2> _TraitsType2; + typedef typename _TraitsType2::value_type _ValueType2; + + typedef typename + __gnu_parallel::_Multiplies<_ValueType1, _ValueType2>::result_type + _MultipliesResultType; + return __gnu_parallel::inner_product(__first1, __last1, __first2, __init, + __gnu_parallel::_Plus<_Tp, _MultipliesResultType>(), + __gnu_parallel:: + _Multiplies<_ValueType1, _ValueType2>()); + } + + // Sequential fallback. + template<typename _IIter, typename _OutputIterator> + inline _OutputIterator + partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::partial_sum(__begin, __end, __result); } + + // Sequential fallback. 
+ template<typename _IIter, typename _OutputIterator, + typename _BinaryOperation> + inline _OutputIterator + partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result, + _BinaryOperation __bin_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::partial_sum(__begin, __end, __result, __bin_op); } + + // Sequential fallback for input iterator case. + template<typename _IIter, typename _OutputIterator, + typename _BinaryOperation, typename _IteratorTag1, + typename _IteratorTag2> + inline _OutputIterator + __partial_sum_switch(_IIter __begin, _IIter __end, + _OutputIterator __result, _BinaryOperation __bin_op, + _IteratorTag1, _IteratorTag2) + { return _GLIBCXX_STD_A::partial_sum(__begin, __end, __result, __bin_op); } + + // Parallel algorithm for random access iterators. + template<typename _IIter, typename _OutputIterator, + typename _BinaryOperation> + _OutputIterator + __partial_sum_switch(_IIter __begin, _IIter __end, + _OutputIterator __result, _BinaryOperation __bin_op, + random_access_iterator_tag, + random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().partial_sum_minimal_n)) + return __gnu_parallel::__parallel_partial_sum(__begin, __end, + __result, __bin_op); + else + return partial_sum(__begin, __end, __result, __bin_op, + __gnu_parallel::sequential_tag()); + } + + // Public interface. 
+ template<typename _IIter, typename _OutputIterator> + inline _OutputIterator + partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result) + { + typedef typename iterator_traits<_IIter>::value_type _ValueType; + return __gnu_parallel::partial_sum(__begin, __end, + __result, std::plus<_ValueType>()); + } + + // Public interface + template<typename _IIter, typename _OutputIterator, + typename _BinaryOperation> + inline _OutputIterator + partial_sum(_IIter __begin, _IIter __end, _OutputIterator __result, + _BinaryOperation __binary_op) + { + typedef iterator_traits<_IIter> _ITraitsType; + typedef typename _ITraitsType::iterator_category _IIteratorCategory; + + typedef iterator_traits<_OutputIterator> _OTraitsType; + typedef typename _OTraitsType::iterator_category _OIterCategory; + + return __partial_sum_switch(__begin, __end, __result, __binary_op, + _IIteratorCategory(), _OIterCategory()); + } + + // Sequential fallback. + template<typename _IIter, typename _OutputIterator> + inline _OutputIterator + adjacent_difference(_IIter __begin, _IIter __end, _OutputIterator __result, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::adjacent_difference(__begin, __end, __result); } + + // Sequential fallback. + template<typename _IIter, typename _OutputIterator, + typename _BinaryOperation> + inline _OutputIterator + adjacent_difference(_IIter __begin, _IIter __end, + _OutputIterator __result, _BinaryOperation __bin_op, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_A::adjacent_difference(__begin, __end, + __result, __bin_op); } + + // Sequential fallback for input iterator case. 
+ template<typename _IIter, typename _OutputIterator, + typename _BinaryOperation, typename _IteratorTag1, + typename _IteratorTag2> + inline _OutputIterator + __adjacent_difference_switch(_IIter __begin, _IIter __end, + _OutputIterator __result, + _BinaryOperation __bin_op, _IteratorTag1, + _IteratorTag2) + { return adjacent_difference(__begin, __end, __result, __bin_op, + __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators. + template<typename _IIter, typename _OutputIterator, + typename _BinaryOperation> + _OutputIterator + __adjacent_difference_switch(_IIter __begin, _IIter __end, + _OutputIterator __result, + _BinaryOperation __bin_op, + random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism + __parallelism_tag + = __gnu_parallel::parallel_balanced) + { + if (_GLIBCXX_PARALLEL_CONDITION( + static_cast<__gnu_parallel::_SequenceIndex>(__end - __begin) + >= __gnu_parallel::_Settings::get().adjacent_difference_minimal_n + && __gnu_parallel::__is_parallel(__parallelism_tag))) + { + bool __dummy = true; + typedef __gnu_parallel::_IteratorPair<_IIter, _OutputIterator, + random_access_iterator_tag> _ItTrip; + *__result = *__begin; + _ItTrip __begin_pair(__begin + 1, __result + 1), + __end_pair(__end, __result + (__end - __begin)); + __gnu_parallel::__adjacent_difference_selector<_ItTrip> + __functionality; + __gnu_parallel:: + __for_each_template_random_access_ed( + __begin_pair, __end_pair, __bin_op, __functionality, + __gnu_parallel::_DummyReduct(), __dummy, __dummy, -1); + return __functionality._M_finish_iterator; + } + else + return adjacent_difference(__begin, __end, __result, __bin_op, + __gnu_parallel::sequential_tag()); + } + + // Public interface. 
+ template<typename _IIter, typename _OutputIterator> + inline _OutputIterator + adjacent_difference(_IIter __begin, _IIter __end, + _OutputIterator __result, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + return adjacent_difference(__begin, __end, __result, + std::minus<_ValueType>(), + __parallelism_tag); + } + + template<typename _IIter, typename _OutputIterator> + inline _OutputIterator + adjacent_difference(_IIter __begin, _IIter __end, + _OutputIterator __result) + { + typedef iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + return adjacent_difference(__begin, __end, __result, + std::minus<_ValueType>()); + } + + template<typename _IIter, typename _OutputIterator, + typename _BinaryOperation> + inline _OutputIterator + adjacent_difference(_IIter __begin, _IIter __end, + _OutputIterator __result, _BinaryOperation __binary_op, + __gnu_parallel::_Parallelism __parallelism_tag) + { + typedef iterator_traits<_IIter> _ITraitsType; + typedef typename _ITraitsType::iterator_category _IIteratorCategory; + + typedef iterator_traits<_OutputIterator> _OTraitsType; + typedef typename _OTraitsType::iterator_category _OIterCategory; + + return __adjacent_difference_switch(__begin, __end, __result, + __binary_op, + _IIteratorCategory(), + _OIterCategory(), + __parallelism_tag); + } + + template<typename _IIter, typename _OutputIterator, + typename _BinaryOperation> + inline _OutputIterator + adjacent_difference(_IIter __begin, _IIter __end, + _OutputIterator __result, _BinaryOperation __binary_op) + { + typedef iterator_traits<_IIter> _ITraitsType; + typedef typename _ITraitsType::iterator_category _IIteratorCategory; + + typedef iterator_traits<_OutputIterator> _OTraitsType; + typedef typename _OTraitsType::iterator_category _OIterCategory; + + return __adjacent_difference_switch(__begin, __end, __result, + __binary_op, 
+ _IIteratorCategory(), + _OIterCategory()); + } +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_NUMERIC_H */ diff --git a/libstdc++-v3/include/parallel/numericfwd.h b/libstdc++-v3/include/parallel/numericfwd.h new file mode 100644 index 000000000..5ee3d6d4f --- /dev/null +++ b/libstdc++-v3/include/parallel/numericfwd.h @@ -0,0 +1,203 @@ +// <numeric> parallel extensions -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/numericfwd.h + * This file is a GNU parallel extension to the Standard C++ Library. 
+ */ + +#ifndef _GLIBCXX_PARALLEL_NUMERICFWD_H +#define _GLIBCXX_PARALLEL_NUMERICFWD_H 1 + +#pragma GCC system_header + +#include <parallel/tags.h> +#include <parallel/settings.h> + +namespace std _GLIBCXX_VISIBILITY(default) +{ +namespace __parallel +{ + template<typename _IIter, typename _Tp> + _Tp + accumulate(_IIter, _IIter, _Tp); + + template<typename _IIter, typename _Tp> + _Tp + accumulate(_IIter, _IIter, _Tp, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Tp> + _Tp + accumulate(_IIter, _IIter, _Tp, __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Tp, typename _Tag> + _Tp + __accumulate_switch(_IIter, _IIter, _Tp, _Tag); + + template<typename _IIter, typename _Tp, typename _BinaryOper> + _Tp + accumulate(_IIter, _IIter, _Tp, _BinaryOper); + + template<typename _IIter, typename _Tp, typename _BinaryOper> + _Tp + accumulate(_IIter, _IIter, _Tp, _BinaryOper, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _Tp, typename _BinaryOper> + _Tp + accumulate(_IIter, _IIter, _Tp, _BinaryOper, + __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _Tp, typename _BinaryOper, + typename _Tag> + _Tp + __accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag); + + template<typename _RAIter, typename _Tp, typename _BinaryOper> + _Tp + __accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper, + random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_unbalanced); + + template<typename _IIter, typename _OIter> + _OIter + adjacent_difference(_IIter, _IIter, _OIter); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper); + + template<typename _IIter, typename _OIter> + _OIter + adjacent_difference(_IIter, _IIter, _OIter, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + adjacent_difference(_IIter, 
_IIter, _OIter, _BinaryOper, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter> + _OIter + adjacent_difference(_IIter, _IIter, _OIter, + __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, + __gnu_parallel::_Parallelism); + + template<typename _IIter, typename _OIter, typename _BinaryOper, + typename _Tag1, typename _Tag2> + _OIter + __adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, + _Tag1, _Tag2); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + __adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, + random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism __parallelism + = __gnu_parallel::parallel_unbalanced); + + template<typename _IIter1, typename _IIter2, typename _Tp> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp); + + template<typename _IIter1, typename _IIter2, typename _Tp> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, + __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _Tp> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, + __gnu_parallel::_Parallelism); + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, + _BinaryFunction1, _BinaryFunction2); + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1, + _BinaryFunction2, __gnu_parallel::sequential_tag); + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename BinaryFunction1, typename BinaryFunction2> + _Tp + inner_product(_IIter1, _IIter1, _IIter2, _Tp, BinaryFunction1, + BinaryFunction2, __gnu_parallel::_Parallelism); + + template<typename 
_RAIter1, typename _RAIter2, typename _Tp, + typename BinaryFunction1, typename BinaryFunction2> + _Tp + __inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1, + BinaryFunction2, random_access_iterator_tag, + random_access_iterator_tag, + __gnu_parallel::_Parallelism + = __gnu_parallel::parallel_unbalanced); + + template<typename _IIter1, typename _IIter2, typename _Tp, + typename _BinaryFunction1, typename _BinaryFunction2, + typename _Tag1, typename _Tag2> + _Tp + __inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1, + _BinaryFunction2, _Tag1, _Tag2); + + + template<typename _IIter, typename _OIter> + _OIter + partial_sum(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + partial_sum(_IIter, _IIter, _OIter, _BinaryOper, + __gnu_parallel::sequential_tag); + + template<typename _IIter, typename _OIter> + _OIter + partial_sum(_IIter, _IIter, _OIter __result); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + partial_sum(_IIter, _IIter, _OIter, _BinaryOper); + + template<typename _IIter, typename _OIter, typename _BinaryOper, + typename _Tag1, typename _Tag2> + _OIter + __partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2); + + template<typename _IIter, typename _OIter, typename _BinaryOper> + _OIter + __partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, + random_access_iterator_tag, random_access_iterator_tag); +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_NUMERICFWD_H */ diff --git a/libstdc++-v3/include/parallel/omp_loop.h b/libstdc++-v3/include/parallel/omp_loop.h new file mode 100644 index 000000000..94f9e6bba --- /dev/null +++ b/libstdc++-v3/include/parallel/omp_loop.h @@ -0,0 +1,115 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/omp_loop.h + * @brief Parallelization of embarrassingly parallel execution by + * means of an OpenMP for loop. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_OMP_LOOP_H +#define _GLIBCXX_PARALLEL_OMP_LOOP_H 1 + +#include <omp.h> + +#include <parallel/settings.h> +#include <parallel/basic_iterator.h> +#include <parallel/base.h> + +namespace __gnu_parallel +{ + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using an OpenMP for loop. + * + * @param __begin Begin iterator of element sequence. + * @param __end End iterator of element sequence. + * @param __o User-supplied functor (comparator, predicate, adding + * functor, etc.). + * @param __f Functor to @a process an element with __op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param __r Functor to @a add a single __result to the already + * processed elements (depends on functionality). 
   * @param __base Base value for reduction (accepted for interface
   * compatibility; this implementation does not read it).
   * @param __output Reference to the accumulator: on return it holds
   * its incoming value combined, via __r, with every per-thread result.
   * @param __bound Maximum number of elements processed (e. g. for
   * std::count_n()); this implementation does not read it.
   * @return User-supplied functor (that may contain a part of the result).
   */
  template<typename _RAIter,
           typename _Op,
           typename _Fu,
           typename _Red,
           typename _Result>
    _Op
    __for_each_template_random_access_omp_loop(_RAIter __begin, _RAIter __end,
                                               _Op __o, _Fu& __f, _Red __r,
                                               _Result __base,
                                               _Result& __output,
      typename std::iterator_traits<_RAIter>::difference_type __bound)
    {
      typedef typename std::iterator_traits<_RAIter>::difference_type
        _DifferenceType;

      _DifferenceType __length = __end - __begin;
      // Request no more threads than there are elements.
      _ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType>
        (__get_max_threads(), __length);

      // One partial result per thread; allocated inside the parallel
      // region once the actual team size is known.
      _Result *__thread_results;

#     pragma omp parallel num_threads(__num_threads)
      {
#       pragma omp single
        {
          // The runtime decides the real team size; size the array by it.
          __num_threads = omp_get_num_threads();
          __thread_results = new _Result[__num_threads];

          // Every slot starts from a value-initialized _Result.
          for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
            __thread_results[__i] = _Result();
        }

        _ThreadIndex __iam = omp_get_thread_num();

        // Each thread folds the elements it is handed into its own slot.
#pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
        for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
          __thread_results[__iam] = __r(__thread_results[__iam],
                                        __f(__o, __begin+__pos));
      } //parallel

      // Combine the per-thread partial results into __output, in thread
      // index order.
      for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
        __output = __r(__output, __thread_results[__i]);

      delete [] __thread_results;

      // Points to last element processed (needed as return value for
      // some algorithms like transform).
      __f._M_finish_iterator = __begin + __length;

      return __o;
    }

} // end namespace

#endif /* _GLIBCXX_PARALLEL_OMP_LOOP_H */
diff --git a/libstdc++-v3/include/parallel/omp_loop_static.h b/libstdc++-v3/include/parallel/omp_loop_static.h new file mode 100644 index 000000000..ca22d5999 --- /dev/null +++ b/libstdc++-v3/include/parallel/omp_loop_static.h @@ -0,0 +1,115 @@
// -*- C++ -*-

// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/omp_loop_static.h
 * @brief Parallelization of embarrassingly parallel execution by
 * means of an OpenMP for loop with static scheduling.
 * This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.
+ +#ifndef _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H +#define _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H 1 + +#include <omp.h> + +#include <parallel/settings.h> +#include <parallel/basic_iterator.h> + +namespace __gnu_parallel +{ + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using an OpenMP for loop with static scheduling. + * + * @param __begin Begin iterator of element sequence. + * @param __end End iterator of element sequence. + * @param __o User-supplied functor (comparator, predicate, adding + * functor, ...). + * @param __f Functor to @a process an element with __op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param __r Functor to @a add a single __result to the already processed + * __elements (depends on functionality). + * @param __base Base value for reduction. + * @param __output Pointer to position where final result is written to + * @param __bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). 
+ */ + template<typename _RAIter, + typename _Op, + typename _Fu, + typename _Red, + typename _Result> + _Op + __for_each_template_random_access_omp_loop_static(_RAIter __begin, + _RAIter __end, _Op __o, + _Fu& __f, _Red __r, + _Result __base, + _Result& __output, + typename std::iterator_traits<_RAIter>::difference_type __bound) + { + typedef typename std::iterator_traits<_RAIter>::difference_type + _DifferenceType; + + _DifferenceType __length = __end - __begin; + _ThreadIndex __num_threads = std::min<_DifferenceType> + (__get_max_threads(), __length); + + _Result *__thread_results; + +# pragma omp parallel num_threads(__num_threads) + { +# pragma omp single + { + __num_threads = omp_get_num_threads(); + __thread_results = new _Result[__num_threads]; + + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + __thread_results[__i] = _Result(); + } + + _ThreadIndex __iam = omp_get_thread_num(); + +#pragma omp for schedule(static, _Settings::get().workstealing_chunk_size) + for (_DifferenceType __pos = 0; __pos < __length; ++__pos) + __thread_results[__iam] = __r(__thread_results[__iam], + __f(__o, __begin+__pos)); + } //parallel + + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + __output = __r(__output, __thread_results[__i]); + + delete [] __thread_results; + + // Points to last element processed (needed as return value for + // some algorithms like transform). + __f.finish_iterator = __begin + __length; + + return __o; + } + +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H */ diff --git a/libstdc++-v3/include/parallel/par_loop.h b/libstdc++-v3/include/parallel/par_loop.h new file mode 100644 index 000000000..b776df4b7 --- /dev/null +++ b/libstdc++-v3/include/parallel/par_loop.h @@ -0,0 +1,139 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/par_loop.h + * @brief Parallelization of embarrassingly parallel execution by + * means of equal splitting. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_PAR_LOOP_H +#define _GLIBCXX_PARALLEL_PAR_LOOP_H 1 + +#include <omp.h> +#include <parallel/settings.h> +#include <parallel/base.h> +#include <parallel/equally_split.h> + +namespace __gnu_parallel +{ + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using hand-crafted parallelization by equal splitting + * the work. + * + * @param __begin Begin iterator of element sequence. + * @param __end End iterator of element sequence. + * @param __o User-supplied functor (comparator, predicate, adding + * functor, ...) + * @param __f Functor to "process" an element with __op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param __r Functor to "add" a single __result to the already + * processed elements (depends on functionality). 
   * @param __base Base value for reduction (accepted for interface
   * compatibility; this implementation does not read it).
   * @param __output Reference to the accumulator: on return it holds
   * its incoming value combined, via __r, with the partial result of
   * every thread that processed at least one element.
   * @param __bound Maximum number of elements processed (e. g. for
   * std::count_n()); this implementation does not read it.
   * @return User-supplied functor (that may contain a part of the result).
   */
  template<typename _RAIter,
           typename _Op,
           typename _Fu,
           typename _Red,
           typename _Result>
    _Op
    __for_each_template_random_access_ed(_RAIter __begin, _RAIter __end,
                                         _Op __o, _Fu& __f, _Red __r,
                                         _Result __base, _Result& __output,
      typename std::iterator_traits<_RAIter>::difference_type __bound)
    {
      typedef std::iterator_traits<_RAIter> _TraitsType;
      typedef typename _TraitsType::difference_type _DifferenceType;
      const _DifferenceType __length = __end - __begin;
      // Raw storage for one partial result per thread, plus a flag per
      // slot telling whether an object was actually constructed there.
      _Result *__thread_results;
      bool* __constructed;

      // Request no more threads than there are elements.
      _ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType>
        (__get_max_threads(), __length);

#     pragma omp parallel num_threads(__num_threads)
      {
#       pragma omp single
        {
          // The runtime decides the real team size; allocate by it.
          // Only raw memory is obtained here, so _Result needs no
          // default constructor.
          __num_threads = omp_get_num_threads();
          __thread_results = static_cast<_Result*>
            (::operator new(__num_threads * sizeof(_Result)));
          __constructed = new bool[__num_threads];
        }

        _ThreadIndex __iam = omp_get_thread_num();

        // Running partial result of this thread.  Left unset (and never
        // dereferenced) when the thread's sub-range is empty.
        _Result* __reduct;

        // Bounds of this thread's sub-range.
        _DifferenceType
          __start = equally_split_point(__length, __num_threads, __iam),
          __stop = equally_split_point(__length, __num_threads, __iam + 1);

        if (__start < __stop)
          {
            // Seed the partial result from the first element instead of
            // relying on a neutral element.
            __reduct = new _Result(__f(__o, __begin + __start));
            ++__start;
            __constructed[__iam] = true;
          }
        else
          __constructed[__iam] = false;

        // Fold the remaining elements of the sub-range.
        for (; __start < __stop; ++__start)
          *__reduct = __r(*__reduct, __f(__o, __begin + __start));

        if (__constructed[__iam])
          {
            // Copy-construct the partial result into this thread's slot
            // of the raw array, then release the temporary.
            ::new(&__thread_results[__iam]) _Result(*__reduct);
            delete __reduct;
          }
      } //parallel

      // Combine the constructed partial results into __output, in thread
      // index order, destroying each one after use.
      for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
        if (__constructed[__i])
          {
            __output = __r(__output, __thread_results[__i]);
            __thread_results[__i].~_Result();
          }

      // Points to last element processed (needed as return value for
      // some algorithms like transform).
      __f._M_finish_iterator = __begin + __length;

      // Matches the ::operator new above; the elements have already
      // been destroyed one by one.
      ::operator delete(__thread_results);

      delete[] __constructed;

      return __o;
    }

} // end namespace

#endif /* _GLIBCXX_PARALLEL_PAR_LOOP_H */
diff --git a/libstdc++-v3/include/parallel/parallel.h b/libstdc++-v3/include/parallel/parallel.h new file mode 100644 index 000000000..c4bfc057a --- /dev/null +++ b/libstdc++-v3/include/parallel/parallel.h @@ -0,0 +1,42 @@
// -*- C++ -*-

// Copyright (C) 2007, 2009 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
+ +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/parallel.h + * @brief End-user include file. Provides advanced settings and + * tuning options. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze and Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_PARALLEL_H +#define _GLIBCXX_PARALLEL_PARALLEL_H 1 + +#include <parallel/features.h> +#include <parallel/compiletime_settings.h> +#include <parallel/types.h> +#include <parallel/tags.h> +#include <parallel/settings.h> + +#endif /* _GLIBCXX_PARALLEL_PARALLEL_H */ diff --git a/libstdc++-v3/include/parallel/partial_sum.h b/libstdc++-v3/include/parallel/partial_sum.h new file mode 100644 index 000000000..0be988178 --- /dev/null +++ b/libstdc++-v3/include/parallel/partial_sum.h @@ -0,0 +1,231 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+ +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/partial_sum.h + * @brief Parallel implementation of std::partial_sum(), i.e. prefix +* sums. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H +#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 + +#include <omp.h> +#include <new> +#include <bits/stl_algobase.h> +#include <parallel/parallel.h> +#include <parallel/numericfwd.h> + +namespace __gnu_parallel +{ + // Problem: there is no 0-element given. + + /** @brief Base case prefix sum routine. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. + * @param __result Begin iterator of output sequence. + * @param __bin_op Associative binary function. + * @param __value Start value. Must be passed since the neutral + * element is unknown in general. + * @return End iterator of output sequence. */ + template<typename _IIter, + typename _OutputIterator, + typename _BinaryOperation> + _OutputIterator + __parallel_partial_sum_basecase(_IIter __begin, _IIter __end, + _OutputIterator __result, + _BinaryOperation __bin_op, + typename std::iterator_traits <_IIter>::value_type __value) + { + if (__begin == __end) + return __result; + + while (__begin != __end) + { + __value = __bin_op(__value, *__begin); + *__result = __value; + ++__result; + ++__begin; + } + return __result; + } + + /** @brief Parallel partial sum implementation, two-phase approach, + no recursion. + * @param __begin Begin iterator of input sequence. 
+ * @param __end End iterator of input sequence. + * @param __result Begin iterator of output sequence. + * @param __bin_op Associative binary function. + * @param __n Length of sequence. + * @param __num_threads Number of threads to use. + * @return End iterator of output sequence. + */ + template<typename _IIter, + typename _OutputIterator, + typename _BinaryOperation> + _OutputIterator + __parallel_partial_sum_linear(_IIter __begin, _IIter __end, + _OutputIterator __result, + _BinaryOperation __bin_op, + typename std::iterator_traits<_IIter>::difference_type __n) + { + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + if (__begin == __end) + return __result; + + _ThreadIndex __num_threads = + std::min<_DifferenceType>(__get_max_threads(), __n - 1); + + if (__num_threads < 2) + { + *__result = *__begin; + return __parallel_partial_sum_basecase(__begin + 1, __end, + __result + 1, __bin_op, + *__begin); + } + + _DifferenceType* __borders; + _ValueType* __sums; + + const _Settings& __s = _Settings::get(); + +# pragma omp parallel num_threads(__num_threads) + { +# pragma omp single + { + __num_threads = omp_get_num_threads(); + + __borders = new _DifferenceType[__num_threads + 2]; + + if (__s.partial_sum_dilation == 1.0f) + equally_split(__n, __num_threads + 1, __borders); + else + { + _DifferenceType __first_part_length = + std::max<_DifferenceType>(1, + __n / (1.0f + __s.partial_sum_dilation * __num_threads)); + _DifferenceType __chunk_length = + (__n - __first_part_length) / __num_threads; + _DifferenceType __borderstart = + __n - __num_threads * __chunk_length; + __borders[0] = 0; + for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i) + { + __borders[__i] = __borderstart; + __borderstart += __chunk_length; + } + __borders[__num_threads + 1] = __n; + } + + __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) + * 
__num_threads)); + _OutputIterator __target_end; + } //single + + _ThreadIndex __iam = omp_get_thread_num(); + if (__iam == 0) + { + *__result = *__begin; + __parallel_partial_sum_basecase(__begin + 1, + __begin + __borders[1], + __result + 1, + __bin_op, *__begin); + ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1)); + } + else + { + ::new(&(__sums[__iam])) + _ValueType(__gnu_parallel::accumulate( + __begin + __borders[__iam] + 1, + __begin + __borders[__iam + 1], + *(__begin + __borders[__iam]), + __bin_op, + __gnu_parallel::sequential_tag())); + } + +# pragma omp barrier + +# pragma omp single + __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads, + __sums + 1, __bin_op, __sums[0]); + +# pragma omp barrier + + // Still same team. + __parallel_partial_sum_basecase(__begin + __borders[__iam + 1], + __begin + __borders[__iam + 2], + __result + __borders[__iam + 1], + __bin_op, __sums[__iam]); + } //parallel + + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + __sums[__i].~_ValueType(); + ::operator delete(__sums); + + delete[] __borders; + + return __result + __n; + } + + /** @brief Parallel partial sum front-__end. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. + * @param __result Begin iterator of output sequence. + * @param __bin_op Associative binary function. + * @return End iterator of output sequence. */ + template<typename _IIter, + typename _OutputIterator, + typename _BinaryOperation> + _OutputIterator + __parallel_partial_sum(_IIter __begin, _IIter __end, + _OutputIterator __result, _BinaryOperation __bin_op) + { + _GLIBCXX_CALL(__begin - __end) + + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + + switch (_Settings::get().partial_sum_algorithm) + { + case LINEAR: + // Need an initial offset. 
+ return __parallel_partial_sum_linear(__begin, __end, __result, + __bin_op, __n); + default: + // Partial_sum algorithm not implemented. + _GLIBCXX_PARALLEL_ASSERT(0); + return __result + __n; + } + } +} + +#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ diff --git a/libstdc++-v3/include/parallel/partition.h b/libstdc++-v3/include/parallel/partition.h new file mode 100644 index 000000000..c65132158 --- /dev/null +++ b/libstdc++-v3/include/parallel/partition.h @@ -0,0 +1,434 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/partition.h + * @brief Parallel implementation of std::partition(), + * std::nth_element(), and std::partial_sort(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_PARTITION_H +#define _GLIBCXX_PARALLEL_PARTITION_H 1 + +#include <parallel/basic_iterator.h> +#include <parallel/sort.h> +#include <parallel/random_number.h> +#include <bits/stl_algo.h> +#include <parallel/parallel.h> + +/** @brief Decide whether to declare certain variables volatile. */ +#define _GLIBCXX_VOLATILE volatile + +namespace __gnu_parallel +{ + /** @brief Parallel implementation of std::partition. + * @param __begin Begin iterator of input sequence to split. + * @param __end End iterator of input sequence to split. + * @param __pred Partition predicate, possibly including some kind + * of pivot. + * @param __num_threads Maximum number of threads to use for this task. + * @return Number of elements not fulfilling the predicate. */ + template<typename _RAIter, typename _Predicate> + typename std::iterator_traits<_RAIter>::difference_type + __parallel_partition(_RAIter __begin, _RAIter __end, + _Predicate __pred, _ThreadIndex __num_threads) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + + _GLIBCXX_CALL(__n) + + const _Settings& __s = _Settings::get(); + + // shared + _GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1, + __dist = __n, + __leftover_left, __leftover_right, + __leftnew, __rightnew; + + // just 0 or 1, but int to allow atomic operations + int* __reserved_left = 0, * __reserved_right = 0; + + _DifferenceType __chunk_size = __s.partition_chunk_size; + + //at least two chunks per thread + if (__dist >= 2 * __num_threads * __chunk_size) +# pragma omp parallel num_threads(__num_threads) + { +# pragma omp single + { + __num_threads = omp_get_num_threads(); + __reserved_left = new int[__num_threads]; + __reserved_right = new int[__num_threads]; + + if (__s.partition_chunk_share > 0.0) + __chunk_size = std::max<_DifferenceType> + 
(__s.partition_chunk_size, (double)__n + * __s.partition_chunk_share / (double)__num_threads); + else + __chunk_size = __s.partition_chunk_size; + } + + while (__dist >= 2 * __num_threads * __chunk_size) + { +# pragma omp single + { + _DifferenceType __num_chunks = __dist / __chunk_size; + + for (_ThreadIndex __r = 0; __r < __num_threads; ++__r) + { + __reserved_left [__r] = 0; // false + __reserved_right[__r] = 0; // false + } + __leftover_left = 0; + __leftover_right = 0; + } //implicit barrier + + // Private. + _DifferenceType __thread_left, __thread_left_border, + __thread_right, __thread_right_border; + + __thread_left = __left + 1; + // Just to satisfy the condition below. + __thread_left_border = __thread_left - 1; + + __thread_right = __n - 1; + // Just to satisfy the condition below. + __thread_right_border = __thread_right + 1; + + bool __iam_finished = false; + while (!__iam_finished) + { + if (__thread_left > __thread_left_border) + { + _DifferenceType __former_dist = + __fetch_and_add(&__dist, -__chunk_size); + if (__former_dist < __chunk_size) + { + __fetch_and_add(&__dist, __chunk_size); + __iam_finished = true; + break; + } + else + { + __thread_left = + __fetch_and_add(&__left, __chunk_size); + __thread_left_border = + __thread_left + (__chunk_size - 1); + } + } + + if (__thread_right < __thread_right_border) + { + _DifferenceType __former_dist = + __fetch_and_add(&__dist, -__chunk_size); + if (__former_dist < __chunk_size) + { + __fetch_and_add(&__dist, __chunk_size); + __iam_finished = true; + break; + } + else + { + __thread_right = + __fetch_and_add(&__right, -__chunk_size); + __thread_right_border = + __thread_right - (__chunk_size - 1); + } + } + + // Swap as usual. 
+ while (__thread_left < __thread_right) + { + while (__pred(__begin[__thread_left]) + && __thread_left <= __thread_left_border) + ++__thread_left; + while (!__pred(__begin[__thread_right]) + && __thread_right >= __thread_right_border) + --__thread_right; + + if (__thread_left > __thread_left_border + || __thread_right < __thread_right_border) + // Fetch new chunk(__s). + break; + + std::iter_swap(__begin + __thread_left, + __begin + __thread_right); + ++__thread_left; + --__thread_right; + } + } + + // Now swap the leftover chunks to the right places. + if (__thread_left <= __thread_left_border) +# pragma omp atomic + ++__leftover_left; + if (__thread_right >= __thread_right_border) +# pragma omp atomic + ++__leftover_right; + +# pragma omp barrier + + _DifferenceType + __leftold = __left, + __leftnew = __left - __leftover_left * __chunk_size, + __rightold = __right, + __rightnew = __right + __leftover_right * __chunk_size; + + // <=> __thread_left_border + (__chunk_size - 1) >= __leftnew + if (__thread_left <= __thread_left_border + && __thread_left_border >= __leftnew) + { + // Chunk already in place, reserve spot. + __reserved_left[(__left - (__thread_left_border + 1)) + / __chunk_size] = 1; + } + + // <=> __thread_right_border - (__chunk_size - 1) <= __rightnew + if (__thread_right >= __thread_right_border + && __thread_right_border <= __rightnew) + { + // Chunk already in place, reserve spot. + __reserved_right[((__thread_right_border - 1) - __right) + / __chunk_size] = 1; + } + +# pragma omp barrier + + if (__thread_left <= __thread_left_border + && __thread_left_border < __leftnew) + { + // Find spot and swap. 
+ _DifferenceType __swapstart = -1; + for (int __r = 0; __r < __leftover_left; ++__r) + if (__reserved_left[__r] == 0 + && __compare_and_swap(&(__reserved_left[__r]), 0, 1)) + { + __swapstart = __leftold - (__r + 1) * __chunk_size; + break; + } + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); +#endif + + std::swap_ranges(__begin + __thread_left_border + - (__chunk_size - 1), + __begin + __thread_left_border + 1, + __begin + __swapstart); + } + + if (__thread_right >= __thread_right_border + && __thread_right_border > __rightnew) + { + // Find spot and swap + _DifferenceType __swapstart = -1; + for (int __r = 0; __r < __leftover_right; ++__r) + if (__reserved_right[__r] == 0 + && __compare_and_swap(&(__reserved_right[__r]), 0, 1)) + { + __swapstart = __rightold + __r * __chunk_size + 1; + break; + } + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); +#endif + + std::swap_ranges(__begin + __thread_right_border, + __begin + __thread_right_border + + __chunk_size, __begin + __swapstart); + } +#if _GLIBCXX_ASSERTIONS +# pragma omp barrier + +# pragma omp single + { + for (_DifferenceType __r = 0; __r < __leftover_left; ++__r) + _GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r] == 1); + for (_DifferenceType __r = 0; __r < __leftover_right; ++__r) + _GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r] == 1); + } +#endif + + __left = __leftnew; + __right = __rightnew; + __dist = __right - __left + 1; + } + +# pragma omp flush(__left, __right) + } // end "recursion" //parallel + + _DifferenceType __final_left = __left, __final_right = __right; + + while (__final_left < __final_right) + { + // Go right until key is geq than pivot. + while (__pred(__begin[__final_left]) + && __final_left < __final_right) + ++__final_left; + + // Go left until key is less than pivot. 
+ while (!__pred(__begin[__final_right]) + && __final_left < __final_right) + --__final_right; + + if (__final_left == __final_right) + break; + std::iter_swap(__begin + __final_left, __begin + __final_right); + ++__final_left; + --__final_right; + } + + // All elements on the left side are < piv, all elements on the + // right are >= piv + delete[] __reserved_left; + delete[] __reserved_right; + + // Element "between" __final_left and __final_right might not have + // been regarded yet + if (__final_left < __n && !__pred(__begin[__final_left])) + // Really swapped. + return __final_left; + else + return __final_left + 1; + } + + /** + * @brief Parallel implementation of std::nth_element(). + * @param __begin Begin iterator of input sequence. + * @param __nth _Iterator of element that must be in position afterwards. + * @param __end End iterator of input sequence. + * @param __comp Comparator. + */ + template<typename _RAIter, typename _Compare> + void + __parallel_nth_element(_RAIter __begin, _RAIter __nth, + _RAIter __end, _Compare __comp) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _GLIBCXX_CALL(__end - __begin) + + _RAIter __split; + _RandomNumber __rng; + + const _Settings& __s = _Settings::get(); + _DifferenceType __minimum_length = std::max<_DifferenceType>(2, + std::max(__s.nth_element_minimal_n, __s.partition_minimal_n)); + + // Break if input range to small. + while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length) + { + _DifferenceType __n = __end - __begin; + + _RAIter __pivot_pos = __begin + __rng(__n); + + // Swap __pivot_pos value to end. 
+ if (__pivot_pos != (__end - 1)) + std::iter_swap(__pivot_pos, __end - 1); + __pivot_pos = __end - 1; + + // _Compare must have first_value_type, second_value_type, + // result_type + // _Compare == + // __gnu_parallel::_Lexicographic<S, int, + // __gnu_parallel::_Less<S, S> > + // __pivot_pos == std::pair<S, int>* + __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool> + __pred(__comp, *__pivot_pos); + + // Divide, leave pivot unchanged in last place. + _RAIter __split_pos1, __split_pos2; + __split_pos1 = __begin + __parallel_partition(__begin, __end - 1, + __pred, + __get_max_threads()); + + // Left side: < __pivot_pos; __right side: >= __pivot_pos + + // Swap pivot back to middle. + if (__split_pos1 != __pivot_pos) + std::iter_swap(__split_pos1, __pivot_pos); + __pivot_pos = __split_pos1; + + // In case all elements are equal, __split_pos1 == 0 + if ((__split_pos1 + 1 - __begin) < (__n >> 7) + || (__end - __split_pos1) < (__n >> 7)) + { + // Very unequal split, one part smaller than one 128th + // elements not strictly larger than the pivot. + __gnu_parallel::__unary_negate<__gnu_parallel:: + __binder1st<_Compare, _ValueType, + _ValueType, bool>, _ValueType> + __pred(__gnu_parallel::__binder1st<_Compare, _ValueType, + _ValueType, bool>(__comp, *__pivot_pos)); + + // Find other end of pivot-equal range. + __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, + __end, __pred); + } + else + // Only skip the pivot. + __split_pos2 = __split_pos1 + 1; + + // Compare iterators. + if (__split_pos2 <= __nth) + __begin = __split_pos2; + else if (__nth < __split_pos1) + __end = __split_pos1; + else + break; + } + + // Only at most _Settings::partition_minimal_n __elements __left. + __gnu_sequential::nth_element(__begin, __nth, __end, __comp); + } + + /** @brief Parallel implementation of std::partial_sort(). + * @param __begin Begin iterator of input sequence. + * @param __middle Sort until this position. 
+ * @param __end End iterator of input sequence. + * @param __comp Comparator. */ + template<typename _RAIter, typename _Compare> + void + __parallel_partial_sort(_RAIter __begin, + _RAIter __middle, + _RAIter __end, _Compare __comp) + { + __parallel_nth_element(__begin, __middle, __end, __comp); + std::sort(__begin, __middle, __comp); + } + +} //namespace __gnu_parallel + +#undef _GLIBCXX_VOLATILE + +#endif /* _GLIBCXX_PARALLEL_PARTITION_H */ diff --git a/libstdc++-v3/include/parallel/queue.h b/libstdc++-v3/include/parallel/queue.h new file mode 100644 index 000000000..03c1c78bb --- /dev/null +++ b/libstdc++-v3/include/parallel/queue.h @@ -0,0 +1,155 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/queue.h + * @brief Lock-free double-ended queue. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. 
  /**@brief Double-ended queue of bounded size, allowing lock-free
   *  atomic access.  push_front() and pop_front() must not be called
   *  concurrently to each other, while pop_back() can be called
   *  concurrently at all times.
   *  @c empty(), @c size(), and @c top() are intentionally not provided.
   *  Calling them would not make sense in a concurrent setting.
   *
   *  The front and back positions are kept together in one value (see
   *  __encode2() / __decode2()) and are reduced modulo the capacity
   *  only when the element array is accessed.
   *
   *  NOTE(review): the class owns @c _M_base but declares no copy
   *  constructor or copy assignment, so copying an instance would
   *  presumably delete the array twice -- confirm it is never copied.
   *  @param _Tp Contained element type. */
  template<typename _Tp>
    class _RestrictedBoundedConcurrentQueue
    {
    private:
      /** @brief Array of elements, seen as cyclic buffer. */
      _Tp* _M_base;

      /** @brief Maximal number of elements contained at the same time. */
      _SequenceIndex _M_max_size;

      /** @brief Cyclic __begin and __end pointers contained in one
          atomically changeable value. */
      _GLIBCXX_VOLATILE _CASable _M_borders;

    public:
      /** @brief Constructor. Not to be called concurrent, of course.
       *  @param __max_size Maximal number of elements to be contained. */
      _RestrictedBoundedConcurrentQueue(_SequenceIndex __max_size)
      {
        _M_max_size = __max_size;
        _M_base = new _Tp[__max_size];
        // Front and back both start at position 0: the queue is empty.
        _M_borders = __encode2(0, 0);
#pragma omp flush
      }

      /** @brief Destructor. Not to be called concurrent, of course. */
      ~_RestrictedBoundedConcurrentQueue()
      { delete[] _M_base; }

      /** @brief Pushes one element into the queue at the front end.
       *  Must not be called concurrently with pop_front().
       *  @param __t Element to store; it is copied into the buffer. */
      void
      push_front(const _Tp& __t)
      {
        _CASable __former_borders = _M_borders;
        int __former_front, __former_back;
        __decode2(__former_borders, __former_front, __former_back);
        // Store the element first, publish the new front afterwards.
        *(_M_base + __former_front % _M_max_size) = __t;
#if _GLIBCXX_ASSERTIONS
        // Otherwise: front - back > _M_max_size eventually.
        _GLIBCXX_PARALLEL_ASSERT(((__former_front + 1) - __former_back)
                                 <= _M_max_size);
#endif
        __fetch_and_add(&_M_borders, __encode2(1, 0));
      }

      /** @brief Pops one element from the queue at the front end.
       *  Must not be called concurrently with push_front().
       *  @param __t Receives the popped element on success.
       *  @return True if an element was popped, false if the queue was
       *  found empty. */
      bool
      pop_front(_Tp& __t)
      {
        int __former_front, __former_back;
#pragma omp flush
        __decode2(_M_borders, __former_front, __former_back);
        while (__former_front > __former_back)
          {
            // Chance.
            _CASable __former_borders = __encode2(__former_front,
                                                  __former_back);
            _CASable __new_borders = __encode2(__former_front - 1,
                                               __former_back);
            // Succeeds only if nobody changed the borders in between.
            if (__compare_and_swap(&_M_borders, __former_borders,
                                   __new_borders))
              {
                __t = *(_M_base + (__former_front - 1) % _M_max_size);
                return true;
              }
#pragma omp flush
            // Lost the race: re-read the borders and try again.
            __decode2(_M_borders, __former_front, __former_back);
          }
        return false;
      }

      /** @brief Pops one element from the queue at the back end.
       *  According to the class description, this may be called
       *  concurrently at all times.
       *  @param __t Receives the popped element on success.
       *  @return True if an element was popped, false if the queue was
       *  found empty. */
      bool
      pop_back(_Tp& __t)        //queue behavior
      {
        int __former_front, __former_back;
#pragma omp flush
        __decode2(_M_borders, __former_front, __former_back);
        while (__former_front > __former_back)
          {
            // Chance.
            _CASable __former_borders = __encode2(__former_front,
                                                  __former_back);
            _CASable __new_borders = __encode2(__former_front,
                                               __former_back + 1);
            // Succeeds only if nobody changed the borders in between.
            if (__compare_and_swap(&_M_borders, __former_borders,
                                   __new_borders))
              {
                __t = *(_M_base + __former_back % _M_max_size);
                return true;
              }
#pragma omp flush
            // Lost the race: re-read the borders and try again.
            __decode2(_M_borders, __former_front, __former_back);
          }
        return false;
      }
  };
+ */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_QUICKSORT_H +#define _GLIBCXX_PARALLEL_QUICKSORT_H 1 + +#include <parallel/parallel.h> +#include <parallel/partition.h> + +namespace __gnu_parallel +{ + /** @brief Unbalanced quicksort divide step. + * @param __begin Begin iterator of subsequence. + * @param __end End iterator of subsequence. + * @param __comp Comparator. + * @param __pivot_rank Desired __rank of the pivot. + * @param __num_samples Choose pivot from that many samples. + * @param __num_threads Number of threads that are allowed to work on + * this part. + */ + template<typename _RAIter, typename _Compare> + typename std::iterator_traits<_RAIter>::difference_type + __parallel_sort_qs_divide(_RAIter __begin, _RAIter __end, + _Compare __comp, typename std::iterator_traits + <_RAIter>::difference_type __pivot_rank, + typename std::iterator_traits + <_RAIter>::difference_type + __num_samples, _ThreadIndex __num_threads) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + __num_samples = std::min(__num_samples, __n); + + // Allocate uninitialized, to avoid default constructor. 
+ _ValueType* __samples = static_cast<_ValueType*> + (::operator new(__num_samples * sizeof(_ValueType))); + + for (_DifferenceType __s = 0; __s < __num_samples; ++__s) + { + const unsigned long long __index = static_cast<unsigned long long> + (__s) * __n / __num_samples; + ::new(&(__samples[__s])) _ValueType(__begin[__index]); + } + + __gnu_sequential::sort(__samples, __samples + __num_samples, __comp); + + _ValueType& __pivot = __samples[__pivot_rank * __num_samples / __n]; + + __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool> + __pred(__comp, __pivot); + _DifferenceType __split = __parallel_partition(__begin, __end, + __pred, __num_threads); + + for (_DifferenceType __s = 0; __s < __num_samples; ++__s) + __samples[__s].~_ValueType(); + ::operator delete(__samples); + + return __split; + } + + /** @brief Unbalanced quicksort conquer step. + * @param __begin Begin iterator of subsequence. + * @param __end End iterator of subsequence. + * @param __comp Comparator. + * @param __num_threads Number of threads that are allowed to work on + * this part. 
+ */ + template<typename _RAIter, typename _Compare> + void + __parallel_sort_qs_conquer(_RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __num_threads) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + if (__num_threads <= 1) + { + __gnu_sequential::sort(__begin, __end, __comp); + return; + } + + _DifferenceType __n = __end - __begin, __pivot_rank; + + if (__n <= 1) + return; + + _ThreadIndex __num_threads_left; + + if ((__num_threads % 2) == 1) + __num_threads_left = __num_threads / 2 + 1; + else + __num_threads_left = __num_threads / 2; + + __pivot_rank = __n * __num_threads_left / __num_threads; + + _DifferenceType __split = __parallel_sort_qs_divide + (__begin, __end, __comp, __pivot_rank, + _Settings::get().sort_qs_num_samples_preset, __num_threads); + +#pragma omp parallel sections num_threads(2) + { +#pragma omp section + __parallel_sort_qs_conquer(__begin, __begin + __split, + __comp, __num_threads_left); +#pragma omp section + __parallel_sort_qs_conquer(__begin + __split, __end, + __comp, __num_threads - __num_threads_left); + } + } + + + /** @brief Unbalanced quicksort main call. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator input sequence, ignored. + * @param __comp Comparator. + * @param __num_threads Number of threads that are allowed to work on + * this part. + */ + template<typename _RAIter, typename _Compare> + void + __parallel_sort_qs(_RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __num_threads) + { + _GLIBCXX_CALL(__n) + + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + + // At least one element per processor. 
+ if (__num_threads > __n) + __num_threads = static_cast<_ThreadIndex>(__n); + + __parallel_sort_qs_conquer( + __begin, __begin + __n, __comp, __num_threads); + } + +} //namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_QUICKSORT_H */ diff --git a/libstdc++-v3/include/parallel/random_number.h b/libstdc++-v3/include/parallel/random_number.h new file mode 100644 index 000000000..c39f15e12 --- /dev/null +++ b/libstdc++-v3/include/parallel/random_number.h @@ -0,0 +1,125 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/random_number.h + * @brief Random number generator based on the Mersenne twister. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. 
#ifndef _GLIBCXX_PARALLEL_RANDOM_NUMBER_H
#define _GLIBCXX_PARALLEL_RANDOM_NUMBER_H 1

#include <parallel/types.h>
#include <tr1/random>
#include <limits>

namespace __gnu_parallel
{
  /** @brief Random number generator, based on the Mersenne twister.
   *
   *  Wraps a @c std::tr1::mt19937 and offers scaled 32-bit draws as well
   *  as extraction of a run-time chosen number of random bits.
   */
  class _RandomNumber
  {
  private:
    std::tr1::mt19937	_M_mt;
    uint64_t		_M_supremum;
    uint64_t		_M_rand_sup;
    double		_M_supremum_reciprocal;
    double		_M_rand_sup_reciprocal;

    // Assumed to be twice as long as the usual random number.
    uint64_t		__cache;

    // Number of valid (not yet handed out) low-order bits in __cache.
    int			__bits_left;

    /** @brief Map a raw draw into the requested interval.
     *
     *  Depending on @c _GLIBCXX_SCALE_DOWN_FPU, either multiplies by the
     *  given scaling factor or reduces modulo the supremum.
     */
    static uint32_t
    __scale_down(uint64_t __x,
#if _GLIBCXX_SCALE_DOWN_FPU
		 uint64_t /*_M_supremum*/, double _M_supremum_reciprocal)
#else
		 uint64_t _M_supremum, double /*_M_supremum_reciprocal*/)
#endif
    {
#if _GLIBCXX_SCALE_DOWN_FPU
      return uint32_t(__x * _M_supremum_reciprocal);
#else
      return static_cast<uint32_t>(__x % _M_supremum);
#endif
    }

  public:
    /** @brief Default constructor. Seed with 0. */
    _RandomNumber()
    : _M_mt(0), _M_supremum(0x100000000ULL),
      _M_rand_sup(1ULL << std::numeric_limits<uint32_t>::digits),
      _M_supremum_reciprocal(double(_M_supremum) / double(_M_rand_sup)),
      _M_rand_sup_reciprocal(1.0 / double(_M_rand_sup)),
      __cache(0), __bits_left(0) { }

    /** @brief Constructor.
     *  @param __seed Random __seed.
     *  @param _M_supremum Generate integer random numbers in the
     *                  interval @c [0,_M_supremum). */
    _RandomNumber(uint32_t __seed, uint64_t _M_supremum = 0x100000000ULL)
    : _M_mt(__seed), _M_supremum(_M_supremum),
      _M_rand_sup(1ULL << std::numeric_limits<uint32_t>::digits),
      _M_supremum_reciprocal(double(_M_supremum) / double(_M_rand_sup)),
      _M_rand_sup_reciprocal(1.0 / double(_M_rand_sup)),
      __cache(0), __bits_left(0) { }

    /** @brief Generate unsigned random 32-bit integer. */
    uint32_t
    operator()()
    { return __scale_down(_M_mt(), _M_supremum, _M_supremum_reciprocal); }

    /** @brief Generate unsigned random 32-bit integer in the
	interval @c [0,local_supremum). */
    uint32_t
    operator()(uint64_t local_supremum)
    {
      return __scale_down(_M_mt(), local_supremum,
			  double(local_supremum * _M_rand_sup_reciprocal));
    }

    /** @brief Generate a number of random bits, run-time parameter.
     *
     *  Bits are served from an internal cache.  The cache is topped up
     *  with one 32-bit draw from the Mersenne twister whenever it holds
     *  fewer bits than requested, so consecutive calls hand out the
     *  draws low-order bits first.
     *
     *  @param __bits Number of bits to generate.  Values above 32 are
     *  treated as 32; values below 1 yield 0.
     *  @return The requested bits in the low-order positions. */
    unsigned long
    __genrand_bits(int __bits)
    {
      if (__bits <= 0)
	return 0;
      // One refill adds 32 bits to a 64-bit cache, so a single call
      // cannot deliver more than 32 bits.
      if (__bits > 32)
	__bits = 32;

      // Refill before extracting: the cache starts out empty, and the
      // shift count must never become negative.
      if (__bits_left < __bits)
	{
	  __cache |= ((uint64_t(_M_mt())) << __bits_left);
	  __bits_left += 32;
	}

      // 64-bit mask, so that requests of 31 or 32 bits do not overflow
      // an int shift.
      const uint64_t __mask = (uint64_t(1) << __bits) - 1;
      unsigned long __res = static_cast<unsigned long>(__cache & __mask);
      __cache = __cache >> __bits;
      __bits_left -= __bits;
      return __res;
    }
};

} // namespace __gnu_parallel

#endif /* _GLIBCXX_PARALLEL_RANDOM_NUMBER_H */
// diff --git a/libstdc++-v3/include/parallel/random_shuffle.h b/libstdc++-v3/include/parallel/random_shuffle.h
// new file mode 100644
// index 000000000..c3967c228
// --- /dev/null
// +++ b/libstdc++-v3/include/parallel/random_shuffle.h
// @@ -0,0 +1,533 @@
// -*- C++ -*-

// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
+ +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/random_shuffle.h + * @brief Parallel implementation of std::random_shuffle(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H +#define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1 + +#include <limits> +#include <bits/stl_numeric.h> +#include <parallel/parallel.h> +#include <parallel/random_number.h> + +namespace __gnu_parallel +{ + /** @brief Type to hold the index of a bin. + * + * Since many variables of this type are allocated, it should be + * chosen as small as possible. + */ + typedef unsigned short _BinIndex; + + /** @brief Data known to every thread participating in + __gnu_parallel::__parallel_random_shuffle(). */ + template<typename _RAIter> + struct _DRandomShufflingGlobalData + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + /** @brief Begin iterator of the __source. */ + _RAIter& _M_source; + + /** @brief Temporary arrays for each thread. */ + _ValueType** _M_temporaries; + + /** @brief Two-dimensional array to hold the thread-bin distribution. + * + * Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */ + _DifferenceType** _M_dist; + + /** @brief Start indexes of the threads' __chunks. */ + _DifferenceType* _M_starts; + + /** @brief Number of the thread that will further process the + corresponding bin. */ + _ThreadIndex* _M_bin_proc; + + /** @brief Number of bins to distribute to. */ + int _M_num_bins; + + /** @brief Number of bits needed to address the bins. */ + int _M_num_bits; + + /** @brief Constructor. 
*/ + _DRandomShufflingGlobalData(_RAIter& __source) + : _M_source(__source) { } + }; + + /** @brief Local data for a thread participating in + __gnu_parallel::__parallel_random_shuffle(). + */ + template<typename _RAIter, typename _RandomNumberGenerator> + struct _DRSSorterPU + { + /** @brief Number of threads participating in total. */ + int _M_num_threads; + + /** @brief Begin index for bins taken care of by this thread. */ + _BinIndex _M_bins_begin; + + /** @brief End index for bins taken care of by this thread. */ + _BinIndex __bins_end; + + /** @brief Random _M_seed for this thread. */ + uint32_t _M_seed; + + /** @brief Pointer to global data. */ + _DRandomShufflingGlobalData<_RAIter>* _M_sd; + }; + + /** @brief Generate a random number in @c [0,2^__logp). + * @param __logp Logarithm (basis 2) of the upper range __bound. + * @param __rng Random number generator to use. + */ + template<typename _RandomNumberGenerator> + inline int + __random_number_pow2(int __logp, _RandomNumberGenerator& __rng) + { return __rng.__genrand_bits(__logp); } + + /** @brief Random shuffle code executed by each thread. + * @param __pus Array of thread-local data records. 
*/ + template<typename _RAIter, typename _RandomNumberGenerator> + void + __parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter, + _RandomNumberGenerator>* __pus) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _ThreadIndex __iam = omp_get_thread_num(); + _DRSSorterPU<_RAIter, _RandomNumberGenerator>* __d = &__pus[__iam]; + _DRandomShufflingGlobalData<_RAIter>* __sd = __d->_M_sd; + + // Indexing: _M_dist[bin][processor] + _DifferenceType __length = (__sd->_M_starts[__iam + 1] + - __sd->_M_starts[__iam]); + _BinIndex* __oracles = new _BinIndex[__length]; + _DifferenceType* __dist = new _DifferenceType[__sd->_M_num_bins + 1]; + _BinIndex* __bin_proc = new _BinIndex[__sd->_M_num_bins]; + _ValueType** __temporaries = new _ValueType*[__d->_M_num_threads]; + + // Compute oracles and count appearances. + for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) + __dist[__b] = 0; + int __num_bits = __sd->_M_num_bits; + + _RandomNumber __rng(__d->_M_seed); + + // First main loop. + for (_DifferenceType __i = 0; __i < __length; ++__i) + { + _BinIndex __oracle = __random_number_pow2(__num_bits, __rng); + __oracles[__i] = __oracle; + + // To allow prefix (partial) sum. 
+ ++(__dist[__oracle + 1]); + } + + for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) + __sd->_M_dist[__b][__iam + 1] = __dist[__b]; + +# pragma omp barrier + +# pragma omp single + { + // Sum up bins, __sd->_M_dist[__s + 1][__d->_M_num_threads] now + // contains the total number of items in bin __s + for (_BinIndex __s = 0; __s < __sd->_M_num_bins; ++__s) + __gnu_sequential::partial_sum(__sd->_M_dist[__s + 1], + __sd->_M_dist[__s + 1] + + __d->_M_num_threads + 1, + __sd->_M_dist[__s + 1]); + } + +# pragma omp barrier + + _SequenceIndex __offset = 0, __global_offset = 0; + for (_BinIndex __s = 0; __s < __d->_M_bins_begin; ++__s) + __global_offset += __sd->_M_dist[__s + 1][__d->_M_num_threads]; + +# pragma omp barrier + + for (_BinIndex __s = __d->_M_bins_begin; __s < __d->__bins_end; ++__s) + { + for (int __t = 0; __t < __d->_M_num_threads + 1; ++__t) + __sd->_M_dist[__s + 1][__t] += __offset; + __offset = __sd->_M_dist[__s + 1][__d->_M_num_threads]; + } + + __sd->_M_temporaries[__iam] = static_cast<_ValueType*> + (::operator new(sizeof(_ValueType) * __offset)); + +# pragma omp barrier + + // Draw local copies to avoid false sharing. + for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) + __dist[__b] = __sd->_M_dist[__b][__iam]; + for (_BinIndex __b = 0; __b < __sd->_M_num_bins; ++__b) + __bin_proc[__b] = __sd->_M_bin_proc[__b]; + for (_ThreadIndex __t = 0; __t < __d->_M_num_threads; ++__t) + __temporaries[__t] = __sd->_M_temporaries[__t]; + + _RAIter __source = __sd->_M_source; + _DifferenceType __start = __sd->_M_starts[__iam]; + + // Distribute according to oracles, second main loop. + for (_DifferenceType __i = 0; __i < __length; ++__i) + { + _BinIndex __target_bin = __oracles[__i]; + _ThreadIndex __target_p = __bin_proc[__target_bin]; + + // Last column [__d->_M_num_threads] stays unchanged. 
+ ::new(&(__temporaries[__target_p][__dist[__target_bin + 1]++])) + _ValueType(*(__source + __i + __start)); + } + + delete[] __oracles; + delete[] __dist; + delete[] __bin_proc; + delete[] __temporaries; + +# pragma omp barrier + + // Shuffle bins internally. + for (_BinIndex __b = __d->_M_bins_begin; __b < __d->__bins_end; ++__b) + { + _ValueType* __begin = + (__sd->_M_temporaries[__iam] + + (__b == __d->_M_bins_begin + ? 0 : __sd->_M_dist[__b][__d->_M_num_threads])), + *__end = (__sd->_M_temporaries[__iam] + + __sd->_M_dist[__b + 1][__d->_M_num_threads]); + + __sequential_random_shuffle(__begin, __end, __rng); + std::copy(__begin, __end, __sd->_M_source + __global_offset + + (__b == __d->_M_bins_begin + ? 0 : __sd->_M_dist[__b][__d->_M_num_threads])); + } + + for (_SequenceIndex __i = 0; __i < __offset; ++__i) + __sd->_M_temporaries[__iam][__i].~_ValueType(); + ::operator delete(__sd->_M_temporaries[__iam]); + } + + /** @brief Round up to the next greater power of 2. + * @param __x _Integer to round up */ + template<typename _Tp> + _Tp + __round_up_to_pow2(_Tp __x) + { + if (__x <= 1) + return 1; + else + return (_Tp)1 << (__rd_log2(__x - 1) + 1); + } + + /** @brief Main parallel random shuffle step. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __n Length of sequence. + * @param __num_threads Number of threads to use. + * @param __rng Random number generator to use. 
+ */ + template<typename _RAIter, typename _RandomNumberGenerator> + void + __parallel_random_shuffle_drs(_RAIter __begin, _RAIter __end, + typename std::iterator_traits + <_RAIter>::difference_type __n, + _ThreadIndex __num_threads, + _RandomNumberGenerator& __rng) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _GLIBCXX_CALL(__n) + + const _Settings& __s = _Settings::get(); + + if (__num_threads > __n) + __num_threads = static_cast<_ThreadIndex>(__n); + + _BinIndex __num_bins, __num_bins_cache; + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + // Try the L1 cache first. + + // Must fit into L1. + __num_bins_cache = + std::max<_DifferenceType>(1, __n / (__s.L1_cache_size_lb + / sizeof(_ValueType))); + __num_bins_cache = __round_up_to_pow2(__num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size. + __num_bins = std::min<_DifferenceType>(__n, __num_bins_cache); + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin. + __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins); +#endif + __num_bins = __round_up_to_pow2(__num_bins); + + if (__num_bins < __num_bins_cache) + { +#endif + // Now try the L2 cache + // Must fit into L2 + __num_bins_cache = static_cast<_BinIndex> + (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size + / sizeof(_ValueType)))); + __num_bins_cache = __round_up_to_pow2(__num_bins_cache); + + // No more buckets than TLB entries, power of 2. + __num_bins = static_cast<_BinIndex> + (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); + // Power of 2 and at least one element per bin, at most the TLB size. +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin. 
+ __num_bins = std::min(static_cast<_DifferenceType>(__s.TLB_size / 2), + __num_bins); +#endif + __num_bins = __round_up_to_pow2(__num_bins); +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + } +#endif + + __num_bins = __round_up_to_pow2( + std::max<_BinIndex>(__num_threads, __num_bins)); + + if (__num_threads <= 1) + { + _RandomNumber __derived_rng( + __rng(std::numeric_limits<uint32_t>::max())); + __sequential_random_shuffle(__begin, __end, __derived_rng); + return; + } + + _DRandomShufflingGlobalData<_RAIter> __sd(__begin); + _DRSSorterPU<_RAIter, _RandomNumber >* __pus; + _DifferenceType* __starts; + +# pragma omp parallel num_threads(__num_threads) + { + _ThreadIndex __num_threads = omp_get_num_threads(); +# pragma omp single + { + __pus = new _DRSSorterPU<_RAIter, _RandomNumber>[__num_threads]; + + __sd._M_temporaries = new _ValueType*[__num_threads]; + __sd._M_dist = new _DifferenceType*[__num_bins + 1]; + __sd._M_bin_proc = new _ThreadIndex[__num_bins]; + for (_BinIndex __b = 0; __b < __num_bins + 1; ++__b) + __sd._M_dist[__b] = new _DifferenceType[__num_threads + 1]; + for (_BinIndex __b = 0; __b < (__num_bins + 1); ++__b) + { + __sd._M_dist[0][0] = 0; + __sd._M_dist[__b][0] = 0; + } + __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; + int __bin_cursor = 0; + __sd._M_num_bins = __num_bins; + __sd._M_num_bits = __rd_log2(__num_bins); + + _DifferenceType __chunk_length = __n / __num_threads, + __split = __n % __num_threads, + __start = 0; + _DifferenceType __bin_chunk_length = __num_bins / __num_threads, + __bin_split = __num_bins % __num_threads; + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + { + __starts[__i] = __start; + __start += (__i < __split + ? (__chunk_length + 1) : __chunk_length); + int __j = __pus[__i]._M_bins_begin = __bin_cursor; + + // Range of bins for this processor. + __bin_cursor += (__i < __bin_split + ? 
(__bin_chunk_length + 1) + : __bin_chunk_length); + __pus[__i].__bins_end = __bin_cursor; + for (; __j < __bin_cursor; ++__j) + __sd._M_bin_proc[__j] = __i; + __pus[__i]._M_num_threads = __num_threads; + __pus[__i]._M_seed = __rng(std::numeric_limits<uint32_t>::max()); + __pus[__i]._M_sd = &__sd; + } + __starts[__num_threads] = __start; + } //single + // Now shuffle in parallel. + __parallel_random_shuffle_drs_pu(__pus); + } // parallel + + delete[] __starts; + delete[] __sd._M_bin_proc; + for (int __s = 0; __s < (__num_bins + 1); ++__s) + delete[] __sd._M_dist[__s]; + delete[] __sd._M_dist; + delete[] __sd._M_temporaries; + + delete[] __pus; + } + + /** @brief Sequential cache-efficient random shuffle. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __rng Random number generator to use. + */ + template<typename _RAIter, typename _RandomNumberGenerator> + void + __sequential_random_shuffle(_RAIter __begin, _RAIter __end, + _RandomNumberGenerator& __rng) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + const _Settings& __s = _Settings::get(); + + _BinIndex __num_bins, __num_bins_cache; + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + // Try the L1 cache first, must fit into L1. 
+ __num_bins_cache = std::max<_DifferenceType> + (1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType))); + __num_bins_cache = __round_up_to_pow2(__num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size + __num_bins = std::min(__n, (_DifferenceType)__num_bins_cache); +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin + __num_bins = std::min((_DifferenceType)__s.TLB_size / 2, __num_bins); +#endif + __num_bins = __round_up_to_pow2(__num_bins); + + if (__num_bins < __num_bins_cache) + { +#endif + // Now try the L2 cache, must fit into L2. + __num_bins_cache = static_cast<_BinIndex> + (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size + / sizeof(_ValueType)))); + __num_bins_cache = __round_up_to_pow2(__num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size. + __num_bins = static_cast<_BinIndex> + (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin + __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins); +#endif + __num_bins = __round_up_to_pow2(__num_bins); +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + } +#endif + + int __num_bits = __rd_log2(__num_bins); + + if (__num_bins > 1) + { + _ValueType* __target = + static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __n)); + _BinIndex* __oracles = new _BinIndex[__n]; + _DifferenceType* __dist0 = new _DifferenceType[__num_bins + 1], + * __dist1 = new _DifferenceType[__num_bins + 1]; + + for (int __b = 0; __b < __num_bins + 1; ++__b) + __dist0[__b] = 0; + + _RandomNumber __bitrng(__rng(0xFFFFFFFF)); + + for (_DifferenceType __i = 0; __i < __n; ++__i) + { + _BinIndex __oracle = __random_number_pow2(__num_bits, __bitrng); + __oracles[__i] = __oracle; + + // To allow prefix (partial) sum. 
+ ++(__dist0[__oracle + 1]); + } + + // Sum up bins. + __gnu_sequential::partial_sum(__dist0, __dist0 + __num_bins + 1, + __dist0); + + for (int __b = 0; __b < __num_bins + 1; ++__b) + __dist1[__b] = __dist0[__b]; + + // Distribute according to oracles. + for (_DifferenceType __i = 0; __i < __n; ++__i) + ::new(&(__target[(__dist0[__oracles[__i]])++])) + _ValueType(*(__begin + __i)); + + for (int __b = 0; __b < __num_bins; ++__b) + __sequential_random_shuffle(__target + __dist1[__b], + __target + __dist1[__b + 1], __rng); + + // Copy elements back. + std::copy(__target, __target + __n, __begin); + + delete[] __dist0; + delete[] __dist1; + delete[] __oracles; + + for (_DifferenceType __i = 0; __i < __n; ++__i) + __target[__i].~_ValueType(); + ::operator delete(__target); + } + else + __gnu_sequential::random_shuffle(__begin, __end, __rng); + } + + /** @brief Parallel random public call. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __rng Random number generator to use. + */ + template<typename _RAIter, typename _RandomNumberGenerator> + inline void + __parallel_random_shuffle(_RAIter __begin, _RAIter __end, + _RandomNumberGenerator __rng = _RandomNumber()) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + _DifferenceType __n = __end - __begin; + __parallel_random_shuffle_drs(__begin, __end, __n, + __get_max_threads(), __rng); + } +} + +#endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */ diff --git a/libstdc++-v3/include/parallel/search.h b/libstdc++-v3/include/parallel/search.h new file mode 100644 index 000000000..970992561 --- /dev/null +++ b/libstdc++-v3/include/parallel/search.h @@ -0,0 +1,172 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/search.h + * @brief Parallel implementation base for std::search() and + * std::search_n(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_SEARCH_H +#define _GLIBCXX_PARALLEL_SEARCH_H 1 + +#include <bits/stl_algobase.h> + +#include <parallel/parallel.h> +#include <parallel/equally_split.h> + +namespace __gnu_parallel +{ + /** + * @brief Precalculate __advances for Knuth-Morris-Pratt algorithm. + * @param __elements Begin iterator of sequence to search for. + * @param __length Length of sequence to search for. + * @param __advances Returned __offsets. 
+ */ + template<typename _RAIter, typename _DifferenceTp> + void + __calc_borders(_RAIter __elements, _DifferenceTp __length, + _DifferenceTp* __off) + { + typedef _DifferenceTp _DifferenceType; + + __off[0] = -1; + if (__length > 1) + __off[1] = 0; + _DifferenceType __k = 0; + for (_DifferenceType __j = 2; __j <= __length; __j++) + { + while ((__k >= 0) && !(__elements[__k] == __elements[__j-1])) + __k = __off[__k]; + __off[__j] = ++__k; + } + } + + // Generic parallel find algorithm (requires random access iterator). + + /** @brief Parallel std::search. + * @param __begin1 Begin iterator of first sequence. + * @param __end1 End iterator of first sequence. + * @param __begin2 Begin iterator of second sequence. + * @param __end2 End iterator of second sequence. + * @param __pred Find predicate. + * @return Place of finding in first sequences. */ + template<typename __RAIter1, + typename __RAIter2, + typename _Pred> + __RAIter1 + __search_template(__RAIter1 __begin1, __RAIter1 __end1, + __RAIter2 __begin2, __RAIter2 __end2, + _Pred __pred) + { + typedef std::iterator_traits<__RAIter1> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2)); + + _DifferenceType __pattern_length = __end2 - __begin2; + + // Pattern too short. + if(__pattern_length <= 0) + return __end1; + + // Last point to start search. + _DifferenceType __input_length = (__end1 - __begin1) - __pattern_length; + + // Where is first occurrence of pattern? defaults to end. + _DifferenceType __result = (__end1 - __begin1); + _DifferenceType *__splitters; + + // Pattern too long. 
+ if (__input_length < 0) + return __end1; + + omp_lock_t __result_lock; + omp_init_lock(&__result_lock); + + _ThreadIndex __num_threads = std::max<_DifferenceType> + (1, std::min<_DifferenceType>(__input_length, + __get_max_threads())); + + _DifferenceType __advances[__pattern_length]; + __calc_borders(__begin2, __pattern_length, __advances); + +# pragma omp parallel num_threads(__num_threads) + { +# pragma omp single + { + __num_threads = omp_get_num_threads(); + __splitters = new _DifferenceType[__num_threads + 1]; + equally_split(__input_length, __num_threads, __splitters); + } + + _ThreadIndex __iam = omp_get_thread_num(); + + _DifferenceType __start = __splitters[__iam], + __stop = __splitters[__iam + 1]; + + _DifferenceType __pos_in_pattern = 0; + bool __found_pattern = false; + + while (__start <= __stop && !__found_pattern) + { + // Get new value of result. +#pragma omp flush(__result) + // No chance for this thread to find first occurrence. + if (__result < __start) + break; + while (__pred(__begin1[__start + __pos_in_pattern], + __begin2[__pos_in_pattern])) + { + ++__pos_in_pattern; + if (__pos_in_pattern == __pattern_length) + { + // Found new candidate for result. + omp_set_lock(&__result_lock); + __result = std::min(__result, __start); + omp_unset_lock(&__result_lock); + + __found_pattern = true; + break; + } + } + // Make safe jump. + __start += (__pos_in_pattern - __advances[__pos_in_pattern]); + __pos_in_pattern = (__advances[__pos_in_pattern] < 0 + ? 0 : __advances[__pos_in_pattern]); + } + } //parallel + + omp_destroy_lock(&__result_lock); + + delete[] __splitters; + + // Return iterator on found element. 
+ return (__begin1 + __result); + } +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_SEARCH_H */ diff --git a/libstdc++-v3/include/parallel/set_operations.h b/libstdc++-v3/include/parallel/set_operations.h new file mode 100644 index 000000000..f552c1dda --- /dev/null +++ b/libstdc++-v3/include/parallel/set_operations.h @@ -0,0 +1,529 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** + * @file parallel/set_operations.h + * @brief Parallel implementations of set operations for random-access + * iterators. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Marius Elvert and Felix Bondarenko. 
+
+#ifndef _GLIBCXX_PARALLEL_SET_OPERATIONS_H
+#define _GLIBCXX_PARALLEL_SET_OPERATIONS_H 1
+
+#include <omp.h>
+
+#include <parallel/settings.h>
+#include <parallel/multiseq_selection.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Copy whatever remains of two partially consumed sequences.
+   *
+   *  If the first sequence still has elements left, those are copied;
+   *  otherwise the remainder of the second sequence is copied.
+   *  @param __b Current positions in the first and the second sequence.
+   *  @param __e End positions of the first and the second sequence.
+   *  @param __r Output iterator.
+   *  @return Output iterator past the last element written. */
+  template<typename _IIter, typename _OutputIterator>
+    _OutputIterator
+    __copy_tail(std::pair<_IIter, _IIter> __b,
+                std::pair<_IIter, _IIter> __e, _OutputIterator __r)
+    {
+      if (__b.first != __e.first)
+        {
+          do
+            {
+              *__r++ = *__b.first++;
+            }
+          while (__b.first != __e.first);
+        }
+      else
+        {
+          while (__b.second != __e.second)
+            *__r++ = *__b.second++;
+        }
+      return __r;
+    }
+
+  /** @brief Sequential building blocks for the symmetric difference of
+   *  [__a, __b) and [__c, __d), as used by __parallel_set_operation(). */
+  template<typename _IIter,
+           typename _OutputIterator,
+           typename _Compare>
+    struct __symmetric_difference_func
+    {
+      typedef std::iterator_traits<_IIter> _TraitsType;
+      typedef typename _TraitsType::difference_type _DifferenceType;
+      typedef typename std::pair<_IIter, _IIter> _IteratorPair;
+
+      __symmetric_difference_func(_Compare __comp) : _M_comp(__comp) {}
+
+      _Compare _M_comp;
+
+      /// Write the elements that occur in exactly one of the two ranges.
+      _OutputIterator
+      _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
+                _OutputIterator __r) const
+      {
+        while (__a != __b && __c != __d)
+          {
+            if (_M_comp(*__a, *__c))
+              {
+                *__r = *__a;
+                ++__a;
+                ++__r;
+              }
+            else if (_M_comp(*__c, *__a))
+              {
+                *__r = *__c;
+                ++__c;
+                ++__r;
+              }
+            else
+              {
+                ++__a;
+                ++__c;
+              }
+          }
+        return std::copy(__c, __d, std::copy(__a, __b, __r));
+      }
+
+      /// Number of elements _M_invoke() would write for the same ranges.
+      /// Uses iterator subtraction for the leftover tails.
+      _DifferenceType
+      __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
+      {
+        _DifferenceType __counter = 0;
+
+        while (__a != __b && __c != __d)
+          {
+            if (_M_comp(*__a, *__c))
+              {
+                ++__a;
+                ++__counter;
+              }
+            else if (_M_comp(*__c, *__a))
+              {
+                ++__c;
+                ++__counter;
+              }
+            else
+              {
+                ++__a;
+                ++__c;
+              }
+          }
+
+        return __counter + (__b - __a) + (__d - __c);
+      }
+
+      /// Result when the first range is empty: copy the second range.
+      _OutputIterator
+      __first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const
+      { return std::copy(__c, __d, __out); }
+
+      /// Result when the second range is empty: copy the first range.
+      _OutputIterator
+      __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
+      { return std::copy(__a, __b, __out); }
+    };
+
+
+  /** @brief Sequential building blocks for the difference
+   *  [__a, __b) minus [__c, __d), as used by
+   *  __parallel_set_operation(). */
+  template<typename _IIter,
+           typename _OutputIterator,
+           typename _Compare>
+    struct __difference_func
+    {
+      typedef std::iterator_traits<_IIter> _TraitsType;
+      typedef typename _TraitsType::difference_type _DifferenceType;
+      typedef typename std::pair<_IIter, _IIter> _IteratorPair;
+
+      __difference_func(_Compare __comp) : _M_comp(__comp) {}
+
+      _Compare _M_comp;
+
+      /// Write the elements of the first range that have no equivalent
+      /// in the second range.
+      _OutputIterator
+      _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
+                _OutputIterator __r) const
+      {
+        while (__a != __b && __c != __d)
+          {
+            if (_M_comp(*__a, *__c))
+              {
+                *__r = *__a;
+                ++__a;
+                ++__r;
+              }
+            else if (_M_comp(*__c, *__a))
+              { ++__c; }
+            else
+              {
+                ++__a;
+                ++__c;
+              }
+          }
+        return std::copy(__a, __b, __r);
+      }
+
+      /// Number of elements _M_invoke() would write for the same ranges.
+      _DifferenceType
+      __count(_IIter __a, _IIter __b,
+              _IIter __c, _IIter __d) const
+      {
+        _DifferenceType __counter = 0;
+
+        while (__a != __b && __c != __d)
+          {
+            if (_M_comp(*__a, *__c))
+              {
+                ++__a;
+                ++__counter;
+              }
+            else if (_M_comp(*__c, *__a))
+              { ++__c; }
+            else
+              { ++__a; ++__c; }
+          }
+
+        return __counter + (__b - __a);
+      }
+
+      /// Result when the first range is empty: nothing is written.
+      _OutputIterator
+      __first_empty(_IIter, _IIter, _OutputIterator __out) const
+      { return __out; }
+
+      /// Result when the second range is empty: copy the first range.
+      _OutputIterator
+      __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
+      { return std::copy(__a, __b, __out); }
+    };
+
+
+  /** @brief Sequential building blocks for the intersection of
+   *  [__a, __b) and [__c, __d), as used by __parallel_set_operation(). */
+  template<typename _IIter,
+           typename _OutputIterator,
+           typename _Compare>
+    struct __intersection_func
+    {
+      typedef std::iterator_traits<_IIter> _TraitsType;
+      typedef typename _TraitsType::difference_type _DifferenceType;
+      typedef typename std::pair<_IIter, _IIter> _IteratorPair;
+
+      __intersection_func(_Compare __comp) : _M_comp(__comp) {}
+
+      _Compare _M_comp;
+
+      /// Write the elements of the first range that have an equivalent
+      /// in the second range.
+      _OutputIterator
+      _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
+                _OutputIterator __r) const
+      {
+        while (__a != __b && __c != __d)
+          {
+            if (_M_comp(*__a, *__c))
+              { ++__a; }
+            else if (_M_comp(*__c, *__a))
+              { ++__c; }
+            else
+              {
+                *__r = *__a;
+                ++__a;
+                ++__c;
+                ++__r;
+              }
+          }
+
+        return __r;
+      }
+
+      /// Number of elements _M_invoke() would write for the same ranges.
+      _DifferenceType
+      __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
+      {
+        _DifferenceType __counter = 0;
+
+        while (__a != __b && __c != __d)
+          {
+            if (_M_comp(*__a, *__c))
+              { ++__a; }
+            else if (_M_comp(*__c, *__a))
+              { ++__c; }
+            else
+              {
+                ++__a;
+                ++__c;
+                ++__counter;
+              }
+          }
+
+        return __counter;
+      }
+
+      /// Result when the first range is empty: nothing is written.
+      _OutputIterator
+      __first_empty(_IIter, _IIter, _OutputIterator __out) const
+      { return __out; }
+
+      /// Result when the second range is empty: nothing is written.
+      _OutputIterator
+      __second_empty(_IIter, _IIter, _OutputIterator __out) const
+      { return __out; }
+    };
+
+  /** @brief Sequential building blocks for the union of [__a, __b) and
+   *  [__c, __d), as used by __parallel_set_operation(). */
+  template<class _IIter, class _OutputIterator, class _Compare>
+    struct __union_func
+    {
+      typedef typename std::iterator_traits<_IIter>::difference_type
+      _DifferenceType;
+
+      __union_func(_Compare __comp) : _M_comp(__comp) {}
+
+      _Compare _M_comp;
+
+      /// Write every element once; for equivalent elements the one from
+      /// the first range is taken.
+      _OutputIterator
+      _M_invoke(_IIter __a, const _IIter __b, _IIter __c,
+                const _IIter __d, _OutputIterator __r) const
+      {
+        while (__a != __b && __c != __d)
+          {
+            if (_M_comp(*__a, *__c))
+              {
+                *__r = *__a;
+                ++__a;
+              }
+            else if (_M_comp(*__c, *__a))
+              {
+                *__r = *__c;
+                ++__c;
+              }
+            else
+              {
+                *__r = *__a;
+                ++__a;
+                ++__c;
+              }
+            ++__r;
+          }
+        return std::copy(__c, __d, std::copy(__a, __b, __r));
+      }
+
+      /// Number of elements _M_invoke() would write for the same ranges.
+      _DifferenceType
+      __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
+      {
+        _DifferenceType __counter = 0;
+
+        while (__a != __b && __c != __d)
+          {
+            if (_M_comp(*__a, *__c))
+              { ++__a; }
+            else if (_M_comp(*__c, *__a))
+              { ++__c; }
+            else
+              {
+                ++__a;
+                ++__c;
+              }
+            ++__counter;
+          }
+
+        __counter += (__b - __a);
+        __counter += (__d - __c);
+        return __counter;
+      }
+
+      /// Result when the first range is empty: copy the second range.
+      _OutputIterator
+      __first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const
+      { return std::copy(__c, __d, __out); }
+
+      /// Result when the second range is empty: copy the first range.
+      _OutputIterator
+      __second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
+      { return std::copy(__a, __b, __out); }
+    };
+
+  /** @brief Parallel skeleton shared by the four set operations.
+   *
+   *  The combined input is cut at the ranks computed by equally_split()
+   *  into one block per thread plus one final block; the cut positions
+   *  in both sequences come from multiseq_partition().  Thread 0 writes
+   *  the first block right away while the other threads only count the
+   *  size of their output.  Once all lengths are known, each of those
+   *  threads writes its block at its output offset and thread 0 writes
+   *  the final block.
+   *  @param __begin1 Begin iterator of first sequence.
+   *  @param __end1 End iterator of first sequence.
+   *  @param __begin2 Begin iterator of second sequence.
+   *  @param __end2 End iterator of second sequence.
+   *  @param __result Begin iterator of the output; advanced with +=.
+   *  @param __op Functor providing _M_comp, _M_invoke(), __count(),
+   *  __first_empty() and __second_empty().
+   *  @return Iterator returned by writing the final block, i.e. the end
+   *  of the output. */
+  template<typename _IIter,
+           typename _OutputIterator,
+           typename _Operation>
+    _OutputIterator
+    __parallel_set_operation(_IIter __begin1, _IIter __end1,
+                             _IIter __begin2, _IIter __end2,
+                             _OutputIterator __result, _Operation __op)
+    {
+      _GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2))
+
+      typedef std::iterator_traits<_IIter> _TraitsType;
+      typedef typename _TraitsType::difference_type _DifferenceType;
+      typedef typename std::pair<_IIter, _IIter> _IteratorPair;
+
+      if (__begin1 == __end1)
+        return __op.__first_empty(__begin2, __end2, __result);
+
+      if (__begin2 == __end2)
+        return __op.__second_empty(__begin1, __end1, __result);
+
+      const _DifferenceType __size = (__end1 - __begin1) + (__end2 - __begin2);
+
+      const _IteratorPair __sequence[2] = { std::make_pair(__begin1, __end1),
+                                            std::make_pair(__begin2, __end2) };
+      _OutputIterator __return_value = __result;
+      _DifferenceType *__borders;
+      _IteratorPair *__block_begins;
+      _DifferenceType* __lengths;
+
+      _ThreadIndex __num_threads =
+        std::min<_DifferenceType>(__get_max_threads(),
+            std::min(__end1 - __begin1, __end2 - __begin2));
+
+#     pragma omp parallel num_threads(__num_threads)
+      {
+#       pragma omp single
+        {
+          // The team may be smaller than requested.
+          __num_threads = omp_get_num_threads();
+
+          __borders = new _DifferenceType[__num_threads + 2];
+          equally_split(__size, __num_threads + 1, __borders);
+          __block_begins = new _IteratorPair[__num_threads + 1];
+          // Very start.
+          __block_begins[0] = std::make_pair(__begin1, __begin2);
+          __lengths = new _DifferenceType[__num_threads];
+        } //single
+
+        _ThreadIndex __iam = omp_get_thread_num();
+
+        // Result from multiseq_partition.
+        _IIter __offset[2];
+        const _DifferenceType __rank = __borders[__iam + 1];
+
+        multiseq_partition(__sequence, __sequence + 2,
+                           __rank, __offset, __op._M_comp);
+
+        // Dereferencing is guarded by the two range checks below.
+        // The remaining test is equivalence under _M_comp of
+        // *(__offset[0] - 1) and *__offset[1].
+        if (__offset[ 0 ] != __begin1 && __offset[1] != __end2
+            && !__op._M_comp(*(__offset[0] - 1), *__offset[1])
+            && !__op._M_comp(*__offset[1], *(__offset[0] - 1)))
+          {
+            // Avoid split between globally equal elements: move one to
+            // front in first sequence.
+            --__offset[0];
+          }
+
+        _IteratorPair __block_end = __block_begins[__iam + 1] =
+          _IteratorPair(__offset[0], __offset[1]);
+
+        // Make sure all threads have their block_begin result written out.
+#       pragma omp barrier
+
+        _IteratorPair __block_begin = __block_begins[__iam];
+
+        // Begin working for the first block, while the others except
+        // the last start to count.
+        if (__iam == 0)
+          {
+            // The first thread can copy already.
+            __lengths[ __iam ] =
+              __op._M_invoke(__block_begin.first, __block_end.first,
+                             __block_begin.second, __block_end.second,
+                             __result) - __result;
+          }
+        else
+          {
+            __lengths[ __iam ] =
+              __op.__count(__block_begin.first, __block_end.first,
+                           __block_begin.second, __block_end.second);
+          }
+
+        // Make sure everyone wrote their lengths.
+#       pragma omp barrier
+
+        _OutputIterator __r = __result;
+
+        if (__iam == 0)
+          {
+            // Do the last block.
+            for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
+              __r += __lengths[__i];
+
+            __block_begin = __block_begins[__num_threads];
+
+            // Return the result iterator of the last block.
+            __return_value =
+              __op._M_invoke(__block_begin.first, __end1,
+                             __block_begin.second, __end2, __r);
+
+          }
+        else
+          {
+            for (_ThreadIndex __i = 0; __i < __iam; ++__i)
+              __r += __lengths[ __i ];
+
+            // Reset begins for copy pass.
+            __op._M_invoke(__block_begin.first, __block_end.first,
+                           __block_begin.second, __block_end.second, __r);
+          }
+      }
+
+      // The scratch arrays were allocated inside the parallel region and
+      // used to be leaked; all threads have joined here, so free them.
+      delete[] __borders;
+      delete[] __block_begins;
+      delete[] __lengths;
+
+      return __return_value;
+    }
+
+  /// Parallel set_union: __parallel_set_operation() with __union_func.
+  template<typename _IIter,
+           typename _OutputIterator,
+           typename _Compare>
+    inline _OutputIterator
+    __parallel_set_union(_IIter __begin1, _IIter __end1,
+                         _IIter __begin2, _IIter __end2,
+                         _OutputIterator __result, _Compare __comp)
+    {
+      return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
+                                      __result,
+                                      __union_func< _IIter, _OutputIterator,
+                                      _Compare>(__comp));
+    }
+
+  /// Parallel set_intersection: __parallel_set_operation() with
+  /// __intersection_func.
+  template<typename _IIter,
+           typename _OutputIterator,
+           typename _Compare>
+    inline _OutputIterator
+    __parallel_set_intersection(_IIter __begin1, _IIter __end1,
+                                _IIter __begin2, _IIter __end2,
+                                _OutputIterator __result, _Compare __comp)
+    {
+      return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
+                                      __result,
+                                      __intersection_func<_IIter,
+                                      _OutputIterator, _Compare>(__comp));
+    }
+
+  /// Parallel set_difference: __parallel_set_operation() with
+  /// __difference_func.
+  template<typename _IIter,
+           typename _OutputIterator,
+           typename _Compare>
+    inline _OutputIterator
+    __parallel_set_difference(_IIter __begin1, _IIter __end1,
+                              _IIter __begin2, _IIter __end2,
+                              _OutputIterator __result, _Compare __comp)
+    {
+      return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
+                                      __result,
+                                      __difference_func<_IIter,
+                                      _OutputIterator, _Compare>(__comp));
+    }
+
+  /// Parallel set_symmetric_difference: __parallel_set_operation() with
+  /// __symmetric_difference_func.
+  template<typename _IIter,
+           typename _OutputIterator,
+           typename _Compare>
+    inline _OutputIterator
+    __parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
+                                        _IIter __begin2, _IIter __end2,
+                                        _OutputIterator __result,
+                                        _Compare __comp)
+    {
+      return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
+                                      __result,
+                                      __symmetric_difference_func<_IIter,
+                                      _OutputIterator, _Compare>(__comp));
+    }
+}
+
+#endif /* _GLIBCXX_PARALLEL_SET_OPERATIONS_H */
diff --git a/libstdc++-v3/include/parallel/settings.h b/libstdc++-v3/include/parallel/settings.h
new file mode 100644
index 000000000..5fc6a75c9
--- /dev/null
+++ 
b/libstdc++-v3/include/parallel/settings.h
@@ -0,0 +1,343 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file parallel/settings.h
+ *  @brief Runtime settings and tuning parameters, heuristics to decide
+ *  whether to use parallelized algorithms.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ *
+ *  @section parallelization_decision
+ *  The decision whether to run an algorithm in parallel.
+ *
+ *  There are several ways the user can switch on and off the parallel
+ *  execution of an algorithm, both at compile- and run-time.
+ *
+ *  Only sequential execution can be forced at compile-time. This
+ *  reduces code size and protects code parts that have
+ *  non-thread-safe side effects.
+ *
+ *  Ultimately, forcing parallel execution at compile-time makes
+ *  little sense. Often, the sequential algorithm implementation is used
+ *  as a subroutine, so no reduction in code size can be achieved. Also,
+ *  the machine the program is run on might have only one processor
+ *  core, so to avoid overhead, the algorithm is executed
+ *  sequentially.
+ *
+ *  To force sequential execution of an algorithm ultimately at
+ *  compile-time, the user must add the tag
+ *  __gnu_parallel::sequential_tag() to the end of the parameter list,
+ *  e. g.
+ *
+ *  \code
+ *  std::sort(__v.begin(), __v.end(), __gnu_parallel::sequential_tag());
+ *  \endcode
+ *
+ *  This is compatible with all overloaded algorithm variants. No
+ *  additional code will be instantiated, at all. The same holds for
+ *  most algorithm calls with iterators not providing random access.
+ *
+ *  If the algorithm call is not forced to be executed sequentially
+ *  at compile-time, the decision is made at run-time.
+ *  The global variable __gnu_parallel::_Settings::algorithm_strategy
+ *  is checked. It is a tristate variable corresponding to:
+ *
+ *  a. force_sequential, meaning the sequential algorithm is executed.
+ *  b. force_parallel, meaning the parallel algorithm is executed.
+ *  c. heuristic
+ *
+ *  For heuristic, the parallel algorithm implementation is called
+ *  only if the input size is sufficiently large. For most
+ *  algorithms, the input size is the (combined) length of the input
+ *  sequence(s). The threshold can be set by the user, individually
+ *  for each algorithm. The according variables are called
+ *  __gnu_parallel::_Settings::[algorithm]_minimal_n .
+ *
+ *  For some of the algorithms, there are even more tuning options,
+ *  e. g. the ability to choose from multiple algorithm variants. See
+ *  below for details.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_SETTINGS_H
+#define _GLIBCXX_PARALLEL_SETTINGS_H 1
+
+#include <parallel/types.h>
+
+/**
+  * @brief Determine at run-time if the parallel variant of an
+  * algorithm should be called.
+  * @param __c A condition that is convertible to bool that is overruled by
+  * __gnu_parallel::_Settings::algorithm_strategy. Usually a decision
+  * based on the input size.
+  */
+#define _GLIBCXX_PARALLEL_CONDITION(__c) \
+  (__gnu_parallel::_Settings::get().algorithm_strategy \
+   != __gnu_parallel::force_sequential \
+   && ((__gnu_parallel::__get_max_threads() > 1 && (__c)) \
+       || __gnu_parallel::_Settings::get().algorithm_strategy \
+          == __gnu_parallel::force_parallel))
+
+/*
+inline bool
+parallel_condition(bool __c)
+{
+  bool ret = false;
+  const _Settings& __s = _Settings::get();
+  if (__s.algorithm_strategy != force_sequential)
+    {
+      if (__s.algorithm_strategy == force_parallel)
+        ret = true;
+      else
+        ret = __get_max_threads() > 1 && __c;
+    }
+  return ret;
+}
+*/
+
+namespace __gnu_parallel
+{
+  /// class _Settings
+  /// Run-time settings for the parallel mode including all tunable parameters.
+  struct _Settings
+  {
+    _AlgorithmStrategy          algorithm_strategy;
+
+    _SortAlgorithm              sort_algorithm;
+    _PartialSumAlgorithm        partial_sum_algorithm;
+    _MultiwayMergeAlgorithm     multiway_merge_algorithm;
+    _FindAlgorithm              find_algorithm;
+
+    _SplittingAlgorithm         sort_splitting;
+    _SplittingAlgorithm         merge_splitting;
+    _SplittingAlgorithm         multiway_merge_splitting;
+
+    // Per-algorithm settings.
+
+    /// Minimal input size for accumulate.
+    _SequenceIndex              accumulate_minimal_n;
+
+    /// Minimal input size for adjacent_difference.
+    unsigned int                adjacent_difference_minimal_n;
+
+    /// Minimal input size for count and count_if.
+    _SequenceIndex              count_minimal_n;
+
+    /// Minimal input size for fill.
+    _SequenceIndex              fill_minimal_n;
+
+    /// Block size increase factor for find.
+    double                      find_increasing_factor;
+
+    /// Initial block size for find.
+    _SequenceIndex              find_initial_block_size;
+
+    /// Maximal block size for find.
+    _SequenceIndex              find_maximum_block_size;
+
+    /// Start with looking for this many elements sequentially, for find.
+    _SequenceIndex              find_sequential_search_size;
+
+    /// Minimal input size for for_each.
+    _SequenceIndex              for_each_minimal_n;
+
+    /// Minimal input size for generate.
+    _SequenceIndex              generate_minimal_n;
+
+    /// Minimal input size for max_element.
+    _SequenceIndex              max_element_minimal_n;
+
+    /// Minimal input size for merge.
+    _SequenceIndex              merge_minimal_n;
+
+    /// Oversampling factor for merge.
+    unsigned int                merge_oversampling;
+
+    /// Minimal input size for min_element.
+    _SequenceIndex              min_element_minimal_n;
+
+    /// Minimal input size for multiway_merge.
+    _SequenceIndex              multiway_merge_minimal_n;
+
+    /// Minimal k for multiway_merge (presumably the number of sequences).
+    int                         multiway_merge_minimal_k;
+
+    /// Oversampling factor for multiway_merge.
+    unsigned int                multiway_merge_oversampling;
+
+    /// Minimal input size for nth_element.
+    _SequenceIndex              nth_element_minimal_n;
+
+    /// Chunk size for partition.
+    _SequenceIndex              partition_chunk_size;
+
+    /// Chunk size for partition, relative to input size. If > 0.0,
+    /// this value overrides partition_chunk_size.
+    double                      partition_chunk_share;
+
+    /// Minimal input size for partition.
+    _SequenceIndex              partition_minimal_n;
+
+    /// Minimal input size for partial_sort.
+    _SequenceIndex              partial_sort_minimal_n;
+
+    /// Ratio for partial_sum. Assume "sum and write result" to be
+    /// this factor slower than just "sum".
+    float                       partial_sum_dilation;
+
+    /// Minimal input size for partial_sum.
+    unsigned int                partial_sum_minimal_n;
+
+    /// Minimal input size for random_shuffle.
+    unsigned int                random_shuffle_minimal_n;
+
+    /// Minimal input size for replace and replace_if.
+    _SequenceIndex              replace_minimal_n;
+
+    /// Minimal input size for set_difference.
+    _SequenceIndex              set_difference_minimal_n;
+
+    /// Minimal input size for set_intersection.
+    _SequenceIndex              set_intersection_minimal_n;
+
+    /// Minimal input size for set_symmetric_difference.
+    _SequenceIndex              set_symmetric_difference_minimal_n;
+
+    /// Minimal input size for set_union.
+    _SequenceIndex              set_union_minimal_n;
+
+    /// Minimal input size for parallel sorting.
+    _SequenceIndex              sort_minimal_n;
+
+    /// Oversampling factor for parallel std::sort (MWMS).
+    unsigned int                sort_mwms_oversampling;
+
+    /// Number of samples to take to find a good pivot (quicksort).
+    unsigned int                sort_qs_num_samples_preset;
+
+    /// Maximal subsequence length to switch to unbalanced base case.
+    /// Applies to std::sort with dynamically load-balanced quicksort.
+    _SequenceIndex              sort_qsb_base_case_maximal_n;
+
+    /// Minimal input size for parallel std::transform.
+    _SequenceIndex              transform_minimal_n;
+
+    /// Minimal input size for unique_copy.
+    _SequenceIndex              unique_copy_minimal_n;
+
+    _SequenceIndex              workstealing_chunk_size;
+
+    // Hardware dependent tuning parameters.
+
+    /// Size of the L1 cache in bytes (underestimation).
+    unsigned long long          L1_cache_size;
+
+    /// Size of the L2 cache in bytes (underestimation).
+    unsigned long long          L2_cache_size;
+
+    /// Size of the Translation Lookaside Buffer (underestimation).
+    unsigned int                TLB_size;
+
+    /// Overestimation of cache line size. Used to avoid false
+    /// sharing, i.e. elements of different threads are at least this
+    /// amount apart.
+    unsigned int                cache_line_size;
+
+    // Statistics.
+
+    /// The number of stolen ranges in load-balanced quicksort.
+    _SequenceIndex              qsb_steals;
+
+    /// Minimal input size for search and search_n.
+    _SequenceIndex              search_minimal_n;
+
+    /// Block size scale-down factor with respect to current position.
+    float                       find_scale_factor;
+
+    /// Get the global settings.
+    _GLIBCXX_CONST static const _Settings&
+    get() throw();
+
+    /// Set the global settings.
+    static void
+    set(_Settings&) throw();
+
+    explicit
+    _Settings() :
+            algorithm_strategy(heuristic),
+            sort_algorithm(MWMS),
+            partial_sum_algorithm(LINEAR),
+            multiway_merge_algorithm(LOSER_TREE),
+            find_algorithm(CONSTANT_SIZE_BLOCKS),
+            sort_splitting(EXACT),
+            merge_splitting(EXACT),
+            multiway_merge_splitting(EXACT),
+            accumulate_minimal_n(1000),
+            adjacent_difference_minimal_n(1000),
+            count_minimal_n(1000),
+            fill_minimal_n(1000),
+            find_increasing_factor(2.0),
+            find_initial_block_size(256),
+            find_maximum_block_size(8192),
+            find_sequential_search_size(256),
+            for_each_minimal_n(1000),
+            generate_minimal_n(1000),
+            max_element_minimal_n(1000),
+            merge_minimal_n(1000),
+            merge_oversampling(10),
+            min_element_minimal_n(1000),
+            multiway_merge_minimal_n(1000),
+            multiway_merge_minimal_k(2), multiway_merge_oversampling(10),
+            nth_element_minimal_n(1000),
+            partition_chunk_size(1000),
+            partition_chunk_share(0.0),
+            partition_minimal_n(1000),
+            partial_sort_minimal_n(1000),
+            partial_sum_dilation(1.0f),
+            partial_sum_minimal_n(1000),
+            random_shuffle_minimal_n(1000),
+            replace_minimal_n(1000),
+            set_difference_minimal_n(1000),
+            set_intersection_minimal_n(1000),
+            set_symmetric_difference_minimal_n(1000),
+            set_union_minimal_n(1000),
+            sort_minimal_n(1000),
+            sort_mwms_oversampling(10),
+            sort_qs_num_samples_preset(100),
+            sort_qsb_base_case_maximal_n(100),
+            transform_minimal_n(1000),
+            unique_copy_minimal_n(10000),
+            workstealing_chunk_size(100),
+            L1_cache_size(16 << 10),
+            L2_cache_size(256 << 10),
+            TLB_size(128),
+            cache_line_size(64),
+            qsb_steals(0),
+            search_minimal_n(1000),
+            find_scale_factor(0.01f)
+    { }
+  };
+}
+
+#endif /* _GLIBCXX_PARALLEL_SETTINGS_H */
diff --git a/libstdc++-v3/include/parallel/sort.h b/libstdc++-v3/include/parallel/sort.h
new file mode 100644
index 000000000..f1a163c63
--- /dev/null
+++ b/libstdc++-v3/include/parallel/sort.h
@@ -0,0 +1,233 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file parallel/sort.h
+ *  @brief Parallel sorting algorithm switch.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_SORT_H
+#define _GLIBCXX_PARALLEL_SORT_H 1
+
+#include <parallel/basic_iterator.h>
+#include <parallel/features.h>
+#include <parallel/parallel.h>
+
+#if _GLIBCXX_ASSERTIONS
+#include <parallel/checkers.h>
+#endif
+
+#if _GLIBCXX_MERGESORT
+#include <parallel/multiway_mergesort.h>
+#endif
+
+#if _GLIBCXX_QUICKSORT
+#include <parallel/quicksort.h>
+#endif
+
+#if _GLIBCXX_BAL_QUICKSORT
+#include <parallel/balanced_quicksort.h>
+#endif
+
+namespace __gnu_parallel
+{
+  // Prototype; the overloads below select the algorithm by tag type.
+  template<bool __stable, typename _RAIter,
+           typename _Compare, typename _Parallelism>
+    void
+    __parallel_sort(_RAIter __begin, _RAIter __end,
+                    _Compare __comp, _Parallelism __parallelism);
+
+  /**
+   *  @brief Choose multiway mergesort, splitting variant at run-time,
+   *  for parallel sorting.
+   *  @param __begin Begin iterator of input sequence.
+   *  @param __end End iterator of input sequence.
+   *  @param __comp Comparator.
+   *  @callgraph
+   */
+  template<bool __stable, typename _RAIter, typename _Compare>
+    inline void
+    __parallel_sort(_RAIter __begin, _RAIter __end,
+                    _Compare __comp, multiway_mergesort_tag __parallelism)
+    {
+      _GLIBCXX_CALL(__end - __begin)
+
+      if(_Settings::get().sort_splitting == EXACT)
+        parallel_sort_mwms<__stable, true>
+          (__begin, __end, __comp, __parallelism.__get_num_threads());
+      else
+        parallel_sort_mwms<__stable, false>
+          (__begin, __end, __comp, __parallelism.__get_num_threads());
+    }
+
+  /**
+   *  @brief Choose multiway mergesort with exact splitting,
+   *  for parallel sorting.
+   *  @param __begin Begin iterator of input sequence.
+   *  @param __end End iterator of input sequence.
+   *  @param __comp Comparator.
+   *  @callgraph
+   */
+  template<bool __stable, typename _RAIter, typename _Compare>
+    inline void
+    __parallel_sort(_RAIter __begin, _RAIter __end,
+                    _Compare __comp,
+                    multiway_mergesort_exact_tag __parallelism)
+    {
+      _GLIBCXX_CALL(__end - __begin)
+
+      parallel_sort_mwms<__stable, true>
+        (__begin, __end, __comp, __parallelism.__get_num_threads());
+    }
+
+  /**
+   *  @brief Choose multiway mergesort with splitting by sampling,
+   *  for parallel sorting.
+   *  @param __begin Begin iterator of input sequence.
+   *  @param __end End iterator of input sequence.
+   *  @param __comp Comparator.
+   *  @callgraph
+   */
+  template<bool __stable, typename _RAIter, typename _Compare>
+    inline void
+    __parallel_sort(_RAIter __begin, _RAIter __end,
+                    _Compare __comp,
+                    multiway_mergesort_sampling_tag __parallelism)
+    {
+      _GLIBCXX_CALL(__end - __begin)
+
+      parallel_sort_mwms<__stable, false>
+      (__begin, __end, __comp, __parallelism.__get_num_threads());
+    }
+
+  /**
+   *  @brief Choose quicksort for parallel sorting (asserts !__stable).
+   *  @param __begin Begin iterator of input sequence.
+   *  @param __end End iterator of input sequence.
+   *  @param __comp Comparator.
+   *  @callgraph
+   */
+  template<bool __stable, typename _RAIter, typename _Compare>
+    inline void
+    __parallel_sort(_RAIter __begin, _RAIter __end,
+                    _Compare __comp, quicksort_tag __parallelism)
+    {
+      _GLIBCXX_CALL(__end - __begin)
+
+      _GLIBCXX_PARALLEL_ASSERT(__stable == false);
+
+      __parallel_sort_qs(__begin, __end, __comp,
+                         __parallelism.__get_num_threads());
+    }
+
+  /**
+   *  @brief Choose balanced quicksort for parallel sorting.
+   *  @param __begin Begin iterator of input sequence.
+   *  @param __end End iterator of input sequence.
+   *  @param __comp Comparator.
+   *  @tparam __stable Asserted to be false by this overload.
+   *  @callgraph
+   */
+  template<bool __stable, typename _RAIter, typename _Compare>
+    inline void
+    __parallel_sort(_RAIter __begin, _RAIter __end,
+                    _Compare __comp, balanced_quicksort_tag __parallelism)
+    {
+      _GLIBCXX_CALL(__end - __begin)
+
+      _GLIBCXX_PARALLEL_ASSERT(__stable == false);
+
+      __parallel_sort_qsb(__begin, __end, __comp,
+                          __parallelism.__get_num_threads());
+    }
+
+  /**
+   *  @brief Default parallel sort: forwards to the overload for
+   *  multiway mergesort with exact splitting.
+   *  @param __begin Begin iterator of input sequence.
+   *  @param __end End iterator of input sequence.
+   *  @param __comp Comparator.
+   *  @callgraph
+   */
+  template<bool __stable, typename _RAIter, typename _Compare>
+    inline void
+    __parallel_sort(_RAIter __begin, _RAIter __end,
+                    _Compare __comp, default_parallel_tag __parallelism)
+    {
+      _GLIBCXX_CALL(__end - __begin)
+
+      __parallel_sort<__stable>
+        (__begin, __end, __comp,
+         multiway_mergesort_exact_tag(__parallelism.__get_num_threads()));
+    }
+
+  /**
+   *  @brief Choose a parallel sorting algorithm from the run-time settings.
+   *  @param __begin Begin iterator of input sequence.
+   *  @param __end End iterator of input sequence.
+   *  @param __comp Comparator.
+   *  @tparam __stable If true, multiway mergesort is always used.
+   *  @callgraph
+   */
+  template<bool __stable, typename _RAIter, typename _Compare>
+    inline void
+    __parallel_sort(_RAIter __begin, _RAIter __end,
+                    _Compare __comp, parallel_tag __parallelism)
+    {
+      _GLIBCXX_CALL(__end - __begin)
+
+      if (false) ;
+#if _GLIBCXX_MERGESORT
+      else if (__stable || _Settings::get().sort_algorithm == MWMS)
+        {
+          if(_Settings::get().sort_splitting == EXACT)
+            parallel_sort_mwms<__stable, true>
+              (__begin, __end, __comp, __parallelism.__get_num_threads());
+          else
+            // Pass __stable on here too.  This branch is entered whenever
+            // __stable is true, so hard-coding false silently turned a
+            // stable sort into an unstable one under sampling splitting.
+            parallel_sort_mwms<__stable, false>
+              (__begin, __end, __comp, __parallelism.__get_num_threads());
+        }
+#endif
+#if _GLIBCXX_QUICKSORT
+      else if (_Settings::get().sort_algorithm == QS)
+        __parallel_sort_qs(__begin, __end, __comp,
+                           __parallelism.__get_num_threads());
+#endif
+#if _GLIBCXX_BAL_QUICKSORT
+      else if (_Settings::get().sort_algorithm == QS_BALANCED)
+        __parallel_sort_qsb(__begin, __end, __comp,
+                            __parallelism.__get_num_threads());
+#endif
+      else
+        __gnu_sequential::sort(__begin, __end, __comp);
+    }
+} // end namespace __gnu_parallel
+
+#endif /* _GLIBCXX_PARALLEL_SORT_H */
diff --git a/libstdc++-v3/include/parallel/tags.h b/libstdc++-v3/include/parallel/tags.h
new file mode 100644
index 000000000..bc47b2699
--- /dev/null
+++ b/libstdc++-v3/include/parallel/tags.h
@@ -0,0 +1,185 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/**
+ * @file parallel/tags.h
+ * @brief Tags for compile-time selection.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_TAGS_H
+#define _GLIBCXX_PARALLEL_TAGS_H 1
+
+#include <omp.h>
+#include <parallel/types.h>
+
+namespace __gnu_parallel
+{
+  /** @brief Forces sequential execution at compile time. */
+  struct sequential_tag { };
+
+  /** @brief Recommends parallel execution at compile time,
+   *  optionally using a user-specified number of threads. */
+  struct parallel_tag
+  {
+    private:
+      _ThreadIndex _M_num_threads;
+
+    public:
+      /** @brief Default constructor. Use default number of threads. */
+      parallel_tag()
+      : _M_num_threads(0) { }
+
+      /** @brief Constructor. Recommend number of threads to use.
+       *  @param __num_threads Desired number of threads; 0 means default. */
+      parallel_tag(_ThreadIndex __num_threads)
+      : _M_num_threads(__num_threads) { }
+
+      /** @brief Find out desired number of threads.
+       *  @return The stored count, or omp_get_max_threads() if it is 0. */
+      _ThreadIndex __get_num_threads() const
+      {
+        if(_M_num_threads == 0)
+          return omp_get_max_threads();
+        else
+          return _M_num_threads;
+      }
+
+      /** @brief Set the desired number of threads.
+       *  @param __num_threads Desired number of threads. */
+      void set_num_threads(_ThreadIndex __num_threads)
+      { _M_num_threads = __num_threads; }
+  };
+
+  /** @brief Recommends parallel execution using the
+      default parallel algorithm. */
+  struct default_parallel_tag : public parallel_tag
+  {
+    default_parallel_tag() { }
+    default_parallel_tag(_ThreadIndex __num_threads)
+    : parallel_tag(__num_threads) { }
+  };
+
+  /** @brief Recommends parallel execution using dynamic
+      load-balancing at compile time. */
+  struct balanced_tag : public parallel_tag { };
+
+  /** @brief Recommends parallel execution using static
+      load-balancing at compile time. */
+  struct unbalanced_tag : public parallel_tag { };
+
+  /** @brief Recommends parallel execution using OpenMP dynamic
+      load-balancing at compile time. */
+  struct omp_loop_tag : public parallel_tag { };
+
+  /** @brief Recommends parallel execution using OpenMP static
+      load-balancing at compile time. */
+  struct omp_loop_static_tag : public parallel_tag { };
+
+
+  /** @brief Base class for std::find() variants. */
+  struct find_tag { };
+
+
+  /** @brief Forces parallel merging
+   *  with exact splitting, at compile time. */
+  struct exact_tag : public parallel_tag
+  {
+    exact_tag() { }
+    exact_tag(_ThreadIndex __num_threads)
+    : parallel_tag(__num_threads) { }
+  };
+
+  /** @brief Forces parallel merging
+   *  with splitting by sampling, at compile time. */
+  struct sampling_tag : public parallel_tag
+  {
+    sampling_tag() { }
+    sampling_tag(_ThreadIndex __num_threads)
+    : parallel_tag(__num_threads) { }
+  };
+
+
+  /** @brief Forces parallel sorting using multiway mergesort
+   *  at compile time. */
+  struct multiway_mergesort_tag : public parallel_tag
+  {
+    multiway_mergesort_tag() { }
+    multiway_mergesort_tag(_ThreadIndex __num_threads)
+    : parallel_tag(__num_threads) { }
+  };
+
+  /** @brief Forces parallel sorting using multiway mergesort
+   *  with exact splitting at compile time. */
+  struct multiway_mergesort_exact_tag : public parallel_tag
+  {
+    multiway_mergesort_exact_tag() { }
+    multiway_mergesort_exact_tag(_ThreadIndex __num_threads)
+    : parallel_tag(__num_threads) { }
+  };
+
+  /** @brief Forces parallel sorting using multiway mergesort
+   *  with splitting by sampling at compile time. */
+  struct multiway_mergesort_sampling_tag : public parallel_tag
+  {
+    multiway_mergesort_sampling_tag() { }
+    multiway_mergesort_sampling_tag(_ThreadIndex __num_threads)
+    : parallel_tag(__num_threads) { }
+  };
+
+  /** @brief Forces parallel sorting using unbalanced quicksort
+   *  at compile time. */
+  struct quicksort_tag : public parallel_tag
+  {
+    quicksort_tag() { }
+    quicksort_tag(_ThreadIndex __num_threads)
+    : parallel_tag(__num_threads) { }
+  };
+
+  /** @brief Forces parallel sorting using balanced quicksort
+   *  at compile time. */
+  struct balanced_quicksort_tag : public parallel_tag
+  {
+    balanced_quicksort_tag() { }
+    balanced_quicksort_tag(_ThreadIndex __num_threads)
+    : parallel_tag(__num_threads) { }
+  };
+
+
+  /** @brief Selects the growing block size variant for std::find().
+      @see _GLIBCXX_FIND_GROWING_BLOCKS */
+  struct growing_blocks_tag : public find_tag { };
+
+  /** @brief Selects the constant block size variant for std::find().
+      @see _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS */
+  struct constant_size_blocks_tag : public find_tag { };
+
+  /** @brief Selects the equal splitting variant for std::find().
+      @see _GLIBCXX_FIND_EQUAL_SPLIT */
+  struct equal_split_tag : public find_tag { };
+}
+
+#endif /* _GLIBCXX_PARALLEL_TAGS_H */
diff --git a/libstdc++-v3/include/parallel/types.h b/libstdc++-v3/include/parallel/types.h
new file mode 100644
index 000000000..bb6eaa5ed
--- /dev/null
+++ b/libstdc++-v3/include/parallel/types.h
@@ -0,0 +1,137 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/types.h + * @brief Basic types and typedefs. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_TYPES_H +#define _GLIBCXX_PARALLEL_TYPES_H 1 + +#include <cstdlib> +#include <limits> +#include <tr1/cstdint> + +namespace __gnu_parallel +{ + // Enumerated types. + + /// Run-time equivalents for the compile-time tags. + enum _Parallelism + { + /// Not parallel. + sequential, + + /// Parallel unbalanced (equal-sized chunks). + parallel_unbalanced, + + /// Parallel balanced (work-stealing). + parallel_balanced, + + /// Parallel with OpenMP dynamic load-balancing. + parallel_omp_loop, + + /// Parallel with OpenMP static load-balancing. + parallel_omp_loop_static, + + /// Parallel with OpenMP taskqueue construct. + parallel_taskqueue + }; + + /// Strategies for run-time algorithm selection: + // force_sequential, force_parallel, heuristic.
+ enum _AlgorithmStrategy + { + heuristic, + force_sequential, + force_parallel + }; + + /// Sorting algorithms: + // multi-way mergesort, quicksort, load-balanced quicksort. + enum _SortAlgorithm + { + MWMS, + QS, + QS_BALANCED + }; + + /// Merging algorithms: + // bubblesort-alike, loser-tree variants, enum sentinel. + enum _MultiwayMergeAlgorithm + { + LOSER_TREE + }; + + /// Partial sum algorithms: recursive, linear. + enum _PartialSumAlgorithm + { + RECURSIVE, + LINEAR + }; + + /// Sorting/merging algorithms: sampling, exact. + enum _SplittingAlgorithm + { + SAMPLING, + EXACT + }; + + /// Find algorithms: + // growing blocks, equal-sized blocks, equal splitting. + enum _FindAlgorithm + { + GROWING_BLOCKS, + CONSTANT_SIZE_BLOCKS, + EQUAL_SPLIT + }; + + /** + * @brief Unsigned integer to index elements. + * The total number of elements for each algorithm must fit into this type. + */ + typedef uint64_t _SequenceIndex; + + /** + * @brief Unsigned integer to index a thread number. + * The maximum thread number (for each processor) must fit into this type. + */ + typedef uint16_t _ThreadIndex; + + // XXX atomics interface? + /// Longest compare-and-swappable integer type on this platform. + typedef int64_t _CASable; + + /// Number of bits of _CASable. + static const int _CASable_bits = std::numeric_limits<_CASable>::digits; + + /// ::_CASable with the right half of bits set to 1. + static const _CASable _CASable_mask = + ((_CASable(1) << (_CASable_bits / 2)) - 1); +} + +#endif /* _GLIBCXX_PARALLEL_TYPES_H */ diff --git a/libstdc++-v3/include/parallel/unique_copy.h b/libstdc++-v3/include/parallel/unique_copy.h new file mode 100644 index 000000000..72c84a9cc --- /dev/null +++ b/libstdc++-v3/include/parallel/unique_copy.h @@ -0,0 +1,197 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library.
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/unique_copy.h + * @brief Parallel implementations of std::unique_copy(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Robert Geisberger and Robin Dapp. + +#ifndef _GLIBCXX_PARALLEL_UNIQUE_COPY_H +#define _GLIBCXX_PARALLEL_UNIQUE_COPY_H 1 + +#include <parallel/parallel.h> +#include <parallel/multiseq_selection.h> + +namespace __gnu_parallel +{ + /** @brief Parallel std::unique_copy(), with explicit equality predicate. + * + * The input is split into one part more than there are threads. In a + * first pass every thread counts the unique elements of its part + * (thread 0 also writes its part); after a barrier every thread writes + * its part at the output offset given by the preceding counts, and + * thread 0 additionally handles the last part. + * The input iterators are used with random-access arithmetic + * (__last - __first, __first + n, __iter - 1). + * @param __first Begin iterator of input sequence. + * @param __last End iterator of input sequence. + * @param __result Begin iterator of result sequence. + * @param __binary_pred Equality predicate. + * @return End iterator of result sequence.
*/ + template<typename _IIter, + class _OutputIterator, + class _BinaryPredicate> + _OutputIterator + __parallel_unique_copy(_IIter __first, _IIter __last, + _OutputIterator __result, + _BinaryPredicate __binary_pred) + { + _GLIBCXX_CALL(__last - __first) + + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __size = __last - __first; + + if (__size == 0) + return __result; + + // Let the first thread process two parts. + _DifferenceType *__counter; + _DifferenceType *__borders; + + _ThreadIndex __num_threads = __get_max_threads(); + // First part contains at least one element. +# pragma omp parallel num_threads(__num_threads) + { +# pragma omp single + { + __num_threads = omp_get_num_threads(); + __borders = new _DifferenceType[__num_threads + 2]; + equally_split(__size, __num_threads + 1, __borders); + __counter = new _DifferenceType[__num_threads + 1]; + } + + _ThreadIndex __iam = omp_get_thread_num(); + + _DifferenceType __begin, __end; + + // Check for length without duplicates + // Needed for position in output + _DifferenceType __i = 0; + _OutputIterator __out = __result; + + if (__iam == 0) + { + __begin = __borders[0] + 1; // == 1 + __end = __borders[__iam + 1]; + + ++__i; + *__out++ = *__first; + + for (_IIter __iter = __first + __begin; __iter < __first + __end; + ++__iter) + { + if (!__binary_pred(*__iter, *(__iter - 1))) + { + ++__i; + *__out++ = *__iter; + } + } + } + else + { + __begin = __borders[__iam]; //one part + __end = __borders[__iam + 1]; + + for (_IIter __iter = __first + __begin; __iter < __first + __end; + ++__iter) + { + if (!__binary_pred(*__iter, *(__iter - 1))) + ++__i; + } + } + __counter[__iam] = __i; + + // Last part still untouched. + _DifferenceType __begin_output; + +# pragma omp barrier + + // Store result in output on calculated positions.
+ __begin_output = 0; + + if (__iam == 0) + { + for (_ThreadIndex __t = 0; __t < __num_threads; ++__t) + __begin_output += __counter[__t]; + + __i = 0; + + _OutputIterator __iter_out = __result + __begin_output; + + __begin = __borders[__num_threads]; + __end = __size; + + for (_IIter __iter = __first + __begin; __iter < __first + __end; + ++__iter) + { + if (__iter == __first + || !__binary_pred(*__iter, *(__iter - 1))) + { + ++__i; + *__iter_out++ = *__iter; + } + } + + __counter[__num_threads] = __i; + } + else + { + for (_ThreadIndex __t = 0; __t < __iam; __t++) + __begin_output += __counter[__t]; + + _OutputIterator __iter_out = __result + __begin_output; + for (_IIter __iter = __first + __begin; __iter < __first + __end; + ++__iter) + { + if (!__binary_pred(*__iter, *(__iter - 1))) + *__iter_out++ = *__iter; + } + } + } + + _DifferenceType __end_output = 0; + for (_ThreadIndex __t = 0; __t < __num_threads + 1; __t++) + __end_output += __counter[__t]; + + // Release both helper arrays allocated inside the parallel region; + // __counter must stay alive until the sum above has been computed. + delete[] __borders; + delete[] __counter; + + return __result + __end_output; + } + + /** @brief Parallel std::unique_copy(), without explicit equality predicate + * @param __first Begin iterator of input sequence. + * @param __last End iterator of input sequence. + * @param __result Begin iterator of result sequence. + * @return End iterator of result sequence.
*/ + template<typename _IIter, class _OutputIterator> + inline _OutputIterator + __parallel_unique_copy(_IIter __first, _IIter __last, + _OutputIterator __result) + { + typedef typename std::iterator_traits<_IIter>::value_type + _ValueType; + return __parallel_unique_copy(__first, __last, __result, + std::equal_to<_ValueType>()); + } + +}//namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_UNIQUE_COPY_H */ diff --git a/libstdc++-v3/include/parallel/workstealing.h b/libstdc++-v3/include/parallel/workstealing.h new file mode 100644 index 000000000..5e90ae992 --- /dev/null +++ b/libstdc++-v3/include/parallel/workstealing.h @@ -0,0 +1,312 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/workstealing.h + * @brief Parallelization of embarrassingly parallel execution by + * means of work-stealing. + * + * Work stealing is described in + * + * R. D. Blumofe and C. E. Leiserson.
+ * Scheduling multithreaded computations by work stealing. + * Journal of the ACM, 46(5):720–748, 1999. + * + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_WORKSTEALING_H +#define _GLIBCXX_PARALLEL_WORKSTEALING_H 1 + +#include <parallel/parallel.h> +#include <parallel/random_number.h> +#include <parallel/compatibility.h> + +namespace __gnu_parallel +{ + +#define _GLIBCXX_JOB_VOLATILE volatile + + /** @brief One job for a certain thread. */ + template<typename _DifferenceTp> + struct _Job + { + typedef _DifferenceTp _DifferenceType; + + /** @brief First element. + * + * Changed by owning and stealing thread. By stealing thread, + * always incremented. */ + _GLIBCXX_JOB_VOLATILE _DifferenceType _M_first; + + /** @brief Last element. + * + * Changed by owning thread only. */ + _GLIBCXX_JOB_VOLATILE _DifferenceType _M_last; + + /** @brief Number of elements, i.e. @c _M_last-_M_first+1. + * + * Changed by owning thread only. */ + _GLIBCXX_JOB_VOLATILE _DifferenceType _M_load; + }; + + /** @brief Work stealing algorithm for random access iterators. + * + * Uses O(1) additional memory. Synchronization at job lists is + * done with atomic operations. + * @param __begin Begin iterator of element sequence. + * @param __end End iterator of element sequence. + * @param __op User-supplied functor (comparator, predicate, adding + * functor, ...). + * @param __f Functor to @a process an element with __op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param __r Functor to @a add a single result to the already + * processed elements (depends on functionality). + * @param __base Base value for reduction. + * @param __output Pointer to position where final result is written to + * @param __bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result).
+ */ + template<typename _RAIter, + typename _Op, + typename _Fu, + typename _Red, + typename _Result> + _Op + __for_each_template_random_access_workstealing(_RAIter __begin, + _RAIter __end, _Op __op, + _Fu& __f, _Red __r, + _Result __base, + _Result& __output, + typename std::iterator_traits<_RAIter>::difference_type __bound) + { + _GLIBCXX_CALL(__end - __begin) + + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + + const _Settings& __s = _Settings::get(); + + _DifferenceType __chunk_size = + static_cast<_DifferenceType>(__s.workstealing_chunk_size); + + // How many jobs? + _DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound; + + // To avoid false sharing in a cache line. + const int __stride = (__s.cache_line_size * 10 + / sizeof(_Job<_DifferenceType>) + 1); + + // Total number of threads currently working. + _ThreadIndex __busy = 0; + + _Job<_DifferenceType> *__job; + + omp_lock_t __output_lock; + omp_init_lock(&__output_lock); + + // Write base value to output. + __output = __base; + + // No more threads than jobs, at least one thread. + _ThreadIndex __num_threads = __gnu_parallel::max<_ThreadIndex> + (1, __gnu_parallel::min<_DifferenceType>(__length, + __get_max_threads())); + +# pragma omp parallel shared(__busy) num_threads(__num_threads) + { +# pragma omp single + { + __num_threads = omp_get_num_threads(); + + // Create job description array. + __job = new _Job<_DifferenceType>[__num_threads * __stride]; + } + + // Initialization phase. + + // Flags for every thread if it is doing productive work. + bool __iam_working = false; + + // Thread id. + _ThreadIndex __iam = omp_get_thread_num(); + + // This job. + _Job<_DifferenceType>& __my_job = __job[__iam * __stride]; + + // Random number (for work stealing). + _ThreadIndex __victim; + + // Local value for reduction. + _Result __result = _Result(); + + // Number of elements to steal in one attempt.
+ _DifferenceType __steal; + + // Every thread has its own random number generator + // (modulo __num_threads). + _RandomNumber __rand_gen(__iam, __num_threads); + + // This thread is currently working. +# pragma omp atomic + ++__busy; + + __iam_working = true; + + // How many jobs per thread? last thread gets the rest. + __my_job._M_first = static_cast<_DifferenceType> + (__iam * (__length / __num_threads)); + + __my_job._M_last = (__iam == (__num_threads - 1) + ? (__length - 1) + : ((__iam + 1) * (__length / __num_threads) - 1)); + __my_job._M_load = __my_job._M_last - __my_job._M_first + 1; + + // Init result with _M_first value (to have a base value for reduction) + if (__my_job._M_first <= __my_job._M_last) + { + // Cannot use volatile variable directly. + _DifferenceType __my_first = __my_job._M_first; + __result = __f(__op, __begin + __my_first); + ++__my_job._M_first; + --__my_job._M_load; + } + + _RAIter __current; + +# pragma omp barrier + + // Actual work phase + // Work on own or stolen current start + while (__busy > 0) + { + // Work until no productive thread left. +# pragma omp flush(__busy) + + // Thread has own work to do + while (__my_job._M_first <= __my_job._M_last) + { + // fetch-and-add call + // Reserve current job block (size __chunk_size) in my queue. + _DifferenceType __current_job = + __fetch_and_add<_DifferenceType>(&(__my_job._M_first), + __chunk_size); + + // Update _M_load, to make the three values consistent, + // _M_first might have been changed in the meantime + __my_job._M_load = __my_job._M_last - __my_job._M_first + 1; + for (_DifferenceType __job_counter = 0; + __job_counter < __chunk_size + && __current_job <= __my_job._M_last; + ++__job_counter) + { + // Yes: process it! + __current = __begin + __current_job; + ++__current_job; + + // Do actual work. + __result = __r(__result, __f(__op, __current)); + } + +# pragma omp flush(__busy) + } + + // After reaching this point, a thread's job list is empty.
+ if (__iam_working) + { + // This thread no longer has work. +# pragma omp atomic + --__busy; + + __iam_working = false; + } + + _DifferenceType __supposed_first, __supposed_last, + __supposed_load; + do + { + // Find random nonempty deque (not own), do consistency check. + __yield(); +# pragma omp flush(__busy) + __victim = __rand_gen(); + __supposed_first = __job[__victim * __stride]._M_first; + __supposed_last = __job[__victim * __stride]._M_last; + __supposed_load = __job[__victim * __stride]._M_load; + } + while (__busy > 0 + && ((__supposed_load <= 0) + || ((__supposed_first + __supposed_load - 1) + != __supposed_last))); + + if (__busy == 0) + break; + + if (__supposed_load > 0) + { + // The victim's snapshot shows remaining load, so try to steal. + // Number of elements to steal (at least one). + __steal = (__supposed_load < 2) ? 1 : __supposed_load / 2; + + // Push __victim's current start forward. + _DifferenceType __stolen_first = + __fetch_and_add<_DifferenceType> + (&(__job[__victim * __stride]._M_first), __steal); + _DifferenceType __stolen_try = (__stolen_first + __steal + - _DifferenceType(1)); + + __my_job._M_first = __stolen_first; + __my_job._M_last = __gnu_parallel::min(__stolen_try, + __supposed_last); + __my_job._M_load = __my_job._M_last - __my_job._M_first + 1; + + // Has potential work again. +# pragma omp atomic + ++__busy; + __iam_working = true; + +# pragma omp flush(__busy) + } +# pragma omp flush(__busy) + } // end while __busy > 0 + // Add accumulated result to output. + omp_set_lock(&__output_lock); + __output = __r(__output, __result); + omp_unset_lock(&__output_lock); + } + + delete[] __job; + + // Points to last element processed (needed as return value for + // some algorithms like transform) + __f._M_finish_iterator = __begin + __length; + + omp_destroy_lock(&__output_lock); + + return __op; + } +} // end namespace + +#endif /* _GLIBCXX_PARALLEL_WORKSTEALING_H */ |