Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. // -*- C++ -*-
  2.  
  3. // Copyright (C) 2007-2015 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library.  This library is free
  6. // software; you can redistribute it and/or modify it under the terms
  7. // of the GNU General Public License as published by the Free Software
  8. // Foundation; either version 3, or (at your option) any later
  9. // version.
  10.  
  11. // This library is distributed in the hope that it will be useful, but
  12. // WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14. // General Public License for more details.
  15.  
  16. // Under Section 7 of GPL version 3, you are granted additional
  17. // permissions described in the GCC Runtime Library Exception, version
  18. // 3.1, as published by the Free Software Foundation.
  19.  
  20. // You should have received a copy of the GNU General Public License and
  21. // a copy of the GCC Runtime Library Exception along with this program;
  22. // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23. // <http://www.gnu.org/licenses/>.
  24.  
  25. /** @file parallel/partial_sum.h
  26.  *  @brief Parallel implementation of std::partial_sum(), i.e. prefix
  27. *  sums.
  28.  *  This file is a GNU parallel extension to the Standard C++ Library.
  29.  */
  30.  
  31. // Written by Johannes Singler.
  32.  
  33. #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
  34. #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
  35.  
  36. #include <omp.h>
  37. #include <new>
  38. #include <bits/stl_algobase.h>
  39. #include <parallel/parallel.h>
  40. #include <parallel/numericfwd.h>
  41.  
  42. namespace __gnu_parallel
  43. {
  44.   // Problem: there is no 0-element given.
  45.  
  46.   /** @brief Base case prefix sum routine.
  47.    *  @param __begin Begin iterator of input sequence.
  48.    *  @param __end End iterator of input sequence.
  49.    *  @param __result Begin iterator of output sequence.
  50.    *  @param __bin_op Associative binary function.
  51.    *  @param __value Start value. Must be passed since the neutral
  52.    *  element is unknown in general.
  53.    *  @return End iterator of output sequence. */
  54.   template<typename _IIter,
  55.            typename _OutputIterator,
  56.            typename _BinaryOperation>
  57.     _OutputIterator
  58.     __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
  59.                                     _OutputIterator __result,
  60.                                     _BinaryOperation __bin_op,
  61.       typename std::iterator_traits <_IIter>::value_type __value)
  62.     {
  63.       if (__begin == __end)
  64.         return __result;
  65.  
  66.       while (__begin != __end)
  67.         {
  68.           __value = __bin_op(__value, *__begin);
  69.           *__result = __value;
  70.           ++__result;
  71.           ++__begin;
  72.         }
  73.       return __result;
  74.     }
  75.  
  76.   /** @brief Parallel partial sum implementation, two-phase approach,
  77.       no recursion.
  78.       *  @param __begin Begin iterator of input sequence.
  79.       *  @param __end End iterator of input sequence.
  80.       *  @param __result Begin iterator of output sequence.
  81.       *  @param __bin_op Associative binary function.
  82.       *  @param __n Length of sequence.
  83.       *  @return End iterator of output sequence.
  84.       */
  85.   template<typename _IIter,
  86.            typename _OutputIterator,
  87.            typename _BinaryOperation>
  88.     _OutputIterator
  89.     __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
  90.                                   _OutputIterator __result,
  91.                                   _BinaryOperation __bin_op,
  92.       typename std::iterator_traits<_IIter>::difference_type __n)
  93.     {
  94.       typedef std::iterator_traits<_IIter> _TraitsType;
  95.       typedef typename _TraitsType::value_type _ValueType;
  96.       typedef typename _TraitsType::difference_type _DifferenceType;
  97.  
  98.       if (__begin == __end)
  99.         return __result;
  100.  
  101.       _ThreadIndex __num_threads =
  102.         std::min<_DifferenceType>(__get_max_threads(), __n - 1);
  103.  
  104.       if (__num_threads < 2)
  105.         {
  106.           *__result = *__begin;
  107.           return __parallel_partial_sum_basecase(__begin + 1, __end,
  108.                                                  __result + 1, __bin_op,
  109.                                                  *__begin);
  110.         }
  111.  
  112.       _DifferenceType* __borders;
  113.       _ValueType* __sums;
  114.  
  115.       const _Settings& __s = _Settings::get();
  116.  
  117. #     pragma omp parallel num_threads(__num_threads)
  118.       {
  119. #       pragma omp single
  120.         {
  121.           __num_threads = omp_get_num_threads();
  122.            
  123.           __borders = new _DifferenceType[__num_threads + 2];
  124.  
  125.           if (__s.partial_sum_dilation == 1.0f)
  126.             __equally_split(__n, __num_threads + 1, __borders);
  127.           else
  128.             {
  129.               _DifferenceType __first_part_length =
  130.                   std::max<_DifferenceType>(1,
  131.                     __n / (1.0f + __s.partial_sum_dilation * __num_threads));
  132.               _DifferenceType __chunk_length =
  133.                   (__n - __first_part_length) / __num_threads;
  134.               _DifferenceType __borderstart =
  135.                   __n - __num_threads * __chunk_length;
  136.               __borders[0] = 0;
  137.               for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
  138.                 {
  139.                   __borders[__i] = __borderstart;
  140.                   __borderstart += __chunk_length;
  141.                 }
  142.               __borders[__num_threads + 1] = __n;
  143.             }
  144.  
  145.           __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
  146.                                                            * __num_threads));
  147.           _OutputIterator __target_end;
  148.         } //single
  149.  
  150.         _ThreadIndex __iam = omp_get_thread_num();
  151.         if (__iam == 0)
  152.           {
  153.             *__result = *__begin;
  154.             __parallel_partial_sum_basecase(__begin + 1,
  155.                                             __begin + __borders[1],
  156.                                             __result + 1,
  157.                                             __bin_op, *__begin);
  158.             ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
  159.           }
  160.         else
  161.           {
  162.             ::new(&(__sums[__iam]))
  163.               _ValueType(__gnu_parallel::accumulate(
  164.                                          __begin + __borders[__iam] + 1,
  165.                                          __begin + __borders[__iam + 1],
  166.                                          *(__begin + __borders[__iam]),
  167.                                          __bin_op,
  168.                                          __gnu_parallel::sequential_tag()));
  169.           }
  170.  
  171. #       pragma omp barrier
  172.  
  173. #       pragma omp single
  174.         __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
  175.                                         __sums + 1, __bin_op, __sums[0]);
  176.  
  177. #       pragma omp barrier
  178.  
  179.         // Still same team.
  180.         __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
  181.                                         __begin + __borders[__iam + 2],
  182.                                         __result + __borders[__iam + 1],
  183.                                         __bin_op, __sums[__iam]);
  184.       } //parallel
  185.  
  186.       for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
  187.         __sums[__i].~_ValueType();
  188.       ::operator delete(__sums);
  189.  
  190.       delete[] __borders;
  191.  
  192.       return __result + __n;
  193.     }
  194.  
  195.   /** @brief Parallel partial sum front-__end.
  196.    *  @param __begin Begin iterator of input sequence.
  197.    *  @param __end End iterator of input sequence.
  198.    *  @param __result Begin iterator of output sequence.
  199.    *  @param __bin_op Associative binary function.
  200.    *  @return End iterator of output sequence. */
  201.   template<typename _IIter,
  202.            typename _OutputIterator,
  203.            typename _BinaryOperation>
  204.     _OutputIterator
  205.     __parallel_partial_sum(_IIter __begin, _IIter __end,
  206.                            _OutputIterator __result, _BinaryOperation __bin_op)
  207.     {
  208.       _GLIBCXX_CALL(__begin - __end)
  209.  
  210.       typedef std::iterator_traits<_IIter> _TraitsType;
  211.       typedef typename _TraitsType::value_type _ValueType;
  212.       typedef typename _TraitsType::difference_type _DifferenceType;
  213.  
  214.       _DifferenceType __n = __end - __begin;
  215.  
  216.       switch (_Settings::get().partial_sum_algorithm)
  217.         {
  218.         case LINEAR:
  219.           // Need an initial offset.
  220.           return __parallel_partial_sum_linear(__begin, __end, __result,
  221.                                                __bin_op, __n);
  222.         default:
  223.           // Partial_sum algorithm not implemented.
  224.           _GLIBCXX_PARALLEL_ASSERT(0);
  225.           return __result + __n;
  226.         }
  227.     }
  228. }
  229.  
  230. #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */
  231.