Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. // -*- C++ -*-
  2.  
  3. // Copyright (C) 2007-2013 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library.  This library is free
  6. // software; you can redistribute it and/or modify it under the terms
  7. // of the GNU General Public License as published by the Free Software
  8. // Foundation; either version 3, or (at your option) any later
  9. // version.
  10.  
  11. // This library is distributed in the hope that it will be useful, but
  12. // WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14. // General Public License for more details.
  15.  
  16. // Under Section 7 of GPL version 3, you are granted additional
  17. // permissions described in the GCC Runtime Library Exception, version
  18. // 3.1, as published by the Free Software Foundation.
  19.  
  20. // You should have received a copy of the GNU General Public License and
  21. // a copy of the GCC Runtime Library Exception along with this program;
  22. // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23. // <http://www.gnu.org/licenses/>.
  24.  
  25. /** @file parallel/multiway_mergesort.h
  26.  *  @brief Parallel multiway merge sort.
  27.  *  This file is a GNU parallel extension to the Standard C++ Library.
  28.  */
  29.  
  30. // Written by Johannes Singler.
  31.  
  32. #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
  33. #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
  34.  
  35. #include <vector>
  36.  
  37. #include <parallel/basic_iterator.h>
  38. #include <bits/stl_algo.h>
  39. #include <parallel/parallel.h>
  40. #include <parallel/multiway_merge.h>
  41.  
  42. namespace __gnu_parallel
  43. {
  44.   /** @brief Subsequence description. */
  45.   template<typename _DifferenceTp>
  46.     struct _Piece
  47.     {
  48.       typedef _DifferenceTp _DifferenceType;
  49.  
  50.       /** @brief Begin of subsequence. */
  51.       _DifferenceType _M_begin;
  52.  
  53.       /** @brief End of subsequence. */
  54.       _DifferenceType _M_end;
  55.     };
  56.  
  57.   /** @brief Data accessed by all threads.
  58.    *
  59.    *  PMWMS = parallel multiway mergesort */
  60.   template<typename _RAIter>
  61.     struct _PMWMSSortingData
  62.     {
  63.       typedef std::iterator_traits<_RAIter> _TraitsType;
  64.       typedef typename _TraitsType::value_type _ValueType;
  65.       typedef typename _TraitsType::difference_type _DifferenceType;
  66.  
  67.       /** @brief Number of threads involved. */
  68.       _ThreadIndex _M_num_threads;
  69.  
  70.       /** @brief Input __begin. */
  71.       _RAIter _M_source;
  72.  
  73.       /** @brief Start indices, per thread. */
  74.       _DifferenceType* _M_starts;
  75.  
  76.       /** @brief Storage in which to sort. */
  77.       _ValueType** _M_temporary;
  78.  
  79.       /** @brief Samples. */
  80.       _ValueType* _M_samples;
  81.  
  82.       /** @brief Offsets to add to the found positions. */
  83.       _DifferenceType* _M_offsets;
  84.  
  85.       /** @brief Pieces of data to merge @c [thread][__sequence] */
  86.       std::vector<_Piece<_DifferenceType> >* _M_pieces;
  87.   };
  88.  
  89.   /**
  90.    *  @brief Select _M_samples from a sequence.
  91.    *  @param __sd Pointer to algorithm data. _Result will be placed in
  92.    *  @c __sd->_M_samples.
  93.    *  @param __num_samples Number of _M_samples to select.
  94.    */
  95.   template<typename _RAIter, typename _DifferenceTp>
  96.     void
  97.     __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
  98.                         _DifferenceTp __num_samples)
  99.     {
  100.       typedef std::iterator_traits<_RAIter> _TraitsType;
  101.       typedef typename _TraitsType::value_type _ValueType;
  102.       typedef _DifferenceTp _DifferenceType;
  103.  
  104.       _ThreadIndex __iam = omp_get_thread_num();
  105.  
  106.       _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
  107.  
  108.       __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
  109.                       __num_samples + 1, __es);
  110.  
  111.       for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
  112.         ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
  113.             _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
  114.                                        + __es[__i + 1]]);
  115.  
  116.       delete[] __es;
  117.     }
  118.  
  119.   /** @brief Split consistently. */
  120.   template<bool __exact, typename _RAIter,
  121.            typename _Compare, typename _SortingPlacesIterator>
  122.     struct _SplitConsistently
  123.     { };
  124.  
  125.   /** @brief Split by exact splitting. */
  126.   template<typename _RAIter, typename _Compare,
  127.            typename _SortingPlacesIterator>
  128.     struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
  129.     {
  130.       void
  131.       operator()(const _ThreadIndex __iam,
  132.                  _PMWMSSortingData<_RAIter>* __sd,
  133.                  _Compare& __comp,
  134.                  const typename
  135.                  std::iterator_traits<_RAIter>::difference_type
  136.                  __num_samples) const
  137.       {
  138. #       pragma omp barrier
  139.  
  140.         std::vector<std::pair<_SortingPlacesIterator,
  141.                               _SortingPlacesIterator> >
  142.           __seqs(__sd->_M_num_threads);
  143.         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
  144.           __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
  145.                                        __sd->_M_temporary[__s]
  146.                                        + (__sd->_M_starts[__s + 1]
  147.                                           - __sd->_M_starts[__s]));
  148.  
  149.         std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
  150.  
  151.         // if not last thread
  152.         if (__iam < __sd->_M_num_threads - 1)
  153.           multiseq_partition(__seqs.begin(), __seqs.end(),
  154.                              __sd->_M_starts[__iam + 1], __offsets.begin(),
  155.                              __comp);
  156.  
  157.         for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
  158.           {
  159.             // for each sequence
  160.             if (__iam < (__sd->_M_num_threads - 1))
  161.               __sd->_M_pieces[__iam][__seq]._M_end
  162.                 = __offsets[__seq] - __seqs[__seq].first;
  163.             else
  164.               // very end of this sequence
  165.               __sd->_M_pieces[__iam][__seq]._M_end =
  166.                 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
  167.           }
  168.  
  169. #       pragma omp barrier
  170.  
  171.         for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
  172.           {
  173.             // For each sequence.
  174.             if (__iam > 0)
  175.               __sd->_M_pieces[__iam][__seq]._M_begin =
  176.                 __sd->_M_pieces[__iam - 1][__seq]._M_end;
  177.             else
  178.               // Absolute beginning.
  179.               __sd->_M_pieces[__iam][__seq]._M_begin = 0;
  180.           }
  181.       }
  182.   };
  183.  
  184.   /** @brief Split by sampling. */
  185.   template<typename _RAIter, typename _Compare,
  186.            typename _SortingPlacesIterator>
  187.     struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
  188.     {
  189.       void
  190.       operator()(const _ThreadIndex __iam,
  191.                  _PMWMSSortingData<_RAIter>* __sd,
  192.                  _Compare& __comp,
  193.                  const typename
  194.                  std::iterator_traits<_RAIter>::difference_type
  195.                  __num_samples) const
  196.       {
  197.         typedef std::iterator_traits<_RAIter> _TraitsType;
  198.         typedef typename _TraitsType::value_type _ValueType;
  199.         typedef typename _TraitsType::difference_type _DifferenceType;
  200.  
  201.         __determine_samples(__sd, __num_samples);
  202.  
  203. #       pragma omp barrier
  204.  
  205. #       pragma omp single
  206.         __gnu_sequential::sort(__sd->_M_samples,
  207.                                __sd->_M_samples
  208.                                + (__num_samples * __sd->_M_num_threads),
  209.                                __comp);
  210.  
  211. #       pragma omp barrier
  212.  
  213.         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
  214.           {
  215.             // For each sequence.
  216.             if (__num_samples * __iam > 0)
  217.               __sd->_M_pieces[__iam][__s]._M_begin =
  218.                 std::lower_bound(__sd->_M_temporary[__s],
  219.                                  __sd->_M_temporary[__s]
  220.                                  + (__sd->_M_starts[__s + 1]
  221.                                     - __sd->_M_starts[__s]),
  222.                                  __sd->_M_samples[__num_samples * __iam],
  223.                                  __comp)
  224.                 - __sd->_M_temporary[__s];
  225.             else
  226.               // Absolute beginning.
  227.               __sd->_M_pieces[__iam][__s]._M_begin = 0;
  228.  
  229.             if ((__num_samples * (__iam + 1)) <
  230.                 (__num_samples * __sd->_M_num_threads))
  231.               __sd->_M_pieces[__iam][__s]._M_end =
  232.                 std::lower_bound(__sd->_M_temporary[__s],
  233.                                  __sd->_M_temporary[__s]
  234.                                  + (__sd->_M_starts[__s + 1]
  235.                                     - __sd->_M_starts[__s]),
  236.                                  __sd->_M_samples[__num_samples * (__iam + 1)],
  237.                                  __comp)
  238.                 - __sd->_M_temporary[__s];
  239.             else
  240.               // Absolute end.
  241.               __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
  242.                                                     - __sd->_M_starts[__s]);
  243.           }
  244.       }
  245.   };
  246.  
  247.   template<bool __stable, typename _RAIter, typename _Compare>
  248.     struct __possibly_stable_sort
  249.     { };
  250.  
  251.   template<typename _RAIter, typename _Compare>
  252.     struct __possibly_stable_sort<true, _RAIter, _Compare>
  253.     {
  254.       void operator()(const _RAIter& __begin,
  255.                       const _RAIter& __end, _Compare& __comp) const
  256.       { __gnu_sequential::stable_sort(__begin, __end, __comp); }
  257.     };
  258.  
  259.   template<typename _RAIter, typename _Compare>
  260.     struct __possibly_stable_sort<false, _RAIter, _Compare>
  261.     {
  262.       void operator()(const _RAIter __begin,
  263.                       const _RAIter __end, _Compare& __comp) const
  264.       { __gnu_sequential::sort(__begin, __end, __comp); }
  265.     };
  266.  
  267.   template<bool __stable, typename Seq_RAIter,
  268.            typename _RAIter, typename _Compare,
  269.            typename DiffType>
  270.     struct __possibly_stable_multiway_merge
  271.     { };
  272.  
  273.   template<typename Seq_RAIter, typename _RAIter,
  274.            typename _Compare, typename _DiffType>
  275.     struct __possibly_stable_multiway_merge<true, Seq_RAIter,
  276.                                             _RAIter, _Compare, _DiffType>
  277.     {
  278.       void operator()(const Seq_RAIter& __seqs_begin,
  279.                       const Seq_RAIter& __seqs_end,
  280.                       const _RAIter& __target,
  281.                       _Compare& __comp,
  282.                       _DiffType __length_am) const
  283.       { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
  284.                               __length_am, __comp, sequential_tag()); }
  285.     };
  286.  
  287.   template<typename Seq_RAIter, typename _RAIter,
  288.            typename _Compare, typename _DiffType>
  289.     struct __possibly_stable_multiway_merge<false, Seq_RAIter,
  290.                                             _RAIter, _Compare, _DiffType>
  291.     {
  292.       void operator()(const Seq_RAIter& __seqs_begin,
  293.                       const Seq_RAIter& __seqs_end,
  294.                       const _RAIter& __target,
  295.                       _Compare& __comp,
  296.                       _DiffType __length_am) const
  297.       { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
  298.                        __comp, sequential_tag()); }
  299.     };
  300.  
  301.   /** @brief PMWMS code executed by each thread.
  302.    *  @param __sd Pointer to algorithm data.
  303.    *  @param __comp Comparator.
  304.    */
  305.   template<bool __stable, bool __exact, typename _RAIter,
  306.            typename _Compare>
  307.     void
  308.     parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
  309.                           _Compare& __comp)
  310.     {
  311.       typedef std::iterator_traits<_RAIter> _TraitsType;
  312.       typedef typename _TraitsType::value_type _ValueType;
  313.       typedef typename _TraitsType::difference_type _DifferenceType;
  314.  
  315.       _ThreadIndex __iam = omp_get_thread_num();
  316.  
  317.       // Length of this thread's chunk, before merging.
  318.       _DifferenceType __length_local =
  319.         __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
  320.  
  321.       // Sort in temporary storage, leave space for sentinel.
  322.  
  323.       typedef _ValueType* _SortingPlacesIterator;
  324.  
  325.       __sd->_M_temporary[__iam] =
  326.         static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
  327.                                                 * (__length_local + 1)));
  328.  
  329.       // Copy there.
  330.       std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
  331.                               __sd->_M_source + __sd->_M_starts[__iam]
  332.                               + __length_local,
  333.                               __sd->_M_temporary[__iam]);
  334.  
  335.       __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
  336.         (__sd->_M_temporary[__iam],
  337.          __sd->_M_temporary[__iam] + __length_local,
  338.          __comp);
  339.  
  340.       // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
  341.       // __sd->_M_temporary[__iam] + __length_local.
  342.  
  343.       // No barrier here: Synchronization is done by the splitting routine.
  344.  
  345.       _DifferenceType __num_samples =
  346.         _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
  347.       _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
  348.         (__iam, __sd, __comp, __num_samples);
  349.  
  350.       // Offset from __target __begin, __length after merging.
  351.       _DifferenceType __offset = 0, __length_am = 0;
  352.       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
  353.         {
  354.           __length_am += (__sd->_M_pieces[__iam][__s]._M_end
  355.                           - __sd->_M_pieces[__iam][__s]._M_begin);
  356.           __offset += __sd->_M_pieces[__iam][__s]._M_begin;
  357.         }
  358.  
  359.       typedef std::vector<
  360.         std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
  361.         _SeqVector;
  362.       _SeqVector __seqs(__sd->_M_num_threads);
  363.  
  364.       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
  365.         {
  366.           __seqs[__s] =
  367.             std::make_pair(__sd->_M_temporary[__s]
  368.                            + __sd->_M_pieces[__iam][__s]._M_begin,
  369.                            __sd->_M_temporary[__s]
  370.                            + __sd->_M_pieces[__iam][__s]._M_end);
  371.         }
  372.  
  373.       __possibly_stable_multiway_merge<
  374.         __stable, typename _SeqVector::iterator,
  375.         _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
  376.                                      __sd->_M_source + __offset, __comp,
  377.                                      __length_am);
  378.  
  379. #     pragma omp barrier
  380.  
  381.       for (_DifferenceType __i = 0; __i < __length_local; ++__i)
  382.         __sd->_M_temporary[__iam][__i].~_ValueType();
  383.       ::operator delete(__sd->_M_temporary[__iam]);
  384.     }
  385.  
  386.   /** @brief PMWMS main call.
  387.    *  @param __begin Begin iterator of sequence.
  388.    *  @param __end End iterator of sequence.
  389.    *  @param __comp Comparator.
  390.    *  @param __num_threads Number of threads to use.
  391.    */
  392.   template<bool __stable, bool __exact, typename _RAIter,
  393.            typename _Compare>
  394.     void
  395.     parallel_sort_mwms(_RAIter __begin, _RAIter __end,
  396.                        _Compare __comp,
  397.                        _ThreadIndex __num_threads)
  398.     {
  399.       _GLIBCXX_CALL(__end - __begin)
  400.  
  401.       typedef std::iterator_traits<_RAIter> _TraitsType;
  402.       typedef typename _TraitsType::value_type _ValueType;
  403.       typedef typename _TraitsType::difference_type _DifferenceType;
  404.  
  405.       _DifferenceType __n = __end - __begin;
  406.  
  407.       if (__n <= 1)
  408.         return;
  409.  
  410.       // at least one element per thread
  411.       if (__num_threads > __n)
  412.         __num_threads = static_cast<_ThreadIndex>(__n);
  413.  
  414.       // shared variables
  415.       _PMWMSSortingData<_RAIter> __sd;
  416.       _DifferenceType* __starts;
  417.       _DifferenceType __size;
  418.  
  419. #     pragma omp parallel num_threads(__num_threads)
  420.       {
  421.         __num_threads = omp_get_num_threads(); //no more threads than requested
  422.  
  423. #       pragma omp single
  424.         {
  425.           __sd._M_num_threads = __num_threads;
  426.           __sd._M_source = __begin;
  427.          
  428.           __sd._M_temporary = new _ValueType*[__num_threads];
  429.  
  430.           if (!__exact)
  431.             {
  432.               __size =
  433.                 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
  434.                 * __num_threads;
  435.               __sd._M_samples = static_cast<_ValueType*>
  436.                 (::operator new(__size * sizeof(_ValueType)));
  437.             }
  438.           else
  439.             __sd._M_samples = 0;
  440.  
  441.           __sd._M_offsets = new _DifferenceType[__num_threads - 1];
  442.           __sd._M_pieces
  443.             = new std::vector<_Piece<_DifferenceType> >[__num_threads];
  444.           for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
  445.             __sd._M_pieces[__s].resize(__num_threads);
  446.           __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
  447.  
  448.           _DifferenceType __chunk_length = __n / __num_threads;
  449.           _DifferenceType __split = __n % __num_threads;
  450.           _DifferenceType __pos = 0;
  451.           for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
  452.             {
  453.               __starts[__i] = __pos;
  454.               __pos += ((__i < __split)
  455.                         ? (__chunk_length + 1) : __chunk_length);
  456.             }
  457.           __starts[__num_threads] = __pos;
  458.         } //single
  459.  
  460.         // Now sort in parallel.
  461.         parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
  462.       } //parallel
  463.  
  464.       delete[] __starts;
  465.       delete[] __sd._M_temporary;
  466.  
  467.       if (!__exact)
  468.         {
  469.           for (_DifferenceType __i = 0; __i < __size; ++__i)
  470.             __sd._M_samples[__i].~_ValueType();
  471.           ::operator delete(__sd._M_samples);
  472.         }
  473.  
  474.       delete[] __sd._M_offsets;
  475.       delete[] __sd._M_pieces;
  476.     }
  477.  
  478. } //namespace __gnu_parallel
  479.  
  480. #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */
  481.