Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5134 serge 1
// -*- C++ -*-
2
 
3
// Copyright (C) 2007-2013 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the terms
7
// of the GNU General Public License as published by the Free Software
8
// Foundation; either version 3, or (at your option) any later
9
// version.
10
 
11
// This library is distributed in the hope that it will be useful, but
12
// WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
// General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// .
24
 
25
/** @file parallel/multiway_mergesort.h
26
 *  @brief Parallel multiway merge sort.
27
 *  This file is a GNU parallel extension to the Standard C++ Library.
28
 */
29
 
30
// Written by Johannes Singler.
31
 
32
#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33
#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
34
 
35
#include 
36
 
37
#include 
38
#include 
39
#include 
40
#include 
41
 
42
namespace __gnu_parallel
43
{
44
  /** @brief Subsequence description. */
45
  template
46
    struct _Piece
47
    {
48
      typedef _DifferenceTp _DifferenceType;
49
 
50
      /** @brief Begin of subsequence. */
51
      _DifferenceType _M_begin;
52
 
53
      /** @brief End of subsequence. */
54
      _DifferenceType _M_end;
55
    };
56
 
57
  /** @brief Data accessed by all threads.
58
   *
59
   *  PMWMS = parallel multiway mergesort */
60
  template
61
    struct _PMWMSSortingData
62
    {
63
      typedef std::iterator_traits<_RAIter> _TraitsType;
64
      typedef typename _TraitsType::value_type _ValueType;
65
      typedef typename _TraitsType::difference_type _DifferenceType;
66
 
67
      /** @brief Number of threads involved. */
68
      _ThreadIndex _M_num_threads;
69
 
70
      /** @brief Input __begin. */
71
      _RAIter _M_source;
72
 
73
      /** @brief Start indices, per thread. */
74
      _DifferenceType* _M_starts;
75
 
76
      /** @brief Storage in which to sort. */
77
      _ValueType** _M_temporary;
78
 
79
      /** @brief Samples. */
80
      _ValueType* _M_samples;
81
 
82
      /** @brief Offsets to add to the found positions. */
83
      _DifferenceType* _M_offsets;
84
 
85
      /** @brief Pieces of data to merge @c [thread][__sequence] */
86
      std::vector<_Piece<_DifferenceType> >* _M_pieces;
87
  };
88
 
89
  /**
90
   *  @brief Select _M_samples from a sequence.
91
   *  @param __sd Pointer to algorithm data. _Result will be placed in
92
   *  @c __sd->_M_samples.
93
   *  @param __num_samples Number of _M_samples to select.
94
   */
95
  template
96
    void
97
    __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
98
			_DifferenceTp __num_samples)
99
    {
100
      typedef std::iterator_traits<_RAIter> _TraitsType;
101
      typedef typename _TraitsType::value_type _ValueType;
102
      typedef _DifferenceTp _DifferenceType;
103
 
104
      _ThreadIndex __iam = omp_get_thread_num();
105
 
106
      _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
107
 
108
      __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
109
		      __num_samples + 1, __es);
110
 
111
      for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
112
	::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
113
	    _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
114
				       + __es[__i + 1]]);
115
 
116
      delete[] __es;
117
    }
118
 
119
  /** @brief Split consistently. */
120
  template
121
	   typename _Compare, typename _SortingPlacesIterator>
122
    struct _SplitConsistently
123
    { };
124
 
125
  /** @brief Split by exact splitting. */
126
  template
127
	   typename _SortingPlacesIterator>
128
    struct _SplitConsistently
129
    {
130
      void
131
      operator()(const _ThreadIndex __iam,
132
		 _PMWMSSortingData<_RAIter>* __sd,
133
		 _Compare& __comp,
134
		 const typename
135
		 std::iterator_traits<_RAIter>::difference_type
136
		 __num_samples) const
137
      {
138
#       pragma omp barrier
139
 
140
	std::vector
141
	                      _SortingPlacesIterator> >
142
	  __seqs(__sd->_M_num_threads);
143
	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
144
	  __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
145
				       __sd->_M_temporary[__s]
146
				       + (__sd->_M_starts[__s + 1]
147
					  - __sd->_M_starts[__s]));
148
 
149
	std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
150
 
151
	// if not last thread
152
	if (__iam < __sd->_M_num_threads - 1)
153
	  multiseq_partition(__seqs.begin(), __seqs.end(),
154
			     __sd->_M_starts[__iam + 1], __offsets.begin(),
155
			     __comp);
156
 
157
	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
158
	  {
159
	    // for each sequence
160
	    if (__iam < (__sd->_M_num_threads - 1))
161
	      __sd->_M_pieces[__iam][__seq]._M_end
162
		= __offsets[__seq] - __seqs[__seq].first;
163
	    else
164
	      // very end of this sequence
165
	      __sd->_M_pieces[__iam][__seq]._M_end =
166
		__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
167
	  }
168
 
169
#       pragma omp barrier
170
 
171
	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
172
	  {
173
	    // For each sequence.
174
	    if (__iam > 0)
175
	      __sd->_M_pieces[__iam][__seq]._M_begin =
176
		__sd->_M_pieces[__iam - 1][__seq]._M_end;
177
	    else
178
	      // Absolute beginning.
179
	      __sd->_M_pieces[__iam][__seq]._M_begin = 0;
180
	  }
181
      }
182
  };
183
 
184
  /** @brief Split by sampling. */
185
  template
186
	   typename _SortingPlacesIterator>
187
    struct _SplitConsistently
188
    {
189
      void
190
      operator()(const _ThreadIndex __iam,
191
		 _PMWMSSortingData<_RAIter>* __sd,
192
		 _Compare& __comp,
193
		 const typename
194
		 std::iterator_traits<_RAIter>::difference_type
195
		 __num_samples) const
196
      {
197
	typedef std::iterator_traits<_RAIter> _TraitsType;
198
	typedef typename _TraitsType::value_type _ValueType;
199
	typedef typename _TraitsType::difference_type _DifferenceType;
200
 
201
	__determine_samples(__sd, __num_samples);
202
 
203
#       pragma omp barrier
204
 
205
#       pragma omp single
206
	__gnu_sequential::sort(__sd->_M_samples,
207
			       __sd->_M_samples
208
			       + (__num_samples * __sd->_M_num_threads),
209
			       __comp);
210
 
211
#       pragma omp barrier
212
 
213
	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
214
	  {
215
	    // For each sequence.
216
	    if (__num_samples * __iam > 0)
217
	      __sd->_M_pieces[__iam][__s]._M_begin =
218
                std::lower_bound(__sd->_M_temporary[__s],
219
				 __sd->_M_temporary[__s]
220
				 + (__sd->_M_starts[__s + 1]
221
				    - __sd->_M_starts[__s]),
222
				 __sd->_M_samples[__num_samples * __iam],
223
				 __comp)
224
                - __sd->_M_temporary[__s];
225
	    else
226
	      // Absolute beginning.
227
	      __sd->_M_pieces[__iam][__s]._M_begin = 0;
228
 
229
	    if ((__num_samples * (__iam + 1)) <
230
		(__num_samples * __sd->_M_num_threads))
231
	      __sd->_M_pieces[__iam][__s]._M_end =
232
                std::lower_bound(__sd->_M_temporary[__s],
233
				 __sd->_M_temporary[__s]
234
				 + (__sd->_M_starts[__s + 1]
235
				    - __sd->_M_starts[__s]),
236
				 __sd->_M_samples[__num_samples * (__iam + 1)],
237
				 __comp)
238
                - __sd->_M_temporary[__s];
239
	    else
240
	      // Absolute end.
241
	      __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
242
						    - __sd->_M_starts[__s]);
243
	  }
244
      }
245
  };
246
 
247
  template
248
    struct __possibly_stable_sort
249
    { };
250
 
251
  template
252
    struct __possibly_stable_sort
253
    {
254
      void operator()(const _RAIter& __begin,
255
		      const _RAIter& __end, _Compare& __comp) const
256
      { __gnu_sequential::stable_sort(__begin, __end, __comp); }
257
    };
258
 
259
  template
260
    struct __possibly_stable_sort
261
    {
262
      void operator()(const _RAIter __begin,
263
		      const _RAIter __end, _Compare& __comp) const
264
      { __gnu_sequential::sort(__begin, __end, __comp); }
265
    };
266
 
267
  template
268
	   typename _RAIter, typename _Compare,
269
	   typename DiffType>
270
    struct __possibly_stable_multiway_merge
271
    { };
272
 
273
  template
274
	   typename _Compare, typename _DiffType>
275
    struct __possibly_stable_multiway_merge
276
					    _RAIter, _Compare, _DiffType>
277
    {
278
      void operator()(const Seq_RAIter& __seqs_begin,
279
		      const Seq_RAIter& __seqs_end,
280
		      const _RAIter& __target,
281
		      _Compare& __comp,
282
		      _DiffType __length_am) const
283
      { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
284
			      __length_am, __comp, sequential_tag()); }
285
    };
286
 
287
  template
288
	   typename _Compare, typename _DiffType>
289
    struct __possibly_stable_multiway_merge
290
					    _RAIter, _Compare, _DiffType>
291
    {
292
      void operator()(const Seq_RAIter& __seqs_begin,
293
                      const Seq_RAIter& __seqs_end,
294
                      const _RAIter& __target,
295
                      _Compare& __comp,
296
                      _DiffType __length_am) const
297
      { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
298
		       __comp, sequential_tag()); }
299
    };
300
 
301
  /** @brief PMWMS code executed by each thread.
302
   *  @param __sd Pointer to algorithm data.
303
   *  @param __comp Comparator.
304
   */
305
  template
306
	   typename _Compare>
307
    void
308
    parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
309
			  _Compare& __comp)
310
    {
311
      typedef std::iterator_traits<_RAIter> _TraitsType;
312
      typedef typename _TraitsType::value_type _ValueType;
313
      typedef typename _TraitsType::difference_type _DifferenceType;
314
 
315
      _ThreadIndex __iam = omp_get_thread_num();
316
 
317
      // Length of this thread's chunk, before merging.
318
      _DifferenceType __length_local =
319
	__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
320
 
321
      // Sort in temporary storage, leave space for sentinel.
322
 
323
      typedef _ValueType* _SortingPlacesIterator;
324
 
325
      __sd->_M_temporary[__iam] =
326
        static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
327
						* (__length_local + 1)));
328
 
329
      // Copy there.
330
      std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
331
			      __sd->_M_source + __sd->_M_starts[__iam]
332
			      + __length_local,
333
			      __sd->_M_temporary[__iam]);
334
 
335
      __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
336
        (__sd->_M_temporary[__iam],
337
	 __sd->_M_temporary[__iam] + __length_local,
338
         __comp);
339
 
340
      // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
341
      // __sd->_M_temporary[__iam] + __length_local.
342
 
343
      // No barrier here: Synchronization is done by the splitting routine.
344
 
345
      _DifferenceType __num_samples =
346
        _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
347
      _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
348
        (__iam, __sd, __comp, __num_samples);
349
 
350
      // Offset from __target __begin, __length after merging.
351
      _DifferenceType __offset = 0, __length_am = 0;
352
      for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
353
	{
354
	  __length_am += (__sd->_M_pieces[__iam][__s]._M_end
355
			  - __sd->_M_pieces[__iam][__s]._M_begin);
356
	  __offset += __sd->_M_pieces[__iam][__s]._M_begin;
357
	}
358
 
359
      typedef std::vector<
360
        std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
361
        _SeqVector;
362
      _SeqVector __seqs(__sd->_M_num_threads);
363
 
364
      for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
365
	{
366
	  __seqs[__s] =
367
	    std::make_pair(__sd->_M_temporary[__s]
368
			   + __sd->_M_pieces[__iam][__s]._M_begin,
369
			   __sd->_M_temporary[__s]
370
			   + __sd->_M_pieces[__iam][__s]._M_end);
371
	}
372
 
373
      __possibly_stable_multiway_merge<
374
        __stable, typename _SeqVector::iterator,
375
	_RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
376
				     __sd->_M_source + __offset, __comp,
377
				     __length_am);
378
 
379
#     pragma omp barrier
380
 
381
      for (_DifferenceType __i = 0; __i < __length_local; ++__i)
382
	__sd->_M_temporary[__iam][__i].~_ValueType();
383
      ::operator delete(__sd->_M_temporary[__iam]);
384
    }
385
 
386
  /** @brief PMWMS main call.
387
   *  @param __begin Begin iterator of sequence.
388
   *  @param __end End iterator of sequence.
389
   *  @param __comp Comparator.
390
   *  @param __num_threads Number of threads to use.
391
   */
392
  template
393
           typename _Compare>
394
    void
395
    parallel_sort_mwms(_RAIter __begin, _RAIter __end,
396
		       _Compare __comp,
397
		       _ThreadIndex __num_threads)
398
    {
399
      _GLIBCXX_CALL(__end - __begin)
400
 
401
      typedef std::iterator_traits<_RAIter> _TraitsType;
402
      typedef typename _TraitsType::value_type _ValueType;
403
      typedef typename _TraitsType::difference_type _DifferenceType;
404
 
405
      _DifferenceType __n = __end - __begin;
406
 
407
      if (__n <= 1)
408
	return;
409
 
410
      // at least one element per thread
411
      if (__num_threads > __n)
412
	__num_threads = static_cast<_ThreadIndex>(__n);
413
 
414
      // shared variables
415
      _PMWMSSortingData<_RAIter> __sd;
416
      _DifferenceType* __starts;
417
      _DifferenceType __size;
418
 
419
#     pragma omp parallel num_threads(__num_threads)
420
      {
421
        __num_threads = omp_get_num_threads(); //no more threads than requested
422
 
423
#       pragma omp single
424
	{
425
	  __sd._M_num_threads = __num_threads;
426
	  __sd._M_source = __begin;
427
 
428
	  __sd._M_temporary = new _ValueType*[__num_threads];
429
 
430
	  if (!__exact)
431
	    {
432
	      __size =
433
		(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
434
		* __num_threads;
435
	      __sd._M_samples = static_cast<_ValueType*>
436
		(::operator new(__size * sizeof(_ValueType)));
437
	    }
438
	  else
439
	    __sd._M_samples = 0;
440
 
441
	  __sd._M_offsets = new _DifferenceType[__num_threads - 1];
442
	  __sd._M_pieces
443
	    = new std::vector<_Piece<_DifferenceType> >[__num_threads];
444
	  for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
445
	    __sd._M_pieces[__s].resize(__num_threads);
446
	  __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
447
 
448
	  _DifferenceType __chunk_length = __n / __num_threads;
449
	  _DifferenceType __split = __n % __num_threads;
450
	  _DifferenceType __pos = 0;
451
	  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
452
	    {
453
	      __starts[__i] = __pos;
454
	      __pos += ((__i < __split)
455
			? (__chunk_length + 1) : __chunk_length);
456
	    }
457
	  __starts[__num_threads] = __pos;
458
	} //single
459
 
460
        // Now sort in parallel.
461
        parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
462
      } //parallel
463
 
464
      delete[] __starts;
465
      delete[] __sd._M_temporary;
466
 
467
      if (!__exact)
468
	{
469
	  for (_DifferenceType __i = 0; __i < __size; ++__i)
470
	    __sd._M_samples[__i].~_ValueType();
471
	  ::operator delete(__sd._M_samples);
472
	}
473
 
474
      delete[] __sd._M_offsets;
475
      delete[] __sd._M_pieces;
476
    }
477
 
478
} //namespace __gnu_parallel
479
 
480
#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */