WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/libstdc++-v3/include/parallel/multiway_mergesort.h

Rev	Author	Line No.	Line
5134	serge	1	// -- C++ --
		2
		3	// Copyright (C) 2007-2013 Free Software Foundation, Inc.
		4	//
		5	// This file is part of the GNU ISO C++ Library. This library is free
		6	// software; you can redistribute it and/or modify it under the terms
		7	// of the GNU General Public License as published by the Free Software
		8	// Foundation; either version 3, or (at your option) any later
		9	// version.
		10
		11	// This library is distributed in the hope that it will be useful, but
		12	// WITHOUT ANY WARRANTY; without even the implied warranty of
		13	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		14	// General Public License for more details.
		15
		16	// Under Section 7 of GPL version 3, you are granted additional
		17	// permissions described in the GCC Runtime Library Exception, version
		18	// 3.1, as published by the Free Software Foundation.
		19
		20	// You should have received a copy of the GNU General Public License and
		21	// a copy of the GCC Runtime Library Exception along with this program;
		22	// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
		23	// .
		24
		25	/** @file parallel/multiway_mergesort.h
		26	* @brief Parallel multiway merge sort.
		27	* This file is a GNU parallel extension to the Standard C++ Library.
		28	*/
		29
		30	// Written by Johannes Singler.
		31
		32	#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
		33	#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
		34
		35	#include
		36
		37	#include
		38	#include
		39	#include
		40	#include
		41
		42	namespace __gnu_parallel
		43	{
		44	/** @brief Subsequence description. */
		45	template
		46	struct _Piece
		47	{
		48	typedef _DifferenceTp _DifferenceType;
		49
		50	/** @brief Begin of subsequence. */
		51	_DifferenceType _M_begin;
		52
		53	/** @brief End of subsequence. */
		54	_DifferenceType _M_end;
		55	};
		56
		57	/** @brief Data accessed by all threads.
		58	*
		59	* PMWMS = parallel multiway mergesort */
		60	template
		61	struct _PMWMSSortingData
		62	{
		63	typedef std::iterator_traits<_RAIter> _TraitsType;
		64	typedef typename _TraitsType::value_type _ValueType;
		65	typedef typename _TraitsType::difference_type _DifferenceType;
		66
		67	/** @brief Number of threads involved. */
		68	_ThreadIndex _M_num_threads;
		69
		70	/** @brief Input __begin. */
		71	_RAIter _M_source;
		72
		73	/** @brief Start indices, per thread. */
		74	_DifferenceType* _M_starts;
		75
		76	/** @brief Storage in which to sort. */
		77	_ValueType** _M_temporary;
		78
		79	/** @brief Samples. */
		80	_ValueType* _M_samples;
		81
		82	/** @brief Offsets to add to the found positions. */
		83	_DifferenceType* _M_offsets;
		84
		85	/** @brief Pieces of data to merge @c [thread][__sequence] */
		86	std::vector<_Piece<_DifferenceType> >* _M_pieces;
		87	};
		88
		89	/**
		90	* @brief Select _M_samples from a sequence.
		91	* @param __sd Pointer to algorithm data. _Result will be placed in
		92	* @c __sd->_M_samples.
		93	* @param __num_samples Number of _M_samples to select.
		94	*/
		95	template
		96	void
		97	__determine_samples(_PMWMSSortingData<_RAIter>* __sd,
		98	_DifferenceTp __num_samples)
		99	{
		100	typedef std::iterator_traits<_RAIter> _TraitsType;
		101	typedef typename _TraitsType::value_type _ValueType;
		102	typedef _DifferenceTp _DifferenceType;
		103
		104	_ThreadIndex __iam = omp_get_thread_num();
		105
		106	_DifferenceType* __es = new _DifferenceType[__num_samples + 2];
		107
		108	__equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
		109	__num_samples + 1, __es);
		110
		111	for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
		112	::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
		113	_ValueType(__sd->_M_source[__sd->_M_starts[__iam]
		114	+ __es[__i + 1]]);
		115
		116	delete[] __es;
		117	}
		118
		119	/** @brief Split consistently. */
		120	template
		121	typename _Compare, typename _SortingPlacesIterator>
		122	struct _SplitConsistently
		123	{ };
		124
		125	/** @brief Split by exact splitting. */
		126	template
		127	typename _SortingPlacesIterator>
		128	struct _SplitConsistently
		129	{
		130	void
		131	operator()(const _ThreadIndex __iam,
		132	_PMWMSSortingData<_RAIter>* __sd,
		133	_Compare& __comp,
		134	const typename
		135	std::iterator_traits<_RAIter>::difference_type
		136	__num_samples) const
		137	{
		138	# pragma omp barrier
		139
		140	std::vector
		141	_SortingPlacesIterator> >
		142	__seqs(__sd->_M_num_threads);
		143	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
		144	__seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
		145	__sd->_M_temporary[__s]
		146	+ (__sd->_M_starts[__s + 1]
		147	- __sd->_M_starts[__s]));
		148
		149	std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
		150
		151	// if not last thread
		152	if (__iam < __sd->_M_num_threads - 1)
		153	multiseq_partition(__seqs.begin(), __seqs.end(),
		154	__sd->_M_starts[__iam + 1], __offsets.begin(),
		155	__comp);
		156
		157	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
		158	{
		159	// for each sequence
		160	if (__iam < (__sd->_M_num_threads - 1))
		161	__sd->_M_pieces[__iam][__seq]._M_end
		162	= __offsets[__seq] - __seqs[__seq].first;
		163	else
		164	// very end of this sequence
		165	__sd->_M_pieces[__iam][__seq]._M_end =
		166	__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
		167	}
		168
		169	# pragma omp barrier
		170
		171	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
		172	{
		173	// For each sequence.
		174	if (__iam > 0)
		175	__sd->_M_pieces[__iam][__seq]._M_begin =
		176	__sd->_M_pieces[__iam - 1][__seq]._M_end;
		177	else
		178	// Absolute beginning.
		179	__sd->_M_pieces[__iam][__seq]._M_begin = 0;
		180	}
		181	}
		182	};
		183
		184	/** @brief Split by sampling. */
		185	template
		186	typename _SortingPlacesIterator>
		187	struct _SplitConsistently
		188	{
		189	void
		190	operator()(const _ThreadIndex __iam,
		191	_PMWMSSortingData<_RAIter>* __sd,
		192	_Compare& __comp,
		193	const typename
		194	std::iterator_traits<_RAIter>::difference_type
		195	__num_samples) const
		196	{
		197	typedef std::iterator_traits<_RAIter> _TraitsType;
		198	typedef typename _TraitsType::value_type _ValueType;
		199	typedef typename _TraitsType::difference_type _DifferenceType;
		200
		201	__determine_samples(__sd, __num_samples);
		202
		203	# pragma omp barrier
		204
		205	# pragma omp single
		206	__gnu_sequential::sort(__sd->_M_samples,
		207	__sd->_M_samples
		208	+ (__num_samples * __sd->_M_num_threads),
		209	__comp);
		210
		211	# pragma omp barrier
		212
		213	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
		214	{
		215	// For each sequence.
		216	if (__num_samples * __iam > 0)
		217	__sd->_M_pieces[__iam][__s]._M_begin =
		218	std::lower_bound(__sd->_M_temporary[__s],
		219	__sd->_M_temporary[__s]
		220	+ (__sd->_M_starts[__s + 1]
		221	- __sd->_M_starts[__s]),
		222	__sd->_M_samples[__num_samples * __iam],
		223	__comp)
		224	- __sd->_M_temporary[__s];
		225	else
		226	// Absolute beginning.
		227	__sd->_M_pieces[__iam][__s]._M_begin = 0;
		228
		229	if ((__num_samples * (__iam + 1)) <
		230	(__num_samples * __sd->_M_num_threads))
		231	__sd->_M_pieces[__iam][__s]._M_end =
		232	std::lower_bound(__sd->_M_temporary[__s],
		233	__sd->_M_temporary[__s]
		234	+ (__sd->_M_starts[__s + 1]
		235	- __sd->_M_starts[__s]),
		236	__sd->_M_samples[__num_samples * (__iam + 1)],
		237	__comp)
		238	- __sd->_M_temporary[__s];
		239	else
		240	// Absolute end.
		241	__sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
		242	- __sd->_M_starts[__s]);
		243	}
		244	}
		245	};
		246
		247	template
		248	struct __possibly_stable_sort
		249	{ };
		250
		251	template
		252	struct __possibly_stable_sort
		253	{
		254	void operator()(const _RAIter& __begin,
		255	const _RAIter& __end, _Compare& __comp) const
		256	{ __gnu_sequential::stable_sort(__begin, __end, __comp); }
		257	};
		258
		259	template
		260	struct __possibly_stable_sort
		261	{
		262	void operator()(const _RAIter __begin,
		263	const _RAIter __end, _Compare& __comp) const
		264	{ __gnu_sequential::sort(__begin, __end, __comp); }
		265	};
		266
		267	template
		268	typename _RAIter, typename _Compare,
		269	typename DiffType>
		270	struct __possibly_stable_multiway_merge
		271	{ };
		272
		273	template
		274	typename _Compare, typename _DiffType>
		275	struct __possibly_stable_multiway_merge
		276	_RAIter, _Compare, _DiffType>
		277	{
		278	void operator()(const Seq_RAIter& __seqs_begin,
		279	const Seq_RAIter& __seqs_end,
		280	const _RAIter& __target,
		281	_Compare& __comp,
		282	_DiffType __length_am) const
		283	{ stable_multiway_merge(__seqs_begin, __seqs_end, __target,
		284	__length_am, __comp, sequential_tag()); }
		285	};
		286
		287	template
		288	typename _Compare, typename _DiffType>
		289	struct __possibly_stable_multiway_merge
		290	_RAIter, _Compare, _DiffType>
		291	{
		292	void operator()(const Seq_RAIter& __seqs_begin,
		293	const Seq_RAIter& __seqs_end,
		294	const _RAIter& __target,
		295	_Compare& __comp,
		296	_DiffType __length_am) const
		297	{ multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
		298	__comp, sequential_tag()); }
		299	};
		300
		301	/** @brief PMWMS code executed by each thread.
		302	* @param __sd Pointer to algorithm data.
		303	* @param __comp Comparator.
		304	*/
		305	template
		306	typename _Compare>
		307	void
		308	parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
		309	_Compare& __comp)
		310	{
		311	typedef std::iterator_traits<_RAIter> _TraitsType;
		312	typedef typename _TraitsType::value_type _ValueType;
		313	typedef typename _TraitsType::difference_type _DifferenceType;
		314
		315	_ThreadIndex __iam = omp_get_thread_num();
		316
		317	// Length of this thread's chunk, before merging.
		318	_DifferenceType __length_local =
		319	__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
		320
		321	// Sort in temporary storage, leave space for sentinel.
		322
		323	typedef _ValueType* _SortingPlacesIterator;
		324
		325	__sd->_M_temporary[__iam] =
		326	static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
		327	* (__length_local + 1)));
		328
		329	// Copy there.
		330	std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
		331	__sd->_M_source + __sd->_M_starts[__iam]
		332	+ __length_local,
		333	__sd->_M_temporary[__iam]);
		334
		335	__possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
		336	(__sd->_M_temporary[__iam],
		337	__sd->_M_temporary[__iam] + __length_local,
		338	__comp);
		339
		340	// Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
		341	// __sd->_M_temporary[__iam] + __length_local.
		342
		343	// No barrier here: Synchronization is done by the splitting routine.
		344
		345	_DifferenceType __num_samples =
		346	_Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
		347	_SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
		348	(__iam, __sd, __comp, __num_samples);
		349
		350	// Offset from __target __begin, __length after merging.
		351	_DifferenceType __offset = 0, __length_am = 0;
		352	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
		353	{
		354	__length_am += (__sd->_M_pieces[__iam][__s]._M_end
		355	- __sd->_M_pieces[__iam][__s]._M_begin);
		356	__offset += __sd->_M_pieces[__iam][__s]._M_begin;
		357	}
		358
		359	typedef std::vector<
		360	std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
		361	_SeqVector;
		362	_SeqVector __seqs(__sd->_M_num_threads);
		363
		364	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
		365	{
		366	__seqs[__s] =
		367	std::make_pair(__sd->_M_temporary[__s]
		368	+ __sd->_M_pieces[__iam][__s]._M_begin,
		369	__sd->_M_temporary[__s]
		370	+ __sd->_M_pieces[__iam][__s]._M_end);
		371	}
		372
		373	__possibly_stable_multiway_merge<
		374	__stable, typename _SeqVector::iterator,
		375	_RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
		376	__sd->_M_source + __offset, __comp,
		377	__length_am);
		378
		379	# pragma omp barrier
		380
		381	for (_DifferenceType __i = 0; __i < __length_local; ++__i)
		382	__sd->_M_temporary[__iam][__i].~_ValueType();
		383	::operator delete(__sd->_M_temporary[__iam]);
		384	}
		385
		386	/** @brief PMWMS main call.
		387	* @param __begin Begin iterator of sequence.
		388	* @param __end End iterator of sequence.
		389	* @param __comp Comparator.
		390	* @param __num_threads Number of threads to use.
		391	*/
		392	template
		393	typename _Compare>
		394	void
		395	parallel_sort_mwms(_RAIter __begin, _RAIter __end,
		396	_Compare __comp,
		397	_ThreadIndex __num_threads)
		398	{
		399	_GLIBCXX_CALL(__end - __begin)
		400
		401	typedef std::iterator_traits<_RAIter> _TraitsType;
		402	typedef typename _TraitsType::value_type _ValueType;
		403	typedef typename _TraitsType::difference_type _DifferenceType;
		404
		405	_DifferenceType __n = __end - __begin;
		406
		407	if (__n <= 1)
		408	return;
		409
		410	// at least one element per thread
		411	if (__num_threads > __n)
		412	__num_threads = static_cast<_ThreadIndex>(__n);
		413
		414	// shared variables
		415	_PMWMSSortingData<_RAIter> __sd;
		416	_DifferenceType* __starts;
		417	_DifferenceType __size;
		418
		419	# pragma omp parallel num_threads(__num_threads)
		420	{
		421	__num_threads = omp_get_num_threads(); //no more threads than requested
		422
		423	# pragma omp single
		424	{
		425	__sd._M_num_threads = __num_threads;
		426	__sd._M_source = __begin;
		427
		428	__sd._M_temporary = new _ValueType*[__num_threads];
		429
		430	if (!__exact)
		431	{
		432	__size =
		433	(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
		434	* __num_threads;
		435	__sd._M_samples = static_cast<_ValueType*>
		436	(::operator new(__size * sizeof(_ValueType)));
		437	}
		438	else
		439	__sd._M_samples = 0;
		440
		441	__sd._M_offsets = new _DifferenceType[__num_threads - 1];
		442	__sd._M_pieces
		443	= new std::vector<_Piece<_DifferenceType> >[__num_threads];
		444	for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
		445	__sd._M_pieces[__s].resize(__num_threads);
		446	__starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
		447
		448	_DifferenceType __chunk_length = __n / __num_threads;
		449	_DifferenceType __split = __n % __num_threads;
		450	_DifferenceType __pos = 0;
		451	for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
		452	{
		453	__starts[__i] = __pos;
		454	__pos += ((__i < __split)
		455	? (__chunk_length + 1) : __chunk_length);
		456	}
		457	__starts[__num_threads] = __pos;
		458	} //single
		459
		460	// Now sort in parallel.
		461	parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
		462	} //parallel
		463
		464	delete[] __starts;
		465	delete[] __sd._M_temporary;
		466
		467	if (!__exact)
		468	{
		469	for (_DifferenceType __i = 0; __i < __size; ++__i)
		470	__sd._M_samples[__i].~_ValueType();
		471	::operator delete(__sd._M_samples);
		472	}
		473
		474	delete[] __sd._M_offsets;
		475	delete[] __sd._M_pieces;
		476	}
		477
		478	} //namespace __gnu_parallel
		479
		480	#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/libstdc++-v3/include/parallel/multiway_mergesort.h – Rev 5136