Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. // -*- C++ -*-
  2.  
  3. // Copyright (C) 2007-2015 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library.  This library is free
  6. // software; you can redistribute it and/or modify it under the terms
  7. // of the GNU General Public License as published by the Free Software
  8. // Foundation; either version 3, or (at your option) any later
  9. // version.
  10.  
  11. // This library is distributed in the hope that it will be useful, but
  12. // WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14. // General Public License for more details.
  15.  
  16. // Under Section 7 of GPL version 3, you are granted additional
  17. // permissions described in the GCC Runtime Library Exception, version
  18. // 3.1, as published by the Free Software Foundation.
  19.  
  20. // You should have received a copy of the GNU General Public License and
  21. // a copy of the GCC Runtime Library Exception along with this program;
  22. // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23. // <http://www.gnu.org/licenses/>.
  24.  
  25. /** @file parallel/settings.h
  26.  *  @brief Runtime settings and tuning parameters, heuristics to decide
  27.  *  whether to use parallelized algorithms.
  28.  *  This file is a GNU parallel extension to the Standard C++ Library.
  29.  *
  30.  *  @section parallelization_decision
  31.  *  The decision whether to run an algorithm in parallel.
  32.  *
  33.  *  There are several ways the user can switch on and __off the parallel
  34.  *  execution of an algorithm, both at compile- and run-time.
  35.  *
  36.  *  Only sequential execution can be forced at compile-time.  This
  37.  *  reduces code size and protects code parts that have
  38.  *  non-thread-safe side effects.
  39.  *
  40.  *  Ultimately, forcing parallel execution at compile-time makes
  41.  *  sense.  Often, the sequential algorithm implementation is used as
  42.  *  a subroutine, so no reduction in code size can be achieved.  Also,
  43.  *  the machine the program is run on might have only one processor
  44.  *  core, so to avoid overhead, the algorithm is executed
  45.  *  sequentially.
  46.  *
  47.  *  To force sequential execution of an algorithm ultimately at
  48.  *  compile-time, the user must add the tag
  49. *  gnu_parallel::sequential_tag() to the end of the parameter list,
  50.  *  e. g.
  51.  *
  52.  *  \code
  53.  *  std::sort(__v.begin(), __v.end(), __gnu_parallel::sequential_tag());
  54.  *  \endcode
  55.  *
  56.  *  This is compatible with all overloaded algorithm variants.  No
  57.  *  additional code will be instantiated, at all.  The same holds for
  58.  *  most algorithm calls with iterators not providing random access.
  59.  *
  60.  *  If the algorithm call is not forced to be executed sequentially
  61.  *  at compile-time, the decision is made at run-time.
  62.  *  The global variable __gnu_parallel::_Settings::algorithm_strategy
  63.  *  is checked. _It is a tristate variable corresponding to:
  64.  *
  65.  *  a. force_sequential, meaning the sequential algorithm is executed.
  66. *  b. force_parallel, meaning the parallel algorithm is executed.
  67. *  c. heuristic
  68.  *
  69.  *  For heuristic, the parallel algorithm implementation is called
  70.  *  only if the input size is sufficiently large.  For most
  71.  *  algorithms, the input size is the (combined) length of the input
  72. *  sequence(__s).  The threshold can be set by the user, individually
  73.  *  for each algorithm.  The according variables are called
  74. *  gnu_parallel::_Settings::[algorithm]_minimal_n .
  75.  *
  76.  *  For some of the algorithms, there are even more tuning options,
  77.  *  e. g. the ability to choose from multiple algorithm variants.  See
  78.  *  below for details.
  79.  */
  80.  
  81. // Written by Johannes Singler and Felix Putze.
  82.  
  83. #ifndef _GLIBCXX_PARALLEL_SETTINGS_H
  84. #define _GLIBCXX_PARALLEL_SETTINGS_H 1
  85.  
  86. #include <parallel/types.h>
  87.  
  88. /**
  89.   * @brief Determine at compile(?)-time if the parallel variant of an
  90.   * algorithm should be called.
  91.   * @param __c A condition that is convertible to bool that is overruled by
  92.   * __gnu_parallel::_Settings::algorithm_strategy. Usually a decision
  93.   * based on the input size.
  94.   */
  95. #define _GLIBCXX_PARALLEL_CONDITION(__c) \
  96.   (__gnu_parallel::_Settings::get().algorithm_strategy \
  97.     != __gnu_parallel::force_sequential \
  98.   && ((__gnu_parallel::__get_max_threads() > 1 && (__c)) \
  99.      || __gnu_parallel::_Settings::get().algorithm_strategy \
  100.         == __gnu_parallel::force_parallel))
  101.  
  102. /*
  103. inline bool
  104. parallel_condition(bool __c)
  105. {
  106.   bool ret = false;
  107.   const _Settings& __s = _Settings::get();
  108.   if (__s.algorithm_strategy != force_seqential)
  109.     {
  110.       if (__s.algorithm_strategy == force_parallel)
  111.         ret = true;
  112.       else
  113.         ret = __get_max_threads() > 1 && __c;
  114.     }
  115.   return ret;
  116. }
  117. */
  118.  
  119. namespace __gnu_parallel
  120. {
  121.   /// class _Settings
  122.   /// Run-time settings for the parallel mode including all tunable parameters.
  123.   struct _Settings
  124.   {
  125.     _AlgorithmStrategy          algorithm_strategy;
  126.    
  127.     _SortAlgorithm              sort_algorithm;
  128.     _PartialSumAlgorithm        partial_sum_algorithm;
  129.     _MultiwayMergeAlgorithm     multiway_merge_algorithm;
  130.     _FindAlgorithm              find_algorithm;
  131.  
  132.     _SplittingAlgorithm         sort_splitting;
  133.     _SplittingAlgorithm         merge_splitting;
  134.     _SplittingAlgorithm         multiway_merge_splitting;
  135.  
  136.     // Per-algorithm settings.
  137.  
  138.     /// Minimal input size for accumulate.
  139.     _SequenceIndex              accumulate_minimal_n;
  140.  
  141.     /// Minimal input size for adjacent_difference.
  142.     unsigned int                adjacent_difference_minimal_n;
  143.  
  144.     /// Minimal input size for count and count_if.
  145.     _SequenceIndex              count_minimal_n;
  146.  
  147.     /// Minimal input size for fill.
  148.     _SequenceIndex              fill_minimal_n;
  149.  
  150.     /// Block size increase factor for find.
  151.     double                      find_increasing_factor;
  152.  
  153.     /// Initial block size for find.
  154.     _SequenceIndex              find_initial_block_size;
  155.  
  156.     /// Maximal block size for find.
  157.     _SequenceIndex              find_maximum_block_size;
  158.  
  159.     /// Start with looking for this many elements sequentially, for find.
  160.     _SequenceIndex              find_sequential_search_size;
  161.  
  162.     /// Minimal input size for for_each.
  163.     _SequenceIndex              for_each_minimal_n;
  164.  
  165.     /// Minimal input size for generate.
  166.     _SequenceIndex              generate_minimal_n;
  167.  
  168.     /// Minimal input size for max_element.
  169.     _SequenceIndex              max_element_minimal_n;
  170.  
  171.     /// Minimal input size for merge.
  172.     _SequenceIndex              merge_minimal_n;
  173.  
  174.     /// Oversampling factor for merge.
  175.     unsigned int                merge_oversampling;
  176.  
  177.     /// Minimal input size for min_element.
  178.     _SequenceIndex              min_element_minimal_n;
  179.  
  180.     /// Minimal input size for multiway_merge.
  181.     _SequenceIndex              multiway_merge_minimal_n;
  182.  
  183.     /// Oversampling factor for multiway_merge.
  184.     int                         multiway_merge_minimal_k;
  185.  
  186.     /// Oversampling factor for multiway_merge.
  187.     unsigned int                multiway_merge_oversampling;
  188.  
  189.     /// Minimal input size for nth_element.
  190.     _SequenceIndex              nth_element_minimal_n;
  191.  
  192.     /// Chunk size for partition.
  193.     _SequenceIndex              partition_chunk_size;
  194.  
  195.     /// Chunk size for partition, relative to input size.  If > 0.0,
  196.     /// this value overrides partition_chunk_size.
  197.     double                      partition_chunk_share;
  198.  
  199.     /// Minimal input size for partition.
  200.     _SequenceIndex              partition_minimal_n;
  201.  
  202.     /// Minimal input size for partial_sort.
  203.     _SequenceIndex              partial_sort_minimal_n;
  204.  
  205.     /// Ratio for partial_sum. Assume "sum and write result" to be
  206.     /// this factor slower than just "sum".
  207.     float                       partial_sum_dilation;
  208.  
  209.     /// Minimal input size for partial_sum.
  210.     unsigned int                partial_sum_minimal_n;
  211.  
  212.     /// Minimal input size for random_shuffle.
  213.     unsigned int                random_shuffle_minimal_n;
  214.  
  215.     /// Minimal input size for replace and replace_if.
  216.     _SequenceIndex              replace_minimal_n;
  217.  
  218.     /// Minimal input size for set_difference.
  219.     _SequenceIndex              set_difference_minimal_n;
  220.  
  221.     /// Minimal input size for set_intersection.
  222.     _SequenceIndex              set_intersection_minimal_n;
  223.  
  224.     /// Minimal input size for set_symmetric_difference.
  225.     _SequenceIndex              set_symmetric_difference_minimal_n;
  226.  
  227.     /// Minimal input size for set_union.
  228.     _SequenceIndex              set_union_minimal_n;
  229.  
  230.     /// Minimal input size for parallel sorting.
  231.     _SequenceIndex              sort_minimal_n;
  232.  
  233.     /// Oversampling factor for parallel std::sort (MWMS).
  234.     unsigned int                sort_mwms_oversampling;
  235.  
  236.     /// Such many samples to take to find a good pivot (quicksort).
  237.     unsigned int                sort_qs_num_samples_preset;
  238.  
  239.     /// Maximal subsequence __length to switch to unbalanced __base case.
  240.     /// Applies to std::sort with dynamically load-balanced quicksort.
  241.     _SequenceIndex              sort_qsb_base_case_maximal_n;
  242.  
  243.     /// Minimal input size for parallel std::transform.
  244.     _SequenceIndex              transform_minimal_n;
  245.  
  246.     /// Minimal input size for unique_copy.
  247.     _SequenceIndex              unique_copy_minimal_n;
  248.  
  249.     _SequenceIndex              workstealing_chunk_size;
  250.  
  251.     // Hardware dependent tuning parameters.
  252.  
  253.     /// size of the L1 cache in bytes (underestimation).
  254.     unsigned long long          L1_cache_size;
  255.  
  256.     /// size of the L2 cache in bytes (underestimation).
  257.     unsigned long long          L2_cache_size;
  258.  
  259.     /// size of the Translation Lookaside Buffer (underestimation).
  260.     unsigned int                TLB_size;
  261.  
  262.     /// Overestimation of cache line size.  Used to avoid false
  263.     /// sharing, i.e. elements of different threads are at least this
  264.     /// amount apart.
  265.     unsigned int                cache_line_size;
  266.  
  267.     // Statistics.
  268.  
  269.     /// The number of stolen ranges in load-balanced quicksort.
  270.     _SequenceIndex              qsb_steals;
  271.  
  272.     /// Minimal input size for search and search_n.
  273.     _SequenceIndex              search_minimal_n;
  274.  
  275.     /// Block size scale-down factor with respect to current position.
  276.     float                       find_scale_factor;
  277.  
  278.     /// Get the global settings.
  279.     _GLIBCXX_CONST static const _Settings&
  280.     get() throw();
  281.  
  282.     /// Set the global settings.
  283.     static void
  284.     set(_Settings&) throw();
  285.  
  286.     explicit
  287.     _Settings() :
  288.             algorithm_strategy(heuristic),
  289.             sort_algorithm(MWMS),
  290.             partial_sum_algorithm(LINEAR),
  291.             multiway_merge_algorithm(LOSER_TREE),
  292.             find_algorithm(CONSTANT_SIZE_BLOCKS),
  293.             sort_splitting(EXACT),
  294.             merge_splitting(EXACT),
  295.             multiway_merge_splitting(EXACT),
  296.             accumulate_minimal_n(1000),
  297.             adjacent_difference_minimal_n(1000),
  298.             count_minimal_n(1000),
  299.             fill_minimal_n(1000),
  300.             find_increasing_factor(2.0),
  301.             find_initial_block_size(256),
  302.             find_maximum_block_size(8192),
  303.             find_sequential_search_size(256),
  304.             for_each_minimal_n(1000),
  305.             generate_minimal_n(1000),
  306.             max_element_minimal_n(1000),
  307.             merge_minimal_n(1000),
  308.             merge_oversampling(10),
  309.             min_element_minimal_n(1000),
  310.             multiway_merge_minimal_n(1000),
  311.             multiway_merge_minimal_k(2), multiway_merge_oversampling(10),
  312.             nth_element_minimal_n(1000),
  313.             partition_chunk_size(1000),
  314.             partition_chunk_share(0.0),
  315.             partition_minimal_n(1000),
  316.             partial_sort_minimal_n(1000),
  317.             partial_sum_dilation(1.0f),
  318.             partial_sum_minimal_n(1000),
  319.             random_shuffle_minimal_n(1000),
  320.             replace_minimal_n(1000),
  321.             set_difference_minimal_n(1000),
  322.             set_intersection_minimal_n(1000),
  323.             set_symmetric_difference_minimal_n(1000),
  324.             set_union_minimal_n(1000),
  325.             sort_minimal_n(1000),
  326.             sort_mwms_oversampling(10),
  327.             sort_qs_num_samples_preset(100),
  328.             sort_qsb_base_case_maximal_n(100),
  329.             transform_minimal_n(1000),
  330.             unique_copy_minimal_n(10000),
  331.             workstealing_chunk_size(100),
  332.             L1_cache_size(16 << 10),
  333.             L2_cache_size(256 << 10),
  334.             TLB_size(128),
  335.             cache_line_size(64),
  336.             qsb_steals(0),
  337.             search_minimal_n(1000),
  338.             find_scale_factor(0.01f)
  339.     { }
  340.   };
  341. }
  342.  
  343. #endif /* _GLIBCXX_PARALLEL_SETTINGS_H */
  344.