Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. // Optimizations for random number extensions, x86 version -*- C++ -*-
  2.  
  3. // Copyright (C) 2012-2015 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library.  This library is free
  6. // software; you can redistribute it and/or modify it under the
  7. // terms of the GNU General Public License as published by the
  8. // Free Software Foundation; either version 3, or (at your option)
  9. // any later version.
  10.  
  11. // This library is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. // GNU General Public License for more details.
  15.  
  16. // Under Section 7 of GPL version 3, you are granted additional
  17. // permissions described in the GCC Runtime Library Exception, version
  18. // 3.1, as published by the Free Software Foundation.
  19.  
  20. // You should have received a copy of the GNU General Public License and
  21. // a copy of the GCC Runtime Library Exception along with this program;
  22. // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23. // <http://www.gnu.org/licenses/>.
  24.  
  25. /** @file ext/opt_random.h
  26.  *  This is an internal header file, included by other library headers.
  27.  *  Do not attempt to use it directly. @headername{ext/random}
  28.  */
  29.  
  30. #ifndef _EXT_OPT_RANDOM_H
  31. #define _EXT_OPT_RANDOM_H 1
  32.  
  33. #pragma GCC system_header
  34.  
  35. #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  36.  
  37. #ifdef __SSE2__
  38.  
  39. namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
  40. {
  41. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  42.  
  43.   namespace {
  44.  
  /**
   *  Compute one 128-bit word of the SIMD-oriented Mersenne Twister
   *  recurrence using SSE2 intrinsics.
   *
   *  The result is the XOR of five terms:
   *    - @p __a itself,
   *    - @p __a shifted left by @p __sl2 bytes across the whole register,
   *    - @p __c shifted right by @p __sr2 bytes across the whole register,
   *    - @p __d with every 32-bit lane shifted left by @p __sl1 bits,
   *    - @p __b with every 32-bit lane shifted right by @p __sr1 bits and
   *      then masked lane-wise with (__msk1, __msk2, __msk3, __msk4),
   *      where __msk1 applies to the lowest lane.
   *
   *  XOR is associative and commutative, so the order in which the terms
   *  are folded together does not affect the result.
   */
  template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2,
           uint32_t __msk1, uint32_t __msk2, uint32_t __msk3, uint32_t __msk4>
    inline __m128i
    __sse2_recursion(__m128i __a, __m128i __b, __m128i __c, __m128i __d)
    {
      // _mm_set_epi32 takes the highest lane first, hence the reversed order.
      const __m128i __lane_mask
        = _mm_set_epi32(__msk4, __msk3, __msk2, __msk1);

      // Whole-register byte shifts.
      const __m128i __a_shl = _mm_slli_si128(__a, __sl2);
      const __m128i __c_shr = _mm_srli_si128(__c, __sr2);

      // Per-lane bit shifts.
      const __m128i __d_shl = _mm_slli_epi32(__d, __sl1);
      const __m128i __b_masked
        = _mm_and_si128(_mm_srli_epi32(__b, __sr1), __lane_mask);

      __m128i __acc = _mm_xor_si128(__c_shr, __a);
      __acc = _mm_xor_si128(__acc, __d_shl);
      __acc = _mm_xor_si128(__acc, __a_shl);
      return _mm_xor_si128(__acc, __b_masked);
    }
  60.  
  61.   }
  62.  
  63.  
  64. #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ  1
  65.   template<typename _UIntType, size_t __m,
  66.            size_t __pos1, size_t __sl1, size_t __sl2,
  67.            size_t __sr1, size_t __sr2,
  68.            uint32_t __msk1, uint32_t __msk2,
  69.            uint32_t __msk3, uint32_t __msk4,
  70.            uint32_t __parity1, uint32_t __parity2,
  71.            uint32_t __parity3, uint32_t __parity4>
  72.     void simd_fast_mersenne_twister_engine<_UIntType, __m,
  73.                                            __pos1, __sl1, __sl2, __sr1, __sr2,
  74.                                            __msk1, __msk2, __msk3, __msk4,
  75.                                            __parity1, __parity2, __parity3,
  76.                                            __parity4>::
  77.     _M_gen_rand(void)
  78.     {
  79.       __m128i __r1 = _mm_load_si128(&_M_state[_M_nstate - 2]);
  80.       __m128i __r2 = _mm_load_si128(&_M_state[_M_nstate - 1]);
  81.  
  82.       size_t __i;
  83.       for (__i = 0; __i < _M_nstate - __pos1; ++__i)
  84.         {
  85.           __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
  86.                                          __msk1, __msk2, __msk3, __msk4>
  87.             (_M_state[__i], _M_state[__i + __pos1], __r1, __r2);
  88.           _mm_store_si128(&_M_state[__i], __r);
  89.           __r1 = __r2;
  90.           __r2 = __r;
  91.         }
  92.       for (; __i < _M_nstate; ++__i)
  93.         {
  94.           __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
  95.                                          __msk1, __msk2, __msk3, __msk4>
  96.             (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2);
  97.           _mm_store_si128(&_M_state[__i], __r);
  98.           __r1 = __r2;
  99.           __r2 = __r;
  100.         }
  101.  
  102.       _M_pos = 0;
  103.     }
  104.  
  105.  
  106. #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL     1
  107.   template<typename _UIntType, size_t __m,
  108.            size_t __pos1, size_t __sl1, size_t __sl2,
  109.            size_t __sr1, size_t __sr2,
  110.            uint32_t __msk1, uint32_t __msk2,
  111.            uint32_t __msk3, uint32_t __msk4,
  112.            uint32_t __parity1, uint32_t __parity2,
  113.            uint32_t __parity3, uint32_t __parity4>
  114.     bool
  115.     operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
  116.                __m, __pos1, __sl1, __sl2, __sr1, __sr2,
  117.                __msk1, __msk2, __msk3, __msk4,
  118.                __parity1, __parity2, __parity3, __parity4>& __lhs,
  119.                const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
  120.                __m, __pos1, __sl1, __sl2, __sr1, __sr2,
  121.                __msk1, __msk2, __msk3, __msk4,
  122.                __parity1, __parity2, __parity3, __parity4>& __rhs)
  123.     {
  124.       __m128i __res = _mm_cmpeq_epi8(__lhs._M_state[0], __rhs._M_state[0]);
  125.       for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
  126.         __res = _mm_and_si128(__res, _mm_cmpeq_epi8(__lhs._M_state[__i],
  127.                                                     __rhs._M_state[__i]));
  128.       return (_mm_movemask_epi8(__res) == 0xffff
  129.               && __lhs._M_pos == __rhs._M_pos);
  130.     }
  131.  
  132.  
  133. _GLIBCXX_END_NAMESPACE_VERSION
  134. } // namespace
  135.  
  136. #endif // __SSE2__
  137.  
  138. #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  139.  
  140. #endif // _EXT_OPT_RANDOM_H
  141.