Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2008 Dennis Smit
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * on the rights to use, copy, modify, merge, publish, distribute, sub
  10.  * license, and/or sell copies of the Software, and to permit persons to whom
  11.  * the Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the next
  14.  * paragraph) shall be included in all copies or substantial portions of the
  15.  * Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
  20.  * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  21.  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  22.  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  23.  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  24.  *
  25.  **************************************************************************/
  26.  
  27. /**
  28.  * @file
  29.  * CPU feature detection.
  30.  *
  31.  * @author Dennis Smit
  32.  * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
  33.  */
  34.  
  35. #include "pipe/p_config.h"
  36.  
  37. #include "u_debug.h"
  38. #include "u_cpu_detect.h"
  39.  
  40. #if defined(PIPE_ARCH_PPC)
  41. #if defined(PIPE_OS_APPLE)
  42. #include <sys/sysctl.h>
  43. #else
  44. #include <signal.h>
  45. #include <setjmp.h>
  46. #endif
  47. #endif
  48.  
  49. #if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
  50. #include <sys/param.h>
  51. #include <sys/sysctl.h>
  52. #include <machine/cpu.h>
  53. #endif
  54.  
  55. #if defined(PIPE_OS_FREEBSD)
  56. #include <sys/types.h>
  57. #include <sys/sysctl.h>
  58. #endif
  59.  
  60. #if defined(PIPE_OS_LINUX)
  61. #include <signal.h>
  62. #endif
  63.  
  64. #ifdef PIPE_OS_UNIX
  65. #include <unistd.h>
  66. #endif
  67.  
  68. #if defined(PIPE_OS_WINDOWS)
  69. #include <windows.h>
  70. #if defined(PIPE_CC_MSVC)
  71. #include <intrin.h>
  72. #endif
  73. #endif
  74.  
  75.  
  76. #ifdef DEBUG
  77. DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
  78. #endif
  79.  
  80.  
  81. struct util_cpu_caps util_cpu_caps;
  82.  
  83. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  84. static int has_cpuid(void);
  85. #endif
  86.  
  87.  
  88. #if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
  /* Jump target used to leave the SIGILL handler during the AltiVec probe. */
  static jmp_buf  __lv_powerpc_jmpbuf;
  /* Non-zero only while the probe is armed and a jump is allowed. */
  static volatile sig_atomic_t __lv_powerpc_canjump = 0;

  /**
   * Signal handler installed for SIGILL while probing for AltiVec.
   *
   * When no probe is armed, the default disposition is restored and the
   * signal is re-raised.  In every case the armed flag is then cleared and
   * control jumps to __lv_powerpc_jmpbuf with value 1.
   *
   * @param sig  number of the signal being handled
   */
  static void
  sigill_handler(int sig)
  {
     if (__lv_powerpc_canjump == 0) {
        /* Not ours: hand the signal back to the default action. */
        signal(sig, SIG_DFL);
        raise(sig);
     }

     __lv_powerpc_canjump = 0;
     longjmp(__lv_powerpc_jmpbuf, 1);
  }
  103. #endif
  104.  
  105. #if defined(PIPE_ARCH_PPC)
  106. static void
  107. check_os_altivec_support(void)
  108. {
  109. #if defined(PIPE_OS_APPLE)
  110.    int sels[2] = {CTL_HW, HW_VECTORUNIT};
  111.    int has_vu = 0;
  112.    int len = sizeof (has_vu);
  113.    int err;
  114.  
  115.    err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
  116.  
  117.    if (err == 0) {
  118.       if (has_vu != 0) {
  119.          util_cpu_caps.has_altivec = 1;
  120.       }
  121.    }
  122. #else /* !PIPE_OS_APPLE */
  123.    /* not on Apple/Darwin, do it the brute-force way */
  124.    /* this is borrowed from the libmpeg2 library */
  125.    signal(SIGILL, sigill_handler);
  126.    if (setjmp(__lv_powerpc_jmpbuf)) {
  127.       signal(SIGILL, SIG_DFL);
  128.    } else {
  129.       __lv_powerpc_canjump = 1;
  130.  
  131.       __asm __volatile
  132.          ("mtspr 256, %0\n\t"
  133.           "vand %%v0, %%v0, %%v0"
  134.           :
  135.           : "r" (-1));
  136.  
  137.       signal(SIGILL, SIG_DFL);
  138.       util_cpu_caps.has_altivec = 1;
  139.    }
  140. #endif /* !PIPE_OS_APPLE */
  141. }
  142. #endif /* PIPE_ARCH_PPC */
  143.  
  144.  
  145. #if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
  /**
   * Report whether the CPUID instruction can be used.
   *
   * On 32-bit x86 with GCC this flips bit 0x200000 of EFLAGS and checks
   * whether the change is visible when the flags are read back.  Other
   * 32-bit compilers assume CPUID is present, x86-64 always reports it,
   * and any other architecture reports it as absent.
   *
   * @return non-zero when CPUID is available, 0 otherwise
   */
  static int has_cpuid(void)
  {
  #if defined(PIPE_ARCH_X86)
  #if defined(PIPE_CC_GCC)
     /* Compiler test, so PIPE_CC_GCC (there is no PIPE_OS_GCC); with the old
      * name this probe was never compiled in.
      */
     int a, c;

     __asm __volatile
        ("pushf\n"
         "popl %0\n"
         "movl %0, %1\n"
         "xorl $0x200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "popl %0\n"
         : "=a" (a), "=c" (c)
         :
         : "cc");

     /* a = flags read back after the toggle, c = flags before it. */
     return a != c;
  #else
     /* FIXME */
     return 1;
  #endif
  #elif defined(PIPE_ARCH_X86_64)
     return 1;
  #else
     return 0;
  #endif
  }
  176.  
  177.  
  178. /**
  179.  * @sa cpuid.h included in gcc-4.3 onwards.
  180.  * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
  181.  */
  182. static INLINE void
  183. cpuid(uint32_t ax, uint32_t *p)
  184. {
  185. #if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
  186.    __asm __volatile (
  187.      "xchgl %%ebx, %1\n\t"
  188.      "cpuid\n\t"
  189.      "xchgl %%ebx, %1"
  190.      : "=a" (p[0]),
  191.        "=S" (p[1]),
  192.        "=c" (p[2]),
  193.        "=d" (p[3])
  194.      : "0" (ax)
  195.    );
  196. #elif (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86_64)
  197.    __asm __volatile (
  198.      "cpuid\n\t"
  199.      : "=a" (p[0]),
  200.        "=b" (p[1]),
  201.        "=c" (p[2]),
  202.        "=d" (p[3])
  203.      : "0" (ax)
  204.    );
  205. #elif defined(PIPE_CC_MSVC)
  206.    __cpuid(p, ax);
  207. #else
  208.    p[0] = 0;
  209.    p[1] = 0;
  210.    p[2] = 0;
  211.    p[3] = 0;
  212. #endif
  213. }
  214.  
  215. /**
  216.  * @sa cpuid.h included in gcc-4.4 onwards.
  217.  * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx
  218.  */
  219. static INLINE void
  220. cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
  221. {
  222. #if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
  223.    __asm __volatile (
  224.      "xchgl %%ebx, %1\n\t"
  225.      "cpuid\n\t"
  226.      "xchgl %%ebx, %1"
  227.      : "=a" (p[0]),
  228.        "=S" (p[1]),
  229.        "=c" (p[2]),
  230.        "=d" (p[3])
  231.      : "0" (ax), "2" (cx)
  232.    );
  233. #elif (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86_64)
  234.    __asm __volatile (
  235.      "cpuid\n\t"
  236.      : "=a" (p[0]),
  237.        "=b" (p[1]),
  238.        "=c" (p[2]),
  239.        "=d" (p[3])
  240.      : "0" (ax), "2" (cx)
  241.    );
  242. #elif defined(PIPE_CC_MSVC)
  243.    __cpuidex(p, ax, cx);
  244. #else
  245.    p[0] = 0;
  246.    p[1] = 0;
  247.    p[2] = 0;
  248.    p[3] = 0;
  249. #endif
  250. }
  251.  
  252.  
  253. static INLINE uint64_t xgetbv(void)
  254. {
  255. #if defined(PIPE_CC_GCC)
  256.    uint32_t eax, edx;
  257.  
  258.    __asm __volatile (
  259.      ".byte 0x0f, 0x01, 0xd0" // xgetbv isn't supported on gcc < 4.4
  260.      : "=a"(eax),
  261.        "=d"(edx)
  262.      : "c"(0)
  263.    );
  264.  
  265.    return ((uint64_t)edx << 32) | eax;
  266. #elif defined(PIPE_CC_MSVC) && defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  267.    return _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  268. #else
  269.    return 0;
  270. #endif
  271. }
  272.  
  273.  
  274. #if defined(PIPE_ARCH_X86)
  275. PIPE_ALIGN_STACK static INLINE boolean sse2_has_daz(void)
  276. {
  277.    struct {
  278.       uint32_t pad1[7];
  279.       uint32_t mxcsr_mask;
  280.       uint32_t pad2[128-8];
  281.    } PIPE_ALIGN_VAR(16) fxarea;
  282.  
  283.    fxarea.mxcsr_mask = 0;
  284. #if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO))
  285.    __asm __volatile ("fxsave %0" : "+m" (fxarea));
  286. #elif (defined(PIPE_CC_MSVC) && _MSC_VER >= 1700) || defined(PIPE_CC_ICL)
  287.    /* 1700 = Visual Studio 2012 */
  288.    _fxsave(&fxarea);
  289. #else
  290.    fxarea.mxcsr_mask = 0;
  291. #endif
  292.    return !!(fxarea.mxcsr_mask & (1 << 6));
  293. }
  294. #endif
  295.  
  296. #endif /* X86 or X86_64 */
  297.  
  298. void
  299. util_cpu_detect(void)
  300. {
  301.    static boolean util_cpu_detect_initialized = FALSE;
  302.  
  303.    if(util_cpu_detect_initialized)
  304.       return;
  305.  
  306.    memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
  307.  
  308.    /* Count the number of CPUs in system */
  309. #if defined(PIPE_OS_WINDOWS)
  310.    {
  311.       SYSTEM_INFO system_info;
  312.       GetSystemInfo(&system_info);
  313.       util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors;
  314.    }
  315. #elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN)
  316.    util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
  317.    if (util_cpu_caps.nr_cpus == -1)
  318.       util_cpu_caps.nr_cpus = 1;
  319. #elif defined(PIPE_OS_BSD)
  320.    {
  321.       int mib[2], ncpu;
  322.       int len;
  323.  
  324.       mib[0] = CTL_HW;
  325.       mib[1] = HW_NCPU;
  326.  
  327.       len = sizeof (ncpu);
  328.       sysctl(mib, 2, &ncpu, &len, NULL, 0);
  329.       util_cpu_caps.nr_cpus = ncpu;
  330.    }
  331. #else
  332.    util_cpu_caps.nr_cpus = 1;
  333. #endif
  334.  
  335.    /* Make the fallback cacheline size nonzero so that it can be
  336.     * safely passed to align().
  337.     */
  338.    util_cpu_caps.cacheline = sizeof(void *);
  339.  
  340. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  341.    if (has_cpuid()) {
  342.       uint32_t regs[4];
  343.       uint32_t regs2[4];
  344.  
  345.       util_cpu_caps.cacheline = 32;
  346.  
  347.       /* Get max cpuid level */
  348.       cpuid(0x00000000, regs);
  349.  
  350.       if (regs[0] >= 0x00000001) {
  351.          unsigned int cacheline;
  352.  
  353.          cpuid (0x00000001, regs2);
  354.  
  355.          util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf;
  356.          if (util_cpu_caps.x86_cpu_type == 0xf)
  357.              util_cpu_caps.x86_cpu_type = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */
  358.  
  359.          /* general feature flags */
  360.          util_cpu_caps.has_tsc    = (regs2[3] >>  4) & 1; /* 0x0000010 */
  361.          util_cpu_caps.has_mmx    = (regs2[3] >> 23) & 1; /* 0x0800000 */
  362.          util_cpu_caps.has_sse    = (regs2[3] >> 25) & 1; /* 0x2000000 */
  363.          util_cpu_caps.has_sse2   = (regs2[3] >> 26) & 1; /* 0x4000000 */
  364.          util_cpu_caps.has_sse3   = (regs2[2] >>  0) & 1; /* 0x0000001 */
  365.          util_cpu_caps.has_ssse3  = (regs2[2] >>  9) & 1; /* 0x0000020 */
  366.          util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1;
  367.          util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1;
  368.          util_cpu_caps.has_popcnt = (regs2[2] >> 23) & 1;
  369.          util_cpu_caps.has_avx    = ((regs2[2] >> 28) & 1) && // AVX
  370.                                     ((regs2[2] >> 27) & 1) && // OSXSAVE
  371.                                     ((xgetbv() & 6) == 6);    // XMM & YMM
  372.          util_cpu_caps.has_f16c   = ((regs2[2] >> 29) & 1) && util_cpu_caps.has_avx;
  373.          util_cpu_caps.has_mmx2   = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
  374. #if defined(PIPE_ARCH_X86_64)
  375.          util_cpu_caps.has_daz = 1;
  376. #else
  377.          util_cpu_caps.has_daz = util_cpu_caps.has_sse3 ||
  378.             (util_cpu_caps.has_sse2 && sse2_has_daz());
  379. #endif
  380.  
  381.          cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
  382.          if (cacheline > 0)
  383.             util_cpu_caps.cacheline = cacheline;
  384.       }
  385.       if (util_cpu_caps.has_avx && regs[0] >= 0x00000007) {
  386.          uint32_t regs7[4];
  387.          cpuid_count(0x00000007, 0x00000000, regs7);
  388.          util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1;
  389.       }
  390.  
  391.       if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) {
  392.          /* GenuineIntel */
  393.          util_cpu_caps.has_intel = 1;
  394.       }
  395.  
  396.       cpuid(0x80000000, regs);
  397.  
  398.       if (regs[0] >= 0x80000001) {
  399.  
  400.          cpuid(0x80000001, regs2);
  401.  
  402.          util_cpu_caps.has_mmx  |= (regs2[3] >> 23) & 1;
  403.          util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1;
  404.          util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1;
  405.          util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1;
  406.  
  407.          util_cpu_caps.has_xop = util_cpu_caps.has_avx &&
  408.                                  ((regs2[2] >> 11) & 1);
  409.       }
  410.  
  411.       if (regs[0] >= 0x80000006) {
  412.          /* should we really do this if the clflush size above worked? */
  413.          unsigned int cacheline;
  414.          cpuid(0x80000006, regs2);
  415.          cacheline = regs2[2] & 0xFF;
  416.          if (cacheline > 0)
  417.             util_cpu_caps.cacheline = cacheline;
  418.       }
  419.  
  420.       if (!util_cpu_caps.has_sse) {
  421.          util_cpu_caps.has_sse2 = 0;
  422.          util_cpu_caps.has_sse3 = 0;
  423.          util_cpu_caps.has_ssse3 = 0;
  424.          util_cpu_caps.has_sse4_1 = 0;
  425.       }
  426.    }
  427. #endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
  428.  
  429. #if defined(PIPE_ARCH_PPC)
  430.    check_os_altivec_support();
  431. #endif /* PIPE_ARCH_PPC */
  432.  
  433. #ifdef DEBUG
  434.    if (debug_get_option_dump_cpu()) {
  435.       debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
  436.  
  437.       debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
  438.       debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
  439.  
  440.       debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc);
  441.       debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
  442.       debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
  443.       debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
  444.       debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
  445.       debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
  446.       debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
  447.       debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
  448.       debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2);
  449.       debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx);
  450.       debug_printf("util_cpu_caps.has_avx2 = %u\n", util_cpu_caps.has_avx2);
  451.       debug_printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c);
  452.       debug_printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt);
  453.       debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
  454.       debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
  455.       debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
  456.       debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
  457.       debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
  458.    }
  459. #endif
  460.  
  461.    util_cpu_detect_initialized = TRUE;
  462. }
  463.