Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2008 Dennis Smit
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * on the rights to use, copy, modify, merge, publish, distribute, sub
  10.  * license, and/or sell copies of the Software, and to permit persons to whom
  11.  * the Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the next
  14.  * paragraph) shall be included in all copies or substantial portions of the
  15.  * Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
  20.  * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  21.  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  22.  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  23.  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  24.  *
  25.  **************************************************************************/
  26.  
  27. /**
  28.  * @file
  29.  * CPU feature detection.
  30.  *
  31.  * @author Dennis Smit
  32.  * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
  33.  */
  34.  
  35. #include "pipe/p_config.h"
  36.  
  37. #include "u_debug.h"
  38. #include "u_cpu_detect.h"
  39.  
  40. #if defined(PIPE_ARCH_PPC)
  41. #if defined(PIPE_OS_APPLE)
  42. #include <sys/sysctl.h>
  43. #else
  44. #include <signal.h>
  45. #include <setjmp.h>
  46. #endif
  47. #endif
  48.  
  49. #if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
  50. #include <sys/param.h>
  51. #include <sys/sysctl.h>
  52. #include <machine/cpu.h>
  53. #endif
  54.  
  55. #if defined(PIPE_OS_FREEBSD)
  56. #include <sys/types.h>
  57. #include <sys/sysctl.h>
  58. #endif
  59.  
  60. #if defined(PIPE_OS_LINUX)
  61. #include <signal.h>
  62. #endif
  63.  
  64. #ifdef PIPE_OS_UNIX
  65. #include <unistd.h>
  66. #endif
  67.  
  68. #if defined(PIPE_OS_WINDOWS)
  69. #include <windows.h>
  70. #if defined(MSVC)
  71. #include <intrin.h>
  72. #endif
  73. #endif
  74.  
  75.  
  76. #ifdef DEBUG
  77. DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
  78. #endif
  79.  
  80.  
  81. struct util_cpu_caps util_cpu_caps;
  82.  
  83. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  84. static int has_cpuid(void);
  85. #endif
  86.  
  87.  
  88. #if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
  89. static jmp_buf  __lv_powerpc_jmpbuf;
  90. static volatile sig_atomic_t __lv_powerpc_canjump = 0;
  91.  
  92. static void
  93. sigill_handler(int sig)
  94. {
  95.    if (!__lv_powerpc_canjump) {
  96.       signal (sig, SIG_DFL);
  97.       raise (sig);
  98.    }
  99.  
  100.    __lv_powerpc_canjump = 0;
  101.    longjmp(__lv_powerpc_jmpbuf, 1);
  102. }
  103. #endif
  104.  
  105. #if defined(PIPE_ARCH_PPC)
  106. static void
  107. check_os_altivec_support(void)
  108. {
  109. #if defined(PIPE_OS_APPLE)
  110.    int sels[2] = {CTL_HW, HW_VECTORUNIT};
  111.    int has_vu = 0;
  112.    int len = sizeof (has_vu);
  113.    int err;
  114.  
  115.    err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
  116.  
  117.    if (err == 0) {
  118.       if (has_vu != 0) {
  119.          util_cpu_caps.has_altivec = 1;
  120.       }
  121.    }
  122. #else /* !PIPE_OS_APPLE */
  123.    /* not on Apple/Darwin, do it the brute-force way */
  124.    /* this is borrowed from the libmpeg2 library */
  125.    signal(SIGILL, sigill_handler);
  126.    if (setjmp(__lv_powerpc_jmpbuf)) {
  127.       signal(SIGILL, SIG_DFL);
  128.    } else {
  129.       __lv_powerpc_canjump = 1;
  130.  
  131.       __asm __volatile
  132.          ("mtspr 256, %0\n\t"
  133.           "vand %%v0, %%v0, %%v0"
  134.           :
  135.           : "r" (-1));
  136.  
  137.       signal(SIGILL, SIG_DFL);
  138.       util_cpu_caps.has_altivec = 1;
  139.    }
  140. #endif /* !PIPE_OS_APPLE */
  141. }
  142. #endif /* PIPE_ARCH_PPC */
  143.  
  144.  
  145. #if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
  /**
   * Report whether the CPUID instruction can be used.
   *
   * - 32-bit x86 built with GCC: tries to flip bit 21 (0x200000) of EFLAGS
   *   and reports whether the value read back differs from the original.
   * - 32-bit x86 with any other compiler: no probe is implemented, CPUID is
   *   assumed to exist.
   * - x86-64: always available.
   * - anything else: not available.
   *
   * @return non-zero if CPUID is usable, zero otherwise.
   */
  static int has_cpuid(void)
  {
  #if defined(PIPE_ARCH_X86)
     /* The compiler macro is PIPE_CC_GCC (as tested by cpuid() in this
      * file); the former PIPE_OS_GCC test never matched, which turned the
      * probe below into dead code.
      */
  #if defined(PIPE_CC_GCC)
     int a, c;

     __asm __volatile
        ("pushf\n"
         "popl %0\n"
         "movl %0, %1\n"
         "xorl $0x200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "popl %0\n"
         : "=a" (a), "=c" (c)
         :
         : "cc");

     /* a: EFLAGS after the attempted toggle, c: EFLAGS before it. */
     return a != c;
  #else
     /* FIXME: no probe for this compiler, assume CPUID is present. */
     return 1;
  #endif
  #elif defined(PIPE_ARCH_X86_64)
     return 1;
  #else
     return 0;
  #endif
  }
  176.  
  177.  
  178. /**
  179.  * @sa cpuid.h included in gcc-4.3 onwards.
  180.  * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
  181.  */
  182. static INLINE void
  183. cpuid(uint32_t ax, uint32_t *p)
  184. {
  185. #if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
  186.    __asm __volatile (
  187.      "xchgl %%ebx, %1\n\t"
  188.      "cpuid\n\t"
  189.      "xchgl %%ebx, %1"
  190.      : "=a" (p[0]),
  191.        "=S" (p[1]),
  192.        "=c" (p[2]),
  193.        "=d" (p[3])
  194.      : "0" (ax)
  195.    );
  196. #elif (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86_64)
  197.    __asm __volatile (
  198.      "cpuid\n\t"
  199.      : "=a" (p[0]),
  200.        "=b" (p[1]),
  201.        "=c" (p[2]),
  202.        "=d" (p[3])
  203.      : "0" (ax)
  204.    );
  205. #elif defined(PIPE_CC_MSVC)
  206.    __cpuid(p, ax);
  207. #else
  208.    p[0] = 0;
  209.    p[1] = 0;
  210.    p[2] = 0;
  211.    p[3] = 0;
  212. #endif
  213. }
  214. #endif /* X86 or X86_64 */
  215.  
  216. void
  217. util_cpu_detect(void)
  218. {
  219.    static boolean util_cpu_detect_initialized = FALSE;
  220.  
  221.    if(util_cpu_detect_initialized)
  222.       return;
  223.  
  224.    memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
  225.  
  226.    /* Count the number of CPUs in system */
  227. #if defined(PIPE_OS_WINDOWS)
  228.    {
  229.       SYSTEM_INFO system_info;
  230.       GetSystemInfo(&system_info);
  231.       util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors;
  232.    }
  233. #elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN)
  234.    util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
  235.    if (util_cpu_caps.nr_cpus == -1)
  236.       util_cpu_caps.nr_cpus = 1;
  237. #elif defined(PIPE_OS_BSD)
  238.    {
  239.       int mib[2], ncpu;
  240.       int len;
  241.  
  242.       mib[0] = CTL_HW;
  243.       mib[1] = HW_NCPU;
  244.  
  245.       len = sizeof (ncpu);
  246.       sysctl(mib, 2, &ncpu, &len, NULL, 0);
  247.       util_cpu_caps.nr_cpus = ncpu;
  248.    }
  249. #else
  250.    util_cpu_caps.nr_cpus = 1;
  251. #endif
  252.  
  253.    /* Make the fallback cacheline size nonzero so that it can be
  254.     * safely passed to align().
  255.     */
  256.    util_cpu_caps.cacheline = sizeof(void *);
  257.  
  258. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  259.    if (has_cpuid()) {
  260.       uint32_t regs[4];
  261.       uint32_t regs2[4];
  262.  
  263.       util_cpu_caps.cacheline = 32;
  264.  
  265.       /* Get max cpuid level */
  266.       cpuid(0x00000000, regs);
  267.  
  268.       if (regs[0] >= 0x00000001) {
  269.          unsigned int cacheline;
  270.  
  271.          cpuid (0x00000001, regs2);
  272.  
  273.          util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf;
  274.          if (util_cpu_caps.x86_cpu_type == 0xf)
  275.              util_cpu_caps.x86_cpu_type = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */
  276.  
  277.          /* general feature flags */
  278.          util_cpu_caps.has_tsc    = (regs2[3] >>  4) & 1; /* 0x0000010 */
  279.          util_cpu_caps.has_mmx    = (regs2[3] >> 23) & 1; /* 0x0800000 */
  280.          util_cpu_caps.has_sse    = (regs2[3] >> 25) & 1; /* 0x2000000 */
  281.          util_cpu_caps.has_sse2   = (regs2[3] >> 26) & 1; /* 0x4000000 */
  282.          util_cpu_caps.has_sse3   = (regs2[2] >>  0) & 1; /* 0x0000001 */
  283.          util_cpu_caps.has_ssse3  = (regs2[2] >>  9) & 1; /* 0x0000020 */
  284.          util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1;
  285.          util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1;
  286.          util_cpu_caps.has_popcnt = (regs2[2] >> 23) & 1;
  287.          util_cpu_caps.has_avx    = (regs2[2] >> 28) & 1;
  288.          util_cpu_caps.has_f16c   = (regs2[2] >> 29) & 1;
  289.          util_cpu_caps.has_mmx2   = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
  290.  
  291.          cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
  292.          if (cacheline > 0)
  293.             util_cpu_caps.cacheline = cacheline;
  294.       }
  295.  
  296.       if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) {
  297.          /* GenuineIntel */
  298.          util_cpu_caps.has_intel = 1;
  299.       }
  300.  
  301.       cpuid(0x80000000, regs);
  302.  
  303.       if (regs[0] >= 0x80000001) {
  304.  
  305.          cpuid(0x80000001, regs2);
  306.  
  307.          util_cpu_caps.has_mmx  |= (regs2[3] >> 23) & 1;
  308.          util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1;
  309.          util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1;
  310.          util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1;
  311.       }
  312.  
  313.       if (regs[0] >= 0x80000006) {
  314.          cpuid(0x80000006, regs2);
  315.          util_cpu_caps.cacheline = regs2[2] & 0xFF;
  316.       }
  317.  
  318.       if (!util_cpu_caps.has_sse) {
  319.          util_cpu_caps.has_sse2 = 0;
  320.          util_cpu_caps.has_sse3 = 0;
  321.          util_cpu_caps.has_ssse3 = 0;
  322.          util_cpu_caps.has_sse4_1 = 0;
  323.       }
  324.    }
  325. #endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
  326.  
  327. #if defined(PIPE_ARCH_PPC)
  328.    check_os_altivec_support();
  329. #endif /* PIPE_ARCH_PPC */
  330.  
  331. #ifdef DEBUG
  332.    if (debug_get_option_dump_cpu()) {
  333.       debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
  334.  
  335.       debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
  336.       debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
  337.  
  338.       debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc);
  339.       debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
  340.       debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
  341.       debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
  342.       debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
  343.       debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
  344.       debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
  345.       debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
  346.       debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2);
  347.       debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx);
  348.       debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
  349.       debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
  350.       debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
  351.    }
  352. #endif
  353.  
  354.    util_cpu_detect_initialized = TRUE;
  355. }
  356.