Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. #ifndef MMX_X64_H_INCLUDED
  2. #define MMX_X64_H_INCLUDED
  3.  
  4. /* Implementation of x64 MMX substitution functions, before
  5.  * pixman is reimplemented not to use __m64 type on Visual C++
  6.  *
  7.  * Copyright (C)2009 by George Yohng
  8.  * Released in public domain.
  9.  */
  10.  
  11. #include <intrin.h>
  12.  
/* Reinterpret a 64-bit integer lvalue in place as an __m64 (M64C) or as an
 * unsigned long long (M64U).  NOTE(review): these pointer casts violate the
 * strict-aliasing rule; tolerable only because this header is specific to
 * Visual C++ on x64, where the reinterpretation works in practice. */
#define M64C(a) (*(const __m64 *)(&a))
#define M64U(a) (*(const unsigned long long *)(&a))
  15.  
  16. __inline __m64
  17. _m_from_int (int a)
  18. {
  19.     long long i64 = a;
  20.  
  21.     return M64C (i64);
  22. }
  23.  
  24. __inline __m64
  25. _mm_setzero_si64 ()
  26. {
  27.     long long i64 = 0;
  28.  
  29.     return M64C (i64);
  30. }
  31.  
  32. __inline __m64
  33. _mm_set_pi32 (int i1,   int i0)
  34. {
  35.     unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32);
  36.  
  37.     return M64C (i64);
  38. }
  39.  
  40. __inline void
  41. _m_empty ()
  42. {
  43. }
  44.  
  45. __inline __m64
  46. _mm_set1_pi16 (short w)
  47. {
  48.     unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL;
  49.  
  50.     return M64C (i64);
  51. }
  52.  
  53. __inline int
  54. _m_to_int (__m64 m)
  55. {
  56.     return m.m64_i32[0];
  57. }
  58.  
  59. __inline __m64
  60. _mm_movepi64_pi64 (__m128i a)
  61. {
  62.     return M64C (a.m128i_i64[0]);
  63. }
  64.  
  65. __inline __m64
  66. _m_pand (__m64 a, __m64 b)
  67. {
  68.     unsigned long long i64 = M64U (a) & M64U (b);
  69.  
  70.     return M64C (i64);
  71. }
  72.  
  73. __inline __m64
  74. _m_por (__m64 a, __m64 b)
  75. {
  76.     unsigned long long i64 = M64U (a) | M64U (b);
  77.  
  78.     return M64C (i64);
  79. }
  80.  
  81. __inline __m64
  82. _m_pxor (__m64 a, __m64 b)
  83. {
  84.     unsigned long long i64 = M64U (a) ^ M64U (b);
  85.  
  86.     return M64C (i64);
  87. }
  88.  
  89. __inline __m64
  90. _m_pmulhuw (__m64 a, __m64 b)        /* unoptimized */
  91. {
  92.     unsigned short d[4] =
  93.     {
  94.         (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16),
  95.         (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16),
  96.         (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16),
  97.         (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16)
  98.     };
  99.  
  100.     return M64C (d[0]);
  101. }
  102.  
  103. __inline __m64
  104. _m_pmullw2 (__m64 a, __m64 b)        /* unoptimized */
  105. {
  106.     unsigned short d[4] =
  107.     {
  108.         (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])),
  109.         (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])),
  110.         (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])),
  111.         (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))
  112.     };
  113.  
  114.     return M64C (d[0]);
  115. }
  116.  
  117. __inline __m64
  118. _m_pmullw (__m64 a, __m64 b)        /* unoptimized */
  119. {
  120.     unsigned long long x =
  121.         ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])))  +
  122.         (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16)  +
  123.         (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32)  +
  124.         (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48);
  125.  
  126.     return M64C (x);
  127. }
  128.  
/* Per-byte unsigned saturating add (PADDUSB), done SWAR-style on the whole
 * 64-bit value at once. */
__inline __m64
_m_paddusb (__m64 a, __m64 b)        /* unoptimized */
{
    /* Sum the even-indexed bytes of a and b inside 16-bit lanes; each lane
     * holds a 9-bit result, with the carry landing in bit 8 of the lane. */
    unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) +
                           (M64U (b) & 0x00FF00FF00FF00FFULL);

    /* Same for the odd-indexed bytes. */
    unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) +
                           ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL);

    /* Saturate: wherever a lane carried (bit 8 set), multiplying the carry
     * bit by 0xFF smears 0xFF over that lane's low byte. */
    x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
    y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;

    /* Re-interleave the even (x) and odd (y) result bytes. */
    x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8);

    return M64C (x);
}
  145.  
  146. __inline __m64
  147. _m_paddusw (__m64 a, __m64 b)        /* unoptimized */
  148. {
  149.     unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) +
  150.                            (M64U (b) & 0x0000FFFF0000FFFFULL);
  151.  
  152.     unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) +
  153.                            ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL);
  154.  
  155.     x |= ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
  156.     y |= ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
  157.  
  158.     x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16);
  159.  
  160.     return M64C (x);
  161. }
  162.  
  163. __inline __m64
  164. _m_pshufw (__m64 a, int n)         /* unoptimized */
  165. {
  166.     unsigned short d[4] =
  167.     {
  168.         a.m64_u16[n & 3],
  169.         a.m64_u16[(n >> 2) & 3],
  170.         a.m64_u16[(n >> 4) & 3],
  171.         a.m64_u16[(n >> 6) & 3]
  172.     };
  173.  
  174.     return M64C (d[0]);
  175. }
  176.  
  177. __inline unsigned char
  178. sat16 (unsigned short d)
  179. {
  180.     if (d > 0xFF) return 0xFF;
  181.     else return d & 0xFF;
  182. }
  183.  
  184. __inline __m64
  185. _m_packuswb (__m64 m1, __m64 m2)          /* unoptimized */
  186. {
  187.     unsigned char d[8] =
  188.     {
  189.         sat16 (m1.m64_u16[0]),
  190.         sat16 (m1.m64_u16[1]),
  191.         sat16 (m1.m64_u16[2]),
  192.         sat16 (m1.m64_u16[3]),
  193.         sat16 (m2.m64_u16[0]),
  194.         sat16 (m2.m64_u16[1]),
  195.         sat16 (m2.m64_u16[2]),
  196.         sat16 (m2.m64_u16[3])
  197.     };
  198.  
  199.     return M64C (d[0]);
  200. }
  201.  
  202. __inline __m64 _m_punpcklbw (__m64 m1, __m64 m2)          /* unoptimized */
  203. {
  204.     unsigned char d[8] =
  205.     {
  206.         m1.m64_u8[0],
  207.         m2.m64_u8[0],
  208.         m1.m64_u8[1],
  209.         m2.m64_u8[1],
  210.         m1.m64_u8[2],
  211.         m2.m64_u8[2],
  212.         m1.m64_u8[3],
  213.         m2.m64_u8[3],
  214.     };
  215.  
  216.     return M64C (d[0]);
  217. }
  218.  
  219. __inline __m64 _m_punpckhbw (__m64 m1, __m64 m2)          /* unoptimized */
  220. {
  221.     unsigned char d[8] =
  222.     {
  223.         m1.m64_u8[4],
  224.         m2.m64_u8[4],
  225.         m1.m64_u8[5],
  226.         m2.m64_u8[5],
  227.         m1.m64_u8[6],
  228.         m2.m64_u8[6],
  229.         m1.m64_u8[7],
  230.         m2.m64_u8[7],
  231.     };
  232.  
  233.     return M64C (d[0]);
  234. }
  235.  
  236. __inline __m64 _m_psrlwi (__m64 a, int n)       /* unoptimized */
  237. {
  238.     unsigned short d[4] =
  239.     {
  240.         a.m64_u16[0] >> n,
  241.         a.m64_u16[1] >> n,
  242.         a.m64_u16[2] >> n,
  243.         a.m64_u16[3] >> n
  244.     };
  245.  
  246.     return M64C (d[0]);
  247. }
  248.  
  249. __inline __m64 _m_psrlqi (__m64 m, int n)
  250. {
  251.     unsigned long long x = M64U (m) >> n;
  252.  
  253.     return M64C (x);
  254. }
  255.  
  256. __inline __m64 _m_psllqi (__m64 m, int n)
  257. {
  258.     unsigned long long x = M64U (m) << n;
  259.  
  260.     return M64C (x);
  261. }
  262.  
  263. #endif /* MMX_X64_H_INCLUDED */
  264.