Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
  4.  *
  5.  * Permission is hereby granted, free of charge, to any person obtaining a
  6.  * copy of this software and associated documentation files (the "Software"),
  7.  * to deal in the Software without restriction, including without limitation
  8.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9.  * and/or sell copies of the Software, and to permit persons to whom the
  10.  * Software is furnished to do so, subject to the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice shall be included
  13.  * in all copies or substantial portions of the Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  16.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21.  * OTHER DEALINGS IN THE SOFTWARE.
  22.  *
  23.  **************************************************************************/
  24.  
  25. #ifndef _RTASM_X86SSE_H_
  26. #define _RTASM_X86SSE_H_
  27.  
  28. #include "pipe/p_compiler.h"
  29. #include "pipe/p_config.h"
  30.  
  31. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  32.  
  33. /* It is up to the caller to ensure that instructions issued are
  34.  * suitable for the host cpu.  There are no checks made in this module
  35.  * for mmx/sse/sse2 support on the cpu.
  36.  */
  37. struct x86_reg {
  38.    unsigned file:2;
  39.    unsigned idx:4;
  40.    unsigned mod:2;              /* mod_REG if this is just a register */
  41.    int      disp:24;            /* only +/- 23bits of offset - should be enough... */
  42. };
  43.  
  44. #define X86_MMX 1
  45. #define X86_MMX2 2
  46. #define X86_SSE 4
  47. #define X86_SSE2 8
  48. #define X86_SSE3 0x10
  49. #define X86_SSE4_1 0x20
  50.  
  51. struct x86_function {
  52.    unsigned caps;
  53.    unsigned size;
  54.    unsigned char *store;
  55.    unsigned char *csr;
  56.  
  57.    unsigned stack_offset:16;
  58.    unsigned need_emms:8;
  59.    int x87_stack:8;
  60.  
  61.    unsigned char error_overflow[4];
  62. };
  63.  
  64. enum x86_reg_file {
  65.    file_REG32,
  66.    file_MMX,
  67.    file_XMM,
  68.    file_x87
  69. };
  70.  
  71. /* Values for mod field of modr/m byte
  72.  */
  73. enum x86_reg_mod {
  74.    mod_INDIRECT,
  75.    mod_DISP8,
  76.    mod_DISP32,
  77.    mod_REG
  78. };
  79.  
  80. enum x86_reg_name {
  81.    reg_AX,
  82.    reg_CX,
  83.    reg_DX,
  84.    reg_BX,
  85.    reg_SP,
  86.    reg_BP,
  87.    reg_SI,
  88.    reg_DI,
  89.    reg_R8,
  90.    reg_R9,
  91.    reg_R10,
  92.    reg_R11,
  93.    reg_R12,
  94.    reg_R13,
  95.    reg_R14,
  96.    reg_R15
  97. };
  98.  
  99.  
  100. enum x86_cc {
  101.    cc_O,                        /* overflow */
  102.    cc_NO,                       /* not overflow */
  103.    cc_NAE,                      /* not above or equal / carry */
  104.    cc_AE,                       /* above or equal / not carry */
  105.    cc_E,                        /* equal / zero */
  106.    cc_NE                        /* not equal / not zero */
  107. };
  108.  
  109. enum sse_cc {
  110.    cc_Equal,
  111.    cc_LessThan,
  112.    cc_LessThanEqual,
  113.    cc_Unordered,
  114.    cc_NotEqual,
  115.    cc_NotLessThan,
  116.    cc_NotLessThanEqual,
  117.    cc_Ordered
  118. };
  119.  
  120. #define cc_Z  cc_E
  121. #define cc_NZ cc_NE
  122.  
  123.  
  124. /** generic pointer to function */
  125. typedef void (*x86_func)(void);
  126.  
  127.  
  128. /* Begin/end/retrieve function creation:
  129.  */
  130.  
  131. enum x86_target
  132. {
  133.    X86_32,
  134.    X86_64_STD_ABI,
  135.    X86_64_WIN64_ABI
  136. };
  137.  
  138. /* make this read a member of x86_function if target != host is desired */
  139. static INLINE enum x86_target x86_target( struct x86_function* p )
  140. {
  141. #ifdef PIPE_ARCH_X86
  142.    return X86_32;
  143. #elif defined(_WIN64)
  144.    return X86_64_WIN64_ABI;
  145. #elif defined(PIPE_ARCH_X86_64)
  146.    return X86_64_STD_ABI;
  147. #endif
  148. }
  149.  
  150. static INLINE unsigned x86_target_caps( struct x86_function* p )
  151. {
  152.    return p->caps;
  153. }
  154.  
  155. void x86_init_func( struct x86_function *p );
  156. void x86_init_func_size( struct x86_function *p, unsigned code_size );
  157. void x86_release_func( struct x86_function *p );
  158. x86_func x86_get_func( struct x86_function *p );
  159.  
  160. /* Debugging:
  161.  */
  162. void x86_print_reg( struct x86_reg reg );
  163.  
  164.  
  165. /* Create and manipulate registers and regmem values:
  166.  */
  167. struct x86_reg x86_make_reg( enum x86_reg_file file,
  168.                              enum x86_reg_name idx );
  169.  
  170. struct x86_reg x86_make_disp( struct x86_reg reg,
  171.                               int disp );
  172.  
  173. struct x86_reg x86_deref( struct x86_reg reg );
  174.  
  175. struct x86_reg x86_get_base_reg( struct x86_reg reg );
  176.  
  177.  
  178. /* Labels, jumps and fixup:
  179.  */
  180. int x86_get_label( struct x86_function *p );
  181.  
  182. void x64_rexw(struct x86_function *p);
  183.  
  184. void x86_jcc( struct x86_function *p,
  185.               enum x86_cc cc,
  186.               int label );
  187.  
  188. int x86_jcc_forward( struct x86_function *p,
  189.                           enum x86_cc cc );
  190.  
  191. int x86_jmp_forward( struct x86_function *p);
  192.  
  193. int x86_call_forward( struct x86_function *p);
  194.  
  195. void x86_fixup_fwd_jump( struct x86_function *p,
  196.                          int fixup );
  197.  
  198. void x86_jmp( struct x86_function *p, int label );
  199.  
  200. /* void x86_call( struct x86_function *p, void (*label)() ); */
  201. void x86_call( struct x86_function *p, struct x86_reg reg);
  202.  
  203. void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
  204. void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm );
  205. void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm );
  206. void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm );
  207. void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm );
  208. void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm );
  209. void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm );
  210.  
  211.  
  212. /* Macro for sse_shufps() and sse2_pshufd():
  213.  */
  214. #define SHUF(_x,_y,_z,_w)       (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
  215. #define SHUF_NOOP               RSW(0,1,2,3)
  216. #define GET_SHUF(swz, idx)      (((swz) >> ((idx)*2)) & 0x3)
  217.  
  218. void mmx_emms( struct x86_function *p );
  219. void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  220. void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  221. void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  222. void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  223.  
  224. void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  225. void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  226. void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  227. void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  228. void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  229. void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  230. void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  231.  
  232. void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  233. void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  234. void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  235. void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  236. void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  237.  
  238. void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  239. void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  240. void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  241. void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  242. void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
  243.                   unsigned char shuf );
  244. void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
  245.                   unsigned char shuf );
  246. void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
  247.                   unsigned char shuf );
  248. void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  249. void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  250.  
  251. void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  252. void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  253. void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  254. void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  255.  
  256. void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
  257. void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
  258. void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
  259.  
  260. void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
  261. void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
  262. void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
  263.  
  264. void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
  265. void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
  266.  
  267. void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  268.  
  269. void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
  270. void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
  271. void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
  272.  
  273. void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
  274. void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
  275. void sse_prefetch1( struct x86_function *p, struct x86_reg ptr);
  276.  
  277. void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
  278.  
  279. void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  280. void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  281. void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  282. void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  283. void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  284. void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  285. void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
  286.                 enum sse_cc cc );
  287. void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  288. void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  289. void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  290. void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  291. void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  292. void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  293. void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  294. void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  295. void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  296. void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  297. void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  298. void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  299. void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  300. void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  301. void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  302. void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  303. void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  304. void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
  305.                  unsigned char shuf );
  306. void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  307. void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  308. void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
  309. void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
  310.  
  311. void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  312. void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  313. void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc );
  314. void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  315. void x86_dec( struct x86_function *p, struct x86_reg reg );
  316. void x86_inc( struct x86_function *p, struct x86_reg reg );
  317. void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  318. void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  319. void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  320. void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  321. void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  322. void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  323. void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  324. void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
  325. void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
  326. void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
  327. void x86_mul( struct x86_function *p, struct x86_reg src );
  328. void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  329. void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  330. void x86_pop( struct x86_function *p, struct x86_reg reg );
  331. void x86_push( struct x86_function *p, struct x86_reg reg );
  332. void x86_push_imm32( struct x86_function *p, int imm );
  333. void x86_ret( struct x86_function *p );
  334. void x86_retw( struct x86_function *p, unsigned short imm );
  335. void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  336. void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  337. void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
  338. void x86_sahf( struct x86_function *p );
  339. void x86_div( struct x86_function *p, struct x86_reg src );
  340. void x86_bswap( struct x86_function *p, struct x86_reg src );
  341. void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
  342. void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
  343. void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm  );
  344.  
  345. void x86_cdecl_caller_push_regs( struct x86_function *p );
  346. void x86_cdecl_caller_pop_regs( struct x86_function *p );
  347.  
  348. void x87_assert_stack_empty( struct x86_function *p );
  349.  
  350. void x87_f2xm1( struct x86_function *p );
  351. void x87_fabs( struct x86_function *p );
  352. void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
  353. void x87_faddp( struct x86_function *p, struct x86_reg dst );
  354. void x87_fchs( struct x86_function *p );
  355. void x87_fclex( struct x86_function *p );
  356. void x87_fcmovb( struct x86_function *p, struct x86_reg src );
  357. void x87_fcmovbe( struct x86_function *p, struct x86_reg src );
  358. void x87_fcmove( struct x86_function *p, struct x86_reg src );
  359. void x87_fcmovnb( struct x86_function *p, struct x86_reg src );
  360. void x87_fcmovnbe( struct x86_function *p, struct x86_reg src );
  361. void x87_fcmovne( struct x86_function *p, struct x86_reg src );
  362. void x87_fcom( struct x86_function *p, struct x86_reg dst );
  363. void x87_fcomi( struct x86_function *p, struct x86_reg dst );
  364. void x87_fcomip( struct x86_function *p, struct x86_reg dst );
  365. void x87_fcomp( struct x86_function *p, struct x86_reg dst );
  366. void x87_fcos( struct x86_function *p );
  367. void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
  368. void x87_fdivp( struct x86_function *p, struct x86_reg dst );
  369. void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
  370. void x87_fdivrp( struct x86_function *p, struct x86_reg dst );
  371. void x87_fild( struct x86_function *p, struct x86_reg arg );
  372. void x87_fist( struct x86_function *p, struct x86_reg dst );
  373. void x87_fistp( struct x86_function *p, struct x86_reg dst );
  374. void x87_fld( struct x86_function *p, struct x86_reg arg );
  375. void x87_fld1( struct x86_function *p );
  376. void x87_fldcw( struct x86_function *p, struct x86_reg arg );
  377. void x87_fldl2e( struct x86_function *p );
  378. void x87_fldln2( struct x86_function *p );
  379. void x87_fldz( struct x86_function *p );
  380. void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
  381. void x87_fmulp( struct x86_function *p, struct x86_reg dst );
  382. void x87_fnclex( struct x86_function *p );
  383. void x87_fprndint( struct x86_function *p );
  384. void x87_fpop( struct x86_function *p );
  385. void x87_fscale( struct x86_function *p );
  386. void x87_fsin( struct x86_function *p );
  387. void x87_fsincos( struct x86_function *p );
  388. void x87_fsqrt( struct x86_function *p );
  389. void x87_fst( struct x86_function *p, struct x86_reg dst );
  390. void x87_fstp( struct x86_function *p, struct x86_reg dst );
  391. void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
  392. void x87_fsubp( struct x86_function *p, struct x86_reg dst );
  393. void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
  394. void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
  395. void x87_ftst( struct x86_function *p );
  396. void x87_fxch( struct x86_function *p, struct x86_reg dst );
  397. void x87_fxtract( struct x86_function *p );
  398. void x87_fyl2x( struct x86_function *p );
  399. void x87_fyl2xp1( struct x86_function *p );
  400. void x87_fwait( struct x86_function *p );
  401. void x87_fnstcw( struct x86_function *p, struct x86_reg dst );
  402. void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
  403. void x87_fucompp( struct x86_function *p );
  404. void x87_fucomp( struct x86_function *p, struct x86_reg arg );
  405. void x87_fucom( struct x86_function *p, struct x86_reg arg );
  406.  
  407.  
  408.  
  409. /* Retrieve a reference to one of the function arguments, taking into
  410.  * account any push/pop activity.  Note - doesn't track explicit
  411.  * manipulation of ESP by other instructions.
  412.  */
  413. struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
  414.  
  415. #endif
  416. #endif
  417.