Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
#ifdef USE_X86_ASM
2
#if defined(__i386__) || defined(__386__)
3
 
4
#include "main/imports.h"
5
#include "x86sse.h"
6
 
7
#define DISASSEM 0
8
#define X86_TWOB 0x0f
9
 
10
#if 0
11
static unsigned char *cptr( void (*label)() )
12
{
13
   return (unsigned char *)(unsigned long)label;
14
}
15
#endif
16
 
17
 
18
static void do_realloc( struct x86_function *p )
19
{
20
   if (p->size == 0) {
21
      p->size = 1024;
22
      p->store = _mesa_exec_malloc(p->size);
23
      p->csr = p->store;
24
   }
25
   else {
26
      unsigned used = p->csr - p->store;
27
      unsigned char *tmp = p->store;
28
      p->size *= 2;
29
      p->store = _mesa_exec_malloc(p->size);
30
      memcpy(p->store, tmp, used);
31
      p->csr = p->store + used;
32
      _mesa_exec_free(tmp);
33
   }
34
}
35
 
36
/* Emit bytes to the instruction stream:
37
 */
38
static unsigned char *reserve( struct x86_function *p, int bytes )
39
{
40
   if (p->csr + bytes - p->store > p->size)
41
      do_realloc(p);
42
 
43
   {
44
      unsigned char *csr = p->csr;
45
      p->csr += bytes;
46
      return csr;
47
   }
48
}
49
 
50
 
51
 
52
/* Append one (signed) byte to the instruction stream. */
static void emit_1b( struct x86_function *p, char b0 )
{
   unsigned char *slot = reserve(p, 1);

   *(char *)slot = b0;
}
57
 
58
/* Append one int, in host byte order, to the instruction stream.
 *
 * The write cursor has byte granularity, so the destination is in
 * general not int-aligned.  Copy the bytes with memcpy() instead of
 * storing through a cast pointer, which avoids the misaligned /
 * type-punned access.
 */
static void emit_1i( struct x86_function *p, int i0 )
{
   unsigned char *dst = reserve(p, sizeof(i0));

   memcpy(dst, &i0, sizeof(i0));
}
63
 
64
/* Append a single unsigned byte to the instruction stream. */
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *out = reserve(p, 1);

   out[0] = b0;
}
69
 
70
/* Append two unsigned bytes, b0 first, to the instruction stream. */
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
   unsigned char *out = reserve(p, 2);

   out[0] = b0;
   out[1] = b1;
}
76
 
77
/* Append three unsigned bytes, in argument order, to the instruction stream. */
static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
{
   unsigned char *out = reserve(p, 3);

   out[0] = b0;
   out[1] = b1;
   out[2] = b2;
}
84
 
85
 
86
/* Build a modRM byte + possible displacement.  No treatment of SIB
87
 * indexing.  BZZT - no way to encode an absolute address.
88
 */
89
static void emit_modrm( struct x86_function *p,
90
			struct x86_reg reg,
91
			struct x86_reg regmem )
92
{
93
   unsigned char val = 0;
94
 
95
   assert(reg.mod == mod_REG);
96
 
97
   val |= regmem.mod << 6;     	/* mod field */
98
   val |= reg.idx << 3;		/* reg field */
99
   val |= regmem.idx;		/* r/m field */
100
 
101
   emit_1ub(p, val);
102
 
103
   /* Oh-oh we've stumbled into the SIB thing.
104
    */
105
   if (regmem.file == file_REG32 &&
106
       regmem.idx == reg_SP) {
107
      emit_1ub(p, 0x24);		/* simplistic! */
108
   }
109
 
110
   switch (regmem.mod) {
111
   case mod_REG:
112
   case mod_INDIRECT:
113
      break;
114
   case mod_DISP8:
115
      emit_1b(p, regmem.disp);
116
      break;
117
   case mod_DISP32:
118
      emit_1i(p, regmem.disp);
119
      break;
120
   default:
121
      assert(0);
122
      break;
123
   }
124
}
125
 
126
 
127
static void emit_modrm_noreg( struct x86_function *p,
128
			      unsigned op,
129
			      struct x86_reg regmem )
130
{
131
   struct x86_reg dummy = x86_make_reg(file_REG32, op);
132
   emit_modrm(p, dummy, regmem);
133
}
134
 
135
/* Many x86 instructions have two opcodes to cope with the situations
136
 * where the destination is a register or memory reference
137
 * respectively.  This function selects the correct opcode based on
138
 * the arguments presented.
139
 */
140
static void emit_op_modrm( struct x86_function *p,
141
			   unsigned char op_dst_is_reg,
142
			   unsigned char op_dst_is_mem,
143
			   struct x86_reg dst,
144
			   struct x86_reg src )
145
{
146
   switch (dst.mod) {
147
   case mod_REG:
148
      emit_1ub(p, op_dst_is_reg);
149
      emit_modrm(p, dst, src);
150
      break;
151
   case mod_INDIRECT:
152
   case mod_DISP32:
153
   case mod_DISP8:
154
      assert(src.mod == mod_REG);
155
      emit_1ub(p, op_dst_is_mem);
156
      emit_modrm(p, src, dst);
157
      break;
158
   default:
159
      assert(0);
160
      break;
161
   }
162
}
163
 
164
 
165
 
166
 
167
 
168
 
169
 
170
/* Create and manipulate registers and regmem values:
171
 */
172
struct x86_reg x86_make_reg( enum x86_reg_file file,
173
			     enum x86_reg_name idx )
174
{
175
   struct x86_reg reg;
176
 
177
   reg.file = file;
178
   reg.idx = idx;
179
   reg.mod = mod_REG;
180
   reg.disp = 0;
181
 
182
   return reg;
183
}
184
 
185
struct x86_reg x86_make_disp( struct x86_reg reg,
186
			      int disp )
187
{
188
   assert(reg.file == file_REG32);
189
 
190
   if (reg.mod == mod_REG)
191
      reg.disp = disp;
192
   else
193
      reg.disp += disp;
194
 
195
   if (reg.disp == 0)
196
      reg.mod = mod_INDIRECT;
197
   else if (reg.disp <= 127 && reg.disp >= -128)
198
      reg.mod = mod_DISP8;
199
   else
200
      reg.mod = mod_DISP32;
201
 
202
   return reg;
203
}
204
 
205
struct x86_reg x86_deref( struct x86_reg reg )
206
{
207
   return x86_make_disp(reg, 0);
208
}
209
 
210
struct x86_reg x86_get_base_reg( struct x86_reg reg )
211
{
212
   return x86_make_reg( reg.file, reg.idx );
213
}
214
 
215
unsigned char *x86_get_label( struct x86_function *p )
216
{
217
   return p->csr;
218
}
219
 
220
 
221
 
222
/***********************************************************************
223
 * x86 instructions
224
 */
225
 
226
 
227
void x86_jcc( struct x86_function *p,
228
	      enum x86_cc cc,
229
	      unsigned char *label )
230
{
231
   int offset = label - (x86_get_label(p) + 2);
232
 
233
   if (offset <= 127 && offset >= -128) {
234
      emit_1ub(p, 0x70 + cc);
235
      emit_1b(p, (char) offset);
236
   }
237
   else {
238
      offset = label - (x86_get_label(p) + 6);
239
      emit_2ub(p, 0x0f, 0x80 + cc);
240
      emit_1i(p, offset);
241
   }
242
}
243
 
244
/* Always use a 32bit offset for forward jumps:
245
 */
246
unsigned char *x86_jcc_forward( struct x86_function *p,
247
			  enum x86_cc cc )
248
{
249
   emit_2ub(p, 0x0f, 0x80 + cc);
250
   emit_1i(p, 0);
251
   return x86_get_label(p);
252
}
253
 
254
/* Unconditional jump (E9 rel32) with a zero placeholder offset.
 * Returns the position right after the placeholder.
 */
unsigned char *x86_jmp_forward( struct x86_function *p)
{
   const unsigned char jmp_rel32 = 0xe9;

   emit_1ub(p, jmp_rel32);
   emit_1i(p, 0);
   return x86_get_label(p);
}
260
 
261
/* Relative call (E8 rel32) with a zero placeholder offset.
 * Returns the position right after the placeholder.
 */
unsigned char *x86_call_forward( struct x86_function *p)
{
   const unsigned char call_rel32 = 0xe8;

   emit_1ub(p, call_rel32);
   emit_1i(p, 0);
   return x86_get_label(p);
}
267
 
268
/* Fixup offset from forward jump:
269
 */
270
void x86_fixup_fwd_jump( struct x86_function *p,
271
			 unsigned char *fixup )
272
{
273
   *(int *)(fixup - 4) = x86_get_label(p) - fixup;
274
}
275
 
276
/* Unconditional jump (E9 rel32) to an already known `label`. */
void x86_jmp( struct x86_function *p, unsigned char *label)
{
   int rel32;

   emit_1ub(p, 0xe9);

   /* Measured after the opcode byte; the extra 4 accounts for the
    * offset field itself.
    */
   rel32 = label - x86_get_label(p) - 4;
   emit_1i(p, rel32);
}
281
 
282
#if 0
283
/* This doesn't work once we start reallocating & copying the
284
 * generated code on buffer fills, because the call is relative to the
285
 * current pc.
286
 */
287
void x86_call( struct x86_function *p, void (*label)())
288
{
289
   emit_1ub(p, 0xe8);
290
   emit_1i(p, cptr(label) - x86_get_label(p) - 4);
291
}
292
#else
293
/* Indirect call through the operand `reg` (FF /2). */
void x86_call( struct x86_function *p, struct x86_reg reg)
{
   const unsigned opcode_ext = 2;

   emit_1ub(p, 0xff);
   emit_modrm_noreg(p, opcode_ext, reg);
}
298
#endif
299
 
300
 
301
/* michal:
302
 * Temporary. As I need immediate operands, and dont want to mess with the codegen,
303
 * I load the immediate into general purpose register and use it.
304
 */
305
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
306
{
307
   assert(dst.mod == mod_REG);
308
   emit_1ub(p, 0xb8 + dst.idx);
309
   emit_1i(p, imm);
310
}
311
 
312
void x86_push( struct x86_function *p,
313
	       struct x86_reg reg )
314
{
315
   assert(reg.mod == mod_REG);
316
   emit_1ub(p, 0x50 + reg.idx);
317
   p->stack_offset += 4;
318
}
319
 
320
void x86_pop( struct x86_function *p,
321
	      struct x86_reg reg )
322
{
323
   assert(reg.mod == mod_REG);
324
   emit_1ub(p, 0x58 + reg.idx);
325
   p->stack_offset -= 4;
326
}
327
 
328
void x86_inc( struct x86_function *p,
329
	      struct x86_reg reg )
330
{
331
   assert(reg.mod == mod_REG);
332
   emit_1ub(p, 0x40 + reg.idx);
333
}
334
 
335
void x86_dec( struct x86_function *p,
336
	      struct x86_reg reg )
337
{
338
   assert(reg.mod == mod_REG);
339
   emit_1ub(p, 0x48 + reg.idx);
340
}
341
 
342
/* Emit RET (C3). */
void x86_ret( struct x86_function *p )
{
   const unsigned char opcode = 0xc3;

   emit_1ub(p, opcode);
}
346
 
347
/* Emit SAHF (9E). */
void x86_sahf( struct x86_function *p )
{
   const unsigned char opcode = 0x9e;

   emit_1ub(p, opcode);
}
351
 
352
void x86_mov( struct x86_function *p,
353
	      struct x86_reg dst,
354
	      struct x86_reg src )
355
{
356
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
357
}
358
 
359
void x86_xor( struct x86_function *p,
360
	      struct x86_reg dst,
361
	      struct x86_reg src )
362
{
363
   emit_op_modrm( p, 0x33, 0x31, dst, src );
364
}
365
 
366
void x86_cmp( struct x86_function *p,
367
	      struct x86_reg dst,
368
	      struct x86_reg src )
369
{
370
   emit_op_modrm( p, 0x3b, 0x39, dst, src );
371
}
372
 
373
void x86_lea( struct x86_function *p,
374
	      struct x86_reg dst,
375
	      struct x86_reg src )
376
{
377
   emit_1ub(p, 0x8d);
378
   emit_modrm( p, dst, src );
379
}
380
 
381
void x86_test( struct x86_function *p,
382
	       struct x86_reg dst,
383
	       struct x86_reg src )
384
{
385
   emit_1ub(p, 0x85);
386
   emit_modrm( p, dst, src );
387
}
388
 
389
void x86_add( struct x86_function *p,
390
	       struct x86_reg dst,
391
	       struct x86_reg src )
392
{
393
   emit_op_modrm(p, 0x03, 0x01, dst, src );
394
}
395
 
396
void x86_mul( struct x86_function *p,
397
	       struct x86_reg src )
398
{
399
   assert (src.file == file_REG32 && src.mod == mod_REG);
400
   emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
401
}
402
 
403
void x86_sub( struct x86_function *p,
404
	       struct x86_reg dst,
405
	       struct x86_reg src )
406
{
407
   emit_op_modrm(p, 0x2b, 0x29, dst, src );
408
}
409
 
410
void x86_or( struct x86_function *p,
411
             struct x86_reg dst,
412
             struct x86_reg src )
413
{
414
   emit_op_modrm( p, 0x0b, 0x09, dst, src );
415
}
416
 
417
void x86_and( struct x86_function *p,
418
              struct x86_reg dst,
419
              struct x86_reg src )
420
{
421
   emit_op_modrm( p, 0x23, 0x21, dst, src );
422
}
423
 
424
 
425
 
426
/***********************************************************************
427
 * SSE instructions
428
 */
429
 
430
 
431
void sse_movss( struct x86_function *p,
432
		struct x86_reg dst,
433
		struct x86_reg src )
434
{
435
   emit_2ub(p, 0xF3, X86_TWOB);
436
   emit_op_modrm( p, 0x10, 0x11, dst, src );
437
}
438
 
439
void sse_movaps( struct x86_function *p,
440
		 struct x86_reg dst,
441
		 struct x86_reg src )
442
{
443
   emit_1ub(p, X86_TWOB);
444
   emit_op_modrm( p, 0x28, 0x29, dst, src );
445
}
446
 
447
void sse_movups( struct x86_function *p,
448
		 struct x86_reg dst,
449
		 struct x86_reg src )
450
{
451
   emit_1ub(p, X86_TWOB);
452
   emit_op_modrm( p, 0x10, 0x11, dst, src );
453
}
454
 
455
void sse_movhps( struct x86_function *p,
456
		 struct x86_reg dst,
457
		 struct x86_reg src )
458
{
459
   assert(dst.mod != mod_REG || src.mod != mod_REG);
460
   emit_1ub(p, X86_TWOB);
461
   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
462
}
463
 
464
void sse_movlps( struct x86_function *p,
465
		 struct x86_reg dst,
466
		 struct x86_reg src )
467
{
468
   assert(dst.mod != mod_REG || src.mod != mod_REG);
469
   emit_1ub(p, X86_TWOB);
470
   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
471
}
472
 
473
void sse_maxps( struct x86_function *p,
474
		struct x86_reg dst,
475
		struct x86_reg src )
476
{
477
   emit_2ub(p, X86_TWOB, 0x5F);
478
   emit_modrm( p, dst, src );
479
}
480
 
481
void sse_maxss( struct x86_function *p,
482
		struct x86_reg dst,
483
		struct x86_reg src )
484
{
485
   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
486
   emit_modrm( p, dst, src );
487
}
488
 
489
void sse_divss( struct x86_function *p,
490
		struct x86_reg dst,
491
		struct x86_reg src )
492
{
493
   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
494
   emit_modrm( p, dst, src );
495
}
496
 
497
void sse_minps( struct x86_function *p,
498
		struct x86_reg dst,
499
		struct x86_reg src )
500
{
501
   emit_2ub(p, X86_TWOB, 0x5D);
502
   emit_modrm( p, dst, src );
503
}
504
 
505
void sse_subps( struct x86_function *p,
506
		struct x86_reg dst,
507
		struct x86_reg src )
508
{
509
   emit_2ub(p, X86_TWOB, 0x5C);
510
   emit_modrm( p, dst, src );
511
}
512
 
513
void sse_mulps( struct x86_function *p,
514
		struct x86_reg dst,
515
		struct x86_reg src )
516
{
517
   emit_2ub(p, X86_TWOB, 0x59);
518
   emit_modrm( p, dst, src );
519
}
520
 
521
void sse_mulss( struct x86_function *p,
522
		struct x86_reg dst,
523
		struct x86_reg src )
524
{
525
   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
526
   emit_modrm( p, dst, src );
527
}
528
 
529
void sse_addps( struct x86_function *p,
530
		struct x86_reg dst,
531
		struct x86_reg src )
532
{
533
   emit_2ub(p, X86_TWOB, 0x58);
534
   emit_modrm( p, dst, src );
535
}
536
 
537
void sse_addss( struct x86_function *p,
538
		struct x86_reg dst,
539
		struct x86_reg src )
540
{
541
   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
542
   emit_modrm( p, dst, src );
543
}
544
 
545
void sse_andnps( struct x86_function *p,
546
                 struct x86_reg dst,
547
                 struct x86_reg src )
548
{
549
   emit_2ub(p, X86_TWOB, 0x55);
550
   emit_modrm( p, dst, src );
551
}
552
 
553
void sse_andps( struct x86_function *p,
554
		struct x86_reg dst,
555
		struct x86_reg src )
556
{
557
   emit_2ub(p, X86_TWOB, 0x54);
558
   emit_modrm( p, dst, src );
559
}
560
 
561
void sse_rsqrtps( struct x86_function *p,
562
                  struct x86_reg dst,
563
                  struct x86_reg src )
564
{
565
   emit_2ub(p, X86_TWOB, 0x52);
566
   emit_modrm( p, dst, src );
567
}
568
 
569
void sse_rsqrtss( struct x86_function *p,
570
		  struct x86_reg dst,
571
		  struct x86_reg src )
572
{
573
   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
574
   emit_modrm( p, dst, src );
575
 
576
}
577
 
578
void sse_movhlps( struct x86_function *p,
579
		  struct x86_reg dst,
580
		  struct x86_reg src )
581
{
582
   assert(dst.mod == mod_REG && src.mod == mod_REG);
583
   emit_2ub(p, X86_TWOB, 0x12);
584
   emit_modrm( p, dst, src );
585
}
586
 
587
void sse_movlhps( struct x86_function *p,
588
		  struct x86_reg dst,
589
		  struct x86_reg src )
590
{
591
   assert(dst.mod == mod_REG && src.mod == mod_REG);
592
   emit_2ub(p, X86_TWOB, 0x16);
593
   emit_modrm( p, dst, src );
594
}
595
 
596
void sse_orps( struct x86_function *p,
597
               struct x86_reg dst,
598
               struct x86_reg src )
599
{
600
   emit_2ub(p, X86_TWOB, 0x56);
601
   emit_modrm( p, dst, src );
602
}
603
 
604
void sse_xorps( struct x86_function *p,
605
                struct x86_reg dst,
606
                struct x86_reg src )
607
{
608
   emit_2ub(p, X86_TWOB, 0x57);
609
   emit_modrm( p, dst, src );
610
}
611
 
612
void sse_cvtps2pi( struct x86_function *p,
613
		   struct x86_reg dst,
614
		   struct x86_reg src )
615
{
616
   assert(dst.file == file_MMX &&
617
	  (src.file == file_XMM || src.mod != mod_REG));
618
 
619
   p->need_emms = 1;
620
 
621
   emit_2ub(p, X86_TWOB, 0x2d);
622
   emit_modrm( p, dst, src );
623
}
624
 
625
 
626
/* Shufps can also be used to implement a reduced swizzle when dest ==
627
 * arg0.
628
 */
629
void sse_shufps( struct x86_function *p,
630
		 struct x86_reg dest,
631
		 struct x86_reg arg0,
632
		 unsigned char shuf)
633
{
634
   emit_2ub(p, X86_TWOB, 0xC6);
635
   emit_modrm(p, dest, arg0);
636
   emit_1ub(p, shuf);
637
}
638
 
639
void sse_cmpps( struct x86_function *p,
640
		struct x86_reg dest,
641
		struct x86_reg arg0,
642
		unsigned char cc)
643
{
644
   emit_2ub(p, X86_TWOB, 0xC2);
645
   emit_modrm(p, dest, arg0);
646
   emit_1ub(p, cc);
647
}
648
 
649
void sse_pmovmskb( struct x86_function *p,
650
                   struct x86_reg dest,
651
                   struct x86_reg src)
652
{
653
    emit_3ub(p, 0x66, X86_TWOB, 0xD7);
654
    emit_modrm(p, dest, src);
655
}
656
 
657
/***********************************************************************
658
 * SSE2 instructions
659
 */
660
 
661
/**
662
 * Perform a reduced swizzle:
663
 */
664
void sse2_pshufd( struct x86_function *p,
665
		  struct x86_reg dest,
666
		  struct x86_reg arg0,
667
		  unsigned char shuf)
668
{
669
   emit_3ub(p, 0x66, X86_TWOB, 0x70);
670
   emit_modrm(p, dest, arg0);
671
   emit_1ub(p, shuf);
672
}
673
 
674
void sse2_cvttps2dq( struct x86_function *p,
675
                     struct x86_reg dst,
676
                     struct x86_reg src )
677
{
678
   emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
679
   emit_modrm( p, dst, src );
680
}
681
 
682
void sse2_cvtps2dq( struct x86_function *p,
683
		    struct x86_reg dst,
684
		    struct x86_reg src )
685
{
686
   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
687
   emit_modrm( p, dst, src );
688
}
689
 
690
void sse2_packssdw( struct x86_function *p,
691
		    struct x86_reg dst,
692
		    struct x86_reg src )
693
{
694
   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
695
   emit_modrm( p, dst, src );
696
}
697
 
698
void sse2_packsswb( struct x86_function *p,
699
		    struct x86_reg dst,
700
		    struct x86_reg src )
701
{
702
   emit_3ub(p, 0x66, X86_TWOB, 0x63);
703
   emit_modrm( p, dst, src );
704
}
705
 
706
void sse2_packuswb( struct x86_function *p,
707
		    struct x86_reg dst,
708
		    struct x86_reg src )
709
{
710
   emit_3ub(p, 0x66, X86_TWOB, 0x67);
711
   emit_modrm( p, dst, src );
712
}
713
 
714
void sse2_rcpps( struct x86_function *p,
715
                 struct x86_reg dst,
716
                 struct x86_reg src )
717
{
718
   emit_2ub(p, X86_TWOB, 0x53);
719
   emit_modrm( p, dst, src );
720
}
721
 
722
void sse2_rcpss( struct x86_function *p,
723
		struct x86_reg dst,
724
		struct x86_reg src )
725
{
726
   emit_3ub(p, 0xF3, X86_TWOB, 0x53);
727
   emit_modrm( p, dst, src );
728
}
729
 
730
void sse2_movd( struct x86_function *p,
731
		struct x86_reg dst,
732
		struct x86_reg src )
733
{
734
   emit_2ub(p, 0x66, X86_TWOB);
735
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
736
}
737
 
738
 
739
 
740
 
741
/***********************************************************************
742
 * x87 instructions
743
 */
744
/* FIST to `dst` (DB /2). */
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
   const unsigned opcode_ext = 2;

   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, opcode_ext, dst);
}
749
 
750
/* FISTP to `dst` (DB /3). */
void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
   const unsigned opcode_ext = 3;

   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, opcode_ext, dst);
}
755
 
756
/* FILD from `arg` (DF /0).
 *
 * NOTE(review): DF /0 is the 16-bit integer form of FILD, while
 * x87_fist()/x87_fistp() emit the 32-bit DB forms - confirm that the
 * operand width is intended.
 */
void x87_fild( struct x86_function *p, struct x86_reg arg )
{
   const unsigned opcode_ext = 0;

   emit_1ub(p, 0xdf);
   emit_modrm_noreg(p, opcode_ext, arg);
}
761
 
762
/* Emit FLDZ (D9 EE). */
void x87_fldz( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xee;

   emit_2ub(p, esc, op);
}
766
 
767
 
768
void x87_fldcw( struct x86_function *p, struct x86_reg arg )
769
{
770
   assert(arg.file == file_REG32);
771
   assert(arg.mod != mod_REG);
772
   emit_1ub(p, 0xd9);
773
   emit_modrm_noreg(p, 5, arg);
774
}
775
 
776
/* Emit FLD1 (D9 E8). */
void x87_fld1( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xe8;

   emit_2ub(p, esc, op);
}
780
 
781
/* Emit FLDL2E (D9 EA). */
void x87_fldl2e( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xea;

   emit_2ub(p, esc, op);
}
785
 
786
/* Emit FLDLN2 (D9 ED). */
void x87_fldln2( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xed;

   emit_2ub(p, esc, op);
}
790
 
791
/* Emit FWAIT (9B). */
void x87_fwait( struct x86_function *p )
{
   const unsigned char opcode = 0x9b;

   emit_1ub(p, opcode);
}
795
 
796
/* Emit FNCLEX (DB E2). */
void x87_fnclex( struct x86_function *p )
{
   const unsigned char esc = 0xdb, op = 0xe2;

   emit_2ub(p, esc, op);
}
800
 
801
/* FCLEX: an FWAIT (9B) followed by FNCLEX (DB E2), written out inline. */
void x87_fclex( struct x86_function *p )
{
   emit_1ub(p, 0x9b);            /* fwait */
   emit_2ub(p, 0xdb, 0xe2);      /* fnclex */
}
806
 
807
 
808
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
809
			  unsigned char dst0ub0,
810
			  unsigned char dst0ub1,
811
			  unsigned char arg0ub0,
812
			  unsigned char arg0ub1,
813
			  unsigned char argmem_noreg)
814
{
815
   assert(dst.file == file_x87);
816
 
817
   if (arg.file == file_x87) {
818
      if (dst.idx == 0)
819
	 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
820
      else if (arg.idx == 0)
821
	 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
822
      else
823
	 assert(0);
824
   }
825
   else if (dst.idx == 0) {
826
      assert(arg.file == file_REG32);
827
      emit_1ub(p, 0xd8);
828
      emit_modrm_noreg(p, argmem_noreg, arg);
829
   }
830
   else
831
      assert(0);
832
}
833
 
834
/* FMUL dst, arg.
 *
 * Register forms use the base bytes D8 C8 (dst is st0) and DC C8 (arg
 * is st0).  The memory form is D8 /1; /4 would encode FSUB.
 */
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
		0xd8, 0xc8,
		0xdc, 0xc8,
		1);
}
841
 
842
/* FSUB dst, arg: base bytes D8 E0 when dst is st0, DC E8 when arg is
 * st0, and opcode extension /4 for a memory operand.
 */
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 4;

   x87_arith_op(p, dst, arg, 0xd8, 0xe0, 0xdc, 0xe8, mem_ext);
}
849
 
850
/* FSUBR dst, arg: base bytes D8 E8 when dst is st0, DC E0 when arg is
 * st0, and opcode extension /5 for a memory operand.
 */
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 5;

   x87_arith_op(p, dst, arg, 0xd8, 0xe8, 0xdc, 0xe0, mem_ext);
}
857
 
858
/* FADD dst, arg: base bytes D8 C0 when dst is st0, DC C0 when arg is
 * st0, and opcode extension /0 for a memory operand.
 */
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 0;

   x87_arith_op(p, dst, arg, 0xd8, 0xc0, 0xdc, 0xc0, mem_ext);
}
865
 
866
/* FDIV dst, arg: base bytes D8 F0 when dst is st0, DC F8 when arg is
 * st0, and opcode extension /6 for a memory operand.
 */
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 6;

   x87_arith_op(p, dst, arg, 0xd8, 0xf0, 0xdc, 0xf8, mem_ext);
}
873
 
874
/* FDIVR dst, arg: base bytes D8 F8 when dst is st0, DC F0 when arg is
 * st0, and opcode extension /7 for a memory operand.
 */
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 7;

   x87_arith_op(p, dst, arg, 0xd8, 0xf8, 0xdc, 0xf0, mem_ext);
}
881
 
882
void x87_fmulp( struct x86_function *p, struct x86_reg dst )
883
{
884
   assert(dst.file == file_x87);
885
   assert(dst.idx >= 1);
886
   emit_2ub(p, 0xde, 0xc8+dst.idx);
887
}
888
 
889
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
890
{
891
   assert(dst.file == file_x87);
892
   assert(dst.idx >= 1);
893
   emit_2ub(p, 0xde, 0xe8+dst.idx);
894
}
895
 
896
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
897
{
898
   assert(dst.file == file_x87);
899
   assert(dst.idx >= 1);
900
   emit_2ub(p, 0xde, 0xe0+dst.idx);
901
}
902
 
903
void x87_faddp( struct x86_function *p, struct x86_reg dst )
904
{
905
   assert(dst.file == file_x87);
906
   assert(dst.idx >= 1);
907
   emit_2ub(p, 0xde, 0xc0+dst.idx);
908
}
909
 
910
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
911
{
912
   assert(dst.file == file_x87);
913
   assert(dst.idx >= 1);
914
   emit_2ub(p, 0xde, 0xf8+dst.idx);
915
}
916
 
917
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
918
{
919
   assert(dst.file == file_x87);
920
   assert(dst.idx >= 1);
921
   emit_2ub(p, 0xde, 0xf0+dst.idx);
922
}
923
 
924
void x87_fucom( struct x86_function *p, struct x86_reg arg )
925
{
926
   assert(arg.file == file_x87);
927
   emit_2ub(p, 0xdd, 0xe0+arg.idx);
928
}
929
 
930
void x87_fucomp( struct x86_function *p, struct x86_reg arg )
931
{
932
   assert(arg.file == file_x87);
933
   emit_2ub(p, 0xdd, 0xe8+arg.idx);
934
}
935
 
936
/* Emit FUCOMPP (DA E9). */
void x87_fucompp( struct x86_function *p )
{
   const unsigned char esc = 0xda, op = 0xe9;

   emit_2ub(p, esc, op);
}
940
 
941
void x87_fxch( struct x86_function *p, struct x86_reg arg )
942
{
943
   assert(arg.file == file_x87);
944
   emit_2ub(p, 0xd9, 0xc8+arg.idx);
945
}
946
 
947
/* Emit FABS (D9 E1). */
void x87_fabs( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xe1;

   emit_2ub(p, esc, op);
}
951
 
952
/* Emit FCHS (D9 E0). */
void x87_fchs( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xe0;

   emit_2ub(p, esc, op);
}
956
 
957
/* Emit FCOS (D9 FF). */
void x87_fcos( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xff;

   emit_2ub(p, esc, op);
}
961
 
962
 
963
/* Emit D9 FC, the encoding of FRNDINT. */
void x87_fprndint( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xfc;

   emit_2ub(p, esc, op);
}
967
 
968
/* Emit FSCALE (D9 FD). */
void x87_fscale( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xfd;

   emit_2ub(p, esc, op);
}
972
 
973
/* Emit FSIN (D9 FE). */
void x87_fsin( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xfe;

   emit_2ub(p, esc, op);
}
977
 
978
/* Emit FSINCOS (D9 FB). */
void x87_fsincos( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xfb;

   emit_2ub(p, esc, op);
}
982
 
983
/* Emit FSQRT (D9 FA). */
void x87_fsqrt( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xfa;

   emit_2ub(p, esc, op);
}
987
 
988
/* Emit FXTRACT (D9 F4). */
void x87_fxtract( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xf4;

   emit_2ub(p, esc, op);
}
992
 
993
/* st0 = (2^st0)-1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 *
 * Emits F2XM1 (D9 F0).
 */
void x87_f2xm1( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xf0;

   emit_2ub(p, esc, op);
}
1001
 
1002
/* st1 = st1 * log2(st0);
 * pop_stack;
 *
 * Emits FYL2X (D9 F1).
 */
void x87_fyl2x( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xf1;

   emit_2ub(p, esc, op);
}
1009
 
1010
/* st1 = st1 * log2(st0 + 1.0);
 * pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 *
 * Emits FYL2XP1 (D9 F9).
 */
void x87_fyl2xp1( struct x86_function *p )
{
   const unsigned char esc = 0xd9, op = 0xf9;

   emit_2ub(p, esc, op);
}
1019
 
1020
 
1021
void x87_fld( struct x86_function *p, struct x86_reg arg )
1022
{
1023
   if (arg.file == file_x87)
1024
      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
1025
   else {
1026
      emit_1ub(p, 0xd9);
1027
      emit_modrm_noreg(p, 0, arg);
1028
   }
1029
}
1030
 
1031
void x87_fst( struct x86_function *p, struct x86_reg dst )
1032
{
1033
   if (dst.file == file_x87)
1034
      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
1035
   else {
1036
      emit_1ub(p, 0xd9);
1037
      emit_modrm_noreg(p, 2, dst);
1038
   }
1039
}
1040
 
1041
void x87_fstp( struct x86_function *p, struct x86_reg dst )
1042
{
1043
   if (dst.file == file_x87)
1044
      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
1045
   else {
1046
      emit_1ub(p, 0xd9);
1047
      emit_modrm_noreg(p, 3, dst);
1048
   }
1049
}
1050
 
1051
void x87_fcom( struct x86_function *p, struct x86_reg dst )
1052
{
1053
   if (dst.file == file_x87)
1054
      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
1055
   else {
1056
      emit_1ub(p, 0xd8);
1057
      emit_modrm_noreg(p, 2, dst);
1058
   }
1059
}
1060
 
1061
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
1062
{
1063
   if (dst.file == file_x87)
1064
      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
1065
   else {
1066
      emit_1ub(p, 0xd8);
1067
      emit_modrm_noreg(p, 3, dst);
1068
   }
1069
}
1070
 
1071
 
1072
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
1073
{
1074
   assert(dst.file == file_REG32);
1075
 
1076
   if (dst.idx == reg_AX &&
1077
       dst.mod == mod_REG)
1078
      emit_2ub(p, 0xdf, 0xe0);
1079
   else {
1080
      emit_1ub(p, 0xdd);
1081
      emit_modrm_noreg(p, 7, dst);
1082
   }
1083
}
1084
 
1085
 
1086
 
1087
 
1088
/***********************************************************************
1089
 * MMX instructions
1090
 */
1091
 
1092
void mmx_emms( struct x86_function *p )
1093
{
1094
   assert(p->need_emms);
1095
   emit_2ub(p, 0x0f, 0x77);
1096
   p->need_emms = 0;
1097
}
1098
 
1099
void mmx_packssdw( struct x86_function *p,
1100
		   struct x86_reg dst,
1101
		   struct x86_reg src )
1102
{
1103
   assert(dst.file == file_MMX &&
1104
	  (src.file == file_MMX || src.mod != mod_REG));
1105
 
1106
   p->need_emms = 1;
1107
 
1108
   emit_2ub(p, X86_TWOB, 0x6b);
1109
   emit_modrm( p, dst, src );
1110
}
1111
 
1112
void mmx_packuswb( struct x86_function *p,
1113
		   struct x86_reg dst,
1114
		   struct x86_reg src )
1115
{
1116
   assert(dst.file == file_MMX &&
1117
	  (src.file == file_MMX || src.mod != mod_REG));
1118
 
1119
   p->need_emms = 1;
1120
 
1121
   emit_2ub(p, X86_TWOB, 0x67);
1122
   emit_modrm( p, dst, src );
1123
}
1124
 
1125
void mmx_movd( struct x86_function *p,
1126
	       struct x86_reg dst,
1127
	       struct x86_reg src )
1128
{
1129
   p->need_emms = 1;
1130
   emit_1ub(p, X86_TWOB);
1131
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
1132
}
1133
 
1134
void mmx_movq( struct x86_function *p,
1135
	       struct x86_reg dst,
1136
	       struct x86_reg src )
1137
{
1138
   p->need_emms = 1;
1139
   emit_1ub(p, X86_TWOB);
1140
   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
1141
}
1142
 
1143
 
1144
/***********************************************************************
1145
 * Helper functions
1146
 */
1147
 
1148
 
1149
/* Retreive a reference to one of the function arguments, taking into
1150
 * account any push/pop activity:
1151
 */
1152
struct x86_reg x86_fn_arg( struct x86_function *p,
1153
			   unsigned arg )
1154
{
1155
   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
1156
			p->stack_offset + arg * 4);	/* ??? */
1157
}
1158
 
1159
 
1160
void x86_init_func( struct x86_function *p )
1161
{
1162
   p->size = 0;
1163
   p->store = NULL;
1164
   p->csr = p->store;
1165
}
1166
 
1167
int x86_init_func_size( struct x86_function *p, unsigned code_size )
1168
{
1169
   p->size = code_size;
1170
   p->store = _mesa_exec_malloc(code_size);
1171
   p->csr = p->store;
1172
   return p->store != NULL;
1173
}
1174
 
1175
void x86_release_func( struct x86_function *p )
1176
{
1177
   _mesa_exec_free(p->store);
1178
   p->store = NULL;
1179
   p->csr = NULL;
1180
   p->size = 0;
1181
}
1182
 
1183
 
1184
void (*x86_get_func( struct x86_function *p ))(void)
1185
{
1186
   if (DISASSEM && p->store)
1187
      printf("disassemble %p %p\n", p->store, p->csr);
1188
   return (void (*)(void)) (unsigned long) p->store;
1189
}
1190
 
1191
#else
1192
 
1193
/* Placeholder emitted when USE_X86_ASM is set but the target is not
 * 32-bit x86; it has no behaviour.
 */
void x86sse_dummy( void )
{
   /* intentionally empty */
}
1196
 
1197
#endif
1198
 
1199
#else  /* USE_X86_ASM */
1200
 
1201
int x86sse_c_dummy_var; /* silence warning */
1202
 
1203
#endif /* USE_X86_ASM */