Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6429 siemargl 1
/*
2
 *  x86-64 code generator for TCC
3
 *
4
 *  Copyright (c) 2008 Shinichiro Hamaji
5
 *
6
 *  Based on i386-gen.c by Fabrice Bellard
7
 *
8
 * This library is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2 of the License, or (at your option) any later version.
12
 *
13
 * This library is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with this library; if not, write to the Free Software
20
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21
 */
22
 
23
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     8

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_LRET    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */

/* pretty names for the registers; the values double as the hardware
   register encodings (bit 3 selects the REX extension, see REX_BASE) */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20 /* flag: value lives in memory addressed by the register */
};

#define REX_BASE(reg) (((reg) >> 3) & 1) /* REX.B/REX.R bit for regs >= 8 */
#define REG_VALUE(reg) ((reg) & 7)       /* low 3 bits used in ModR/M */

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_X86_64

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_X86_64_32
#define R_DATA_PTR  R_X86_64_64
#define R_JMP_SLOT  R_X86_64_JUMP_SLOT
#define R_COPY      R_X86_64_COPY

#define ELF_START_ADDR 0x400000
#define ELF_PAGE_SIZE  0x200000

/******************************************************/
119
#else /* ! TARGET_DEFS_ONLY */
120
/******************************************************/
121
#include "tcc.h"
#include <assert.h>
123
 
124
/* register classes for each of the NB_REGS target registers,
   indexed by TREG_* value */
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 an xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};
154
 
155
/* text-section offset just past the reserved prolog area; set by
   gfunc_prolog and used by gfunc_epilog to back-patch the prolog */
static unsigned long func_sub_sp_offset;
/* immediate for the epilog's "ret $n" (0 means a plain "ret") */
static int func_ret_sub;
157
 
158
/* XXX: make it faster ? */
159
void g(int c)
160
{
161
    int ind1;
162
    ind1 = ind + 1;
163
    if (ind1 > cur_text_section->data_allocated)
164
        section_realloc(cur_text_section, ind1);
165
    cur_text_section->data[ind] = c;
166
    ind = ind1;
167
}
168
 
169
/* emit an opcode: output the non-zero low-order bytes of 'c', least
   significant first.  Note a 0x00 byte cannot be emitted this way. */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8)
        g(c);
}
176
 
177
/* emit a 16-bit little-endian value */
void gen_le16(int v)
{
    g(v & 0xff);
    g((v >> 8) & 0xff);
}
182
 
183
/* emit a 32-bit little-endian value */
void gen_le32(int c)
{
    int i;

    for (i = 0; i < 4; i++)
        g(c >> (8 * i));
}
190
 
191
/* emit a 64-bit little-endian value */
void gen_le64(int64_t c)
{
    int i;

    for (i = 0; i < 8; i++)
        g((int)(c >> (8 * i)));
}
202
 
203
void orex(int ll, int r, int r2, int b)
204
{
205
    if ((r & VT_VALMASK) >= VT_CONST)
206
        r = 0;
207
    if ((r2 & VT_VALMASK) >= VT_CONST)
208
        r2 = 0;
209
    if (ll || REX_BASE(r) || REX_BASE(r2))
210
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
211
    o(b);
212
}
213
 
214
/* output a symbol and patch all calls to it */
215
void gsym_addr(int t, int a)
216
{
217
    while (t) {
218
        unsigned char *ptr = cur_text_section->data + t;
219
        uint32_t n = read32le(ptr); /* next value */
220
        write32le(ptr, a - t - 4);
221
        t = n;
222
    }
223
}
224
 
225
/* define the label for jump chain 't' at the current output position */
void gsym(int t)
{
    gsym_addr(t, ind);
}
229
 
230
/* psym is used to put an instruction with a data field which is a
   reference to a symbol. It is in fact the same as oad ! */
#define psym oad
233
 
234
static int is64_type(int t)
235
{
236
    return ((t & VT_BTYPE) == VT_PTR ||
237
            (t & VT_BTYPE) == VT_FUNC ||
238
            (t & VT_BTYPE) == VT_LLONG);
239
}
240
 
241
/* instruction + 4 bytes data. Return the address of the data */
242
ST_FUNC int oad(int c, int s)
243
{
244
    int ind1;
245
 
246
    o(c);
247
    ind1 = ind + 4;
248
    if (ind1 > cur_text_section->data_allocated)
249
        section_realloc(cur_text_section, ind1);
250
    write32le(cur_text_section->data + ind, s);
251
    s = ind;
252
    ind = ind1;
253
    return s;
254
}
255
 
256
/* output a 32-bit constant, first emitting an R_X86_64_32 relocation
   against 'sym' when 'r & VT_SYM' is set */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_32);
    gen_le32(c);
}
262
 
263
/* output a 64-bit constant, first emitting an R_X86_64_64 relocation
   against 'sym' when 'r & VT_SYM' is set */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_64);
    gen_le64(c);
}
270
 
271
/* output a pc-relative constant, with an R_X86_64_PC32 relocation when
   'r & VT_SYM' is set; the -4 accounts for the rel32 field being
   measured from the end of the instruction */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_X86_64_PC32);
    gen_le32(c-4);
}
278
 
279
/* output got address with relocation */
280
static void gen_gotpcrel(int r, Sym *sym, int c)
281
{
282
#ifndef TCC_TARGET_PE
283
    Section *sr;
284
    ElfW(Rela) *rel;
285
    greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL);
286
    sr = cur_text_section->reloc;
287
    rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela)));
288
    rel->r_addend = -4;
289
#else
290
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
291
        get_tok_str(sym->v, NULL), c, r,
292
        cur_text_section->data[ind-3],
293
        cur_text_section->data[ind-2],
294
        cur_text_section->data[ind-1]
295
        );
296
    greloc(cur_text_section, sym, ind, R_X86_64_PC32);
297
#endif
298
    gen_le32(0);
299
    if (c) {
300
        /* we use add c, %xxx for displacement */
301
        orex(1, r, 0, 0x81);
302
        o(0xc0 + REG_VALUE(r));
303
        gen_le32(c);
304
    }
305
}
306
 
307
/* emit the ModR/M byte (plus displacement) addressing value 'r'/'c';
   'op_reg' fills the reg field, 'is_got' selects GOT-indirect
   addressing for constant symbol references */
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    int rv = r & VT_VALMASK;

    op_reg = REG_VALUE(op_reg) << 3;
    if (rv == VT_CONST) {
        /* %rip-relative constant memory reference */
        o(0x05 | op_reg);
        if (is_got)
            gen_gotpcrel(r, sym, c);
        else
            gen_addrpc32(r, sym, c);
    } else if (rv == VT_LOCAL) {
        /* stack slot: only %rbp is used as base */
        if (c == (char)c) {
            o(0x45 | op_reg); /* disp8 form */
            g(c);
        } else {
            oad(0x85 | op_reg, c); /* disp32 form */
        }
    } else if (rv >= TREG_MEM && c) {
        g(0x80 | op_reg | REG_VALUE(r)); /* disp32(reg) */
        gen_le32(c);
    } else {
        g(0x00 | op_reg | REG_VALUE(r)); /* (reg), or reg-direct */
    }
}
338
 
339
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}
345
 
346
/* generate a modrm reference. 'op_reg' contains the addtionnal 3
347
   opcode bits */
348
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
349
{
350
    int is_got;
351
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
352
    orex(1, r, op_reg, opcode);
353
    gen_modrm_impl(op_reg, r, sym, c, is_got);
354
}
355
 
356
 
357
/* load register 'r' from value 'sv': handles lvalues (with all scalar
   widths and sign/zero extension), constants (with symbol/GOT
   relocations), stack addresses, comparison flags, jump chains and
   register-to-register moves (including SSE and x87). */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;

    ft &= ~(VT_VOLATILE | VT_CONSTANT);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporal register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporal register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporal register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        /* VT_LLOCAL: the address itself is stored in a stack slot;
           load that pointer into an integer register first */
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        /* pick the load opcode 'b' by type; ll selects REX.W */
        ll = 0;
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.i);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            /* materialize comparison flags into 'r' via setcc */
            orex(0,r,0,0);
	    if ((fc & ~0x100) != TOK_NE)
              oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
	    else
              oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
	    if (fc & 0x100)
	      {
	        /* This was a float compare.  If the parity bit is
		   set the result was unordered, meaning false for everything
		   except TOK_NE, and true for TOK_NE.  */
		fc &= ~0x100;
		o(0x037a + (REX_BASE(r) << 8));
	      }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            /* materialize a jump chain as a 0/1 value */
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            /* register-to-register move */
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
520
 
521
/* store register 'r' in lvalue 'v'.  For non-static symbols (ELF) the
   address is first loaded into %r11 via the GOT and 'pic' holds the
   REX prefix for the %r11-based store; note o(0) emits nothing, so
   o(pic) is a no-op when PIC is not in effect. */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.i;
    fr = v->r & VT_VALMASK;
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.i);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;   /* /7 opcode extension selects fstpt */
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89; /* defer: REX.W handled by gen_modrm64 below */
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
600
 
601
/* generate a call (is_jmp == 0) or a jump (is_jmp == 1) to the
   function value on top of the value stack.  Uses a direct rel32
   call/jmp when the target is a constant that fits, otherwise loads
   the address into %r11 and calls/jumps indirectly. */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
	((vtop->r & VT_SYM) || (vtop->c.i-4) == (int)(vtop->c.i-4))) {
        /* constant case */
        if (vtop->r & VT_SYM) {
            /* relocation case */
#ifdef TCC_TARGET_PE
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32);
#else
            greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32);
#endif
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloc(symtab_section, cur_text_section,
                          ind + 1, R_X86_64_PC32, 0);
        }
        oad(0xe8 + is_jmp, vtop->c.i - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
630
 
631
#if defined(CONFIG_TCC_BCHECK)
#ifndef TCC_TARGET_PE
/* bounds-checking bookkeeping; presumably used to back-patch the
   bound-checking prolog code elsewhere in this file — not visible in
   this chunk, confirm against the full source */
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
#endif
636
 
637
/* emit a direct (rel32) call to the runtime helper named by token 'v',
   with a PC32 relocation so the linker resolves the target */
static void gen_static_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type, 0);
    oad(0xe8, -4);
    greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
}
643
 
644
/* generate a bounded pointer addition: calls __bound_ptr_add(ptr, size)
   with the two values on top of the value stack and leaves the checked
   pointer in %rax */
ST_FUNC void gen_bounded_ptr_add(void)
{
    /* save all temporary registers */
    save_regs(0);

    /* prepare fast x86_64 function call */
    gv(RC_RAX);
    o(0xc68948); // mov  %rax,%rsi ## second arg in %rsi, this must be size
    vtop--;

    gv(RC_RAX);
    o(0xc78948); // mov  %rax,%rdi ## first arg in %rdi, this must be ptr
    vtop--;

    /* do a fast function call */
    gen_static_call(TOK___bound_ptr_add);

    /* returned pointer is in rax */
    vtop++;
    vtop->r = TREG_RAX | VT_BOUNDED;

    /* relocation offset of the bounding function call point
       (remembered so gen_bounded_ptr_deref can retarget the call) */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}
670
 
671
/* patch pointer addition in vtop so that pointer dereferencing is
   also tested: retargets the relocation recorded by
   gen_bounded_ptr_add to the __bound_ptr_indirN helper matching the
   access size */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    size = 0;
    /* XXX: put that code in generic part of tcc */
    if (!is_float(vtop->type.t)) {
        if (vtop->r & VT_LVAL_BYTE)
            size = 1;
        else if (vtop->r & VT_LVAL_SHORT)
            size = 2;
    }
    if (!size)
    size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        tcc_error("unhandled size when dereferencing bounded pointer");
        func = 0;
        break;
    }

    sym = external_global_sym(func, &func_old_type, 0);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);

    /* patch relocation */
    /* XXX: find a better solution ? */

    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}
713
#endif
714
 
715
#ifdef TCC_TARGET_PE
716
 
717
/* number of integer argument registers (Win64 calling convention) */
#define REGN 4
/* integer argument registers, in parameter order */
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};
721
 
722
/* Prepare arguments in R10 and R11 rather than RCX and RDX
723
   because gv() will not ever use these */
724
static int arg_prepare_reg(int idx) {
725
  if (idx == 0 || idx == 1)
726
      /* idx=0: r10, idx=1: r11 */
727
      return idx + 10;
728
  else
729
      return arg_regs[idx];
730
}
731
 
732
/* size in bytes of the per-function outgoing-argument scratch area;
   grown by gfunc_call, reset by gfunc_prolog, folded into the frame
   size by gfunc_epilog */
static int func_scratch;
733
 
734
/* Generate function call. The function address is pushed first, then
735
   all the parameters in call order. This functions pops all the
736
   parameters and the function address. */
737
 
738
/* emit instruction 'b' with a d(%rsp) memory operand; bit 0x100 in 'r'
   suppresses the REX register extension (pseudo register numbers) */
void gen_offs_sp(int b, int r, int d)
{
    int reg_field = REG_VALUE(r) << 3;

    orex(1, 0, (r & 0x100) ? 0 : r, b);
    if (d == (char)d) {
        /* 8-bit displacement encoding */
        o(0x2444 | reg_field);
        g(d);
    } else {
        /* 32-bit displacement encoding */
        o(0x2484 | reg_field);
        gen_le32(d);
    }
}
749
 
750
/* Return the number of registers needed to return the struct, or 0 if
751
   returning via struct pointer. */
752
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
753
{
754
    int size, align;
755
    *regsize = 8;
756
    *ret_align = 1; // Never have to re-align return values for x86-64
757
    size = type_size(vt, &align);
758
    ret->ref = NULL;
759
    if (size > 8) {
760
        return 0;
761
    } else if (size > 4) {
762
        ret->t = VT_LLONG;
763
        return 1;
764
    } else if (size > 2) {
765
        ret->t = VT_INT;
766
        return 1;
767
    } else if (size > 1) {
768
        ret->t = VT_SHORT;
769
        return 1;
770
    } else {
771
        ret->t = VT_BYTE;
772
        return 1;
773
    }
774
}
775
 
776
static int is_sse_float(int t) {
777
    int bt;
778
    bt = t & VT_BTYPE;
779
    return bt == VT_DOUBLE || bt == VT_FLOAT;
780
}
781
 
782
/* size a value of 'type' occupies as a function argument; arrays and
   bit-fields count as a pointer-sized slot (8 bytes) */
int gfunc_arg_size(CType *type) {
    int align;
    return (type->t & (VT_ARRAY|VT_BITFIELD)) ? 8 : type_size(type, &align);
}
788
 
789
/* Generate a Win64 function call: the function address was pushed
   first, then all parameters in call order; pops everything. */
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    /* Win64 always reserves at least REGN*8 bytes of shadow space */
    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (size <= 8)
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size); /* fstpt to scratch */
            struct_size += 16;
        }
    }

    /* remember the largest scratch area needed by any call */
    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    /* now move the remaining arguments into registers / stack slots */
    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (size > 8) {
            /* by-reference argument: pass the scratch-copy address */
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    /* small struct: reinterpret as the integer of its size */
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);
    vtop--;
}
903
 
904
 
905
/* bytes reserved at function start, back-filled by gfunc_epilog:
   push %rbp (1) + mov %rsp,%rbp (3) + sub $imm32,%rsp (7) = 11 */
#define FUNC_PROLOG_SIZE 11
906
 
907
/* generate function prolog of type 't' (Win64): reserves space for the
   prolog (patched later by gfunc_epilog), spills register parameters
   to their home slots, and defines the parameter symbols */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    loc = 0;

    addr = PTR_SIZE * 2; /* skip saved %rbp and return address */
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->c == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (size > 8) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (size > 8) {
            /* large argument: passed by reference */
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    /* for variadic functions, also home the remaining register slots */
    while (reg_param_index < REGN) {
        if (func_type->ref->c == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
971
 
972
/* generate function epilog (Win64), then go back and fill in the
   prolog area reserved by gfunc_prolog now that the frame size is
   known; large frames go through __chkstk to touch guard pages */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */
        greloc(cur_text_section, sym, ind-4, R_X86_64_PC32);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
        o(0xec8148);  /* sub rsp, stacksize */
        gen_le32(v);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
1007
 
1008
#else
1009
 
1010
/* Emit "add $val, %rsp", choosing the short sign-extended imm8
   encoding when val fits in a signed byte, the imm32 form otherwise. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $imm32, %rsp */
        return;
    }
    o(0xc48348); /* add $imm8, %rsp */
    g(val);
}
1019
 
1020
/* Parameter-passing classes from the System V x86-64 ABI: every
   argument (and every eightbyte of a small struct) is classified into
   one of these, which then selects integer registers, SSE registers,
   the x87 stack, or memory as the passing location. */
typedef enum X86_64_Mode {
  x86_64_mode_none,     /* void / empty aggregate */
  x86_64_mode_memory,   /* MEMORY class: passed on the stack */
  x86_64_mode_integer,  /* INTEGER class: general purpose registers */
  x86_64_mode_sse,      /* SSE class: xmm registers */
  x86_64_mode_x87       /* long double: x87 register stack */
} X86_64_Mode;
1027
 
1028
/* Merge the ABI classes of two adjacent struct fields into the class
   of the containing eightbyte, per the SysV x86-64 merge rules. */
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    /* equal classes merge to themselves; 'none' is the identity */
    if (a == b || b == x86_64_mode_none)
        return a;
    if (a == x86_64_mode_none)
        return b;
    /* MEMORY dominates every other class */
    if (a == x86_64_mode_memory || b == x86_64_mode_memory)
        return x86_64_mode_memory;
    /* INTEGER dominates the floating-point classes */
    if (a == x86_64_mode_integer || b == x86_64_mode_integer)
        return x86_64_mode_integer;
    /* a mix involving x87 cannot go in registers */
    if (a == x86_64_mode_x87 || b == x86_64_mode_x87)
        return x86_64_mode_memory;
    return x86_64_mode_sse;
}
1045
 
1046
/* Classify a single C type for SysV x86-64 parameter passing,
   recursing into struct fields and merging their classes.  The caller
   (classify_x86_64_arg) applies the "larger than 16 bytes -> memory"
   rule, so only the per-field classes are computed here. */
static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    /* all integer-like scalars, pointers and function designators
       travel in general purpose registers */
    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
    case VT_ENUM: return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        /* fold the classification of every field together */
        f = ty->ref;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }

    /* Bug fix: the original ended with a bare assert(0), which is
       undefined behavior (falling off a non-void function) when
       compiled with NDEBUG.  Keep the assert for debug builds but
       return the conservative MEMORY class as a defined fallback. */
    assert(0);
    return x86_64_mode_memory;
}
1080
 
1081
/* Classify type 'ty' for argument passing and report how to move it.
   Outputs: *psize / *palign are the size and alignment rounded up to
   8-byte units; *reg_count is the number of registers consumed (set
   ONLY for the integer/sse/x87 classes — callers must check the
   returned mode before reading it); if 'ret' is non-NULL it receives a
   surrogate scalar type (VT_INT/VT_LLONG/VT_QLONG/VT_FLOAT/VT_DOUBLE/
   VT_QFLOAT/VT_LDOUBLE) that gv() can use to load the value. */
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        /* bitfields and arrays are treated as a single 8-byte
           integer-class slot carrying the original type */
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            /* ABI rule: aggregates larger than two eightbytes always
               go in memory */
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    /* two GP registers; VT_QLONG is tcc's pseudo type
                       for a register pair */
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    /* two xmm registers; VT_QFLOAT is the pair type */
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
1138
 
1139
/* Tell the stdarg.h va_arg implementation where a value of type 'ty'
   is fetched from: the GP register save area, the SSE save area, or
   the overflow stack area. */
ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);

    if (mode == x86_64_mode_integer)
        return __va_gen_reg;
    if (mode == x86_64_mode_sse)
        return __va_float_reg;
    /* memory, x87 and none all come from the stack overflow area */
    return __va_stack;
}
1153
 
1154
/* Return the number of registers needed to return the struct, or 0 if
1155
   returning via struct pointer. */
1156
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
1157
{
1158
    int size, align, reg_count;
1159
    *ret_align = 1; // Never have to re-align return values for x86-64
1160
    *regsize = 8;
1161
    return (classify_x86_64_arg(vt, ret, &size, &align, ®_count) != x86_64_mode_memory);
1162
}
1163
 
1164
/* number of integer argument registers in the SysV calling convention */
#define REGN 6
/* integer argument registers in ABI order: rdi, rsi, rdx, rcx, r8, r9 */
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};
1168
 
1169
static int arg_prepare_reg(int idx) {
1170
  if (idx == 2 || idx == 3)
1171
      /* idx=2: r10, idx=3: r11 */
1172
      return idx + 8;
1173
  else
1174
      return arg_regs[idx];
1175
}
1176
 
1177
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This functions pops all the
   parameters and the function address. */
/* Overall strategy: (1) count how many GP/SSE registers the argument
   list wants; (2) push every stack-passed argument first, in "runs"
   bounded by 16-byte-aligned arguments, so that the memcpy calls done
   for struct stores cannot clobber register arguments; (3) load the
   remaining arguments into their registers; (4) set %eax to the SSE
   count (varargs protocol), fix up rdx/rcx from r10/r11 and emit the
   call. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;

    /* calculate the number of integer/float register arguments */
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse)
            nb_sse_args += reg_count;
        else if (mode == x86_64_mode_integer)
            nb_reg_args += reg_count;
    }

    /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments
       and ended by a 16-byte aligned argument. This is because, from the point of view of
       the callee, argument alignment is computed from the bottom up. */
    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    run_start = 0;
    args_size = 0;
    while (run_start != nb_args) {
        int run_gen_reg = gen_reg, run_sse_reg = sse_reg;

        /* first pass over the run: find where it ends (the next
           16-byte-aligned argument) and how many stack bytes the
           8-byte-aligned members will need; gen_reg/sse_reg are
           decremented speculatively and restored below */
        run_end = nb_args;
        stack_adjust = 0;
        for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) {
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            switch (mode) {
            case x86_64_mode_memory:
            case x86_64_mode_x87:
            stack_arg:
                if (align == 16)
                    run_end = i;
                else
                    stack_adjust += size;
                break;

            case x86_64_mode_sse:
                sse_reg -= reg_count;
                if (sse_reg + reg_count > 8) goto stack_arg;
                break;

            case x86_64_mode_integer:
                gen_reg -= reg_count;
                if (gen_reg + reg_count > REGN) goto stack_arg;
                break;
	    default: break; /* nothing to be done for x86_64_mode_none */
            }
        }

        gen_reg = run_gen_reg;
        sse_reg = run_sse_reg;

        /* adjust stack to align SSE boundary */
        if (stack_adjust &= 15) {
            /* fetch cpu flag before the following sub will change the value */
            if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
                gv(RC_INT);

            stack_adjust = 16 - stack_adjust;
            o(0x48);
            oad(0xec81, stack_adjust); /* sub $xxx, %rsp */
            args_size += stack_adjust;
        }

        /* second pass: actually push the stack-passed arguments of
           this run; register-passed ones are left on the value stack */
        for(i = run_start; i < run_end;) {
            /* Swap argument to top, it will possibly be changed here,
              and might use more temps. At the end of the loop we keep
              in on the stack and swap it back to its original position
              if it is a register. */
            SValue tmp = vtop[0];
            int arg_stored = 1;

            vtop[0] = vtop[-i];
            vtop[-i] = tmp;
            mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, &reg_count);

            switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                if (mode == x86_64_mode_sse) {
                    if (sse_reg > 8)
                        sse_reg -= reg_count;
                    else
                        arg_stored = 0;
                } else if (mode == x86_64_mode_integer) {
                    if (gen_reg > REGN)
                        gen_reg -= reg_count;
                    else
                        arg_stored = 0;
                }

                if (arg_stored) {
                    /* allocate the necessary size on stack */
                    o(0x48);
                    oad(0xec81, size); /* sub $xxx, %rsp */
                    /* generate structure store */
                    r = get_reg(RC_INT);
                    orex(1, r, 0, 0x89); /* mov %rsp, r */
                    o(0xe0 + REG_VALUE(r));
                    vset(&vtop->type, r | VT_LVAL, 0);
                    vswap();
                    vstore();
                    args_size += size;
                }
                break;

            case VT_LDOUBLE:
                /* long double is always 16-byte aligned, so it is
                   handled by the end-of-run loop below, never here */
                assert(0);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                if (sse_reg > 8) {
                    --sse_reg;
                    r = gv(RC_FLOAT);
                    o(0x50); /* push $rax */
                    /* movq %xmmN, (%rsp) */
                    o(0xd60f66);
                    o(0x04 + REG_VALUE(r)*8);
                    o(0x24);
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                if (gen_reg > REGN) {
                    --gen_reg;
                    r = gv(RC_INT);
                    orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                    args_size += size;
                } else {
                    arg_stored = 0;
                }
                break;
            }

            /* And swap the argument back to it's original position.  */
            tmp = vtop[0];
            vtop[0] = vtop[-i];
            vtop[-i] = tmp;

            if (arg_stored) {
              /* pushed to the machine stack: drop it from the value
                 stack so it is not processed again */
              vrotb(i+1);
              assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r));
              vpop();
              --nb_args;
              --run_end;
            } else {
              ++i;
            }
        }

        /* handle 16 byte aligned arguments at end of run */
        run_start = i = run_end;
        while (i < nb_args) {
            /* Rotate argument to top since it will always be popped */
            mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
            if (align != 16)
              break;

            vrotb(i+1);

            if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                args_size += size;
            } else {
                assert(mode == x86_64_mode_memory);

                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                args_size += size;
            }

            vpop();
            --nb_args;
        }
    }

    /* XXX This should be superfluous.  */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            int d;
            gen_reg -= reg_count;
            r = gv(RC_INT);
            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                /* high eightbyte of a register pair goes in the next
                   argument register */
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here.  */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    /* varargs protocol: %al holds the number of SSE registers used */
    oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--; /* pop the function address */
}
1455
 
1456
 
1457
/* bytes reserved at function entry for the push/mov/sub prolog that
   gfunc_epilog back-patches once the frame size is known */
#define FUNC_PROLOG_SIZE 11

/* Spill integer argument register arg_regs[i] into a fresh 8-byte
   slot at the bottom of the local frame ('loc' grows downward). */
static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
1463
 
1464
/* generate function prolog of type 't' */
/* SysV variant: leaves a FUNC_PROLOG_SIZE hole for the real prolog
   (patched by gfunc_epilog), builds the varargs register save area if
   needed, spills register-passed parameters to locals and pushes a
   symbol for every parameter. */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2; /* first stack parameter: above saved rbp + return address */
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_type->ref->c == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        /* (i.e. how far the named parameters consume the GP/SSE
           register files and the stack, so that va_start knows where
           the variable arguments begin) */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count <= 8) {
                    seen_reg_num += reg_count;
                } else {
                    seen_reg_num = 8;
                    goto stack_arg;
                }
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count <= 8) {
                    seen_sse_num += reg_count;
                } else {
                    seen_sse_num = 8;
                    goto stack_arg;
                }
                break;
            }
        }

        /* store the va_list bookkeeping triple (gp_offset, fp_offset,
           overflow size) in three 32-bit frame slots; this layout must
           match tcc's stdarg implementation */
        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        /* xmm7..xmm0 into 16-byte slots (upper half zeroed), then the
           six GP argument registers */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            o(0xd60f66); /* movq */
            gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        /* hidden pointer arrives in the first GP register (rdi);
           func_vc remembers its spill slot for the return statement */
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                /* SSE registers exhausted: parameter is on the stack */
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            /* always passed on the caller's stack */
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                /* GP registers exhausted: parameter is on the stack */
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;
        }
	default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    /* leave some room for bound checking code */
    if (tcc_state->do_bounds_check) {
        /* placeholder instructions; gfunc_epilog patches in the real
           lbound section pointer and the __bound_local_new call */
        func_bound_offset = lbounds_section->data_offset;
        func_bound_ind = ind;
        oad(0xb8, 0); /* lbound section pointer */
	o(0xc78948);  /* mov  %rax,%rdi ## first arg in %rdi, this must be ptr */
	oad(0xb8, 0); /* call to function */
    }
#endif
}
1617
 
1618
/* generate function epilog */
/* ELF/SysV variant: emits leave/ret, optionally emits the bound-check
   bookkeeping, then rewinds 'ind' to the FUNC_PROLOG_SIZE hole and
   writes the real push/mov/sub prolog now that the frame size (-loc)
   is final. */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check
	&& func_bound_offset != lbounds_section->data_offset)
    {
        /* NOTE: this inner saved_ind intentionally shadows the outer
           one; it is only used while back-patching the prolog stub */
        addr_t saved_ind;
        addr_t *bounds_ptr;
        Sym *sym_data;

        /* add end of table info */
        bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
        *bounds_ptr = 0;

        /* generate bound local allocation */
        sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                               func_bound_offset, lbounds_section->data_offset);
        /* patch the placeholder left by gfunc_prolog with the lbound
           table address and a call to __bound_local_new */
        saved_ind = ind;
        ind = func_bound_ind;
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        ind = ind + 5 + 3;
        gen_static_call(TOK___bound_local_new);
        ind = saved_ind;

        /* generate bound check local freeing */
        o(0x5250); /* save returned value, if any */
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        oad(0xb8, 0); /* mov xxx, %rax */
	o(0xc78948);  /* mov  %rax,%rdi ## first arg in %rdi, this must be ptr */
        gen_static_call(TOK___bound_local_delete);
        o(0x585a); /* restore returned value, if any */
    }
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        /* callee-pops convention: ret imm16, little-endian */
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
    o(0xec8148);  /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}
1671
 
1672
#endif /* not PE */
1673
 
1674
/* generate a jump to a label */
/* Emits "jmp rel32"; psym() writes the opcode and chains the 32-bit
   displacement slot into label 't''s patch list, returning the new
   list head to be resolved later by gsym(). */
int gjmp(int t)
{
    return psym(0xe9, t);
}
1679
 
1680
/* generate a jump to a fixed address */
1681
void gjmp_addr(int a)
1682
{
1683
    int r;
1684
    r = a - ind - 2;
1685
    if (r == (char)r) {
1686
        g(0xeb);
1687
        g(r);
1688
    } else {
1689
        oad(0xe9, a - ind - 5);
1690
    }
1691
}
1692
 
1693
/* generate a test. set 'inv' to invert test. Stack entry is popped */
/* Returns the (possibly extended) jump chain 't'.  vtop may be a
   pending comparison (VT_CMP, condition code in vtop->c.i) or an
   existing jump chain (VT_JMP/VT_JMPI from && / || shortcuts). */
int gtst(int inv, int t)
{
    int v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
	if (vtop->c.i & 0x100)
	  {
	    /* This was a float compare.  If the parity flag is set
	       the result was unordered.  For anything except != this
	       means false and we don't jump (anding both conditions).
	       For != this means true (oring both).
	       Take care about inverting the test.  We need to jump
	       to our target if the result was unordered and test wasn't NE,
	       otherwise if unordered we don't want to jump.  */
	    vtop->c.i &= ~0x100;
            if (inv == (vtop->c.i == TOK_NE))
	      o(0x067a);  /* jp +6 */
	    else
	      {
	        g(0x0f);
		t = psym(0x8a, t); /* jp t */
	      }
	  }
        /* jcc rel32: the condition token maps directly to the 0x80+cc
           opcode (tokens are laid out 16 above the cc values), and
           XOR-ing with inv flips the condition's low bit */
        g(0x0f);
        t = psym((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            /* walk to the end of the chain stored in the code bytes
               themselves, then append t */
            uint32_t n1, n = vtop->c.i;
            if (n) {
                while ((n1 = read32le(cur_text_section->data + n)))
                    n = n1;
                write32le(cur_text_section->data + n, t);
                t = vtop->c.i;
            }
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}
1738
 
1739
/* generate an integer binary operation */
/* The two operands are vtop[-1] (left) and vtop[0] (right); the result
   replaces them on the value stack.  'op' is either a single-char
   operator or a TOK_* comparison/carry token.  Most arithmetic funnels
   through gen_op8 with 'opc' selecting the ALU sub-opcode. */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);                     /* need REX.W? */
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;           /* unsigned op? */
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; /* rhs const? */

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) {
            /* constant case */
            /* (only if the 64-bit constant also fits in imm32) */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                /* ALU r, imm8 (sign-extended) */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                /* ALU r, imm32 */
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            /* register-register form */
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* comparison: leave the condition pending in the flags */
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            /* mask the count to the operand width, as the CPU would */
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        /* rdx is clobbered by the division (holds high part / rem) */
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        /* quotient lands in rax, remainder in rdx */
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        /* comparisons and anything else: use cmp (ALU sub-opcode 7) */
        opc = 7;
        goto gen_op8;
    }
}
1866
 
1867
/* generate a 64-bit ("long long") integer binary operation; on x86-64
   gen_opi() already handles 64-bit operands via the REX.W prefix, so
   this simply forwards to it */
void gen_opl(int op)
{
    gen_opi(op);
}
1871
 
1872
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranted to have the same floating point type */
/* XXX: need to use ST1 too */
/* Long doubles are computed on the x87 stack (ST0); float/double use
   SSE (ucomisd/comisd for compares, addsd/subsd/mulsd/divsd etc. for
   arithmetic).  Comparisons leave a VT_CMP value whose c.i encodes the
   condition; arithmetic leaves the result in the remaining operand. */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    /* pick the register class matching the operand type: x87 stack top
       for long double, an XMM register otherwise */
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            /* >= and > are rewritten as their mirrored forms, so the
               operand order must be flipped; == / != are symmetric */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            /* decode the x87 condition bits (C0/C2/C3 = 0x45 of %ah)
               into a plain equality test the generic code can branch on */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE; /* NOTE(review): redundant — op is already TOK_NE here */
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            /* result is the condition code, not a register value */
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            /* 'a' selects the x87 opcode extension: fadd=0, fmul=1,
               fsub=4 (fsubr=5 when operands swapped), fdiv=6 (fdivr=7) */
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        /* SSE path: float / double */
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            /* map the comparison to a setcc opcode; <= and < need the
               operands flipped so the same setae/seta encodings apply */
            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            /* 0x66 prefix turns the ss form into the sd (double) form */
            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            /* 0x100 flags c.i as a raw setcc opcode rather than a token */
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            /* 'a' is added to 0x58 to select addsd/mulsd/subsd/divsd:
               0x58=add, 0x59=mul, 0x5c=sub, 0x5e=div */
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            /* sub and div are not commutative: if we swapped above, force
               both operands into registers and restore the proper order */
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            /* 0xf2 = scalar double prefix, 0xf3 = scalar single prefix */
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
2074
 
2075
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
/* For long double the value is pushed on the machine stack and loaded
   with fild; for float/double cvtsi2ss/cvtsi2sd is used directly.
   Unsigned int is widened by pushing a zero high word (unsigned long
   long is stated to be handled generically elsewhere). */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        /* result lands in ST0, so spill whatever is there first */
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            /* push $0 then the value: builds a zero-extended 64-bit
               integer on the stack so fildll reads it as non-negative */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        /* 0xf3 prefix -> cvtsi2ss (float), 0xf2 -> cvtsi2sd (double) */
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* REX.W: use the 64-bit source form; for unsigned int the
               value is already zero-extended in the 64-bit register */
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
2117
 
2118
/* convert from one floating point type to another */
2119
void gen_cvt_ftof(int t)
2120
{
2121
    int ft, bt, tbt;
2122
 
2123
    ft = vtop->type.t;
2124
    bt = ft & VT_BTYPE;
2125
    tbt = t & VT_BTYPE;
2126
 
2127
    if (bt == VT_FLOAT) {
2128
        gv(RC_FLOAT);
2129
        if (tbt == VT_DOUBLE) {
2130
            o(0x140f); /* unpcklps */
2131
            o(0xc0 + REG_VALUE(vtop->r)*9);
2132
            o(0x5a0f); /* cvtps2pd */
2133
            o(0xc0 + REG_VALUE(vtop->r)*9);
2134
        } else if (tbt == VT_LDOUBLE) {
2135
            save_reg(RC_ST0);
2136
            /* movss %xmm0,-0x10(%rsp) */
2137
            o(0x110ff3);
2138
            o(0x44 + REG_VALUE(vtop->r)*8);
2139
            o(0xf024);
2140
            o(0xf02444d9); /* flds -0x10(%rsp) */
2141
            vtop->r = TREG_ST0;
2142
        }
2143
    } else if (bt == VT_DOUBLE) {
2144
        gv(RC_FLOAT);
2145
        if (tbt == VT_FLOAT) {
2146
            o(0x140f66); /* unpcklpd */
2147
            o(0xc0 + REG_VALUE(vtop->r)*9);
2148
            o(0x5a0f66); /* cvtpd2ps */
2149
            o(0xc0 + REG_VALUE(vtop->r)*9);
2150
        } else if (tbt == VT_LDOUBLE) {
2151
            save_reg(RC_ST0);
2152
            /* movsd %xmm0,-0x10(%rsp) */
2153
            o(0x110ff2);
2154
            o(0x44 + REG_VALUE(vtop->r)*8);
2155
            o(0xf024);
2156
            o(0xf02444dd); /* fldl -0x10(%rsp) */
2157
            vtop->r = TREG_ST0;
2158
        }
2159
    } else {
2160
        int r;
2161
        gv(RC_ST0);
2162
        r = get_reg(RC_FLOAT);
2163
        if (tbt == VT_DOUBLE) {
2164
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
2165
            /* movsd -0x10(%rsp),%xmm0 */
2166
            o(0x100ff2);
2167
            o(0x44 + REG_VALUE(r)*8);
2168
            o(0xf024);
2169
            vtop->r = r;
2170
        } else if (tbt == VT_FLOAT) {
2171
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
2172
            /* movss -0x10(%rsp),%xmm0 */
2173
            o(0x100ff3);
2174
            o(0x44 + REG_VALUE(r)*8);
2175
            o(0xf024);
2176
            vtop->r = r;
2177
        }
2178
    }
2179
}
2180
 
2181
/* convert fp to int 't' type */
/* Long double is first narrowed to double (losing extra precision),
   then cvttss2si / cvttsd2si (truncating conversion) produces the
   integer in a fresh general-purpose register. */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        /* no direct x87 -> integer path here; go via double */
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    /* 64-bit destination for anything but plain int (long long,
       pointers, unsigned int held in a 64-bit reg) */
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    /* mandatory prefix selects operand type: f3 = float, f2 = double */
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
2210
 
2211
/* computed goto support */
/* Emit an indirect jump to the address on top of the value stack,
   then pop it: gcall_or_jmp(1) generates jmp instead of call. */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}
2217
 
2218
/* Save the stack pointer onto the stack and return the location of its address */
/* Emits mov %rsp, addr(%rbp) so the pre-VLA stack pointer can later be
   restored by gen_vla_sp_restore(). */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}
2223
 
2224
/* Restore the SP from a location on the stack */
/* Emits mov addr(%rbp), %rsp — the counterpart of gen_vla_sp_save(). */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}
2228
 
2229
/* Subtract from the stack pointer, and push the resulting value onto the stack */
/* The allocation size is on top of the value stack.  On Windows (PE)
   the runtime alloca must be called (it probes guard pages); elsewhere
   %rsp is adjusted directly and re-aligned.  The 'align' parameter is
   ignored: alignment is always forced to 16 bytes. */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
    vset(type, REG_IRET, 0);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
    /* and ~15, %rsp */
    o(0xf0e48348);
    vpop();
#endif
}
2249
 
2250
 
2251
/* end of x86-64 code generator */
2252
/*************************************************************/
2253
#endif /* ! TARGET_DEFS_ONLY */
2254
/******************************************************/