Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6429 | siemargl | 1 | /* |
2 | * x86-64 code generator for TCC |
||
3 | * |
||
4 | * Copyright (c) 2008 Shinichiro Hamaji |
||
5 | * |
||
6 | * Based on i386-gen.c by Fabrice Bellard |
||
7 | * |
||
8 | * This library is free software; you can redistribute it and/or |
||
9 | * modify it under the terms of the GNU Lesser General Public |
||
10 | * License as published by the Free Software Foundation; either |
||
11 | * version 2 of the License, or (at your option) any later version. |
||
12 | * |
||
13 | * This library is distributed in the hope that it will be useful, |
||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
16 | * Lesser General Public License for more details. |
||
17 | * |
||
18 | * You should have received a copy of the GNU Lesser General Public |
||
19 | * License along with this library; if not, write to the Free Software |
||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||
21 | */ |
||
22 | |||
23 | #ifdef TARGET_DEFS_ONLY |
||
24 | |||
25 | /* number of available registers */ |
||
26 | #define NB_REGS 25 |
||
27 | #define NB_ASM_REGS 8 |
||
28 | |||
29 | /* a register can belong to several classes. The classes must be |
||
30 | sorted from more general to more precise (see gv2() code which makes |
||
31 | assumptions on it). */ |
||
32 | #define RC_INT 0x0001 /* generic integer register */ |
||
33 | #define RC_FLOAT 0x0002 /* generic float register */ |
||
34 | #define RC_RAX 0x0004 |
||
35 | #define RC_RCX 0x0008 |
||
36 | #define RC_RDX 0x0010 |
||
37 | #define RC_ST0 0x0080 /* only for long double */ |
||
38 | #define RC_R8 0x0100 |
||
39 | #define RC_R9 0x0200 |
||
40 | #define RC_R10 0x0400 |
||
41 | #define RC_R11 0x0800 |
||
42 | #define RC_XMM0 0x1000 |
||
43 | #define RC_XMM1 0x2000 |
||
44 | #define RC_XMM2 0x4000 |
||
45 | #define RC_XMM3 0x8000 |
||
46 | #define RC_XMM4 0x10000 |
||
47 | #define RC_XMM5 0x20000 |
||
48 | #define RC_XMM6 0x40000 |
||
49 | #define RC_XMM7 0x80000 |
||
50 | #define RC_IRET RC_RAX /* function return: integer register */ |
||
51 | #define RC_LRET RC_RDX /* function return: second integer register */ |
||
52 | #define RC_FRET RC_XMM0 /* function return: float register */ |
||
53 | #define RC_QRET RC_XMM1 /* function return: second float register */ |
||
54 | |||
55 | /* pretty names for the registers */ |
||
56 | enum { /* register numbers used by this generator; REG_VALUE() keeps the low 3 bits and REX_BASE() extracts bit 3 */ |
||
57 | TREG_RAX = 0, |
||
58 | TREG_RCX = 1, |
||
59 | TREG_RDX = 2, |
||
60 | TREG_RSP = 4, /* numbers 3 and 5 have no name here and carry no class in reg_classes[] */ |
||
61 | TREG_RSI = 6, |
||
62 | TREG_RDI = 7, |
||
63 | |||
64 | TREG_R8 = 8, /* 8..11: bit 3 is set, so REX_BASE() yields 1 for these */ |
||
65 | TREG_R9 = 9, |
||
66 | TREG_R10 = 10, |
||
67 | TREG_R11 = 11, |
||
68 | |||
69 | TREG_XMM0 = 16, /* 16..23: REG_VALUE() of these gives the xmm register number 0..7 */ |
||
70 | TREG_XMM1 = 17, |
||
71 | TREG_XMM2 = 18, |
||
72 | TREG_XMM3 = 19, |
||
73 | TREG_XMM4 = 20, |
||
74 | TREG_XMM5 = 21, |
||
75 | TREG_XMM6 = 22, |
||
76 | TREG_XMM7 = 23, |
||
77 | |||
78 | TREG_ST0 = 24, /* x87 top of stack; tagged RC_ST0 (long double only) in reg_classes[] */ |
||
79 | |||
80 | TREG_MEM = 0x20 /* flag ORed onto a register number by load() when the register holds an address */ |
||
81 | }; |
||
82 | |||
83 | #define REX_BASE(reg) (((reg) >> 3) & 1) |
||
84 | #define REG_VALUE(reg) ((reg) & 7) |
||
85 | |||
86 | /* return registers for function */ |
||
87 | #define REG_IRET TREG_RAX /* single word int return register */ |
||
88 | #define REG_LRET TREG_RDX /* second word return register (for long long) */ |
||
89 | #define REG_FRET TREG_XMM0 /* float return register */ |
||
90 | #define REG_QRET TREG_XMM1 /* second float return register */ |
||
91 | |||
92 | /* defined if function parameters must be evaluated in reverse order */ |
||
93 | #define INVERT_FUNC_PARAMS |
||
94 | |||
95 | /* pointer size, in bytes */ |
||
96 | #define PTR_SIZE 8 |
||
97 | |||
98 | /* long double size and alignment, in bytes */ |
||
99 | #define LDOUBLE_SIZE 16 |
||
100 | #define LDOUBLE_ALIGN 16 |
||
101 | /* maximum alignment (for aligned attribute support) */ |
||
102 | #define MAX_ALIGN 16 |
||
103 | |||
104 | /******************************************************/ |
||
105 | /* ELF defines */ |
||
106 | |||
107 | #define EM_TCC_TARGET EM_X86_64 |
||
108 | |||
109 | /* relocation type for 32 bit data relocation */ |
||
110 | #define R_DATA_32 R_X86_64_32 |
||
111 | #define R_DATA_PTR R_X86_64_64 |
||
112 | #define R_JMP_SLOT R_X86_64_JUMP_SLOT |
||
113 | #define R_COPY R_X86_64_COPY |
||
114 | |||
115 | #define ELF_START_ADDR 0x400000 |
||
116 | #define ELF_PAGE_SIZE 0x200000 |
||
117 | |||
118 | /******************************************************/ |
||
119 | #else /* ! TARGET_DEFS_ONLY */ |
||
120 | /******************************************************/ |
||
121 | #include "tcc.h" |
||
122 | #include <assert.h> |
||
123 | |||
124 | ST_DATA const int reg_classes[NB_REGS] = { |
||
125 | /* eax */ RC_INT | RC_RAX, |
||
126 | /* ecx */ RC_INT | RC_RCX, |
||
127 | /* edx */ RC_INT | RC_RDX, |
||
128 | 0, |
||
129 | 0, |
||
130 | 0, |
||
131 | 0, |
||
132 | 0, |
||
133 | RC_R8, |
||
134 | RC_R9, |
||
135 | RC_R10, |
||
136 | RC_R11, |
||
137 | 0, |
||
138 | 0, |
||
139 | 0, |
||
140 | 0, |
||
141 | /* xmm0 */ RC_FLOAT | RC_XMM0, |
||
142 | /* xmm1 */ RC_FLOAT | RC_XMM1, |
||
143 | /* xmm2 */ RC_FLOAT | RC_XMM2, |
||
144 | /* xmm3 */ RC_FLOAT | RC_XMM3, |
||
145 | /* xmm4 */ RC_FLOAT | RC_XMM4, |
||
146 | /* xmm5 */ RC_FLOAT | RC_XMM5, |
||
147 | /* xmm6 an xmm7 are included so gv() can be used on them, |
||
148 | but they are not tagged with RC_FLOAT because they are |
||
149 | callee saved on Windows */ |
||
150 | RC_XMM6, |
||
151 | RC_XMM7, |
||
152 | /* st0 */ RC_ST0 |
||
153 | }; |
||
154 | |||
155 | static unsigned long func_sub_sp_offset; /* value of 'ind' right after the FUNC_PROLOG_SIZE bytes reserved by gfunc_prolog() */ |
||
156 | static int func_ret_sub; /* when non-zero, gfunc_epilog() emits 'ret n' with this 16-bit operand instead of a plain 'ret' */ |
||
157 | |||
158 | /* XXX: make it faster ? */ |
||
159 | void g(int c) |
||
160 | { |
||
161 | int ind1; |
||
162 | ind1 = ind + 1; |
||
163 | if (ind1 > cur_text_section->data_allocated) |
||
164 | section_realloc(cur_text_section, ind1); |
||
165 | cur_text_section->data[ind] = c; |
||
166 | ind = ind1; |
||
167 | } |
||
168 | |||
/* Emit the bytes of 'c' starting with the least significant one and stop
   as soon as the remaining value is zero.  Consequently o(0) emits nothing
   and leading (high) zero bytes are never written. */
void o(unsigned int c)
{
    for (; c != 0; c >>= 8)
        g(c);
}
||
176 | |||
/* Emit the low 16 bits of 'v' as two bytes, least significant first. */
void gen_le16(int v)
{
    int shift;

    for (shift = 0; shift < 16; shift += 8)
        g(v >> shift);
}
||
182 | |||
/* Emit 'c' as four bytes, least significant first. */
void gen_le32(int c)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        g(c >> shift);
}
||
190 | |||
/* Emit 'c' as eight bytes, least significant first. */
void gen_le64(int64_t c)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8)
        g(c >> shift);
}
||
202 | |||
203 | void orex(int ll, int r, int r2, int b) |
||
204 | { |
||
205 | if ((r & VT_VALMASK) >= VT_CONST) |
||
206 | r = 0; |
||
207 | if ((r2 & VT_VALMASK) >= VT_CONST) |
||
208 | r2 = 0; |
||
209 | if (ll || REX_BASE(r) || REX_BASE(r2)) |
||
210 | o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3)); |
||
211 | o(b); |
||
212 | } |
||
213 | |||
214 | /* output a symbol and patch all calls to it */ |
||
215 | void gsym_addr(int t, int a) |
||
216 | { |
||
217 | while (t) { |
||
218 | unsigned char *ptr = cur_text_section->data + t; |
||
219 | uint32_t n = read32le(ptr); /* next value */ |
||
220 | write32le(ptr, a - t - 4); |
||
221 | t = n; |
||
222 | } |
||
223 | } |
||
224 | |||
225 | void gsym(int t) |
||
226 | { |
||
227 | gsym_addr(t, ind); |
||
228 | } |
||
229 | |||
230 | /* psym is used to put an instruction with a data field which is a |
||
231 | reference to a symbol. It is in fact the same as oad ! */ |
||
232 | #define psym oad |
||
233 | |||
234 | static int is64_type(int t) |
||
235 | { |
||
236 | return ((t & VT_BTYPE) == VT_PTR || |
||
237 | (t & VT_BTYPE) == VT_FUNC || |
||
238 | (t & VT_BTYPE) == VT_LLONG); |
||
239 | } |
||
240 | |||
241 | /* instruction + 4 bytes data. Return the address of the data */ |
||
242 | ST_FUNC int oad(int c, int s) |
||
243 | { |
||
244 | int ind1; |
||
245 | |||
246 | o(c); |
||
247 | ind1 = ind + 4; |
||
248 | if (ind1 > cur_text_section->data_allocated) |
||
249 | section_realloc(cur_text_section, ind1); |
||
250 | write32le(cur_text_section->data + ind, s); |
||
251 | s = ind; |
||
252 | ind = ind1; |
||
253 | return s; |
||
254 | } |
||
255 | |||
256 | ST_FUNC void gen_addr32(int r, Sym *sym, int c) |
||
257 | { |
||
258 | if (r & VT_SYM) |
||
259 | greloc(cur_text_section, sym, ind, R_X86_64_32); |
||
260 | gen_le32(c); |
||
261 | } |
||
262 | |||
263 | /* output constant with relocation if 'r & VT_SYM' is true */ |
||
264 | ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c) |
||
265 | { |
||
266 | if (r & VT_SYM) |
||
267 | greloc(cur_text_section, sym, ind, R_X86_64_64); |
||
268 | gen_le64(c); |
||
269 | } |
||
270 | |||
271 | /* output constant with relocation if 'r & VT_SYM' is true */ |
||
272 | ST_FUNC void gen_addrpc32(int r, Sym *sym, int c) |
||
273 | { |
||
274 | if (r & VT_SYM) |
||
275 | greloc(cur_text_section, sym, ind, R_X86_64_PC32); |
||
276 | gen_le32(c-4); |
||
277 | } |
||
278 | |||
279 | /* output got address with relocation */ |
||
280 | static void gen_gotpcrel(int r, Sym *sym, int c) |
||
281 | { |
||
282 | #ifndef TCC_TARGET_PE |
||
283 | Section *sr; |
||
284 | ElfW(Rela) *rel; |
||
285 | greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL); |
||
286 | sr = cur_text_section->reloc; |
||
287 | rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela))); |
||
288 | rel->r_addend = -4; |
||
289 | #else |
||
290 | tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n", |
||
291 | get_tok_str(sym->v, NULL), c, r, |
||
292 | cur_text_section->data[ind-3], |
||
293 | cur_text_section->data[ind-2], |
||
294 | cur_text_section->data[ind-1] |
||
295 | ); |
||
296 | greloc(cur_text_section, sym, ind, R_X86_64_PC32); |
||
297 | #endif |
||
298 | gen_le32(0); |
||
299 | if (c) { |
||
300 | /* we use add c, %xxx for displacement */ |
||
301 | orex(1, r, 0, 0x81); |
||
302 | o(0xc0 + REG_VALUE(r)); |
||
303 | gen_le32(c); |
||
304 | } |
||
305 | } |
||
306 | |||
307 | static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got) |
||
308 | { |
||
309 | op_reg = REG_VALUE(op_reg) << 3; |
||
310 | if ((r & VT_VALMASK) == VT_CONST) { |
||
311 | /* constant memory reference */ |
||
312 | o(0x05 | op_reg); |
||
313 | if (is_got) { |
||
314 | gen_gotpcrel(r, sym, c); |
||
315 | } else { |
||
316 | gen_addrpc32(r, sym, c); |
||
317 | } |
||
318 | } else if ((r & VT_VALMASK) == VT_LOCAL) { |
||
319 | /* currently, we use only ebp as base */ |
||
320 | if (c == (char)c) { |
||
321 | /* short reference */ |
||
322 | o(0x45 | op_reg); |
||
323 | g(c); |
||
324 | } else { |
||
325 | oad(0x85 | op_reg, c); |
||
326 | } |
||
327 | } else if ((r & VT_VALMASK) >= TREG_MEM) { |
||
328 | if (c) { |
||
329 | g(0x80 | op_reg | REG_VALUE(r)); |
||
330 | gen_le32(c); |
||
331 | } else { |
||
332 | g(0x00 | op_reg | REG_VALUE(r)); |
||
333 | } |
||
334 | } else { |
||
335 | g(0x00 | op_reg | REG_VALUE(r)); |
||
336 | } |
||
337 | } |
||
338 | |||
339 | /* generate a modrm reference. 'op_reg' contains the addtionnal 3 |
||
340 | opcode bits */ |
||
341 | static void gen_modrm(int op_reg, int r, Sym *sym, int c) |
||
342 | { |
||
343 | gen_modrm_impl(op_reg, r, sym, c, 0); |
||
344 | } |
||
345 | |||
346 | /* generate a modrm reference. 'op_reg' contains the addtionnal 3 |
||
347 | opcode bits */ |
||
348 | static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c) |
||
349 | { |
||
350 | int is_got; |
||
351 | is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC); |
||
352 | orex(1, r, op_reg, opcode); |
||
353 | gen_modrm_impl(op_reg, r, sym, c, is_got); |
||
354 | } |
||
355 | |||
356 | |||
357 | /* load 'r' from value 'sv' */ |
||
358 | void load(int r, SValue *sv) |
||
359 | { |
||
360 | int v, t, ft, fc, fr; |
||
361 | SValue v1; |
||
362 | |||
363 | #ifdef TCC_TARGET_PE |
||
364 | SValue v2; |
||
365 | sv = pe_getimport(sv, &v2); |
||
366 | #endif |
||
367 | |||
368 | fr = sv->r; |
||
369 | ft = sv->type.t & ~VT_DEFSIGN; |
||
370 | fc = sv->c.i; |
||
371 | |||
372 | ft &= ~(VT_VOLATILE | VT_CONSTANT); |
||
373 | |||
374 | #ifndef TCC_TARGET_PE |
||
375 | /* we use indirect access via got */ |
||
376 | if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) && |
||
377 | (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) { |
||
378 | /* use the result register as a temporal register */ |
||
379 | int tr = r | TREG_MEM; |
||
380 | if (is_float(ft)) { |
||
381 | /* we cannot use float registers as a temporal register */ |
||
382 | tr = get_reg(RC_INT) | TREG_MEM; |
||
383 | } |
||
384 | gen_modrm64(0x8b, tr, fr, sv->sym, 0); |
||
385 | |||
386 | /* load from the temporal register */ |
||
387 | fr = tr | VT_LVAL; |
||
388 | } |
||
389 | #endif |
||
390 | |||
391 | v = fr & VT_VALMASK; |
||
392 | if (fr & VT_LVAL) { |
||
393 | int b, ll; |
||
394 | if (v == VT_LLOCAL) { |
||
395 | v1.type.t = VT_PTR; |
||
396 | v1.r = VT_LOCAL | VT_LVAL; |
||
397 | v1.c.i = fc; |
||
398 | fr = r; |
||
399 | if (!(reg_classes[fr] & (RC_INT|RC_R11))) |
||
400 | fr = get_reg(RC_INT); |
||
401 | load(fr, &v1); |
||
402 | } |
||
403 | ll = 0; |
||
404 | if ((ft & VT_BTYPE) == VT_FLOAT) { |
||
405 | b = 0x6e0f66; |
||
406 | r = REG_VALUE(r); /* movd */ |
||
407 | } else if ((ft & VT_BTYPE) == VT_DOUBLE) { |
||
408 | b = 0x7e0ff3; /* movq */ |
||
409 | r = REG_VALUE(r); |
||
410 | } else if ((ft & VT_BTYPE) == VT_LDOUBLE) { |
||
411 | b = 0xdb, r = 5; /* fldt */ |
||
412 | } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) { |
||
413 | b = 0xbe0f; /* movsbl */ |
||
414 | } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) { |
||
415 | b = 0xb60f; /* movzbl */ |
||
416 | } else if ((ft & VT_TYPE) == VT_SHORT) { |
||
417 | b = 0xbf0f; /* movswl */ |
||
418 | } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) { |
||
419 | b = 0xb70f; /* movzwl */ |
||
420 | } else { |
||
421 | assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG) |
||
422 | || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM) |
||
423 | || ((ft & VT_BTYPE) == VT_FUNC)); |
||
424 | ll = is64_type(ft); |
||
425 | b = 0x8b; |
||
426 | } |
||
427 | if (ll) { |
||
428 | gen_modrm64(b, r, fr, sv->sym, fc); |
||
429 | } else { |
||
430 | orex(ll, fr, r, b); |
||
431 | gen_modrm(r, fr, sv->sym, fc); |
||
432 | } |
||
433 | } else { |
||
434 | if (v == VT_CONST) { |
||
435 | if (fr & VT_SYM) { |
||
436 | #ifdef TCC_TARGET_PE |
||
437 | orex(1,0,r,0x8d); |
||
438 | o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */ |
||
439 | gen_addrpc32(fr, sv->sym, fc); |
||
440 | #else |
||
441 | if (sv->sym->type.t & VT_STATIC) { |
||
442 | orex(1,0,r,0x8d); |
||
443 | o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */ |
||
444 | gen_addrpc32(fr, sv->sym, fc); |
||
445 | } else { |
||
446 | orex(1,0,r,0x8b); |
||
447 | o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */ |
||
448 | gen_gotpcrel(r, sv->sym, fc); |
||
449 | } |
||
450 | #endif |
||
451 | } else if (is64_type(ft)) { |
||
452 | orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */ |
||
453 | gen_le64(sv->c.i); |
||
454 | } else { |
||
455 | orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */ |
||
456 | gen_le32(fc); |
||
457 | } |
||
458 | } else if (v == VT_LOCAL) { |
||
459 | orex(1,0,r,0x8d); /* lea xxx(%ebp), r */ |
||
460 | gen_modrm(r, VT_LOCAL, sv->sym, fc); |
||
461 | } else if (v == VT_CMP) { |
||
462 | orex(0,r,0,0); |
||
463 | if ((fc & ~0x100) != TOK_NE) |
||
464 | oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */ |
||
465 | else |
||
466 | oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */ |
||
467 | if (fc & 0x100) |
||
468 | { |
||
469 | /* This was a float compare. If the parity bit is |
||
470 | set the result was unordered, meaning false for everything |
||
471 | except TOK_NE, and true for TOK_NE. */ |
||
472 | fc &= ~0x100; |
||
473 | o(0x037a + (REX_BASE(r) << 8)); |
||
474 | } |
||
475 | orex(0,r,0, 0x0f); /* setxx %br */ |
||
476 | o(fc); |
||
477 | o(0xc0 + REG_VALUE(r)); |
||
478 | } else if (v == VT_JMP || v == VT_JMPI) { |
||
479 | t = v & 1; |
||
480 | orex(0,r,0,0); |
||
481 | oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */ |
||
482 | o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */ |
||
483 | gsym(fc); |
||
484 | orex(0,r,0,0); |
||
485 | oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */ |
||
486 | } else if (v != r) { |
||
487 | if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) { |
||
488 | if (v == TREG_ST0) { |
||
489 | /* gen_cvt_ftof(VT_DOUBLE); */ |
||
490 | o(0xf0245cdd); /* fstpl -0x10(%rsp) */ |
||
491 | /* movsd -0x10(%rsp),%xmmN */ |
||
492 | o(0x100ff2); |
||
493 | o(0x44 + REG_VALUE(r)*8); /* %xmmN */ |
||
494 | o(0xf024); |
||
495 | } else { |
||
496 | assert((v >= TREG_XMM0) && (v <= TREG_XMM7)); |
||
497 | if ((ft & VT_BTYPE) == VT_FLOAT) { |
||
498 | o(0x100ff3); |
||
499 | } else { |
||
500 | assert((ft & VT_BTYPE) == VT_DOUBLE); |
||
501 | o(0x100ff2); |
||
502 | } |
||
503 | o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8); |
||
504 | } |
||
505 | } else if (r == TREG_ST0) { |
||
506 | assert((v >= TREG_XMM0) && (v <= TREG_XMM7)); |
||
507 | /* gen_cvt_ftof(VT_LDOUBLE); */ |
||
508 | /* movsd %xmmN,-0x10(%rsp) */ |
||
509 | o(0x110ff2); |
||
510 | o(0x44 + REG_VALUE(r)*8); /* %xmmN */ |
||
511 | o(0xf024); |
||
512 | o(0xf02444dd); /* fldl -0x10(%rsp) */ |
||
513 | } else { |
||
514 | orex(1,r,v, 0x89); |
||
515 | o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */ |
||
516 | } |
||
517 | } |
||
518 | } |
||
519 | } |
||
520 | |||
521 | /* store register 'r' in lvalue 'v' */ |
||
522 | void store(int r, SValue *v) |
||
523 | { |
||
524 | int fr, bt, ft, fc; |
||
525 | int op64 = 0; |
||
526 | /* store the REX prefix in this variable when PIC is enabled */ |
||
527 | int pic = 0; |
||
528 | |||
529 | #ifdef TCC_TARGET_PE |
||
530 | SValue v2; |
||
531 | v = pe_getimport(v, &v2); |
||
532 | #endif |
||
533 | |||
534 | ft = v->type.t; |
||
535 | fc = v->c.i; |
||
536 | fr = v->r & VT_VALMASK; |
||
537 | ft &= ~(VT_VOLATILE | VT_CONSTANT); |
||
538 | bt = ft & VT_BTYPE; |
||
539 | |||
540 | #ifndef TCC_TARGET_PE |
||
541 | /* we need to access the variable via got */ |
||
542 | if (fr == VT_CONST && (v->r & VT_SYM)) { |
||
543 | /* mov xx(%rip), %r11 */ |
||
544 | o(0x1d8b4c); |
||
545 | gen_gotpcrel(TREG_R11, v->sym, v->c.i); |
||
546 | pic = is64_type(bt) ? 0x49 : 0x41; |
||
547 | } |
||
548 | #endif |
||
549 | |||
550 | /* XXX: incorrect if float reg to reg */ |
||
551 | if (bt == VT_FLOAT) { |
||
552 | o(0x66); |
||
553 | o(pic); |
||
554 | o(0x7e0f); /* movd */ |
||
555 | r = REG_VALUE(r); |
||
556 | } else if (bt == VT_DOUBLE) { |
||
557 | o(0x66); |
||
558 | o(pic); |
||
559 | o(0xd60f); /* movq */ |
||
560 | r = REG_VALUE(r); |
||
561 | } else if (bt == VT_LDOUBLE) { |
||
562 | o(0xc0d9); /* fld %st(0) */ |
||
563 | o(pic); |
||
564 | o(0xdb); /* fstpt */ |
||
565 | r = 7; |
||
566 | } else { |
||
567 | if (bt == VT_SHORT) |
||
568 | o(0x66); |
||
569 | o(pic); |
||
570 | if (bt == VT_BYTE || bt == VT_BOOL) |
||
571 | orex(0, 0, r, 0x88); |
||
572 | else if (is64_type(bt)) |
||
573 | op64 = 0x89; |
||
574 | else |
||
575 | orex(0, 0, r, 0x89); |
||
576 | } |
||
577 | if (pic) { |
||
578 | /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */ |
||
579 | if (op64) |
||
580 | o(op64); |
||
581 | o(3 + (r << 3)); |
||
582 | } else if (op64) { |
||
583 | if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) { |
||
584 | gen_modrm64(op64, r, v->r, v->sym, fc); |
||
585 | } else if (fr != r) { |
||
586 | /* XXX: don't we really come here? */ |
||
587 | abort(); |
||
588 | o(0xc0 + fr + r * 8); /* mov r, fr */ |
||
589 | } |
||
590 | } else { |
||
591 | if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) { |
||
592 | gen_modrm(r, v->r, v->sym, fc); |
||
593 | } else if (fr != r) { |
||
594 | /* XXX: don't we really come here? */ |
||
595 | abort(); |
||
596 | o(0xc0 + fr + r * 8); /* mov r, fr */ |
||
597 | } |
||
598 | } |
||
599 | } |
||
600 | |||
601 | /* 'is_jmp' is '1' if it is a jump */ |
||
602 | static void gcall_or_jmp(int is_jmp) |
||
603 | { |
||
604 | int r; |
||
605 | if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST && |
||
606 | ((vtop->r & VT_SYM) || (vtop->c.i-4) == (int)(vtop->c.i-4))) { |
||
607 | /* constant case */ |
||
608 | if (vtop->r & VT_SYM) { |
||
609 | /* relocation case */ |
||
610 | #ifdef TCC_TARGET_PE |
||
611 | greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32); |
||
612 | #else |
||
613 | greloc(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32); |
||
614 | #endif |
||
615 | } else { |
||
616 | /* put an empty PC32 relocation */ |
||
617 | put_elf_reloc(symtab_section, cur_text_section, |
||
618 | ind + 1, R_X86_64_PC32, 0); |
||
619 | } |
||
620 | oad(0xe8 + is_jmp, vtop->c.i - 4); /* call/jmp im */ |
||
621 | } else { |
||
622 | /* otherwise, indirect call */ |
||
623 | r = TREG_R11; |
||
624 | load(r, vtop); |
||
625 | o(0x41); /* REX */ |
||
626 | o(0xff); /* call/jmp *r */ |
||
627 | o(0xd0 + REG_VALUE(r) + (is_jmp << 4)); |
||
628 | } |
||
629 | } |
||
630 | |||
631 | #if defined(CONFIG_TCC_BCHECK) |
||
632 | #ifndef TCC_TARGET_PE |
||
633 | static addr_t func_bound_offset; |
||
634 | static unsigned long func_bound_ind; |
||
635 | #endif |
||
636 | |||
637 | static void gen_static_call(int v) |
||
638 | { |
||
639 | Sym *sym = external_global_sym(v, &func_old_type, 0); |
||
640 | oad(0xe8, -4); |
||
641 | greloc(cur_text_section, sym, ind-4, R_X86_64_PC32); |
||
642 | } |
||
643 | |||
644 | /* generate a bounded pointer addition */ |
||
645 | ST_FUNC void gen_bounded_ptr_add(void) |
||
646 | { |
||
647 | /* save all temporary registers */ |
||
648 | save_regs(0); |
||
649 | |||
650 | /* prepare fast x86_64 function call */ |
||
651 | gv(RC_RAX); |
||
652 | o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size |
||
653 | vtop--; |
||
654 | |||
655 | gv(RC_RAX); |
||
656 | o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr |
||
657 | vtop--; |
||
658 | |||
659 | /* do a fast function call */ |
||
660 | gen_static_call(TOK___bound_ptr_add); |
||
661 | |||
662 | /* returned pointer is in rax */ |
||
663 | vtop++; |
||
664 | vtop->r = TREG_RAX | VT_BOUNDED; |
||
665 | |||
666 | |||
667 | /* relocation offset of the bounding function call point */ |
||
668 | vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela))); |
||
669 | } |
||
670 | |||
671 | /* patch pointer addition in vtop so that pointer dereferencing is |
||
672 | also tested */ |
||
673 | ST_FUNC void gen_bounded_ptr_deref(void) |
||
674 | { |
||
675 | addr_t func; |
||
676 | int size, align; |
||
677 | ElfW(Rela) *rel; |
||
678 | Sym *sym; |
||
679 | |||
680 | size = 0; |
||
681 | /* XXX: put that code in generic part of tcc */ |
||
682 | if (!is_float(vtop->type.t)) { |
||
683 | if (vtop->r & VT_LVAL_BYTE) |
||
684 | size = 1; |
||
685 | else if (vtop->r & VT_LVAL_SHORT) |
||
686 | size = 2; |
||
687 | } |
||
688 | if (!size) |
||
689 | size = type_size(&vtop->type, &align); |
||
690 | switch(size) { |
||
691 | case 1: func = TOK___bound_ptr_indir1; break; |
||
692 | case 2: func = TOK___bound_ptr_indir2; break; |
||
693 | case 4: func = TOK___bound_ptr_indir4; break; |
||
694 | case 8: func = TOK___bound_ptr_indir8; break; |
||
695 | case 12: func = TOK___bound_ptr_indir12; break; |
||
696 | case 16: func = TOK___bound_ptr_indir16; break; |
||
697 | default: |
||
698 | tcc_error("unhandled size when dereferencing bounded pointer"); |
||
699 | func = 0; |
||
700 | break; |
||
701 | } |
||
702 | |||
703 | sym = external_global_sym(func, &func_old_type, 0); |
||
704 | if (!sym->c) |
||
705 | put_extern_sym(sym, NULL, 0, 0); |
||
706 | |||
707 | /* patch relocation */ |
||
708 | /* XXX: find a better solution ? */ |
||
709 | |||
710 | rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i); |
||
711 | rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info)); |
||
712 | } |
||
713 | #endif |
||
714 | |||
715 | #ifdef TCC_TARGET_PE |
||
716 | |||
717 | #define REGN 4 /* number of entries in arg_regs[]; arguments with index >= REGN are written to the stack by gfunc_call() */ |
||
718 | static const uint8_t arg_regs[REGN] = { /* indexed by argument position */ |
||
719 | TREG_RCX, TREG_RDX, TREG_R8, TREG_R9 |
||
720 | }; |
||
721 | |||
722 | /* Prepare arguments in R10 and R11 rather than RCX and RDX |
||
723 | because gv() will not ever use these */ |
||
724 | static int arg_prepare_reg(int idx) { |
||
725 | if (idx == 0 || idx == 1) |
||
726 | /* idx=0: r10, idx=1: r11 */ |
||
727 | return idx + 10; |
||
728 | else |
||
729 | return arg_regs[idx]; |
||
730 | } |
||
731 | |||
732 | static int func_scratch; |
||
733 | |||
734 | /* Generate function call. The function address is pushed first, then |
||
735 | all the parameters in call order. This functions pops all the |
||
736 | parameters and the function address. */ |
||
737 | |||
/* Emit opcode 'b' with a 64-bit REX prefix and an operand of the form
   d(%rsp), using an 8-bit displacement when 'd' fits in a char and a
   32-bit one otherwise.  The low 3 bits of 'r' fill the reg/opcode field.
   When bit 0x100 is set in 'r' the value is not an integer register
   number, so it contributes nothing to the REX prefix. */
void gen_offs_sp(int b, int r, int d)
{
    int rex_reg = (r & 0x100) ? 0 : r;
    int reg_field = REG_VALUE(r) << 3;

    orex(1, 0, rex_reg, b);
    if (d != (char)d) {
        o(0x2484 | reg_field);
        gen_le32(d);
        return;
    }
    o(0x2444 | reg_field);
    g(d);
}
||
749 | |||
750 | /* Return the number of registers needed to return the struct, or 0 if |
||
751 | returning via struct pointer. */ |
||
752 | ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) |
||
753 | { |
||
754 | int size, align; |
||
755 | *regsize = 8; |
||
756 | *ret_align = 1; // Never have to re-align return values for x86-64 |
||
757 | size = type_size(vt, &align); |
||
758 | ret->ref = NULL; |
||
759 | if (size > 8) { |
||
760 | return 0; |
||
761 | } else if (size > 4) { |
||
762 | ret->t = VT_LLONG; |
||
763 | return 1; |
||
764 | } else if (size > 2) { |
||
765 | ret->t = VT_INT; |
||
766 | return 1; |
||
767 | } else if (size > 1) { |
||
768 | ret->t = VT_SHORT; |
||
769 | return 1; |
||
770 | } else { |
||
771 | ret->t = VT_BYTE; |
||
772 | return 1; |
||
773 | } |
||
774 | } |
||
775 | |||
776 | static int is_sse_float(int t) { |
||
777 | int bt; |
||
778 | bt = t & VT_BTYPE; |
||
779 | return bt == VT_DOUBLE || bt == VT_FLOAT; |
||
780 | } |
||
781 | |||
782 | int gfunc_arg_size(CType *type) { |
||
783 | int align; |
||
784 | if (type->t & (VT_ARRAY|VT_BITFIELD)) |
||
785 | return 8; |
||
786 | return type_size(type, &align); |
||
787 | } |
||
788 | |||
789 | void gfunc_call(int nb_args) |
||
790 | { |
||
791 | int size, r, args_size, i, d, bt, struct_size; |
||
792 | int arg; |
||
793 | |||
794 | args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE; |
||
795 | arg = nb_args; |
||
796 | |||
797 | /* for struct arguments, we need to call memcpy and the function |
||
798 | call breaks register passing arguments we are preparing. |
||
799 | So, we process arguments which will be passed by stack first. */ |
||
800 | struct_size = args_size; |
||
801 | for(i = 0; i < nb_args; i++) { |
||
802 | SValue *sv; |
||
803 | |||
804 | --arg; |
||
805 | sv = &vtop[-i]; |
||
806 | bt = (sv->type.t & VT_BTYPE); |
||
807 | size = gfunc_arg_size(&sv->type); |
||
808 | |||
809 | if (size <= 8) |
||
810 | continue; /* arguments smaller than 8 bytes passed in registers or on stack */ |
||
811 | |||
812 | if (bt == VT_STRUCT) { |
||
813 | /* align to stack align size */ |
||
814 | size = (size + 15) & ~15; |
||
815 | /* generate structure store */ |
||
816 | r = get_reg(RC_INT); |
||
817 | gen_offs_sp(0x8d, r, struct_size); |
||
818 | struct_size += size; |
||
819 | |||
820 | /* generate memcpy call */ |
||
821 | vset(&sv->type, r | VT_LVAL, 0); |
||
822 | vpushv(sv); |
||
823 | vstore(); |
||
824 | --vtop; |
||
825 | } else if (bt == VT_LDOUBLE) { |
||
826 | gv(RC_ST0); |
||
827 | gen_offs_sp(0xdb, 0x107, struct_size); |
||
828 | struct_size += 16; |
||
829 | } |
||
830 | } |
||
831 | |||
832 | if (func_scratch < struct_size) |
||
833 | func_scratch = struct_size; |
||
834 | |||
835 | arg = nb_args; |
||
836 | struct_size = args_size; |
||
837 | |||
838 | for(i = 0; i < nb_args; i++) { |
||
839 | --arg; |
||
840 | bt = (vtop->type.t & VT_BTYPE); |
||
841 | |||
842 | size = gfunc_arg_size(&vtop->type); |
||
843 | if (size > 8) { |
||
844 | /* align to stack align size */ |
||
845 | size = (size + 15) & ~15; |
||
846 | if (arg >= REGN) { |
||
847 | d = get_reg(RC_INT); |
||
848 | gen_offs_sp(0x8d, d, struct_size); |
||
849 | gen_offs_sp(0x89, d, arg*8); |
||
850 | } else { |
||
851 | d = arg_prepare_reg(arg); |
||
852 | gen_offs_sp(0x8d, d, struct_size); |
||
853 | } |
||
854 | struct_size += size; |
||
855 | } else { |
||
856 | if (is_sse_float(vtop->type.t)) { |
||
857 | gv(RC_XMM0); /* only use one float register */ |
||
858 | if (arg >= REGN) { |
||
859 | /* movq %xmm0, j*8(%rsp) */ |
||
860 | gen_offs_sp(0xd60f66, 0x100, arg*8); |
||
861 | } else { |
||
862 | /* movaps %xmm0, %xmmN */ |
||
863 | o(0x280f); |
||
864 | o(0xc0 + (arg << 3)); |
||
865 | d = arg_prepare_reg(arg); |
||
866 | /* mov %xmm0, %rxx */ |
||
867 | o(0x66); |
||
868 | orex(1,d,0, 0x7e0f); |
||
869 | o(0xc0 + REG_VALUE(d)); |
||
870 | } |
||
871 | } else { |
||
872 | if (bt == VT_STRUCT) { |
||
873 | vtop->type.ref = NULL; |
||
874 | vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT |
||
875 | : size > 1 ? VT_SHORT : VT_BYTE; |
||
876 | } |
||
877 | |||
878 | r = gv(RC_INT); |
||
879 | if (arg >= REGN) { |
||
880 | gen_offs_sp(0x89, r, arg*8); |
||
881 | } else { |
||
882 | d = arg_prepare_reg(arg); |
||
883 | orex(1,d,r,0x89); /* mov */ |
||
884 | o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); |
||
885 | } |
||
886 | } |
||
887 | } |
||
888 | vtop--; |
||
889 | } |
||
890 | save_regs(0); |
||
891 | |||
892 | /* Copy R10 and R11 into RCX and RDX, respectively */ |
||
893 | if (nb_args > 0) { |
||
894 | o(0xd1894c); /* mov %r10, %rcx */ |
||
895 | if (nb_args > 1) { |
||
896 | o(0xda894c); /* mov %r11, %rdx */ |
||
897 | } |
||
898 | } |
||
899 | |||
900 | gcall_or_jmp(0); |
||
901 | vtop--; |
||
902 | } |
||
903 | |||
904 | |||
905 | #define FUNC_PROLOG_SIZE 11 |
||
906 | |||
907 | /* generate function prolog of type 't' */ |
||
908 | void gfunc_prolog(CType *func_type) |
||
909 | { |
||
910 | int addr, reg_param_index, bt, size; |
||
911 | Sym *sym; |
||
912 | CType *type; |
||
913 | |||
914 | func_ret_sub = 0; |
||
915 | func_scratch = 0; |
||
916 | loc = 0; |
||
917 | |||
918 | addr = PTR_SIZE * 2; |
||
919 | ind += FUNC_PROLOG_SIZE; |
||
920 | func_sub_sp_offset = ind; |
||
921 | reg_param_index = 0; |
||
922 | |||
923 | sym = func_type->ref; |
||
924 | |||
925 | /* if the function returns a structure, then add an |
||
926 | implicit pointer parameter */ |
||
927 | func_vt = sym->type; |
||
928 | func_var = (sym->c == FUNC_ELLIPSIS); |
||
929 | size = gfunc_arg_size(&func_vt); |
||
930 | if (size > 8) { |
||
931 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); |
||
932 | func_vc = addr; |
||
933 | reg_param_index++; |
||
934 | addr += 8; |
||
935 | } |
||
936 | |||
937 | /* define parameters */ |
||
938 | while ((sym = sym->next) != NULL) { |
||
939 | type = &sym->type; |
||
940 | bt = type->t & VT_BTYPE; |
||
941 | size = gfunc_arg_size(type); |
||
942 | if (size > 8) { |
||
943 | if (reg_param_index < REGN) { |
||
944 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); |
||
945 | } |
||
946 | sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr); |
||
947 | } else { |
||
948 | if (reg_param_index < REGN) { |
||
949 | /* save arguments passed by register */ |
||
950 | if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) { |
||
951 | o(0xd60f66); /* movq */ |
||
952 | gen_modrm(reg_param_index, VT_LOCAL, NULL, addr); |
||
953 | } else { |
||
954 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); |
||
955 | } |
||
956 | } |
||
957 | sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr); |
||
958 | } |
||
959 | addr += 8; |
||
960 | reg_param_index++; |
||
961 | } |
||
962 | |||
963 | while (reg_param_index < REGN) { |
||
964 | if (func_type->ref->c == FUNC_ELLIPSIS) { |
||
965 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); |
||
966 | addr += 8; |
||
967 | } |
||
968 | reg_param_index++; |
||
969 | } |
||
970 | } |
||
971 | |||
972 | /* generate function epilog: emits 'leave' and 'ret' (or 'ret n' when func_ret_sub is non-zero), then rewinds 'ind' to func_sub_sp_offset - FUNC_PROLOG_SIZE to write the prolog code into the reserved gap, and finally calls pe_add_unwind_data() */ |
||
973 | void gfunc_epilog(void) |
||
974 | { |
||
975 | int v, saved_ind; |
||
976 | |||
977 | o(0xc9); /* leave */ |
||
978 | if (func_ret_sub == 0) { |
||
979 | o(0xc3); /* ret */ |
||
980 | } else { |
||
981 | o(0xc2); /* ret n: the two g() calls below emit n as 16 bits, low byte first */ |
||
982 | g(func_ret_sub); |
||
983 | g(func_ret_sub >> 8); |
||
984 | } |
||
985 | |||
986 | saved_ind = ind; |
||
987 | ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; |
||
988 | /* frame size: func_scratch plus the locals (-loc), rounded up to a multiple of 16 */ |
||
989 | v = (func_scratch + -loc + 15) & -16; |
||
990 | |||
991 | if (v >= 4096) { |
||
992 | Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0); |
||
993 | oad(0xb8, v); /* mov stacksize, %eax */ |
||
994 | oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */ |
||
995 | greloc(cur_text_section, sym, ind-4, R_X86_64_PC32); |
||
996 | o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */ |
||
997 | } else { |
||
998 | o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ |
||
999 | o(0xec8148); /* sub rsp, stacksize (32-bit immediate emitted by gen_le32 below) */ |
||
1000 | gen_le32(v); |
||
1001 | } |
||
1002 | |||
1003 | cur_text_section->data_offset = saved_ind; |
||
1004 | pe_add_unwind_data(ind, saved_ind, v); |
||
1005 | ind = cur_text_section->data_offset; |
||
1006 | } |
||
1007 | |||
1008 | #else |
||
1009 | |||
/* Emit 'add $val, %rsp'.  The short form with a one-byte immediate is
   used whenever val survives a round trip through char; otherwise the
   32-bit immediate form is emitted. */
static void gadd_sp(int val)
{
    if (val != (char)val) {
        oad(0xc48148, val); /* add $xxx, %rsp */
        return;
    }
    o(0xc48348); /* add $xx, %rsp (8-bit immediate follows) */
    g(val);
}
||
1019 | |||
/* Classes a value (or a part of an aggregate) can be assigned when
   deciding how it is passed. */
typedef enum X86_64_Mode {
  x86_64_mode_none,
  x86_64_mode_memory,
  x86_64_mode_integer,
  x86_64_mode_sse,
  x86_64_mode_x87
} X86_64_Mode;

/* Combine the classes of two parts of one aggregate into a single
   class.  Rules, in priority order:
     - equal classes, or one side 'none': the other side wins;
     - either side 'memory': memory;
     - either side 'integer': integer;
     - either side 'x87': memory;
     - otherwise: sse. */
static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b || b == x86_64_mode_none)
        return a;
    if (a == x86_64_mode_none)
        return b;

    if (a == x86_64_mode_memory || b == x86_64_mode_memory)
        return x86_64_mode_memory;
    if (a == x86_64_mode_integer || b == x86_64_mode_integer)
        return x86_64_mode_integer;
    if (a == x86_64_mode_x87 || b == x86_64_mode_x87)
        return x86_64_mode_memory;

    return x86_64_mode_sse;
}
||
1045 | |||
1046 | static X86_64_Mode classify_x86_64_inner(CType *ty) |
||
1047 | { |
||
1048 | X86_64_Mode mode; |
||
1049 | Sym *f; |
||
1050 | |||
1051 | switch (ty->t & VT_BTYPE) { |
||
1052 | case VT_VOID: return x86_64_mode_none; |
||
1053 | |||
1054 | case VT_INT: |
||
1055 | case VT_BYTE: |
||
1056 | case VT_SHORT: |
||
1057 | case VT_LLONG: |
||
1058 | case VT_BOOL: |
||
1059 | case VT_PTR: |
||
1060 | case VT_FUNC: |
||
1061 | case VT_ENUM: return x86_64_mode_integer; |
||
1062 | |||
1063 | case VT_FLOAT: |
||
1064 | case VT_DOUBLE: return x86_64_mode_sse; |
||
1065 | |||
1066 | case VT_LDOUBLE: return x86_64_mode_x87; |
||
1067 | |||
1068 | case VT_STRUCT: |
||
1069 | f = ty->ref; |
||
1070 | |||
1071 | mode = x86_64_mode_none; |
||
1072 | for (f = f->next; f; f = f->next) |
||
1073 | mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type)); |
||
1074 | |||
1075 | return mode; |
||
1076 | } |
||
1077 | |||
1078 | assert(0); |
||
1079 | } |
||
1080 | |||
1081 | static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) |
||
1082 | { |
||
1083 | X86_64_Mode mode; |
||
1084 | int size, align, ret_t = 0; |
||
1085 | |||
1086 | if (ty->t & (VT_BITFIELD|VT_ARRAY)) { |
||
1087 | *psize = 8; |
||
1088 | *palign = 8; |
||
1089 | *reg_count = 1; |
||
1090 | ret_t = ty->t; |
||
1091 | mode = x86_64_mode_integer; |
||
1092 | } else { |
||
1093 | size = type_size(ty, &align); |
||
1094 | *psize = (size + 7) & ~7; |
||
1095 | *palign = (align + 7) & ~7; |
||
1096 | |||
1097 | if (size > 16) { |
||
1098 | mode = x86_64_mode_memory; |
||
1099 | } else { |
||
1100 | mode = classify_x86_64_inner(ty); |
||
1101 | switch (mode) { |
||
1102 | case x86_64_mode_integer: |
||
1103 | if (size > 8) { |
||
1104 | *reg_count = 2; |
||
1105 | ret_t = VT_QLONG; |
||
1106 | } else { |
||
1107 | *reg_count = 1; |
||
1108 | ret_t = (size > 4) ? VT_LLONG : VT_INT; |
||
1109 | } |
||
1110 | break; |
||
1111 | |||
1112 | case x86_64_mode_x87: |
||
1113 | *reg_count = 1; |
||
1114 | ret_t = VT_LDOUBLE; |
||
1115 | break; |
||
1116 | |||
1117 | case x86_64_mode_sse: |
||
1118 | if (size > 8) { |
||
1119 | *reg_count = 2; |
||
1120 | ret_t = VT_QFLOAT; |
||
1121 | } else { |
||
1122 | *reg_count = 1; |
||
1123 | ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT; |
||
1124 | } |
||
1125 | break; |
||
1126 | default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/ |
||
1127 | } |
||
1128 | } |
||
1129 | } |
||
1130 | |||
1131 | if (ret) { |
||
1132 | ret->ref = NULL; |
||
1133 | ret->t = ret_t; |
||
1134 | } |
||
1135 | |||
1136 | return mode; |
||
1137 | } |
||
1138 | |||
1139 | ST_FUNC int classify_x86_64_va_arg(CType *ty) |
||
1140 | { |
||
1141 | /* This definition must be synced with stdarg.h */ |
||
1142 | enum __va_arg_type { |
||
1143 | __va_gen_reg, __va_float_reg, __va_stack |
||
1144 | }; |
||
1145 | int size, align, reg_count; |
||
1146 | X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, ®_count); |
||
1147 | switch (mode) { |
||
1148 | default: return __va_stack; |
||
1149 | case x86_64_mode_integer: return __va_gen_reg; |
||
1150 | case x86_64_mode_sse: return __va_float_reg; |
||
1151 | } |
||
1152 | } |
||
1153 | |||
1154 | /* Return the number of registers needed to return the struct, or 0 if |
||
1155 | returning via struct pointer. */ |
||
1156 | ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) |
||
1157 | { |
||
1158 | int size, align, reg_count; |
||
1159 | *ret_align = 1; // Never have to re-align return values for x86-64 |
||
1160 | *regsize = 8; |
||
1161 | return (classify_x86_64_arg(vt, ret, &size, &align, ®_count) != x86_64_mode_memory); |
||
1162 | } |
||
1163 | |||
1164 | #define REGN 6 |
||
1165 | static const uint8_t arg_regs[REGN] = { |
||
1166 | TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9 |
||
1167 | }; |
||
1168 | |||
1169 | static int arg_prepare_reg(int idx) { |
||
1170 | if (idx == 2 || idx == 3) |
||
1171 | /* idx=2: r10, idx=3: r11 */ |
||
1172 | return idx + 8; |
||
1173 | else |
||
1174 | return arg_regs[idx]; |
||
1175 | } |
||
1176 | |||
1177 | /* Generate function call. The function address is pushed first, then |
||
1178 | all the parameters in call order. This functions pops all the |
||
1179 | parameters and the function address. */ |
||
1180 | void gfunc_call(int nb_args) |
||
1181 | { |
||
1182 | X86_64_Mode mode; |
||
1183 | CType type; |
||
1184 | int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count; |
||
1185 | int nb_reg_args = 0; |
||
1186 | int nb_sse_args = 0; |
||
1187 | int sse_reg, gen_reg; |
||
1188 | |||
1189 | /* calculate the number of integer/float register arguments */ |
||
1190 | for(i = 0; i < nb_args; i++) { |
||
1191 | mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); |
||
1192 | if (mode == x86_64_mode_sse) |
||
1193 | nb_sse_args += reg_count; |
||
1194 | else if (mode == x86_64_mode_integer) |
||
1195 | nb_reg_args += reg_count; |
||
1196 | } |
||
1197 | |||
1198 | /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments |
||
1199 | and ended by a 16-byte aligned argument. This is because, from the point of view of |
||
1200 | the callee, argument alignment is computed from the bottom up. */ |
||
1201 | /* for struct arguments, we need to call memcpy and the function |
||
1202 | call breaks register passing arguments we are preparing. |
||
1203 | So, we process arguments which will be passed by stack first. */ |
||
1204 | gen_reg = nb_reg_args; |
||
1205 | sse_reg = nb_sse_args; |
||
1206 | run_start = 0; |
||
1207 | args_size = 0; |
||
1208 | while (run_start != nb_args) { |
||
1209 | int run_gen_reg = gen_reg, run_sse_reg = sse_reg; |
||
1210 | |||
1211 | run_end = nb_args; |
||
1212 | stack_adjust = 0; |
||
1213 | for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) { |
||
1214 | mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); |
||
1215 | switch (mode) { |
||
1216 | case x86_64_mode_memory: |
||
1217 | case x86_64_mode_x87: |
||
1218 | stack_arg: |
||
1219 | if (align == 16) |
||
1220 | run_end = i; |
||
1221 | else |
||
1222 | stack_adjust += size; |
||
1223 | break; |
||
1224 | |||
1225 | case x86_64_mode_sse: |
||
1226 | sse_reg -= reg_count; |
||
1227 | if (sse_reg + reg_count > 8) goto stack_arg; |
||
1228 | break; |
||
1229 | |||
1230 | case x86_64_mode_integer: |
||
1231 | gen_reg -= reg_count; |
||
1232 | if (gen_reg + reg_count > REGN) goto stack_arg; |
||
1233 | break; |
||
1234 | default: break; /* nothing to be done for x86_64_mode_none */ |
||
1235 | } |
||
1236 | } |
||
1237 | |||
1238 | gen_reg = run_gen_reg; |
||
1239 | sse_reg = run_sse_reg; |
||
1240 | |||
1241 | /* adjust stack to align SSE boundary */ |
||
1242 | if (stack_adjust &= 15) { |
||
1243 | /* fetch cpu flag before the following sub will change the value */ |
||
1244 | if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP) |
||
1245 | gv(RC_INT); |
||
1246 | |||
1247 | stack_adjust = 16 - stack_adjust; |
||
1248 | o(0x48); |
||
1249 | oad(0xec81, stack_adjust); /* sub $xxx, %rsp */ |
||
1250 | args_size += stack_adjust; |
||
1251 | } |
||
1252 | |||
1253 | for(i = run_start; i < run_end;) { |
||
1254 | /* Swap argument to top, it will possibly be changed here, |
||
1255 | and might use more temps. At the end of the loop we keep |
||
1256 | in on the stack and swap it back to its original position |
||
1257 | if it is a register. */ |
||
1258 | SValue tmp = vtop[0]; |
||
1259 | int arg_stored = 1; |
||
1260 | |||
1261 | vtop[0] = vtop[-i]; |
||
1262 | vtop[-i] = tmp; |
||
1263 | mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, ®_count); |
||
1264 | |||
1265 | switch (vtop->type.t & VT_BTYPE) { |
||
1266 | case VT_STRUCT: |
||
1267 | if (mode == x86_64_mode_sse) { |
||
1268 | if (sse_reg > 8) |
||
1269 | sse_reg -= reg_count; |
||
1270 | else |
||
1271 | arg_stored = 0; |
||
1272 | } else if (mode == x86_64_mode_integer) { |
||
1273 | if (gen_reg > REGN) |
||
1274 | gen_reg -= reg_count; |
||
1275 | else |
||
1276 | arg_stored = 0; |
||
1277 | } |
||
1278 | |||
1279 | if (arg_stored) { |
||
1280 | /* allocate the necessary size on stack */ |
||
1281 | o(0x48); |
||
1282 | oad(0xec81, size); /* sub $xxx, %rsp */ |
||
1283 | /* generate structure store */ |
||
1284 | r = get_reg(RC_INT); |
||
1285 | orex(1, r, 0, 0x89); /* mov %rsp, r */ |
||
1286 | o(0xe0 + REG_VALUE(r)); |
||
1287 | vset(&vtop->type, r | VT_LVAL, 0); |
||
1288 | vswap(); |
||
1289 | vstore(); |
||
1290 | args_size += size; |
||
1291 | } |
||
1292 | break; |
||
1293 | |||
1294 | case VT_LDOUBLE: |
||
1295 | assert(0); |
||
1296 | break; |
||
1297 | |||
1298 | case VT_FLOAT: |
||
1299 | case VT_DOUBLE: |
||
1300 | assert(mode == x86_64_mode_sse); |
||
1301 | if (sse_reg > 8) { |
||
1302 | --sse_reg; |
||
1303 | r = gv(RC_FLOAT); |
||
1304 | o(0x50); /* push $rax */ |
||
1305 | /* movq %xmmN, (%rsp) */ |
||
1306 | o(0xd60f66); |
||
1307 | o(0x04 + REG_VALUE(r)*8); |
||
1308 | o(0x24); |
||
1309 | args_size += size; |
||
1310 | } else { |
||
1311 | arg_stored = 0; |
||
1312 | } |
||
1313 | break; |
||
1314 | |||
1315 | default: |
||
1316 | assert(mode == x86_64_mode_integer); |
||
1317 | /* simple type */ |
||
1318 | /* XXX: implicit cast ? */ |
||
1319 | if (gen_reg > REGN) { |
||
1320 | --gen_reg; |
||
1321 | r = gv(RC_INT); |
||
1322 | orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */ |
||
1323 | args_size += size; |
||
1324 | } else { |
||
1325 | arg_stored = 0; |
||
1326 | } |
||
1327 | break; |
||
1328 | } |
||
1329 | |||
1330 | /* And swap the argument back to it's original position. */ |
||
1331 | tmp = vtop[0]; |
||
1332 | vtop[0] = vtop[-i]; |
||
1333 | vtop[-i] = tmp; |
||
1334 | |||
1335 | if (arg_stored) { |
||
1336 | vrotb(i+1); |
||
1337 | assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r)); |
||
1338 | vpop(); |
||
1339 | --nb_args; |
||
1340 | --run_end; |
||
1341 | } else { |
||
1342 | ++i; |
||
1343 | } |
||
1344 | } |
||
1345 | |||
1346 | /* handle 16 byte aligned arguments at end of run */ |
||
1347 | run_start = i = run_end; |
||
1348 | while (i < nb_args) { |
||
1349 | /* Rotate argument to top since it will always be popped */ |
||
1350 | mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); |
||
1351 | if (align != 16) |
||
1352 | break; |
||
1353 | |||
1354 | vrotb(i+1); |
||
1355 | |||
1356 | if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { |
||
1357 | gv(RC_ST0); |
||
1358 | oad(0xec8148, size); /* sub $xxx, %rsp */ |
||
1359 | o(0x7cdb); /* fstpt 0(%rsp) */ |
||
1360 | g(0x24); |
||
1361 | g(0x00); |
||
1362 | args_size += size; |
||
1363 | } else { |
||
1364 | assert(mode == x86_64_mode_memory); |
||
1365 | |||
1366 | /* allocate the necessary size on stack */ |
||
1367 | o(0x48); |
||
1368 | oad(0xec81, size); /* sub $xxx, %rsp */ |
||
1369 | /* generate structure store */ |
||
1370 | r = get_reg(RC_INT); |
||
1371 | orex(1, r, 0, 0x89); /* mov %rsp, r */ |
||
1372 | o(0xe0 + REG_VALUE(r)); |
||
1373 | vset(&vtop->type, r | VT_LVAL, 0); |
||
1374 | vswap(); |
||
1375 | vstore(); |
||
1376 | args_size += size; |
||
1377 | } |
||
1378 | |||
1379 | vpop(); |
||
1380 | --nb_args; |
||
1381 | } |
||
1382 | } |
||
1383 | |||
1384 | /* XXX This should be superfluous. */ |
||
1385 | save_regs(0); /* save used temporary registers */ |
||
1386 | |||
1387 | /* then, we prepare register passing arguments. |
||
1388 | Note that we cannot set RDX and RCX in this loop because gv() |
||
1389 | may break these temporary registers. Let's use R10 and R11 |
||
1390 | instead of them */ |
||
1391 | assert(gen_reg <= REGN); |
||
1392 | assert(sse_reg <= 8); |
||
1393 | for(i = 0; i < nb_args; i++) { |
||
1394 | mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, ®_count); |
||
1395 | /* Alter stack entry type so that gv() knows how to treat it */ |
||
1396 | vtop->type = type; |
||
1397 | if (mode == x86_64_mode_sse) { |
||
1398 | if (reg_count == 2) { |
||
1399 | sse_reg -= 2; |
||
1400 | gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */ |
||
1401 | if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */ |
||
1402 | /* movaps %xmm0, %xmmN */ |
||
1403 | o(0x280f); |
||
1404 | o(0xc0 + (sse_reg << 3)); |
||
1405 | /* movaps %xmm1, %xmmN */ |
||
1406 | o(0x280f); |
||
1407 | o(0xc1 + ((sse_reg+1) << 3)); |
||
1408 | } |
||
1409 | } else { |
||
1410 | assert(reg_count == 1); |
||
1411 | --sse_reg; |
||
1412 | /* Load directly to register */ |
||
1413 | gv(RC_XMM0 << sse_reg); |
||
1414 | } |
||
1415 | } else if (mode == x86_64_mode_integer) { |
||
1416 | /* simple type */ |
||
1417 | /* XXX: implicit cast ? */ |
||
1418 | int d; |
||
1419 | gen_reg -= reg_count; |
||
1420 | r = gv(RC_INT); |
||
1421 | d = arg_prepare_reg(gen_reg); |
||
1422 | orex(1,d,r,0x89); /* mov */ |
||
1423 | o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); |
||
1424 | if (reg_count == 2) { |
||
1425 | d = arg_prepare_reg(gen_reg+1); |
||
1426 | orex(1,d,vtop->r2,0x89); /* mov */ |
||
1427 | o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d)); |
||
1428 | } |
||
1429 | } |
||
1430 | vtop--; |
||
1431 | } |
||
1432 | assert(gen_reg == 0); |
||
1433 | assert(sse_reg == 0); |
||
1434 | |||
1435 | /* We shouldn't have many operands on the stack anymore, but the |
||
1436 | call address itself is still there, and it might be in %eax |
||
1437 | (or edx/ecx) currently, which the below writes would clobber. |
||
1438 | So evict all remaining operands here. */ |
||
1439 | save_regs(0); |
||
1440 | |||
1441 | /* Copy R10 and R11 into RDX and RCX, respectively */ |
||
1442 | if (nb_reg_args > 2) { |
||
1443 | o(0xd2894c); /* mov %r10, %rdx */ |
||
1444 | if (nb_reg_args > 3) { |
||
1445 | o(0xd9894c); /* mov %r11, %rcx */ |
||
1446 | } |
||
1447 | } |
||
1448 | |||
1449 | oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */ |
||
1450 | gcall_or_jmp(0); |
||
1451 | if (args_size) |
||
1452 | gadd_sp(args_size); |
||
1453 | vtop--; |
||
1454 | } |
||
1455 | |||
1456 | |||
1457 | #define FUNC_PROLOG_SIZE 11 |
||
1458 | |||
1459 | static void push_arg_reg(int i) { |
||
1460 | loc -= 8; |
||
1461 | gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc); |
||
1462 | } |
||
1463 | |||
1464 | /* generate function prolog of type 't' */ |
||
1465 | void gfunc_prolog(CType *func_type) |
||
1466 | { |
||
1467 | X86_64_Mode mode; |
||
1468 | int i, addr, align, size, reg_count; |
||
1469 | int param_addr = 0, reg_param_index, sse_param_index; |
||
1470 | Sym *sym; |
||
1471 | CType *type; |
||
1472 | |||
1473 | sym = func_type->ref; |
||
1474 | addr = PTR_SIZE * 2; |
||
1475 | loc = 0; |
||
1476 | ind += FUNC_PROLOG_SIZE; |
||
1477 | func_sub_sp_offset = ind; |
||
1478 | func_ret_sub = 0; |
||
1479 | |||
1480 | if (func_type->ref->c == FUNC_ELLIPSIS) { |
||
1481 | int seen_reg_num, seen_sse_num, seen_stack_size; |
||
1482 | seen_reg_num = seen_sse_num = 0; |
||
1483 | /* frame pointer and return address */ |
||
1484 | seen_stack_size = PTR_SIZE * 2; |
||
1485 | /* count the number of seen parameters */ |
||
1486 | sym = func_type->ref; |
||
1487 | while ((sym = sym->next) != NULL) { |
||
1488 | type = &sym->type; |
||
1489 | mode = classify_x86_64_arg(type, NULL, &size, &align, ®_count); |
||
1490 | switch (mode) { |
||
1491 | default: |
||
1492 | stack_arg: |
||
1493 | seen_stack_size = ((seen_stack_size + align - 1) & -align) + size; |
||
1494 | break; |
||
1495 | |||
1496 | case x86_64_mode_integer: |
||
1497 | if (seen_reg_num + reg_count <= 8) { |
||
1498 | seen_reg_num += reg_count; |
||
1499 | } else { |
||
1500 | seen_reg_num = 8; |
||
1501 | goto stack_arg; |
||
1502 | } |
||
1503 | break; |
||
1504 | |||
1505 | case x86_64_mode_sse: |
||
1506 | if (seen_sse_num + reg_count <= 8) { |
||
1507 | seen_sse_num += reg_count; |
||
1508 | } else { |
||
1509 | seen_sse_num = 8; |
||
1510 | goto stack_arg; |
||
1511 | } |
||
1512 | break; |
||
1513 | } |
||
1514 | } |
||
1515 | |||
1516 | loc -= 16; |
||
1517 | /* movl $0x????????, -0x10(%rbp) */ |
||
1518 | o(0xf045c7); |
||
1519 | gen_le32(seen_reg_num * 8); |
||
1520 | /* movl $0x????????, -0xc(%rbp) */ |
||
1521 | o(0xf445c7); |
||
1522 | gen_le32(seen_sse_num * 16 + 48); |
||
1523 | /* movl $0x????????, -0x8(%rbp) */ |
||
1524 | o(0xf845c7); |
||
1525 | gen_le32(seen_stack_size); |
||
1526 | |||
1527 | /* save all register passing arguments */ |
||
1528 | for (i = 0; i < 8; i++) { |
||
1529 | loc -= 16; |
||
1530 | o(0xd60f66); /* movq */ |
||
1531 | gen_modrm(7 - i, VT_LOCAL, NULL, loc); |
||
1532 | /* movq $0, loc+8(%rbp) */ |
||
1533 | o(0x85c748); |
||
1534 | gen_le32(loc + 8); |
||
1535 | gen_le32(0); |
||
1536 | } |
||
1537 | for (i = 0; i < REGN; i++) { |
||
1538 | push_arg_reg(REGN-1-i); |
||
1539 | } |
||
1540 | } |
||
1541 | |||
1542 | sym = func_type->ref; |
||
1543 | reg_param_index = 0; |
||
1544 | sse_param_index = 0; |
||
1545 | |||
1546 | /* if the function returns a structure, then add an |
||
1547 | implicit pointer parameter */ |
||
1548 | func_vt = sym->type; |
||
1549 | mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, ®_count); |
||
1550 | if (mode == x86_64_mode_memory) { |
||
1551 | push_arg_reg(reg_param_index); |
||
1552 | func_vc = loc; |
||
1553 | reg_param_index++; |
||
1554 | } |
||
1555 | /* define parameters */ |
||
1556 | while ((sym = sym->next) != NULL) { |
||
1557 | type = &sym->type; |
||
1558 | mode = classify_x86_64_arg(type, NULL, &size, &align, ®_count); |
||
1559 | switch (mode) { |
||
1560 | case x86_64_mode_sse: |
||
1561 | if (sse_param_index + reg_count <= 8) { |
||
1562 | /* save arguments passed by register */ |
||
1563 | loc -= reg_count * 8; |
||
1564 | param_addr = loc; |
||
1565 | for (i = 0; i < reg_count; ++i) { |
||
1566 | o(0xd60f66); /* movq */ |
||
1567 | gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8); |
||
1568 | ++sse_param_index; |
||
1569 | } |
||
1570 | } else { |
||
1571 | addr = (addr + align - 1) & -align; |
||
1572 | param_addr = addr; |
||
1573 | addr += size; |
||
1574 | } |
||
1575 | break; |
||
1576 | |||
1577 | case x86_64_mode_memory: |
||
1578 | case x86_64_mode_x87: |
||
1579 | addr = (addr + align - 1) & -align; |
||
1580 | param_addr = addr; |
||
1581 | addr += size; |
||
1582 | break; |
||
1583 | |||
1584 | case x86_64_mode_integer: { |
||
1585 | if (reg_param_index + reg_count <= REGN) { |
||
1586 | /* save arguments passed by register */ |
||
1587 | loc -= reg_count * 8; |
||
1588 | param_addr = loc; |
||
1589 | for (i = 0; i < reg_count; ++i) { |
||
1590 | gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8); |
||
1591 | ++reg_param_index; |
||
1592 | } |
||
1593 | } else { |
||
1594 | addr = (addr + align - 1) & -align; |
||
1595 | param_addr = addr; |
||
1596 | addr += size; |
||
1597 | } |
||
1598 | break; |
||
1599 | } |
||
1600 | default: break; /* nothing to be done for x86_64_mode_none */ |
||
1601 | } |
||
1602 | sym_push(sym->v & ~SYM_FIELD, type, |
||
1603 | VT_LOCAL | VT_LVAL, param_addr); |
||
1604 | } |
||
1605 | |||
1606 | #ifdef CONFIG_TCC_BCHECK |
||
1607 | /* leave some room for bound checking code */ |
||
1608 | if (tcc_state->do_bounds_check) { |
||
1609 | func_bound_offset = lbounds_section->data_offset; |
||
1610 | func_bound_ind = ind; |
||
1611 | oad(0xb8, 0); /* lbound section pointer */ |
||
1612 | o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */ |
||
1613 | oad(0xb8, 0); /* call to function */ |
||
1614 | } |
||
1615 | #endif |
||
1616 | } |
||
1617 | |||
1618 | /* generate function epilog: emits 'leave' and 'ret' (or 'ret n' when func_ret_sub is non-zero), then temporarily moves 'ind' back to func_sub_sp_offset - FUNC_PROLOG_SIZE to write 'push %rbp; mov %rsp, %rbp; sub $v, %rsp' into the reserved gap. With CONFIG_TCC_BCHECK and bounds checking active, the bound-checking calls are filled in first. */ |
||
1619 | void gfunc_epilog(void) |
||
1620 | { |
||
1621 | int v, saved_ind; |
||
1622 | |||
1623 | #ifdef CONFIG_TCC_BCHECK |
||
1624 | if (tcc_state->do_bounds_check |
||
1625 | && func_bound_offset != lbounds_section->data_offset) |
||
1626 | { |
||
1627 | addr_t saved_ind; |
||
1628 | addr_t *bounds_ptr; |
||
1629 | Sym *sym_data; |
||
1630 | |||
1631 | /* add end of table info */ |
||
1632 | bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t)); |
||
1633 | *bounds_ptr = 0; |
||
1634 | |||
1635 | /* generate bound local allocation: written at func_bound_ind, the position saved by gfunc_prolog() */ |
||
1636 | sym_data = get_sym_ref(&char_pointer_type, lbounds_section, |
||
1637 | func_bound_offset, lbounds_section->data_offset); |
||
1638 | saved_ind = ind; |
||
1639 | ind = func_bound_ind; |
||
1640 | greloc(cur_text_section, sym_data, ind + 1, R_386_32); |
||
1641 | ind = ind + 5 + 3; |
||
1642 | gen_static_call(TOK___bound_local_new); |
||
1643 | ind = saved_ind; |
||
1644 | |||
1645 | /* generate bound check local freeing */ |
||
1646 | o(0x5250); /* save returned value, if any */ |
||
1647 | greloc(cur_text_section, sym_data, ind + 1, R_386_32); |
||
1648 | oad(0xb8, 0); /* mov xxx, %rax */ |
||
1649 | o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */ |
||
1650 | gen_static_call(TOK___bound_local_delete); |
||
1651 | o(0x585a); /* restore returned value, if any */ |
||
1652 | } |
||
1653 | #endif |
||
1654 | o(0xc9); /* leave */ |
||
1655 | if (func_ret_sub == 0) { |
||
1656 | o(0xc3); /* ret */ |
||
1657 | } else { |
||
1658 | o(0xc2); /* ret n: the two g() calls below emit n as 16 bits, low byte first */ |
||
1659 | g(func_ret_sub); |
||
1660 | g(func_ret_sub >> 8); |
||
1661 | } |
||
1662 | /* frame size: -loc rounded up to a multiple of 16 */ |
||
1663 | v = (-loc + 15) & -16; |
||
1664 | saved_ind = ind; |
||
1665 | ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; |
||
1666 | o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ |
||
1667 | o(0xec8148); /* sub rsp, stacksize (32-bit immediate emitted by gen_le32 below) */ |
||
1668 | gen_le32(v); |
||
1669 | ind = saved_ind; |
||
1670 | } |
||
1671 | |||
1672 | #endif /* not PE */ |
||
1673 | |||
/* Generate a jump to a label: emits opcode 0xe9 through psym() with
   't' as its second argument and returns what psym() returns. */
int gjmp(int t)
{
    int res = psym(0xe9, t);

    return res;
}
||
1679 | |||
1680 | /* generate a jump to a fixed address */ |
||
1681 | void gjmp_addr(int a) |
||
1682 | { |
||
1683 | int r; |
||
1684 | r = a - ind - 2; |
||
1685 | if (r == (char)r) { |
||
1686 | g(0xeb); |
||
1687 | g(r); |
||
1688 | } else { |
||
1689 | oad(0xe9, a - ind - 5); |
||
1690 | } |
||
1691 | } |
||
1692 | |||
1693 | /* generate a test. set 'inv' to invert test. Stack entry is popped. 't' is passed to psym()/gjmp() for the jumps emitted here and its resulting value is returned. Only VT_CMP, VT_JMP and VT_JMPI entries emit code; for any other entry the value is just popped and 't' returned unchanged. */ |
||
1694 | int gtst(int inv, int t) |
||
1695 | { |
||
1696 | int v = vtop->r & VT_VALMASK; |
||
1697 | if (v == VT_CMP) { |
||
1698 | /* fast case : can jump directly since flags are set */ |
||
1699 | if (vtop->c.i & 0x100) |
||
1700 | { |
||
1701 | /* This was a float compare. If the parity flag is set |
||
1702 | the result was unordered. For anything except != this |
||
1703 | means false and we don't jump (anding both conditions). |
||
1704 | For != this means true (oring both). |
||
1705 | Take care about inverting the test. We need to jump |
||
1706 | to our target if the result was unordered and test wasn't NE, |
||
1707 | otherwise if unordered we don't want to jump. */ |
||
1708 | vtop->c.i &= ~0x100; |
||
1709 | if (inv == (vtop->c.i == TOK_NE)) |
||
1710 | o(0x067a); /* jp +6: skips the 6-byte conditional jump emitted after this block */ |
||
1711 | else |
||
1712 | { |
||
1713 | g(0x0f); |
||
1714 | t = psym(0x8a, t); /* jp t */ |
||
1715 | } |
||
1716 | } |
||
1717 | g(0x0f); |
||
1718 | t = psym((vtop->c.i - 16) ^ inv, t); |
||
1719 | } else if (v == VT_JMP || v == VT_JMPI) { |
||
1720 | /* && or || optimization */ |
||
1721 | if ((v & 1) == inv) { |
||
1722 | /* insert vtop->c jump list in t: walk the list stored in the text section to its end (a zero link) and make that last entry point to t */ |
||
1723 | uint32_t n1, n = vtop->c.i; |
||
1724 | if (n) { |
||
1725 | while ((n1 = read32le(cur_text_section->data + n))) |
||
1726 | n = n1; |
||
1727 | write32le(cur_text_section->data + n, t); |
||
1728 | t = vtop->c.i; |
||
1729 | } |
||
1730 | } else { |
||
1731 | t = gjmp(t); |
||
1732 | gsym(vtop->c.i); |
||
1733 | } |
||
1734 | } |
||
1735 | vtop--; |
||
1736 | return t; |
||
1737 | } |
||
1738 | |||
1739 | /* generate an integer binary operation 'op' on the two top value-stack entries (vtop[-1] op vtop[0]); one entry is popped. 'll' selects the 64-bit form (from is64_type() of the left operand), 'cc' tells that the right operand is a plain constant. An op that matches no case is handled through gen_op8 with opc 7; when op lies in TOK_ULT..TOK_GT the result is left as VT_CMP with the op in c.i. */ |
||
1740 | void gen_opi(int op) |
||
1741 | { |
||
1742 | int r, fr, opc, c; |
||
1743 | int ll, uu, cc; |
||
1744 | |||
1745 | ll = is64_type(vtop[-1].type.t); |
||
1746 | uu = (vtop[-1].type.t & VT_UNSIGNED) != 0; |
||
1747 | cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; |
||
1748 | |||
1749 | switch(op) { |
||
1750 | case '+': |
||
1751 | case TOK_ADDC1: /* add with carry generation */ |
||
1752 | opc = 0; |
||
1753 | gen_op8: |
||
1754 | if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) { |
||
1755 | /* constant case: taken only when the constant is unchanged by a cast to int, or the operation is not 64-bit */ |
||
1756 | vswap(); |
||
1757 | r = gv(RC_INT); |
||
1758 | vswap(); |
||
1759 | c = vtop->c.i; |
||
1760 | if (c == (char)c) { |
||
1761 | /* XXX: generate inc and dec for smaller code ? */ |
||
1762 | orex(ll, r, 0, 0x83); |
||
1763 | o(0xc0 | (opc << 3) | REG_VALUE(r)); |
||
1764 | g(c); |
||
1765 | } else { |
||
1766 | orex(ll, r, 0, 0x81); |
||
1767 | oad(0xc0 | (opc << 3) | REG_VALUE(r), c); |
||
1768 | } |
||
1769 | } else { |
||
1770 | gv2(RC_INT, RC_INT); |
||
1771 | r = vtop[-1].r; |
||
1772 | fr = vtop[0].r; |
||
1773 | orex(ll, r, fr, (opc << 3) | 0x01); |
||
1774 | o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8); |
||
1775 | } |
||
1776 | vtop--; |
||
1777 | if (op >= TOK_ULT && op <= TOK_GT) { |
||
1778 | vtop->r = VT_CMP; |
||
1779 | vtop->c.i = op; |
||
1780 | } |
||
1781 | break; |
||
1782 | case '-': |
||
1783 | case TOK_SUBC1: /* sub with carry generation */ |
||
1784 | opc = 5; |
||
1785 | goto gen_op8; |
||
1786 | case TOK_ADDC2: /* add with carry use */ |
||
1787 | opc = 2; |
||
1788 | goto gen_op8; |
||
1789 | case TOK_SUBC2: /* sub with carry use */ |
||
1790 | opc = 3; |
||
1791 | goto gen_op8; |
||
1792 | case '&': |
||
1793 | opc = 4; |
||
1794 | goto gen_op8; |
||
1795 | case '^': |
||
1796 | opc = 6; |
||
1797 | goto gen_op8; |
||
1798 | case '|': |
||
1799 | opc = 1; |
||
1800 | goto gen_op8; |
||
1801 | case '*': |
||
1802 | gv2(RC_INT, RC_INT); |
||
1803 | r = vtop[-1].r; |
||
1804 | fr = vtop[0].r; |
||
1805 | orex(ll, fr, r, 0xaf0f); /* imul fr, r */ |
||
1806 | o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8); |
||
1807 | vtop--; |
||
1808 | break; |
||
1809 | case TOK_SHL: |
||
1810 | opc = 4; |
||
1811 | goto gen_shift; |
||
1812 | case TOK_SHR: |
||
1813 | opc = 5; |
||
1814 | goto gen_shift; |
||
1815 | case TOK_SAR: |
||
1816 | opc = 7; |
||
1817 | gen_shift: |
||
1818 | opc = 0xc0 | (opc << 3); |
||
1819 | if (cc) { |
||
1820 | /* constant case: the count is masked to 0..63 for 64-bit operands, 0..31 otherwise */ |
||
1821 | vswap(); |
||
1822 | r = gv(RC_INT); |
||
1823 | vswap(); |
||
1824 | orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */ |
||
1825 | o(opc | REG_VALUE(r)); |
||
1826 | g(vtop->c.i & (ll ? 63 : 31)); |
||
1827 | } else { |
||
1828 | /* we generate the shift in ecx */ |
||
1829 | gv2(RC_INT, RC_RCX); |
||
1830 | r = vtop[-1].r; |
||
1831 | orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */ |
||
1832 | o(opc | REG_VALUE(r)); |
||
1833 | } |
||
1834 | vtop--; |
||
1835 | break; |
||
1836 | case TOK_UDIV: |
||
1837 | case TOK_UMOD: |
||
1838 | uu = 1; |
||
1839 | goto divmod; |
||
1840 | case '/': |
||
1841 | case '%': |
||
1842 | case TOK_PDIV: |
||
1843 | uu = 0; |
||
1844 | divmod: |
||
1845 | /* first operand must be in eax */ |
||
1846 | /* XXX: need better constraint for second operand */ |
||
1847 | gv2(RC_RAX, RC_RCX); |
||
1848 | r = vtop[-1].r; |
||
1849 | fr = vtop[0].r; |
||
1850 | vtop--; |
||
1851 | save_reg(TREG_RDX); |
||
1852 | orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */ |
||
1853 | orex(ll, fr, 0, 0xf7); /* div fr, %eax */ |
||
1854 | o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr)); |
||
1855 | if (op == '%' || op == TOK_UMOD) |
||
1856 | r = TREG_RDX; |
||
1857 | else |
||
1858 | r = TREG_RAX; |
||
1859 | vtop->r = r; |
||
1860 | break; |
||
1861 | default: |
||
1862 | opc = 7; |
||
1863 | goto gen_op8; |
||
1864 | } |
||
1865 | } |
||
1866 | |||
1867 | void gen_opl(int op) |
||
1868 | { |
||
1869 | gen_opi(op); /* forwards unchanged: gen_opi() already picks the 64-bit form itself through is64_type() */ |
||
1870 | } |
||
1871 | |||
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type.

   Operands are the two top entries of the value stack (vtop[-1] and
   vtop[0]). Every path pops one entry, leaving the result in the new
   vtop:
     - comparisons (TOK_ULT..TOK_GT) leave a VT_CMP value; the long
       double path stores a token in c.i, the float/double path stores
       'op | 0x100';
     - arithmetic ('+', '-', '*', '/', anything else is treated as '+')
       leaves the result in the register that held the first operand.
   long double operands go through the x87 stack (TREG_ST0); float and
   double operands use the scalar SSE opcodes emitted below. */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    /* register class matching the operand type */
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        /* ---- long double: x87 ---- */
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            /* GE/GT are evaluated with the operands exchanged; for
               EQ/NE the operand order is irrelevant */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            /* reduce the status word in %ah to a flag test; 'op' is
               rewritten to the token stored in the VT_CMP result */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE; /* no-op: op is already TOK_NE here */
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            /* 'a' selects the operation in the second opcode byte;
               for '-' and '/' the next encoding (a + 1) is used when
               the operands are in swapped order */
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            /* note: ft and fc are assigned but not read again on this
               path */
            ft = vtop->type.t;
            fc = vtop->c.i;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        /* ---- float / double: SSE ---- */
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                /* fetch the saved pointer from its local slot into an
                   integer register and address the operand through it
                   with a zero displacement */
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                /* LE/LT are evaluated with the operands exchanged, so
                   only the "above" / "above or equal" conditions are
                   needed; 'op' becomes the setcc opcode byte */
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            /* 0x66 prefix selects the double precision form */
            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            /* second operand: memory reference or register */
            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            /* 'a' is added to the base opcode 0x58 emitted below */
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                /* restore the original operand order: load the memory
                   operand into a register and exchange the entries */
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            /* prefix 0xf2 for double, 0xf3 for float, then 0x0f and the
               operation opcode */
            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            /* second operand: memory reference or register */
            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
||
2074 | |||
2075 | /* convert integers to fp 't' type. Must handle 'int', 'unsigned int' |
||
2076 | and 'long long' cases. */ |
||
2077 | void gen_cvt_itof(int t) |
||
2078 | { |
||
2079 | if ((t & VT_BTYPE) == VT_LDOUBLE) { |
||
2080 | save_reg(TREG_ST0); |
||
2081 | gv(RC_INT); |
||
2082 | if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { |
||
2083 | /* signed long long to float/double/long double (unsigned case |
||
2084 | is handled generically) */ |
||
2085 | o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ |
||
2086 | o(0x242cdf); /* fildll (%rsp) */ |
||
2087 | o(0x08c48348); /* add $8, %rsp */ |
||
2088 | } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == |
||
2089 | (VT_INT | VT_UNSIGNED)) { |
||
2090 | /* unsigned int to float/double/long double */ |
||
2091 | o(0x6a); /* push $0 */ |
||
2092 | g(0x00); |
||
2093 | o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ |
||
2094 | o(0x242cdf); /* fildll (%rsp) */ |
||
2095 | o(0x10c48348); /* add $16, %rsp */ |
||
2096 | } else { |
||
2097 | /* int to float/double/long double */ |
||
2098 | o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ |
||
2099 | o(0x2404db); /* fildl (%rsp) */ |
||
2100 | o(0x08c48348); /* add $8, %rsp */ |
||
2101 | } |
||
2102 | vtop->r = TREG_ST0; |
||
2103 | } else { |
||
2104 | int r = get_reg(RC_FLOAT); |
||
2105 | gv(RC_INT); |
||
2106 | o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0)); |
||
2107 | if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == |
||
2108 | (VT_INT | VT_UNSIGNED) || |
||
2109 | (vtop->type.t & VT_BTYPE) == VT_LLONG) { |
||
2110 | o(0x48); /* REX */ |
||
2111 | } |
||
2112 | o(0x2a0f); |
||
2113 | o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */ |
||
2114 | vtop->r = r; |
||
2115 | } |
||
2116 | } |
||
2117 | |||
2118 | /* convert from one floating point type to another */ |
||
2119 | void gen_cvt_ftof(int t) |
||
2120 | { |
||
2121 | int ft, bt, tbt; |
||
2122 | |||
2123 | ft = vtop->type.t; |
||
2124 | bt = ft & VT_BTYPE; |
||
2125 | tbt = t & VT_BTYPE; |
||
2126 | |||
2127 | if (bt == VT_FLOAT) { |
||
2128 | gv(RC_FLOAT); |
||
2129 | if (tbt == VT_DOUBLE) { |
||
2130 | o(0x140f); /* unpcklps */ |
||
2131 | o(0xc0 + REG_VALUE(vtop->r)*9); |
||
2132 | o(0x5a0f); /* cvtps2pd */ |
||
2133 | o(0xc0 + REG_VALUE(vtop->r)*9); |
||
2134 | } else if (tbt == VT_LDOUBLE) { |
||
2135 | save_reg(RC_ST0); |
||
2136 | /* movss %xmm0,-0x10(%rsp) */ |
||
2137 | o(0x110ff3); |
||
2138 | o(0x44 + REG_VALUE(vtop->r)*8); |
||
2139 | o(0xf024); |
||
2140 | o(0xf02444d9); /* flds -0x10(%rsp) */ |
||
2141 | vtop->r = TREG_ST0; |
||
2142 | } |
||
2143 | } else if (bt == VT_DOUBLE) { |
||
2144 | gv(RC_FLOAT); |
||
2145 | if (tbt == VT_FLOAT) { |
||
2146 | o(0x140f66); /* unpcklpd */ |
||
2147 | o(0xc0 + REG_VALUE(vtop->r)*9); |
||
2148 | o(0x5a0f66); /* cvtpd2ps */ |
||
2149 | o(0xc0 + REG_VALUE(vtop->r)*9); |
||
2150 | } else if (tbt == VT_LDOUBLE) { |
||
2151 | save_reg(RC_ST0); |
||
2152 | /* movsd %xmm0,-0x10(%rsp) */ |
||
2153 | o(0x110ff2); |
||
2154 | o(0x44 + REG_VALUE(vtop->r)*8); |
||
2155 | o(0xf024); |
||
2156 | o(0xf02444dd); /* fldl -0x10(%rsp) */ |
||
2157 | vtop->r = TREG_ST0; |
||
2158 | } |
||
2159 | } else { |
||
2160 | int r; |
||
2161 | gv(RC_ST0); |
||
2162 | r = get_reg(RC_FLOAT); |
||
2163 | if (tbt == VT_DOUBLE) { |
||
2164 | o(0xf0245cdd); /* fstpl -0x10(%rsp) */ |
||
2165 | /* movsd -0x10(%rsp),%xmm0 */ |
||
2166 | o(0x100ff2); |
||
2167 | o(0x44 + REG_VALUE(r)*8); |
||
2168 | o(0xf024); |
||
2169 | vtop->r = r; |
||
2170 | } else if (tbt == VT_FLOAT) { |
||
2171 | o(0xf0245cd9); /* fstps -0x10(%rsp) */ |
||
2172 | /* movss -0x10(%rsp),%xmm0 */ |
||
2173 | o(0x100ff3); |
||
2174 | o(0x44 + REG_VALUE(r)*8); |
||
2175 | o(0xf024); |
||
2176 | vtop->r = r; |
||
2177 | } |
||
2178 | } |
||
2179 | } |
||
2180 | |||
2181 | /* convert fp to int 't' type */ |
||
2182 | void gen_cvt_ftoi(int t) |
||
2183 | { |
||
2184 | int ft, bt, size, r; |
||
2185 | ft = vtop->type.t; |
||
2186 | bt = ft & VT_BTYPE; |
||
2187 | if (bt == VT_LDOUBLE) { |
||
2188 | gen_cvt_ftof(VT_DOUBLE); |
||
2189 | bt = VT_DOUBLE; |
||
2190 | } |
||
2191 | |||
2192 | gv(RC_FLOAT); |
||
2193 | if (t != VT_INT) |
||
2194 | size = 8; |
||
2195 | else |
||
2196 | size = 4; |
||
2197 | |||
2198 | r = get_reg(RC_INT); |
||
2199 | if (bt == VT_FLOAT) { |
||
2200 | o(0xf3); |
||
2201 | } else if (bt == VT_DOUBLE) { |
||
2202 | o(0xf2); |
||
2203 | } else { |
||
2204 | assert(0); |
||
2205 | } |
||
2206 | orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */ |
||
2207 | o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8); |
||
2208 | vtop->r = r; |
||
2209 | } |
||
2210 | |||
2211 | /* computed goto support */ |
||
2212 | void ggoto(void) |
||
2213 | { |
||
2214 | gcall_or_jmp(1); |
||
2215 | vtop--; |
||
2216 | } |
||
2217 | |||
2218 | /* Save the stack pointer onto the stack and return the location of its address */ |
||
2219 | ST_FUNC void gen_vla_sp_save(int addr) { |
||
2220 | /* mov %rsp,addr(%rbp)*/ |
||
2221 | gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr); |
||
2222 | } |
||
2223 | |||
2224 | /* Restore the SP from a location on the stack */ |
||
2225 | ST_FUNC void gen_vla_sp_restore(int addr) { |
||
2226 | gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr); |
||
2227 | } |
||
2228 | |||
2229 | /* Subtract from the stack pointer, and push the resulting value onto the stack */ |
||
2230 | ST_FUNC void gen_vla_alloc(CType *type, int align) { |
||
2231 | #ifdef TCC_TARGET_PE |
||
2232 | /* alloca does more than just adjust %rsp on Windows */ |
||
2233 | vpush_global_sym(&func_old_type, TOK_alloca); |
||
2234 | vswap(); /* Move alloca ref past allocation size */ |
||
2235 | gfunc_call(1); |
||
2236 | vset(type, REG_IRET, 0); |
||
2237 | #else |
||
2238 | int r; |
||
2239 | r = gv(RC_INT); /* allocation size */ |
||
2240 | /* sub r,%rsp */ |
||
2241 | o(0x2b48); |
||
2242 | o(0xe0 | REG_VALUE(r)); |
||
2243 | /* We align to 16 bytes rather than align */ |
||
2244 | /* and ~15, %rsp */ |
||
2245 | o(0xf0e48348); |
||
2246 | vpop(); |
||
2247 | #endif |
||
2248 | } |
||
2249 | |||
2250 | |||
2251 | /* end of x86-64 code generator */ |
||
2252 | /*************************************************************/ |
||
2253 | #endif /* ! TARGET_DEFS_ONLY */ |
||
2254 | /******************************************************/ |