Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6429 | siemargl | 1 | /* |
2 | * A64 code generator for TCC |
||
3 | * |
||
4 | * Copyright (c) 2014-2015 Edmund Grimley Evans |
||
5 | * |
||
6 | * Copying and distribution of this file, with or without modification, |
||
7 | * are permitted in any medium without royalty provided the copyright |
||
8 | * notice and this notice are preserved. This file is offered as-is, |
||
9 | * without any warranty. |
||
10 | */ |
||
11 | |||
12 | #ifdef TARGET_DEFS_ONLY |
||
13 | |||
// Number of registers available to allocator:
#define NB_REGS 28 // x0-x18, x30, v0-v7

// TCC register numbers: 0..18 are x0-x18, 19 is x30, 20..27 are v0-v7.
#define TREG_R(x) (x) // x = 0..18
#define TREG_R30  19
// The argument is parenthesized so that any expression, not just a literal,
// can be passed without operator-precedence surprises.
#define TREG_F(x) ((x) + 20) // x = 0..7

// Register classes sorted from more general to more precise:
#define RC_INT   (1 << 0)
#define RC_FLOAT (1 << 1)
#define RC_R(x)  (1 << (2 + (x))) // x = 0..18
#define RC_R30   (1 << 21)
#define RC_F(x)  (1 << (22 + (x))) // x = 0..7

#define RC_IRET (RC_R(0)) // int return register class
#define RC_FRET (RC_F(0)) // float return register class

#define REG_IRET (TREG_R(0)) // int return register number
#define REG_FRET (TREG_F(0)) // float return register number

#define PTR_SIZE 8

#define LDOUBLE_SIZE 16
#define LDOUBLE_ALIGN 16

#define MAX_ALIGN 16

#define CHAR_IS_UNSIGNED

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_AARCH64

#define R_DATA_32 R_AARCH64_ABS32
#define R_DATA_PTR R_AARCH64_ABS64
#define R_JMP_SLOT R_AARCH64_JUMP_SLOT
#define R_COPY R_AARCH64_COPY

#define ELF_START_ADDR 0x00400000
#define ELF_PAGE_SIZE 0x1000
||
55 | |||
56 | /******************************************************/ |
||
57 | #else /* ! TARGET_DEFS_ONLY */ |
||
58 | /******************************************************/ |
||
59 | #include "tcc.h" |
||
60 | #include <assert.h> |
||
61 | |||
62 | ST_DATA const int reg_classes[NB_REGS] = { |
||
63 | RC_INT | RC_R(0), |
||
64 | RC_INT | RC_R(1), |
||
65 | RC_INT | RC_R(2), |
||
66 | RC_INT | RC_R(3), |
||
67 | RC_INT | RC_R(4), |
||
68 | RC_INT | RC_R(5), |
||
69 | RC_INT | RC_R(6), |
||
70 | RC_INT | RC_R(7), |
||
71 | RC_INT | RC_R(8), |
||
72 | RC_INT | RC_R(9), |
||
73 | RC_INT | RC_R(10), |
||
74 | RC_INT | RC_R(11), |
||
75 | RC_INT | RC_R(12), |
||
76 | RC_INT | RC_R(13), |
||
77 | RC_INT | RC_R(14), |
||
78 | RC_INT | RC_R(15), |
||
79 | RC_INT | RC_R(16), |
||
80 | RC_INT | RC_R(17), |
||
81 | RC_INT | RC_R(18), |
||
82 | RC_R30, // not in RC_INT as we make special use of x30 |
||
83 | RC_FLOAT | RC_F(0), |
||
84 | RC_FLOAT | RC_F(1), |
||
85 | RC_FLOAT | RC_F(2), |
||
86 | RC_FLOAT | RC_F(3), |
||
87 | RC_FLOAT | RC_F(4), |
||
88 | RC_FLOAT | RC_F(5), |
||
89 | RC_FLOAT | RC_F(6), |
||
90 | RC_FLOAT | RC_F(7) |
||
91 | }; |
||
92 | |||
93 | #define IS_FREG(x) ((x) >= TREG_F(0)) |
||
94 | |||
95 | static uint32_t intr(int r) |
||
96 | { |
||
97 | assert(TREG_R(0) <= r && r <= TREG_R30); |
||
98 | return r < TREG_R30 ? r : 30; |
||
99 | } |
||
100 | |||
// Translate a TCC float register number (TREG_F(0)..TREG_F(7)) into the
// vector register index 0..7 used in A64 encodings.
static uint32_t fltr(int r)
{
    const int first = TREG_F(0);

    assert(first <= r && r <= TREG_F(7));
    return r - first;
}
||
106 | |||
107 | // Add an instruction to text section: |
||
108 | ST_FUNC void o(unsigned int c) |
||
109 | { |
||
110 | int ind1 = ind + 4; |
||
111 | if (ind1 > cur_text_section->data_allocated) |
||
112 | section_realloc(cur_text_section, ind1); |
||
113 | write32le(cur_text_section->data + ind, c); |
||
114 | ind = ind1; |
||
115 | } |
||
116 | |||
// Try to express x as an A64 "bitmask immediate": a run of ones, rotated,
// replicated across 64 bits with an element size of 2, 4, 8, 16, 32 or 64.
// Returns the 13-bit field (bit 12 = N, bits 11..6 = immr, bits 5..0 = imms)
// or -1 when x cannot be encoded (this includes 0 and all-ones).
static int arm64_encode_bimm64(uint64_t x)
{
    const int inverted = x & 1;
    int rep = 64, pos = 0, len = 0;
    int step;

    // Work on a value whose bit 0 is clear; undo the inversion at the end.
    if (inverted)
        x = ~x;
    if (x == 0)
        return -1;

    // Find the smallest element size at which the pattern repeats and keep
    // only a single element.
    for (step = 2; step < 64; step *= 2) {
        const uint64_t low = ((uint64_t)1 << (64 - step)) - 1;
        if (x >> step == (x & low)) {
            rep = step;
            x &= ((uint64_t)1 << step) - 1;
            break;
        }
    }

    // Strip trailing zeros (binary search): pos = position of the run.
    for (step = 32; step != 0; step /= 2) {
        const uint64_t mask = ((uint64_t)1 << step) - 1;
        if ((x & mask) == 0) {
            x >>= step;
            pos += step;
        }
    }

    // Strip trailing ones the same way: len = length of the run.
    for (step = 32; step != 0; step /= 2) {
        const uint64_t mask = ((uint64_t)1 << step) - 1;
        if ((~x & mask) == 0) {
            x >>= step;
            len += step;
        }
    }

    // Anything left means there was more than one run of ones.
    if (x != 0)
        return -1;

    if (inverted) {
        // The run of ones in the original value is the complement of the
        // run just measured, within one element.
        pos = (pos + len) & (rep - 1);
        len = rep - len;
    }

    {
        const int n_bit = 0x1000 & (rep << 6);
        const int size_bits = (((rep - 1) ^ 31) << 1) & 63;
        const int immr = ((rep - pos) & (rep - 1)) << 6;
        return n_bit | size_bits | immr | (len - 1);
    }
}
||
165 | |||
// Return the encoding of a single instruction that sets register r to the
// constant x (MOVZ, MOVN, or ORR with a bitmask immediate), or 0 when no
// single instruction can do it.
static uint32_t arm64_movi(int r, uint64_t x)
{
    const uint64_t h = 0xffff; // mask of one 16-bit halfword
    const uint64_t nx = ~x;
    int enc;

    // MOVZ: at most one halfword is non-zero.
    if ((x & ~h) == 0)
        return 0x52800000 | r | x << 5; // movz w(r),#(x)
    if ((x & ~(h << 16)) == 0)
        return 0x52a00000 | r | x >> 11; // movz w(r),#(x >> 16),lsl #16
    if ((x & ~(h << 32)) == 0)
        return 0xd2c00000 | r | x >> 27; // movz x(r),#(x >> 32),lsl #32
    if ((x & ~(h << 48)) == 0)
        return 0xd2e00000 | r | x >> 43; // movz x(r),#(x >> 48),lsl #48

    // 32-bit MOVN: upper 32 bits zero, one of the two low halfwords all ones.
    if ((x & ~h) == h << 16)
        return (0x12800000 | r |
                (nx << 5 & 0x1fffe0)); // movn w(r),#(~x)
    if ((x & ~(h << 16)) == h)
        return (0x12a00000 | r |
                (nx >> 11 & 0x1fffe0)); // movn w(r),#(~x >> 16),lsl #16

    // 64-bit MOVN: every halfword except one is all ones.
    if ((x | h) == ~(uint64_t)0)
        return (0x92800000 | r |
                (nx << 5 & 0x1fffe0)); // movn x(r),#(~x)
    if ((x | h << 16) == ~(uint64_t)0)
        return (0x92a00000 | r |
                (nx >> 11 & 0x1fffe0)); // movn x(r),#(~x >> 16),lsl #16
    if ((x | h << 32) == ~(uint64_t)0)
        return (0x92c00000 | r |
                (nx >> 27 & 0x1fffe0)); // movn x(r),#(~x >> 32),lsl #32
    if ((x | h << 48) == ~(uint64_t)0)
        return (0x92e00000 | r |
                (nx >> 43 & 0x1fffe0)); // movn x(r),#(~x >> 48),lsl #48

    // Bitmask immediate, 32-bit form first (the value is replicated into the
    // upper half so the 64-bit encoder can be reused).
    if ((x >> 32) == 0) {
        enc = arm64_encode_bimm64(x | x << 32);
        if (enc >= 0)
            return 0x320003e0 | r | (uint32_t)enc << 10; // movi w(r),#(x)
    }
    enc = arm64_encode_bimm64(x);
    if (enc >= 0)
        return 0xb20003e0 | r | (uint32_t)enc << 10; // movi x(r),#(x)

    return 0;
}
||
202 | |||
// Emit code that sets register r to the 64-bit constant x: a single
// instruction when arm64_movi() finds one, otherwise a MOVZ or MOVN followed
// by one MOVK for each remaining halfword that still differs.
static void arm64_movimm(int r, uint64_t x)
{
    const uint32_t single = arm64_movi(r, x);
    uint32_t first_op = 0xd2800000; // movz
    uint64_t seed = x;              // value whose non-zero halfwords need an instruction
    int zero_halves = 0, ones_halves = 0;
    uint32_t sh;

    if (single) {
        o(single); // a single MOV
        return;
    }

    // Count all-zero and all-one halfwords to choose between MOVZ and MOVN.
    for (sh = 0; sh < 64; sh += 16) {
        if (!(x >> sh & 0xffff))
            zero_halves++;
        if (!(~x >> sh & 0xffff))
            ones_halves++;
    }
    if (ones_halves > zero_halves) {
        seed = ~x;
        first_op = 0x92800000; // movn
    }

    // The lowest halfword that needs setting gets the MOVZ/MOVN.
    sh = 0;
    while (sh < 64 && !(seed >> sh & 0xffff))
        sh += 16;
    if (sh < 64)
        o(first_op | r | (seed >> sh & 0xffff) << 5 | sh << 17);
        // movz/movn x(r),#(*),lsl #(sh)

    // Every further halfword that needs setting gets a MOVK with the real bits.
    for (sh += 16; sh < 64; sh += 16)
        if (seed >> sh & 0xffff)
            o(0xf2800000 | r | (x >> sh & 0xffff) << 5 | sh << 17);
            // movk x(r),#(*),lsl #(sh)
}
||
233 | |||
234 | // Patch all branches in list pointed to by t to branch to a: |
||
235 | ST_FUNC void gsym_addr(int t_, int a_) |
||
236 | { |
||
237 | uint32_t t = t_; |
||
238 | uint32_t a = a_; |
||
239 | while (t) { |
||
240 | unsigned char *ptr = cur_text_section->data + t; |
||
241 | uint32_t next = read32le(ptr); |
||
242 | if (a - t + 0x8000000 >= 0x10000000) |
||
243 | tcc_error("branch out of range"); |
||
244 | write32le(ptr, (a - t == 4 ? 0xd503201f : // nop |
||
245 | 0x14000000 | ((a - t) >> 2 & 0x3ffffff))); // b |
||
246 | t = next; |
||
247 | } |
||
248 | } |
||
249 | |||
250 | // Patch all branches in list pointed to by t to branch to current location: |
||
251 | ST_FUNC void gsym(int t) |
||
252 | { |
||
253 | gsym_addr(t, ind); |
||
254 | } |
||
255 | |||
256 | static int arm64_type_size(int t) |
||
257 | { |
||
258 | switch (t & VT_BTYPE) { |
||
259 | case VT_INT: return 2; |
||
260 | case VT_BYTE: return 0; |
||
261 | case VT_SHORT: return 1; |
||
262 | case VT_PTR: return 3; |
||
263 | case VT_ENUM: return 2; |
||
264 | case VT_FUNC: return 3; |
||
265 | case VT_FLOAT: return 2; |
||
266 | case VT_DOUBLE: return 3; |
||
267 | case VT_LDOUBLE: return 4; |
||
268 | case VT_BOOL: return 0; |
||
269 | case VT_LLONG: return 3; |
||
270 | } |
||
271 | assert(0); |
||
272 | return 0; |
||
273 | } |
||
274 | |||
// Emit code computing x(reg) = sp + off, where off is interpreted as a
// signed 64-bit value. Magnitudes below 4096 use an immediate ADD/SUB;
// larger ones are first materialised in x30.
static void arm64_spoff(int reg, uint64_t off)
{
    const uint32_t negative = off >> 63;
    const uint64_t mag = negative ? -off : off;
    const uint32_t op = negative << 30; // selects SUB instead of ADD

    if (mag >= 4096) {
        arm64_movimm(30, mag); // use x30 for offset
        o(0x8b3e63e0 | op | reg); // (add|sub) x(reg),sp,x30
        return;
    }
    o(0x910003e0 | op | reg | mag << 10); // (add|sub) x(reg),sp,#(mag)
}
||
288 | |||
// Emit an integer load of 1 << sz_ bytes from [x(bas) + off] into register
// dst. sg requests sign extension and is only honoured for sz_ < 2.
// Encoding is chosen by offset: scaled unsigned 12-bit immediate (LDR),
// signed 9-bit unscaled immediate (LDUR), or register offset via x30.
static void arm64_ldrx(int sg, int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    if (sz >= 2)
        sg = 0;
    // The mask is built in 64 bits: a 32-bit mask would be zero-extended by
    // the `&` and silently ignore the upper half of off.
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x39400000 | dst | bas << 5 | off << (10 - sz) |
          (uint32_t)!!sg << 23 | sz << 30); // ldr(*) x(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x38400000 | dst | bas << 5 | (off & 511) << 12 |
          (uint32_t)!!sg << 23 | sz << 30); // ldur(*) x(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x38206800 | dst | bas << 5 | (uint32_t)30 << 16 |
          (uint32_t)(!!sg + 1) << 22 | sz << 30); // ldr(*) x(dst),[x(bas),x30]
    }
}
||
306 | |||
// Emit a floating-point/vector load of 1 << sz_ bytes (sz_ = 2, 3 or 4 for
// s, d or q) from [x(bas) + off] into vector register dst. Encoding is
// chosen by offset: scaled unsigned 12-bit immediate, signed 9-bit unscaled
// immediate, or register offset via x30.
static void arm64_ldrv(int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    // The mask is built in 64 bits: a 32-bit mask would be zero-extended by
    // the `&` and silently ignore the upper half of off.
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x3d400000 | dst | bas << 5 | off << (10 - sz) |
          (sz & 4) << 21 | (sz & 3) << 30); // ldr (s|d|q)(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x3c400000 | dst | bas << 5 | (off & 511) << 12 |
          (sz & 4) << 21 | (sz & 3) << 30); // ldur (s|d|q)(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x3c606800 | dst | bas << 5 | (uint32_t)30 << 16 |
          sz << 30 | (sz & 4) << 21); // ldr (s|d|q)(dst),[x(bas),x30]
    }
}
||
322 | |||
// Load a block of `size` bytes (1..16) whose address is in x(reg_) into
// x(reg_) and, for sizes above 8, x(reg_ + 1). Odd sizes are assembled from
// narrower or overlapping loads plus shifts so that only `size` bytes are
// read. x30 is used for an intermediate value in some cases.
static void arm64_ldrs(int reg_, int size)
{
    const uint32_t lo = reg_;   // holds the address on entry, low 8 bytes on exit
    const uint32_t hi = lo + 1; // receives bytes 8.. when size > 8
    int sz;

    switch (size) {
    case 1:
    case 2:
    case 4:
    case 8:
        // Power-of-two sizes need one plain load.
        sz = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
        arm64_ldrx(0, sz, lo, lo, 0);
        break;
    case 3:
        arm64_ldrx(0, 1, 30, lo, 0);
        arm64_ldrx(0, 0, lo, lo, 2);
        o(0x2a0043c0 | lo | lo << 16); // orr w(lo),w30,w(lo),lsl #16
        break;
    case 5:
    case 6:
        // Low word in x30, then one byte (5) or halfword (6) at offset 4.
        arm64_ldrx(0, 2, 30, lo, 0);
        arm64_ldrx(0, size - 5, lo, lo, 4);
        o(0xaa0083c0 | lo | lo << 16); // orr x(lo),x30,x(lo),lsl #32
        break;
    case 7:
        // The second word overlaps the first by one byte; shift it out.
        arm64_ldrx(0, 2, 30, lo, 0);
        arm64_ldrx(0, 2, lo, lo, 3);
        o(0x53087c00 | lo | lo << 5); // lsr w(lo), w(lo), #8
        o(0xaa0083c0 | lo | lo << 16); // orr x(lo),x30,x(lo),lsl #32
        break;
    case 9:
    case 10:
    case 12:
        // Tail of 1, 2 or 4 bytes at offset 8, then the low doubleword.
        arm64_ldrx(0, size == 12 ? 2 : size - 9, hi, lo, 8);
        arm64_ldrx(0, 3, lo, lo, 0);
        break;
    case 11:
        arm64_ldrx(0, 2, hi, lo, 7);
        o(0x53087c00 | hi | hi << 5); // lsr w(hi), w(hi), #8
        arm64_ldrx(0, 3, lo, lo, 0);
        break;
    case 13:
    case 14:
        // fall into the shared handling below
    case 15:
        // Overlapping doubleword ending at the last byte, shifted right by
        // the number of overlapping bits (24, 16 or 8).
        arm64_ldrx(0, 3, hi, lo, size - 8);
        o(0xd340fc00 | (uint32_t)(8 * (16 - size)) << 16 |
          hi | hi << 5); // lsr x(hi), x(hi), #(8 * (16 - size))
        arm64_ldrx(0, 3, lo, lo, 0);
        break;
    case 16:
        o(0xa9400000 | lo | hi << 10 | lo << 5);
        // ldp x(lo),x(hi),[x(lo)]
        break;
    default:
        assert(0);
        break;
    }
}
||
400 | |||
// Emit an integer store of 1 << sz_ bytes from register dst to
// [x(bas) + off]. Encoding is chosen by offset: scaled unsigned 12-bit
// immediate (STR), signed 9-bit unscaled immediate (STUR), or register
// offset via x30.
static void arm64_strx(int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    // The mask is built in 64 bits: a 32-bit mask would be zero-extended by
    // the `&` and silently ignore the upper half of off.
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x39000000 | dst | bas << 5 | off << (10 - sz) | sz << 30);
        // str(*) x(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x38000000 | dst | bas << 5 | (off & 511) << 12 | sz << 30);
        // stur(*) x(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x38206800 | dst | bas << 5 | (uint32_t)30 << 16 | sz << 30);
        // str(*) x(dst),[x(bas),x30]
    }
}
||
416 | |||
// Emit a floating-point/vector store of 1 << sz_ bytes (sz_ = 2, 3 or 4 for
// s, d or q) from vector register dst to [x(bas) + off]. Encoding is chosen
// by offset: scaled unsigned 12-bit immediate, signed 9-bit unscaled
// immediate, or register offset via x30.
static void arm64_strv(int sz_, int dst, int bas, uint64_t off)
{
    uint32_t sz = sz_;
    // The mask is built in 64 bits: a 32-bit mask would be zero-extended by
    // the `&` and silently ignore the upper half of off.
    if (!(off & ~((uint64_t)0xfff << sz)))
        o(0x3d000000 | dst | bas << 5 | off << (10 - sz) |
          (sz & 4) << 21 | (sz & 3) << 30); // str (s|d|q)(dst),[x(bas),#(off)]
    else if (off < 256 || -off <= 256)
        o(0x3c000000 | dst | bas << 5 | (off & 511) << 12 |
          (sz & 4) << 21 | (sz & 3) << 30); // stur (s|d|q)(dst),[x(bas),#(off)]
    else {
        arm64_movimm(30, off); // use x30 for offset
        o(0x3c206800 | dst | bas << 5 | (uint32_t)30 << 16 |
          sz << 30 | (sz & 4) << 21); // str (s|d|q)(dst),[x(bas),x30]
    }
}
||
432 | |||
433 | static void arm64_sym(int r, Sym *sym, unsigned long addend) |
||
434 | { |
||
435 | // Currently TCC's linker does not generate COPY relocations for |
||
436 | // STT_OBJECTs when tcc is invoked with "-run". This typically |
||
437 | // results in "R_AARCH64_ADR_PREL_PG_HI21 relocation failed" when |
||
438 | // a program refers to stdin. A workaround is to avoid that |
||
439 | // relocation and use only relocations with unlimited range. |
||
440 | int avoid_adrp = 1; |
||
441 | |||
442 | if (avoid_adrp || (sym->type.t & VT_WEAK)) { |
||
443 | // (GCC uses a R_AARCH64_ABS64 in this case.) |
||
444 | greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G0_NC, addend); |
||
445 | o(0xd2800000 | r); // mov x(rt),#0,lsl #0 |
||
446 | greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G1_NC, addend); |
||
447 | o(0xf2a00000 | r); // movk x(rt),#0,lsl #16 |
||
448 | greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G2_NC, addend); |
||
449 | o(0xf2c00000 | r); // movk x(rt),#0,lsl #32 |
||
450 | greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G3, addend); |
||
451 | o(0xf2e00000 | r); // movk x(rt),#0,lsl #48 |
||
452 | } |
||
453 | else { |
||
454 | greloca(cur_text_section, sym, ind, R_AARCH64_ADR_PREL_PG_HI21, addend); |
||
455 | o(0x90000000 | r); |
||
456 | greloca(cur_text_section, sym, ind, R_AARCH64_ADD_ABS_LO12_NC, addend); |
||
457 | o(0x91000000 | r | r << 5); |
||
458 | } |
||
459 | } |
||
460 | |||
461 | ST_FUNC void load(int r, SValue *sv) |
||
462 | { |
||
463 | int svtt = sv->type.t; |
||
464 | int svr = sv->r & ~VT_LVAL_TYPE; |
||
465 | int svrv = svr & VT_VALMASK; |
||
466 | uint64_t svcul = (uint32_t)sv->c.i; |
||
467 | svcul = svcul >> 31 & 1 ? svcul - ((uint64_t)1 << 32) : svcul; |
||
468 | |||
469 | if (svr == (VT_LOCAL | VT_LVAL)) { |
||
470 | if (IS_FREG(r)) |
||
471 | arm64_ldrv(arm64_type_size(svtt), fltr(r), 29, svcul); |
||
472 | else |
||
473 | arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt), |
||
474 | intr(r), 29, svcul); |
||
475 | return; |
||
476 | } |
||
477 | |||
478 | if ((svr & ~VT_VALMASK) == VT_LVAL && svrv < VT_CONST) { |
||
479 | if (IS_FREG(r)) |
||
480 | arm64_ldrv(arm64_type_size(svtt), fltr(r), intr(svrv), 0); |
||
481 | else |
||
482 | arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt), |
||
483 | intr(r), intr(svrv), 0); |
||
484 | return; |
||
485 | } |
||
486 | |||
487 | if (svr == (VT_CONST | VT_LVAL | VT_SYM)) { |
||
488 | arm64_sym(30, sv->sym, svcul); // use x30 for address |
||
489 | if (IS_FREG(r)) |
||
490 | arm64_ldrv(arm64_type_size(svtt), fltr(r), 30, 0); |
||
491 | else |
||
492 | arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt), |
||
493 | intr(r), 30, 0); |
||
494 | return; |
||
495 | } |
||
496 | |||
497 | if (svr == (VT_CONST | VT_SYM)) { |
||
498 | arm64_sym(intr(r), sv->sym, svcul); |
||
499 | return; |
||
500 | } |
||
501 | |||
502 | if (svr == VT_CONST) { |
||
503 | if ((svtt & VT_BTYPE) != VT_VOID) |
||
504 | arm64_movimm(intr(r), arm64_type_size(svtt) == 3 ? |
||
505 | sv->c.i : (uint32_t)svcul); |
||
506 | return; |
||
507 | } |
||
508 | |||
509 | if (svr < VT_CONST) { |
||
510 | if (IS_FREG(r) && IS_FREG(svr)) |
||
511 | if (svtt == VT_LDOUBLE) |
||
512 | o(0x4ea01c00 | fltr(r) | fltr(svr) << 5); |
||
513 | // mov v(r).16b,v(svr).16b |
||
514 | else |
||
515 | o(0x1e604000 | fltr(r) | fltr(svr) << 5); // fmov d(r),d(svr) |
||
516 | else if (!IS_FREG(r) && !IS_FREG(svr)) |
||
517 | o(0xaa0003e0 | intr(r) | intr(svr) << 16); // mov x(r),x(svr) |
||
518 | else |
||
519 | assert(0); |
||
520 | return; |
||
521 | } |
||
522 | |||
523 | if (svr == VT_LOCAL) { |
||
524 | if (-svcul < 0x1000) |
||
525 | o(0xd10003a0 | intr(r) | -svcul << 10); // sub x(r),x29,#... |
||
526 | else { |
||
527 | arm64_movimm(30, -svcul); // use x30 for offset |
||
528 | o(0xcb0003a0 | intr(r) | (uint32_t)30 << 16); // sub x(r),x29,x30 |
||
529 | } |
||
530 | return; |
||
531 | } |
||
532 | |||
533 | if (svr == VT_JMP || svr == VT_JMPI) { |
||
534 | int t = (svr == VT_JMPI); |
||
535 | arm64_movimm(intr(r), t); |
||
536 | o(0x14000002); // b .+8 |
||
537 | gsym(svcul); |
||
538 | arm64_movimm(intr(r), t ^ 1); |
||
539 | return; |
||
540 | } |
||
541 | |||
542 | if (svr == (VT_LLOCAL | VT_LVAL)) { |
||
543 | arm64_ldrx(0, 3, 30, 29, svcul); // use x30 for offset |
||
544 | if (IS_FREG(r)) |
||
545 | arm64_ldrv(arm64_type_size(svtt), fltr(r), 30, 0); |
||
546 | else |
||
547 | arm64_ldrx(!(svtt & VT_UNSIGNED), arm64_type_size(svtt), |
||
548 | intr(r), 30, 0); |
||
549 | return; |
||
550 | } |
||
551 | |||
552 | printf("load(%x, (%x, %x, %llx))\n", r, svtt, sv->r, (long long)svcul); |
||
553 | assert(0); |
||
554 | } |
||
555 | |||
556 | ST_FUNC void store(int r, SValue *sv) |
||
557 | { |
||
558 | int svtt = sv->type.t; |
||
559 | int svr = sv->r & ~VT_LVAL_TYPE; |
||
560 | int svrv = svr & VT_VALMASK; |
||
561 | uint64_t svcul = (uint32_t)sv->c.i; |
||
562 | svcul = svcul >> 31 & 1 ? svcul - ((uint64_t)1 << 32) : svcul; |
||
563 | |||
564 | if (svr == (VT_LOCAL | VT_LVAL)) { |
||
565 | if (IS_FREG(r)) |
||
566 | arm64_strv(arm64_type_size(svtt), fltr(r), 29, svcul); |
||
567 | else |
||
568 | arm64_strx(arm64_type_size(svtt), intr(r), 29, svcul); |
||
569 | return; |
||
570 | } |
||
571 | |||
572 | if ((svr & ~VT_VALMASK) == VT_LVAL && svrv < VT_CONST) { |
||
573 | if (IS_FREG(r)) |
||
574 | arm64_strv(arm64_type_size(svtt), fltr(r), intr(svrv), 0); |
||
575 | else |
||
576 | arm64_strx(arm64_type_size(svtt), intr(r), intr(svrv), 0); |
||
577 | return; |
||
578 | } |
||
579 | |||
580 | if (svr == (VT_CONST | VT_LVAL | VT_SYM)) { |
||
581 | arm64_sym(30, sv->sym, svcul); // use x30 for address |
||
582 | if (IS_FREG(r)) |
||
583 | arm64_strv(arm64_type_size(svtt), fltr(r), 30, 0); |
||
584 | else |
||
585 | arm64_strx(arm64_type_size(svtt), intr(r), 30, 0); |
||
586 | return; |
||
587 | } |
||
588 | |||
589 | printf("store(%x, (%x, %x, %llx))\n", r, svtt, sv->r, (long long)svcul); |
||
590 | assert(0); |
||
591 | } |
||
592 | |||
593 | static void arm64_gen_bl_or_b(int b) |
||
594 | { |
||
595 | if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { |
||
596 | assert(!b); |
||
597 | if (vtop->r & VT_SYM) |
||
598 | greloc(cur_text_section, vtop->sym, ind, R_AARCH64_CALL26); |
||
599 | else |
||
600 | assert(0); |
||
601 | o(0x94000000); // bl . |
||
602 | } |
||
603 | else |
||
604 | o(0xd61f0000 | (uint32_t)!b << 21 | intr(gv(RC_R30)) << 5); // br/blr |
||
605 | } |
||
606 | |||
607 | static int arm64_hfa_aux(CType *type, int *fsize, int num) |
||
608 | { |
||
609 | if (is_float(type->t)) { |
||
610 | int a, n = type_size(type, &a); |
||
611 | if (num >= 4 || (*fsize && *fsize != n)) |
||
612 | return -1; |
||
613 | *fsize = n; |
||
614 | return num + 1; |
||
615 | } |
||
616 | else if ((type->t & VT_BTYPE) == VT_STRUCT) { |
||
617 | int is_struct = 0; // rather than union |
||
618 | Sym *field; |
||
619 | for (field = type->ref->next; field; field = field->next) |
||
620 | if (field->c) { |
||
621 | is_struct = 1; |
||
622 | break; |
||
623 | } |
||
624 | if (is_struct) { |
||
625 | int num0 = num; |
||
626 | for (field = type->ref->next; field; field = field->next) { |
||
627 | if (field->c != (num - num0) * *fsize) |
||
628 | return -1; |
||
629 | num = arm64_hfa_aux(&field->type, fsize, num); |
||
630 | if (num == -1) |
||
631 | return -1; |
||
632 | } |
||
633 | if (type->ref->c != (num - num0) * *fsize) |
||
634 | return -1; |
||
635 | return num; |
||
636 | } |
||
637 | else { // union |
||
638 | int num0 = num; |
||
639 | for (field = type->ref->next; field; field = field->next) { |
||
640 | int num1 = arm64_hfa_aux(&field->type, fsize, num0); |
||
641 | if (num1 == -1) |
||
642 | return -1; |
||
643 | num = num1 < num ? num : num1; |
||
644 | } |
||
645 | if (type->ref->c != (num - num0) * *fsize) |
||
646 | return -1; |
||
647 | return num; |
||
648 | } |
||
649 | } |
||
650 | else if (type->t & VT_ARRAY) { |
||
651 | int num1; |
||
652 | if (!type->ref->c) |
||
653 | return num; |
||
654 | num1 = arm64_hfa_aux(&type->ref->type, fsize, num); |
||
655 | if (num1 == -1 || (num1 != num && type->ref->c > 4)) |
||
656 | return -1; |
||
657 | num1 = num + type->ref->c * (num1 - num); |
||
658 | if (num1 > 4) |
||
659 | return -1; |
||
660 | return num1; |
||
661 | } |
||
662 | return -1; |
||
663 | } |
||
664 | |||
665 | static int arm64_hfa(CType *type, int *fsize) |
||
666 | { |
||
667 | if ((type->t & VT_BTYPE) == VT_STRUCT || (type->t & VT_ARRAY)) { |
||
668 | int sz = 0; |
||
669 | int n = arm64_hfa_aux(type, &sz, 0); |
||
670 | if (0 < n && n <= 4) { |
||
671 | if (fsize) |
||
672 | *fsize = sz; |
||
673 | return n; |
||
674 | } |
||
675 | } |
||
676 | return 0; |
||
677 | } |
||
678 | |||
679 | static unsigned long arm64_pcs_aux(int n, CType **type, unsigned long *a) |
||
680 | { |
||
681 | int nx = 0; // next integer register |
||
682 | int nv = 0; // next vector register |
||
683 | unsigned long ns = 32; // next stack offset |
||
684 | int i; |
||
685 | |||
686 | for (i = 0; i < n; i++) { |
||
687 | int hfa = arm64_hfa(type[i], 0); |
||
688 | int size, align; |
||
689 | |||
690 | if ((type[i]->t & VT_ARRAY) || |
||
691 | (type[i]->t & VT_BTYPE) == VT_FUNC) |
||
692 | size = align = 8; |
||
693 | else |
||
694 | size = type_size(type[i], &align); |
||
695 | |||
696 | if (hfa) |
||
697 | // B.2 |
||
698 | ; |
||
699 | else if (size > 16) { |
||
700 | // B.3: replace with pointer |
||
701 | if (nx < 8) |
||
702 | a[i] = nx++ << 1 | 1; |
||
703 | else { |
||
704 | ns = (ns + 7) & ~7; |
||
705 | a[i] = ns | 1; |
||
706 | ns += 8; |
||
707 | } |
||
708 | continue; |
||
709 | } |
||
710 | else if ((type[i]->t & VT_BTYPE) == VT_STRUCT) |
||
711 | // B.4 |
||
712 | size = (size + 7) & ~7; |
||
713 | |||
714 | // C.1 |
||
715 | if (is_float(type[i]->t) && nv < 8) { |
||
716 | a[i] = 16 + (nv++ << 1); |
||
717 | continue; |
||
718 | } |
||
719 | |||
720 | // C.2 |
||
721 | if (hfa && nv + hfa <= 8) { |
||
722 | a[i] = 16 + (nv << 1); |
||
723 | nv += hfa; |
||
724 | continue; |
||
725 | } |
||
726 | |||
727 | // C.3 |
||
728 | if (hfa) { |
||
729 | nv = 8; |
||
730 | size = (size + 7) & ~7; |
||
731 | } |
||
732 | |||
733 | // C.4 |
||
734 | if (hfa || (type[i]->t & VT_BTYPE) == VT_LDOUBLE) { |
||
735 | ns = (ns + 7) & ~7; |
||
736 | ns = (ns + align - 1) & -align; |
||
737 | } |
||
738 | |||
739 | // C.5 |
||
740 | if ((type[i]->t & VT_BTYPE) == VT_FLOAT) |
||
741 | size = 8; |
||
742 | |||
743 | // C.6 |
||
744 | if (hfa || is_float(type[i]->t)) { |
||
745 | a[i] = ns; |
||
746 | ns += size; |
||
747 | continue; |
||
748 | } |
||
749 | |||
750 | // C.7 |
||
751 | if ((type[i]->t & VT_BTYPE) != VT_STRUCT && size <= 8 && nx < 8) { |
||
752 | a[i] = nx++ << 1; |
||
753 | continue; |
||
754 | } |
||
755 | |||
756 | // C.8 |
||
757 | if (align == 16) |
||
758 | nx = (nx + 1) & ~1; |
||
759 | |||
760 | // C.9 |
||
761 | if ((type[i]->t & VT_BTYPE) != VT_STRUCT && size == 16 && nx < 7) { |
||
762 | a[i] = nx << 1; |
||
763 | nx += 2; |
||
764 | continue; |
||
765 | } |
||
766 | |||
767 | // C.10 |
||
768 | if ((type[i]->t & VT_BTYPE) == VT_STRUCT && size <= (8 - nx) * 8) { |
||
769 | a[i] = nx << 1; |
||
770 | nx += (size + 7) >> 3; |
||
771 | continue; |
||
772 | } |
||
773 | |||
774 | // C.11 |
||
775 | nx = 8; |
||
776 | |||
777 | // C.12 |
||
778 | ns = (ns + 7) & ~7; |
||
779 | ns = (ns + align - 1) & -align; |
||
780 | |||
781 | // C.13 |
||
782 | if ((type[i]->t & VT_BTYPE) == VT_STRUCT) { |
||
783 | a[i] = ns; |
||
784 | ns += size; |
||
785 | continue; |
||
786 | } |
||
787 | |||
788 | // C.14 |
||
789 | if (size < 8) |
||
790 | size = 8; |
||
791 | |||
792 | // C.15 |
||
793 | a[i] = ns; |
||
794 | ns += size; |
||
795 | } |
||
796 | |||
797 | return ns - 32; |
||
798 | } |
||
799 | |||
800 | static unsigned long arm64_pcs(int n, CType **type, unsigned long *a) |
||
801 | { |
||
802 | unsigned long stack; |
||
803 | |||
804 | // Return type: |
||
805 | if ((type[0]->t & VT_BTYPE) == VT_VOID) |
||
806 | a[0] = -1; |
||
807 | else { |
||
808 | arm64_pcs_aux(1, type, a); |
||
809 | assert(a[0] == 0 || a[0] == 1 || a[0] == 16); |
||
810 | } |
||
811 | |||
812 | // Argument types: |
||
813 | stack = arm64_pcs_aux(n, type + 1, a + 1); |
||
814 | |||
815 | if (0) { |
||
816 | int i; |
||
817 | for (i = 0; i <= n; i++) { |
||
818 | if (!i) |
||
819 | printf("arm64_pcs return: "); |
||
820 | else |
||
821 | printf("arm64_pcs arg %d: ", i); |
||
822 | if (a[i] == (unsigned long)-1) |
||
823 | printf("void\n"); |
||
824 | else if (a[i] == 1 && !i) |
||
825 | printf("X8 pointer\n"); |
||
826 | else if (a[i] < 16) |
||
827 | printf("X%lu%s\n", a[i] / 2, a[i] & 1 ? " pointer" : ""); |
||
828 | else if (a[i] < 32) |
||
829 | printf("V%lu\n", a[i] / 2 - 8); |
||
830 | else |
||
831 | printf("stack %lu%s\n", |
||
832 | (a[i] - 32) & ~1, a[i] & 1 ? " pointer" : ""); |
||
833 | } |
||
834 | } |
||
835 | |||
836 | return stack; |
||
837 | } |
||
838 | |||
839 | ST_FUNC void gfunc_call(int nb_args) |
||
840 | { |
||
841 | CType *return_type; |
||
842 | CType **t; |
||
843 | unsigned long *a, *a1; |
||
844 | unsigned long stack; |
||
845 | int i; |
||
846 | |||
847 | return_type = &vtop[-nb_args].type.ref->type; |
||
848 | if ((return_type->t & VT_BTYPE) == VT_STRUCT) |
||
849 | --nb_args; |
||
850 | |||
851 | t = tcc_malloc((nb_args + 1) * sizeof(*t)); |
||
852 | a = tcc_malloc((nb_args + 1) * sizeof(*a)); |
||
853 | a1 = tcc_malloc((nb_args + 1) * sizeof(*a1)); |
||
854 | |||
855 | t[0] = return_type; |
||
856 | for (i = 0; i < nb_args; i++) |
||
857 | t[nb_args - i] = &vtop[-i].type; |
||
858 | |||
859 | stack = arm64_pcs(nb_args, t, a); |
||
860 | |||
861 | // Allocate space for structs replaced by pointer: |
||
862 | for (i = nb_args; i; i--) |
||
863 | if (a[i] & 1) { |
||
864 | SValue *arg = &vtop[i - nb_args]; |
||
865 | int align, size = type_size(&arg->type, &align); |
||
866 | assert((arg->type.t & VT_BTYPE) == VT_STRUCT); |
||
867 | stack = (stack + align - 1) & -align; |
||
868 | a1[i] = stack; |
||
869 | stack += size; |
||
870 | } |
||
871 | |||
872 | stack = (stack + 15) >> 4 << 4; |
||
873 | |||
874 | assert(stack < 0x1000); |
||
875 | if (stack) |
||
876 | o(0xd10003ff | stack << 10); // sub sp,sp,#(n) |
||
877 | |||
878 | // First pass: set all values on stack |
||
879 | for (i = nb_args; i; i--) { |
||
880 | vpushv(vtop - nb_args + i); |
||
881 | |||
882 | if (a[i] & 1) { |
||
883 | // struct replaced by pointer |
||
884 | int r = get_reg(RC_INT); |
||
885 | arm64_spoff(intr(r), a1[i]); |
||
886 | vset(&vtop->type, r | VT_LVAL, 0); |
||
887 | vswap(); |
||
888 | vstore(); |
||
889 | if (a[i] >= 32) { |
||
890 | // pointer on stack |
||
891 | r = get_reg(RC_INT); |
||
892 | arm64_spoff(intr(r), a1[i]); |
||
893 | arm64_strx(3, intr(r), 31, (a[i] - 32) >> 1 << 1); |
||
894 | } |
||
895 | } |
||
896 | else if (a[i] >= 32) { |
||
897 | // value on stack |
||
898 | if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { |
||
899 | int r = get_reg(RC_INT); |
||
900 | arm64_spoff(intr(r), a[i] - 32); |
||
901 | vset(&vtop->type, r | VT_LVAL, 0); |
||
902 | vswap(); |
||
903 | vstore(); |
||
904 | } |
||
905 | else if (is_float(vtop->type.t)) { |
||
906 | gv(RC_FLOAT); |
||
907 | arm64_strv(arm64_type_size(vtop[0].type.t), |
||
908 | fltr(vtop[0].r), 31, a[i] - 32); |
||
909 | } |
||
910 | else { |
||
911 | gv(RC_INT); |
||
912 | arm64_strx(arm64_type_size(vtop[0].type.t), |
||
913 | intr(vtop[0].r), 31, a[i] - 32); |
||
914 | } |
||
915 | } |
||
916 | |||
917 | --vtop; |
||
918 | } |
||
919 | |||
920 | // Second pass: assign values to registers |
||
921 | for (i = nb_args; i; i--, vtop--) { |
||
922 | if (a[i] < 16 && !(a[i] & 1)) { |
||
923 | // value in general-purpose registers |
||
924 | if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { |
||
925 | int align, size = type_size(&vtop->type, &align); |
||
926 | vtop->type.t = VT_PTR; |
||
927 | gaddrof(); |
||
928 | gv(RC_R(a[i] / 2)); |
||
929 | arm64_ldrs(a[i] / 2, size); |
||
930 | } |
||
931 | else |
||
932 | gv(RC_R(a[i] / 2)); |
||
933 | } |
||
934 | else if (a[i] < 16) |
||
935 | // struct replaced by pointer in register |
||
936 | arm64_spoff(a[i] / 2, a1[i]); |
||
937 | else if (a[i] < 32) { |
||
938 | // value in floating-point registers |
||
939 | if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { |
||
940 | uint32_t j, sz, n = arm64_hfa(&vtop->type, &sz); |
||
941 | vtop->type.t = VT_PTR; |
||
942 | gaddrof(); |
||
943 | gv(RC_R30); |
||
944 | for (j = 0; j < n; j++) |
||
945 | o(0x3d4003c0 | |
||
946 | (sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 | |
||
947 | (a[i] / 2 - 8 + j) | |
||
948 | j << 10); // ldr ([sdq])(*),[x30,#(j * sz)] |
||
949 | } |
||
950 | else |
||
951 | gv(RC_F(a[i] / 2 - 8)); |
||
952 | } |
||
953 | } |
||
954 | |||
955 | if ((return_type->t & VT_BTYPE) == VT_STRUCT) { |
||
956 | if (a[0] == 1) { |
||
957 | // indirect return: set x8 and discard the stack value |
||
958 | gv(RC_R(8)); |
||
959 | --vtop; |
||
960 | } |
||
961 | else |
||
962 | // return in registers: keep the address for after the call |
||
963 | vswap(); |
||
964 | } |
||
965 | |||
966 | save_regs(0); |
||
967 | arm64_gen_bl_or_b(0); |
||
968 | --vtop; |
||
969 | if (stack) |
||
970 | o(0x910003ff | stack << 10); // add sp,sp,#(n) |
||
971 | |||
972 | { |
||
973 | int rt = return_type->t; |
||
974 | int bt = rt & VT_BTYPE; |
||
975 | if (bt == VT_BYTE || bt == VT_SHORT) |
||
976 | // Promote small integers: |
||
977 | o(0x13001c00 | (bt == VT_SHORT) << 13 | |
||
978 | (uint32_t)!!(rt & VT_UNSIGNED) << 30); // [su]xt[bh] w0,w0 |
||
979 | else if (bt == VT_STRUCT && !(a[0] & 1)) { |
||
980 | // A struct was returned in registers, so write it out: |
||
981 | gv(RC_R(8)); |
||
982 | --vtop; |
||
983 | if (a[0] == 0) { |
||
984 | int align, size = type_size(return_type, &align); |
||
985 | assert(size <= 16); |
||
986 | if (size > 8) |
||
987 | o(0xa9000500); // stp x0,x1,[x8] |
||
988 | else if (size) |
||
989 | arm64_strx(size > 4 ? 3 : size > 2 ? 2 : size > 1, 0, 8, 0); |
||
990 | |||
991 | } |
||
992 | else if (a[0] == 16) { |
||
993 | uint32_t j, sz, n = arm64_hfa(return_type, &sz); |
||
994 | for (j = 0; j < n; j++) |
||
995 | o(0x3d000100 | |
||
996 | (sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 | |
||
997 | (a[i] / 2 - 8 + j) | |
||
998 | j << 10); // str ([sdq])(*),[x8,#(j * sz)] |
||
999 | } |
||
1000 | } |
||
1001 | } |
||
1002 | |||
1003 | tcc_free(a1); |
||
1004 | tcc_free(a); |
||
1005 | tcc_free(t); |
||
1006 | } |
||
1007 | |||
// Per-function state recorded by gfunc_prolog() and consumed later by
// gen_va_start() and gfunc_epilog():

// Value returned by arm64_pcs() for the parameter list; gen_va_start() adds
// 224 to it to form the initial __stack pointer of a va_list.
static unsigned long arm64_func_va_list_stack;

// Initial __gr_offs value stored into a va_list by gen_va_start().
static int arm64_func_va_list_gr_offs;

// Initial __vr_offs value stored into a va_list by gen_va_start().
static int arm64_func_va_list_vr_offs;

// Code offset (value of "ind") of the two placeholder NOPs that
// gfunc_epilog() overwrites with the instructions that decrement SP.
static int arm64_func_sub_sp_offset;
||
1012 | |||
1013 | ST_FUNC void gfunc_prolog(CType *func_type) |
||
1014 | { |
||
1015 | int n = 0; |
||
1016 | int i = 0; |
||
1017 | Sym *sym; |
||
1018 | CType **t; |
||
1019 | unsigned long *a; |
||
1020 | |||
1021 | // Why doesn't the caller (gen_function) set func_vt? |
||
1022 | func_vt = func_type->ref->type; |
||
1023 | func_vc = 144; // offset of where x8 is stored |
||
1024 | |||
1025 | for (sym = func_type->ref; sym; sym = sym->next) |
||
1026 | ++n; |
||
1027 | t = tcc_malloc(n * sizeof(*t)); |
||
1028 | a = tcc_malloc(n * sizeof(*a)); |
||
1029 | |||
1030 | for (sym = func_type->ref; sym; sym = sym->next) |
||
1031 | t[i++] = &sym->type; |
||
1032 | |||
1033 | arm64_func_va_list_stack = arm64_pcs(n - 1, t, a); |
||
1034 | |||
1035 | o(0xa9b27bfd); // stp x29,x30,[sp,#-224]! |
||
1036 | o(0xad0087e0); // stp q0,q1,[sp,#16] |
||
1037 | o(0xad018fe2); // stp q2,q3,[sp,#48] |
||
1038 | o(0xad0297e4); // stp q4,q5,[sp,#80] |
||
1039 | o(0xad039fe6); // stp q6,q7,[sp,#112] |
||
1040 | o(0xa90923e8); // stp x8,x8,[sp,#144] |
||
1041 | o(0xa90a07e0); // stp x0,x1,[sp,#160] |
||
1042 | o(0xa90b0fe2); // stp x2,x3,[sp,#176] |
||
1043 | o(0xa90c17e4); // stp x4,x5,[sp,#192] |
||
1044 | o(0xa90d1fe6); // stp x6,x7,[sp,#208] |
||
1045 | |||
1046 | arm64_func_va_list_gr_offs = -64; |
||
1047 | arm64_func_va_list_vr_offs = -128; |
||
1048 | |||
1049 | for (i = 1, sym = func_type->ref->next; sym; i++, sym = sym->next) { |
||
1050 | int off = (a[i] < 16 ? 160 + a[i] / 2 * 8 : |
||
1051 | a[i] < 32 ? 16 + (a[i] - 16) / 2 * 16 : |
||
1052 | 224 + ((a[i] - 32) >> 1 << 1)); |
||
1053 | sym_push(sym->v & ~SYM_FIELD, &sym->type, |
||
1054 | (a[i] & 1 ? VT_LLOCAL : VT_LOCAL) | lvalue_type(sym->type.t), |
||
1055 | off); |
||
1056 | |||
1057 | if (a[i] < 16) { |
||
1058 | int align, size = type_size(&sym->type, &align); |
||
1059 | arm64_func_va_list_gr_offs = (a[i] / 2 - 7 + |
||
1060 | (!(a[i] & 1) && size > 8)) * 8; |
||
1061 | } |
||
1062 | else if (a[i] < 32) { |
||
1063 | uint32_t hfa = arm64_hfa(&sym->type, 0); |
||
1064 | arm64_func_va_list_vr_offs = (a[i] / 2 - 16 + |
||
1065 | (hfa ? hfa : 1)) * 16; |
||
1066 | } |
||
1067 | |||
1068 | // HFAs of float and double need to be written differently: |
||
1069 | if (16 <= a[i] && a[i] < 32 && (sym->type.t & VT_BTYPE) == VT_STRUCT) { |
||
1070 | uint32_t j, sz, k = arm64_hfa(&sym->type, &sz); |
||
1071 | if (sz < 16) |
||
1072 | for (j = 0; j < k; j++) { |
||
1073 | o(0x3d0003e0 | -(sz & 8) << 27 | (sz & 4) << 29 | |
||
1074 | ((a[i] - 16) / 2 + j) | (off / sz + j) << 10); |
||
1075 | // str ([sdq])(*),[sp,#(j * sz)] |
||
1076 | } |
||
1077 | } |
||
1078 | } |
||
1079 | |||
1080 | tcc_free(a); |
||
1081 | tcc_free(t); |
||
1082 | |||
1083 | o(0x910003fd); // mov x29,sp |
||
1084 | arm64_func_sub_sp_offset = ind; |
||
1085 | // In gfunc_epilog these will be replaced with code to decrement SP: |
||
1086 | o(0xd503201f); // nop |
||
1087 | o(0xd503201f); // nop |
||
1088 | loc = 0; |
||
1089 | } |
||
1090 | |||
1091 | ST_FUNC void gen_va_start(void) |
||
1092 | { |
||
1093 | int r; |
||
1094 | --vtop; // we don't need the "arg" |
||
1095 | gaddrof(); |
||
1096 | r = intr(gv(RC_INT)); |
||
1097 | |||
1098 | if (arm64_func_va_list_stack) { |
||
1099 | //xx could use add (immediate) here |
||
1100 | arm64_movimm(30, arm64_func_va_list_stack + 224); |
||
1101 | o(0x8b1e03be); // add x30,x29,x30 |
||
1102 | } |
||
1103 | else |
||
1104 | o(0x910383be); // add x30,x29,#224 |
||
1105 | o(0xf900001e | r << 5); // str x30,[x(r)] |
||
1106 | |||
1107 | if (arm64_func_va_list_gr_offs) { |
||
1108 | if (arm64_func_va_list_stack) |
||
1109 | o(0x910383be); // add x30,x29,#224 |
||
1110 | o(0xf900041e | r << 5); // str x30,[x(r),#8] |
||
1111 | } |
||
1112 | |||
1113 | if (arm64_func_va_list_vr_offs) { |
||
1114 | o(0x910243be); // add x30,x29,#144 |
||
1115 | o(0xf900081e | r << 5); // str x30,[x(r),#16] |
||
1116 | } |
||
1117 | |||
1118 | arm64_movimm(30, arm64_func_va_list_gr_offs); |
||
1119 | o(0xb900181e | r << 5); // str w30,[x(r),#24] |
||
1120 | |||
1121 | arm64_movimm(30, arm64_func_va_list_vr_offs); |
||
1122 | o(0xb9001c1e | r << 5); // str w30,[x(r),#28] |
||
1123 | |||
1124 | --vtop; |
||
1125 | } |
||
1126 | |||
1127 | ST_FUNC void gen_va_arg(CType *t) |
||
1128 | { |
||
1129 | int align, size = type_size(t, &align); |
||
1130 | int fsize, hfa = arm64_hfa(t, &fsize); |
||
1131 | uint32_t r0, r1; |
||
1132 | |||
1133 | if (is_float(t->t)) { |
||
1134 | hfa = 1; |
||
1135 | fsize = size; |
||
1136 | } |
||
1137 | |||
1138 | gaddrof(); |
||
1139 | r0 = intr(gv(RC_INT)); |
||
1140 | r1 = get_reg(RC_INT); |
||
1141 | vtop[0].r = r1 | lvalue_type(t->t); |
||
1142 | r1 = intr(r1); |
||
1143 | |||
1144 | if (!hfa) { |
||
1145 | uint32_t n = size > 16 ? 8 : (size + 7) & -8; |
||
1146 | o(0xb940181e | r0 << 5); // ldr w30,[x(r0),#24] // __gr_offs |
||
1147 | if (align == 16) { |
||
1148 | assert(0); // this path untested but needed for __uint128_t |
||
1149 | o(0x11003fde); // add w30,w30,#15 |
||
1150 | o(0x121c6fde); // and w30,w30,#-16 |
||
1151 | } |
||
1152 | o(0x310003c0 | r1 | n << 10); // adds w(r1),w30,#(n) |
||
1153 | o(0x540000ad); // b.le .+20 |
||
1154 | o(0xf9400000 | r1 | r0 << 5); // ldr x(r1),[x(r0)] // __stack |
||
1155 | o(0x9100001e | r1 << 5 | n << 10); // add x30,x(r1),#(n) |
||
1156 | o(0xf900001e | r0 << 5); // str x30,[x(r0)] // __stack |
||
1157 | o(0x14000004); // b .+16 |
||
1158 | o(0xb9001800 | r1 | r0 << 5); // str w(r1),[x(r0),#24] // __gr_offs |
||
1159 | o(0xf9400400 | r1 | r0 << 5); // ldr x(r1),[x(r0),#8] // __gr_top |
||
1160 | o(0x8b3ec000 | r1 | r1 << 5); // add x(r1),x(r1),w30,sxtw |
||
1161 | if (size > 16) |
||
1162 | o(0xf9400000 | r1 | r1 << 5); // ldr x(r1),[x(r1)] |
||
1163 | } |
||
1164 | else { |
||
1165 | uint32_t rsz = hfa << 4; |
||
1166 | uint32_t ssz = (size + 7) & -(uint32_t)8; |
||
1167 | uint32_t b1, b2; |
||
1168 | o(0xb9401c1e | r0 << 5); // ldr w30,[x(r0),#28] // __vr_offs |
||
1169 | o(0x310003c0 | r1 | rsz << 10); // adds w(r1),w30,#(rsz) |
||
1170 | b1 = ind; o(0x5400000d); // b.le lab1 |
||
1171 | o(0xf9400000 | r1 | r0 << 5); // ldr x(r1),[x(r0)] // __stack |
||
1172 | if (fsize == 16) { |
||
1173 | o(0x91003c00 | r1 | r1 << 5); // add x(r1),x(r1),#15 |
||
1174 | o(0x927cec00 | r1 | r1 << 5); // and x(r1),x(r1),#-16 |
||
1175 | } |
||
1176 | o(0x9100001e | r1 << 5 | ssz << 10); // add x30,x(r1),#(ssz) |
||
1177 | o(0xf900001e | r0 << 5); // str x30,[x(r0)] // __stack |
||
1178 | b2 = ind; o(0x14000000); // b lab2 |
||
1179 | // lab1: |
||
1180 | write32le(cur_text_section->data + b1, 0x5400000d | (ind - b1) << 3); |
||
1181 | o(0xb9001c00 | r1 | r0 << 5); // str w(r1),[x(r0),#28] // __vr_offs |
||
1182 | o(0xf9400800 | r1 | r0 << 5); // ldr x(r1),[x(r0),#16] // __vr_top |
||
1183 | if (hfa == 1 || fsize == 16) |
||
1184 | o(0x8b3ec000 | r1 | r1 << 5); // add x(r1),x(r1),w30,sxtw |
||
1185 | else { |
||
1186 | // We need to change the layout of this HFA. |
||
1187 | // Get some space on the stack using global variable "loc": |
||
1188 | loc = (loc - size) & -(uint32_t)align; |
||
1189 | o(0x8b3ec000 | 30 | r1 << 5); // add x30,x(r1),w30,sxtw |
||
1190 | arm64_movimm(r1, loc); |
||
1191 | o(0x8b0003a0 | r1 | r1 << 16); // add x(r1),x29,x(r1) |
||
1192 | o(0x4c402bdc | (uint32_t)fsize << 7 | |
||
1193 | (uint32_t)(hfa == 2) << 15 | |
||
1194 | (uint32_t)(hfa == 3) << 14); // ld1 {v28.(4s|2d),...},[x30] |
||
1195 | o(0x0d00801c | r1 << 5 | (fsize == 8) << 10 | |
||
1196 | (uint32_t)(hfa != 2) << 13 | |
||
1197 | (uint32_t)(hfa != 3) << 21); // st(hfa) {v28.(s|d),...}[0],[x(r1)] |
||
1198 | } |
||
1199 | // lab2: |
||
1200 | write32le(cur_text_section->data + b2, 0x14000000 | (ind - b2) >> 2); |
||
1201 | } |
||
1202 | } |
||
1203 | |||
1204 | ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, |
||
1205 | int *align, int *regsize) |
||
1206 | { |
||
1207 | return 0; |
||
1208 | } |
||
1209 | |||
1210 | ST_FUNC void greturn(void) |
||
1211 | { |
||
1212 | CType *t = &func_vt; |
||
1213 | unsigned long a; |
||
1214 | |||
1215 | arm64_pcs(0, &t, &a); |
||
1216 | switch (a) { |
||
1217 | case -1: |
||
1218 | break; |
||
1219 | case 0: |
||
1220 | if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { |
||
1221 | int align, size = type_size(&func_vt, &align); |
||
1222 | gaddrof(); |
||
1223 | gv(RC_R(0)); |
||
1224 | arm64_ldrs(0, size); |
||
1225 | } |
||
1226 | else |
||
1227 | gv(RC_IRET); |
||
1228 | break; |
||
1229 | case 1: { |
||
1230 | CType type = func_vt; |
||
1231 | mk_pointer(&type); |
||
1232 | vset(&type, VT_LOCAL | VT_LVAL, func_vc); |
||
1233 | indir(); |
||
1234 | vswap(); |
||
1235 | vstore(); |
||
1236 | break; |
||
1237 | } |
||
1238 | case 16: |
||
1239 | if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { |
||
1240 | uint32_t j, sz, n = arm64_hfa(&vtop->type, &sz); |
||
1241 | gaddrof(); |
||
1242 | gv(RC_R(0)); |
||
1243 | for (j = 0; j < n; j++) |
||
1244 | o(0x3d400000 | |
||
1245 | (sz & 16) << 19 | -(sz & 8) << 27 | (sz & 4) << 29 | |
||
1246 | j | j << 10); // ldr ([sdq])(*),[x0,#(j * sz)] |
||
1247 | } |
||
1248 | else |
||
1249 | gv(RC_FRET); |
||
1250 | break; |
||
1251 | default: |
||
1252 | assert(0); |
||
1253 | } |
||
1254 | } |
||
1255 | |||
1256 | ST_FUNC void gfunc_epilog(void) |
||
1257 | { |
||
1258 | if (loc) { |
||
1259 | // Insert instructions to subtract size of stack frame from SP. |
||
1260 | unsigned char *ptr = cur_text_section->data + arm64_func_sub_sp_offset; |
||
1261 | uint64_t diff = (-loc + 15) & ~15; |
||
1262 | if (!(diff >> 24)) { |
||
1263 | if (diff & 0xfff) // sub sp,sp,#(diff & 0xfff) |
||
1264 | write32le(ptr, 0xd10003ff | (diff & 0xfff) << 10); |
||
1265 | if (diff >> 12) // sub sp,sp,#(diff >> 12),lsl #12 |
||
1266 | write32le(ptr + 4, 0xd14003ff | (diff >> 12) << 10); |
||
1267 | } |
||
1268 | else { |
||
1269 | // In this case we may subtract more than necessary, |
||
1270 | // but always less than 17/16 of what we were aiming for. |
||
1271 | int i = 0; |
||
1272 | int j = 0; |
||
1273 | while (diff >> 20) { |
||
1274 | diff = (diff + 0xffff) >> 16; |
||
1275 | ++i; |
||
1276 | } |
||
1277 | while (diff >> 16) { |
||
1278 | diff = (diff + 1) >> 1; |
||
1279 | ++j; |
||
1280 | } |
||
1281 | write32le(ptr, 0xd2800010 | diff << 5 | i << 21); |
||
1282 | // mov x16,#(diff),lsl #(16 * i) |
||
1283 | write32le(ptr + 4, 0xcb3063ff | j << 10); |
||
1284 | // sub sp,sp,x16,lsl #(j) |
||
1285 | } |
||
1286 | } |
||
1287 | o(0x910003bf); // mov sp,x29 |
||
1288 | o(0xa8ce7bfd); // ldp x29,x30,[sp],#224 |
||
1289 | |||
1290 | o(0xd65f03c0); // ret |
||
1291 | } |
||
1292 | |||
1293 | // Generate forward branch to label: |
||
1294 | ST_FUNC int gjmp(int t) |
||
1295 | { |
||
1296 | int r = ind; |
||
1297 | o(t); |
||
1298 | return r; |
||
1299 | } |
||
1300 | |||
1301 | // Generate branch to known address: |
||
1302 | ST_FUNC void gjmp_addr(int a) |
||
1303 | { |
||
1304 | assert(a - ind + 0x8000000 < 0x10000000); |
||
1305 | o(0x14000000 | ((a - ind) >> 2 & 0x3ffffff)); |
||
1306 | } |
||
1307 | |||
1308 | ST_FUNC int gtst(int inv, int t) |
||
1309 | { |
||
1310 | int bt = vtop->type.t & VT_BTYPE; |
||
1311 | if (bt == VT_LDOUBLE) { |
||
1312 | uint32_t a, b, f = fltr(gv(RC_FLOAT)); |
||
1313 | a = get_reg(RC_INT); |
||
1314 | vpushi(0); |
||
1315 | vtop[0].r = a; |
||
1316 | b = get_reg(RC_INT); |
||
1317 | a = intr(a); |
||
1318 | b = intr(b); |
||
1319 | o(0x4e083c00 | a | f << 5); // mov x(a),v(f).d[0] |
||
1320 | o(0x4e183c00 | b | f << 5); // mov x(b),v(f).d[1] |
||
1321 | o(0xaa000400 | a | a << 5 | b << 16); // orr x(a),x(a),x(b),lsl #1 |
||
1322 | o(0xb4000040 | a | !!inv << 24); // cbz/cbnz x(a),.+8 |
||
1323 | --vtop; |
||
1324 | } |
||
1325 | else if (bt == VT_FLOAT || bt == VT_DOUBLE) { |
||
1326 | uint32_t a = fltr(gv(RC_FLOAT)); |
||
1327 | o(0x1e202008 | a << 5 | (bt != VT_FLOAT) << 22); // fcmp |
||
1328 | o(0x54000040 | !!inv); // b.eq/b.ne .+8 |
||
1329 | } |
||
1330 | else { |
||
1331 | uint32_t ll = (bt == VT_PTR || bt == VT_LLONG); |
||
1332 | uint32_t a = intr(gv(RC_INT)); |
||
1333 | o(0x34000040 | a | !!inv << 24 | ll << 31); // cbz/cbnz wA,.+8 |
||
1334 | } |
||
1335 | --vtop; |
||
1336 | return gjmp(t); |
||
1337 | } |
||
1338 | |||
1339 | static int arm64_iconst(uint64_t *val, SValue *sv) |
||
1340 | { |
||
1341 | if ((sv->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST) |
||
1342 | return 0; |
||
1343 | if (val) { |
||
1344 | int t = sv->type.t; |
||
1345 | *val = ((t & VT_BTYPE) == VT_LLONG ? sv->c.i : |
||
1346 | (uint32_t)sv->c.i | |
||
1347 | (t & VT_UNSIGNED ? 0 : -(sv->c.i & 0x80000000))); |
||
1348 | } |
||
1349 | return 1; |
||
1350 | } |
||
1351 | |||
1352 | static int arm64_gen_opic(int op, uint32_t l, int rev, uint64_t val, |
||
1353 | uint32_t x, uint32_t a) |
||
1354 | { |
||
1355 | if (op == '-' && !rev) { |
||
1356 | val = -val; |
||
1357 | op = '+'; |
||
1358 | } |
||
1359 | val = l ? val : (uint32_t)val; |
||
1360 | |||
1361 | switch (op) { |
||
1362 | |||
1363 | case '+': { |
||
1364 | uint32_t s = l ? val >> 63 : val >> 31; |
||
1365 | val = s ? -val : val; |
||
1366 | val = l ? val : (uint32_t)val; |
||
1367 | if (!(val & ~(uint64_t)0xfff)) |
||
1368 | o(0x11000000 | l << 31 | s << 30 | x | a << 5 | val << 10); |
||
1369 | else if (!(val & ~(uint64_t)0xfff000)) |
||
1370 | o(0x11400000 | l << 31 | s << 30 | x | a << 5 | val >> 12 << 10); |
||
1371 | else { |
||
1372 | arm64_movimm(30, val); // use x30 |
||
1373 | o(0x0b1e0000 | l << 31 | s << 30 | x | a << 5); |
||
1374 | } |
||
1375 | return 1; |
||
1376 | } |
||
1377 | |||
1378 | case '-': |
||
1379 | if (!val) |
||
1380 | o(0x4b0003e0 | l << 31 | x | a << 16); // neg |
||
1381 | else if (val == (l ? (uint64_t)-1 : (uint32_t)-1)) |
||
1382 | o(0x2a2003e0 | l << 31 | x | a << 16); // mvn |
||
1383 | else { |
||
1384 | arm64_movimm(30, val); // use x30 |
||
1385 | o(0x4b0003c0 | l << 31 | x | a << 16); // sub |
||
1386 | } |
||
1387 | return 1; |
||
1388 | |||
1389 | case '^': |
||
1390 | if (val == -1 || (val == 0xffffffff && !l)) { |
||
1391 | o(0x2a2003e0 | l << 31 | x | a << 16); // mvn |
||
1392 | return 1; |
||
1393 | } |
||
1394 | // fall through |
||
1395 | case '&': |
||
1396 | case '|': { |
||
1397 | int e = arm64_encode_bimm64(l ? val : val | val << 32); |
||
1398 | if (e < 0) |
||
1399 | return 0; |
||
1400 | o((op == '&' ? 0x12000000 : |
||
1401 | op == '|' ? 0x32000000 : 0x52000000) | |
||
1402 | l << 31 | x | a << 5 | (uint32_t)e << 10); |
||
1403 | return 1; |
||
1404 | } |
||
1405 | |||
1406 | case TOK_SAR: |
||
1407 | case TOK_SHL: |
||
1408 | case TOK_SHR: { |
||
1409 | uint32_t n = 32 << l; |
||
1410 | val = val & (n - 1); |
||
1411 | if (rev) |
||
1412 | return 0; |
||
1413 | if (!val) |
||
1414 | assert(0); |
||
1415 | else if (op == TOK_SHL) |
||
1416 | o(0x53000000 | l << 31 | l << 22 | x | a << 5 | |
||
1417 | (n - val) << 16 | (n - 1 - val) << 10); // lsl |
||
1418 | else |
||
1419 | o(0x13000000 | (op == TOK_SHR) << 30 | l << 31 | l << 22 | |
||
1420 | x | a << 5 | val << 16 | (n - 1) << 10); // lsr/asr |
||
1421 | return 1; |
||
1422 | } |
||
1423 | |||
1424 | } |
||
1425 | return 0; |
||
1426 | } |
||
1427 | |||
1428 | static void arm64_gen_opil(int op, uint32_t l) |
||
1429 | { |
||
1430 | uint32_t x, a, b; |
||
1431 | |||
1432 | // Special treatment for operations with a constant operand: |
||
1433 | { |
||
1434 | uint64_t val; |
||
1435 | int rev = 1; |
||
1436 | |||
1437 | if (arm64_iconst(0, &vtop[0])) { |
||
1438 | vswap(); |
||
1439 | rev = 0; |
||
1440 | } |
||
1441 | if (arm64_iconst(&val, &vtop[-1])) { |
||
1442 | gv(RC_INT); |
||
1443 | a = intr(vtop[0].r); |
||
1444 | --vtop; |
||
1445 | x = get_reg(RC_INT); |
||
1446 | ++vtop; |
||
1447 | if (arm64_gen_opic(op, l, rev, val, intr(x), a)) { |
||
1448 | vtop[0].r = x; |
||
1449 | vswap(); |
||
1450 | --vtop; |
||
1451 | return; |
||
1452 | } |
||
1453 | } |
||
1454 | if (!rev) |
||
1455 | vswap(); |
||
1456 | } |
||
1457 | |||
1458 | gv2(RC_INT, RC_INT); |
||
1459 | assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST); |
||
1460 | a = intr(vtop[-1].r); |
||
1461 | b = intr(vtop[0].r); |
||
1462 | vtop -= 2; |
||
1463 | x = get_reg(RC_INT); |
||
1464 | ++vtop; |
||
1465 | vtop[0].r = x; |
||
1466 | x = intr(x); |
||
1467 | |||
1468 | switch (op) { |
||
1469 | case '%': |
||
1470 | // Use x30 for quotient: |
||
1471 | o(0x1ac00c00 | l << 31 | 30 | a << 5 | b << 16); // sdiv |
||
1472 | o(0x1b008000 | l << 31 | x | (uint32_t)30 << 5 | |
||
1473 | b << 16 | a << 10); // msub |
||
1474 | break; |
||
1475 | case '&': |
||
1476 | o(0x0a000000 | l << 31 | x | a << 5 | b << 16); // and |
||
1477 | break; |
||
1478 | case '*': |
||
1479 | o(0x1b007c00 | l << 31 | x | a << 5 | b << 16); // mul |
||
1480 | break; |
||
1481 | case '+': |
||
1482 | o(0x0b000000 | l << 31 | x | a << 5 | b << 16); // add |
||
1483 | break; |
||
1484 | case '-': |
||
1485 | o(0x4b000000 | l << 31 | x | a << 5 | b << 16); // sub |
||
1486 | break; |
||
1487 | case '/': |
||
1488 | o(0x1ac00c00 | l << 31 | x | a << 5 | b << 16); // sdiv |
||
1489 | break; |
||
1490 | case '^': |
||
1491 | o(0x4a000000 | l << 31 | x | a << 5 | b << 16); // eor |
||
1492 | break; |
||
1493 | case '|': |
||
1494 | o(0x2a000000 | l << 31 | x | a << 5 | b << 16); // orr |
||
1495 | break; |
||
1496 | case TOK_EQ: |
||
1497 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1498 | o(0x1a9f17e0 | x); // cset wA,eq |
||
1499 | break; |
||
1500 | case TOK_GE: |
||
1501 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1502 | o(0x1a9fb7e0 | x); // cset wA,ge |
||
1503 | break; |
||
1504 | case TOK_GT: |
||
1505 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1506 | o(0x1a9fd7e0 | x); // cset wA,gt |
||
1507 | break; |
||
1508 | case TOK_LE: |
||
1509 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1510 | o(0x1a9fc7e0 | x); // cset wA,le |
||
1511 | break; |
||
1512 | case TOK_LT: |
||
1513 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1514 | o(0x1a9fa7e0 | x); // cset wA,lt |
||
1515 | break; |
||
1516 | case TOK_NE: |
||
1517 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1518 | o(0x1a9f07e0 | x); // cset wA,ne |
||
1519 | break; |
||
1520 | case TOK_SAR: |
||
1521 | o(0x1ac02800 | l << 31 | x | a << 5 | b << 16); // asr |
||
1522 | break; |
||
1523 | case TOK_SHL: |
||
1524 | o(0x1ac02000 | l << 31 | x | a << 5 | b << 16); // lsl |
||
1525 | break; |
||
1526 | case TOK_SHR: |
||
1527 | o(0x1ac02400 | l << 31 | x | a << 5 | b << 16); // lsr |
||
1528 | break; |
||
1529 | case TOK_UDIV: |
||
1530 | case TOK_PDIV: |
||
1531 | o(0x1ac00800 | l << 31 | x | a << 5 | b << 16); // udiv |
||
1532 | break; |
||
1533 | case TOK_UGE: |
||
1534 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1535 | o(0x1a9f37e0 | x); // cset wA,cs |
||
1536 | break; |
||
1537 | case TOK_UGT: |
||
1538 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1539 | o(0x1a9f97e0 | x); // cset wA,hi |
||
1540 | break; |
||
1541 | case TOK_ULT: |
||
1542 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1543 | o(0x1a9f27e0 | x); // cset wA,cc |
||
1544 | break; |
||
1545 | case TOK_ULE: |
||
1546 | o(0x6b00001f | l << 31 | a << 5 | b << 16); // cmp |
||
1547 | o(0x1a9f87e0 | x); // cset wA,ls |
||
1548 | break; |
||
1549 | case TOK_UMOD: |
||
1550 | // Use x30 for quotient: |
||
1551 | o(0x1ac00800 | l << 31 | 30 | a << 5 | b << 16); // udiv |
||
1552 | o(0x1b008000 | l << 31 | x | (uint32_t)30 << 5 | |
||
1553 | b << 16 | a << 10); // msub |
||
1554 | break; |
||
1555 | default: |
||
1556 | assert(0); |
||
1557 | } |
||
1558 | } |
||
1559 | |||
1560 | ST_FUNC void gen_opi(int op) |
||
1561 | { |
||
1562 | arm64_gen_opil(op, 0); |
||
1563 | } |
||
1564 | |||
1565 | ST_FUNC void gen_opl(int op) |
||
1566 | { |
||
1567 | arm64_gen_opil(op, 1); |
||
1568 | } |
||
1569 | |||
1570 | ST_FUNC void gen_opf(int op) |
||
1571 | { |
||
1572 | uint32_t x, a, b, dbl; |
||
1573 | |||
1574 | if (vtop[0].type.t == VT_LDOUBLE) { |
||
1575 | CType type = vtop[0].type; |
||
1576 | int func = 0; |
||
1577 | int cond = -1; |
||
1578 | switch (op) { |
||
1579 | case '*': func = TOK___multf3; break; |
||
1580 | case '+': func = TOK___addtf3; break; |
||
1581 | case '-': func = TOK___subtf3; break; |
||
1582 | case '/': func = TOK___divtf3; break; |
||
1583 | case TOK_EQ: func = TOK___eqtf2; cond = 1; break; |
||
1584 | case TOK_NE: func = TOK___netf2; cond = 0; break; |
||
1585 | case TOK_LT: func = TOK___lttf2; cond = 10; break; |
||
1586 | case TOK_GE: func = TOK___getf2; cond = 11; break; |
||
1587 | case TOK_LE: func = TOK___letf2; cond = 12; break; |
||
1588 | case TOK_GT: func = TOK___gttf2; cond = 13; break; |
||
1589 | default: assert(0); break; |
||
1590 | } |
||
1591 | vpush_global_sym(&func_old_type, func); |
||
1592 | vrott(3); |
||
1593 | gfunc_call(2); |
||
1594 | vpushi(0); |
||
1595 | vtop->r = cond < 0 ? REG_FRET : REG_IRET; |
||
1596 | if (cond < 0) |
||
1597 | vtop->type = type; |
||
1598 | else { |
||
1599 | o(0x7100001f); // cmp w0,#0 |
||
1600 | o(0x1a9f07e0 | (uint32_t)cond << 12); // cset w0,(cond) |
||
1601 | } |
||
1602 | return; |
||
1603 | } |
||
1604 | |||
1605 | dbl = vtop[0].type.t != VT_FLOAT; |
||
1606 | gv2(RC_FLOAT, RC_FLOAT); |
||
1607 | assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST); |
||
1608 | a = fltr(vtop[-1].r); |
||
1609 | b = fltr(vtop[0].r); |
||
1610 | vtop -= 2; |
||
1611 | switch (op) { |
||
1612 | case TOK_EQ: case TOK_NE: |
||
1613 | case TOK_LT: case TOK_GE: case TOK_LE: case TOK_GT: |
||
1614 | x = get_reg(RC_INT); |
||
1615 | ++vtop; |
||
1616 | vtop[0].r = x; |
||
1617 | x = intr(x); |
||
1618 | break; |
||
1619 | default: |
||
1620 | x = get_reg(RC_FLOAT); |
||
1621 | ++vtop; |
||
1622 | vtop[0].r = x; |
||
1623 | x = fltr(x); |
||
1624 | break; |
||
1625 | } |
||
1626 | |||
1627 | switch (op) { |
||
1628 | case '*': |
||
1629 | o(0x1e200800 | dbl << 22 | x | a << 5 | b << 16); // fmul |
||
1630 | break; |
||
1631 | case '+': |
||
1632 | o(0x1e202800 | dbl << 22 | x | a << 5 | b << 16); // fadd |
||
1633 | break; |
||
1634 | case '-': |
||
1635 | o(0x1e203800 | dbl << 22 | x | a << 5 | b << 16); // fsub |
||
1636 | break; |
||
1637 | case '/': |
||
1638 | o(0x1e201800 | dbl << 22 | x | a << 5 | b << 16); // fdiv |
||
1639 | break; |
||
1640 | case TOK_EQ: |
||
1641 | o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp |
||
1642 | o(0x1a9f17e0 | x); // cset w(x),eq |
||
1643 | break; |
||
1644 | case TOK_GE: |
||
1645 | o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp |
||
1646 | o(0x1a9fb7e0 | x); // cset w(x),ge |
||
1647 | break; |
||
1648 | case TOK_GT: |
||
1649 | o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp |
||
1650 | o(0x1a9fd7e0 | x); // cset w(x),gt |
||
1651 | break; |
||
1652 | case TOK_LE: |
||
1653 | o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp |
||
1654 | o(0x1a9f87e0 | x); // cset w(x),ls |
||
1655 | break; |
||
1656 | case TOK_LT: |
||
1657 | o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp |
||
1658 | o(0x1a9f57e0 | x); // cset w(x),mi |
||
1659 | break; |
||
1660 | case TOK_NE: |
||
1661 | o(0x1e202000 | dbl << 22 | a << 5 | b << 16); // fcmp |
||
1662 | o(0x1a9f07e0 | x); // cset w(x),ne |
||
1663 | break; |
||
1664 | default: |
||
1665 | assert(0); |
||
1666 | } |
||
1667 | } |
||
1668 | |||
1669 | // Generate sign extension from 32 to 64 bits: |
||
1670 | ST_FUNC void gen_cvt_sxtw(void) |
||
1671 | { |
||
1672 | uint32_t r = intr(gv(RC_INT)); |
||
1673 | o(0x93407c00 | r | r << 5); // sxtw x(r),w(r) |
||
1674 | } |
||
1675 | |||
1676 | ST_FUNC void gen_cvt_itof(int t) |
||
1677 | { |
||
1678 | if (t == VT_LDOUBLE) { |
||
1679 | int f = vtop->type.t; |
||
1680 | int func = (f & VT_BTYPE) == VT_LLONG ? |
||
1681 | (f & VT_UNSIGNED ? TOK___floatunditf : TOK___floatditf) : |
||
1682 | (f & VT_UNSIGNED ? TOK___floatunsitf : TOK___floatsitf); |
||
1683 | vpush_global_sym(&func_old_type, func); |
||
1684 | vrott(2); |
||
1685 | gfunc_call(1); |
||
1686 | vpushi(0); |
||
1687 | vtop->type.t = t; |
||
1688 | vtop->r = REG_FRET; |
||
1689 | return; |
||
1690 | } |
||
1691 | else { |
||
1692 | int d, n = intr(gv(RC_INT)); |
||
1693 | int s = !(vtop->type.t & VT_UNSIGNED); |
||
1694 | uint32_t l = ((vtop->type.t & VT_BTYPE) == VT_LLONG); |
||
1695 | --vtop; |
||
1696 | d = get_reg(RC_FLOAT); |
||
1697 | ++vtop; |
||
1698 | vtop[0].r = d; |
||
1699 | o(0x1e220000 | (uint32_t)!s << 16 | |
||
1700 | (uint32_t)(t != VT_FLOAT) << 22 | fltr(d) | |
||
1701 | l << 31 | n << 5); // [us]cvtf [sd](d),[wx](n) |
||
1702 | } |
||
1703 | } |
||
1704 | |||
1705 | ST_FUNC void gen_cvt_ftoi(int t) |
||
1706 | { |
||
1707 | if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { |
||
1708 | int func = (t & VT_BTYPE) == VT_LLONG ? |
||
1709 | (t & VT_UNSIGNED ? TOK___fixunstfdi : TOK___fixtfdi) : |
||
1710 | (t & VT_UNSIGNED ? TOK___fixunstfsi : TOK___fixtfsi); |
||
1711 | vpush_global_sym(&func_old_type, func); |
||
1712 | vrott(2); |
||
1713 | gfunc_call(1); |
||
1714 | vpushi(0); |
||
1715 | vtop->type.t = t; |
||
1716 | vtop->r = REG_IRET; |
||
1717 | return; |
||
1718 | } |
||
1719 | else { |
||
1720 | int d, n = fltr(gv(RC_FLOAT)); |
||
1721 | uint32_t l = ((vtop->type.t & VT_BTYPE) != VT_FLOAT); |
||
1722 | --vtop; |
||
1723 | d = get_reg(RC_INT); |
||
1724 | ++vtop; |
||
1725 | vtop[0].r = d; |
||
1726 | o(0x1e380000 | |
||
1727 | (uint32_t)!!(t & VT_UNSIGNED) << 16 | |
||
1728 | (uint32_t)((t & VT_BTYPE) == VT_LLONG) << 31 | intr(d) | |
||
1729 | l << 22 | n << 5); // fcvtz[su] [wx](d),[sd](n) |
||
1730 | } |
||
1731 | } |
||
1732 | |||
1733 | ST_FUNC void gen_cvt_ftof(int t) |
||
1734 | { |
||
1735 | int f = vtop[0].type.t; |
||
1736 | assert(t == VT_FLOAT || t == VT_DOUBLE || t == VT_LDOUBLE); |
||
1737 | assert(f == VT_FLOAT || f == VT_DOUBLE || f == VT_LDOUBLE); |
||
1738 | if (t == f) |
||
1739 | return; |
||
1740 | |||
1741 | if (t == VT_LDOUBLE || f == VT_LDOUBLE) { |
||
1742 | int func = (t == VT_LDOUBLE) ? |
||
1743 | (f == VT_FLOAT ? TOK___extendsftf2 : TOK___extenddftf2) : |
||
1744 | (t == VT_FLOAT ? TOK___trunctfsf2 : TOK___trunctfdf2); |
||
1745 | vpush_global_sym(&func_old_type, func); |
||
1746 | vrott(2); |
||
1747 | gfunc_call(1); |
||
1748 | vpushi(0); |
||
1749 | vtop->type.t = t; |
||
1750 | vtop->r = REG_FRET; |
||
1751 | } |
||
1752 | else { |
||
1753 | int x, a; |
||
1754 | gv(RC_FLOAT); |
||
1755 | assert(vtop[0].r < VT_CONST); |
||
1756 | a = fltr(vtop[0].r); |
||
1757 | --vtop; |
||
1758 | x = get_reg(RC_FLOAT); |
||
1759 | ++vtop; |
||
1760 | vtop[0].r = x; |
||
1761 | x = fltr(x); |
||
1762 | |||
1763 | if (f == VT_FLOAT) |
||
1764 | o(0x1e22c000 | x | a << 5); // fcvt d(x),s(a) |
||
1765 | else |
||
1766 | o(0x1e624000 | x | a << 5); // fcvt s(x),d(a) |
||
1767 | } |
||
1768 | } |
||
1769 | |||
1770 | ST_FUNC void ggoto(void) |
||
1771 | { |
||
1772 | arm64_gen_bl_or_b(1); |
||
1773 | --vtop; |
||
1774 | } |
||
1775 | |||
1776 | ST_FUNC void gen_clear_cache(void) |
||
1777 | { |
||
1778 | uint32_t beg, end, dsz, isz, p, lab1, b1; |
||
1779 | gv2(RC_INT, RC_INT); |
||
1780 | vpushi(0); |
||
1781 | vtop->r = get_reg(RC_INT); |
||
1782 | vpushi(0); |
||
1783 | vtop->r = get_reg(RC_INT); |
||
1784 | vpushi(0); |
||
1785 | vtop->r = get_reg(RC_INT); |
||
1786 | beg = intr(vtop[-4].r); // x0 |
||
1787 | end = intr(vtop[-3].r); // x1 |
||
1788 | dsz = intr(vtop[-2].r); // x2 |
||
1789 | isz = intr(vtop[-1].r); // x3 |
||
1790 | p = intr(vtop[0].r); // x4 |
||
1791 | vtop -= 5; |
||
1792 | |||
1793 | o(0xd53b0020 | isz); // mrs x(isz),ctr_el0 |
||
1794 | o(0x52800080 | p); // mov w(p),#4 |
||
1795 | o(0x53104c00 | dsz | isz << 5); // ubfx w(dsz),w(isz),#16,#4 |
||
1796 | o(0x1ac02000 | dsz | p << 5 | dsz << 16); // lsl w(dsz),w(p),w(dsz) |
||
1797 | o(0x12000c00 | isz | isz << 5); // and w(isz),w(isz),#15 |
||
1798 | o(0x1ac02000 | isz | p << 5 | isz << 16); // lsl w(isz),w(p),w(isz) |
||
1799 | o(0x51000400 | p | dsz << 5); // sub w(p),w(dsz),#1 |
||
1800 | o(0x8a240004 | p | beg << 5 | p << 16); // bic x(p),x(beg),x(p) |
||
1801 | b1 = ind; o(0x14000000); // b |
||
1802 | lab1 = ind; |
||
1803 | o(0xd50b7b20 | p); // dc cvau,x(p) |
||
1804 | o(0x8b000000 | p | p << 5 | dsz << 16); // add x(p),x(p),x(dsz) |
||
1805 | write32le(cur_text_section->data + b1, 0x14000000 | (ind - b1) >> 2); |
||
1806 | o(0xeb00001f | p << 5 | end << 16); // cmp x(p),x(end) |
||
1807 | o(0x54ffffa3 | ((lab1 - ind) << 3 & 0xffffe0)); // b.cc lab1 |
||
1808 | o(0xd5033b9f); // dsb ish |
||
1809 | o(0x51000400 | p | isz << 5); // sub w(p),w(isz),#1 |
||
1810 | o(0x8a240004 | p | beg << 5 | p << 16); // bic x(p),x(beg),x(p) |
||
1811 | b1 = ind; o(0x14000000); // b |
||
1812 | lab1 = ind; |
||
1813 | o(0xd50b7520 | p); // ic ivau,x(p) |
||
1814 | o(0x8b000000 | p | p << 5 | isz << 16); // add x(p),x(p),x(isz) |
||
1815 | write32le(cur_text_section->data + b1, 0x14000000 | (ind - b1) >> 2); |
||
1816 | o(0xeb00001f | p << 5 | end << 16); // cmp x(p),x(end) |
||
1817 | o(0x54ffffa3 | ((lab1 - ind) << 3 & 0xffffe0)); // b.cc lab1 |
||
1818 | o(0xd5033b9f); // dsb ish |
||
1819 | o(0xd5033fdf); // isb |
||
1820 | } |
||
1821 | |||
1822 | ST_FUNC void gen_vla_sp_save(int addr) { |
||
1823 | uint32_t r = intr(get_reg(RC_INT)); |
||
1824 | o(0x910003e0 | r); // mov x(r),sp |
||
1825 | arm64_strx(3, r, 29, addr); |
||
1826 | } |
||
1827 | |||
1828 | ST_FUNC void gen_vla_sp_restore(int addr) { |
||
1829 | uint32_t r = intr(get_reg(RC_INT)); |
||
1830 | arm64_ldrx(0, 3, r, 29, addr); |
||
1831 | o(0x9100001f | r << 5); // mov sp,x(r) |
||
1832 | } |
||
1833 | |||
1834 | ST_FUNC void gen_vla_alloc(CType *type, int align) { |
||
1835 | uint32_t r = intr(gv(RC_INT)); |
||
1836 | o(0x91003c00 | r | r << 5); // add x(r),x(r),#15 |
||
1837 | o(0x927cec00 | r | r << 5); // bic x(r),x(r),#15 |
||
1838 | o(0xcb2063ff | r << 16); // sub sp,sp,x(r) |
||
1839 | vpop(); |
||
1840 | } |
||
1841 | |||
1842 | /* end of A64 code generator */ |
||
1843 | /*************************************************************/ |
||
1844 | #endif |
||
1845 | /*************************************************************/ |