i386-gen.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170
  1. /*
  2. * X86 code generator for TCC
  3. *
  4. * Copyright (c) 2001-2004 Fabrice Bellard
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS 5
#define NB_ASM_REGS 8
#define CONFIG_TCC_ASM

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it). */
#define RC_INT 0x0001 /* generic integer register */
#define RC_FLOAT 0x0002 /* generic float register */
#define RC_EAX 0x0004
#define RC_ST0 0x0008
#define RC_ECX 0x0010
#define RC_EDX 0x0020
#define RC_EBX 0x0040
#define RC_IRET RC_EAX /* function return: integer register */
#define RC_LRET RC_EDX /* function return: second integer register */
#define RC_FRET RC_ST0 /* function return: float register */

/* pretty names for the registers */
enum {
    TREG_EAX = 0,
    TREG_ECX,
    TREG_EDX,
    TREG_EBX,
    TREG_ST0,
    /* NOTE: TREG_ST0 also evaluates to 4; ESP deliberately shares the
       value since ST0 is float-class only (see reg_classes) and never
       competes with ESP in integer register allocation */
    TREG_ESP = 4
};

/* return registers for function */
#define REG_IRET TREG_EAX /* single word int return register */
#define REG_LRET TREG_EDX /* second word return register (for long long) */
#define REG_FRET TREG_ST0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* defined if structures are passed as pointers. Otherwise structures
   are directly pushed on stack. */
/* #define FUNC_STRUCT_PARAM_AS_PTR */

/* pointer size, in bytes */
#define PTR_SIZE 4

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE 12
#define LDOUBLE_ALIGN 4

/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN 8

/******************************************************/
  64. #else /* ! TARGET_DEFS_ONLY */
  65. /******************************************************/
  66. #include "tcc.h"
  67. /* define to 1/0 to [not] have EBX as 4th register */
  68. #define USE_EBX 0
/* register class table, indexed by TREG_*: lists the RC_* classes each
   register can satisfy. The EBX entry collapses to 0 (never allocated)
   when USE_EBX is 0. */
ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_EAX,
    /* ecx */ RC_INT | RC_ECX,
    /* edx */ RC_INT | RC_EDX,
    /* ebx */ (RC_INT | RC_EBX) * USE_EBX,
    /* st0 */ RC_FLOAT | RC_ST0,
};
/* output offset recorded just after the prolog placeholder; gfunc_epilog
   rewinds to (this - FUNC_PROLOG_SIZE) to emit the real prolog once the
   frame size is known */
static unsigned long func_sub_sp_offset;
/* byte count for 'ret $n' when the callee cleans the stack (stdcall /
   fastcallw / struct-return pointer) */
static int func_ret_sub;
#ifdef CONFIG_TCC_BCHECK
/* start of this function's entries in the lbounds section */
static addr_t func_bound_offset;
/* output offset of the bound-checking placeholder instructions */
static unsigned long func_bound_ind;
#endif
/* XXX: make it faster ? */
/* Append the low byte of 'c' to the current text section, growing the
   section buffer when needed. No-op while code generation is suppressed
   (nocode_wanted). */
ST_FUNC void g(int c)
{
    int ind1;
    if (nocode_wanted)
        return;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}
/* Emit the bytes of 'c' in little-endian order, stopping once the
   remaining value is zero. Note the quirk: o(0) emits nothing, and a
   zero byte is only emitted if a non-zero byte follows it -- opcode
   constants throughout this file are encoded with that in mind. */
ST_FUNC void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
  101. ST_FUNC void gen_le16(int v)
  102. {
  103. g(v);
  104. g(v >> 8);
  105. }
  106. ST_FUNC void gen_le32(int c)
  107. {
  108. g(c);
  109. g(c >> 8);
  110. g(c >> 16);
  111. g(c >> 24);
  112. }
/* output a symbol and patch all calls to it */
/* 't' heads a linked list of 32-bit displacement slots in the text
   section; each slot currently stores the offset of the next slot.
   Patch every slot with the PC-relative displacement to target 'a'
   (the -4 accounts for the size of the displacement field itself). */
ST_FUNC void gsym_addr(int t, int a)
{
    while (t) {
        unsigned char *ptr = cur_text_section->data + t;
        uint32_t n = read32le(ptr); /* next value */
        write32le(ptr, a - t - 4);
        t = n;
    }
}
/* Resolve forward-jump list 't' to the current output position. */
ST_FUNC void gsym(int t)
{
    gsym_addr(t, ind);
}
/* instruction + 4 bytes data. Return the address of the data */
/* Used to build forward-jump chains: the returned offset is where the
   32-bit immediate lives, so gsym_addr() can patch it later. When code
   generation is suppressed, 's' is returned unchanged so chains stay
   consistent. */
static int oad(int c, int s)
{
    int t;
    if (nocode_wanted)
        return s;
    o(c);
    t = ind;
    gen_le32(s);
    return t;
}
  138. ST_FUNC void gen_fill_nops(int bytes)
  139. {
  140. while (bytes--)
  141. g(0x90);
  142. }
  143. /* generate jmp to a label */
  144. #define gjmp2(instr,lbl) oad(instr,lbl)
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_386_32);
    gen_le32(c);
}
/* Output a PC-relative constant, with an R_386_PC32 relocation when
   'r & VT_SYM'. The '- 4' compensates for the displacement being
   measured from the end of the 4-byte field. */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloc(cur_text_section, sym, ind, R_386_PC32);
    gen_le32(c - 4);
}
/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    op_reg = op_reg << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference: disp32 absolute addressing, with
           relocation when r carries VT_SYM */
        o(0x05 | op_reg);
        gen_addr32(r, sym, c);
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short (disp8) reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            /* disp32 reference */
            oad(0x85 | op_reg, c);
        }
    } else {
        /* register-indirect through r, no displacement */
        g(0x00 | op_reg | (r & VT_VALMASK));
    }
}
/* load 'r' from value 'sv' */
ST_FUNC void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;
    /* qualifiers do not change the generated code */
    ft &= ~(VT_VOLATILE | VT_CONSTANT);

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        if (v == VT_LLOCAL) {
            /* double indirection: first load the pointer stored in the
               local slot into an integer register, then load through it */
            v1.type.t = VT_INT;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & RC_INT))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        /* choose the load opcode from the type; for the x87 loads, 'r'
           is reused as the /reg opcode extension of the modrm byte */
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            o(0xd9); /* flds */
            r = 0;
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            o(0xdd); /* fldl */
            r = 0;
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            o(0xdb); /* fldt */
            r = 5;
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            o(0xbe0f); /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            o(0xb60f); /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            o(0xbf0f); /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            o(0xb70f); /* movzwl */
        } else {
            o(0x8b); /* movl */
        }
        gen_modrm(r, fr, sv->sym, fc);
    } else {
        if (v == VT_CONST) {
            o(0xb8 + r); /* mov $xx, r */
            gen_addr32(fr, sv->sym, fc);
        } else if (v == VT_LOCAL) {
            if (fc) {
                o(0x8d); /* lea xxx(%ebp), r */
                gen_modrm(r, VT_LOCAL, sv->sym, fc);
            } else {
                o(0x89);
                o(0xe8 + r); /* mov %ebp, r */
            }
        } else if (v == VT_CMP) {
            /* materialize comparison flags into a register:
               mov $0,r then setcc on the low byte */
            oad(0xb8 + r, 0); /* mov $0, r */
            o(0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + r);
        } else if (v == VT_JMP || v == VT_JMPI) {
            /* materialize a pending jump chain as a 0/1 value;
               the low bit of v selects the non-taken constant */
            t = v & 1;
            oad(0xb8 + r, t); /* mov $1, r */
            o(0x05eb); /* jmp after */
            gsym(fc);
            oad(0xb8 + r, t ^ 1); /* mov $0, r */
        } else if (v != r) {
            o(0x89);
            o(0xc0 + r + v * 8); /* mov v, r */
        }
    }
}
/* store register 'r' in lvalue 'v' */
ST_FUNC void store(int r, SValue *v)
{
    int fr, bt, ft, fc;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    ft = v->type.t;
    fc = v->c.i;
    fr = v->r & VT_VALMASK;
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;
    /* XXX: incorrect if float reg to reg */
    /* for x87 stores, 'r' is repurposed as the /reg opcode extension
       of the following modrm byte */
    if (bt == VT_FLOAT) {
        o(0xd9); /* fsts */
        r = 2;
    } else if (bt == VT_DOUBLE) {
        o(0xdd); /* fstpl */
        r = 2;
    } else if (bt == VT_LDOUBLE) {
        /* duplicate st(0) first so the value survives the popping store */
        o(0xc0d9); /* fld %st(0) */
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66); /* operand-size prefix for 16-bit store */
        if (bt == VT_BYTE || bt == VT_BOOL)
            o(0x88);
        else
            o(0x89);
    }
    if (fr == VT_CONST ||
        fr == VT_LOCAL ||
        (v->r & VT_LVAL)) {
        gen_modrm(r, v->r, v->sym, fc);
    } else if (fr != r) {
        o(0xc0 + fr + r * 8); /* mov r, fr */
    }
}
/* Add a constant to %esp, using the short imm8 encoding when the
   value fits in a signed byte. */
static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc483); /* add $imm8, %esp */
        g(val);
    } else {
        oad(0xc481, val); /* add $xxx, %esp */
    }
}
#if defined CONFIG_TCC_BCHECK || defined TCC_TARGET_PE
/* Emit a direct call to the runtime helper named by token 'v',
   leaving an R_386_PC32 relocation on the placeholder displacement. */
static void gen_static_call(int v)
{
    Sym *sym;

    sym = external_global_sym(v, &func_old_type, 0);
    oad(0xe8, -4); /* call rel32; -4 is the addend for the PC32 reloc */
    greloc(cur_text_section, sym, ind-4, R_386_PC32);
}
#endif
/* 'is_jmp' is '1' if it is a jump */
/* Emit a call (or tail jump) to the function value on top of the value
   stack; after a call, sign/zero-extend a sub-int return value in %eax. */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST && (vtop->r & VT_SYM)) {
        /* constant and relocation case */
        greloc(cur_text_section, vtop->sym, ind + 1, R_386_PC32);
        oad(0xe8 + is_jmp, vtop->c.i - 4); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = gv(RC_INT);
        o(0xff); /* call/jmp *r */
        o(0xd0 + r + (is_jmp << 4));
    }
    if (!is_jmp) {
        int rt;
        /* extend the return value to the whole register if necessary
           visual studio and gcc do not always set the whole eax register
           when assigning the return value of a function */
        rt = vtop->type.ref->type.t;
        switch (rt & VT_BTYPE) {
        case VT_BYTE:
            if (rt & VT_UNSIGNED) {
                o(0xc0b60f); /* movzx %al, %eax */
            }
            else {
                o(0xc0be0f); /* movsx %al, %eax */
            }
            break;
        case VT_SHORT:
            if (rt & VT_UNSIGNED) {
                o(0xc0b70f); /* movzx %ax, %eax */
            }
            else {
                o(0xc0bf0f); /* movsx %ax, %eax */
            }
            break;
        default:
            break;
        }
    }
}
/* argument registers used by the fastcall conventions (consumed by
   gfunc_call and gfunc_prolog); FASTCALL1..3 draw from fastcall_regs,
   FASTCALLW (Watcom-style) from fastcallw_regs */
static uint8_t fastcall_regs[3] = { TREG_EAX, TREG_EDX, TREG_ECX };
static uint8_t fastcallw_regs[2] = { TREG_ECX, TREG_EDX };
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
#ifdef TCC_TARGET_PE
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86
    *regsize = 4;
    size = type_size(vt, &align);
    /* only power-of-two sizes up to 8 bytes fit in EAX(:EDX) */
    if (size > 8 || (size & (size - 1)))
        return 0;
    /* represent the struct as the integer type of the same size */
    if (size == 8)
        ret->t = VT_LLONG;
    else if (size == 4)
        ret->t = VT_INT;
    else if (size == 2)
        ret->t = VT_SHORT;
    else
        ret->t = VT_BYTE;
    ret->ref = NULL;
    return 1;
#else
    *ret_align = 1; // Never have to re-align return values for x86
    return 0;
#endif
}
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This functions pops all the
   parameters and the function address. */
ST_FUNC void gfunc_call(int nb_args)
{
    int size, align, r, args_size, i, func_call;
    Sym *func_sym;

    args_size = 0;
    for(i = 0;i < nb_args; i++) {
        if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
            size = type_size(&vtop->type, &align);
            /* align to stack align size */
            size = (size + 3) & ~3;
            /* allocate the necessary size on stack */
            oad(0xec81, size); /* sub $xxx, %esp */
            /* generate structure store */
            r = get_reg(RC_INT);
            o(0x89); /* mov %esp, r */
            o(0xe0 + r);
            /* copy the struct into the freshly reserved stack area */
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            vstore();
            args_size += size;
        } else if (is_float(vtop->type.t)) {
            gv(RC_FLOAT); /* only one float register */
            if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
                size = 4;
            else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                size = 8;
            else
                size = 12;
            oad(0xec81, size); /* sub $xxx, %esp */
            if (size == 12)
                o(0x7cdb); /* fstpt 0(%esp) */
            else
                o(0x5cd9 + size - 4); /* fstp[s|l] 0(%esp) */
            g(0x24);
            g(0x00);
            args_size += size;
        } else {
            /* simple type (currently always same size) */
            /* XXX: implicit cast ? */
            r = gv(RC_INT);
            if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
                size = 8;
                o(0x50 + vtop->r2); /* push r (high word first) */
            } else {
                size = 4;
            }
            o(0x50 + r); /* push r */
            args_size += size;
        }
        vtop--;
    }
    save_regs(0); /* save used temporary registers */
    func_sym = vtop->type.ref;
    func_call = func_sym->f.func_call;
    /* fast call case */
    if ((func_call >= FUNC_FASTCALL1 && func_call <= FUNC_FASTCALL3) ||
        func_call == FUNC_FASTCALLW) {
        int fastcall_nb_regs;
        uint8_t *fastcall_regs_ptr;
        if (func_call == FUNC_FASTCALLW) {
            fastcall_regs_ptr = fastcallw_regs;
            fastcall_nb_regs = 2;
        } else {
            fastcall_regs_ptr = fastcall_regs;
            fastcall_nb_regs = func_call - FUNC_FASTCALL1 + 1;
        }
        /* move the leading stack arguments back into registers */
        for(i = 0;i < fastcall_nb_regs; i++) {
            if (args_size <= 0)
                break;
            o(0x58 + fastcall_regs_ptr[i]); /* pop r */
            /* XXX: incorrect for struct/floats */
            args_size -= 4;
        }
    }
#ifndef TCC_TARGET_PE
    else if ((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT)
        args_size -= 4; /* struct-return pointer is popped by the callee */
#endif
    gcall_or_jmp(0);

    /* caller cleans the stack unless the convention is callee-clean */
    if (args_size && func_call != FUNC_STDCALL && func_call != FUNC_FASTCALLW)
        gadd_sp(args_size);
    vtop--;
}
  468. #ifdef TCC_TARGET_PE
  469. #define FUNC_PROLOG_SIZE (10 + USE_EBX)
  470. #else
  471. #define FUNC_PROLOG_SIZE (9 + USE_EBX)
  472. #endif
/* generate function prolog of type 't' */
/* Reserves FUNC_PROLOG_SIZE bytes (filled in by gfunc_epilog once the
   frame size is known), spills fastcall register arguments to locals,
   and pushes parameter symbols. */
ST_FUNC void gfunc_prolog(CType *func_type)
{
    int addr, align, size, func_call, fastcall_nb_regs;
    int param_index, param_addr;
    uint8_t *fastcall_regs_ptr;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    func_call = sym->f.func_call;
    addr = 8; /* first stack parameter: above saved %ebp and return address */
    loc = 0;
    func_vc = 0;

    /* select the register-argument set for fastcall variants */
    if (func_call >= FUNC_FASTCALL1 && func_call <= FUNC_FASTCALL3) {
        fastcall_nb_regs = func_call - FUNC_FASTCALL1 + 1;
        fastcall_regs_ptr = fastcall_regs;
    } else if (func_call == FUNC_FASTCALLW) {
        fastcall_nb_regs = 2;
        fastcall_regs_ptr = fastcallw_regs;
    } else {
        fastcall_nb_regs = 0;
        fastcall_regs_ptr = NULL;
    }
    param_index = 0;

    /* leave room for the prolog; gfunc_epilog comes back to emit it */
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->f.func_type == FUNC_ELLIPSIS);
#ifdef TCC_TARGET_PE
    size = type_size(&func_vt,&align);
    if (((func_vt.t & VT_BTYPE) == VT_STRUCT)
        && (size > 8 || (size & (size - 1)))) {
#else
    if ((func_vt.t & VT_BTYPE) == VT_STRUCT) {
#endif
        /* XXX: fastcall case ? */
        func_vc = addr;
        addr += 4;
        param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        size = type_size(type, &align);
        size = (size + 3) & ~3; /* stack slots are word-aligned */
#ifdef FUNC_STRUCT_PARAM_AS_PTR
        /* structs are passed as pointer */
        if ((type->t & VT_BTYPE) == VT_STRUCT) {
            size = 4;
        }
#endif
        if (param_index < fastcall_nb_regs) {
            /* save FASTCALL register */
            loc -= 4;
            o(0x89); /* movl */
            gen_modrm(fastcall_regs_ptr[param_index], VT_LOCAL, NULL, loc);
            param_addr = loc;
        } else {
            param_addr = addr;
            addr += size;
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | lvalue_type(type->t), param_addr);
        param_index++;
    }
    func_ret_sub = 0;
    /* pascal type call or fastcall ? */
    if (func_call == FUNC_STDCALL || func_call == FUNC_FASTCALLW)
        func_ret_sub = addr - 8; /* callee pops its stack arguments */
#ifndef TCC_TARGET_PE
    else if (func_vc)
        func_ret_sub = 4; /* callee pops the struct-return pointer */
#endif

#ifdef CONFIG_TCC_BCHECK
    /* leave some room for bound checking code */
    if (tcc_state->do_bounds_check) {
        func_bound_offset = lbounds_section->data_offset;
        func_bound_ind = ind;
        oad(0xb8, 0); /* lbound section pointer */
        oad(0xb8, 0); /* call to function */
    }
#endif
}
/* generate function epilog */
/* Emits leave/ret, then rewinds 'ind' to fill the prolog placeholder
   reserved by gfunc_prolog with the real frame setup. */
ST_FUNC void gfunc_epilog(void)
{
    addr_t v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check
        && func_bound_offset != lbounds_section->data_offset) {
        addr_t saved_ind;
        addr_t *bounds_ptr;
        Sym *sym_data;

        /* add end of table info */
        bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
        *bounds_ptr = 0;

        /* generate bound local allocation: fill the two placeholder
           instructions gfunc_prolog left for us */
        saved_ind = ind;
        ind = func_bound_ind;
        sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                               func_bound_offset, lbounds_section->data_offset);
        greloc(cur_text_section, sym_data,
               ind + 1, R_386_32);
        oad(0xb8, 0); /* mov %eax, xxx */
        gen_static_call(TOK___bound_local_new);
        ind = saved_ind;

        /* generate bound check local freeing */
        o(0x5250); /* save returned value, if any */
        greloc(cur_text_section, sym_data, ind + 1, R_386_32);
        oad(0xb8, 0); /* mov %eax, xxx */
        gen_static_call(TOK___bound_local_delete);
        o(0x585a); /* restore returned value, if any */
    }
#endif

    /* align local size to word & save local variables */
    v = (-loc + 3) & -4;

#if USE_EBX
    /* restore callee-saved %ebx from its reserved stack slot */
    o(0x8b);
    gen_modrm(TREG_EBX, VT_LOCAL, NULL, -(v+4));
#endif

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    /* now that the frame size is known, go back and emit the prolog */
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
#ifdef TCC_TARGET_PE
    if (v >= 4096) {
        oad(0xb8, v); /* mov stacksize, %eax */
        gen_static_call(TOK___chkstk); /* call __chkstk, (does the stackframe too) */
    } else
#endif
    {
        o(0xe58955); /* push %ebp, mov %esp, %ebp */
        o(0xec81); /* sub esp, stacksize */
        gen_le32(v);
#ifdef TCC_TARGET_PE
        o(0x90); /* adjust to FUNC_PROLOG_SIZE */
#endif
    }
    /* 0x53*0 == 0 and o(0) emits nothing when USE_EBX is disabled */
    o(0x53 * USE_EBX); /* push ebx */
    ind = saved_ind;
}
/* generate a jump to a label */
/* Returns the new head of the forward-jump patch list (see oad/gsym). */
ST_FUNC int gjmp(int t)
{
    return gjmp2(0xe9, t); /* jmp rel32 */
}
/* generate a jump to a fixed address */
/* Uses the 2-byte short form (jmp rel8) when the displacement fits in
   a signed byte, otherwise the 5-byte rel32 form. */
ST_FUNC void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb); /* jmp rel8 */
        g(r);
    } else {
        oad(0xe9, a - ind - 5); /* jmp rel32 */
    }
}
/* Generate a conditional jump to fixed address 'a' from the test on
   top of the value stack; 'inv' inverts the sense. Pops the entry. */
ST_FUNC void gtst_addr(int inv, int a)
{
    int v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        /* flags already set: emit jcc directly */
        inv ^= (vtop--)->c.i;
        a -= ind + 2;
        if (a == (char)a) {
            g(inv - 32); /* jcc rel8 */
            g(a);
        } else {
            g(0x0f); /* jcc rel32 */
            oad(inv - 16, a - 4);
        }
    } else if ((v & ~1) == VT_JMP) {
        /* pending jump chains from && / ||: resolve toward 'a' */
        if ((v & 1) != inv) {
            gjmp_addr(a);
            gsym(vtop->c.i);
        } else {
            gsym(vtop->c.i);
            o(0x05eb); /* jmp over the following 5-byte jump */
            gjmp_addr(a);
        }
        vtop--;
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
/* Returns the (possibly extended) forward-jump patch list head 't'. */
ST_FUNC int gtst(int inv, int t)
{
    int v = vtop->r & VT_VALMASK;
    if (nocode_wanted) {
        ;
    } else if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        g(0x0f);
        t = gjmp2((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            uint32_t n1, n = vtop->c.i;
            if (n) {
                /* walk to the end of vtop's chain and splice 't' on */
                while ((n1 = read32le(cur_text_section->data + n)))
                    n = n1;
                write32le(cur_text_section->data + n, t);
                t = vtop->c.i;
            }
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
ST_FUNC void gen_opi(int op)
{
    int r, fr, opc, c;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        /* shared emitter for the x86 'group 1' ALU ops; 'opc' is the
           /reg opcode extension: add=0, or=1, adc=2, sbb=3, and=4,
           sub=5, xor=6, cmp=7 */
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* generate inc and dec for smaller code */
                if (c==1 && opc==0 && op != TOK_ADDC1) {
                    o (0x40 | r); // inc
                } else if (c==1 && opc==5 && op != TOK_SUBC1) {
                    o (0x48 | r); // dec
                } else {
                    o(0x83); /* op $imm8 (sign-extended), r */
                    o(0xc0 | (opc << 3) | r);
                    g(c);
                }
            } else {
                o(0x81); /* op $imm32, r */
                oad(0xc0 | (opc << 3) | r, c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            o((opc << 3) | 0x01);
            o(0xc0 + r + fr * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* comparison result lives in the CPU flags */
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        o(0xaf0f); /* imul fr, r */
        o(0xc0 + fr + r * 8);
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        /* shared emitter for shl/shr/sar; 'opc' is the /reg extension */
        opc = 0xc0 | (opc << 3);
        if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i & 0x1f; /* x86 masks shift counts to 5 bits */
            o(0xc1); /* shl/shr/sar $xxx, r */
            o(opc | r);
            g(c);
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_ECX);
            r = vtop[-1].r;
            o(0xd3); /* shl/shr/sar %cl, r */
            o(opc | r);
        }
        vtop--;
        break;
    case '/':
    case TOK_UDIV:
    case TOK_PDIV:
    case '%':
    case TOK_UMOD:
    case TOK_UMULL:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_EAX, RC_ECX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_EDX);
        /* save EAX too if used otherwise */
        save_reg_upstack(TREG_EAX, 1);
        if (op == TOK_UMULL) {
            o(0xf7); /* mul fr */
            o(0xe0 + fr);
            vtop->r2 = TREG_EDX; /* high word of the 64-bit product */
            r = TREG_EAX;
        } else {
            if (op == TOK_UDIV || op == TOK_UMOD) {
                o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
                o(0xf0 + fr);
            } else {
                o(0xf799); /* cltd, idiv fr, %eax */
                o(0xf8 + fr);
            }
            if (op == '%' || op == TOK_UMOD)
                r = TREG_EDX; /* remainder */
            else
                r = TREG_EAX; /* quotient */
        }
        vtop->r = r;
        break;
    default:
        /* all remaining ops are comparisons: cmp (/7) */
        opc = 7;
        goto gen_op8;
    }
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
ST_FUNC void gen_opf(int op)
{
    int a, ft, fc, swapped, r;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(RC_FLOAT);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(RC_FLOAT);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(RC_FLOAT);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if (op >= TOK_ULT && op <= TOK_GT) {
        /* load on stack second operand */
        load(TREG_ST0, vtop);
        save_reg(TREG_EAX); /* eax is used by FP comparison code */
        if (op == TOK_GE || op == TOK_GT)
            swapped = !swapped;
        else if (op == TOK_EQ || op == TOK_NE)
            swapped = 0;
        if (swapped)
            o(0xc9d9); /* fxch %st(1) */
        if (op == TOK_EQ || op == TOK_NE)
            o(0xe9da); /* fucompp */
        else
            o(0xd9de); /* fcompp */
        o(0xe0df); /* fnstsw %ax */
        /* translate the x87 condition bits now in %ah into a regular
           CPU-flags comparison for the VT_CMP machinery */
        if (op == TOK_EQ) {
            o(0x45e480); /* and $0x45, %ah */
            o(0x40fC80); /* cmp $0x40, %ah */
        } else if (op == TOK_NE) {
            o(0x45e480); /* and $0x45, %ah */
            o(0x40f480); /* xor $0x40, %ah */
            op = TOK_NE;
        } else if (op == TOK_GE || op == TOK_LE) {
            o(0x05c4f6); /* test $0x05, %ah */
            op = TOK_EQ;
        } else {
            o(0x45c4f6); /* test $0x45, %ah */
            op = TOK_EQ;
        }
        vtop--;
        vtop->r = VT_CMP;
        vtop->c.i = op;
    } else {
        /* no memory reference possible for long double operations */
        if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
            load(TREG_ST0, vtop);
            swapped = !swapped;
        }
        /* 'a' is the x87 arithmetic /reg selector: add=0, mul=1,
           sub=4, div=6; +1 selects the reversed-operand form */
        switch(op) {
        default:
        case '+':
            a = 0;
            break;
        case '-':
            a = 4;
            if (swapped)
                a++;
            break;
        case '*':
            a = 1;
            break;
        case '/':
            a = 6;
            if (swapped)
                a++;
            break;
        }
        ft = vtop->type.t;
        fc = vtop->c.i;
        if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
        } else {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_INT;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }
            if ((ft & VT_BTYPE) == VT_DOUBLE)
                o(0xdc);
            else
                o(0xd8);
            gen_modrm(a, r, vtop->sym, fc);
        }
        vtop--;
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
ST_FUNC void gen_cvt_itof(int t)
{
    save_reg(TREG_ST0);
    gv(RC_INT);
    if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
        /* signed long long to float/double/long double (unsigned case
           is handled generically) */
        o(0x50 + vtop->r2); /* push r2 (high word) */
        o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
        o(0x242cdf); /* fildll (%esp) */
        o(0x08c483); /* add $8, %esp */
    } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
               (VT_INT | VT_UNSIGNED)) {
        /* unsigned int to float/double/long double: zero-extend to a
           64-bit value on the stack, then use the 64-bit fild */
        o(0x6a); /* push $0 */
        g(0x00);
        o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
        o(0x242cdf); /* fildll (%esp) */
        o(0x08c483); /* add $8, %esp */
    } else {
        /* int to float/double/long double */
        o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
        o(0x2404db); /* fildl (%esp) */
        o(0x04c483); /* add $4, %esp */
    }
    vtop->r = TREG_ST0;
}
  971. /* convert fp to int 't' type */
  972. ST_FUNC void gen_cvt_ftoi(int t)
  973. {
  974. int bt = vtop->type.t & VT_BTYPE;
  975. if (bt == VT_FLOAT)
  976. vpush_global_sym(&func_old_type, TOK___fixsfdi);
  977. else if (bt == VT_LDOUBLE)
  978. vpush_global_sym(&func_old_type, TOK___fixxfdi);
  979. else
  980. vpush_global_sym(&func_old_type, TOK___fixdfdi);
  981. vswap();
  982. gfunc_call(1);
  983. vpushi(0);
  984. vtop->r = REG_IRET;
  985. vtop->r2 = REG_LRET;
  986. }
  987. /* convert from one floating point type to another */
ST_FUNC void gen_cvt_ftof(int t)
{
    /* all we have to do on i386 is to put the float in a register:
       the x87 register holds the value in its widest format, so no
       explicit conversion instruction is emitted ('t' is unused) */
    gv(RC_FLOAT);
}
  993. /* computed goto support */
ST_FUNC void ggoto(void)
{
    /* emit an indirect jump to the address on top of the value stack;
       the argument 1 selects the jump (rather than call) form */
    gcall_or_jmp(1);
    vtop--; /* pop the jump target */
}
  999. /* bound check support functions */
  1000. #ifdef CONFIG_TCC_BCHECK
  1001. /* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    /* Emit a call to the runtime bound checker for a pointer + offset
       addition. The two vstack operands are consumed and replaced by
       the checked pointer, marked VT_BOUNDED. */
    /* prepare fast i386 function call (args in eax and edx) */
    gv2(RC_EAX, RC_EDX);
    /* save all temporary registers */
    vtop -= 2;
    save_regs(0);
    /* do a fast function call */
    gen_static_call(TOK___bound_ptr_add);
    /* returned pointer is in eax */
    vtop++;
    vtop->r = TREG_EAX | VT_BOUNDED;
    /* address of bounding function call point: remember the offset of
       the relocation just emitted for the call, so that
       gen_bounded_ptr_deref() can later repatch it to a size-specific
       checker */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(Elf32_Rel));
}
  1017. /* patch pointer addition in vtop so that pointer dereferencing is
  1018. also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    Elf32_Rel *rel;
    Sym *sym;
    /* Determine the access size of the dereference so the matching
       __bound_ptr_indirN checker can be selected. */
    size = 0;
    /* XXX: put that code in generic part of tcc */
    if (!is_float(vtop->type.t)) {
        /* narrow integer loads are flagged on vtop->r, not the type */
        if (vtop->r & VT_LVAL_BYTE)
            size = 1;
        else if (vtop->r & VT_LVAL_SHORT)
            size = 2;
    }
    if (!size)
        size = type_size(&vtop->type, &align);
    switch(size) {
    case 1: func = TOK___bound_ptr_indir1; break;
    case 2: func = TOK___bound_ptr_indir2; break;
    case 4: func = TOK___bound_ptr_indir4; break;
    case 8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        tcc_error("unhandled size when dereferencing bounded pointer");
        func = 0;
        break;
    }
    /* patch relocation: vtop->c.i holds the offset of the relocation
       recorded by gen_bounded_ptr_add(); redirect it from
       __bound_ptr_add to the size-specific checker */
    /* XXX: find a better solution ? */
    rel = (Elf32_Rel *)(cur_text_section->reloc->data + vtop->c.i);
    sym = external_global_sym(func, &func_old_type, 0);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);
    rel->r_info = ELF32_R_INFO(sym->c, ELF32_R_TYPE(rel->r_info));
}
  1055. #endif
  1056. /* Save the stack pointer onto the stack */
/* Save the stack pointer into a local at ebp-relative offset 'addr' */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %esp,addr(%ebp)*/
    o(0x89); /* mov r32 -> r/m32 */
    gen_modrm(TREG_ESP, VT_LOCAL, NULL, addr);
}
  1062. /* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    /* mov addr(%ebp),%esp */
    o(0x8b); /* mov r/m32 -> r32 */
    gen_modrm(TREG_ESP, VT_LOCAL, NULL, addr);
}
  1067. /* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
    /* Allocate a variable-length array by moving %esp down by the
       size on top of the value stack. The caller is expected to pick
       up the new %esp afterwards (e.g. via gen_vla_sp_save). */
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows: it must also
       probe the guard pages, so call the runtime alloca instead */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b); /* sub r/m32 from r32 */
    o(0xe0 | r); /* modrm: %esp -= r */
    /* We align to 16 bytes rather than align */
    /* and ~15, %esp */
    o(0xf0e483);
    vpop(); /* drop the size operand */
#endif
}
  1086. /* end of X86 code generator */
  1087. /*************************************************************/
  1088. #endif
  1089. /*************************************************************/