Rev 9407 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
8834 | Boppan | 1 | import re |
8825 | Boppan | 2 | import os |
8837 | Boppan | 3 | import argparse |
8957 | Boppan | 4 | import sys |
8990 | Boppan | 5 | import pickle |
9402 | Boppan | 6 | import hashlib |
7 | import difflib |
||
8825 | Boppan | 8 | |
8957 | Boppan | 9 | # fasm keywords |
10 | keywords = [ |
||
9408 | Boppan | 11 | "align", "equ", "org", "while", "load", "store", "times", "repeat", |
12 | "display", "err", "assert", "if", "aaa", "aad", "aam", "aas", "adc", |
||
13 | "add", "addpd", "addps", "addsd", "addss", "addsubpd", "addsubps", "adox", |
||
14 | "aesdeclast", "aesenc", "aesenclast", "aesimc", "aeskeygenassist", "and", |
||
15 | "andnpd", "andnps", "andpd", "andps", "arpl", "bextr", "blendpd", |
||
16 | "blendvpd", "blendvps", "blsi", "blsmsk", "blsr", "bndcl", "bndcn", |
||
17 | "bndldx", "bndmk", "bndmov", "bndstx", "bound", "bsf", "bsr", "bswap", |
||
18 | "btc", "btr", "bts", "bzhi", "call", "cbw", "cdq", "cdqe", "clac", "clc", |
||
19 | "cldemote", "clflush", "clflushopt", "cli", "clts", "clwb", "cmc", "cmova", |
||
20 | "cmovb", "cmovbe", "cmovc", "cmove", "cmovg", "cmovge", "cmovl", "cmovle", |
||
21 | "cmovnae", "cmovnb", "cmovnbe", "cmovnc", "cmovne", "cmovng", "cmovnge", |
||
22 | "cmovnle", "cmovno", "cmovnp", "cmovns", "cmovnz", "cmovo", "cmovp", |
||
23 | "cmovpo", "cmovs", "cmovz", "cmp", "cmppd", "cmpps", "cmps", "cmpsb", |
||
24 | "cmpsd", "cmpsq", "cmpss", "cmpsw", "cmpxchg", "cmpxchg16b", "cmpxchg8b", |
||
25 | "comiss", "cpuid", "cqo", "crc32", "cvtdq2pd", "cvtdq2ps", "cvtpd2dq", |
||
26 | "cvtpd2ps", "cvtpi2pd", "cvtpi2ps", "cvtps2dq", "cvtps2pd", "cvtps2pi", |
||
27 | "cvtsd2ss", "cvtsi2sd", "cvtsi2ss", "cvtss2sd", "cvtss2si", "cvttpd2dq", |
||
28 | "cvttps2dq", "cvttps2pi", "cvttsd2si", "cvttss2si", "cwd", "cwde", "daa", |
||
29 | "dec", "div", "divpd", "divps", "divsd", "divss", "dppd", "dpps", "emms", |
||
30 | "extractps", "f2xm1", "fabs", "fadd", "faddp", "fbld", "fbstp", "fchs", |
||
31 | "fcmova", "fcmovae", "fcmovb", "fcmovbe", "fcmovc", "fcmove", "fcmovg", |
||
32 | "fcmovl", "fcmovle", "fcmovna", "fcmovnae", "fcmovnb", "fcmovnbe", |
||
33 | "fcmovne", "fcmovng", "fcmovnge", "fcmovnl", "fcmovnle", "fcmovno", |
||
34 | "fcmovns", "fcmovnz", "fcmovo", "fcmovp", "fcmovpe", "fcmovpo", "fcmovs", |
||
35 | "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp", "fdiv", |
||
36 | "fdivr", "fdivrp", "ffree", "fiadd", "ficom", "ficomp", "fidiv", "fidivr", |
||
37 | "fimul", "fincstp", "finit", "fist", "fistp", "fisttp", "fisub", "fisubr", |
||
38 | "fld1", "fldcw", "fldenv", "fldl2e", "fldl2t", "fldlg2", "fldln2", "fldpi", |
||
39 | "fmul", "fmulp", "fnclex", "fninit", "fnop", "fnsave", "fnstcw", "fnstenv", |
||
40 | "fpatan", "fprem", "fprem1", "fptan", "frndint", "frstor", "fsave", |
||
41 | "fsin", "fsincos", "fsqrt", "fst", "fstcw", "fstenv", "fstp", "fstsw", |
||
42 | "fsubp", "fsubr", "fsubrp", "ftst", "fucom", "fucomi", "fucomip", "fucomp", |
||
43 | "fwait", "fxam", "fxch", "fxrstor", "fxsave", "fxtract", "fyl2x", |
||
44 | "gf2p8affineinvqb", "gf2p8affineqb", "gf2p8mulb", "haddpd", "haddps", |
||
45 | "hsubpd", "hsubps", "idiv", "imul", "in", "inc", "ins", "insb", "insd", |
||
46 | "insw", "int", "int1", "int3", "into", "invd", "invlpg", "invpcid", "iret", |
||
47 | "jmp", "ja", "jae", "jb", "jbe", "jc", "jcxz", "jecxz", "je", "jg", "jge", |
||
48 | "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge", "jnl", |
||
49 | "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo", "js", "jz", "kaddb", |
||
50 | "kaddq", "kaddw", "kandb", "kandd", "kandnb", "kandnd", "kandnq", "kandnw", |
||
51 | "kandw", "kmovb", "kmovd", "kmovq", "kmovw", "knotb", "knotd", "knotq", |
||
52 | "korb", "kord", "korq", "kortestb", "kortestd", "kortestq", "kortestw", |
||
53 | "kshiftlb", "kshiftld", "kshiftlq", "kshiftlw", "kshiftrb", "kshiftrd", |
||
54 | "kshiftrw", "ktestb", "ktestd", "ktestq", "ktestw", "kunpckbw", "kunpckdq", |
||
55 | "kxnorb", "kxnord", "kxnorq", "kxnorw", "kxorb", "kxord", "kxorq", "kxorw", |
||
56 | "lar", "lddqu", "ldmxcsr", "lds", "lea", "leave", "les", "lfence", "lfs", |
||
57 | "lgs", "lidt", "lldt", "lmsw", "lock", "lods", "lodsb", "lodsd", "lodsq", |
||
58 | "loop", "loopa", "loopae", "loopb", "loopbe", "loopc", "loope", "loopg", |
||
59 | "loopl", "loople", "loopna", "loopnae", "loopnb", "loopnbe", "loopnc", |
||
60 | "loopng", "loopnge", "loopnl", "loopnle", "loopno", "loopnp", "loopns", |
||
61 | "loopo", "loopp", "looppe", "looppo", "loops", "loopz", "lsl", "lss", |
||
62 | "lzcnt", "maskmovdqu", "maskmovq", "maxpd", "maxps", "maxsd", "maxss", |
||
63 | "minpd", "minps", "minsd", "minss", "monitor", "mov", "movapd", "movaps", |
||
64 | "movd", "movddup", "movdir64b", "movdiri", "movdq2q", "movdqa", "movdqu", |
||
65 | "movhpd", "movhps", "movlhps", "movlpd", "movlps", "movmskpd", "movmskps", |
||
66 | "movntdqa", "movnti", "movntpd", "movntps", "movntq", "movq", "movq", |
||
67 | "movs", "movsb", "movsd", "movsd", "movshdup", "movsldup", "movsq", |
||
68 | "movsw", "movsx", "movsxd", "movupd", "movups", "movzx", "mpsadbw", "mul", |
||
69 | "mulps", "mulsd", "mulss", "mulx", "mwait", "neg", "nop", "not", "or", |
||
70 | "orps", "out", "outs", "outsb", "outsd", "outsw", "pabsb", "pabsd", |
||
71 | "pabsw", "packssdw", "packsswb", "packusdw", "packuswb", "paddb", "paddd", |
||
72 | "paddsb", "paddsw", "paddusb", "paddusw", "paddw", "palignr", "pand", |
||
73 | "pause", "pavgb", "pavgw", "pblendvb", "pblendw", "pclmulqdq", "pcmpeqb", |
||
74 | "pcmpeqq", "pcmpeqw", "pcmpestri", "pcmpestrm", "pcmpgtb", "pcmpgtd", |
||
75 | "pcmpgtw", "pcmpistri", "pcmpistrm", "pdep", "pext", "pextrb", "pextrd", |
||
76 | "pextrw", "phaddd", "phaddsw", "phaddw", "phminposuw", "phsubd", "phsubsw", |
||
77 | "pinsrb", "pinsrd", "pinsrq", "pinsrw", "pmaddubsw", "pmaddwd", "pmaxsb", |
||
78 | "pmaxsq", "pmaxsw", "pmaxub", "pmaxud", "pmaxuq", "pmaxuw", "pminsb", |
||
79 | "pminsq", "pminsw", "pminub", "pminud", "pminuq", "pminuw", "pmovmskb", |
||
80 | "pmovzx", "pmuldq", "pmulhrsw", "pmulhuw", "pmulhw", "pmulld", "pmullq", |
||
81 | "pmuludq", "pop", "popa", "popad", "popcnt", "popf", "popfd", "popfq", |
||
82 | "prefetchw", "prefetchh", "psadbw", "pshufb", "pshufd", "pshufhw", |
||
83 | "pshufw", "psignb", "psignd", "psignw", "pslld", "pslldq", "psllq", |
||
84 | "psrad", "psraq", "psraw", "psrld", "psrldq", "psrlq", "psrlw", "psubb", |
||
85 | "psubq", "psubsb", "psubsw", "psubusb", "psubusw", "psubw", "ptest", |
||
86 | "punpckhbw", "punpckhdq", "punpckhqdq", "punpckhwd", "punpcklbw", |
||
87 | "punpcklqdq", "punpcklwd", "push", "pushw", "pushd", "pusha", "pushad", |
||
88 | "pushfd", "pushfq", "pxor", "rcl", "rcpps", "rcpss", "rcr", "rdfsbase", |
||
89 | "rdmsr", "rdpid", "rdpkru", "rdpmc", "rdrand", "rdseed", "rdtsc", "rdtscp", |
||
90 | "repe", "repne", "repnz", "repz", "ret", "rol", "ror", "rorx", "roundpd", |
||
91 | "roundsd", "roundss", "rsm", "rsqrtps", "rsqrtss", "sahf", "sal", "sar", |
||
92 | "sbb", "scas", "scasb", "scasd", "scasw", "seta", "setae", "setb", "setbe", |
||
93 | "sete", "setg", "setge", "setl", "setle", "setna", "setnae", "setnb", |
||
94 | "setnc", "setne", "setng", "setnge", "setnl", "setnle", "setno", "setnp", |
||
95 | "setnz", "seto", "setp", "setpe", "setpo", "sets", "setz", "sfence", |
||
96 | "sha1msg1", "sha1msg2", "sha1nexte", "sha1rnds4", "sha256msg1", |
||
97 | "sha256rnds2", "shl", "shld", "shlx", "shr", "shrd", "shrx", "shufpd", |
||
98 | "sidt", "sldt", "smsw", "sqrtpd", "sqrtps", "sqrtsd", "sqrtss", "stac", |
||
99 | "std", "sti", "stmxcsr", "stos", "stosb", "stosd", "stosq", "stosw", "str", |
||
100 | "subpd", "subps", "subsd", "subss", "swapgs", "syscall", "sysenter", |
||
101 | "sysret", "test", "tpause", "tzcnt", "ucomisd", "ucomiss", "ud", |
||
102 | "umwait", "unpckhpd", "unpckhps", "unpcklpd", "unpcklps", "valignd", |
||
103 | "vblendmpd", "vblendmps", "vbroadcast", "vcompresspd", "vcompressps", |
||
104 | "vcvtpd2udq", "vcvtpd2uqq", "vcvtph2ps", "vcvtps2ph", "vcvtps2qq", |
||
105 | "vcvtps2uqq", "vcvtqq2pd", "vcvtqq2ps", "vcvtsd2usi", "vcvtss2usi", |
||
106 | "vcvttpd2udq", "vcvttpd2uqq", "vcvttps2qq", "vcvttps2udq", "vcvttps2uqq", |
||
107 | "vcvttss2usi", "vcvtudq2pd", "vcvtudq2ps", "vcvtuqq2pd", "vcvtuqq2ps", |
||
108 | "vcvtusi2ss", "vdbpsadbw", "verr", "verw", "vexpandpd", "vexpandps", |
||
109 | "vextractf32x4", "vextractf32x8", "vextractf64x2", "vextractf64x4", |
||
110 | "vextracti32x4", "vextracti32x8", "vextracti64x2", "vextracti64x4", |
||
111 | "vfixupimmps", "vfixupimmsd", "vfixupimmss", "vfmadd132pd", "vfmadd132ps", |
||
112 | "vfmadd132ss", "vfmadd213pd", "vfmadd213ps", "vfmadd213sd", "vfmadd213ss", |
||
113 | "vfmadd231ps", "vfmadd231sd", "vfmadd231ss", "vfmaddsub132pd", |
||
114 | "vfmaddsub213pd", "vfmaddsub213ps", "vfmaddsub231pd", "vfmaddsub231ps", |
||
115 | "vfmsub132ps", "vfmsub132sd", "vfmsub132ss", "vfmsub213pd", "vfmsub213ps", |
||
116 | "vfmsub213ss", "vfmsub231pd", "vfmsub231ps", "vfmsub231sd", "vfmsub231ss", |
||
117 | "vfmsubadd132ps", "vfmsubadd213pd", "vfmsubadd213ps", "vfmsubadd231pd", |
||
118 | "vfnmadd132pd", "vfnmadd132ps", "vfnmadd132sd", "vfnmadd132ss", |
||
119 | "vfnmadd213ps", "vfnmadd213sd", "vfnmadd213ss", "vfnmadd231pd", |
||
120 | "vfnmadd231sd", "vfnmadd231ss", "vfnmsub132pd", "vfnmsub132ps", |
||
121 | "vfnmsub132ss", "vfnmsub213pd", "vfnmsub213ps", "vfnmsub213sd", |
||
122 | "vfnmsub231pd", "vfnmsub231ps", "vfnmsub231sd", "vfnmsub231ss", |
||
123 | "vfpclassps", "vfpclasssd", "vfpclassss", "vgatherdpd", "vgatherdpd", |
||
124 | "vgatherdps", "vgatherqpd", "vgatherqpd", "vgatherqps", "vgatherqps", |
||
125 | "vgetexpps", "vgetexpsd", "vgetexpss", "vgetmantpd", "vgetmantps", |
||
126 | "vgetmantss", "vinsertf128", "vinsertf32x4", "vinsertf32x8", |
||
127 | "vinsertf64x4", "vinserti128", "vinserti32x4", "vinserti32x8", |
||
128 | "vinserti64x4", "vmaskmov", "vmovdqa32", "vmovdqa64", "vmovdqu16", |
||
129 | "vmovdqu64", "vmovdqu8", "vpblendd", "vpblendmb", "vpblendmd", "vpblendmq", |
||
130 | "vpbroadcast", "vpbroadcastb", "vpbroadcastd", "vpbroadcastm", |
||
131 | "vpbroadcastw", "vpcmpb", "vpcmpd", "vpcmpq", "vpcmpub", "vpcmpud", |
||
132 | "vpcmpuw", "vpcmpw", "vpcompressd", "vpcompressq", "vpconflictd", |
||
133 | "vperm2f128", "vperm2i128", "vpermb", "vpermd", "vpermi2b", "vpermi2d", |
||
134 | "vpermi2ps", "vpermi2q", "vpermi2w", "vpermilpd", "vpermilps", "vpermpd", |
||
135 | "vpermq", "vpermt2b", "vpermt2d", "vpermt2pd", "vpermt2ps", "vpermt2q", |
||
136 | "vpermw", "vpexpandd", "vpexpandq", "vpgatherdd", "vpgatherdd", |
||
137 | "vpgatherdq", "vpgatherqd", "vpgatherqd", "vpgatherqq", "vpgatherqq", |
||
138 | "vplzcntq", "vpmadd52huq", "vpmadd52luq", "vpmaskmov", "vpmovb2m", |
||
139 | "vpmovdb", "vpmovdw", "vpmovm2b", "vpmovm2d", "vpmovm2q", "vpmovm2w", |
||
140 | "vpmovqb", "vpmovqd", "vpmovqw", "vpmovsdb", "vpmovsdw", "vpmovsqb", |
||
141 | "vpmovsqw", "vpmovswb", "vpmovusdb", "vpmovusdw", "vpmovusqb", "vpmovusqd", |
||
142 | "vpmovuswb", "vpmovw2m", "vpmovwb", "vpmultishiftqb", "vprold", "vprolq", |
||
143 | "vprolvq", "vprord", "vprorq", "vprorvd", "vprorvq", "vpscatterdd", |
||
144 | "vpscatterqd", "vpscatterqq", "vpsllvd", "vpsllvq", "vpsllvw", "vpsravd", |
||
145 | "vpsravw", "vpsrlvd", "vpsrlvq", "vpsrlvw", "vpternlogd", "vpternlogq", |
||
146 | "vptestmd", "vptestmq", "vptestmw", "vptestnmb", "vptestnmd", "vptestnmq", |
||
147 | "vrangepd", "vrangeps", "vrangesd", "vrangess", "vrcp14pd", "vrcp14ps", |
||
148 | "vrcp14ss", "vreducepd", "vreduceps", "vreducesd", "vreducess", |
||
149 | "vrndscaleps", "vrndscalesd", "vrndscaless", "vrsqrt14pd", "vrsqrt14ps", |
||
150 | "vrsqrt14ss", "vscalefpd", "vscalefps", "vscalefsd", "vscalefss", |
||
151 | "vscatterdps", "vscatterqpd", "vscatterqps", "vshuff32x4", "vshuff64x2", |
||
152 | "vshufi64x2", "vtestpd", "vtestps", "vzeroall", "vzeroupper", "wait", |
||
153 | "wrfsbase", "wrgsbase", "wrmsr", "wrpkru", "xabort", "xacquire", "xadd", |
||
154 | "xchg", "xend", "xgetbv", "xlat", "xlatb", "xor", "xorpd", "xorps", |
||
9398 | Boppan | 155 | "xrstor", "xrstors", "xsave", "xsavec", "xsaveopt", "xsaves", "xsetbv", |
8957 | Boppan | 156 | ] |
157 | |||
# Names treated as fasm "types" by this script.
# NOTE(review): these look like fasm data-definition (d*) and
# reservation (r*) directives paired by operand size - confirm.
fasm_types = [
    "db", "rb", "dw", "rw", "dd", "rd", "dp", "rp",
    "df", "rf", "dq", "rq", "dt", "rt", "du",
]
168 | |||
9408 | Boppan | 169 | |
def id_add_kind(identifier, kind):
    """Append the flag string `kind` to the entry of `identifier` in id2kind.

    An identifier without an entry starts from an empty flag string.
    """
    id2kind[identifier] = id2kind.get(identifier, '') + kind
||
8976 | Boppan | 175 | |
9408 | Boppan | 176 | |
def id_remove_kind(identifier, kind):
    """Strip every occurrence of `kind` from the flags of `identifier`.

    Does nothing when the identifier has no entry in id2kind or when its
    flag string does not contain `kind`.
    """
    flags = id2kind.get(identifier)
    if flags is None or kind not in flags:
        return
    id2kind[identifier] = flags.replace(kind, '')
||
8976 | Boppan | 182 | |
9408 | Boppan | 183 | |
def id_get_kind(identifier):
    """Return the flag string stored for `identifier` in id2kind.

    Unknown identifiers yield an empty string.
    """
    return id2kind.get(identifier, '')
||
8976 | Boppan | 190 | |
9408 | Boppan | 191 | |
class LegacyAsmReader:
    """Character cursor over the lines of an assembly source file.

    The whole file is read into `self.lines` on construction. The cursor
    is the pair (`self.line_idx`, `self.i`): index of the current line and
    index of the current character inside that line.
    """

    def __init__(self, file):
        """Read `file` (UTF-8) into memory and put the cursor at its start."""
        self.file = file
        # Use a context manager so the handle is closed deterministically
        with open(file, "r", encoding="utf-8") as source:
            self.lines = source.readlines()
        self.line_idx = 0
        self.i = 0

    def currline(self):
        """Return the current line; raises IndexError past the last line."""
        return self.lines[self.line_idx]

    def curr(self):
        """Return the character under the cursor.

        Returns '' when the cursor is past the end of the current line or
        past the last line of the file.
        """
        try:
            return self.lines[self.line_idx][self.i]
        except IndexError:
            # Only an out-of-range cursor means "nothing here"; any other
            # error is a real bug and must not be silenced
            return ''

    def step(self):
        """Return the current character and advance the cursor by one.

        After advancing, a '\\' followed only by whitespace and then a
        comment (';') or the end of the line is taken as a line wrap:
        the cursor jumps to the start of the next line.
        """
        c = self.curr()
        self.i += 1
        # Wrap the line if '\\' followed by whitespaces and/or comment
        while self.curr() == '\\':
            i_of_backslash = self.i
            self.i += 1
            while self.curr().isspace():
                self.i += 1
            if self.curr() == ';' or self.curr() == '':
                self.line_idx += 1
                self.i = 0
            else:
                # There's something other than a comment after the backslash
                # So don't interpret the backslash as a line wrap
                self.i = i_of_backslash
                break
        return c

    def nextline(self):
        """Consume the rest of the current line and move to the next one.

        The remainder is consumed via step(), so line wraps met on the way
        are followed before the cursor moves on.
        """
        c = self.curr()
        while c != '':
            c = self.step()
        self.line_idx += 1
        self.i = 0

    def no_lines(self):
        """Return True when the cursor is past the last line of the file."""
        return self.line_idx >= len(self.lines)

    def location(self):
        """Return the cursor position as '<file>:<1-based line number>'."""
        return f"{self.file}:{self.line_idx + 1}"

    def skip_spaces(self):
        """Advance the cursor while it points at a whitespace character."""
        while self.curr().isspace():
            self.step()
||
8957 | Boppan | 245 | |
9408 | Boppan | 246 | |
class AsmReaderRecognizingStrings(LegacyAsmReader):
    """Reader that tracks whether the cursor is inside a quoted string.

    `self.in_string` holds the quotation mark that opened the current
    string, or None outside of strings. Tracking can be switched off
    through `self.should_recognize_strings`.
    """

    def __init__(self, file):
        super().__init__(file)
        self.in_string = None
        self.should_recognize_strings = True

    def step(self):
        """Advance one character and update the in-string state."""
        ch = super().step()
        # Only quotation marks matter, and only while tracking is enabled
        if not self.should_recognize_strings or ch not in ('"', "'"):
            return ch
        if self.in_string is None:
            # A quote met outside of a string opens a string
            self.in_string = ch
        elif self.in_string == ch:
            # The same kind of quote closes the string it opened
            self.in_string = None
        return ch
||
8957 | Boppan | 267 | |
9408 | Boppan | 268 | |
class AsmReaderReadingComments(AsmReaderRecognizingStrings):
    """Reader that collects ';' comments while the cursor moves.

    Per-line status is kept in `status_has_code` / `status_has_comment`;
    the text of the comment(s) seen so far accumulates in `self.comment`.
    """

    def __init__(self, file):
        super().__init__(file)
        self.status = {}
        self.comment = ''
        self.status_reset()

    def status_reset(self):
        """Reset the per-line flags for a fresh line."""
        # A fresh line is outside of any comment, so strings count again
        self.should_recognize_strings = True
        # No comment seen at the end of the line yet
        self.status_has_comment = False
        # No non-comment code seen on the line yet
        self.status_has_code = False

    def status_set_has_comment(self):
        """Mark the line as being inside a comment from here on."""
        # Quotes inside a comment must not open or close strings
        self.should_recognize_strings = False
        self.status_has_comment = True

    def status_set_has_code(self):
        """Mark the line as containing non-comment code."""
        self.status_has_code = True

    def update_status(self):
        """Update the line status from the character under the cursor."""
        ch = self.curr()
        if self.status_has_comment:
            # Already inside a comment: collect its text
            self.comment += ch
        elif not self.in_string and ch == ';':
            # ';' outside of a string starts a comment
            self.status_set_has_comment()
        elif not ch.isspace():
            # Anything else that is not whitespace counts as code
            self.status_set_has_code()

    def step(self):
        """Advance one character, then refresh the line status."""
        ch = super().step()
        self.update_status()
        return ch

    def nextline(self):
        """Move to the next line and decide whether to keep the comment."""
        left_line = self.currline()
        super().nextline()
        # A line with code ends the comment collected so far, except when
        # that line set align: the comment is then kept for the next line
        # (functions are often documented right before the align)
        if self.status_has_code and not left_line.startswith("align "):
            self.comment = ''
        # From now on the status describes the new line, starting with
        # its first character
        self.status_reset()
        self.update_status()
8957 | Boppan | 332 | |
9408 | Boppan | 333 | |
class AsmReaderFetchingIdentifiers(AsmReaderReadingComments):
    """Reader that can read a whole identifier at the cursor."""

    def __init__(self, file):
        super().__init__(file)

    def fetch_identifier(self):
        """Skip leading whitespace and return the identifier that follows.

        Returns '' when the first non-space character is not accepted
        by is_id().
        """
        self.skip_spaces()
        chars = []
        while is_id(self.curr()):
            chars.append(self.step())
        return ''.join(chars)
||
8963 | Boppan | 344 | |
9408 | Boppan | 345 | |
class AsmReader(AsmReaderFetchingIdentifiers):
    """Reader with all features of the reader classes it derives from."""

    def __init__(self, file):
        super().__init__(file)
||
8963 | Boppan | 349 | |
9408 | Boppan | 350 | |
def append_file(full_path, contents):
    """Append `contents` to the output identified by `full_path`.

    In debug mode nothing touches the disk: the text accumulates in the
    `output_files` dict under `full_path`. Otherwise the text is appended
    to the file at `full_path`.
    """
    if debug_mode:
        output_files[full_path] = output_files.get(full_path, "") + contents
        return
    # The context manager closes the file even if the write fails
    with open(full_path, "a") as f:
        f.write(contents)
||
9402 | Boppan | 360 | |
9408 | Boppan | 361 | |
class AsmElement:
    """Base class of everything found in the assembly sources.

    Stores where the element was found, its name and its comment, and
    knows how to emit a doxygen-commented declaration for it.
    """

    def __init__(self, location, name, comment):
        """Create an element.

        `location` is '<file>:<line>'; an element with an empty `comment`
        adds an "Undocumented element" entry to the global `warnings`.
        """
        global warnings

        # If the element was constructed during this execution then
        # the element is new
        self.new = True
        self.location = location
        # Split at the LAST colon so that paths which themselves contain
        # a colon (e.g. a Windows drive letter) stay intact
        location_parts = self.location.rsplit(':', 1)
        self.file = location_parts[0].replace('\\', '/')
        self.line = location_parts[1]
        self.name = name
        self.comment = comment

        if self.comment == '':
            warnings += f'{self.location}: Undocumented element\n'

    def dump(self):
        """Print the location, name and comment of the element."""
        print(f"\n{self.location}: {self.name}")
        print(f"{self.comment}")

    def emit(self, dest, doxycomment='', declaration=''):
        """Append the doxygen comment and declaration to the output file.

        The output file is `dest` + '/' + the source file path of the
        element. An empty `declaration` defaults to '#define <name>'.
        Elements whose comment contains '@dont_give_a_doxygen' are not
        emitted at all.
        """
        global warnings

        # Do not emit anything if the symbol is marked as hidden in its comment
        if '@dont_give_a_doxygen' in self.comment:
            return

        # Redefine default declaration
        if declaration == '':
            declaration = f'#define {self.name}'
        # Check doxycomment
        if not doxycomment.endswith('\n'):
            doxycomment += '\n'
        # Look at the first character after '@brief '; the slice keeps
        # this safe when there is no '@brief ' or nothing after it
        brief_text = doxycomment.partition('@brief ')[2]
        if brief_text[:1].islower():
            warnings += (f"{self.location}: Brief comment starting from " +
                         "lowercase\n")
        # Build contents to emit
        # NOTE(review): the listing this block was reviewed from skips a
        # line number inside the '@par Source' expression - confirm that
        # no part of it (e.g. link markup) was lost.
        contents = ''.join([
            '/**\n',
            doxycomment,
            f"@par Source\n" +
            f"{self.file}:{self.line}\n",
            '*/\n',
            declaration,
            '\n\n',
        ])
        # Get path to file to emit this
        full_path = dest + '/' + self.file
        # Remove the file on first access if it was
        # created by previous generation
        if full_path not in created_files:
            if os.path.isfile(full_path):
                os.remove(full_path)
            created_files.append(full_path)
        # Create directories needed for the file
        os.makedirs(os.path.dirname(full_path), exist_ok=True)
        # Replace every non-ASCII character with '?'
        contents = ''.join(ch if ord(ch) < 128 else '?' for ch in contents)

        append_file(full_path, contents)
9402 | Boppan | 420 | |
9408 | Boppan | 421 | |
class AsmVariable(AsmElement):
    """A variable: an element with a type and an initial value."""

    def __init__(self, location, name, comment, type, init):
        super().__init__(location, name, comment)
        self.type = type
        self.init = init

    def dump(self):
        """Print the element followed by a '(Variable)' marker."""
        super().dump()
        print(f"(Variable)\n---")

    def emit(self, dest):
        """Emit the variable as '<type> <name>;' with its doxygen comment."""
        # The comment becomes the brief unless it has its own '@brief'
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        doxycomment += (f"@par Initial value\n" +
                        f"{self.init}\n")
        # Dots are replaced with underscores in both name and type
        c_name = self.name.replace(".", "_")
        c_type = self.type.replace(".", "_")
        super().emit(dest, doxycomment, f"{c_type} {c_name};")
||
8855 | Boppan | 446 | |
9408 | Boppan | 447 | |
class AsmFunction(AsmElement):
    """A function: an element with a calling convention and arguments.

    `args` is a list of (name, type) pairs.
    """

    def __init__(self, location, name, comment, calling_convention,
                 args, used_regs):
        super().__init__(location, name, comment)
        self.calling_convention = calling_convention
        self.args = args
        self.used_regs = used_regs

    def dump(self):
        """Print the element followed by a '(Function)' marker."""
        super().dump()
        print(f"(Function)\n---")

    def _params_from_comment(self):
        """Return ('<name>', 'arg_t') for every '@param' in the comment."""
        text = self.comment
        end = len(text)
        params = []
        pos = text.find('@param')
        while pos != -1:
            # Skip '@param'
            pos += len('@param')
            # Skip spaces after '@param'; the bounds checks keep a comment
            # that ends right here from raising IndexError
            while pos < end and text[pos].isspace():
                pos += 1
            # Get the parameter name
            start = pos
            while pos < end and is_id(text[pos]):
                pos += 1
            params.append((text[start:pos], 'arg_t'))
            pos = text.find('@param', pos)
        return params

    def emit(self, dest):
        """Emit the function as 'void <name>(<args>);' with its comment."""
        # Build doxycomment specific for the function
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        # If there was no arguments, maybe that's just a label
        # then parse parameters from its comment
        if len(self.args) == 0 and '@param' in self.comment:
            self.args.extend(self._params_from_comment())
        # Build the arg list for declaration: '<type> <name>' per argument
        arg_list = '(' + ", ".join(f"{arg[1]} {arg[0]}"
                                   for arg in self.args) + ')'
        # Build the declaration
        name = self.name.replace(".", "_")
        declaration = f"void {name}{arg_list};"
        # Emit this
        super().emit(dest, doxycomment, declaration)
||
8855 | Boppan | 499 | |
9408 | Boppan | 500 | |
class AsmLabel(AsmElement):
    """A plain label."""

    def __init__(self, location, name, comment):
        super().__init__(location, name, comment)

    def dump(self):
        """Print the element followed by a '(Label)' marker."""
        super().dump()
        print(f"(Label)\n---")

    def emit(self, dest):
        """Emit the label as 'label <name>;' with its doxygen comment."""
        # The comment becomes the brief unless it has its own '@brief'
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        # Dots in the name are replaced with underscores
        c_name = self.name.replace(".", "_")
        super().emit(dest, doxycomment, f"label {c_name};")
||
8855 | Boppan | 520 | |
9408 | Boppan | 521 | |
class AsmMacro(AsmElement):
    """A macro: an element with a list of argument tokens."""

    def __init__(self, location, name, comment, args):
        super().__init__(location, name, comment)
        self.args = args

    def dump(self):
        """Print the element followed by a '(Macro)' marker."""
        super().dump()
        print(f"(Macro)\n---")

    def emit(self, dest):
        """Emit the macro as '#define <name>(<args>)' with its comment."""
        # Drop the '[', ']' and '*' tokens (substring test against "[]*",
        # which also drops empty tokens)
        plain_args = [arg for arg in self.args if arg not in "[]*"]
        # C-like argument list; no parentheses at all without arguments
        arg_list = '(' + ", ".join(plain_args) + ')' if plain_args else ""
        # The comment becomes the brief unless it has its own '@brief'
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        super().emit(dest, doxycomment, f"#define {self.name}{arg_list}")
||
8855 | Boppan | 554 | |
9408 | Boppan | 555 | |
class AsmStruct(AsmElement):
    """A structure: an element with a list of members."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        """Print the element followed by a '(Struct)' marker."""
        super().dump()
        print(f"(Struct)\n---")

    def emit(self, dest):
        """Emit the structure as a C-like struct with commented members."""
        # The comment becomes the brief unless it has its own '@brief'
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        doxycomment += '\n'
        # One line per member; only members that are exactly AsmVariable
        # are listed
        parts = [f"struct {self.name}" + " {\n"]
        for member in self.members:
            if type(member) is AsmVariable:
                parts.append(f'\t{member.type} {member.name}; ' +
                             f'/**< {member.comment} */\n')
        parts.append('};')
        super().emit(dest, doxycomment, ''.join(parts))
||
8855 | Boppan | 581 | |
9408 | Boppan | 582 | |
class AsmUnion(AsmElement):
    """A union: an element with a list of members."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        """Print the element followed by a '(Union)' marker."""
        super().dump()
        print(f"(Union)\n---")

    def emit(self, dest):
        """Emit the union as 'union <name> {};' (members are not listed)."""
        # The comment becomes the brief unless it has its own '@brief'
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        super().emit(dest, doxycomment, f"union {self.name}" + " {};")
||
8855 | Boppan | 602 | |
9408 | Boppan | 603 | |
class VariableNameIsMacroName:
    """Marker object that carries a name.

    NOTE(review): judging by the class name it flags a variable name that
    is actually a macro name - confirm at the place it is created.
    """

    def __init__(self, name):
        self.name = name
||
8855 | Boppan | 607 | |
9408 | Boppan | 608 | |
def is_id(c):
    """Tell whether the character `c` may be a part of an identifier.

    Printable characters qualify unless they are one of the listed
    operator, bracket, quote or whitespace characters. An empty string
    does not qualify.
    """
    # Kept as a string on purpose: '' is "in" any string, which is what
    # makes the empty string a non-identifier character
    not_id_chars = "+-/*=<>()[]{};:,|&~#`'\" \n\r\t\v"
    if not c.isprintable():
        return False
    return c not in not_id_chars
8855 | Boppan | 611 | |
9408 | Boppan | 612 | |
def is_starts_as_id(s):
    """Tell whether `s` starts like an identifier, i.e. not with a digit.

    `s` must not be empty: its first character is read unconditionally.
    """
    first = s[0]
    return not first.isdigit()
8957 | Boppan | 615 | |
9408 | Boppan | 616 | |
def parse_after_macro(r):
    """Parse a macro declaration; the reader `r` is right after 'macro'.

    Reads the macro name and its argument tokens, takes the comment the
    reader has collected by the end of the declaration line, then moves
    the reader to the '}' that closes the macro (a '}' preceded by a
    backslash does not count). Returns the resulting AsmMacro.

    Raises Exception on an unexpected character in the argument list.
    """
    location = r.location()

    # The macro name follows the spaces after the "macro" keyword;
    # '#' is accepted in the name as well
    r.skip_spaces()
    name = ""
    while is_id(r.curr()) or r.curr() == '#':
        name += r.step()
    r.skip_spaces()

    # Collect argument tokens until a comment, the body or the line end
    args = []
    arg = ''
    while True:
        ch = r.curr()
        if not ch or ch == ';' or ch == '{':
            break
        if is_id(ch):
            # Part of an identifier
            arg += r.step()
        elif ch == ',':
            # The comma ends the token collected so far
            args.append(arg)
            arg = ''
            r.step()
        elif ch == '[':
            # '[' is a token on its own
            args.append(r.step())
        elif ch == ']' or ch == '*':
            # Push the collected token; the ']' or '*' itself becomes the
            # beginning of the next token
            args.append(arg)
            arg = r.step()
        elif ch.isspace():
            r.step()
        else:
            raise Exception(f"Unexpected symbol '{r.curr()}' " +
                            f"at index #{r.i} in the macro declaration " +
                            f"at {location} " +
                            f"(line: {r.lines[r.line_idx]})\n''")
    # The last token has no comma after it
    if arg != '':
        args.append(arg)
    r.skip_spaces()

    # Read till the end of the line so that the reader picks up a
    # comment if there is one, then take it
    while r.curr() != '':
        r.step()
    comment = r.comment

    # Find the end of the macro: a '}' that is not escaped by a backslash
    prev = ''
    while not (r.curr() == '}' and prev != '\\'):
        if r.curr() == '':
            prev = ''
            r.nextline()
        else:
            prev = r.step()

    return AsmMacro(location, name, comment, args)
||
8855 | Boppan | 682 | |
9408 | Boppan | 683 | |
def parse_variable(r, first_word=None):
    """Try to parse a `name type value` variable declaration.

    `first_word`, when given, is used as the variable name instead of
    reading the name from the reader `r`.

    The result depends on what was found:
    - None: there is no usable name, the name is a keyword, a fasm data
      type or a struct name, or the type word starts with a digit;
    - VariableNameIsMacroName: the name is a known macro name;
    - str (the name): no type identifier follows the name, the name may be
      meaningful for another parser;
    - tuple (name, word): the second word is a keyword;
    - AsmVariable: a complete variable declaration.
    """
    location = r.location()

    # Skip spaces before the variable name
    r.skip_spaces()

    # Take the supplied name or read one from the reader
    if first_word is not None:
        name = first_word
    else:
        name = ""
        while is_id(r.curr()):
            name += r.step()

    # No identifier at all, or one that begins with a digit
    if not name or not is_starts_as_id(name):
        return None

    # Decide by the kinds already known for this identifier
    kind = id_get_kind(name)
    if ID_KIND_KEYWORD in kind:
        # A keyword can not be a variable name
        return None
    if ID_KIND_MACRO_NAME in kind:
        # A macro invocation, let the caller decide what to do with it
        return VariableNameIsMacroName(name)
    if ID_KIND_STRUCT_NAME in kind or ID_KIND_FASM_TYPE in kind:
        # Anonymous data (type or struct name goes first): such data
        # is not documented for now
        return None

    # Skip spaces before the type name and read it
    r.skip_spaces()
    var_type = ""
    while is_id(r.curr()):
        var_type += r.step()

    if not var_type:
        # Nothing that looks like a type follows: hand the name back,
        # maybe it means something for the next parser
        return name
    if not is_starts_as_id(var_type):
        return None

    # A keyword in the type position: return both words of the construct
    if ID_KIND_KEYWORD in id_get_kind(var_type):
        return (name, var_type)

    # Skip spaces before the value
    r.skip_spaces()

    # The value lasts until a comment or the end of the line
    value = ""
    while r.curr() not in (';', '', '\n'):
        value += r.step()

    # Skip spaces after the value
    r.skip_spaces()

    # Walk to the end of the line so the reader exposes the line comment
    while r.curr() != '':
        r.step()

    return AsmVariable(location, name, r.comment, var_type, value)
||
8825 | Boppan | 754 | |
9408 | Boppan | 755 | |
def parse_after_struct(r, as_union=True):
    """Parse a structure body; the reader `r` stands right after the
    "struct" (or nested "union") keyword.

    Reads the name and the line comment, then parses the following lines
    as members until a macro named 'ends' is met. Nested unions are parsed
    recursively and stored among the members. A label inside the body is
    skipped with a note appended to the global `warnings` text.

    Returns AsmStruct when `as_union` is truthy and AsmUnion otherwise.
    Raises Exception when a lone identifier that is neither 'union' nor a
    label is found in the body.
    """
    global warnings
    location = r.location()

    # Skip spaces after the keyword and read the name
    r.skip_spaces()
    name = ""
    while is_id(r.curr()):
        name += r.step()

    # Walk to the end of the line so the reader exposes the line comment
    while r.curr() != '':
        r.step()
    comment = r.comment

    # Members start on the next line
    r.nextline()

    members = []
    while True:
        r.skip_spaces()
        var = parse_variable(r)
        var_type = type(var)
        if var_type is VariableNameIsMacroName:
            # The 'ends' macro closes the structure
            if var.name == 'ends':
                break
        elif var_type is AsmVariable:
            members.append(var)
        elif var_type is str:
            if var == 'union':
                # Parse the union the same way as a struct
                members.append(parse_after_struct(r, as_union=True))
                # Skip the 'ends' of the union
                # NOTE(review): together with the nextline() at the bottom
                # of the loop this advances two lines - confirm against
                # AsmReader.nextline() that no member gets skipped.
                r.nextline()
            elif r.curr() == ':':
                warnings += f"{r.location()}: Skept the label in the struct\n"
            else:
                raise Exception(f"Garbage in struct member at {location} " +
                                f" (got '{var}' identifier)")
        r.nextline()

    # NOTE(review): a truthy `as_union` produces AsmStruct and a falsy one
    # produces AsmUnion, which reads inverted relative to the parameter
    # name. Kept as it is - confirm the intent before changing.
    result_class = AsmStruct if as_union else AsmUnion
    return result_class(location, name, comment, members)
||
8825 | Boppan | 800 | |
9408 | Boppan | 801 | |
def parse_after_proc(r):
    """Parse a procedure header; the reader `r` stands right after the
    "proc" keyword.

    Recognized form: name, an optional calling convention ('stdcall' or
    'c'), an optional 'uses' register list and comma separated arguments,
    each optionally followed by ':' and a type ('arg_t' when no type is
    given).

    Returns an AsmFunction built from the reader location, the name, the
    line comment, the calling convention, the list of (name, type)
    argument pairs and the list of used registers.
    """
    # Procedure name and the identifier that follows it
    name = r.fetch_identifier()
    identifier = r.fetch_identifier()

    # Optional calling convention specifier
    calling_convention = ''
    if identifier in ('stdcall', 'c'):
        calling_convention = identifier
        # A comma may separate the convention from the rest
        if r.curr() == ',':
            r.step()
        identifier = r.fetch_identifier()

    # Optional list of used registers
    used_regs = []
    if identifier == 'uses':
        # Register names go on until fetch_identifier() returns ''
        used_regs = list(iter(r.fetch_identifier, ''))
        # A comma may separate the register list from the arguments
        if r.curr() == ',':
            r.step()
        identifier = r.fetch_identifier()

    # Arguments: `identifier` holds the name of the next one, if any
    args = []
    while identifier != '':
        arg_name, arg_type = identifier, 'arg_t'
        # Skip spaces after the argument name
        r.skip_spaces()
        # A ':' introduces an explicit argument type
        if r.curr() == ':':
            r.step()
            arg_type = r.fetch_identifier()
        # A comma means one more argument follows, anything else ends
        # the list
        if r.curr() == ',':
            r.step()
            identifier = r.fetch_identifier()
        else:
            identifier = ''
        args.append((arg_name, arg_type))

    # Walk to the end of the line so the reader exposes the line comment
    while r.curr() != '':
        r.step()
    comment = r.comment

    return AsmFunction(r.location(), name, comment, calling_convention,
                       args, used_regs)
||
8963 | Boppan | 859 | |
9408 | Boppan | 860 | |
def get_declarations(asm_file_contents, asm_file_name):
    """Scan one assembly source line by line and register what it declares.

    Macros, structures, procedures, variables and non-local labels are
    appended to the global `elements` list. Macro and structure names as
    well as `=` and `equ` constants are recorded through id_add_kind();
    `purge` removes the macro kind from the listed names.

    `asm_file_contents` is not used by this function: the AsmReader is
    constructed from `asm_file_name`.
    """
    # Directives that may start a line but declare nothing to document
    skipped_directives = ('format', 'include', 'if', 'repeat')

    r = AsmReader(asm_file_name)

    while not r.no_lines():
        # Skip leading spaces
        r.skip_spaces()
        # Skip the line if it starts with a comment
        if r.curr() == ';':
            r.nextline()
            continue
        # Get the first word of the line
        first_word = ""
        while is_id(r.curr()):
            first_word += r.step()

        if first_word == "macro":
            # Macro declaration
            macro = parse_after_macro(r)
            elements.append(macro)
            id_add_kind(macro.name, ID_KIND_MACRO_NAME)
        elif first_word == "struct":
            # Structure declaration
            struct = parse_after_struct(r)
            elements.append(struct)
            id_add_kind(struct.name, ID_KIND_STRUCT_NAME)
        elif first_word == "proc":
            # Function definition
            elements.append(parse_after_proc(r))
        elif first_word in skipped_directives:
            pass
        elif first_word == 'purge':
            while True:
                # Skip spaces after the 'purge' keyword or after the comma
                # that separated the previous macro name
                r.skip_spaces()
                # Get the purged macro name
                name = ''
                while is_id(r.curr()):
                    name += r.step()
                # Forget that the name is a macro name. This is best
                # effort, so a failure is ignored, but only ordinary
                # errors: KeyboardInterrupt and SystemExit must still
                # propagate.
                try:
                    id_remove_kind(name, ID_KIND_MACRO_NAME)
                except Exception:
                    pass
                # Skip spaces after the name
                r.skip_spaces()
                # No comma after the name: that was the last purged macro
                if r.curr() != ',':
                    break
                r.step()
        elif len(first_word) != 0:
            # A label, a variable or a constant
            # Skip spaces after the identifier
            r.skip_spaces()
            var = parse_variable(r, first_word)
            if type(var) == AsmVariable:
                elements.append(var)
            elif type(var) == str:
                # parse_variable() found no type identifier after the word
                # and handed the supplied first word back: maybe that is a
                # label name or a constant name
                name = var
                if r.curr() == ':':
                    # Walk to the end of the line so the reader exposes
                    # the line comment
                    while r.curr() != '':
                        r.step()
                    comment = r.comment
                    # Only handle non-local labels
                    if name[0] != '.' and name != "@@" and name != "$Revision":
                        # Treat the label as a function if there is @return
                        # or @param in its comment, otherwise it is just
                        # a label
                        if '@return' in comment or '@param' in comment:
                            element = AsmFunction(r.location(), name, comment,
                                                  '', [], [])
                        else:
                            element = AsmLabel(r.location(), name, comment)
                        elements.append(element)
                elif r.curr() == '=':
                    # Save the identifier as a set constant
                    id_add_kind(first_word, ID_KIND_SET_CONSTANT)
            elif type(var) == tuple:
                (word_one, word_two) = var
                if word_two == 'equ':
                    # Save the identifier as an equated constant
                    id_add_kind(word_one, ID_KIND_EQUATED_CONSTANT)
        r.nextline()
||
8825 | Boppan | 966 | |
9408 | Boppan | 967 | |
def it_neds_to_be_parsed(source_file):
    """Tell whether `source_file` has to be (re)parsed.

    Returns True when the saved symbols file is missing, when the doxygen
    file the source is compiled to does not exist, or when the source was
    modified later than that doxygen file. Returns False otherwise.
    """
    # Without the saved symbols everything has to be parsed anyway: the
    # symbols file must be created to generate proper doxygen
    if not os.path.isfile('asmxygen.elements.pickle'):
        return True

    # No doxygen counterpart yet: the source was never compiled
    dest = f"{doxygen_src_path}/{source_file}"
    if not os.path.isfile(dest):
        return True

    # The existing doxygen file is old if the source is newer than it
    source_mtime = os.path.getmtime(source_file)
    dest_mtime = os.path.getmtime(dest)
    return source_mtime > dest_mtime
||
8966 | Boppan | 986 | |
9408 | Boppan | 987 | |
def handle_file(handled_files, asm_file_name, subdir="."):
    """Process one assembly source and, recursively, everything it includes.

    `handled_files` is the list of files already processed during this
    run; the file is appended to it. `subdir` is the directory relative
    include paths are tried against first.

    When the file has to be parsed (see it_neds_to_be_parsed()), the
    elements remembered for it from a previous run are dropped from the
    global `elements` list together with the kinds of their struct and
    macro names, and the declarations are collected again unless
    `clean_generated_stuff` is set.
    """
    global elements
    # Canonicalize the file path and get it relative to the directory of
    # the script
    cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
    asm_file_name = os.path.realpath(asm_file_name)
    asm_file_name = asm_file_name[len(cwd) + 1:]
    # lang.inc is never handled
    if asm_file_name == 'lang.inc':
        return
    # If the file was handled in this execution before - skip it
    if asm_file_name in handled_files:
        return
    # Say that the file was handled in this execution
    handled_files.append(asm_file_name)
    # Check if the file should be parsed
    # (if it was modified or wasn't parsed yet)
    should_get_declarations = it_neds_to_be_parsed(asm_file_name)
    if not should_get_declarations:
        print(f"Skipping {asm_file_name} (already newest)")
    else:
        print(f"Handling {asm_file_name}")
        # Split off the elements parsed from this file before, if any
        kept_elements = []
        removed_elements = []
        for x in elements:
            if x.location.split(':')[0] == asm_file_name:
                removed_elements.append(x)
            else:
                kept_elements.append(x)
        elements = kept_elements
        # Forget the kinds of the names of the removed elements
        for element in removed_elements:
            if type(element) == AsmStruct:
                id_remove_kind(element.name, ID_KIND_STRUCT_NAME)
            elif type(element) == AsmMacro:
                id_remove_kind(element.name, ID_KIND_MACRO_NAME)
    # Read the source; the file is closed before recursing into includes,
    # so open handles do not pile up along the include chain
    with open(asm_file_name, "r", encoding="utf-8") as asm_file:
        asm_file_contents = asm_file.read()
    # Find includes, fix their paths and handle them recursively
    includes = re.findall(r'^include (["\'])(.*)\1', asm_file_contents,
                          flags=re.MULTILINE)
    for include in includes:
        include = include[1].replace('\\', '/')
        full_path = subdir + '/' + include
        # If the path isn't valid, maybe that's not a relative path
        if not os.path.isfile(full_path):
            full_path = include
        new_subdir = full_path.rsplit('/', 1)[0]
        handle_file(handled_files, full_path, new_subdir)
    # Only collect declarations from the file if it has to be parsed
    if should_get_declarations and not clean_generated_stuff:
        get_declarations(asm_file_contents, asm_file_name)
||
8825 | Boppan | 1039 | |
if __name__ == "__main__":
    # Only the command-line entry point needs these two modules
    import collections
    import contextlib

    link_root = "http://websvn.kolibrios.org/filedetails.php"
    link_root += "?repname=Kolibri+OS&path=/kernel/trunk"

    # Dict where an identifier is associated with a string
    # The string contains characters specifying flags
    # Available flags:
    #  k - Keyword
    #  m - Macro name
    #  t - fasm data Type name (db, rq, etc.)
    #  s - Struct type name
    #  e - equated constant (name equ value)
    #  = - set constants (name = value)
    ID_KIND_KEYWORD = 'k'
    ID_KIND_MACRO_NAME = 'm'
    ID_KIND_FASM_TYPE = 't'
    ID_KIND_STRUCT_NAME = 's'
    ID_KIND_EQUATED_CONSTANT = 'e'
    ID_KIND_SET_CONSTANT = '='
    id2kind = {}

    for keyword in keywords:
        id_add_kind(keyword, ID_KIND_KEYWORD)

    for fasm_type in fasm_types:
        id_add_kind(fasm_type, ID_KIND_FASM_TYPE)

    # Text of all the warnings collected while parsing
    warnings = ""

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", help="Doxygen output folder")
    parser.add_argument("--clean",
                        help="Remove generated files",
                        action="store_true")
    parser.add_argument("--dump",
                        help="Dump all defined symbols",
                        action="store_true")
    parser.add_argument("--stats",
                        help="Print symbol stats",
                        action="store_true")
    parser.add_argument("--nowarn",
                        help="Do not write warnings file",
                        action="store_true")
    parser.add_argument("--noemit",
                        help="Do not emit doxygen files (for testing)",
                        action="store_true")
    parser.add_argument("--debug",
                        help="Show hashes of files (for testing)",
                        action="store_true")
    args = parser.parse_args()

    # Parameters
    # Path to doxygen folder to make doxygen files in: -o
    doxygen_src_path = args.o if args.o else 'docs/doxygen'
    # Remove generated doxygen files: --clean
    clean_generated_stuff = args.clean
    # Dump all defined symbols: --dump
    dump_symbols = args.dump
    # Print symbol stats: --stats
    print_stats = args.stats
    # Do not write warnings file: --nowarn
    enable_warnings = not args.nowarn
    # Do not emit doxygen files: --noemit
    noemit = args.noemit
    # Compare hashes of the output files: --debug
    debug_mode = args.debug

    # Variables, functions, labels, macros, structure types
    elements = []
    created_files = []
    kernel_files = []
    output_files = {}  # If --debug then all the files are written here

    # Load remembered list of symbols
    # NOTE: pickle.load() can execute arbitrary code from the file it
    # reads. That is acceptable only because this file is a cache written
    # by this very script; never point it at an untrusted file.
    if os.path.isfile('asmxygen.elements.pickle'):
        print('Reading existing dump of symbols')
        with open('asmxygen.elements.pickle', 'rb') as pickle_file:
            (elements, id2kind) = pickle.load(pickle_file)

    handle_file(kernel_files, "./kernel.asm")

    if dump_symbols:
        # Everything dump() prints lands in the dump file; stdout is
        # restored and the file is closed even if dump() raises
        with open('asmxygen.dump.txt', 'w', encoding='utf-8') as dump_file:
            with contextlib.redirect_stdout(dump_file):
                for asm_element in elements:
                    asm_element.dump()

    if clean_generated_stuff:
        for kernel_file in kernel_files:
            doxygen_file = f"{doxygen_src_path}/{kernel_file}"
            if os.path.isfile(doxygen_file):
                print(f"Removing {kernel_file}... ", end='')
                os.remove(doxygen_file)
                print("Done.")
    elif not noemit:
        print(f"Writing doumented sources to {doxygen_src_path}")

        new_elements = [x for x in elements if x.new]
        for i, element in enumerate(new_elements, start=1):
            counter = f"[{i}/{len(new_elements)}]"
            print(f"{counter} Emitting {element.name} from {element.location}")
            element.emit(doxygen_src_path)

        print("Writing dump of symbols to asmxygen.elements.pickle")

        # Now that the new elements have been written, there are no new
        # elements anymore
        for element in elements:
            element.new = False
        with open('asmxygen.elements.pickle', 'wb') as pickle_file:
            pickle.dump((elements, id2kind), pickle_file)

    if print_stats:
        # Elements are counted by their exact type
        type_counts = collections.Counter(type(x) for x in elements)
        print(f'Parsed variable count: {type_counts[AsmVariable]}')
        print(f'Parsed macro count: {type_counts[AsmMacro]}')
        print(f'Parsed label count: {type_counts[AsmLabel]}')
        print(f'Parsed function count: {type_counts[AsmFunction]}')
        print(f'Parsed union type count: {type_counts[AsmUnion]}')
        print(f'Parsed structure type count: {type_counts[AsmStruct]}')

    if enable_warnings:
        with open('asmxygen.txt', "w", encoding="utf-8") as warnings_file:
            warnings_file.write(warnings)

    if debug_mode:
        # One "<file>: <sha1>" line per output file. The text is hashed as
        # UTF-8: the result is the same as before for pure ASCII text and
        # non-ASCII text no longer breaks the encoding step.
        hash_per_file = "".join(
            f"{name}: {hashlib.sha1(text.encode('utf-8')).hexdigest()}\n"
            for name, text in output_files.items()
        )
        reference_path = "asmxygen_hash_per_file.txt"
        if not os.path.exists(reference_path):
            # The first run: remember the hashes as the reference
            with open(reference_path, "w", encoding="utf-8") as ref_file:
                ref_file.write(hash_per_file)
            print("NEW")
        else:
            with open(reference_path, encoding="utf-8") as ref_file:
                reference_hash_per_file = ref_file.read()
            if reference_hash_per_file != hash_per_file:
                # ndiff() compares sequences of lines; given plain strings
                # it would compare them character by character
                diffs = difflib.ndiff(
                    reference_hash_per_file.splitlines(keepends=True),
                    hash_per_file.splitlines(keepends=True))
                print(''.join(diffs), end='')
            else:
                print("SUCCESS")