Rev 9400 | Rev 9402 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
8834 | Boppan | 1 | import re |
8825 | Boppan | 2 | import os |
8837 | Boppan | 3 | import argparse |
8957 | Boppan | 4 | import sys |
8990 | Boppan | 5 | import pickle |
8825 | Boppan | 6 | |
# fasm keywords
# Flat list of lowercase fasm directive names and x86 instruction mnemonics.
# Some entries occur more than once (e.g. "movq", "movsd", "vgatherdpd");
# the duplicates are kept exactly as they are.
# NOTE(review): presumably these names are registered in id2kind as keywords
# elsewhere in the file - confirm against the code that consumes this list.
keywords = [
    "align", "equ", "org", "while", "load", "store", "times", "repeat",
    "display", "err", "assert", "if", "aaa", "aad", "aam", "aas", "adc",
    "add", "addpd", "addps", "addsd", "addss", "addsubpd", "addsubps", "adox",
    "aesdeclast", "aesenc", "aesenclast", "aesimc", "aeskeygenassist", "and",
    "andnpd", "andnps", "andpd", "andps", "arpl", "bextr", "blendpd",
    "blendvpd", "blendvps", "blsi", "blsmsk", "blsr", "bndcl", "bndcn",
    "bndldx", "bndmk", "bndmov", "bndstx", "bound", "bsf", "bsr", "bswap",
    "btc", "btr", "bts", "bzhi", "call", "cbw", "cdq", "cdqe", "clac", "clc",
    "cldemote", "clflush", "clflushopt", "cli", "clts", "clwb", "cmc", "cmova",
    "cmovb", "cmovbe", "cmovc", "cmove", "cmovg", "cmovge", "cmovl", "cmovle",
    "cmovnae", "cmovnb", "cmovnbe", "cmovnc", "cmovne", "cmovng", "cmovnge",
    "cmovnle", "cmovno", "cmovnp", "cmovns", "cmovnz", "cmovo", "cmovp",
    "cmovpo", "cmovs", "cmovz", "cmp", "cmppd", "cmpps", "cmps", "cmpsb",
    "cmpsd", "cmpsq", "cmpss", "cmpsw", "cmpxchg", "cmpxchg16b", "cmpxchg8b",
    "comiss", "cpuid", "cqo", "crc32", "cvtdq2pd", "cvtdq2ps", "cvtpd2dq",
    "cvtpd2ps", "cvtpi2pd", "cvtpi2ps", "cvtps2dq", "cvtps2pd", "cvtps2pi",
    "cvtsd2ss", "cvtsi2sd", "cvtsi2ss", "cvtss2sd", "cvtss2si", "cvttpd2dq",
    "cvttps2dq", "cvttps2pi", "cvttsd2si", "cvttss2si", "cwd", "cwde", "daa",
    "dec", "div", "divpd", "divps", "divsd", "divss", "dppd", "dpps", "emms",
    "extractps", "f2xm1", "fabs", "fadd", "faddp", "fbld", "fbstp", "fchs",
    "fcmova", "fcmovae", "fcmovb", "fcmovbe", "fcmovc", "fcmove", "fcmovg",
    "fcmovl", "fcmovle", "fcmovna", "fcmovnae", "fcmovnb", "fcmovnbe",
    "fcmovne", "fcmovng", "fcmovnge", "fcmovnl", "fcmovnle", "fcmovno",
    "fcmovns", "fcmovnz", "fcmovo", "fcmovp", "fcmovpe", "fcmovpo", "fcmovs",
    "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp", "fdiv",
    "fdivr", "fdivrp", "ffree", "fiadd", "ficom", "ficomp", "fidiv", "fidivr",
    "fimul", "fincstp", "finit", "fist", "fistp", "fisttp", "fisub", "fisubr",
    "fld1", "fldcw", "fldenv", "fldl2e", "fldl2t", "fldlg2", "fldln2", "fldpi",
    "fmul", "fmulp", "fnclex", "fninit", "fnop", "fnsave", "fnstcw", "fnstenv",
    "fpatan", "fprem", "fprem1", "fptan", "frndint", "frstor", "fsave",
    "fsin", "fsincos", "fsqrt", "fst", "fstcw", "fstenv", "fstp", "fstsw",
    "fsubp", "fsubr", "fsubrp", "ftst", "fucom", "fucomi", "fucomip", "fucomp",
    "fwait", "fxam", "fxch", "fxrstor", "fxsave", "fxtract", "fyl2x",
    "gf2p8affineinvqb", "gf2p8affineqb", "gf2p8mulb", "haddpd", "haddps",
    "hsubpd", "hsubps", "idiv", "imul", "in", "inc", "ins", "insb", "insd",
    "insw", "int", "int1", "int3", "into", "invd", "invlpg", "invpcid", "iret",
    "jmp", "ja", "jae", "jb", "jbe", "jc", "jcxz", "jecxz", "je", "jg", "jge",
    "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge", "jnl",
    "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo", "js", "jz", "kaddb",
    "kaddq", "kaddw", "kandb", "kandd", "kandnb", "kandnd", "kandnq", "kandnw",
    "kandw", "kmovb", "kmovd", "kmovq", "kmovw", "knotb", "knotd", "knotq",
    "korb", "kord", "korq", "kortestb", "kortestd", "kortestq", "kortestw",
    "kshiftlb", "kshiftld", "kshiftlq", "kshiftlw", "kshiftrb", "kshiftrd",
    "kshiftrw", "ktestb", "ktestd", "ktestq", "ktestw", "kunpckbw", "kunpckdq",
    "kxnorb", "kxnord", "kxnorq", "kxnorw", "kxorb", "kxord", "kxorq", "kxorw",
    "lar", "lddqu", "ldmxcsr", "lds", "lea", "leave", "les", "lfence", "lfs",
    "lgs", "lidt", "lldt", "lmsw", "lock", "lods", "lodsb", "lodsd", "lodsq",
    "loop", "loopa", "loopae", "loopb", "loopbe", "loopc", "loope", "loopg",
    "loopl", "loople", "loopna", "loopnae", "loopnb", "loopnbe", "loopnc",
    "loopng", "loopnge", "loopnl", "loopnle", "loopno", "loopnp", "loopns",
    "loopo", "loopp", "looppe", "looppo", "loops", "loopz", "lsl", "lss",
    "lzcnt", "maskmovdqu", "maskmovq", "maxpd", "maxps", "maxsd", "maxss",
    "minpd", "minps", "minsd", "minss", "monitor", "mov", "movapd", "movaps",
    "movd", "movddup", "movdir64b", "movdiri", "movdq2q", "movdqa", "movdqu",
    "movhpd", "movhps", "movlhps", "movlpd", "movlps", "movmskpd", "movmskps",
    "movntdqa", "movnti", "movntpd", "movntps", "movntq", "movq", "movq",
    "movs", "movsb", "movsd", "movsd", "movshdup", "movsldup", "movsq",
    "movsw", "movsx", "movsxd", "movupd", "movups", "movzx", "mpsadbw", "mul",
    "mulps", "mulsd", "mulss", "mulx", "mwait", "neg", "nop", "not", "or",
    "orps", "out", "outs", "outsb", "outsd", "outsw", "pabsb", "pabsd",
    "pabsw", "packssdw", "packsswb", "packusdw", "packuswb", "paddb", "paddd",
    "paddsb", "paddsw", "paddusb", "paddusw", "paddw", "palignr", "pand",
    "pause", "pavgb", "pavgw", "pblendvb", "pblendw", "pclmulqdq", "pcmpeqb",
    "pcmpeqq", "pcmpeqw", "pcmpestri", "pcmpestrm", "pcmpgtb", "pcmpgtd",
    "pcmpgtw", "pcmpistri", "pcmpistrm", "pdep", "pext", "pextrb", "pextrd",
    "pextrw", "phaddd", "phaddsw", "phaddw", "phminposuw", "phsubd", "phsubsw",
    "pinsrb", "pinsrd", "pinsrq", "pinsrw", "pmaddubsw", "pmaddwd", "pmaxsb",
    "pmaxsq", "pmaxsw", "pmaxub", "pmaxud", "pmaxuq", "pmaxuw", "pminsb",
    "pminsq", "pminsw", "pminub", "pminud", "pminuq", "pminuw", "pmovmskb",
    "pmovzx", "pmuldq", "pmulhrsw", "pmulhuw", "pmulhw", "pmulld", "pmullq",
    "pmuludq", "pop", "popa", "popad", "popcnt", "popf", "popfd", "popfq",
    "prefetchw", "prefetchh", "psadbw", "pshufb", "pshufd", "pshufhw",
    "pshufw", "psignb", "psignd", "psignw", "pslld", "pslldq", "psllq",
    "psrad", "psraq", "psraw", "psrld", "psrldq", "psrlq", "psrlw", "psubb",
    "psubq", "psubsb", "psubsw", "psubusb", "psubusw", "psubw", "ptest",
    "punpckhbw", "punpckhdq", "punpckhqdq", "punpckhwd", "punpcklbw",
    "punpcklqdq", "punpcklwd", "push", "pushw", "pushd", "pusha", "pushad",
    "pushfd", "pushfq", "pxor", "rcl", "rcpps", "rcpss", "rcr", "rdfsbase",
    "rdmsr", "rdpid", "rdpkru", "rdpmc", "rdrand", "rdseed", "rdtsc", "rdtscp",
    "repe", "repne", "repnz", "repz", "ret", "rol", "ror", "rorx", "roundpd",
    "roundsd", "roundss", "rsm", "rsqrtps", "rsqrtss", "sahf", "sal", "sar",
    "sbb", "scas", "scasb", "scasd", "scasw", "seta", "setae", "setb", "setbe",
    "sete", "setg", "setge", "setl", "setle", "setna", "setnae", "setnb",
    "setnc", "setne", "setng", "setnge", "setnl", "setnle", "setno", "setnp",
    "setnz", "seto", "setp", "setpe", "setpo", "sets", "setz", "sfence",
    "sha1msg1", "sha1msg2", "sha1nexte", "sha1rnds4", "sha256msg1",
    "sha256rnds2", "shl", "shld", "shlx", "shr", "shrd", "shrx", "shufpd",
    "sidt", "sldt", "smsw", "sqrtpd", "sqrtps", "sqrtsd", "sqrtss", "stac",
    "std", "sti", "stmxcsr", "stos", "stosb", "stosd", "stosq", "stosw", "str",
    "subpd", "subps", "subsd", "subss", "swapgs", "syscall", "sysenter",
    "sysret", "test", "tpause", "tzcnt", "ucomisd", "ucomiss", "ud",
    "umwait", "unpckhpd", "unpckhps", "unpcklpd", "unpcklps", "valignd",
    "vblendmpd", "vblendmps", "vbroadcast", "vcompresspd", "vcompressps",
    "vcvtpd2udq", "vcvtpd2uqq", "vcvtph2ps", "vcvtps2ph", "vcvtps2qq",
    "vcvtps2uqq", "vcvtqq2pd", "vcvtqq2ps", "vcvtsd2usi", "vcvtss2usi",
    "vcvttpd2udq", "vcvttpd2uqq", "vcvttps2qq", "vcvttps2udq", "vcvttps2uqq",
    "vcvttss2usi", "vcvtudq2pd", "vcvtudq2ps", "vcvtuqq2pd", "vcvtuqq2ps",
    "vcvtusi2ss", "vdbpsadbw", "verr", "verw", "vexpandpd", "vexpandps",
    "vextractf32x4", "vextractf32x8", "vextractf64x2", "vextractf64x4",
    "vextracti32x4", "vextracti32x8", "vextracti64x2", "vextracti64x4",
    "vfixupimmps", "vfixupimmsd", "vfixupimmss", "vfmadd132pd", "vfmadd132ps",
    "vfmadd132ss", "vfmadd213pd", "vfmadd213ps", "vfmadd213sd", "vfmadd213ss",
    "vfmadd231ps", "vfmadd231sd", "vfmadd231ss", "vfmaddsub132pd",
    "vfmaddsub213pd", "vfmaddsub213ps", "vfmaddsub231pd", "vfmaddsub231ps",
    "vfmsub132ps", "vfmsub132sd", "vfmsub132ss", "vfmsub213pd", "vfmsub213ps",
    "vfmsub213ss", "vfmsub231pd", "vfmsub231ps", "vfmsub231sd", "vfmsub231ss",
    "vfmsubadd132ps", "vfmsubadd213pd", "vfmsubadd213ps", "vfmsubadd231pd",
    "vfnmadd132pd", "vfnmadd132ps", "vfnmadd132sd", "vfnmadd132ss",
    "vfnmadd213ps", "vfnmadd213sd", "vfnmadd213ss", "vfnmadd231pd",
    "vfnmadd231sd", "vfnmadd231ss", "vfnmsub132pd", "vfnmsub132ps",
    "vfnmsub132ss", "vfnmsub213pd", "vfnmsub213ps", "vfnmsub213sd",
    "vfnmsub231pd", "vfnmsub231ps", "vfnmsub231sd", "vfnmsub231ss",
    "vfpclassps", "vfpclasssd", "vfpclassss", "vgatherdpd", "vgatherdpd",
    "vgatherdps", "vgatherqpd", "vgatherqpd", "vgatherqps", "vgatherqps",
    "vgetexpps", "vgetexpsd", "vgetexpss", "vgetmantpd", "vgetmantps",
    "vgetmantss", "vinsertf128", "vinsertf32x4", "vinsertf32x8",
    "vinsertf64x4", "vinserti128", "vinserti32x4", "vinserti32x8",
    "vinserti64x4", "vmaskmov", "vmovdqa32", "vmovdqa64", "vmovdqu16",
    "vmovdqu64", "vmovdqu8", "vpblendd", "vpblendmb", "vpblendmd", "vpblendmq",
    "vpbroadcast", "vpbroadcastb", "vpbroadcastd", "vpbroadcastm",
    "vpbroadcastw", "vpcmpb", "vpcmpd", "vpcmpq", "vpcmpub", "vpcmpud",
    "vpcmpuw", "vpcmpw", "vpcompressd", "vpcompressq", "vpconflictd",
    "vperm2f128", "vperm2i128", "vpermb", "vpermd", "vpermi2b", "vpermi2d",
    "vpermi2ps", "vpermi2q", "vpermi2w", "vpermilpd", "vpermilps", "vpermpd",
    "vpermq", "vpermt2b", "vpermt2d", "vpermt2pd", "vpermt2ps", "vpermt2q",
    "vpermw", "vpexpandd", "vpexpandq", "vpgatherdd", "vpgatherdd",
    "vpgatherdq", "vpgatherqd", "vpgatherqd", "vpgatherqq", "vpgatherqq",
    "vplzcntq", "vpmadd52huq", "vpmadd52luq", "vpmaskmov", "vpmovb2m",
    "vpmovdb", "vpmovdw", "vpmovm2b", "vpmovm2d", "vpmovm2q", "vpmovm2w",
    "vpmovqb", "vpmovqd", "vpmovqw", "vpmovsdb", "vpmovsdw", "vpmovsqb",
    "vpmovsqw", "vpmovswb", "vpmovusdb", "vpmovusdw", "vpmovusqb", "vpmovusqd",
    "vpmovuswb", "vpmovw2m", "vpmovwb", "vpmultishiftqb", "vprold", "vprolq",
    "vprolvq", "vprord", "vprorq", "vprorvd", "vprorvq", "vpscatterdd",
    "vpscatterqd", "vpscatterqq", "vpsllvd", "vpsllvq", "vpsllvw", "vpsravd",
    "vpsravw", "vpsrlvd", "vpsrlvq", "vpsrlvw", "vpternlogd", "vpternlogq",
    "vptestmd", "vptestmq", "vptestmw", "vptestnmb", "vptestnmd", "vptestnmq",
    "vrangepd", "vrangeps", "vrangesd", "vrangess", "vrcp14pd", "vrcp14ps",
    "vrcp14ss", "vreducepd", "vreduceps", "vreducesd", "vreducess",
    "vrndscaleps", "vrndscalesd", "vrndscaless", "vrsqrt14pd", "vrsqrt14ps",
    "vrsqrt14ss", "vscalefpd", "vscalefps", "vscalefsd", "vscalefss",
    "vscatterdps", "vscatterqpd", "vscatterqps", "vshuff32x4", "vshuff64x2",
    "vshufi64x2", "vtestpd", "vtestps", "vzeroall", "vzeroupper", "wait",
    "wrfsbase", "wrgsbase", "wrmsr", "wrpkru", "xabort", "xacquire", "xadd",
    "xchg", "xend", "xgetbv", "xlat", "xlatb", "xor", "xorpd", "xorps",
    "xrstor", "xrstors", "xsave", "xsavec", "xsaveopt", "xsaves", "xsetbv",
]
155 | |||
# fasm data directives: for every size suffix there is a define ("d?") and a
# reserve ("r?") form, in that order; "du" is the only entry without a
# reserve counterpart in this list.
# Result: db, rb, dw, rw, dd, rd, dp, rp, df, rf, dq, rq, dt, rt, du
fasm_types = [
    prefix + size_suffix
    for size_suffix in "bwdpfqt"
    for prefix in "dr"
] + ["du"]
||
166 | |||
def id_add_kind(identifier, kind):
    """Append the `kind` flag to the flag string of `identifier` in id2kind.

    An identifier that is not in the table yet starts from an empty flag
    string. The flag is appended unconditionally, even if already present.
    """
    id2kind[identifier] = id2kind.get(identifier, '') + kind
||
172 | |||
def id_remove_kind(identifier, kind):
    """Strip every occurrence of the `kind` flag from `identifier` in id2kind.

    Unknown identifiers are left alone: no entry is created for them.
    """
    if identifier not in id2kind:
        return
    flags = id2kind[identifier]
    if kind in flags:
        id2kind[identifier] = flags.replace(kind, '')
||
178 | |||
def id_get_kind(identifier):
    """Return the flag string stored for `identifier` in id2kind.

    An identifier without an entry yields an empty string.
    """
    return id2kind.get(identifier, '')
||
185 | |||
class LegacyAsmReader:
    """Character cursor over the lines of a source file.

    The whole file is read into memory on construction. The cursor is the
    pair (line_idx, i): index of the current line and index of the current
    character inside that line.
    """

    def __init__(self, file):
        """Read all lines of `file` (UTF-8) and put the cursor at its start."""
        self.file = file
        # The context manager closes the handle as soon as the lines are read
        with open(file, "r", encoding="utf-8") as source:
            self.lines = source.readlines()
        self.line_idx = 0
        self.i = 0

    def curr(self):
        """Return the character under the cursor.

        Returns '' when the cursor is past the end of the current line or
        past the last line of the file.
        """
        try:
            return self.lines[self.line_idx][self.i]
        except IndexError:
            return ''

    def step(self):
        """Return the character under the cursor, then advance the cursor.

        After advancing, a '\\' followed only by whitespace and then a
        comment (';') or the end of the line is treated as a line wrap:
        the cursor jumps to the start of the next line.
        """
        c = self.curr()
        self.i += 1
        # Wrap the line if '\\' followed by whitespaces and/or comment
        while self.curr() == '\\':
            i_of_backslash = self.i
            self.i += 1
            while self.curr().isspace():
                self.i += 1
            if self.curr() == ';' or self.curr() == '':
                self.line_idx += 1
                self.i = 0
            else:
                # There's something other than a comment after the backslash
                # So don't interpret the backslash as a line wrap
                self.i = i_of_backslash
                break
        return c

    def nextline(self):
        """Consume the rest of the current line and move to the next one."""
        c = self.curr()
        # Go through step() so that line wraps (and subclass overrides of
        # step()) are still processed for the skipped characters
        while c != '':
            c = self.step()
        self.line_idx += 1
        self.i = 0

    def no_lines(self):
        """Return True when the cursor is past the last line of the file."""
        return self.line_idx >= len(self.lines)

    def location(self):
        """Return the cursor position as '<file>:<1-based line number>'."""
        return f"{self.file}:{self.line_idx + 1}"

    def skip_spaces(self):
        """Advance the cursor while it points at a whitespace character."""
        while self.curr().isspace():
            self.step()
||
234 | |||
class AsmReaderRecognizingStrings(LegacyAsmReader):
    """Reader that tracks whether the cursor is inside a quoted string.

    `in_string` is None outside of a string, otherwise it holds the
    quotation mark (single or double) that opened the current string.
    Tracking can be switched off through `should_recognize_strings`.
    """

    def __init__(self, file):
        super().__init__(file)
        self.in_string = None
        self.should_recognize_strings = True

    def step(self):
        """Advance one character and update the in-string state."""
        c = super().step()
        if not self.should_recognize_strings or c not in ('"', "'"):
            return c
        if self.in_string is None:
            # The character we just passed opens a string
            self.in_string = c
        elif self.in_string == c:
            # Same quotation mark as the one that opened the string: it closes it
            self.in_string = None
        # A different quotation mark inside a string is just string content
        return c
||
255 | |||
class AsmReaderReadingComments(AsmReaderRecognizingStrings):
    """Reader that collects ';' comments and tracks per-line status.

    `comment` accumulates comment text; it is dropped in nextline() when
    the line being left contained code, so comments of consecutive
    comment-only lines are concatenated.
    """

    def __init__(self, file):
        super().__init__(file)
        self.status = {}
        self.status_reset()
        self.comment = ''

    def status_reset(self):
        """Reset the per-line flags for a fresh line."""
        # Whether the line has non-comment code
        self.status_has_code = False
        # Whether the line has a comment at the end
        self.status_has_comment = False
        # A new line is never inside a comment, so strings are recognized again
        self.should_recognize_strings = True

    def status_set_has_comment(self):
        """Mark the current line as being inside a comment."""
        self.status_has_comment = True
        # Quotation marks inside a comment must not open strings
        self.should_recognize_strings = False

    def status_set_has_code(self):
        """Mark the current line as containing code."""
        self.status_has_code = True

    def update_status(self):
        """Update the line status from the character under the cursor."""
        ch = self.curr()
        if self.status_has_comment:
            # Already inside a comment: collect its text
            self.comment += ch
        elif ch == ';' and not self.in_string:
            # A ';' outside of a string starts the comment
            self.status_set_has_comment()
        elif not ch.isspace():
            # Anything non-whitespace outside of a comment counts as code
            # NOTE(review): ''.isspace() is False, so reaching the end of a
            # line outside of a comment also sets the has-code flag -
            # confirm this is intended
            self.status_set_has_code()

    def step(self):
        """Advance one character, then update the status for the new one."""
        c = super().step()
        self.update_status()
        return c

    def nextline(self):
        """Go to the next line, keeping the comment only after comment-only lines."""
        super().nextline()
        # The line we leave had code: the collected comment belonged to it
        if self.status_has_code:
            self.comment = ''
        # From here on the status describes the new line
        self.status_reset()
        # Account for the first character of the new line
        self.update_status()
||
309 | |||
class AsmReaderFetchingIdentifiers(AsmReaderReadingComments):
    """Reader that can fetch a whole identifier at once."""

    def __init__(self, file):
        super().__init__(file)

    def fetch_identifier(self):
        """Skip leading whitespace and return the identifier under the cursor.

        Returns '' if the first non-whitespace character is not an
        identifier character according to is_id().
        """
        self.skip_spaces()
        chars = []
        while is_id(self.curr()):
            chars.append(self.step())
        return ''.join(chars)
||
320 | |||
class AsmReader(AsmReaderFetchingIdentifiers):
    """Final reader class combining all reader layers; adds nothing itself."""

    def __init__(self, file):
        super().__init__(file)
||
324 | |||
class AsmElement:
    """Base class of documented elements found in assembly sources.

    Attributes:
        new: True for elements constructed during this execution.
        location: '<file>:<line>' string the element was found at.
        file: file part of the location, with '\\' replaced by '/'.
        line: line part of the location (string).
        name: name of the element.
        comment: documentation comment attached to the element.
    """

    def __init__(self, location, name, comment):
        """Store the element data; log a warning if `comment` is empty."""
        global warnings

        # If the element was constructed during this execution then the element is new
        self.new = True
        self.location = location
        # Split on the LAST colon: the file part may contain colons itself
        # (e.g. a Windows drive letter), the line number never does
        location_parts = self.location.rsplit(':', 1)
        self.file = location_parts[0].replace('\\', '/')
        self.line = location_parts[1]
        self.name = name
        self.comment = comment

        if self.comment == '':
            warnings += f'{self.location}: Undocumented element\n'

    def dump(self):
        """Print the location, name and comment of the element."""
        print(f"\n{self.location}: {self.name}")
        print(f"{self.comment}")

    def emit(self, dest, doxycomment = '', declaration = ''):
        """Append a doxygen comment and a declaration to the output file.

        The output file is `dest` + '/' + self.file. On its first use in
        this run a file left there earlier is removed, so the output starts
        from scratch. Nothing is emitted if the element's comment contains
        '@dont_give_a_doxygen'.

        Args:
            dest: root directory of the generated files.
            doxycomment: text placed inside the '/** ... */' block.
            declaration: declaration following the comment; defaults to
                '#define <name>'.
        """
        global warnings

        # Do not emit anything if the symbol is marked as hidden in its comment
        if '@dont_give_a_doxygen' in self.comment:
            return

        # Redefine default declaration
        if declaration == '':
            declaration = f'#define {self.name}'
        # Check doxycomment
        if not doxycomment.endswith('\n'):
            doxycomment += '\n'
        # partition() yields '' when there is no '@brief ' at all, so a
        # doxycomment without a brief no longer raises IndexError here
        brief_text = doxycomment.partition('@brief ')[2]
        if brief_text[:1].islower():
            warnings += f"{self.location}: Brief comment starting from lowercase\n"
        # Build contents to emit
        contents = ''.join([
            '/**\n',
            doxycomment,
            f"@par Source\n",
            f"{self.file}:{self.line}\n",
            '*/\n',
            declaration,
            '\n\n',
        ])
        # Get path to file to emit this
        full_path = dest + '/' + self.file
        # Remove the file on first access if it was created by previous generation
        if full_path not in created_files:
            if os.path.isfile(full_path):
                os.remove(full_path)
            created_files.append(full_path)
        # Create directories needed for the file
        os.makedirs(os.path.dirname(full_path), exist_ok=True)
        # The output is forced to plain ASCII: anything else becomes '?'
        contents = ''.join(ch if ord(ch) < 128 else '?' for ch in contents)
        with open(full_path, "a") as output:
            output.write(contents)
||
380 | |||
class AsmVariable(AsmElement):
    """A variable: an element with a type name and an initial value."""

    def __init__(self, location, name, comment, type, init):
        super().__init__(location, name, comment)
        self.type = type
        self.init = init

    def dump(self):
        """Print the element followed by a '(Variable)' marker."""
        super().dump()
        print(f"(Variable)\n---")

    def emit(self, dest):
        """Emit the variable as a C-like '<type> <name>;' declaration."""
        # The comment becomes the brief unless it already has its own @brief
        brief_prefix = '' if '@brief' in self.comment else '@brief '
        doxycomment = (brief_prefix + self.comment +
                       f"@par Initial value\n" +
                       f"{self.init}\n")
        # Dots are not valid in C identifiers
        c_name = self.name.replace(".", "_")
        c_type = self.type.replace(".", "_")
        super().emit(dest, doxycomment, f"{c_type} {c_name};")
||
8855 | Boppan | 405 | |
class AsmFunction(AsmElement):
    """A function (or a label documented like one).

    `args` is a list of (name, type) pairs; `calling_convention` and
    `used_regs` are stored as given.
    """

    def __init__(self, location, name, comment, calling_convention, args, used_regs):
        super().__init__(location, name, comment)
        self.calling_convention = calling_convention
        self.args = args
        self.used_regs = used_regs

    def dump(self):
        """Print the element followed by a '(Function)' marker."""
        super().dump()
        print(f"(Function)\n---")

    def _params_from_comment(self):
        """Return [(name, 'arg_t'), ...] for every '@param' in the comment.

        The name is the run of identifier characters (see is_id) following
        the whitespace after '@param'; it is '' when no identifier follows.
        Every index is bounds-checked, so a '@param' or a parameter name at
        the very end of the comment does not raise IndexError.
        """
        comment = self.comment
        end = len(comment)
        params = []
        pos = comment.find('@param')
        while pos != -1:
            # Skip '@param'
            pos += len('@param')
            # Skip spaces after '@param'
            while pos < end and comment[pos].isspace():
                pos += 1
            # Get the parameter name
            name_start = pos
            while pos < end and is_id(comment[pos]):
                pos += 1
            params.append((comment[name_start:pos], 'arg_t'))
            pos = comment.find('@param', pos)
        return params

    def emit(self, dest):
        """Emit the function as a C-like 'void <name>(<args>);' declaration.

        If the function has no arguments but its comment has '@param' tags,
        the arguments are taken from the comment (with type 'arg_t') and
        stored in self.args.
        """
        # Build doxycomment specific for the function
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        # If there was no arguments, maybe that's just a label
        # then parse parameters from its comment
        if len(self.args) == 0 and '@param' in self.comment:
            self.args.extend(self._params_from_comment())
        # Build the arg list for declaration: "<type> <name>" per argument
        arg_list = '(' + ", ".join(f"{arg[1]} {arg[0]}" for arg in self.args) + ')'
        # Build the declaration (dots are not valid in C identifiers)
        name = self.name.replace(".", "_")
        declaration = f"void {name}{arg_list};"
        # Emit this
        super().emit(dest, doxycomment, declaration)
||
8855 | Boppan | 456 | |
class AsmLabel(AsmElement):
    """A plain label."""

    def __init__(self, location, name, comment):
        super().__init__(location, name, comment)

    def dump(self):
        """Print the element followed by a '(Label)' marker."""
        super().dump()
        print(f"(Label)\n---")

    def emit(self, dest):
        """Emit the label as a 'label <name>;' declaration."""
        # The comment becomes the brief unless it already has its own @brief
        brief_prefix = '' if '@brief' in self.comment else '@brief '
        doxycomment = brief_prefix + self.comment
        # Dots are not valid in C identifiers
        c_name = self.name.replace(".", "_")
        super().emit(dest, doxycomment, f"label {c_name};")
||
8855 | Boppan | 476 | |
class AsmMacro(AsmElement):
    """A macro with its argument list."""

    def __init__(self, location, name, comment, args):
        super().__init__(location, name, comment)
        self.args = args

    def dump(self):
        """Print the element followed by a '(Macro)' marker."""
        super().dump()
        print(f"(Macro)\n---")

    def emit(self, dest):
        """Emit the macro as a '#define <name>(<args>)' declaration.

        The parentheses are omitted when no arguments remain after
        filtering.
        """
        # Drop the '[', ']' and '*' entries. This is a substring test
        # against "[]*", so empty entries are dropped as well.
        plain_args = [a for a in self.args if a not in "[]*"]
        # C-like argument list
        arg_list = "(" + ", ".join(plain_args) + ")" if plain_args else ""
        # The comment becomes the brief unless it already has its own @brief
        brief_prefix = '' if '@brief' in self.comment else '@brief '
        doxycomment = brief_prefix + self.comment
        super().emit(dest, doxycomment, f"#define {self.name}{arg_list}")
||
8855 | Boppan | 509 | |
class AsmStruct(AsmElement):
    """A structure with its members."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        """Print the element followed by a '(Struct)' marker."""
        super().dump()
        print(f"(Struct)\n---")

    def emit(self, dest):
        """Emit the structure as a C-like 'struct <name> { ... };' declaration.

        Only members that are exactly AsmVariable instances are listed,
        each with its comment as a trailing doxygen member comment.
        """
        # The comment becomes the brief unless it already has its own @brief
        brief_prefix = '' if '@brief' in self.comment else '@brief '
        doxycomment = brief_prefix + self.comment + '\n'
        # One line per variable member
        member_lines = [
            f'\t{member.type} {member.name}; /**< {member.comment} */\n'
            for member in self.members
            if type(member) == AsmVariable
        ]
        declaration = f"struct {self.name}" + " {\n" + ''.join(member_lines) + '};'
        super().emit(dest, doxycomment, declaration)
||
8855 | Boppan | 534 | |
class AsmUnion(AsmElement):
    """A union with its members."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        """Print the element followed by a '(Union)' marker."""
        super().dump()
        print(f"(Union)\n---")

    def emit(self, dest):
        """Emit the union as an empty 'union <name> {};' declaration.

        The members are not listed in the emitted declaration.
        """
        # The comment becomes the brief unless it already has its own @brief
        brief_prefix = '' if '@brief' in self.comment else '@brief '
        doxycomment = brief_prefix + self.comment
        super().emit(dest, doxycomment, f"union {self.name}" + " {};")
||
8855 | Boppan | 554 | |
class VariableNameIsMacroName:
    """Marker result: the word at a variable's name position is a macro name.

    Attributes:
        name: the macro name that was found.
    """

    def __init__(self, name):
        self.name = name
||
8855 | Boppan | 558 | |
def is_id(c):
    """Return True if the character `c` can be a part of an identifier.

    Identifier characters are printable characters other than operators,
    brackets, separators, quotation marks and whitespace. An empty string
    is not an identifier character.
    """
    if not c.isprintable():
        return False
    # Substring test: '' is contained in any string, so '' gives False
    return c not in "+-/*=<>()[]{};:,|&~#`'\" \n\r\t\v"
||
8855 | Boppan | 561 | |
def is_starts_as_id(s):
    """Return True if `s` starts the way an identifier may start.

    That is: `s` is not empty and its first character is not a digit.
    An empty string gives False instead of raising IndexError.
    """
    return bool(s) and not s[0].isdigit()
||
564 | |||
def parse_after_macro(r):
    """Parse a macro declaration; the reader stands right after 'macro'.

    Reads the macro name and its argument list, takes the comment the
    reader has collected by the end of the declaration line, then moves the
    reader to the '}' that closes the macro body (a '}' preceded by a
    backslash does not count). If the file ends before such a '}' is found,
    the search stops at the end of the file instead of looping forever.

    Args:
        r: the reader (provides curr/step/skip_spaces/nextline/no_lines/
            location and the collected `comment`).

    Returns:
        AsmMacro built from the location, name, comment and arguments.
        The argument list may contain '[', ']' and '*' entries.

    Raises:
        Exception: on an unexpected character in the argument list.
    """
    location = r.location()

    # Skip spaces after the "macro" keyword
    r.skip_spaces()
    # Read macro name ('#' is allowed in it in addition to id characters)
    name = ""
    while is_id(r.curr()) or r.curr() == '#':
        name += r.step()
    # Skip spaces after macro name
    r.skip_spaces()
    # Find all arguments: stop at the end of line, a comment or the body
    args = []
    arg = ''
    while r.curr() and r.curr() != ';' and r.curr() != '{':
        c = r.curr()
        # Collect identifier
        if is_id(c):
            arg += r.step()
        # Save the collected identifier
        elif c == ',':
            args.append(arg)
            arg = ''
            r.step()
        # Just push the '['
        elif c == '[':
            args.append(r.step())
        # Push the identifier and get the ']' or '*' ready to be pushed
        # on next comma
        elif c == ']' or c == '*':
            args.append(arg)
            arg = r.step()
        # Just skip whitespaces
        elif c.isspace():
            r.step()
        # Something unexpected
        else:
            raise Exception(f"Unexpected symbol '{r.curr()}' at index #{r.i} " +
                            f"in the macro declaration at {location} " +
                            f"(line: {r.lines[r.line_idx]})\n''")
    # Append the last argument
    if arg != '':
        args.append(arg)
    # Skip spaces after the argument list
    r.skip_spaces()
    # Get a comment if it is: read till the end of the line and get the comment from the reader
    while r.curr() != '':
        r.step()
    comment = r.comment
    # Find end of the macro
    prev = ''
    while True:
        c = r.curr()
        if c == '}' and prev != '\\':
            break
        if c == '':
            # Past the last line there is nothing left to scan: without
            # this check an unterminated macro would spin here forever
            if r.no_lines():
                break
            prev = ''
            r.nextline()
            continue
        prev = r.step()
    # Build the output
    return AsmMacro(location, name, comment, args)
8855 | Boppan | 628 | |
def parse_variable(r, first_word = None):
    """Try to parse a `name type value` variable declaration at the reader position.

    r          -- reader positioned at (or just before) the declaration.
    first_word -- identifier already consumed by the caller; when None the
                  name is read from the reader instead.

    Returns one of:
      * None                    -- the name is empty, does not start like an
                                   identifier, or is a keyword, a fasm data
                                   type or a struct name; also when the type
                                   word does not start like an identifier;
      * VariableNameIsMacroName -- the name is a known macro name;
      * str (the name)          -- no identifier follows the name, so the
                                   caller may treat the name as something else;
      * (name, type) tuple      -- the second word is a keyword;
      * AsmVariable             -- a complete variable declaration.
    """
    # Remember where the declaration begins before consuming anything
    start_location = r.location()

    # The name may be preceded by spaces
    r.skip_spaces()

    # Take the supplied word as the name, or read an identifier ourselves
    if first_word is None:
        name = ""
        while is_id(r.curr()):
            name += r.step()
    else:
        name = first_word

    # Nothing identifier-like here, or it begins illegally (e.g. with a digit)
    if not name or not is_starts_as_id(name):
        return None

    # Classify the name using the identifier kind table
    name_kind = id_get_kind(name)
    # Keywords never declare variables
    if ID_KIND_KEYWORD in name_kind:
        return None
    # Macro invocations are reported with a dedicated marker object
    if ID_KIND_MACRO_NAME in name_kind:
        return VariableNameIsMacroName(name)
    # A leading data type or struct name is anonymous data: not documented for now
    if ID_KIND_STRUCT_NAME in name_kind or ID_KIND_FASM_TYPE in name_kind:
        return None

    # Read the type identifier that follows the name
    r.skip_spaces()
    type_chars = []
    while is_id(r.curr()):
        type_chars.append(r.step())
    var_type = "".join(type_chars)

    # No second identifier: hand the bare name back to the caller
    if not var_type:
        return name
    # The second word begins illegally
    if not is_starts_as_id(var_type):
        return None

    # A keyword in the type position is not a declaration;
    # return both words of the lexical structure
    if ID_KIND_KEYWORD in id_get_kind(var_type):
        return (name, var_type)

    # Collect the value up to a comment, a newline or the end of the line
    r.skip_spaces()
    value_chars = []
    while r.curr() not in (';', '', '\n'):
        value_chars.append(r.step())
    value = "".join(value_chars)

    # Consume the rest of the line so the reader picks up the comment
    r.skip_spaces()
    while r.curr() != '':
        r.step()

    return AsmVariable(start_location, name, r.comment, var_type, value)
8825 | Boppan | 697 | |
def parse_after_struct(r, as_union = True):
    """Parse a structure (or nested union) body following its opening keyword.

    r        -- reader positioned right after the `struct`/`union` word.
    as_union -- selects the class of the result (see the note below).

    Members are collected line by line until a macro named `ends` is met.
    Nested `union` blocks are parsed recursively and stored as members.
    A label inside the body only produces a warning; any other lone
    identifier raises an Exception.

    NOTE(review): with as_union true (the default) an AsmStruct is built and
    an AsmUnion otherwise, which reads as inverted. It is kept as is because
    the visible callers rely on the current result - confirm the intent.
    """
    global warnings
    start_location = r.location()

    # The name follows the keyword after optional spaces
    r.skip_spaces()
    name = ""
    while is_id(r.curr()):
        name += r.step()

    # Run to the end of the line so the reader collects the comment
    while r.curr() != '':
        r.step()
    comment = r.comment

    # Members start on the following line
    r.nextline()
    members = []
    while True:
        r.skip_spaces()
        member = parse_variable(r)
        member_type = type(member)
        if member_type is VariableNameIsMacroName:
            # The `ends` macro closes the body
            if member.name == 'ends':
                break
        elif member_type is AsmVariable:
            members.append(member)
        elif member_type is str:
            if member == 'union':
                # Parse the union as a struct
                members.append(parse_after_struct(r, as_union = True))
                # Skip the ends of the union
                r.nextline()
            elif r.curr() == ':':
                warnings += f"{r.location()}: Skept the label in the struct\n"
            else:
                raise Exception(f"Garbage in struct member at {start_location} (got '{member}' identifier)")
        r.nextline()

    # Choose the result class from the flag and build the element
    result_class = AsmStruct if as_union else AsmUnion
    return result_class(start_location, name, comment, members)
||
8825 | Boppan | 741 | |
def parse_after_proc(r):
    """Parse the rest of a `proc` line and build an AsmFunction.

    Reads, in order: the procedure name, an optional calling convention
    ('stdcall' or 'c'), an optional `uses` register list and a
    comma-separated argument list where each argument may carry a
    `:type` suffix (the type defaults to 'arg_t'). The remainder of the
    line is then consumed so the reader's comment can be attached.
    """
    name = r.fetch_identifier()

    # The word after the name may be a convention, `uses` or the first argument
    word = r.fetch_identifier()

    # Optional calling convention specifier
    calling_convention = ''
    if word in ('stdcall', 'c'):
        calling_convention = word
        # A comma may separate the convention from what follows
        if r.curr() == ',':
            r.step()
        word = r.fetch_identifier()

    # Optional list of used registers
    used_regs = []
    if word == 'uses':
        # Registers are read until no identifier can be fetched
        reg_name = r.fetch_identifier()
        while reg_name != '':
            used_regs.append(reg_name)
            reg_name = r.fetch_identifier()
        # A comma may separate the register list from the arguments
        if r.curr() == ',':
            r.step()
        word = r.fetch_identifier()

    # Arguments: `name[:type]` items separated by commas
    args = []
    while word != '':
        arg_name, arg_type = word, 'arg_t'
        r.skip_spaces()
        # An explicit type follows a ':'
        if r.curr() == ':':
            r.step()
            arg_type = r.fetch_identifier()
        # A comma announces one more argument, anything else ends the list
        if r.curr() == ',':
            r.step()
            word = r.fetch_identifier()
        else:
            word = ''
        args.append((arg_name, arg_type))

    # Consume the rest of the line so the reader collects the comment
    while r.curr() != '':
        r.step()

    return AsmFunction(r.location(), name, r.comment, calling_convention, args, used_regs)
||
798 | |||
def get_declarations(asm_file_contents, asm_file_name):
    """Collect declarations from one assembly source into the global tables.

    asm_file_contents -- text of the file; not used by this function (the
                         reader is built from the file name), kept so the
                         existing callers keep working.
    asm_file_name     -- name of the file handed to AsmReader.

    Parsed macros, structures, procedures, variables and non-local labels
    are appended to the module-level `elements` list. Macro names, struct
    names and `=` / `equ` constants are registered through id_add_kind, and
    names listed in a `purge` directive lose their macro kind.
    """
    r = AsmReader(asm_file_name)

    while not r.no_lines():
        # Skip leading spaces
        r.skip_spaces()
        # Skip the line if it's starting with a comment
        if r.curr() == ';':
            r.nextline()
            continue

        # Get first word
        first_word = ""
        while is_id(r.curr()):
            first_word += r.step()

        # Match macro declaration
        if first_word == "macro":
            macro = parse_after_macro(r)
            elements.append(macro)
            id_add_kind(macro.name, ID_KIND_MACRO_NAME)
        # Match structure declaration
        elif first_word == "struct":
            struct = parse_after_struct(r)
            elements.append(struct)
            id_add_kind(struct.name, ID_KIND_STRUCT_NAME)
        # Match function definition
        elif first_word == "proc":
            proc = parse_after_proc(r)
            elements.append(proc)
        # Skip the format, include, conditional and repeat directives
        elif first_word in ('format', 'include', 'if', 'repeat'):
            pass
        elif first_word == 'purge':
            while True:
                # Skip spaces after the 'purge' keyword or after the comma
                # that separated the previous macro name
                r.skip_spaces()
                # Get the purged macro name
                name = ''
                while is_id(r.curr()):
                    name += r.step()
                # Forget that the name is a macro. This is best effort: the
                # name may never have been registered. Only ordinary errors
                # are ignored, so Ctrl+C / SystemExit still propagate.
                try:
                    id_remove_kind(name, ID_KIND_MACRO_NAME)
                except Exception:
                    pass
                # Skip spaces after the name
                r.skip_spaces()
                # A comma means more macro names follow
                if r.curr() == ',':
                    r.step()
                    continue
                # All the listed macros were purged
                break
        # Match label or a variable
        elif first_word:
            # Skip spaces after the identifier
            r.skip_spaces()
            # Match a variable
            var = parse_variable(r, first_word)
            if type(var) is AsmVariable:
                elements.append(var)
            # parse_variable returns the supplied word itself when no type
            # identifier follows it; then it may be a label or a `=` constant
            elif type(var) is str:
                name = var
                # Match label beginning (':' after name)
                if r.curr() == ':':
                    # Get to the end of the line and get the comment from the reader
                    while r.curr() != '':
                        r.step()
                    comment = r.comment
                    # Only handle non-local labels
                    if name[0] != '.' and name != "@@" and name != "$Revision":
                        # A label documented with @return/@param is treated as a function
                        if '@return' in comment or '@param' in comment:
                            element = AsmFunction(r.location(), name, comment, '', [], [])
                        else:
                            element = AsmLabel(r.location(), name, comment)
                        elements.append(element)
                elif r.curr() == '=':
                    # Save the identifier as a set constant
                    id_add_kind(first_word, ID_KIND_SET_CONSTANT)
            # Two words where the second one is a keyword
            elif type(var) is tuple:
                (word_one, word_two) = var
                if word_two == 'equ':
                    # Save the identifier as an equated constant
                    id_add_kind(word_one, ID_KIND_EQUATED_CONSTANT)

        r.nextline()
8825 | Boppan | 897 | |
def it_neds_to_be_parsed(source_file):
    """Tell whether `source_file` has to be (re)parsed.

    Returns True when the saved symbols file 'asmxygen.elements.pickle' is
    missing, when the file generated from this source under
    `doxygen_src_path` does not exist, or when the source was modified
    later than that generated file. Returns False otherwise.
    """
    # Without the symbols file everything must be parsed anyway,
    # because the symbols file has to be created and used later
    if not os.path.isfile('asmxygen.elements.pickle'):
        return True

    # Nothing was generated from this source yet
    doxygen_file = doxygen_src_path + '/' + source_file
    if not os.path.isfile(doxygen_file):
        return True

    # The generated file is outdated when the source is newer than it
    return os.path.getmtime(source_file) > os.path.getmtime(doxygen_file)
||
916 | |||
def handle_file(handled_files, asm_file_name, subdir = "."):
    """Handle one assembly source and, recursively, the files it includes.

    handled_files -- list of file names already handled in this run; the
                     name of this file is appended to it.
    asm_file_name -- path of the source file to handle.
    subdir        -- directory that include paths are first tried against.

    'lang.inc' and files that were already handled are skipped. When the
    file needs parsing, elements previously collected from it are dropped
    from the global `elements` list and their struct/macro identifier kinds
    are forgotten. Declarations are collected only if the file needs
    parsing and the run is not a clean-up one (`clean_generated_stuff`).
    """
    global elements
    # Canonicalize the file path and make it relative to the script directory
    cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
    asm_file_name = os.path.realpath(asm_file_name)
    asm_file_name = asm_file_name[len(cwd) + 1:]
    # If it's lang.inc - skip it
    if asm_file_name == 'lang.inc':
        return
    # If the file was handled in this execution before - skip it
    if asm_file_name in handled_files:
        return
    # Say that the file was handled in this execution
    handled_files.append(asm_file_name)

    # Check if the file should be parsed (if it was modified or wasn't parsed yet)
    should_get_declarations = it_neds_to_be_parsed(asm_file_name)
    if not should_get_declarations:
        print(f"Skipping {asm_file_name} (already newest)")
    else:
        print(f"Handling {asm_file_name}")
        # Split the known elements into the ones parsed from this file
        # earlier (to be dropped) and all the others (to be kept)
        kept_elements = []
        elements_to_remove = []
        for element in elements:
            if element.location.split(':')[0] == asm_file_name:
                elements_to_remove.append(element)
            else:
                kept_elements.append(element)
        elements = kept_elements
        # Forget the identifier kinds registered for the removed elements
        for element in elements_to_remove:
            if type(element) == AsmStruct:
                id_remove_kind(element.name, ID_KIND_STRUCT_NAME)
            elif type(element) == AsmMacro:
                id_remove_kind(element.name, ID_KIND_MACRO_NAME)

    # Read the source; the context manager closes the file right away
    with open(asm_file_name, "r", encoding="utf-8") as asm_file:
        asm_file_contents = asm_file.read()

    # Find includes, fix their paths and handle them recursively.
    # The path group is non-greedy so that it ends at the first closing
    # quote even when a trailing comment contains the same quote character.
    includes = re.findall(r'^include (["\'])(.*?)\1', asm_file_contents, flags=re.MULTILINE)
    for include in includes:
        include = include[1].replace('\\', '/')
        full_path = subdir + '/' + include
        # If the path isn't valid, maybe it is not relative to subdir
        if not os.path.isfile(full_path):
            full_path = include
        new_subdir = full_path.rsplit('/', 1)[0]
        handle_file(handled_files, full_path, new_subdir)

    # Only collect declarations from the file if it wasn't parsed before
    if should_get_declarations and not clean_generated_stuff:
        get_declarations(asm_file_contents, asm_file_name)
8825 | Boppan | 962 | |
if __name__ == "__main__":
    # Script entry point: set up the identifier tables and options, parse
    # the kernel sources starting from ./kernel.asm, then dump, clean or
    # emit the result depending on the command line flags.
    link_root = "http://websvn.kolibrios.org/filedetails.php?repname=Kolibri+OS&path=/kernel/trunk"

    # Dict where an identifier is associated with a string
    # The string contains characters specifying flags
    # Available flags:
    #  k - Keyword
    #  m - Macro name
    #  t - fasm data Type name (db, rq, etc.)
    #  s - Struct type name
    #  e - equated constant (name equ value)
    #  = - set constants (name = value)
    ID_KIND_KEYWORD = 'k'
    ID_KIND_MACRO_NAME = 'm'
    ID_KIND_FASM_TYPE = 't'
    ID_KIND_STRUCT_NAME = 's'
    ID_KIND_EQUATED_CONSTANT = 'e'
    ID_KIND_SET_CONSTANT = '='
    id2kind = {}

    for keyword in keywords:
        id_add_kind(keyword, ID_KIND_KEYWORD)

    for fasm_type in fasm_types:
        id_add_kind(fasm_type, ID_KIND_FASM_TYPE)

    # Warning list
    warnings = ""

    # Parameters
    # Path to doxygen folder to make doxygen files in: -o <path>
    doxygen_src_path = 'docs/doxygen'
    # Remove generated doxygen files: --clean
    clean_generated_stuff = False
    # Dump all defined symbols: --dump
    dump_symbols = False
    # Print symbol stats: --stats
    print_stats = False
    # Do not write warnings file: --nowarn
    enable_warnings = True

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", help="Doxygen output folder")
    parser.add_argument("--clean", help="Remove generated files", action="store_true")
    parser.add_argument("--dump", help="Dump all defined symbols", action="store_true")
    parser.add_argument("--stats", help="Print symbol stats", action="store_true")
    parser.add_argument("--nowarn", help="Do not write warnings file", action="store_true")
    parser.add_argument("--noemit", help="Do not emit doxygen files (for testing)", action="store_true")
    args = parser.parse_args()
    doxygen_src_path = args.o if args.o else 'docs/doxygen'
    clean_generated_stuff = args.clean
    dump_symbols = args.dump
    print_stats = args.stats
    enable_warnings = not args.nowarn
    noemit = args.noemit

    # Variables, functions, labels, macros, structure types
    elements = []

    created_files = []

    kernel_files = []

    # Load remembered list of symbols
    # NOTE(review): pickle.load can execute arbitrary code from the file it
    # reads; this is only acceptable while asmxygen.elements.pickle is a
    # cache written by this script itself. Never load a file from elsewhere.
    if os.path.isfile('asmxygen.elements.pickle'):
        print('Reading existing dump of symbols')
        with open('asmxygen.elements.pickle', 'rb') as symbols_file:
            (elements, id2kind) = pickle.load(symbols_file)

    handle_file(kernel_files, "./kernel.asm")

    if dump_symbols:
        # The dump() methods print to stdout, so redirect it to the dump
        # file and always restore it, even if some dump() call fails
        with open('asmxygen.dump.txt', 'w', encoding = 'utf-8') as dump_file:
            stdout = sys.stdout
            sys.stdout = dump_file
            try:
                for asm_element in elements:
                    asm_element.dump()
            finally:
                sys.stdout = stdout

    if clean_generated_stuff:
        kernel_files_set = set(kernel_files)
        for file in kernel_files:
            doxygen_file = f"{doxygen_src_path}/{file}"
            if os.path.isfile(doxygen_file):
                print(f"Removing {file}... ", end = '')
                os.remove(doxygen_file)
                print("Done.")
    elif not noemit:
        print(f"Writing doumented sources to {doxygen_src_path}")

        # Only elements marked as new are emitted
        new_elements = [x for x in elements if x.new]
        for number, element in enumerate(new_elements, start = 1):
            print(f"[{number}/{len(new_elements)}] Emitting {element.name} from {element.location}")
            element.emit(doxygen_src_path)

        print(f"Writing dump of symbols to asmxygen.elements.pickle")

        # Now that the new elements have been written, none of them is new anymore
        for element in elements:
            element.new = False
        # Close the file explicitly so the dump is flushed to disk
        with open('asmxygen.elements.pickle', 'wb') as symbols_file:
            pickle.dump((elements, id2kind), symbols_file)

    if print_stats:
        var_count = 0
        mac_count = 0
        lab_count = 0
        fun_count = 0
        uni_count = 0
        str_count = 0
        for element in elements:
            if type(element) == AsmVariable:
                var_count += 1
            elif type(element) == AsmMacro:
                mac_count += 1
            elif type(element) == AsmLabel:
                lab_count += 1
            elif type(element) == AsmFunction:
                fun_count += 1
            elif type(element) == AsmUnion:
                uni_count += 1
            elif type(element) == AsmStruct:
                str_count += 1
        print(f'Parsed variable count: {var_count}')
        print(f'Parsed macro count: {mac_count}')
        print(f'Parsed label count: {lab_count}')
        print(f'Parsed function count: {fun_count}')
        print(f'Parsed union type count: {uni_count}')
        print(f'Parsed structure type count: {str_count}')

    if enable_warnings:
        # Write the collected warnings and close the file
        with open('asmxygen.txt', "w", encoding = "utf-8") as warnings_file:
            warnings_file.write(warnings)
1094 | open('asmxygen.txt', "w", encoding = "utf-8").write(warnings)>>> |