Rev 9402 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
8834 | Boppan | 1 | import re |
8825 | Boppan | 2 | import os |
8837 | Boppan | 3 | import argparse |
8957 | Boppan | 4 | import sys |
8990 | Boppan | 5 | import pickle |
9402 | Boppan | 6 | import hashlib |
7 | import difflib |
||
8825 | Boppan | 8 | |
# fasm keywords: assembler directives ("align", "equ", "org", ...) followed by
# x86 instruction mnemonics, kept in roughly alphabetical order.
# The list contains a few duplicated entries (e.g. "movq", "movsd",
# "vgatherdpd"); they are harmless for membership tests.
# NOTE(review): some common mnemonics (e.g. "sub", "jl", "fld") are absent
# here - confirm against the upstream list whether entries were lost.
keywords = [
    "align", "equ", "org", "while", "load", "store", "times", "repeat",
    "display", "err", "assert", "if", "aaa", "aad", "aam", "aas", "adc",
    "add", "addpd", "addps", "addsd", "addss", "addsubpd", "addsubps", "adox",
    "aesdeclast", "aesenc", "aesenclast", "aesimc", "aeskeygenassist", "and",
    "andnpd", "andnps", "andpd", "andps", "arpl", "bextr", "blendpd",
    "blendvpd", "blendvps", "blsi", "blsmsk", "blsr", "bndcl", "bndcn",
    "bndldx", "bndmk", "bndmov", "bndstx", "bound", "bsf", "bsr", "bswap",
    "btc", "btr", "bts", "bzhi", "call", "cbw", "cdq", "cdqe", "clac", "clc",
    "cldemote", "clflush", "clflushopt", "cli", "clts", "clwb", "cmc", "cmova",
    "cmovb", "cmovbe", "cmovc", "cmove", "cmovg", "cmovge", "cmovl", "cmovle",
    "cmovnae", "cmovnb", "cmovnbe", "cmovnc", "cmovne", "cmovng", "cmovnge",
    "cmovnle", "cmovno", "cmovnp", "cmovns", "cmovnz", "cmovo", "cmovp",
    "cmovpo", "cmovs", "cmovz", "cmp", "cmppd", "cmpps", "cmps", "cmpsb",
    "cmpsd", "cmpsq", "cmpss", "cmpsw", "cmpxchg", "cmpxchg16b", "cmpxchg8b",
    "comiss", "cpuid", "cqo", "crc32", "cvtdq2pd", "cvtdq2ps", "cvtpd2dq",
    "cvtpd2ps", "cvtpi2pd", "cvtpi2ps", "cvtps2dq", "cvtps2pd", "cvtps2pi",
    "cvtsd2ss", "cvtsi2sd", "cvtsi2ss", "cvtss2sd", "cvtss2si", "cvttpd2dq",
    "cvttps2dq", "cvttps2pi", "cvttsd2si", "cvttss2si", "cwd", "cwde", "daa",
    "dec", "div", "divpd", "divps", "divsd", "divss", "dppd", "dpps", "emms",
    "extractps", "f2xm1", "fabs", "fadd", "faddp", "fbld", "fbstp", "fchs",
    "fcmova", "fcmovae", "fcmovb", "fcmovbe", "fcmovc", "fcmove", "fcmovg",
    "fcmovl", "fcmovle", "fcmovna", "fcmovnae", "fcmovnb", "fcmovnbe",
    "fcmovne", "fcmovng", "fcmovnge", "fcmovnl", "fcmovnle", "fcmovno",
    "fcmovns", "fcmovnz", "fcmovo", "fcmovp", "fcmovpe", "fcmovpo", "fcmovs",
    "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp", "fdiv",
    "fdivr", "fdivrp", "ffree", "fiadd", "ficom", "ficomp", "fidiv", "fidivr",
    "fimul", "fincstp", "finit", "fist", "fistp", "fisttp", "fisub", "fisubr",
    "fld1", "fldcw", "fldenv", "fldl2e", "fldl2t", "fldlg2", "fldln2", "fldpi",
    "fmul", "fmulp", "fnclex", "fninit", "fnop", "fnsave", "fnstcw", "fnstenv",
    "fpatan", "fprem", "fprem1", "fptan", "frndint", "frstor", "fsave",
    "fsin", "fsincos", "fsqrt", "fst", "fstcw", "fstenv", "fstp", "fstsw",
    "fsubp", "fsubr", "fsubrp", "ftst", "fucom", "fucomi", "fucomip", "fucomp",
    "fwait", "fxam", "fxch", "fxrstor", "fxsave", "fxtract", "fyl2x",
    "gf2p8affineinvqb", "gf2p8affineqb", "gf2p8mulb", "haddpd", "haddps",
    "hsubpd", "hsubps", "idiv", "imul", "in", "inc", "ins", "insb", "insd",
    "insw", "int", "int1", "int3", "into", "invd", "invlpg", "invpcid", "iret",
    "jmp", "ja", "jae", "jb", "jbe", "jc", "jcxz", "jecxz", "je", "jg", "jge",
    "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge", "jnl",
    "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo", "js", "jz", "kaddb",
    "kaddq", "kaddw", "kandb", "kandd", "kandnb", "kandnd", "kandnq", "kandnw",
    "kandw", "kmovb", "kmovd", "kmovq", "kmovw", "knotb", "knotd", "knotq",
    "korb", "kord", "korq", "kortestb", "kortestd", "kortestq", "kortestw",
    "kshiftlb", "kshiftld", "kshiftlq", "kshiftlw", "kshiftrb", "kshiftrd",
    "kshiftrw", "ktestb", "ktestd", "ktestq", "ktestw", "kunpckbw", "kunpckdq",
    "kxnorb", "kxnord", "kxnorq", "kxnorw", "kxorb", "kxord", "kxorq", "kxorw",
    "lar", "lddqu", "ldmxcsr", "lds", "lea", "leave", "les", "lfence", "lfs",
    "lgs", "lidt", "lldt", "lmsw", "lock", "lods", "lodsb", "lodsd", "lodsq",
    "loop", "loopa", "loopae", "loopb", "loopbe", "loopc", "loope", "loopg",
    "loopl", "loople", "loopna", "loopnae", "loopnb", "loopnbe", "loopnc",
    "loopng", "loopnge", "loopnl", "loopnle", "loopno", "loopnp", "loopns",
    "loopo", "loopp", "looppe", "looppo", "loops", "loopz", "lsl", "lss",
    "lzcnt", "maskmovdqu", "maskmovq", "maxpd", "maxps", "maxsd", "maxss",
    "minpd", "minps", "minsd", "minss", "monitor", "mov", "movapd", "movaps",
    "movd", "movddup", "movdir64b", "movdiri", "movdq2q", "movdqa", "movdqu",
    "movhpd", "movhps", "movlhps", "movlpd", "movlps", "movmskpd", "movmskps",
    "movntdqa", "movnti", "movntpd", "movntps", "movntq", "movq", "movq",
    "movs", "movsb", "movsd", "movsd", "movshdup", "movsldup", "movsq",
    "movsw", "movsx", "movsxd", "movupd", "movups", "movzx", "mpsadbw", "mul",
    "mulps", "mulsd", "mulss", "mulx", "mwait", "neg", "nop", "not", "or",
    "orps", "out", "outs", "outsb", "outsd", "outsw", "pabsb", "pabsd",
    "pabsw", "packssdw", "packsswb", "packusdw", "packuswb", "paddb", "paddd",
    "paddsb", "paddsw", "paddusb", "paddusw", "paddw", "palignr", "pand",
    "pause", "pavgb", "pavgw", "pblendvb", "pblendw", "pclmulqdq", "pcmpeqb",
    "pcmpeqq", "pcmpeqw", "pcmpestri", "pcmpestrm", "pcmpgtb", "pcmpgtd",
    "pcmpgtw", "pcmpistri", "pcmpistrm", "pdep", "pext", "pextrb", "pextrd",
    "pextrw", "phaddd", "phaddsw", "phaddw", "phminposuw", "phsubd", "phsubsw",
    "pinsrb", "pinsrd", "pinsrq", "pinsrw", "pmaddubsw", "pmaddwd", "pmaxsb",
    "pmaxsq", "pmaxsw", "pmaxub", "pmaxud", "pmaxuq", "pmaxuw", "pminsb",
    "pminsq", "pminsw", "pminub", "pminud", "pminuq", "pminuw", "pmovmskb",
    "pmovzx", "pmuldq", "pmulhrsw", "pmulhuw", "pmulhw", "pmulld", "pmullq",
    "pmuludq", "pop", "popa", "popad", "popcnt", "popf", "popfd", "popfq",
    "prefetchw", "prefetchh", "psadbw", "pshufb", "pshufd", "pshufhw",
    "pshufw", "psignb", "psignd", "psignw", "pslld", "pslldq", "psllq",
    "psrad", "psraq", "psraw", "psrld", "psrldq", "psrlq", "psrlw", "psubb",
    "psubq", "psubsb", "psubsw", "psubusb", "psubusw", "psubw", "ptest",
    "punpckhbw", "punpckhdq", "punpckhqdq", "punpckhwd", "punpcklbw",
    "punpcklqdq", "punpcklwd", "push", "pushw", "pushd", "pusha", "pushad",
    "pushfd", "pushfq", "pxor", "rcl", "rcpps", "rcpss", "rcr", "rdfsbase",
    "rdmsr", "rdpid", "rdpkru", "rdpmc", "rdrand", "rdseed", "rdtsc", "rdtscp",
    "repe", "repne", "repnz", "repz", "ret", "rol", "ror", "rorx", "roundpd",
    "roundsd", "roundss", "rsm", "rsqrtps", "rsqrtss", "sahf", "sal", "sar",
    "sbb", "scas", "scasb", "scasd", "scasw", "seta", "setae", "setb", "setbe",
    "sete", "setg", "setge", "setl", "setle", "setna", "setnae", "setnb",
    "setnc", "setne", "setng", "setnge", "setnl", "setnle", "setno", "setnp",
    "setnz", "seto", "setp", "setpe", "setpo", "sets", "setz", "sfence",
    "sha1msg1", "sha1msg2", "sha1nexte", "sha1rnds4", "sha256msg1",
    "sha256rnds2", "shl", "shld", "shlx", "shr", "shrd", "shrx", "shufpd",
    "sidt", "sldt", "smsw", "sqrtpd", "sqrtps", "sqrtsd", "sqrtss", "stac",
    "std", "sti", "stmxcsr", "stos", "stosb", "stosd", "stosq", "stosw", "str",
    "subpd", "subps", "subsd", "subss", "swapgs", "syscall", "sysenter",
    "sysret", "test", "tpause", "tzcnt", "ucomisd", "ucomiss", "ud",
    "umwait", "unpckhpd", "unpckhps", "unpcklpd", "unpcklps", "valignd",
    "vblendmpd", "vblendmps", "vbroadcast", "vcompresspd", "vcompressps",
    "vcvtpd2udq", "vcvtpd2uqq", "vcvtph2ps", "vcvtps2ph", "vcvtps2qq",
    "vcvtps2uqq", "vcvtqq2pd", "vcvtqq2ps", "vcvtsd2usi", "vcvtss2usi",
    "vcvttpd2udq", "vcvttpd2uqq", "vcvttps2qq", "vcvttps2udq", "vcvttps2uqq",
    "vcvttss2usi", "vcvtudq2pd", "vcvtudq2ps", "vcvtuqq2pd", "vcvtuqq2ps",
    "vcvtusi2ss", "vdbpsadbw", "verr", "verw", "vexpandpd", "vexpandps",
    "vextractf32x4", "vextractf32x8", "vextractf64x2", "vextractf64x4",
    "vextracti32x4", "vextracti32x8", "vextracti64x2", "vextracti64x4",
    "vfixupimmps", "vfixupimmsd", "vfixupimmss", "vfmadd132pd", "vfmadd132ps",
    "vfmadd132ss", "vfmadd213pd", "vfmadd213ps", "vfmadd213sd", "vfmadd213ss",
    "vfmadd231ps", "vfmadd231sd", "vfmadd231ss", "vfmaddsub132pd",
    "vfmaddsub213pd", "vfmaddsub213ps", "vfmaddsub231pd", "vfmaddsub231ps",
    "vfmsub132ps", "vfmsub132sd", "vfmsub132ss", "vfmsub213pd", "vfmsub213ps",
    "vfmsub213ss", "vfmsub231pd", "vfmsub231ps", "vfmsub231sd", "vfmsub231ss",
    "vfmsubadd132ps", "vfmsubadd213pd", "vfmsubadd213ps", "vfmsubadd231pd",
    "vfnmadd132pd", "vfnmadd132ps", "vfnmadd132sd", "vfnmadd132ss",
    "vfnmadd213ps", "vfnmadd213sd", "vfnmadd213ss", "vfnmadd231pd",
    "vfnmadd231sd", "vfnmadd231ss", "vfnmsub132pd", "vfnmsub132ps",
    "vfnmsub132ss", "vfnmsub213pd", "vfnmsub213ps", "vfnmsub213sd",
    "vfnmsub231pd", "vfnmsub231ps", "vfnmsub231sd", "vfnmsub231ss",
    "vfpclassps", "vfpclasssd", "vfpclassss", "vgatherdpd", "vgatherdpd",
    "vgatherdps", "vgatherqpd", "vgatherqpd", "vgatherqps", "vgatherqps",
    "vgetexpps", "vgetexpsd", "vgetexpss", "vgetmantpd", "vgetmantps",
    "vgetmantss", "vinsertf128", "vinsertf32x4", "vinsertf32x8",
    "vinsertf64x4", "vinserti128", "vinserti32x4", "vinserti32x8",
    "vinserti64x4", "vmaskmov", "vmovdqa32", "vmovdqa64", "vmovdqu16",
    "vmovdqu64", "vmovdqu8", "vpblendd", "vpblendmb", "vpblendmd", "vpblendmq",
    "vpbroadcast", "vpbroadcastb", "vpbroadcastd", "vpbroadcastm",
    "vpbroadcastw", "vpcmpb", "vpcmpd", "vpcmpq", "vpcmpub", "vpcmpud",
    "vpcmpuw", "vpcmpw", "vpcompressd", "vpcompressq", "vpconflictd",
    "vperm2f128", "vperm2i128", "vpermb", "vpermd", "vpermi2b", "vpermi2d",
    "vpermi2ps", "vpermi2q", "vpermi2w", "vpermilpd", "vpermilps", "vpermpd",
    "vpermq", "vpermt2b", "vpermt2d", "vpermt2pd", "vpermt2ps", "vpermt2q",
    "vpermw", "vpexpandd", "vpexpandq", "vpgatherdd", "vpgatherdd",
    "vpgatherdq", "vpgatherqd", "vpgatherqd", "vpgatherqq", "vpgatherqq",
    "vplzcntq", "vpmadd52huq", "vpmadd52luq", "vpmaskmov", "vpmovb2m",
    "vpmovdb", "vpmovdw", "vpmovm2b", "vpmovm2d", "vpmovm2q", "vpmovm2w",
    "vpmovqb", "vpmovqd", "vpmovqw", "vpmovsdb", "vpmovsdw", "vpmovsqb",
    "vpmovsqw", "vpmovswb", "vpmovusdb", "vpmovusdw", "vpmovusqb", "vpmovusqd",
    "vpmovuswb", "vpmovw2m", "vpmovwb", "vpmultishiftqb", "vprold", "vprolq",
    "vprolvq", "vprord", "vprorq", "vprorvd", "vprorvq", "vpscatterdd",
    "vpscatterqd", "vpscatterqq", "vpsllvd", "vpsllvq", "vpsllvw", "vpsravd",
    "vpsravw", "vpsrlvd", "vpsrlvq", "vpsrlvw", "vpternlogd", "vpternlogq",
    "vptestmd", "vptestmq", "vptestmw", "vptestnmb", "vptestnmd", "vptestnmq",
    "vrangepd", "vrangeps", "vrangesd", "vrangess", "vrcp14pd", "vrcp14ps",
    "vrcp14ss", "vreducepd", "vreduceps", "vreducesd", "vreducess",
    "vrndscaleps", "vrndscalesd", "vrndscaless", "vrsqrt14pd", "vrsqrt14ps",
    "vrsqrt14ss", "vscalefpd", "vscalefps", "vscalefsd", "vscalefss",
    "vscatterdps", "vscatterqpd", "vscatterqps", "vshuff32x4", "vshuff64x2",
    "vshufi64x2", "vtestpd", "vtestps", "vzeroall", "vzeroupper", "wait",
    "wrfsbase", "wrgsbase", "wrmsr", "wrpkru", "xabort", "xacquire", "xadd",
    "xchg", "xend", "xgetbv", "xlat", "xlatb", "xor", "xorpd", "xorps",
    "xrstor", "xrstors", "xsave", "xsavec", "xsaveopt", "xsaves", "xsetbv",
]
157 | |||
# fasm data directive names, listed as d*/r* pairs per size suffix, plus "du".
fasm_types = [
    "db", "rb", "dw", "rw", "dd", "rd", "dp", "rp",
    "df", "rf", "dq", "rq", "dt", "rt", "du",
]
||
168 | |||
def id_add_kind(identifier, kind):
    """Append the ``kind`` flag to the flag string stored for ``identifier``.

    The module-level ``id2kind`` table maps an identifier to a string of kind
    flags; an identifier seen for the first time starts from an empty string.
    """
    id2kind[identifier] = id2kind.get(identifier, '') + kind
||
174 | |||
def id_remove_kind(identifier, kind):
    """Strip every occurrence of the ``kind`` flag from ``identifier``.

    Identifiers missing from the module-level ``id2kind`` table are left
    alone (no entry is created for them).
    """
    flags = id2kind.get(identifier)
    if flags is not None and kind in flags:
        id2kind[identifier] = flags.replace(kind, '')
||
180 | |||
def id_get_kind(identifier):
    """Return the kind-flag string of ``identifier``, or '' when it is unknown."""
    return id2kind.get(identifier, '')
||
187 | |||
class LegacyAsmReader:
    """Character-by-character cursor over the lines of a source file.

    The cursor is the pair (``line_idx``, ``i``): the index of the current
    line and the index of the current character inside it. Reading past the
    end of a line (or past the last line) yields the empty string.
    """

    def __init__(self, file):
        """Load all lines of ``file`` (read as UTF-8) and rewind the cursor."""
        self.file = file
        # Use a context manager so the handle is closed right after reading
        with open(file, "r", encoding="utf-8") as source:
            self.lines = source.readlines()
        self.line_idx = 0
        self.i = 0

    def currline(self):
        """Return the whole current line (IndexError past the last line)."""
        return self.lines[self.line_idx]

    def curr(self):
        """Return the character under the cursor, or '' when out of range."""
        try:
            return self.lines[self.line_idx][self.i]
        except IndexError:
            # Either the line or the character index is past the end
            return ''

    def step(self):
        """Consume and return the current character.

        After advancing, a backslash followed only by whitespace and/or a
        ';' comment is treated as a line wrap: the cursor jumps to the start
        of the next line.
        """
        c = self.curr()
        self.i += 1
        # Wrap the line if '\\' is followed by whitespace and/or a comment
        while self.curr() == '\\':
            i_of_backslash = self.i
            self.i += 1
            while self.curr().isspace():
                self.i += 1
            if self.curr() == ';' or self.curr() == '':
                self.line_idx += 1
                self.i = 0
            else:
                # There is something other than a comment after the backslash,
                # so do not interpret the backslash as a line wrap
                self.i = i_of_backslash
                break
        return c

    def nextline(self):
        """Step through the rest of the current line, then move to the next."""
        c = self.curr()
        while c != '':
            c = self.step()
        self.line_idx += 1
        self.i = 0

    def no_lines(self):
        """Return True once the cursor is past the last line."""
        return self.line_idx >= len(self.lines)

    def location(self):
        """Return the cursor position as ``"<file>:<1-based line number>"``."""
        return f"{self.file}:{self.line_idx + 1}"

    def skip_spaces(self):
        """Consume whitespace characters under the cursor."""
        while self.curr().isspace():
            self.step()
||
239 | |||
class AsmReaderRecognizingStrings(LegacyAsmReader):
    """Reader that tracks whether the cursor is inside a quoted string.

    ``in_string`` holds the quotation mark that opened the current string,
    or None outside of a string. Tracking can be switched off through
    ``should_recognize_strings``.
    """

    def __init__(self, file):
        super().__init__(file)
        self.in_string = None
        self.should_recognize_strings = True

    def step(self):
        """Consume one character and update the string-tracking state."""
        consumed = super().step()
        # Only quotation marks matter, and only while tracking is enabled
        if not self.should_recognize_strings or consumed not in ('"', "'"):
            return consumed
        if self.in_string is None:
            # Not in a string yet: this quotation mark opens one
            self.in_string = consumed
        elif self.in_string == consumed:
            # Same quotation mark that opened the string: it closes it
            self.in_string = None
        return consumed
||
260 | |||
class AsmReaderReadingComments(AsmReaderRecognizingStrings):
    """Reader that accumulates ';' comment text while stepping.

    Per-line status flags record whether the current line has code and
    whether a comment has started on it. ``comment`` collects the characters
    that follow the ';' which started the comment.
    """

    def __init__(self, file):
        super().__init__(file)
        self.status = {}
        self.status_reset()
        self.comment = ''

    def status_reset(self):
        """Clear the per-line flags and re-enable string recognition."""
        # Whether the line has non-comment code
        self.status_has_code = False
        # Whether the line has a comment at the end
        self.status_has_comment = False
        # We are definitely out of a comment, so strings may be recognized
        self.should_recognize_strings = True

    def status_set_has_comment(self):
        """Mark the line as commented and stop recognizing strings."""
        self.status_has_comment = True
        # Quotation marks inside a comment must not open strings
        self.should_recognize_strings = False

    def status_set_has_code(self):
        """Mark the line as containing code."""
        self.status_has_code = True

    def update_status(self):
        """Update the line status from the character under the cursor."""
        ch = self.curr()
        if self.status_has_comment:
            # Already inside a comment: collect its text
            self.comment += ch
        elif ch == ';' and not self.in_string:
            # A ';' outside of a string starts a comment
            self.status_set_has_comment()
        elif not ch.isspace():
            # Any other non-whitespace character means the line has code
            self.status_set_has_code()

    def step(self):
        """Consume one character, then update status from the new current one."""
        consumed = super().step()
        self.update_status()
        return consumed

    def nextline(self):
        """Advance to the next line and decide whether to keep the comment."""
        line_being_left = self.currline()
        super().nextline()
        # A comment-only line keeps its text so the following line can extend
        # it. A line with code drops the collected comment - unless that line
        # set alignment, because many functions are documented right before
        # the align directive rather than before their labels.
        if self.status_has_code and not line_being_left.startswith("align "):
            self.comment = ''
        # From here on the status describes the new line
        self.status_reset()
        # Take the first character of the new line into account
        self.update_status()
||
319 | |||
class AsmReaderFetchingIdentifiers(AsmReaderReadingComments):
    """Reader that can pull a whole identifier from the cursor position."""

    def __init__(self, file):
        super().__init__(file)

    def fetch_identifier(self):
        """Skip leading whitespace and consume the identifier that follows.

        Returns the consumed characters as a string; the string is empty when
        the first non-space character is not an identifier character.
        """
        self.skip_spaces()
        chars = []
        while is_id(self.curr()):
            chars.append(self.step())
        return ''.join(chars)
||
330 | |||
class AsmReader(AsmReaderFetchingIdentifiers):
    """Final reader type; adds nothing on top of its base class.

    Construction is inherited unchanged: ``AsmReader(file)``.
    """
||
334 | |||
def append_file(full_path, contents):
    """Append ``contents`` to the output identified by ``full_path``.

    When the module-level ``debug_mode`` flag is set, nothing is written to
    disk: the text is accumulated in the module-level ``output_files`` dict
    under ``full_path``. Otherwise the text is appended to the file itself.
    """
    if debug_mode:
        output_files[full_path] = output_files.get(full_path, "") + contents
    else:
        # The context manager closes the file even if the write fails
        with open(full_path, "a") as f:
            f.write(contents)
||
344 | |||
class AsmElement:
    """Base class of a documented entity found in an assembly source.

    Attributes set by the constructor:
      new      - True for every element constructed during this execution
      location - the "<file>:<line>" string the element was found at
      file     - file part of the location, with '\\' replaced by '/'
      line     - line part of the location (kept as a string)
      name     - element name
      comment  - comment text attached to the element
    """

    def __init__(self, location, name, comment):
        """Store the element data and warn when it has no comment.

        Raises IndexError when ``location`` contains no ':' separator.
        """
        global warnings

        # If the element was constructed during this execution then it is new
        self.new = True
        self.location = location
        # Split on the last ':' only, so a colon inside the path itself
        # (e.g. a drive letter) stays in the file part
        location_parts = self.location.rsplit(':', 1)
        self.file = location_parts[0].replace('\\', '/')
        self.line = location_parts[1]
        self.name = name
        self.comment = comment

        if self.comment == '':
            warnings += f'{self.location}: Undocumented element\n'

    def dump(self):
        """Print the location, name and comment of the element."""
        print(f"\n{self.location}: {self.name}")
        print(f"{self.comment}")

    def emit(self, dest, doxycomment = '', declaration = ''):
        """Append a doxygen comment plus declaration for this element.

        The text goes to ``dest + '/' + self.file`` through ``append_file``.
        ``declaration`` defaults to ``#define <name>``. Nothing is emitted
        when the element comment contains '@dont_give_a_doxygen'.
        Non-ASCII characters of the emitted text are replaced with '?'.
        """
        # Do not emit anything if the symbol is marked as hidden in its comment
        if '@dont_give_a_doxygen' in self.comment:
            return

        global warnings
        # Redefine default declaration
        if declaration == '':
            declaration = f'#define {self.name}'
        # Check doxycomment
        if not doxycomment.endswith('\n'):
            doxycomment += '\n'
        # Only inspect the brief text when the '@brief ' marker is really
        # there; indexing the split result blindly raised IndexError otherwise
        _, marker, after_brief = doxycomment.partition('@brief ')
        if marker and after_brief[:1].islower():
            warnings += f"{self.location}: Brief comment starting from lowercase\n"
        # Build contents to emit
        contents = ''.join((
            '/**\n',
            doxycomment,
            f"@par Source\n{self.file}:{self.line}\n",
            '*/\n',
            declaration,
            '\n\n',
        ))
        # Get path to file to emit this
        full_path = dest + '/' + self.file
        # Remove the file on first access if it was created by previous generation
        if full_path not in created_files:
            if os.path.isfile(full_path):
                os.remove(full_path)
            created_files.append(full_path)
            # Create directories needed for the file
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
        contents = ''.join(ch if ord(ch) < 128 else '?' for ch in contents)

        append_file(full_path, contents)
399 | |||
class AsmVariable(AsmElement):
    """A variable: an element with a type name and an initial value."""

    def __init__(self, location, name, comment, type, init):
        super().__init__(location, name, comment)
        self.type = type
        self.init = init

    def dump(self):
        """Print the element followed by a "(Variable)" marker."""
        super().dump()
        print("(Variable)\n---")

    def emit(self, dest):
        """Emit the variable as a C-like ``<type> <name>;`` declaration."""
        # Doxygen comment: the element comment (prefixed with '@brief ' when
        # it has no '@brief' of its own) plus the initial value
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        doxycomment += f"@par Initial value\n{self.init}\n"
        # Dots are replaced with underscores in both the type and the name
        c_type = self.type.replace(".", "_")
        c_name = self.name.replace(".", "_")
        super().emit(dest, doxycomment, f"{c_type} {c_name};")
||
8855 | Boppan | 424 | |
class AsmFunction(AsmElement):
    """A function: an element with a calling convention, args and used regs.

    ``args`` is a list of ``(name, type)`` tuples.
    """

    def __init__(self, location, name, comment, calling_convention, args, used_regs):
        super().__init__(location, name, comment)
        self.calling_convention = calling_convention
        self.args = args
        self.used_regs = used_regs

    def dump(self):
        """Print the element followed by a "(Function)" marker."""
        super().dump()
        print("(Function)\n---")

    def emit(self, dest):
        """Emit the function as a C-like ``void <name>(<args>);`` declaration.

        When the function has no arguments but its comment has '@param' tags,
        the parameter names are taken from the comment (typed as 'arg_t') and
        appended to ``self.args``.
        """
        # Build doxycomment specific for the function
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        # If there were no arguments, maybe that's just a label:
        # then parse parameters from its comment
        if len(self.args) == 0 and '@param' in self.comment:
            comment = self.comment
            end = len(comment)
            pos = comment.find('@param')
            while pos != -1:
                # Skip '@param'
                pos += len('@param')
                # Skip spaces after '@param'. Every index is bounds-checked:
                # a comment ending right here must not raise IndexError
                while pos < end and comment[pos].isspace():
                    pos += 1
                # Get the parameter name (may be empty)
                name_start = pos
                while pos < end and is_id(comment[pos]):
                    pos += 1
                # Save the parameter
                self.args.append((comment[name_start:pos], 'arg_t'))
                pos = comment.find('@param', pos)
        # Build the arg list for declaration
        arg_list = '(' + ", ".join(f"{arg[1]} {arg[0]}" for arg in self.args) + ')'
        # Build the declaration
        name = self.name.replace(".", "_")
        declaration = f"void {name}{arg_list};"
        # Emit this
        super().emit(dest, doxycomment, declaration)
||
8855 | Boppan | 475 | |
class AsmLabel(AsmElement):
    """A plain label element."""

    def __init__(self, location, name, comment):
        super().__init__(location, name, comment)

    def dump(self):
        """Print the element followed by a "(Label)" marker."""
        super().dump()
        print("(Label)\n---")

    def emit(self, dest):
        """Emit the label as a ``label <name>;`` declaration."""
        # The comment gets a '@brief ' prefix unless it already has '@brief'
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        # Dots in the name are replaced with underscores
        c_name = self.name.replace(".", "_")
        super().emit(dest, doxycomment, f"label {c_name};")
||
8855 | Boppan | 495 | |
class AsmMacro(AsmElement):
    """A macro element together with its raw argument tokens."""

    def __init__(self, location, name, comment, args):
        super().__init__(location, name, comment)
        self.args = args

    def dump(self):
        """Print the element followed by a "(Macro)" marker."""
        super().dump()
        print("(Macro)\n---")

    def emit(self, dest):
        """Emit the macro as a ``#define <name>(<args>)`` declaration."""
        # Drop the '[', ']' and '*' tokens. This is a substring test against
        # "[]*", so empty tokens (and "[]", "]*", "[]*") are dropped as well
        plain_args = [token for token in self.args if token not in "[]*"]
        # C-like argument list; omitted entirely when there are no arguments
        arg_list = '(' + ", ".join(plain_args) + ')' if plain_args else ""
        # The comment gets a '@brief ' prefix unless it already has '@brief'
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        super().emit(dest, doxycomment, f"#define {self.name}{arg_list}")
||
8855 | Boppan | 528 | |
class AsmStruct(AsmElement):
    """A structure element with a list of members."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        """Print the element followed by a "(Struct)" marker."""
        super().dump()
        print("(Struct)\n---")

    def emit(self, dest):
        """Emit the structure as a C-like ``struct <name> { ... };`` block."""
        # The comment gets a '@brief ' prefix unless it already has '@brief'
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        doxycomment += '\n'
        # One line per member; only members that are exactly AsmVariable
        # instances are listed
        member_lines = ''.join(
            f'\t{member.type} {member.name}; /**< {member.comment} */\n'
            for member in self.members
            if type(member) is AsmVariable
        )
        declaration = f"struct {self.name}" + " {\n" + member_lines + '};'
        super().emit(dest, doxycomment, declaration)
||
8855 | Boppan | 553 | |
class AsmUnion(AsmElement):
    """A union element with a list of members."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        """Print the element followed by a "(Union)" marker."""
        super().dump()
        print("(Union)\n---")

    def emit(self, dest):
        """Emit the union as an empty ``union <name> {};`` declaration.

        The members are not listed in the emitted declaration.
        """
        # The comment gets a '@brief ' prefix unless it already has '@brief'
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        super().emit(dest, doxycomment, f"union {self.name}" + " {};")
||
8855 | Boppan | 573 | |
class VariableNameIsMacroName:
    """Result object carrying a name that turned out to be a macro name."""

    def __init__(self, name):
        # The identifier that was found to be a macro name
        self.name = name
||
8855 | Boppan | 577 | |
def is_id(c):
    """Tell whether the character ``c`` may be part of an identifier.

    Printable characters qualify unless they are one of the listed operator,
    bracket, punctuation, quote or whitespace characters. The empty string
    does not qualify.
    """
    if not c.isprintable():
        return False
    # Substring membership on purpose: '' is "in" every string, which is what
    # makes the empty string (end of line) a non-identifier character
    return c not in "+-/*=<>()[]{};:,|&~#`'\" \n\r\t\v"
||
8855 | Boppan | 580 | |
def is_starts_as_id(s):
    """Tell whether ``s`` starts the way an identifier may start.

    A string qualifies when its first character is not a digit. An empty
    string does not qualify (it used to raise IndexError).
    """
    return bool(s) and not s[0].isdigit()
||
583 | |||
def parse_after_macro(r):
    """Parse a macro definition; the reader ``r`` is just past "macro".

    Reads the macro name and its argument tokens, takes the comment the
    reader has collected by the end of the declaration line, then advances
    the reader to the '}' that closes the macro body (a '}' preceded by a
    backslash does not count). Returns an ``AsmMacro``.

    Raises Exception on an unexpected character in the argument list.
    """
    location = r.location()

    # Skip spaces after the "macro" keyword
    r.skip_spaces()
    # Read the macro name ('#' is accepted inside it)
    name_chars = []
    while is_id(r.curr()) or r.curr() == '#':
        name_chars.append(r.step())
    name = "".join(name_chars)
    # Skip spaces after the macro name
    r.skip_spaces()
    # Collect argument tokens until end of line, a comment or the body start
    args = []
    arg = ''
    while True:
        ch = r.curr()
        if not ch or ch == ';' or ch == '{':
            break
        if is_id(ch):
            # Collect identifier
            arg += r.step()
        elif ch == ',':
            # Save the collected identifier
            args.append(arg)
            arg = ''
            r.step()
        elif ch == '[':
            # Just push the '['
            args.append(r.step())
        elif ch in (']', '*'):
            # Push the identifier; the ']' or '*' itself becomes the pending
            # token, to be pushed on the next comma
            args.append(arg)
            arg = r.step()
        elif ch.isspace():
            # Just skip whitespace
            r.step()
        else:
            # Something unexpected
            raise Exception(f"Unexpected symbol '{r.curr()}' at index #{r.i} " +
                            f"in the macro declaration at {location} " +
                            f"(line: {r.lines[r.line_idx]})\n''")
    # Append the last argument
    if arg != '':
        args.append(arg)
    # Skip spaces after the argument list
    r.skip_spaces()
    # Read till the end of the line so the reader collects a trailing comment
    while r.curr() != '':
        r.step()
    comment = r.comment
    # Find the end of the macro: an unescaped '}'
    prev = ''
    while not (r.curr() == '}' and prev != '\\'):
        if r.curr() == '':
            # End of line: the escape state does not carry over
            prev = ''
            r.nextline()
        else:
            prev = r.step()
    # Build the output
    return AsmMacro(location, name, comment, args)
8855 | Boppan | 647 | |
def parse_variable(r, first_word = None):
    """Try to parse a variable declaration at the reader's current position.

    Parameters:
        r: the source reader. This function uses its location(),
            skip_spaces(), curr(), step() methods and its comment attribute.
        first_word: the identifier that starts the line if the caller has
            already consumed it; None to read the identifier from the reader.

    Returns one of:
        None - no usable identifier was found, the name is a keyword,
            a struct name or a fasm data type, or the type word does not
            start like an identifier.
        VariableNameIsMacroName(name) - the name is a known macro name.
        str (the name) - no type identifier follows the name; the caller may
            still recognise it as a label or a constant.
        tuple (name, word) - the word after the name is a keyword
            (for example 'equ').
        AsmVariable - a variable declaration was recognised.
    """
    location = r.location()

    # Skip spaces before variable name
    r.skip_spaces()
    # Read the name if it was not supplied, otherwise use the supplied one
    if first_word is None:
        name = ""
        while is_id(r.curr()):
            name += r.step()
    else:
        name = first_word
    # An empty name means something other than an identifier starts here
    if not name:
        return None
    # A name starting with a digit (or otherwise illegally) is not a variable
    if not is_starts_as_id(name):
        return None
    # Get kind of the identifier from id2kind table
    kind = id_get_kind(name)
    # If it's a keyword, that's not a variable declaration
    if ID_KIND_KEYWORD in kind:
        return None
    # If it's a macro name, that's not a variable declaration either,
    # but the caller wants to know which macro it was
    if ID_KIND_MACRO_NAME in kind:
        return VariableNameIsMacroName(name)
    # A data type or a structure name in the first position is just data,
    # which is not documented for now
    if ID_KIND_STRUCT_NAME in kind or ID_KIND_FASM_TYPE in kind:
        return None
    # Skip spaces before type name
    r.skip_spaces()
    # Read type name
    var_type = ""
    while is_id(r.curr()):
        var_type += r.step()
    # No type identifier after the name: hand the name back,
    # it may be meaningful for the caller
    if not var_type:
        return name
    # A type word starting with a digit (or otherwise illegally) is illegal
    if not is_starts_as_id(var_type):
        return None
    # Get kind of type identifier
    type_kind = id_get_kind(var_type)
    # If the second word is a keyword, that's not a variable declaration:
    # return the two words of the lexical structure
    if ID_KIND_KEYWORD in type_kind:
        return (name, var_type)
    # Skip spaces before the value
    r.skip_spaces()
    # Read the value until the comment or end of the line
    value = ""
    while r.curr() not in (';', '', '\n'):
        value += r.step()
    # Skip spaces after the value
    r.skip_spaces()
    # Read till end of the line so the reader collects the trailing comment
    while r.curr() != '':
        r.step()
    # Build the result
    return AsmVariable(location, name, r.comment, var_type, value)
8825 | Boppan | 716 | |
def parse_after_struct(r, as_union = True):
    """Parse a structure body; the reader stands right after the keyword.

    Reads the structure name and the comment of its first line, then
    collects members line by line until a macro-name token 'ends' is met.
    A nested 'union' is parsed recursively and stored as a single member.

    Raises Exception when a bare identifier in the body is neither 'union'
    nor followed by ':' (a label, which is skipped with a warning).

    NOTE(review): a truthy as_union yields AsmStruct and a falsy one yields
    AsmUnion. That reads as inverted, but the flag is kept as is because the
    default is part of the interface - confirm before changing.
    """
    global warnings
    start = r.location()

    # Skip spaces after the keyword and collect the name
    r.skip_spaces()
    name_parts = []
    while is_id(r.curr()):
        name_parts.append(r.step())
    name = "".join(name_parts)
    # Consume the rest of the line so the reader gathers the comment
    while r.curr() != '':
        r.step()
    comment = r.comment
    # Members start on the next line
    r.nextline()

    members = []
    while True:
        r.skip_spaces()
        member = parse_variable(r)
        member_type = type(member)
        if member_type is VariableNameIsMacroName:
            # 'ends' closes the structure; other macro names are skipped
            if member.name == 'ends':
                break
        elif member_type is AsmVariable:
            members.append(member)
        elif member_type is str:
            if member == 'union':
                # Parse the union with the same routine and keep it as a member
                members.append(parse_after_struct(r, as_union = True))
                # Skip the 'ends' line of the union
                r.nextline()
            elif r.curr() == ':':
                warnings += f"{r.location()}: Skept the label in the struct\n"
            else:
                raise Exception(f"Garbage in struct member at {start} (got '{member}' identifier)")
        r.nextline()

    # Pick the result class according to the flag (see the note above)
    result_class = AsmStruct if as_union else AsmUnion
    return result_class(start, name, comment, members)
||
8825 | Boppan | 760 | |
def parse_after_proc(r):
    """Parse a 'proc' header; the reader stands right after the keyword.

    Recognised layout, every part after the name being optional:
        name [stdcall|c][,] [uses reg ...][,] [arg[:type], ...]
    Arguments without an explicit type get the type 'arg_t'.

    Returns an AsmFunction built from the reader location, the name, the
    line comment, the calling convention ('' if none), the list of
    (argument name, argument type) tuples and the list of used registers.
    """
    proc_name = r.fetch_identifier()
    # The identifier following the name decides what comes next
    token = r.fetch_identifier()

    # Optional calling convention specifier
    convention = ''
    if token in ('stdcall', 'c'):
        convention = token
        # A comma after the convention is just skipped
        if r.curr() == ',':
            r.step()
        token = r.fetch_identifier()

    # Optional 'uses' register list: identifiers up to the first empty one
    registers = []
    if token == 'uses':
        registers = list(iter(r.fetch_identifier, ''))
        # A comma after the register list is just skipped
        if r.curr() == ',':
            r.step()
        token = r.fetch_identifier()

    # Remaining identifiers are the arguments
    params = []
    while token != '':
        param_name, param_type = token, 'arg_t'
        r.skip_spaces()
        # ':' introduces an explicit argument type
        if r.curr() == ':':
            r.step()
            param_type = r.fetch_identifier()
        # ',' means one more argument follows, anything else ends the list
        if r.curr() == ',':
            r.step()
            token = r.fetch_identifier()
        else:
            token = ''
        params.append((param_name, param_type))

    # Run to the end of the line so the reader gathers the comment
    while r.curr() != '':
        r.step()
    comment = r.comment
    return AsmFunction(r.location(), proc_name, comment, convention, params, registers)
||
817 | |||
def get_declarations(asm_file_contents, asm_file_name):
    """Scan one assembly source and record the declarations found in it.

    Parameters:
        asm_file_contents: text of the file. It is not used here; the reader
            is constructed from asm_file_name.
        asm_file_name: name of the file handed to AsmReader.

    Side effects:
        Appends macros, structs, procs, variables and non-local labels to the
        global 'elements' list and registers/unregisters identifier kinds
        (macro names, struct names, 'equ' and '=' constants).
    """
    r = AsmReader(asm_file_name)

    while not r.no_lines():
        # Skip leading spaces
        r.skip_spaces()
        # Skip the line if it's starting with a comment
        if r.curr() == ';':
            r.nextline()
            continue
        # Get first word
        first_word = ""
        while is_id(r.curr()):
            first_word += r.step()
        # Match macro declaration
        if first_word == "macro":
            macro = parse_after_macro(r)
            elements.append(macro)
            id_add_kind(macro.name, ID_KIND_MACRO_NAME)
        # Match structure declaration
        elif first_word == "struct":
            struct = parse_after_struct(r)
            elements.append(struct)
            id_add_kind(struct.name, ID_KIND_STRUCT_NAME)
        # Match function definition
        elif first_word == "proc":
            proc = parse_after_proc(r)
            elements.append(proc)
        # Directives that declare nothing are skipped
        elif first_word in ('format', 'include', 'if', 'repeat'):
            pass
        elif first_word == 'purge':
            while True:
                # Skip spaces after the 'purge' keyword or after the comma
                # that separated the previous macro name
                r.skip_spaces()
                # Get the purged macro name
                name = ''
                while is_id(r.curr()):
                    name += r.step()
                # Forget the purged macro name. Best effort: a failure to
                # remove it is ignored, but KeyboardInterrupt and SystemExit
                # are no longer swallowed.
                try:
                    id_remove_kind(name, ID_KIND_MACRO_NAME)
                except Exception:
                    pass
                # Skip spaces after the name
                r.skip_spaces()
                # A comma means one more macro name follows
                if r.curr() == ',':
                    r.step()
                    continue
                # All the listed macros were purged
                break
        # Match label or a variable
        elif first_word:
            # Skip spaces after the identifier
            r.skip_spaces()
            # Match a variable
            var = parse_variable(r, first_word)
            if type(var) is AsmVariable:
                elements.append(var)
            # parse_variable hands the supplied word back as a str when no
            # type identifier follows it: that may be a label or a constant
            elif type(var) is str:
                name = var
                # Match label beginning (':' after name)
                if r.curr() == ':':
                    # Get to the end of the line so the reader gathers the comment
                    while r.curr() != '':
                        r.step()
                    comment = r.comment
                    # Only handle non-local labels
                    if name[0] != '.' and name != "@@" and name != "$Revision":
                        # A label documented with @return/@param is a function
                        if '@return' in comment or '@param' in comment:
                            element = AsmFunction(r.location(), name, comment, '', [], [])
                        else:
                            element = AsmLabel(r.location(), name, comment)
                        elements.append(element)
                elif r.curr() == '=':
                    # Save the identifier as a set constant
                    id_add_kind(first_word, ID_KIND_SET_CONSTANT)
            # parse_variable returns (name, keyword) when a keyword follows
            elif type(var) is tuple:
                (word_one, word_two) = var
                if word_two == 'equ':
                    # Save the identifier as an equated constant
                    id_add_kind(word_one, ID_KIND_EQUATED_CONSTANT)
        r.nextline()
8825 | Boppan | 916 | |
def it_neds_to_be_parsed(source_file):
    """Tell whether source_file has to be (re)parsed.

    Returns True when the symbol dump 'asmxygen.elements.pickle' is missing,
    when the file has no counterpart under doxygen_src_path, or when the
    source was modified later than that counterpart; False otherwise.
    """
    # Without the saved symbols everything has to be parsed anyway,
    # because the symbols file must be created to generate proper doxygen
    if not os.path.isfile('asmxygen.elements.pickle'):
        return True
    generated = f"{doxygen_src_path}/{source_file}"
    # Nothing was generated for this source yet
    if not os.path.isfile(generated):
        return True
    # Stale output: the source is newer than what was generated from it
    return os.path.getmtime(source_file) > os.path.getmtime(generated)
||
935 | |||
def handle_file(handled_files, asm_file_name, subdir = "."):
    """Process one assembly file and, recursively, every file it includes.

    Parameters:
        handled_files: list of file names already processed in this run;
            the name of this file is appended to it.
        asm_file_name: path of the file to process.
        subdir: directory against which include paths are resolved first.

    The file name is canonicalized and cut relative to the directory of the
    running script (sys.argv[0]). 'lang.inc' and files already present in
    handled_files are skipped. When the file needs parsing, elements recorded
    for it earlier are dropped from the global 'elements' list together with
    their struct/macro identifier kinds. Declarations are collected only if
    the file needs parsing and the run is not a --clean run.
    """
    global elements
    # Canonicalize the file path and get it relative to the script directory
    cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
    asm_file_name = os.path.realpath(asm_file_name)
    asm_file_name = asm_file_name[len(cwd) + 1:]
    # If it's lang.inc - skip it
    if asm_file_name == 'lang.inc':
        return
    # If the file was handled in this execution before - skip it
    if asm_file_name in handled_files:
        return
    # Say that the file was handled in this execution
    handled_files.append(asm_file_name)
    # Check if the file should be parsed (if it was modified or wasn't parsed yet)
    should_get_declarations = True
    if not it_neds_to_be_parsed(asm_file_name):
        print(f"Skipping {asm_file_name} (already newest)")
        should_get_declarations = False
    else:
        print(f"Handling {asm_file_name}")
        # Split off the elements parsed from this file before, if any
        elements_to_remove = []
        elements_to_keep = []
        for known in elements:
            if known.location.split(':')[0] == asm_file_name:
                elements_to_remove.append(known)
            else:
                elements_to_keep.append(known)
        elements = elements_to_keep
        # Forget kinds of identifiers named by the removed elements
        for element in elements_to_remove:
            if type(element) is AsmStruct:
                id_remove_kind(element.name, ID_KIND_STRUCT_NAME)
            elif type(element) is AsmMacro:
                id_remove_kind(element.name, ID_KIND_MACRO_NAME)
    # Read the source; the context manager closes the file right away
    with open(asm_file_name, "r", encoding="utf-8") as asm_file:
        asm_file_contents = asm_file.read()
    # Find includes, fix their paths and handle them recursively
    includes = re.findall(r'^include (["\'])(.*)\1', asm_file_contents, flags=re.MULTILINE)
    for include in includes:
        # The second regex group is the path; normalize the separators
        include_path = include[1].replace('\\', '/')
        full_path = subdir + '/' + include_path
        # If the path isn't valid, maybe it is not relative to subdir
        if not os.path.isfile(full_path):
            full_path = include_path
        new_subdir = full_path.rsplit('/', 1)[0]
        handle_file(handled_files, full_path, new_subdir)
    # Only collect declarations from the file if it wasn't parsed before
    if should_get_declarations and not clean_generated_stuff:
        get_declarations(asm_file_contents, asm_file_name)
8825 | Boppan | 981 | |
# Script entry point: parse the kernel sources starting from ./kernel.asm,
# then, depending on the command line flags, dump the symbols, remove or emit
# the doxygen files, print statistics, write the warnings file and compare
# hashes of the produced files with a stored reference.
if __name__ == "__main__":
    link_root = "http://websvn.kolibrios.org/filedetails.php?repname=Kolibri+OS&path=/kernel/trunk"

    # Dict where an identifier is associated with a string
    # The string contains characters specifying flags
    # Available flags:
    #  k - Keyword
    #  m - Macro name
    #  t - fasm data Type name (db, rq, etc.)
    #  s - Struct type name
    #  e - equated constant (name equ value)
    #  = - set constants (name = value)
    ID_KIND_KEYWORD = 'k'
    ID_KIND_MACRO_NAME = 'm'
    ID_KIND_FASM_TYPE = 't'
    ID_KIND_STRUCT_NAME = 's'
    ID_KIND_EQUATED_CONSTANT = 'e'
    ID_KIND_SET_CONSTANT = '='
    id2kind = {}

    for keyword in keywords:
        id_add_kind(keyword, ID_KIND_KEYWORD)

    for fasm_type in fasm_types:
        id_add_kind(fasm_type, ID_KIND_FASM_TYPE)

    # Warning list
    warnings = ""

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", help="Doxygen output folder")
    parser.add_argument("--clean", help="Remove generated files", action="store_true")
    parser.add_argument("--dump", help="Dump all defined symbols", action="store_true")
    parser.add_argument("--stats", help="Print symbol stats", action="store_true")
    parser.add_argument("--nowarn", help="Do not write warnings file", action="store_true")
    parser.add_argument("--noemit", help="Do not emit doxygen files (for testing)", action="store_true")
    parser.add_argument("--debug", help="Show hashes of files (for testing)", action="store_true")
    args = parser.parse_args()

    # Parameters
    # Path to doxygen folder to make doxygen files in: -o
    doxygen_src_path = args.o if args.o else 'docs/doxygen'
    # Remove generated doxygen files: --clean
    clean_generated_stuff = args.clean
    # Dump all defined symbols: --dump
    dump_symbols = args.dump
    # Print symbol stats: --stats
    print_stats = args.stats
    # Do not write warnings file: --nowarn
    enable_warnings = not args.nowarn
    # Do not emit doxygen files: --noemit
    noemit = args.noemit
    # Compare hashes of the output files: --debug
    debug_mode = args.debug

    # Variables, functions, labels, macros, structure types
    elements = []
    created_files = []
    kernel_files = []
    output_files = {} # If --debug then all the files are written here

    # Load remembered list of symbols
    # NOTE: unpickling can execute arbitrary code; the file is expected to be
    # the dump written by a previous run of this script, nothing else.
    if os.path.isfile('asmxygen.elements.pickle'):
        print('Reading existing dump of symbols')
        with open('asmxygen.elements.pickle', 'rb') as pickle_file:
            (elements, id2kind) = pickle.load(pickle_file)

    handle_file(kernel_files, "./kernel.asm")

    if dump_symbols:
        # The elements dump themselves to stdout, so redirect it to the file
        # and restore it even if dumping fails
        stdout = sys.stdout
        with open('asmxygen.dump.txt', 'w', encoding = 'utf-8') as dump_file:
            sys.stdout = dump_file
            try:
                for asm_element in elements:
                    asm_element.dump()
            finally:
                sys.stdout = stdout

    if clean_generated_stuff:
        for file in kernel_files:
            doxygen_file = f"{doxygen_src_path}/{file}"
            if os.path.isfile(doxygen_file):
                print(f"Removing {file}... ", end = '')
                os.remove(doxygen_file)
                print("Done.")
    elif not noemit:
        print(f"Writing doumented sources to {doxygen_src_path}")

        new_elements = [x for x in elements if x.new]
        for i, element in enumerate(new_elements):
            print(f"[{i + 1}/{len(new_elements)}] Emitting {element.name} from {element.location}")
            element.emit(doxygen_src_path)

        print("Writing dump of symbols to asmxygen.elements.pickle")

        # The new elements are written now, so none of them is new anymore
        for element in elements:
            element.new = False
        with open('asmxygen.elements.pickle', 'wb') as pickle_file:
            pickle.dump((elements, id2kind), pickle_file)

    if print_stats:
        # Exact type match per row, in the original reporting order
        stats_rows = [
            ('variable', AsmVariable),
            ('macro', AsmMacro),
            ('label', AsmLabel),
            ('function', AsmFunction),
            ('union type', AsmUnion),
            ('structure type', AsmStruct),
        ]
        for stats_label, element_class in stats_rows:
            element_count = sum(1 for element in elements if type(element) is element_class)
            print(f'Parsed {stats_label} count: {element_count}')

    if enable_warnings:
        with open('asmxygen.txt', "w", encoding = "utf-8") as warnings_file:
            warnings_file.write(warnings)

    if debug_mode:
        # One "<file>: <sha1>" line per produced file. UTF-8 gives the same
        # hash as ASCII for ASCII-only text and does not fail on other text.
        hash_per_file = "".join(
            f"{file}: {hashlib.sha1(output_files[file].encode('utf-8')).hexdigest()}\n"
            for file in output_files
        )
        if not os.path.exists("asmxygen_hash_per_file.txt"):
            # First run: remember the hashes as the reference
            with open("asmxygen_hash_per_file.txt", "w") as reference_file:
                reference_file.write(hash_per_file)
            print("NEW")
        else:
            with open("asmxygen_hash_per_file.txt") as reference_file:
                reference_hash_per_file = reference_file.read()
            if reference_hash_per_file != hash_per_file:
                # ndiff compares sequences of lines; passing whole strings
                # would diff them character by character
                print(''.join(difflib.ndiff(
                    reference_hash_per_file.splitlines(keepends=True),
                    hash_per_file.splitlines(keepends=True),
                )))
            else:
                print("SUCCESS")