Rev 9401 | Rev 9403 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
8834 | Boppan | 1 | import re |
8825 | Boppan | 2 | import os |
8837 | Boppan | 3 | import argparse |
8957 | Boppan | 4 | import sys |
8990 | Boppan | 5 | import pickle |
9402 | Boppan | 6 | import hashlib |
7 | import difflib |
||
8825 | Boppan | 8 | |
# fasm keywords: assembler directives followed by instruction mnemonics.
# Kept as one whitespace-separated block so the word list is easy to scan;
# .split() turns it into the same list of strings, in the same order
# (duplicates such as "movq" are preserved on purpose).
# NOTE(review): several common mnemonics (e.g. "sub", "jl", "movss",
# "cmovpe") do not appear in this list - confirm whether that is intended.
keywords = """
    align equ org while load store times repeat
    display err assert if aaa aad aam aas adc
    add addpd addps addsd addss addsubpd addsubps adox
    aesdeclast aesenc aesenclast aesimc aeskeygenassist and
    andnpd andnps andpd andps arpl bextr blendpd
    blendvpd blendvps blsi blsmsk blsr bndcl bndcn
    bndldx bndmk bndmov bndstx bound bsf bsr bswap
    btc btr bts bzhi call cbw cdq cdqe clac clc
    cldemote clflush clflushopt cli clts clwb cmc cmova
    cmovb cmovbe cmovc cmove cmovg cmovge cmovl cmovle
    cmovnae cmovnb cmovnbe cmovnc cmovne cmovng cmovnge
    cmovnle cmovno cmovnp cmovns cmovnz cmovo cmovp
    cmovpo cmovs cmovz cmp cmppd cmpps cmps cmpsb
    cmpsd cmpsq cmpss cmpsw cmpxchg cmpxchg16b cmpxchg8b
    comiss cpuid cqo crc32 cvtdq2pd cvtdq2ps cvtpd2dq
    cvtpd2ps cvtpi2pd cvtpi2ps cvtps2dq cvtps2pd cvtps2pi
    cvtsd2ss cvtsi2sd cvtsi2ss cvtss2sd cvtss2si cvttpd2dq
    cvttps2dq cvttps2pi cvttsd2si cvttss2si cwd cwde daa
    dec div divpd divps divsd divss dppd dpps emms
    extractps f2xm1 fabs fadd faddp fbld fbstp fchs
    fcmova fcmovae fcmovb fcmovbe fcmovc fcmove fcmovg
    fcmovl fcmovle fcmovna fcmovnae fcmovnb fcmovnbe
    fcmovne fcmovng fcmovnge fcmovnl fcmovnle fcmovno
    fcmovns fcmovnz fcmovo fcmovp fcmovpe fcmovpo fcmovs
    fcom fcomi fcomip fcomp fcompp fcos fdecstp fdiv
    fdivr fdivrp ffree fiadd ficom ficomp fidiv fidivr
    fimul fincstp finit fist fistp fisttp fisub fisubr
    fld1 fldcw fldenv fldl2e fldl2t fldlg2 fldln2 fldpi
    fmul fmulp fnclex fninit fnop fnsave fnstcw fnstenv
    fpatan fprem fprem1 fptan frndint frstor fsave
    fsin fsincos fsqrt fst fstcw fstenv fstp fstsw
    fsubp fsubr fsubrp ftst fucom fucomi fucomip fucomp
    fwait fxam fxch fxrstor fxsave fxtract fyl2x
    gf2p8affineinvqb gf2p8affineqb gf2p8mulb haddpd haddps
    hsubpd hsubps idiv imul in inc ins insb insd
    insw int int1 int3 into invd invlpg invpcid iret
    jmp ja jae jb jbe jc jcxz jecxz je jg jge
    jle jna jnae jnb jnbe jnc jne jng jnge jnl
    jno jnp jns jnz jo jp jpe jpo js jz kaddb
    kaddq kaddw kandb kandd kandnb kandnd kandnq kandnw
    kandw kmovb kmovd kmovq kmovw knotb knotd knotq
    korb kord korq kortestb kortestd kortestq kortestw
    kshiftlb kshiftld kshiftlq kshiftlw kshiftrb kshiftrd
    kshiftrw ktestb ktestd ktestq ktestw kunpckbw kunpckdq
    kxnorb kxnord kxnorq kxnorw kxorb kxord kxorq kxorw
    lar lddqu ldmxcsr lds lea leave les lfence lfs
    lgs lidt lldt lmsw lock lods lodsb lodsd lodsq
    loop loopa loopae loopb loopbe loopc loope loopg
    loopl loople loopna loopnae loopnb loopnbe loopnc
    loopng loopnge loopnl loopnle loopno loopnp loopns
    loopo loopp looppe looppo loops loopz lsl lss
    lzcnt maskmovdqu maskmovq maxpd maxps maxsd maxss
    minpd minps minsd minss monitor mov movapd movaps
    movd movddup movdir64b movdiri movdq2q movdqa movdqu
    movhpd movhps movlhps movlpd movlps movmskpd movmskps
    movntdqa movnti movntpd movntps movntq movq movq
    movs movsb movsd movsd movshdup movsldup movsq
    movsw movsx movsxd movupd movups movzx mpsadbw mul
    mulps mulsd mulss mulx mwait neg nop not or
    orps out outs outsb outsd outsw pabsb pabsd
    pabsw packssdw packsswb packusdw packuswb paddb paddd
    paddsb paddsw paddusb paddusw paddw palignr pand
    pause pavgb pavgw pblendvb pblendw pclmulqdq pcmpeqb
    pcmpeqq pcmpeqw pcmpestri pcmpestrm pcmpgtb pcmpgtd
    pcmpgtw pcmpistri pcmpistrm pdep pext pextrb pextrd
    pextrw phaddd phaddsw phaddw phminposuw phsubd phsubsw
    pinsrb pinsrd pinsrq pinsrw pmaddubsw pmaddwd pmaxsb
    pmaxsq pmaxsw pmaxub pmaxud pmaxuq pmaxuw pminsb
    pminsq pminsw pminub pminud pminuq pminuw pmovmskb
    pmovzx pmuldq pmulhrsw pmulhuw pmulhw pmulld pmullq
    pmuludq pop popa popad popcnt popf popfd popfq
    prefetchw prefetchh psadbw pshufb pshufd pshufhw
    pshufw psignb psignd psignw pslld pslldq psllq
    psrad psraq psraw psrld psrldq psrlq psrlw psubb
    psubq psubsb psubsw psubusb psubusw psubw ptest
    punpckhbw punpckhdq punpckhqdq punpckhwd punpcklbw
    punpcklqdq punpcklwd push pushw pushd pusha pushad
    pushfd pushfq pxor rcl rcpps rcpss rcr rdfsbase
    rdmsr rdpid rdpkru rdpmc rdrand rdseed rdtsc rdtscp
    repe repne repnz repz ret rol ror rorx roundpd
    roundsd roundss rsm rsqrtps rsqrtss sahf sal sar
    sbb scas scasb scasd scasw seta setae setb setbe
    sete setg setge setl setle setna setnae setnb
    setnc setne setng setnge setnl setnle setno setnp
    setnz seto setp setpe setpo sets setz sfence
    sha1msg1 sha1msg2 sha1nexte sha1rnds4 sha256msg1
    sha256rnds2 shl shld shlx shr shrd shrx shufpd
    sidt sldt smsw sqrtpd sqrtps sqrtsd sqrtss stac
    std sti stmxcsr stos stosb stosd stosq stosw str
    subpd subps subsd subss swapgs syscall sysenter
    sysret test tpause tzcnt ucomisd ucomiss ud
    umwait unpckhpd unpckhps unpcklpd unpcklps valignd
    vblendmpd vblendmps vbroadcast vcompresspd vcompressps
    vcvtpd2udq vcvtpd2uqq vcvtph2ps vcvtps2ph vcvtps2qq
    vcvtps2uqq vcvtqq2pd vcvtqq2ps vcvtsd2usi vcvtss2usi
    vcvttpd2udq vcvttpd2uqq vcvttps2qq vcvttps2udq vcvttps2uqq
    vcvttss2usi vcvtudq2pd vcvtudq2ps vcvtuqq2pd vcvtuqq2ps
    vcvtusi2ss vdbpsadbw verr verw vexpandpd vexpandps
    vextractf32x4 vextractf32x8 vextractf64x2 vextractf64x4
    vextracti32x4 vextracti32x8 vextracti64x2 vextracti64x4
    vfixupimmps vfixupimmsd vfixupimmss vfmadd132pd vfmadd132ps
    vfmadd132ss vfmadd213pd vfmadd213ps vfmadd213sd vfmadd213ss
    vfmadd231ps vfmadd231sd vfmadd231ss vfmaddsub132pd
    vfmaddsub213pd vfmaddsub213ps vfmaddsub231pd vfmaddsub231ps
    vfmsub132ps vfmsub132sd vfmsub132ss vfmsub213pd vfmsub213ps
    vfmsub213ss vfmsub231pd vfmsub231ps vfmsub231sd vfmsub231ss
    vfmsubadd132ps vfmsubadd213pd vfmsubadd213ps vfmsubadd231pd
    vfnmadd132pd vfnmadd132ps vfnmadd132sd vfnmadd132ss
    vfnmadd213ps vfnmadd213sd vfnmadd213ss vfnmadd231pd
    vfnmadd231sd vfnmadd231ss vfnmsub132pd vfnmsub132ps
    vfnmsub132ss vfnmsub213pd vfnmsub213ps vfnmsub213sd
    vfnmsub231pd vfnmsub231ps vfnmsub231sd vfnmsub231ss
    vfpclassps vfpclasssd vfpclassss vgatherdpd vgatherdpd
    vgatherdps vgatherqpd vgatherqpd vgatherqps vgatherqps
    vgetexpps vgetexpsd vgetexpss vgetmantpd vgetmantps
    vgetmantss vinsertf128 vinsertf32x4 vinsertf32x8
    vinsertf64x4 vinserti128 vinserti32x4 vinserti32x8
    vinserti64x4 vmaskmov vmovdqa32 vmovdqa64 vmovdqu16
    vmovdqu64 vmovdqu8 vpblendd vpblendmb vpblendmd vpblendmq
    vpbroadcast vpbroadcastb vpbroadcastd vpbroadcastm
    vpbroadcastw vpcmpb vpcmpd vpcmpq vpcmpub vpcmpud
    vpcmpuw vpcmpw vpcompressd vpcompressq vpconflictd
    vperm2f128 vperm2i128 vpermb vpermd vpermi2b vpermi2d
    vpermi2ps vpermi2q vpermi2w vpermilpd vpermilps vpermpd
    vpermq vpermt2b vpermt2d vpermt2pd vpermt2ps vpermt2q
    vpermw vpexpandd vpexpandq vpgatherdd vpgatherdd
    vpgatherdq vpgatherqd vpgatherqd vpgatherqq vpgatherqq
    vplzcntq vpmadd52huq vpmadd52luq vpmaskmov vpmovb2m
    vpmovdb vpmovdw vpmovm2b vpmovm2d vpmovm2q vpmovm2w
    vpmovqb vpmovqd vpmovqw vpmovsdb vpmovsdw vpmovsqb
    vpmovsqw vpmovswb vpmovusdb vpmovusdw vpmovusqb vpmovusqd
    vpmovuswb vpmovw2m vpmovwb vpmultishiftqb vprold vprolq
    vprolvq vprord vprorq vprorvd vprorvq vpscatterdd
    vpscatterqd vpscatterqq vpsllvd vpsllvq vpsllvw vpsravd
    vpsravw vpsrlvd vpsrlvq vpsrlvw vpternlogd vpternlogq
    vptestmd vptestmq vptestmw vptestnmb vptestnmd vptestnmq
    vrangepd vrangeps vrangesd vrangess vrcp14pd vrcp14ps
    vrcp14ss vreducepd vreduceps vreducesd vreducess
    vrndscaleps vrndscalesd vrndscaless vrsqrt14pd vrsqrt14ps
    vrsqrt14ss vscalefpd vscalefps vscalefsd vscalefss
    vscatterdps vscatterqpd vscatterqps vshuff32x4 vshuff64x2
    vshufi64x2 vtestpd vtestps vzeroall vzeroupper wait
    wrfsbase wrgsbase wrmsr wrpkru xabort xacquire xadd
    xchg xend xgetbv xlat xlatb xor xorpd xorps
    xrstor xrstors xsave xsavec xsaveopt xsaves xsetbv
""".split()
157 | |||
# fasm data directives: the "d*" define-data forms and their "r*"
# reserve-data counterparts, listed pairwise ("du" has no pair here).
fasm_types = "db rb dw rw dd rd dp rp df rf dq rq dt rt du".split()
||
168 | |||
# Add kind flag to identifier in id2kind
def id_add_kind(identifier, kind):
    """Append `kind` to the flag string stored for `identifier`.

    The module-level `id2kind` mapping (defined outside this block) keeps one
    string of concatenated kind flags per identifier; an identifier seen for
    the first time starts from an empty string.
    """
    id2kind[identifier] = id2kind.get(identifier, '') + kind
||
174 | |||
# Remove kind flag of identifier in id2kind
def id_remove_kind(identifier, kind):
    """Strip every occurrence of `kind` from the flags of `identifier`.

    Identifiers that are not present in `id2kind` are left alone (no entry is
    created for them).
    """
    flags = id2kind.get(identifier)
    if flags is None:
        return
    # str.replace is a no-op when the flag is absent, so no membership
    # pre-check is needed
    id2kind[identifier] = flags.replace(kind, '')
||
180 | |||
# Get kind of an identifier
def id_get_kind(identifier):
    """Return the kind-flag string of `identifier`, or '' if it is unknown."""
    return id2kind.get(identifier, '')
||
187 | |||
class LegacyAsmReader:
    """Character-level cursor over the lines of a source file.

    The whole file is read into `self.lines` on construction. The cursor is
    the pair (`line_idx`, `i`): index of the current line and index of the
    current character inside it.
    """

    def __init__(self, file):
        """Load `file` (decoded as UTF-8) and put the cursor at its start."""
        self.file = file
        # Context manager guarantees the handle is closed even if reading fails
        with open(file, "r", encoding="utf-8") as source:
            self.lines = source.readlines()
        self.line_idx = 0
        self.i = 0

    def curr(self):
        """Return the character under the cursor.

        Returns '' when the cursor is past the end of the current line or
        past the last line.
        """
        try:
            return self.lines[self.line_idx][self.i]
        except IndexError:
            # Only "out of range" means end of line/file; anything else is
            # a real error and must not be swallowed
            return ''

    def step(self):
        """Advance one character and return the character that was current.

        If the new position is a backslash followed only by whitespace and
        then a comment (';') or the end of the line, the backslash is treated
        as a line wrap and the cursor jumps to the start of the next line.
        """
        c = self.curr()
        self.i += 1
        # Wrap the line if '\\' followed by whitespaces and/or comment
        while self.curr() == '\\':
            i_of_backslash = self.i
            self.i += 1
            while self.curr().isspace():
                self.i += 1
            if self.curr() == ';' or self.curr() == '':
                self.line_idx += 1
                self.i = 0
            else:
                # There's something other than a comment after the backslash
                # So don't interpret the backslash as a line wrap
                self.i = i_of_backslash
                break
        return c

    def nextline(self):
        """Consume the rest of the current line and move to the next one."""
        c = self.curr()
        # step() returns the character it left behind, so this stops one
        # step after the end of the line has been reached
        while c != '':
            c = self.step()
        self.line_idx += 1
        self.i = 0

    def no_lines(self):
        """Return True when the cursor is past the last line."""
        return self.line_idx >= len(self.lines)

    def location(self):
        """Return the current position as "<file>:<1-based line number>"."""
        return f"{self.file}:{self.line_idx + 1}"

    def skip_spaces(self):
        """Advance past any whitespace under the cursor."""
        while self.curr().isspace():
            self.step()
||
236 | |||
class AsmReaderRecognizingStrings(LegacyAsmReader):
    """Reader that tracks whether the cursor is inside a quoted string.

    `in_string` holds the quotation mark that opened the current string, or
    None outside of strings. Tracking can be switched off through
    `should_recognize_strings`.
    """

    def __init__(self, file):
        super().__init__(file)
        self.in_string = None
        self.should_recognize_strings = True

    def step(self):
        """Advance one character, updating `in_string` on quotation marks."""
        c = super().step()
        if not self.should_recognize_strings or c not in ('"', "'"):
            return c
        if self.in_string is None:
            # We just passed a quotation mark while outside of any string:
            # a string opened with this mark starts here
            self.in_string = c
        elif self.in_string == c:
            # We just passed the same mark that opened the string: it ends
            self.in_string = None
        # A different quotation mark inside a string is ordinary content
        return c
||
257 | |||
class AsmReaderReadingComments(AsmReaderRecognizingStrings):
    """Reader that collects ';' comments while the cursor moves.

    Per-line state lives in `status_has_code` and `status_has_comment`; the
    text collected so far is in `comment`.
    """

    def __init__(self, file):
        super().__init__(file)
        self.status = {}
        self.status_reset()
        self.comment = ''

    def status_reset(self):
        """Forget the per-line state (used when a new line starts)."""
        # Whether the line has non-comment code
        self.status_has_code = False
        # Whether the line has a comment at the end
        self.status_has_comment = False
        # A new line is never inside a comment, so strings count again
        self.should_recognize_strings = True

    def status_set_has_comment(self):
        """Mark the current line as having entered its comment part."""
        self.status_has_comment = True
        # Quotation marks inside a comment must not open strings
        self.should_recognize_strings = False

    def status_set_has_code(self):
        """Mark the current line as containing code."""
        self.status_has_code = True

    def update_status(self):
        """Update the line state from the character under the cursor."""
        ch = self.curr()
        if self.status_has_comment:
            # Already inside the comment: keep collecting it
            self.comment += ch
        elif ch == ';' and not self.in_string:
            # A ';' outside of a string starts the comment
            self.status_set_has_comment()
        elif not ch.isspace():
            # Anything else that is not whitespace counts as code
            # (this includes the '' returned at the end of a line)
            self.status_set_has_code()

    def step(self):
        """Advance one character and refresh the line state."""
        c = super().step()
        self.update_status()
        return c

    def nextline(self):
        """Move to the next line, keeping or dropping the collected comment."""
        super().nextline()
        # Only comment-only lines let the comment carry over so that the
        # next line can extend it; a line with code discards it
        if self.status_has_code:
            self.comment = ''
        # From here on the status describes the new line
        self.status_reset()
        # Account for the first character of the new line
        self.update_status()
||
311 | |||
class AsmReaderFetchingIdentifiers(AsmReaderReadingComments):
    """Reader that can pull a whole identifier from under the cursor."""

    def __init__(self, file):
        super().__init__(file)

    def fetch_identifier(self):
        """Skip leading whitespace and return the identifier that follows.

        Returns '' when the first non-space character is not an identifier
        character according to is_id().
        """
        self.skip_spaces()
        collected = []
        while is_id(self.curr()):
            collected.append(self.step())
        return ''.join(collected)
||
322 | |||
class AsmReader(AsmReaderFetchingIdentifiers):
    """Top of the reader class chain; adds nothing of its own."""

    def __init__(self, file):
        super().__init__(file)
||
326 | |||
def append_file(full_path, contents):
    """Append `contents` to the output identified by `full_path`.

    When the module-level `debug_mode` flag is set, nothing is written to
    disk: the text is accumulated in the `output_files` mapping under
    `full_path`. Otherwise the text is appended to the file on disk.
    """
    if debug_mode:
        output_files[full_path] = output_files.get(full_path, "") + contents
    else:
        # The context manager closes the file even if write() raises
        with open(full_path, "a") as f:
            f.write(contents)
||
336 | |||
class AsmElement:
    """Base class of every documented assembly element.

    Stores where the element was found (`location`, split into `file` and
    `line`), its `name` and its `comment`, and knows how to emit a
    doxygen-commented declaration for itself.
    """

    def __init__(self, location, name, comment):
        """Create the element.

        location -- "<file>:<line>" string as produced by the reader
        name     -- identifier of the element
        comment  -- comment text collected for the element; an empty
                    comment is recorded in the global `warnings` text
        """
        global warnings

        # If the element was constructed during this execution then the element is new
        self.new = True
        self.location = location
        # Split on the LAST colon so that a colon inside the path itself
        # (e.g. a Windows drive letter) does not cut the file name short
        path, _, line = self.location.rpartition(':')
        self.file = path.replace('\\', '/')
        self.line = line
        self.name = name
        self.comment = comment

        if self.comment == '':
            warnings += f'{self.location}: Undocumented element\n'

    @staticmethod
    def _brief_starts_lowercase(doxycomment):
        """Tell whether the text right after '@brief ' starts in lowercase.

        Returns False when there is no '@brief ' marker or nothing follows
        it, instead of failing on the missing text.
        """
        _, marker, text = doxycomment.partition('@brief ')
        return bool(marker) and text[:1].islower()

    def dump(self):
        """Print the location, name and comment of the element."""
        print(f"\n{self.location}: {self.name}")
        print(f"{self.comment}")

    def emit(self, dest, doxycomment = '', declaration = ''):
        """Append the documented declaration to the file under `dest`.

        dest        -- root directory of the generated tree; the output path
                       is `dest` + '/' + the element's source file path
        doxycomment -- body of the doxygen comment
        declaration -- declaration text; defaults to '#define <name>'

        Nothing is emitted for elements whose comment contains
        '@dont_give_a_doxygen'.
        """
        global warnings

        # Do not emit anything if the symbol is marked as hidden in its comment
        if '@dont_give_a_doxygen' in self.comment:
            return

        # Redefine default declaration
        if declaration == '':
            declaration = f'#define {self.name}'
        # Check doxycomment
        if not doxycomment.endswith('\n'):
            doxycomment += '\n'
        if self._brief_starts_lowercase(doxycomment):
            warnings += f"{self.location}: Brief comment starting from lowercase\n"
        # Build contents to emit
        contents = (
            '/**\n'
            + doxycomment
            + "@par Source\n"
            + f"{self.file}:{self.line}\n"
            + '*/\n'
            + declaration
            + '\n\n'
        )
        # Get path to file to emit this
        full_path = dest + '/' + self.file
        # Remove the file on first access if it was created by previous generation
        if full_path not in created_files:
            if os.path.isfile(full_path):
                os.remove(full_path)
            created_files.append(full_path)
        # Create the directories needed for the file
        os.makedirs(os.path.dirname(full_path), exist_ok=True)
        # Output is kept pure ASCII: every other character becomes '?'
        contents = contents.encode('ascii', 'replace').decode('ascii')

        append_file(full_path, contents)
391 | |||
class AsmVariable(AsmElement):
    """A variable: an element with a type name and an initial value."""

    def __init__(self, location, name, comment, type, init):
        super().__init__(location, name, comment)
        self.type = type
        self.init = init

    def dump(self):
        """Print the element followed by a "(Variable)" marker."""
        super().dump()
        print(f"(Variable)\n---")

    def emit(self, dest):
        """Emit the variable as a C-like "<type> <name>;" declaration."""
        # The comment becomes the brief unless it already carries one
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        doxycomment += f"@par Initial value\n{self.init}\n"
        # Dots are replaced with underscores in both the name and the type
        c_name = self.name.replace(".", "_")
        c_type = self.type.replace(".", "_")
        super().emit(dest, doxycomment, f"{c_type} {c_name};")
||
8855 | Boppan | 416 | |
class AsmFunction(AsmElement):
    """A function: an element with a calling convention, arguments and
    the registers it uses.

    `args` is a list of (name, type) pairs.
    """

    def __init__(self, location, name, comment, calling_convention, args, used_regs):
        super().__init__(location, name, comment)
        self.calling_convention = calling_convention
        self.args = args
        self.used_regs = used_regs

    def dump(self):
        """Print the element followed by a "(Function)" marker."""
        super().dump()
        print(f"(Function)\n---")

    def _collect_params_from_comment(self):
        """Append one ('<name>', 'arg_t') pair per '@param' in the comment.

        The scan never reads past the end of the comment, so a comment that
        ends right after '@param' or right after a parameter name is fine.
        """
        comment = self.comment
        end = len(comment)
        i = comment.find('@param')
        while i != -1:
            # Skip '@param'
            i += len('@param')
            # Skip spaces after '@param'
            while i < end and comment[i].isspace():
                i += 1
            # Get the parameter name
            name = ''
            while i < end and is_id(comment[i]):
                name += comment[i]
                i += 1
            # Save the parameter
            self.args.append((name, 'arg_t'))
            i = comment.find('@param', i)

    def emit(self, dest):
        """Emit the function as a C-like "void <name>(<args>);" declaration."""
        # Build doxycomment specific for the function
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        # If there were no arguments, maybe that's just a label:
        # then parse parameters from its comment
        if len(self.args) == 0 and '@param' in self.comment:
            self._collect_params_from_comment()
        # Build the arg list for declaration ("<type> <name>" per argument)
        arg_list = '(' + ', '.join(f"{arg[1]} {arg[0]}" for arg in self.args) + ')'
        # Build the declaration
        name = self.name.replace(".", "_")
        declaration = f"void {name}{arg_list};"
        # Emit this
        super().emit(dest, doxycomment, declaration)
||
8855 | Boppan | 467 | |
class AsmLabel(AsmElement):
    """A plain label."""

    def __init__(self, location, name, comment):
        super().__init__(location, name, comment)

    def dump(self):
        """Print the element followed by a "(Label)" marker."""
        super().dump()
        print(f"(Label)\n---")

    def emit(self, dest):
        """Emit the label as a "label <name>;" declaration."""
        # The comment becomes the brief unless it already carries one
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        # Dots in the name are replaced with underscores
        c_name = self.name.replace(".", "_")
        super().emit(dest, doxycomment, f"label {c_name};")
||
8855 | Boppan | 487 | |
class AsmMacro(AsmElement):
    """A macro together with the tokens of its argument list."""

    def __init__(self, location, name, comment, args):
        super().__init__(location, name, comment)
        self.args = args

    def dump(self):
        """Print the element followed by a "(Macro)" marker."""
        super().dump()
        print(f"(Macro)\n---")

    def emit(self, dest):
        """Emit the macro as a "#define <name>(<args>)" declaration."""
        # Drop the '[', ']' and '*' tokens. The test is a substring check
        # against "[]*", so an empty token is dropped as well.
        names = [token for token in self.args if token not in "[]*"]
        # C-like arg list; no parentheses at all when nothing is left
        arg_list = '(' + ", ".join(names) + ')' if names else ""
        # The comment becomes the brief unless it already carries one
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        super().emit(dest, doxycomment, f"#define {self.name}{arg_list}")
||
8855 | Boppan | 520 | |
class AsmStruct(AsmElement):
    """A structure together with its members."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        """Print the element followed by a "(Struct)" marker."""
        super().dump()
        print(f"(Struct)\n---")

    def emit(self, dest):
        """Emit the structure as a C-like struct with one field per member."""
        # The comment becomes the brief unless it already carries one
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        doxycomment += '\n'
        # Only members that are exactly AsmVariable become fields; each
        # field carries the member's comment as a trailing doxygen comment
        fields = [
            f'\t{member.type} {member.name}; /**< {member.comment} */\n'
            for member in self.members
            if type(member) == AsmVariable
        ]
        declaration = f"struct {self.name}" + " {\n" + ''.join(fields) + '};'
        super().emit(dest, doxycomment, declaration)
||
8855 | Boppan | 545 | |
class AsmUnion(AsmElement):
    """A union together with its members."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        """Print the element followed by a "(Union)" marker."""
        super().dump()
        print(f"(Union)\n---")

    def emit(self, dest):
        """Emit the union as an empty "union <name> {};" declaration.

        The members are not written out.
        """
        # The comment becomes the brief unless it already carries one
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        super().emit(dest, doxycomment, f"union {self.name}" + " {};")
||
8855 | Boppan | 565 | |
class VariableNameIsMacroName:
    """Marker object carrying the `name` of an identifier that turned out
    to be a macro name rather than a variable name."""

    def __init__(self, name):
        self.name = name
||
8855 | Boppan | 569 | |
def is_id(c):
    """Tell whether character `c` may be part of an identifier.

    Any printable character is accepted except operators, brackets,
    separators, quotation marks and whitespace. The empty string (what the
    readers return at the end of a line) is not an identifier character.
    """
    if not c.isprintable():
        return False
    return c not in "+-/*=<>()[]{};:,|&~#`'\" \n\r\t\v"
||
8855 | Boppan | 572 | |
def is_starts_as_id(s):
    """Tell whether `s` starts the way an identifier may: not with a digit.

    `s` must not be empty.
    """
    first = s[0]
    return not first.isdigit()
||
575 | |||
def parse_after_macro(r):
    """Parse a macro definition; the reader `r` stands right after "macro".

    Reads the macro name and its argument tokens, takes the comment the
    reader has collected by the end of the header line, then skips forward
    to the closing '}' of the macro body.

    Returns an AsmMacro. Raises Exception on an unexpected character in the
    argument list. If the input ends before a closing '}' is found, parsing
    stops there instead of looping forever.
    """
    location = r.location()

    # Skip spaces after the "macro" keyword
    r.skip_spaces()
    # Read macro name ('#' is accepted inside the name)
    name = ""
    while is_id(r.curr()) or r.curr() == '#':
        name += r.step()
    # Skip spaces after macro name
    r.skip_spaces()
    # Find all arguments
    args = []
    arg = ''
    while r.curr() and r.curr() != ';' and r.curr() != '{':
        # Collect identifier
        if is_id(r.curr()):
            arg += r.step()
        # Save the collected identifier
        elif r.curr() == ',':
            args.append(arg)
            arg = ''
            r.step()
        # Just push the '['
        elif r.curr() == '[':
            args.append(r.step())
        # Just push the identifier and get ']' ready to be pushed on next comma
        elif r.curr() == ']':
            args.append(arg)
            arg = r.step()
        # Just push the identifier and get '*' ready to be pushed on next comma
        elif r.curr() == '*':
            args.append(arg)
            arg = r.step()
        # Just skip whitespaces
        elif r.curr().isspace():
            r.step()
        # Something unexpected
        else:
            raise Exception(f"Unexpected symbol '{r.curr()}' at index #{r.i} " +
                            f"in the macro declaration at {location} " +
                            f"(line: {r.lines[r.line_idx]})\n''")
    # Append the last argument
    if arg != '':
        args.append(arg)
    # Skip the spaces after the argument list
    r.skip_spaces()
    # Get a comment if there is one: read till the end of the line and get
    # the comment from the reader
    # NOTE(review): this also consumes a '}' that sits on the header line,
    # so a one-line macro looks like it is closed by a later '}' - confirm.
    while r.curr() != '':
        r.step()
    comment = r.comment
    # Find end of the macro: the first '}' not preceded by a backslash
    prev = ''
    while True:
        if r.curr() == '}' and prev != '\\':
            break
        elif r.curr() == '':
            # Out of input without a closing brace: stop searching, since
            # nextline() would otherwise be called forever
            if r.no_lines():
                break
            prev = ''
            r.nextline()
            continue
        prev = r.step()
    # Build the output
    return AsmMacro(location, name, comment, args)
8855 | Boppan | 639 | |
def parse_variable(r, first_word = None):
    """Try to parse a `name type value` variable declaration from the reader.

    Parameters:
        r          - reader object; this function uses its location(),
                     skip_spaces(), curr(), step() methods and its `comment`
                     attribute.
        first_word - identifier that the caller has already consumed from the
                     line, or None to make this function read it itself.

    Returns one of:
        None                    - the line does not start with a usable
                                  identifier, or the identifier is a keyword,
                                  a struct name or a fasm data type
        VariableNameIsMacroName - the first identifier is a known macro name
        str                     - only a name was found (no type identifier
                                  follows it); the caller may interpret it
        (name, word) tuple      - the second word is a keyword (e.g. `equ`)
        AsmVariable             - a complete variable declaration
    """
    location = r.location()

    # Skip spaces before variable name
    r.skip_spaces()

    # Use the supplied name, or read one from the line if none was supplied
    if first_word is None:
        name = ""
        while is_id(r.curr()):
            name += r.step()
    else:
        name = first_word

    # An empty name means the line starts with something that is not an
    # identifier; a name that does not start like an identifier is illegal
    if not name:
        return None
    if not is_starts_as_id(name):
        return None

    # Classify the name using the identifier kind table
    kind = id_get_kind(name)
    # Keywords never start a variable declaration
    if ID_KIND_KEYWORD in kind:
        return None
    # Macro invocations are reported to the caller with a dedicated type
    if ID_KIND_MACRO_NAME in kind:
        return VariableNameIsMacroName(name)
    # A leading struct name or data type is just anonymous data:
    # it is not documented for now
    if ID_KIND_STRUCT_NAME in kind or ID_KIND_FASM_TYPE in kind:
        return None

    # Read the type identifier that follows the name
    r.skip_spaces()
    var_type = ""
    while is_id(r.curr()):
        var_type += r.step()

    # No type after the name: hand the bare name back, it may be meaningful
    # for the next parser (e.g. a label name)
    if not var_type:
        return name
    if not is_starts_as_id(var_type):
        return None

    # If the second word is a keyword this is not a variable declaration:
    # return the two words of the lexical structure
    if ID_KIND_KEYWORD in id_get_kind(var_type):
        return (name, var_type)

    # Read the value up to the comment or the end of the line
    r.skip_spaces()
    value_chars = []
    while r.curr() not in (';', '', '\n'):
        value_chars.append(r.step())
    value = "".join(value_chars)

    # Step through the rest of the line; per the original comments this is
    # what lets the reader provide the trailing comment via r.comment
    r.skip_spaces()
    while r.curr() != '':
        r.step()

    return AsmVariable(location, name, r.comment, var_type, value)
8825 | Boppan | 708 | |
def parse_after_struct(r, as_union = True):
    """Parse a structure (or nested union) body after its opening keyword.

    Reads the name and the comment of the opening line, then parses member
    lines with parse_variable() until a macro-like `ends` line is found.

    Parameters:
        r        - reader object positioned right after the opening keyword
        as_union - selects the class of the returned object, see the note
                   at the end of the function

    Returns an AsmStruct when as_union is true, an AsmUnion otherwise.
    Raises Exception when a member line holds an unexpected bare identifier.
    """
    global warnings
    location = r.location()

    # Skip spaces after the keyword and read the name
    r.skip_spaces()
    name = ""
    while is_id(r.curr()):
        name += r.step()

    # Consume the rest of the line, then take the comment from the reader
    while r.curr() != '':
        r.step()
    comment = r.comment

    # Members start on the next line
    r.nextline()
    members = []
    while True:
        r.skip_spaces()
        var = parse_variable(r)
        if type(var) is VariableNameIsMacroName:
            # 'ends' closes the structure; any other macro line is skipped
            if var.name == 'ends':
                break
        elif type(var) is AsmVariable:
            members.append(var)
        elif type(var) is str:
            if var == 'union':
                # A nested union is parsed with the same routine
                members.append(parse_after_struct(r, as_union = True))
                # Skip the 'ends' line of the union
                r.nextline()
            elif r.curr() == ':':
                warnings += f"{r.location()}: Skept the label in the struct\n"
            else:
                raise Exception(f"Garbage in struct member at {location} (got '{var}' identifier)")
        r.nextline()

    # NOTE(review): the flag reads inverted - a true `as_union` produces an
    # AsmStruct and every visible caller ends up with as_union=True, so
    # AsmUnion is never built from here. Behaviour is kept as is; confirm
    # the intent before changing it.
    result_class = AsmStruct if as_union else AsmUnion
    return result_class(location, name, comment, members)
||
8825 | Boppan | 752 | |
def parse_after_proc(r):
    """Parse a procedure header after the `proc` keyword.

    Recognised layout, every part after the name being optional:
        name [stdcall|c][,] [uses reg reg ...][,] [arg[:type], arg[:type] ...]

    Parameters:
        r - reader object; fetch_identifier(), curr(), step(), skip_spaces(),
            location() and the `comment` attribute are used.

    Returns an AsmFunction built from the reader location, the name, the line
    comment, the calling convention ('' if absent), the list of
    (arg_name, arg_type) tuples and the list of used registers.
    """
    proc_name = r.fetch_identifier()
    # The identifier that follows the name decides what comes next
    token = r.fetch_identifier()

    # Optional calling convention specifier
    calling_convention = ''
    if token in ('stdcall', 'c'):
        calling_convention = token
        # A comma may separate the convention from the rest
        if r.curr() == ',':
            r.step()
        token = r.fetch_identifier()

    # Optional list of used registers
    used_regs = []
    if token == 'uses':
        # Registers are read until no identifier can be fetched
        register = r.fetch_identifier()
        while register != '':
            used_regs.append(register)
            register = r.fetch_identifier()
        # A comma may separate the register list from the arguments
        if r.curr() == ',':
            r.step()
        token = r.fetch_identifier()

    # Whatever identifiers remain are the arguments
    args = []
    while token != '':
        arg_name, arg_type = token, 'arg_t'
        r.skip_spaces()
        # ':' after the name introduces an explicit type
        if r.curr() == ':':
            r.step()
            arg_type = r.fetch_identifier()
        args.append((arg_name, arg_type))
        # A comma announces one more argument, anything else ends the list
        if r.curr() == ',':
            r.step()
            token = r.fetch_identifier()
        else:
            token = ''

    # Consume the rest of the line, then take the comment from the reader
    while r.curr() != '':
        r.step()
    comment = r.comment

    return AsmFunction(r.location(), proc_name, comment, calling_convention, args, used_regs)
||
809 | |||
def get_declarations(asm_file_contents, asm_file_name):
    """Collect declarations from an assembly file into the global `elements`.

    The file is read line by line through AsmReader. Macros, structures,
    procedures, variables and non-local labels are appended to `elements`;
    macro/struct names and `=` / `equ` constants are registered in the
    identifier kind table; `purge` removes macro names from it.

    Parameters:
        asm_file_contents - not referenced by this function (the reader is
                            constructed from the file name); kept for the
                            callers' sake
        asm_file_name     - file name handed to AsmReader
    """
    r = AsmReader(asm_file_name)

    while not r.no_lines():
        # Skip leading spaces
        r.skip_spaces()
        # Skip the line if it starts with a comment
        if r.curr() == ';':
            r.nextline()
            continue
        # Get the first word of the line
        first_word = ""
        while is_id(r.curr()):
            first_word += r.step()

        # Match macro declaration
        if first_word == "macro":
            macro = parse_after_macro(r)
            elements.append(macro)
            id_add_kind(macro.name, ID_KIND_MACRO_NAME)
        # Match structure declaration
        elif first_word == "struct":
            struct = parse_after_struct(r)
            elements.append(struct)
            id_add_kind(struct.name, ID_KIND_STRUCT_NAME)
        # Match function definition
        elif first_word == "proc":
            proc = parse_after_proc(r)
            elements.append(proc)
        # Directives that are deliberately skipped
        elif first_word in ('format', 'include', 'if', 'repeat'):
            pass
        elif first_word == 'purge':
            while True:
                # Skip spaces after the 'purge' keyword or after the comma
                # that separated the previous macro name
                r.skip_spaces()
                # Get the purged macro name
                name = ''
                while is_id(r.curr()):
                    name += r.step()
                # Remove the purged macro from the macro names. Purging is
                # best-effort, so a failure to remove is ignored - but only
                # ordinary errors: KeyboardInterrupt/SystemExit must not be
                # swallowed as the former bare `except:` did.
                try:
                    id_remove_kind(name, ID_KIND_MACRO_NAME)
                except Exception:
                    pass
                # Skip spaces after the name
                r.skip_spaces()
                # A comma means there are more macros to purge
                if r.curr() == ',':
                    r.step()
                    continue
                # All the listed macros are purged
                break
        # Match a label or a variable
        elif len(first_word) != 0:
            # Skip spaces after the identifier
            r.skip_spaces()
            var = parse_variable(r, first_word)
            if type(var) == AsmVariable:
                elements.append(var)
            # parse_variable hands the supplied first word back as a plain
            # string when no type identifier follows it: that may be a label
            # or a `name = value` constant
            elif type(var) == str:
                name = var
                # Match label beginning (':' after the name)
                if r.curr() == ':':
                    # Consume the line, then take the comment from the reader
                    while r.curr() != '':
                        r.step()
                    comment = r.comment
                    # Only handle non-local labels
                    if name[0] != '.' and name != "@@" and name != "$Revision":
                        # A label documented with @return/@param is treated
                        # as a function
                        if '@return' in comment or '@param' in comment:
                            element = AsmFunction(r.location(), name, comment, '', [], [])
                        else:
                            element = AsmLabel(r.location(), name, comment)
                        elements.append(element)
                elif r.curr() == '=':
                    # Save the identifier as a set constant
                    id_add_kind(first_word, ID_KIND_SET_CONSTANT)
            # A (name, keyword) pair: only `name equ ...` is of interest
            elif type(var) == tuple:
                (word_one, word_two) = var
                if word_two == 'equ':
                    # Save the identifier as an equated constant
                    id_add_kind(word_one, ID_KIND_EQUATED_CONSTANT)
        r.nextline()
8825 | Boppan | 908 | |
def it_neds_to_be_parsed(source_file):
    """Tell whether `source_file` has to be parsed in this run.

    Returns True when the saved symbols file 'asmxygen.elements.pickle' is
    missing, when the file generated for this source under
    `doxygen_src_path` is missing, or when the source was modified later
    than that generated file. Returns False otherwise.
    """
    # Without the saved symbols everything has to be parsed anyway: the
    # symbols file must be created to generate proper doxygen
    if not os.path.isfile('asmxygen.elements.pickle'):
        return True

    # No generated counterpart yet - the source has to be compiled to doxygen
    generated = f"{doxygen_src_path}/{source_file}"
    if not os.path.isfile(generated):
        return True

    # A source newer than its generated counterpart makes the latter stale
    source_mtime = os.path.getmtime(source_file)
    generated_mtime = os.path.getmtime(generated)
    return source_mtime > generated_mtime
||
927 | |||
def handle_file(handled_files, asm_file_name, subdir = "."):
    """Process one assembly file and, recursively, the files it includes.

    Parameters:
        handled_files - list of already visited file names; the name of this
                        file is appended to it
        asm_file_name - path of the file to process
        subdir        - directory that include paths are tried against first

    For a file that needs parsing, the elements previously collected from it
    are dropped from the global `elements` and the kinds of their struct and
    macro names are forgotten. Declarations are collected afterwards unless
    `clean_generated_stuff` is set. 'lang.inc' and files that were already
    visited are skipped.
    """
    global elements
    # Canonicalize the file path and make it relative to the directory of
    # the running script.
    # NOTE(review): the slice assumes the real path lies under that
    # directory - confirm for symlinked checkouts.
    cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
    asm_file_name = os.path.realpath(asm_file_name)
    asm_file_name = asm_file_name[len(cwd) + 1:]
    # If it's lang.inc - skip it
    if asm_file_name == 'lang.inc':
        return
    # If the file was handled in this execution before - skip it
    if asm_file_name in handled_files:
        return
    # Remember that the file was handled in this execution
    handled_files.append(asm_file_name)
    # Check if the file should be parsed (it was modified or not parsed yet)
    should_get_declarations = True
    if not it_neds_to_be_parsed(asm_file_name):
        print(f"Skipping {asm_file_name} (already newest)")
        should_get_declarations = False
    else:
        print(f"Handling {asm_file_name}")
        # Remove elements parsed from this file before, if any
        elements_to_remove = [x for x in elements if x.location.split(':')[0] == asm_file_name]
        elements = [x for x in elements if x.location.split(':')[0] != asm_file_name]
        # Forget the kinds of the names of the removed elements
        for element in elements_to_remove:
            if type(element) == AsmStruct:
                id_remove_kind(element.name, ID_KIND_STRUCT_NAME)
            elif type(element) == AsmMacro:
                id_remove_kind(element.name, ID_KIND_MACRO_NAME)
    # Read the source; the context manager closes the file right away
    # instead of leaving the handle to the garbage collector
    with open(asm_file_name, "r", encoding="utf-8") as asm_file:
        asm_file_contents = asm_file.read()
    # Find includes, fix their paths and handle them recursively.
    # The file is read even when it is not re-parsed, because its includes
    # still have to be visited.
    includes = re.findall(r'^include (["\'])(.*)\1', asm_file_contents, flags=re.MULTILINE)
    for include in includes:
        # Group 1 of the match is the quoted path; normalize the separators
        include = include[1].replace('\\', '/')
        full_path = subdir + '/' + include
        # If the path isn't valid relative to subdir, use it as written
        if not os.path.isfile(full_path):
            full_path = include
        new_subdir = full_path.rsplit('/', 1)[0]
        handle_file(handled_files, full_path, new_subdir)
    # Only collect declarations when the file needs parsing and we are not
    # just cleaning the generated files
    if should_get_declarations and not clean_generated_stuff:
        get_declarations(asm_file_contents, asm_file_name)
8825 | Boppan | 973 | |
if __name__ == "__main__":
    # Script entry point. Everything below is intentionally kept at module
    # level: the functions above reach these names as globals.
    link_root = "http://websvn.kolibrios.org/filedetails.php?repname=Kolibri+OS&path=/kernel/trunk"

    # Dict where an identifier is associated with a string
    # The string contains characters specifying flags
    # Available flags:
    #  k - Keyword
    #  m - Macro name
    #  t - fasm data Type name (db, rq, etc.)
    #  s - Struct type name
    #  e - equated constant (name equ value)
    #  = - set constants (name = value)
    ID_KIND_KEYWORD = 'k'
    ID_KIND_MACRO_NAME = 'm'
    ID_KIND_FASM_TYPE = 't'
    ID_KIND_STRUCT_NAME = 's'
    ID_KIND_EQUATED_CONSTANT = 'e'
    ID_KIND_SET_CONSTANT = '='
    id2kind = {}

    for keyword in keywords:
        id_add_kind(keyword, ID_KIND_KEYWORD)

    for fasm_type in fasm_types:
        id_add_kind(fasm_type, ID_KIND_FASM_TYPE)

    # Warning list
    warnings = ""

    # Parameters
    # Path to doxygen folder to make doxygen files in: -o
    doxygen_src_path = 'docs/doxygen'
    # Remove generated doxygen files: --clean
    clean_generated_stuff = False
    # Dump all defined symbols: --dump
    dump_symbols = False
    # Print symbol stats: --stats
    print_stats = False
    # Do not write warnings file: --nowarn
    enable_warnings = True

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", help="Doxygen output folder")
    parser.add_argument("--clean", help="Remove generated files", action="store_true")
    parser.add_argument("--dump", help="Dump all defined symbols", action="store_true")
    parser.add_argument("--stats", help="Print symbol stats", action="store_true")
    parser.add_argument("--nowarn", help="Do not write warnings file", action="store_true")
    parser.add_argument("--noemit", help="Do not emit doxygen files (for testing)", action="store_true")
    parser.add_argument("--debug", help="Show hashes of files (for testing)", action="store_true")
    args = parser.parse_args()
    doxygen_src_path = args.o if args.o else 'docs/doxygen'
    clean_generated_stuff = args.clean
    dump_symbols = args.dump
    print_stats = args.stats
    enable_warnings = not args.nowarn
    noemit = args.noemit
    debug_mode = args.debug

    # Variables, functions, labels, macros, structure types
    elements = []
    created_files = []
    kernel_files = []
    output_files = {}  # If --debug then all the files are written here

    # Load remembered list of symbols.
    # NOTE: unpickling executes code stored in the file - the dump must only
    # ever be one written by this tool itself.
    if os.path.isfile('asmxygen.elements.pickle'):
        print('Reading existing dump of symbols')
        with open('asmxygen.elements.pickle', 'rb') as pickle_file:
            (elements, id2kind) = pickle.load(pickle_file)

    handle_file(kernel_files, "./kernel.asm")

    if dump_symbols:
        # The elements dump themselves with print(), so stdout is pointed at
        # the dump file for the duration and restored even if a dump() fails
        stdout = sys.stdout
        with open('asmxygen.dump.txt', 'w', encoding='utf-8') as dump_file:
            sys.stdout = dump_file
            try:
                for asm_element in elements:
                    asm_element.dump()
            finally:
                sys.stdout = stdout

    if clean_generated_stuff:
        kernel_files_set = set(kernel_files)
        for file in kernel_files:
            doxygen_file = f"{doxygen_src_path}/{file}"
            if os.path.isfile(doxygen_file):
                print(f"Removing {file}... ", end='')
                os.remove(doxygen_file)
                print("Done.")
    elif not noemit:
        print(f"Writing doumented sources to {doxygen_src_path}")

        new_elements = [x for x in elements if x.new]
        for i, element in enumerate(new_elements):
            print(f"[{i + 1}/{len(new_elements)}] Emitting {element.name} from {element.location}")
            element.emit(doxygen_src_path)

        print("Writing dump of symbols to asmxygen.elements.pickle")

        # Now that the new elements are written, none of them is new anymore
        for element in elements:
            element.new = False
        with open('asmxygen.elements.pickle', 'wb') as pickle_file:
            pickle.dump((elements, id2kind), pickle_file)

    if print_stats:
        # Count elements per exact class (subclasses are not folded in)
        counts = {
            AsmVariable: 0,
            AsmMacro: 0,
            AsmLabel: 0,
            AsmFunction: 0,
            AsmUnion: 0,
            AsmStruct: 0,
        }
        for element in elements:
            element_class = type(element)
            if element_class in counts:
                counts[element_class] += 1
        print(f'Parsed variable count: {counts[AsmVariable]}')
        print(f'Parsed macro count: {counts[AsmMacro]}')
        print(f'Parsed label count: {counts[AsmLabel]}')
        print(f'Parsed function count: {counts[AsmFunction]}')
        print(f'Parsed union type count: {counts[AsmUnion]}')
        print(f'Parsed structure type count: {counts[AsmStruct]}')

    if enable_warnings:
        with open('asmxygen.txt', "w", encoding="utf-8") as warnings_file:
            warnings_file.write(warnings)

    if debug_mode:
        hash_lines = []
        for file in output_files:
            # UTF-8 yields the same bytes as ASCII for pure-ASCII text, and
            # does not fail on output that carries non-ASCII characters
            h = hashlib.sha1(output_files[file].encode("utf-8")).hexdigest()
            hash_lines.append(f"{file}: {h}\n")
        hash_per_file = "".join(hash_lines)
        if not os.path.exists("asmxygen_hash_per_file.txt"):
            # First run: store the hashes as the reference
            with open("asmxygen_hash_per_file.txt", "w") as reference_file:
                reference_file.write(hash_per_file)
            print("NEW")
        else:
            with open("asmxygen_hash_per_file.txt") as reference_file:
                reference_hash_per_file = reference_file.read()
            if reference_hash_per_file != hash_per_file:
                # ndiff compares sequences of lines; handing it the raw
                # strings would diff them character by character
                diff = difflib.ndiff(reference_hash_per_file.splitlines(keepends=True),
                                     hash_per_file.splitlines(keepends=True))
                print(''.join(diff))
            else:
                print("SUCCESS")