Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | # Content (c) 2002, 2004, 2006-2009, 2012 |
2 | # David Turner |
||
3 | # |
||
4 | # This file contains routines used to parse the content of documentation |
||
5 | # comment blocks and build more structured objects out of them. |
||
6 | # |
||
7 | |||
8 | from sources import * |
||
9 | from utils import * |
||
10 | import string, re |
||
11 | |||
12 | |||
13 | # this regular expression is used to detect code sequences. these |
||
14 | # are simply code fragments embedded in '{' and '}' like in: |
||
15 | # |
||
16 | # { |
||
17 | # x = y + z; |
||
18 | # if ( zookoo == 2 ) |
||
19 | # { |
||
20 | # foobar(); |
||
21 | # } |
||
22 | # } |
||
23 | # |
||
24 | # note that indentation of the starting and ending accolades must be |
||
25 | # exactly the same. the code sequence can contain accolades at greater |
||
26 | # indentation |
||
27 | # |
||
# A code sequence starts (ends) on a line holding nothing but an opening
# (closing) brace.  Group 1 captures the leading whitespace so that callers
# can compare the indentation of both braces.
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
||
30 | |||
31 | |||
32 | # this regular expression is used to isolate identifiers from |
||
33 | # other text |
||
34 | # |
||
# Group 1 is the longest leading run of word characters and dashes; the
# pattern also matches (with an empty group) when the text starts with
# something else.
re_identifier = re.compile( r'((?:\w|-)*)' )
||
36 | |||
37 | |||
38 | # we collect macros ending in `_H'; while outputting the object data, we use |
||
39 | # this info together with the object's file location to emit the appropriate |
||
40 | # header file macro and name before the object itself |
||
41 | # |
||
# Matches `#define NAME_H <path>' at the start of a line; group 1 is the
# macro name (it must end in `_H'), group 2 the text between the angle
# brackets.
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
||
43 | |||
44 | |||
45 | ############################################################################# |
||
46 | # |
||
47 | # The DocCode class is used to store source code lines. |
||
48 | # |
||
49 | # 'self.lines' contains a set of source code lines that will be dumped as |
||
50 | # HTML in a <PRE> tag. |
||
51 | # |
||
52 | # The object is filled line by line by the parser; it strips the leading |
||
53 | # "margin" space from each input line before storing it in 'self.lines'. |
||
54 | # |
||
class DocCode:
    """Hold a block of source code lines, stored verbatim.

    Attributes:
      lines -- the code lines, with the leading margin removed where
               possible
      words -- always None; a code block has no word list
    """

    def __init__( self, margin, lines ):
        """Store `lines', dropping the first `margin' columns of each line
        whenever those columns contain only whitespace."""
        self.lines = []
        self.words = None

        for l in lines:
            # only cut the margin if doing so cannot remove real text
            if l[:margin].strip() == "":
                l = l[margin:]
            self.lines.append( l )

    def dump( self, prefix = "", width = 60 ):
        """Print every stored line, preceded by `prefix'."""
        for l in self.dump_lines( 0, width ):
            print( prefix + l )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the stored lines, each indented by `margin' spaces.

        `width' is accepted for signature parity with DocPara.dump_lines;
        code lines are never re-wrapped, so it is not used.
        """
        indent = " " * margin
        return [indent + l for l in self.lines]
||
77 | |||
78 | |||
79 | |||
80 | ############################################################################# |
||
81 | # |
||
82 | # The DocPara class is used to store "normal" text paragraph. |
||
83 | # |
||
84 | # 'self.words' contains the list of words that make up the paragraph |
||
85 | # |
||
class DocPara:
    """Hold a "normal" text paragraph as a flat list of words.

    Attributes:
      lines -- always None; a paragraph keeps no line structure
      words -- the whitespace-separated words of all input lines, in order
    """

    def __init__( self, lines ):
        """Split every line of `lines' into words and collect them."""
        self.lines = None
        self.words = []
        for l in lines:
            # split() without arguments already ignores leading and
            # trailing whitespace
            self.words.extend( l.split() )

    def dump( self, prefix = "", width = 60 ):
        """Print the paragraph wrapped to `width' columns, each output line
        preceded by `prefix'."""
        for l in self.dump_lines( 0, width ):
            print( prefix + l )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the paragraph as a list of lines.

        Words are joined with single spaces and a new line is started as
        soon as the next word would exceed `width' columns; every line is
        indented by `margin' spaces (not counted in `width').  A word
        longer than `width' is placed on a line of its own.
        """
        indent = " " * margin
        cur    = ""     # current line
        col    = 0      # current width
        result = []

        for word in self.words:
            ln = len( word )
            if col > 0:
                ln = ln + 1          # account for the separating space

            # flush only a non-empty line: an over-long first word must not
            # produce a leading blank line
            if col > 0 and col + ln > width:
                result.append( indent + cur )
                cur = word
                col = len( word )
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + ln

        if col > 0:
            result.append( indent + cur )

        return result
||
124 | |||
125 | |||
126 | |||
127 | ############################################################################# |
||
128 | # |
||
129 | # The DocField class is used to store a list containing either DocPara or |
||
130 | # DocCode objects. Each DocField also has an optional "name" which is used |
||
131 | # when the object corresponds to a field or value definition |
||
132 | # |
||
class DocField:
    """Hold an ordered list of DocPara and DocCode items.

    Attributes:
      name  -- optional field name; None for plain paragraphs/sources
      items -- the DocPara / DocCode objects, in input order
    """

    def __init__( self, name, lines ):
        """Split `lines' into paragraphs and code sequences.

        A code sequence starts at a line matching `re_code_start' and ends
        at a line matching `re_code_end' whose indentation is not greater
        than that of the opening brace.  Outside of code sequences, an
        empty line ends the paragraph being collected.
        """
        self.name  = name
        self.items = []

        in_code   = False
        margin    = -1      # indentation of the current code sequence
        cur_lines = []

        for l in lines:
            if in_code:
                m = re_code_end.match( l )
                if m and len( m.group( 1 ) ) <= margin:
                    # closing brace found: the code sequence is complete
                    self.items.append( DocCode( 0, cur_lines ) )
                    margin    = -1
                    cur_lines = []
                    in_code   = False
                else:
                    # still inside the sequence; drop the margin right away
                    cur_lines.append( l[margin:] )
                continue

            m = re_code_start.match( l )
            if m:
                # flush the pending paragraph, then switch to code mode
                if cur_lines:
                    self.items.append( DocPara( cur_lines ) )
                    cur_lines = []
                margin  = len( m.group( 1 ) )
                in_code = True
            elif not l.split() and cur_lines:
                # an empty line ends the current paragraph, if any
                self.items.append( DocPara( cur_lines ) )
                cur_lines = []
            else:
                cur_lines.append( l )

        if in_code:
            # unexpected end of code sequence; the margin has already been
            # removed from the collected lines, so do not strip it again
            self.items.append( DocCode( 0, cur_lines ) )
        elif cur_lines:
            self.items.append( DocPara( cur_lines ) )

    def dump( self, prefix = "" ):
        """Print the field name (if any) followed by all items, separating
        consecutive items with an empty line."""
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"

        for index, item in enumerate( self.items ):
            if index > 0:
                print( "" )
            item.dump( prefix )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the lines of all items, with one empty string between
        consecutive items."""
        result = []

        for index, item in enumerate( self.items ):
            if index > 0:
                result.append( "" )
            result.extend( item.dump_lines( margin, width ) )

        return result
||
223 | |||
224 | |||
225 | |||
226 | # this regular expression is used to detect field definitions |
||
227 | # |
||
# Matches `name ::' at the start of a line (leading whitespace allowed).
# Group 1 is the name: a possibly empty run of word characters, or word
# characters with embedded dots.
re_field = re.compile( r"\s*(\w*|\w(\w|\.)*\w)\s*::" )
||
229 | |||
230 | |||
231 | |||
class DocMarkup:
    """Hold the content of one markup section as a list of DocField objects.

    Attributes:
      tag    -- the markup tag, lowercased
      fields -- the DocField objects, in input order
    """

    def __init__( self, tag, lines ):
        """Split `lines' into fields.

        A line matching `re_field' starts a new named field; the matched
        `name ::' prefix is replaced by spaces of the same length so that
        the remaining text keeps its column.  Lines seen before the first
        field definition go into a field whose name is None.
        """
        self.tag    = tag.lower()
        self.fields = []

        cur_lines = []
        field     = None

        for l in lines:
            m = re_field.match( l )
            if m:
                # a new field definition starts; save the current one first
                if cur_lines:
                    self.fields.append( DocField( field, cur_lines ) )

                field     = m.group( 1 )        # record field name
                ln        = len( m.group( 0 ) )
                cur_lines = [" " * ln + l[ln:]]
            else:
                cur_lines.append( l )

        if field or cur_lines:
            self.fields.append( DocField( field, cur_lines ) )

    def get_name( self ):
        """Return the first word of the first item of the first field, or
        None if there is no such word."""
        try:
            return self.fields[0].items[0].words[0]
        except ( AttributeError, IndexError, TypeError ):
            # no field, no item, or an item without a word list
            return None

    def get_start( self ):
        """Return the words of the first item of the first field joined by
        single spaces, or the string "ERROR" if they are not available."""
        try:
            return " ".join( self.fields[0].items[0].words )
        except ( AttributeError, IndexError, TypeError ):
            return "ERROR"

    def dump( self, margin ):
        """Print the markup as `<tag>', all fields, then `</tag>'; both tag
        lines are indented by `margin' spaces."""
        print( " " * margin + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( " " )
        print( " " * margin + "</" + self.tag + ">" )
||
285 | |||
286 | |||
287 | |||
class DocChapter:
    """Describe a chapter, i.e. a named, ordered group of sections.

    Attributes:
      block    -- the block the chapter was built from, or None
      sections -- list of sections; starts empty and is filled by callers
      name     -- chapter name
      title    -- chapter title, as a list of words
      order    -- list of words taken from the block's `sections' markup
    """

    def __init__( self, block ):
        """Build the chapter from `block'; a false `block' (e.g. None)
        yields the catch-all chapter named "Other"."""
        self.block    = block
        self.sections = []

        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            self.title = "Miscellaneous".split()
            self.order = []
||
301 | |||
302 | |||
303 | |||
class DocSection:
    """Describe one documentation section and the blocks it contains.

    Attributes:
      name        -- section name
      blocks      -- dictionary mapping block names to blocks
      block_names -- block names in the order they were added (until
                     `reorder' is called)
      defs        -- blocks registered through `add_def'
      abstract, description, order, title
                  -- filled by `process'; "ERROR" is the title placeholder
      chapter     -- None at creation; set from outside the class
    """

    def __init__( self, name = "Other" ):
        self.name        = name
        self.blocks      = {}
        self.block_names = []        # ordered block names in section
        self.defs        = []
        self.abstract    = ""
        self.description = ""
        self.order       = []
        self.title       = "ERROR"
        self.chapter     = None

    def add_def( self, block ):
        """Register `block' as a candidate description of this section."""
        self.defs.append( block )

    def add_block( self, block ):
        """Add `block' to the section, remembering its insertion order."""
        self.block_names.append( block.name )
        self.blocks[block.name] = block

    def process( self ):
        """Take title, abstract, description and order from the first
        registered definition block that has a non-empty title."""
        for block in self.defs:
            title = block.get_markup_text( "title" )
            if not title:
                continue

            self.title       = title
            self.abstract    = block.get_markup_words( "abstract" )
            self.description = block.get_markup_items( "description" )
            self.order       = block.get_markup_words( "order" )
            return

    def reorder( self ):
        """Replace `block_names' by the result of `sort_order_list' applied
        to it and to `order'."""
        self.block_names = sort_order_list( self.block_names, self.order )
||
337 | |||
338 | |||
339 | |||
class ContentProcessor:
    """Turn documentation comment blocks into DocMarkup objects and keep
    track of the sections, chapters and header macros found on the way.

    Attributes:
      sections     -- dictionary mapping section names to DocSection objects
      section      -- current section (None until `set_section' is called)
      chapters     -- list of DocChapter objects
      headers      -- dictionary of header macros
      markups      -- DocMarkup objects of the block being processed
      markup       -- tag of the markup section being collected, or None
      markup_lines -- lines of the markup section being collected
    """

    def __init__( self ):
        """initialize a block content processor"""
        self.reset()

        self.sections = {}      # dictionary of documentation sections
        self.section  = None    # current documentation section

        self.chapters = []      # list of chapters

        self.headers  = {}      # dictionary of header macros

    def set_section( self, section_name ):
        """set current section during parsing, creating it on first use"""
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )

        self.section = self.sections[section_name]

    def add_chapter( self, block ):
        """append a new chapter built from `block'"""
        self.chapters.append( DocChapter( block ) )

    def reset( self ):
        """reset the content processor for a new block"""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def add_markup( self ):
        """add a new markup section

        The pending tag and lines are turned into a DocMarkup object if
        both are non-empty; the pending state is cleared in any case.
        """
        if self.markup and self.markup_lines:
            # get rid of last line of markup if it's empty
            if not self.markup_lines[-1].strip():
                self.markup_lines = self.markup_lines[:-1]

            self.markups.append( DocMarkup( self.markup, self.markup_lines ) )

        self.markup       = None
        self.markup_lines = []

    def process_content( self, content ):
        """process a block content and return a list of DocMarkup objects
        corresponding to it

        Lines found before the first markup tag are ignored.
        """
        seen_tag = False

        for line in content:
            found = None
            for t in re_markup_tags:
                m = t.match( line )
                if m:
                    found  = m.group( 1 ).lower()
                    prefix = len( m.group( 0 ) )
                    # blank out the tag but keep the text in its column
                    line   = " " * prefix + line[prefix:]
                    break

            if found:
                # start of a new markup section
                seen_tag = True
                self.add_markup()       # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif seen_tag:
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def parse_sources( self, source_processor ):
        """build a DocBlock for every block of `source_processor.blocks'
        that has content, passing it the run of content-less blocks that
        directly follows"""
        blocks = source_processor.blocks
        count  = len( blocks )

        for n, source in enumerate( blocks ):
            if not source.content:
                continue

            # this is a documentation comment, we need to catch
            # all following normal blocks in the "follow" list
            follow = []
            m      = n + 1
            while m < count and not blocks[m].content:
                follow.append( blocks[m] )
                m = m + 1

            # the constructor registers the new block with this processor,
            # so the object itself does not need to be kept here
            DocBlock( source, follow, self )

    def finish( self ):
        """process all sections, attach them to the chapters that list
        them, and collect the remaining ones in an extra chapter"""
        # imported locally so that the warning below does not depend on
        # `sys' leaking in through one of the file's star imports
        import sys

        # extract abstract, description and ordered list of items
        for sec in self.sections.values():
            sec.process()

        # check that all sections listed in chapters are known
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section         = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +
                      chap.name + "' in " + chap.block.location() +
                      " lists unknown section '" + sec + "'\n" )

        # create a new special chapter for all sections not in a chapter
        others = [sec for sec in self.sections.values() if not sec.chapter]
        if others:
            chap          = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )
||
473 | |||
474 | |||
475 | |||
class DocBlock:
    """Describe one documentation comment and the source code following it.

    Attributes:
      source  -- the source block holding the documentation comment
      code    -- the source lines following the comment, without leading
                 and trailing empty lines
      type    -- tag of the first markup ("ERRTYPE" if there is none)
      name    -- identifier at the start of the first markup ("ERRNAME" if
                 it cannot be determined)
      section -- the processor's current section at construction time
      markups -- list of markup objects returned by the processor
    """

    def __init__( self, source, follow, processor ):
        """Parse `source.content' with `processor', register the new block
        with the processor, and collect the code lines from `follow'."""
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # compute block type from first markup tag
        try:
            self.type = self.markups[0].tag
        except ( AttributeError, IndexError, TypeError ):
            pass

        # compute block name from first markup paragraph
        try:
            name = self.markups[0].fields[0].items[0].words[0]
            m    = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name
        except ( AttributeError, IndexError, TypeError ):
            # no markup, field or item, or an item without a word list
            pass

        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            processor.section.add_block( self )

        # now, compute the source lines relevant to this documentation
        # block; collection stops at the first follow-up block whose
        # `format' attribute is set
        lines = []
        for b in follow:
            if b.format:
                break
            for l in b.lines:
                # collect header macro definitions
                m = re_header_macro.match( l )
                if m:
                    processor.headers[m.group( 2 )] = m.group( 1 )

                # a separator line ends the lines taken from this block
                # only; the next follow-up block is still examined
                if re_source_sep.match( l ):
                    break
                lines.append( l )

        # now strip the leading and trailing empty lines from the sources
        start = 0
        end   = len( lines )

        while start < end and not lines[start].strip():
            start = start + 1

        while end > start and not lines[end - 1].strip():
            end = end - 1

        self.code = lines[start:end]

    def location( self ):
        """return the location reported by the underlying source block"""
        return self.source.location()

    def get_markup( self, tag_name ):
        """return the DocMarkup corresponding to a given tag in a block,
        or None; the tag name is compared case-insensitively"""
        wanted = tag_name.lower()
        for m in self.markups:
            if m.tag == wanted:
                return m
        return None

    def get_markup_name( self, tag_name ):
        """return the name of a given primary markup in a block, or None
        if the block has no such markup"""
        try:
            return self.get_markup( tag_name ).get_name()
        except ( AttributeError, IndexError, TypeError ):
            return None

    def get_markup_words( self, tag_name ):
        """return the word list of the first item of the first field of the
        given markup, or an empty list if it is not available"""
        try:
            return self.get_markup( tag_name ).fields[0].items[0].words
        except ( AttributeError, IndexError, TypeError ):
            return []

    def get_markup_text( self, tag_name ):
        """return the words of the given markup joined by single spaces"""
        return " ".join( self.get_markup_words( tag_name ) )

    def get_markup_items( self, tag_name ):
        """return the item list of the first field of the given markup, or
        None if it is not available"""
        try:
            return self.get_markup( tag_name ).fields[0].items
        except ( AttributeError, IndexError, TypeError ):
            return None
||
583 | |||
584 | # eof |