Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. #  Content (c) 2002, 2004, 2006-2009, 2012
  2. #    David Turner <david@freetype.org>
  3. #
  4. #  This file contains routines used to parse the content of documentation
  5. #  comment blocks and build more structured objects out of them.
  6. #
  7.  
  8. from sources import *
  9. from utils import *
  10. import string, re
  11.  
  12.  
# These regular expressions are used to detect code sequences.  These
# are simply code fragments embedded between a '{' line and a '}' line,
# like in:
#
#  {
#    x = y + z;
#    if ( zookoo == 2 )
#    {
#      foobar();
#    }
#  }
#
# Each pattern matches a line that holds nothing but the brace and optional
# surrounding whitespace; group 1 captures the leading whitespace, whose
# length is the brace's indentation.
#
# The parser (see DocField) ends a code sequence at the first closing-brace
# line that is not indented deeper than the opening brace, so the sequence
# itself can contain braces at greater indentation.
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
  30.  
  31.  
# This regular expression is used to isolate identifiers from other text.
# It matches a (possibly empty) run of word characters and dashes, which is
# captured in group 1.
#
re_identifier = re.compile( r'((?:\w|-)*)' )
  36.  
  37.  
# We collect macros ending in `_H'; while outputting the object data, we use
# this info together with the object's file location to emit the appropriate
# header file macro and name before the object itself.
#
# The pattern matches lines of the form `#define FOO_H <some/path>'; group 1
# captures the macro name and group 2 the text between the angle brackets.
#
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
  43.  
  44.  
  45. #############################################################################
  46. #
  47. # The DocCode class is used to store source code lines.
  48. #
  49. #   'self.lines' contains a set of source code lines that will be dumped as
  50. #   HTML in a <PRE> tag.
  51. #
  52. #   The object is filled line by line by the parser; it strips the leading
  53. #   "margin" space from each input line before storing it in 'self.lines'.
  54. #
  55. class  DocCode:
  56.  
  57.     def  __init__( self, margin, lines ):
  58.         self.lines = []
  59.         self.words = None
  60.  
  61.         # remove margin spaces
  62.         for l in lines:
  63.             if string.strip( l[:margin] ) == "":
  64.                 l = l[margin:]
  65.             self.lines.append( l )
  66.  
  67.     def  dump( self, prefix = "", width = 60 ):
  68.         lines = self.dump_lines( 0, width )
  69.         for l in lines:
  70.             print prefix + l
  71.  
  72.     def  dump_lines( self, margin = 0, width = 60 ):
  73.         result = []
  74.         for l in self.lines:
  75.             result.append( " " * margin + l )
  76.         return result
  77.  
  78.  
  79.  
  80. #############################################################################
  81. #
  82. # The DocPara class is used to store "normal" text paragraph.
  83. #
  84. #   'self.words' contains the list of words that make up the paragraph
  85. #
  86. class  DocPara:
  87.  
  88.     def  __init__( self, lines ):
  89.         self.lines = None
  90.         self.words = []
  91.         for l in lines:
  92.             l = string.strip( l )
  93.             self.words.extend( string.split( l ) )
  94.  
  95.     def  dump( self, prefix = "", width = 60 ):
  96.         lines = self.dump_lines( 0, width )
  97.         for l in lines:
  98.             print prefix + l
  99.  
  100.     def  dump_lines( self, margin = 0, width = 60 ):
  101.         cur    = ""  # current line
  102.         col    = 0   # current width
  103.         result = []
  104.  
  105.         for word in self.words:
  106.             ln = len( word )
  107.             if col > 0:
  108.                 ln = ln + 1
  109.  
  110.             if col + ln > width:
  111.                 result.append( " " * margin + cur )
  112.                 cur = word
  113.                 col = len( word )
  114.             else:
  115.                 if col > 0:
  116.                     cur = cur + " "
  117.                 cur = cur + word
  118.                 col = col + ln
  119.  
  120.         if col > 0:
  121.             result.append( " " * margin + cur )
  122.  
  123.         return result
  124.  
  125.  
  126.  
  127. #############################################################################
  128. #
  129. #  The DocField class is used to store a list containing either DocPara or
  130. #  DocCode objects. Each DocField also has an optional "name" which is used
  131. #  when the object corresponds to a field or value definition
  132. #
  133. class  DocField:
  134.  
  135.     def  __init__( self, name, lines ):
  136.         self.name  = name  # can be None for normal paragraphs/sources
  137.         self.items = []    # list of items
  138.  
  139.         mode_none  = 0     # start parsing mode
  140.         mode_code  = 1     # parsing code sequences
  141.         mode_para  = 3     # parsing normal paragraph
  142.  
  143.         margin     = -1    # current code sequence indentation
  144.         cur_lines  = []
  145.  
  146.         # now analyze the markup lines to see if they contain paragraphs,
  147.         # code sequences or fields definitions
  148.         #
  149.         start = 0
  150.         mode  = mode_none
  151.  
  152.         for l in lines:
  153.             # are we parsing a code sequence ?
  154.             if mode == mode_code:
  155.                 m = re_code_end.match( l )
  156.                 if m and len( m.group( 1 ) ) <= margin:
  157.                     # that's it, we finished the code sequence
  158.                     code = DocCode( 0, cur_lines )
  159.                     self.items.append( code )
  160.                     margin    = -1
  161.                     cur_lines = []
  162.                     mode      = mode_none
  163.                 else:
  164.                     # nope, continue the code sequence
  165.                     cur_lines.append( l[margin:] )
  166.             else:
  167.                 # start of code sequence ?
  168.                 m = re_code_start.match( l )
  169.                 if m:
  170.                     # save current lines
  171.                     if cur_lines:
  172.                         para = DocPara( cur_lines )
  173.                         self.items.append( para )
  174.                         cur_lines = []
  175.  
  176.                     # switch to code extraction mode
  177.                     margin = len( m.group( 1 ) )
  178.                     mode   = mode_code
  179.                 else:
  180.                     if not string.split( l ) and cur_lines:
  181.                         # if the line is empty, we end the current paragraph,
  182.                         # if any
  183.                         para = DocPara( cur_lines )
  184.                         self.items.append( para )
  185.                         cur_lines = []
  186.                     else:
  187.                         # otherwise, simply add the line to the current
  188.                         # paragraph
  189.                         cur_lines.append( l )
  190.  
  191.         if mode == mode_code:
  192.             # unexpected end of code sequence
  193.             code = DocCode( margin, cur_lines )
  194.             self.items.append( code )
  195.         elif cur_lines:
  196.             para = DocPara( cur_lines )
  197.             self.items.append( para )
  198.  
  199.     def  dump( self, prefix = "" ):
  200.         if self.field:
  201.             print prefix + self.field + " ::"
  202.             prefix = prefix + "----"
  203.  
  204.         first = 1
  205.         for p in self.items:
  206.             if not first:
  207.                 print ""
  208.             p.dump( prefix )
  209.             first = 0
  210.  
  211.     def  dump_lines( self, margin = 0, width = 60 ):
  212.         result = []
  213.         nl     = None
  214.  
  215.         for p in self.items:
  216.             if nl:
  217.                 result.append( "" )
  218.  
  219.             result.extend( p.dump_lines( margin, width ) )
  220.             nl = 1
  221.  
  222.         return result
  223.  
  224.  
  225.  
# this regular expression is used to detect field definitions, i.e., lines
# that start (after optional whitespace) with a field name followed by `::';
# the name is captured in group 1 and is either a possibly empty run of word
# characters, or a run of word characters with inner dots
#
re_field = re.compile( r"\s*(\w*|\w(\w|\.)*\w)\s*::" )
  229.  
  230.  
  231.  
  232. class  DocMarkup:
  233.  
  234.     def  __init__( self, tag, lines ):
  235.         self.tag    = string.lower( tag )
  236.         self.fields = []
  237.  
  238.         cur_lines = []
  239.         field     = None
  240.         mode      = 0
  241.  
  242.         for l in lines:
  243.             m = re_field.match( l )
  244.             if m:
  245.                 # we detected the start of a new field definition
  246.  
  247.                 # first, save the current one
  248.                 if cur_lines:
  249.                     f = DocField( field, cur_lines )
  250.                     self.fields.append( f )
  251.                     cur_lines = []
  252.                     field     = None
  253.  
  254.                 field     = m.group( 1 )   # record field name
  255.                 ln        = len( m.group( 0 ) )
  256.                 l         = " " * ln + l[ln:]
  257.                 cur_lines = [l]
  258.             else:
  259.                 cur_lines.append( l )
  260.  
  261.         if field or cur_lines:
  262.             f = DocField( field, cur_lines )
  263.             self.fields.append( f )
  264.  
  265.     def  get_name( self ):
  266.         try:
  267.             return self.fields[0].items[0].words[0]
  268.         except:
  269.             return None
  270.  
  271.     def  get_start( self ):
  272.         try:
  273.             result = ""
  274.             for word in self.fields[0].items[0].words:
  275.                 result = result + " " + word
  276.             return result[1:]
  277.         except:
  278.             return "ERROR"
  279.  
  280.     def  dump( self, margin ):
  281.         print " " * margin + "<" + self.tag + ">"
  282.         for f in self.fields:
  283.             f.dump( "  " )
  284.         print " " * margin + "</" + self.tag + ">"
  285.  
  286.  
  287.  
  288. class  DocChapter:
  289.  
  290.     def  __init__( self, block ):
  291.         self.block    = block
  292.         self.sections = []
  293.         if block:
  294.             self.name  = block.name
  295.             self.title = block.get_markup_words( "title" )
  296.             self.order = block.get_markup_words( "sections" )
  297.         else:
  298.             self.name  = "Other"
  299.             self.title = string.split( "Miscellaneous" )
  300.             self.order = []
  301.  
  302.  
  303.  
  304. class  DocSection:
  305.  
  306.     def  __init__( self, name = "Other" ):
  307.         self.name        = name
  308.         self.blocks      = {}
  309.         self.block_names = []  # ordered block names in section
  310.         self.defs        = []
  311.         self.abstract    = ""
  312.         self.description = ""
  313.         self.order       = []
  314.         self.title       = "ERROR"
  315.         self.chapter     = None
  316.  
  317.     def  add_def( self, block ):
  318.         self.defs.append( block )
  319.  
  320.     def  add_block( self, block ):
  321.         self.block_names.append( block.name )
  322.         self.blocks[block.name] = block
  323.  
  324.     def  process( self ):
  325.         # look up one block that contains a valid section description
  326.         for block in self.defs:
  327.             title = block.get_markup_text( "title" )
  328.             if title:
  329.                 self.title       = title
  330.                 self.abstract    = block.get_markup_words( "abstract" )
  331.                 self.description = block.get_markup_items( "description" )
  332.                 self.order       = block.get_markup_words( "order" )
  333.                 return
  334.  
  335.     def  reorder( self ):
  336.         self.block_names = sort_order_list( self.block_names, self.order )
  337.  
  338.  
  339.  
  340. class  ContentProcessor:
  341.  
  342.     def  __init__( self ):
  343.         """initialize a block content processor"""
  344.         self.reset()
  345.  
  346.         self.sections = {}    # dictionary of documentation sections
  347.         self.section  = None  # current documentation section
  348.  
  349.         self.chapters = []    # list of chapters
  350.  
  351.         self.headers  = {}    # dictionary of header macros
  352.  
  353.     def  set_section( self, section_name ):
  354.         """set current section during parsing"""
  355.         if not self.sections.has_key( section_name ):
  356.             section = DocSection( section_name )
  357.             self.sections[section_name] = section
  358.             self.section                = section
  359.         else:
  360.             self.section = self.sections[section_name]
  361.  
  362.     def  add_chapter( self, block ):
  363.         chapter = DocChapter( block )
  364.         self.chapters.append( chapter )
  365.  
  366.  
  367.     def  reset( self ):
  368.         """reset the content processor for a new block"""
  369.         self.markups      = []
  370.         self.markup       = None
  371.         self.markup_lines = []
  372.  
  373.     def  add_markup( self ):
  374.         """add a new markup section"""
  375.         if self.markup and self.markup_lines:
  376.  
  377.             # get rid of last line of markup if it's empty
  378.             marks = self.markup_lines
  379.             if len( marks ) > 0 and not string.strip( marks[-1] ):
  380.                 self.markup_lines = marks[:-1]
  381.  
  382.             m = DocMarkup( self.markup, self.markup_lines )
  383.  
  384.             self.markups.append( m )
  385.  
  386.             self.markup       = None
  387.             self.markup_lines = []
  388.  
  389.     def  process_content( self, content ):
  390.         """process a block content and return a list of DocMarkup objects
  391.           corresponding to it"""
  392.         markup       = None
  393.         markup_lines = []
  394.         first        = 1
  395.  
  396.         for line in content:
  397.             found = None
  398.             for t in re_markup_tags:
  399.                 m = t.match( line )
  400.                 if m:
  401.                     found  = string.lower( m.group( 1 ) )
  402.                     prefix = len( m.group( 0 ) )
  403.                     line   = " " * prefix + line[prefix:]   # remove markup from line
  404.                     break
  405.  
  406.             # is it the start of a new markup section ?
  407.             if found:
  408.                 first = 0
  409.                 self.add_markup()  # add current markup content
  410.                 self.markup = found
  411.                 if len( string.strip( line ) ) > 0:
  412.                     self.markup_lines.append( line )
  413.             elif first == 0:
  414.                 self.markup_lines.append( line )
  415.  
  416.         self.add_markup()
  417.  
  418.         return self.markups
  419.  
  420.     def  parse_sources( self, source_processor ):
  421.         blocks = source_processor.blocks
  422.         count  = len( blocks )
  423.  
  424.         for n in range( count ):
  425.             source = blocks[n]
  426.             if source.content:
  427.                 # this is a documentation comment, we need to catch
  428.                 # all following normal blocks in the "follow" list
  429.                 #
  430.                 follow = []
  431.                 m = n + 1
  432.                 while m < count and not blocks[m].content:
  433.                     follow.append( blocks[m] )
  434.                     m = m + 1
  435.  
  436.                 doc_block = DocBlock( source, follow, self )
  437.  
  438.     def  finish( self ):
  439.         # process all sections to extract their abstract, description
  440.         # and ordered list of items
  441.         #
  442.         for sec in self.sections.values():
  443.             sec.process()
  444.  
  445.         # process chapters to check that all sections are correctly
  446.         # listed there
  447.         for chap in self.chapters:
  448.             for sec in chap.order:
  449.                 if self.sections.has_key( sec ):
  450.                     section = self.sections[sec]
  451.                     section.chapter = chap
  452.                     section.reorder()
  453.                     chap.sections.append( section )
  454.                 else:
  455.                     sys.stderr.write( "WARNING: chapter '" +          \
  456.                         chap.name + "' in " + chap.block.location() + \
  457.                         " lists unknown section '" + sec + "'\n" )
  458.  
  459.         # check that all sections are in a chapter
  460.         #
  461.         others = []
  462.         for sec in self.sections.values():
  463.             if not sec.chapter:
  464.                 others.append( sec )
  465.  
  466.         # create a new special chapter for all remaining sections
  467.         # when necessary
  468.         #
  469.         if others:
  470.             chap = DocChapter( None )
  471.             chap.sections = others
  472.             self.chapters.append( chap )
  473.  
  474.  
  475.  
  476. class  DocBlock:
  477.  
  478.     def  __init__( self, source, follow, processor ):
  479.         processor.reset()
  480.  
  481.         self.source  = source
  482.         self.code    = []
  483.         self.type    = "ERRTYPE"
  484.         self.name    = "ERRNAME"
  485.         self.section = processor.section
  486.         self.markups = processor.process_content( source.content )
  487.  
  488.         # compute block type from first markup tag
  489.         try:
  490.             self.type = self.markups[0].tag
  491.         except:
  492.             pass
  493.  
  494.         # compute block name from first markup paragraph
  495.         try:
  496.             markup = self.markups[0]
  497.             para   = markup.fields[0].items[0]
  498.             name   = para.words[0]
  499.             m = re_identifier.match( name )
  500.             if m:
  501.                 name = m.group( 1 )
  502.             self.name = name
  503.         except:
  504.             pass
  505.  
  506.         if self.type == "section":
  507.             # detect new section starts
  508.             processor.set_section( self.name )
  509.             processor.section.add_def( self )
  510.         elif self.type == "chapter":
  511.             # detect new chapter
  512.             processor.add_chapter( self )
  513.         else:
  514.             processor.section.add_block( self )
  515.  
  516.         # now, compute the source lines relevant to this documentation
  517.         # block. We keep normal comments in for obvious reasons (??)
  518.         source = []
  519.         for b in follow:
  520.             if b.format:
  521.                 break
  522.             for l in b.lines:
  523.                 # collect header macro definitions
  524.                 m = re_header_macro.match( l )
  525.                 if m:
  526.                     processor.headers[m.group( 2 )] = m.group( 1 );
  527.  
  528.                 # we use "/* */" as a separator
  529.                 if re_source_sep.match( l ):
  530.                     break
  531.                 source.append( l )
  532.  
  533.         # now strip the leading and trailing empty lines from the sources
  534.         start = 0
  535.         end   = len( source ) - 1
  536.  
  537.         while start < end and not string.strip( source[start] ):
  538.             start = start + 1
  539.  
  540.         while start < end and not string.strip( source[end] ):
  541.             end = end - 1
  542.  
  543.         if start == end and not string.strip( source[start] ):
  544.             self.code = []
  545.         else:
  546.             self.code = source[start:end + 1]
  547.  
  548.     def  location( self ):
  549.         return self.source.location()
  550.  
  551.     def  get_markup( self, tag_name ):
  552.         """return the DocMarkup corresponding to a given tag in a block"""
  553.         for m in self.markups:
  554.             if m.tag == string.lower( tag_name ):
  555.                 return m
  556.         return None
  557.  
  558.     def  get_markup_name( self, tag_name ):
  559.         """return the name of a given primary markup in a block"""
  560.         try:
  561.             m = self.get_markup( tag_name )
  562.             return m.get_name()
  563.         except:
  564.             return None
  565.  
  566.     def  get_markup_words( self, tag_name ):
  567.         try:
  568.             m = self.get_markup( tag_name )
  569.             return m.fields[0].items[0].words
  570.         except:
  571.             return []
  572.  
  573.     def  get_markup_text( self, tag_name ):
  574.         result = self.get_markup_words( tag_name )
  575.         return string.join( result )
  576.  
  577.     def  get_markup_items( self, tag_name ):
  578.         try:
  579.             m = self.get_markup( tag_name )
  580.             return m.fields[0].items
  581.         except:
  582.             return None
  583.  
  584. # eof
  585.