Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. #  Sources (c) 2002-2004, 2006-2009, 2012
  2. #    David Turner <david@freetype.org>
  3. #
  4. #
# this file contains definitions of classes needed to decompose
# C source files into a series of multi-line "blocks". There are
# two kinds of blocks:
#
#   - normal blocks, which contain source code or ordinary comments
#
#   - documentation blocks, which have restricted formatting, and
#     whose text always starts with a documentation markup tag like
#     "<Function>", "<Type>", etc.
  14. #
  15. # the routines used to process the content of documentation blocks
  16. # are not contained here, but in "content.py"
  17. #
  18. # the classes and methods found here only deal with text parsing
  19. # and basic documentation block extraction
  20. #
  21.  
  22. import fileinput, re, sys, os, string
  23.  
  24.  
  25.  
  26. ################################################################
  27. ##
  28. ##  BLOCK FORMAT PATTERN
  29. ##
  30. ##   A simple class containing compiled regular expressions used
  31. ##   to detect potential documentation format block comments within
  32. ##   C source code
  33. ##
  34. ##   note that the 'column' pattern must contain a group that will
  35. ##   be used to "unbox" the content of documentation comment blocks
  36. ##
  37. class  SourceBlockFormat:
  38.  
  39.     def  __init__( self, id, start, column, end ):
  40.         """create a block pattern, used to recognize special documentation blocks"""
  41.         self.id     = id
  42.         self.start  = re.compile( start, re.VERBOSE )
  43.         self.column = re.compile( column, re.VERBOSE )
  44.         self.end    = re.compile( end, re.VERBOSE )
  45.  
  46.  
  47.  
#
# format 1 documentation comment blocks look like the following:
#
#    /************************************/
#    /*                                  */
#    /*                                  */
#    /*                                  */
#    /************************************/
#
# we define a few regular expressions here to detect them
#
# note that `start' and `column' are plain module-level names that are
# assigned again below for format 2; the patterns are compiled when the
# SourceBlockFormat object is created, so the order of these statements
# matters
#

# opening line: a comment made only of asterisks, alone on its line
start = r'''
 \s*      # any number of whitespace
 /\*{2,}/ # followed by '/' and at least two asterisks then '/'
 \s*$     # probably followed by whitespace
'''

# interior line: group 1 is the text between the leading `/*' and the
# trailing `*/'
column = r'''
 \s*      # any number of whitespace
 /\*{1}   # followed by '/' and precisely one asterisk
 ([^*].*) # followed by anything (group 1)
 \*{1}/   # followed by one asterisk and a '/'
 \s*$     # probably followed by whitespace
'''

# the closing line of a format 1 block looks exactly like its opening
# line, hence `start' is passed as the `end' pattern too
re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
  75.  
  76.  
#
# format 2 documentation comment blocks look like the following:
#
#    /************************************ (at least 2 asterisks)
#     *
#     *
#     *
#     *
#     **/       (1 or more asterisks at the end)
#
# we define a few regular expressions here to detect them
#
# `start' and `column' are re-assigned here; the format 1 object built
# earlier already holds its own compiled patterns
#

# opening line: `/' and a run of asterisks with no closing `/'
start = r'''
 \s*     # any number of whitespace
 /\*{2,} # followed by '/' and at least two asterisks
 \s*$    # probably followed by whitespace
'''

# interior line: group 1 is everything after the leading asterisk
column = r'''
 \s*        # any number of whitespace
 \*{1}(?!/) # followed by precisely one asterisk not followed by `/'
 (.*)       # then anything (group1)
'''

# closing line: only the beginning of the line is checked, whatever
# follows the `/' is not constrained by this pattern
end = r'''
 \s*  # any number of whitespace
 \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
  107.  
  108.  
#
# the list of supported documentation block formats; new ones could be
# added relatively easily
#
# SourceProcessor.process_normal_line() tries the `start' pattern of each
# entry against every line read outside of a documentation block
#
re_source_block_formats = [re_source_block_format1, re_source_block_format2]
  114.  
  115.  
#
# the following regular expressions correspond to markup tags
# within the documentation comment blocks. they're equivalent
# despite their different syntax
#
# notice how each markup tag _must_ begin a new line
#
# in both patterns, group 1 is the tag name: a (possibly empty) run of
# word characters and hyphens; leading whitespace is skipped
#
re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' )  # <xxxx> format
re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' )  # @xxxx: format

#
# the list of supported markup tags; new ones could be added relatively
# easily
#
re_markup_tags = [re_markup_tag1, re_markup_tag2]
  131.  
#
# used to detect a cross-reference, after markup tags have been stripped
#
# group 1 is the referenced name (word characters and hyphens), group 2
# is whatever follows it
#
re_crossref = re.compile( r'@((?:\w|-)*)(.*)' )

#
# used to detect italic and bold styles in paragraph text
#
# group 1 is the styled word (word characters and apostrophes, starting
# with a word character), group 3 is whatever follows the closing marker
#
re_italic = re.compile( r"_(\w(\w|')*)_(.*)" )     #  _italic_
re_bold   = re.compile( r"\*(\w(\w|')*)\*(.*)" )   #  *bold*

#
# used to detect the end of commented source lines
#
# the pattern matches an empty C comment, i.e. `/*' and `*/' separated
# by optional whitespace only
#
re_source_sep = re.compile( r'\s*/\*\s*\*/' )

#
# used to perform cross-reference within source output
#
# group 1 is a run of non-word characters, group 2 the run of word
# characters that follows it; both may be empty
#
re_source_crossref = re.compile( r'(\W*)(\w*)' )
  152.  
  153. #
  154. # a list of reserved source keywords
  155. #
  156. re_source_keywords = re.compile( '''\\b ( typedef   |
  157.                                          struct    |
  158.                                          enum      |
  159.                                          union     |
  160.                                          const     |
  161.                                          char      |
  162.                                          int       |
  163.                                          short     |
  164.                                          long      |
  165.                                          void      |
  166.                                          signed    |
  167.                                          unsigned  |
  168.                                          \#include |
  169.                                          \#define  |
  170.                                          \#undef   |
  171.                                          \#if      |
  172.                                          \#ifdef   |
  173.                                          \#ifndef  |
  174.                                          \#else    |
  175.                                          \#endif   ) \\b''', re.VERBOSE )
  176.  
  177.  
  178. ################################################################
  179. ##
  180. ##  SOURCE BLOCK CLASS
  181. ##
  182. ##   A SourceProcessor is in charge of reading a C source file
  183. ##   and decomposing it into a series of different "SourceBlocks".
  184. ##   each one of these blocks can be made of the following data:
  185. ##
  186. ##   - A documentation comment block that starts with "/**" and
  187. ##     whose exact format will be discussed later
  188. ##
  189. ##   - normal sources lines, including comments
  190. ##
  191. ##   the important fields in a text block are the following ones:
  192. ##
  193. ##     self.lines   : a list of text lines for the corresponding block
  194. ##
##     self.content : for documentation comment blocks only, this is the
##                    block content that has been "unboxed" from its
##                    decoration. This is an empty list for all other
##                    blocks (i.e. sources or ordinary comments with no
##                    starting markup tag)
  200. ##
  201. class  SourceBlock:
  202.  
  203.     def  __init__( self, processor, filename, lineno, lines ):
  204.         self.processor = processor
  205.         self.filename  = filename
  206.         self.lineno    = lineno
  207.         self.lines     = lines[:]
  208.         self.format    = processor.format
  209.         self.content   = []
  210.  
  211.         if self.format == None:
  212.             return
  213.  
  214.         words = []
  215.  
  216.         # extract comment lines
  217.         lines = []
  218.  
  219.         for line0 in self.lines:
  220.             m = self.format.column.match( line0 )
  221.             if m:
  222.                 lines.append( m.group( 1 ) )
  223.  
  224.         # now, look for a markup tag
  225.         for l in lines:
  226.             l = string.strip( l )
  227.             if len( l ) > 0:
  228.                 for tag in re_markup_tags:
  229.                     if tag.match( l ):
  230.                         self.content = lines
  231.                         return
  232.  
  233.     def  location( self ):
  234.         return "(" + self.filename + ":" + repr( self.lineno ) + ")"
  235.  
  236.     # debugging only - not used in normal operations
  237.     def  dump( self ):
  238.         if self.content:
  239.             print "{{{content start---"
  240.             for l in self.content:
  241.                 print l
  242.             print "---content end}}}"
  243.             return
  244.  
  245.         fmt = ""
  246.         if self.format:
  247.             fmt = repr( self.format.id ) + " "
  248.  
  249.         for line in self.lines:
  250.             print line
  251.  
  252.  
  253.  
  254. ################################################################
  255. ##
  256. ##  SOURCE PROCESSOR CLASS
  257. ##
  258. ##   The SourceProcessor is in charge of reading a C source file
  259. ##   and decomposing it into a series of different "SourceBlock"
  260. ##   objects.
  261. ##
  262. ##   each one of these blocks can be made of the following data:
  263. ##
  264. ##   - A documentation comment block that starts with "/**" and
  265. ##     whose exact format will be discussed later
  266. ##
  267. ##   - normal sources lines, include comments
  268. ##
  269. ##
  270. class  SourceProcessor:
  271.  
  272.     def  __init__( self ):
  273.         """initialize a source processor"""
  274.         self.blocks   = []
  275.         self.filename = None
  276.         self.format   = None
  277.         self.lines    = []
  278.  
  279.     def  reset( self ):
  280.         """reset a block processor, clean all its blocks"""
  281.         self.blocks = []
  282.         self.format = None
  283.  
  284.     def  parse_file( self, filename ):
  285.         """parse a C source file, and add its blocks to the processor's list"""
  286.         self.reset()
  287.  
  288.         self.filename = filename
  289.  
  290.         fileinput.close()
  291.         self.format = None
  292.         self.lineno = 0
  293.         self.lines  = []
  294.  
  295.         for line in fileinput.input( filename ):
  296.             # strip trailing newlines, important on Windows machines!
  297.             if line[-1] == '\012':
  298.                 line = line[0:-1]
  299.  
  300.             if self.format == None:
  301.                 self.process_normal_line( line )
  302.             else:
  303.                 if self.format.end.match( line ):
  304.                     # that's a normal block end, add it to 'lines' and
  305.                     # create a new block
  306.                     self.lines.append( line )
  307.                     self.add_block_lines()
  308.                 elif self.format.column.match( line ):
  309.                     # that's a normal column line, add it to 'lines'
  310.                     self.lines.append( line )
  311.                 else:
  312.                     # humm.. this is an unexpected block end,
  313.                     # create a new block, but don't process the line
  314.                     self.add_block_lines()
  315.  
  316.                     # we need to process the line again
  317.                     self.process_normal_line( line )
  318.  
  319.         # record the last lines
  320.         self.add_block_lines()
  321.  
  322.     def  process_normal_line( self, line ):
  323.         """process a normal line and check whether it is the start of a new block"""
  324.         for f in re_source_block_formats:
  325.             if f.start.match( line ):
  326.                 self.add_block_lines()
  327.                 self.format = f
  328.                 self.lineno = fileinput.filelineno()
  329.  
  330.         self.lines.append( line )
  331.  
  332.     def  add_block_lines( self ):
  333.         """add the current accumulated lines and create a new block"""
  334.         if self.lines != []:
  335.             block = SourceBlock( self, self.filename, self.lineno, self.lines )
  336.  
  337.             self.blocks.append( block )
  338.             self.format = None
  339.             self.lines  = []
  340.  
  341.     # debugging only, not used in normal operations
  342.     def  dump( self ):
  343.         """print all blocks in a processor"""
  344.         for b in self.blocks:
  345.             b.dump()
  346.  
  347. # eof
  348.