Package epydoc :: Module docparser
[hide private]
[frames | no frames]

Source Code for Module epydoc.docparser

   1  # epydoc -- Source code parsing 
   2  # 
   3  # Copyright (C) 2005 Edward Loper 
   4  # Author: Edward Loper <edloper@loper.org> 
   5  # URL: <http://epydoc.sf.net> 
   6  # 
   7  # $Id: docparser.py 1776 2008-02-24 06:40:17Z edloper $ 
   8   
   9  """ 
  10  Extract API documentation about python objects by parsing their source 
  11  code. 
  12   
  13  The function L{parse_docs()}, which provides the main interface 
  14  of this module, reads and parses the Python source code for a 
  15  module, and uses it to create an L{APIDoc} object containing 
  16  the API documentation for the variables and values defined in 
   17  that module. 
  18   
  19  Currently, C{parse_docs()} extracts documentation from the following 
  20  source code constructions: 
  21   
  22    - module docstring 
  23    - import statements 
  24    - class definition blocks 
  25    - function definition blocks 
  26    - assignment statements 
  27      - simple assignment statements 
  28      - assignment statements with multiple C{'='}s 
  29      - assignment statements with unpacked left-hand sides 
  30      - assignment statements that wrap a function in classmethod 
  31        or staticmethod. 
  32      - assignment to special variables __path__, __all__, and 
  33        __docformat__. 
  34    - delete statements 
  35   
  36  C{parse_docs()} does not yet support the following source code 
  37  constructions: 
  38   
  39    - assignment statements that create properties 
  40   
   41  By default, C{parse_docs()} will explore the contents of top-level 
  42  C{try} and C{if} blocks.  If desired, C{parse_docs()} can also 
  43  be configured to explore the contents of C{while} and C{for} blocks. 
  44  (See the configuration constants, below.) 
  45   
  46  @todo: Make it possible to extend the functionality of C{parse_docs()}, 
  47         by replacing process_line with a dispatch table that can be 
  48         customized (similarly to C{docintrospector.register_introspector()}). 
  49  """ 
  50  __docformat__ = 'epytext en' 
  51   
  52  ###################################################################### 
  53  ## Imports 
  54  ###################################################################### 
  55   
  56  # Python source code parsing: 
  57  import token, tokenize 
  58  # Finding modules: 
  59  import imp 
  60  # File services: 
  61  import os, os.path, sys 
  62  # Unicode: 
  63  import codecs 
  64  # API documentation encoding: 
  65  from epydoc.apidoc import * 
  66  # For looking up the docs of builtins: 
  67  import __builtin__, exceptions 
  68  import epydoc.docintrospecter  
  69  # Misc utility functions: 
  70  from epydoc.util import * 
  71  # Backwards compatibility 
  72  from epydoc.compat import * 
  73   
  74  ###################################################################### 
  75  ## Doc Parser 
  76  ###################################################################### 
  77   
class ParseError(Exception):
    """
    Raised by C{docparser} when the Python source file it is
    processing turns out to contain syntactically invalid code.

    The class adds nothing to C{Exception}; it exists so that callers
    can tell parse failures apart from other errors.
    """

# parse_docs() looks a file up in this cache before parsing it, and
# stores every ModuleDoc it creates here, keyed by the filename.
# NOTE(review): the text below says the values are C{ValueDoc} objects;
# the objects parse_docs() stores are ModuleDoc instances -- presumably
# a ValueDoc subclass, confirm against epydoc.apidoc.
_moduledoc_cache = {}
"""A cache of C{ModuleDoc}s that we've already created.
C{_moduledoc_cache} is a dictionary mapping from filenames to
C{ValueDoc} objects.
@type: C{dict}"""

#////////////////////////////////////////////////////////////
# Configuration Constants
#////////////////////////////////////////////////////////////

#{ Configuration Constants: Control Flow
PARSE_TRY_BLOCKS = True
"""Should the contents of C{try} blocks be examined?"""
PARSE_EXCEPT_BLOCKS = True
"""Should the contents of C{except} blocks be examined?"""
PARSE_FINALLY_BLOCKS = True
"""Should the contents of C{finally} blocks be examined?"""
PARSE_IF_BLOCKS = True
"""Should the contents of C{if} blocks be examined?"""
PARSE_ELSE_BLOCKS = True
"""Should the contents of C{else} and C{elif} blocks be examined?"""
PARSE_WHILE_BLOCKS = False
"""Should the contents of C{while} blocks be examined?"""
PARSE_FOR_BLOCKS = False
"""Should the contents of C{for} blocks be examined?"""

#{ Configuration Constants: Imports
IMPORT_HANDLING = 'link'
"""What should C{docparser} do when it encounters an import
statement?
  - C{'link'}: Create variabledoc objects with imported_from pointers
    to the source object.
  - C{'parse'}: Parse the imported file, to find the actual
    documentation for the imported object. (This will fall back
    to the 'link' behavior if the imported file can't be parsed,
    e.g., if it's a builtin.)
"""

# NOTE(review): the 'introspect' bullet below repeats the "can't be
# parsed" wording of the 'parse' bullet; presumably it should read
# "can't be imported" -- confirm against the import-handling code.
IMPORT_STAR_HANDLING = 'parse'
"""When C{docparser} encounters a C{'from M{m} import *'}
statement, and is unable to parse C{M{m}} (either because
L{IMPORT_HANDLING}=C{'link'}, or because parsing failed), how
should it determine the list of identifiers expored by C{M{m}}?
  - C{'ignore'}: ignore the import statement, and don't create
    any new variables.
  - C{'parse'}: parse it to find a list of the identifiers that it
    exports. (This will fall back to the 'ignore' behavior if the
    imported file can't be parsed, e.g., if it's a builtin.)
  - C{'introspect'}: import the module and introspect it (using C{dir})
    to find a list of the identifiers that it exports. (This will
    fall back to the 'ignore' behavior if the imported file can't
    be parsed, e.g., if it's a builtin.)
"""

# NOTE(review): "C{DocParse}" in the text below presumably refers to
# this module (docparser) -- confirm.
DEFAULT_DECORATOR_BEHAVIOR = 'transparent'
"""When C{DocParse} encounters an unknown decorator, what should
it do to the documentation of the decorated function?
  - C{'transparent'}: leave the function's documentation as-is.
  - C{'opaque'}: replace the function's documentation with an
    empty C{ValueDoc} object, reflecting the fact that we have no
    knowledge about what value the decorator returns.
"""

PUBLIC_DECORATOR_APPENDS_TO_ALL = True
"""If true, then the @public decorator will append the function's
name to the module's __all__ variable."""

# The trailing comment on the assignment records the other accepted
# value, 'link', which is described in the text below.
BASE_HANDLING = 'parse'#'link'
"""What should C{docparser} do when it encounters a base class that
was imported from another module?
  - C{'link'}: Create a valuedoc with a C{proxy_for} pointer to the
    base class.
  - C{'parse'}: Parse the file containing the base class, to find
    the actual documentation for it. (This will fall back to the
    'link' behavior if the imported file can't be parsed, e.g., if
    it's a builtin.)
"""

#{ Configuration Constants: Comment docstrings
COMMENT_DOCSTRING_MARKER = '#:'
"""The prefix used to mark comments that contain attribute
docstrings for variables."""

#{ Configuration Constants: Grouping
START_GROUP_MARKER = '#{'
"""The prefix used to mark a comment that starts a group. This marker
should be followed (on the same line) by the name of the group.
Following a start-group comment, all variables defined at the same
indentation level will be assigned to this group name, until the
parser reaches the end of the file, a matching end-group comment, or
another start-group comment at the same indentation level.
"""

END_GROUP_MARKER = '#}'
"""The prefix used to mark a comment that ends a group. See
L{START_GROUP_MARKER}."""

#/////////////////////////////////////////////////////////////////
#{ Module parser
#/////////////////////////////////////////////////////////////////

def parse_docs(filename=None, name=None, context=None, is_script=False):
    """
    Build the API documentation for one object by parsing Python
    source code, and return it as a L{ValueDoc}.

    Exactly one of C{filename} and C{name} must be given.

    @param filename: Path of the file holding the python source code
        of a package, module, or script.  When given, the result is
        the C{ModuleDoc} describing that file.
    @param name: Fully-qualified dotted name of any value (package,
        module, class, function, ...).  The module(s) that have to
        be parsed to reach the value are located automatically.
    @param context: API documentation of the package containing
        C{filename}.  Without it, the enclosing package directories
        (if any) are parsed to obtain one.  May not be combined
        with C{name}.
    @param is_script: If true, C{filename} is used as given, no
        enclosing package is looked up, the module name is derived
        with C{munge_script_name()}, and the result is never marked
        as a package.
    @rtype: L{ValueDoc}
    @raise ValueError: If both or neither of C{filename} and C{name}
        are given, or if C{context} is combined with C{name}.
    @raise ImportError: If C{py_src_filename()} rejects C{filename}.
    @raise ParseError: If the source file cannot be tokenized or
        decoded.
    """
    # Builtins and the standard exceptions are always introspected,
    # since parsed classes may use them as bases.
    epydoc.docintrospecter.introspect_docs(__builtin__)
    epydoc.docintrospecter.introspect_docs(exceptions)

    # --- Lookup by dotted name: hand the work to _find(). ---
    if filename is None and name is not None:
        if context:
            raise ValueError("context should only be specified together "
                             "with filename, not with name.")
        dotted_name = DottedName(name)
        found_doc = _find(dotted_name)
        if found_doc.canonical_name is UNKNOWN:
            found_doc.canonical_name = dotted_name
        return found_doc

    # --- Anything other than "filename only" is a usage error. ---
    if filename is None or name is not None:
        raise ValueError("Expected exactly one of the following "
                         "arguments: name, filename")

    # --- Lookup by filename: build and populate a ModuleDoc. ---

    # Prefer the python source file, where one can be determined.
    # (sys.exc_info() is used instead of binding the exception in the
    # except clause so that the statement reads the same under every
    # Python version's except syntax.)
    if not is_script:
        try:
            filename = py_src_filename(filename)
        except ValueError:
            raise ImportError('%s' % sys.exc_info()[1])

    # A file that was parsed before is served from the cache.
    if filename in _moduledoc_cache:
        return _moduledoc_cache[filename]

    log.info("Parsing %s" % filename)

    # With no explicit context, see whether the file lives inside a
    # package directory, and if so parse the enclosing package(s).
    # This relies on the parent packages using the default __path__;
    # packages that override __path__ may not be found this way.
    if context is None and not is_script:
        pkg_dir = os.path.dirname(filename)
        stem = os.path.splitext(os.path.basename(filename))[0]
        if stem == '__init__':
            # The file *is* the package; its parent is one level up.
            pkg_dir = os.path.dirname(pkg_dir)
        context = _parse_package(pkg_dir)

    # Determine the canonical name of the module being parsed.
    if is_script:
        module_name = DottedName(munge_script_name(filename))
        is_pkg = False
    else:
        module_name, is_pkg = _get_module_name(filename, context)

    # Create the ModuleDoc and cache it right away, before its
    # contents are processed.
    module_doc = ModuleDoc(canonical_name=module_name, variables={},
                           sort_spec=[], imports=[],
                           filename=filename, package=context,
                           is_package=is_pkg, submodules=[],
                           docs_extracted_by='parser')
    module_doc.defining_module = module_doc
    _moduledoc_cache[filename] = module_doc

    # A package's __path__ defaults to the directory of its file.
    if is_pkg:
        module_doc.path = [os.path.dirname(module_doc.filename)]

    # Register the module with its parent package.
    if context is not None:
        context.submodules.append(module_doc)

    # Tokenize and process the module's source, translating low-level
    # failures into ParseError.
    try:
        process_file(module_doc)
    except tokenize.TokenError:
        msg, (srow, scol) = sys.exc_info()[1].args
        raise ParseError('Error during parsing: %s '
                         '(%s, line %d, char %d)' %
                         (msg, module_doc.filename, srow, scol))
    except (IndentationError, UnicodeDecodeError):
        raise ParseError('Error during parsing: %s (%s)' %
                         (sys.exc_info()[1], module_doc.filename))

    # Interpret the special variables (__path__, __docformat__, etc.)
    handle_special_module_vars(module_doc)

    return module_doc
300
def _parse_package(package_dir):
    """
    Parse the package found in C{package_dir}, together with every
    ancestor package, and return the package's C{ModuleDoc}.

    @param package_dir: Directory that may be a python package.
    @return: The value returned by L{parse_docs()} for the package's
        C{__init__} file, or C{None} if C{is_package_dir()} reports
        that C{package_dir} is not a package directory.
    """
    if is_package_dir(package_dir):
        # Ancestors are parsed first, so that the result can serve
        # as this package's context.
        enclosing_dir = os.path.dirname(package_dir)
        enclosing_doc = _parse_package(enclosing_dir)
        # The name is passed without an extension; parse_docs() runs
        # it through py_src_filename() before using it.
        init_file = os.path.join(package_dir, '__init__')
        return parse_docs(filename=init_file, context=enclosing_doc)
    return None

# Special vars:
# C{__docformat__}, C{__all__}, and C{__path__}.
def handle_special_module_vars(module_doc):
    """
    Interpret the special module-level variables C{__docformat__},
    C{__all__} and C{__path__} of a parsed module, and remove each
    one that had a token tree from C{module_doc.variables}.

      - C{__docformat__}: its string value is stored in
        C{module_doc.docformat}.  A value that cannot be parsed is
        ignored.
      - C{__all__}: every variable named in the list is marked
        public, every other variable is marked non-public.  If the
        list cannot be parsed, C{is_imported} is reset to C{UNKNOWN}
        instead, so that introspection can decide.
      - C{__path__}: (packages only) its value is stored in
        C{module_doc.path}.  A value that cannot be parsed is
        ignored.

    @param module_doc: The C{ModuleDoc} to update in place.
    @return: C{None}
    """
    # If __docformat__ is defined, parse its value.
    toktree = _module_var_toktree(module_doc, '__docformat__')
    if toktree is not None:
        # Best effort: any ordinary failure leaves docformat alone.
        # Exception (rather than a bare except) keeps KeyboardInterrupt
        # and SystemExit from being swallowed here.
        try:
            module_doc.docformat = parse_string(toktree)
        except Exception:
            pass
        del module_doc.variables['__docformat__']

    # If __all__ is defined, parse its value.
    toktree = _module_var_toktree(module_doc, '__all__')
    if toktree is not None:
        try:
            public_names = set(parse_string_list(toktree))
            for name, var_doc in module_doc.variables.items():
                if name in public_names:
                    var_doc.is_public = True
                    # NOTE(review): entries of module_doc.variables
                    # expose a .value elsewhere in this module, so this
                    # test may have been meant for var_doc.value --
                    # confirm before changing.
                    if not isinstance(var_doc, ModuleDoc):
                        var_doc.is_imported = False
                else:
                    var_doc.is_public = False
        except ParseError:
            # If we couldn't parse the list, give precedence to introspection.
            for var_doc in module_doc.variables.values():
                if not isinstance(var_doc, ModuleDoc):
                    var_doc.is_imported = UNKNOWN
        del module_doc.variables['__all__']

    # If __path__ is defined, then extract its value (pkgs only)
    if module_doc.is_package:
        toktree = _module_var_toktree(module_doc, '__path__')
        if toktree is not None:
            try:
                module_doc.path = parse_string_list(toktree)
            except ParseError:
                pass # [xx]
            del module_doc.variables['__path__']
352
def _module_var_toktree(module_doc, name):
    """
    Return the token tree recorded for the value of the module
    variable C{name}.

    @param module_doc: The module whose C{variables} are searched.
    @param name: The variable's name.
    @return: C{var_doc.value.toktree}, or C{None} if the variable is
        not defined, its value is C{None} or C{UNKNOWN}, or the
        value's token tree is C{UNKNOWN}.
    """
    var_doc = module_doc.variables.get(name)
    if var_doc is None:
        return None
    if var_doc.value in (None, UNKNOWN):
        return None
    if var_doc.value.toktree is UNKNOWN:
        return None
    return var_doc.value.toktree
360 361 #//////////////////////////////////////////////////////////// 362 #{ Module Lookup 363 #//////////////////////////////////////////////////////////// 364
def _find(name, package_doc=None):
    """
    Locate and return the API documentation for the object whose
    dotted name is C{name}.

    @param name: The dotted name to resolve; its leftmost identifier
        must name a module or package.
    @param package_doc: If given, the API documentation of the
        package that contains the named object; the search is then
        restricted to that package's path.
    @raise ImportError: If the object cannot be located.
    """
    # Inside a package, search the package's path; if that path is
    # not known, fall back to the directory of the package's file.
    search_path = None
    if package_doc is not None:
        search_path = package_doc.path
        if search_path is UNKNOWN:
            search_path = [os.path.dirname(package_doc.filename)]

    # The leftmost identifier names a module or package on that
    # path: find its file and parse it.
    module_file = _get_filename(name[0], search_path)
    module_doc = parse_docs(module_file, context=package_doc)

    # A single identifier means the module itself was requested.
    if len(name) == 1:
        return module_doc

    # Otherwise look inside the module.  Variables are tried first,
    # except those that merely hold an imported submodule.
    if not _is_submodule_import_var(module_doc, name[1]):
        try:
            return _find_in_namespace(name[1:], module_doc)
        except ImportError:
            pass

    # Neither a variable nor (for non-packages) a subpackage: give up.
    if not module_doc.is_package:
        raise ImportError('Could not find value')

    # Finally, try to resolve the remainder as a subpackage/submodule.
    return _find(name[1:], module_doc)
402
def _is_submodule_import_var(module_doc, var_name):
    """
    Tell whether C{var_name} names a variable of C{module_doc} whose
    C{imported_from} link points at a submodule with that same name
    (i.e., a variable that exists only because the package imported
    one of its own submodules).

    @return: A false value if C{module_doc} has no such variable;
        otherwise the result of comparing the variable's
        C{imported_from} with C{module_doc.canonical_name + var_name}.
    """
    candidate = module_doc.variables.get(var_name)
    submodule_name = DottedName(module_doc.canonical_name, var_name)
    if candidate is None:
        return False
    return candidate.imported_from == submodule_name
414
def _find_in_namespace(name, namespace_doc):
    """
    Resolve the dotted name C{name} relative to C{namespace_doc} and
    return the documentation of the value it denotes.

    @param name: The dotted name to resolve; its first identifier is
        looked up in C{namespace_doc.variables}.
    @param namespace_doc: The namespace to search.
    @raise ImportError: If the name cannot be resolved.
    """
    first = name[0]
    if first not in namespace_doc.variables:
        raise ImportError('Could not find value')

    # Fetch the variable; a variable without a known value is a
    # dead end.
    var_doc = namespace_doc.variables[first]
    if var_doc.value is UNKNOWN:
        raise ImportError('Could not find value')
    val_doc = var_doc.value

    # An imported value is resolved through its alias link.
    if var_doc.imported_from not in (None, UNKNOWN):
        return _find(var_doc.imported_from+name[1:])

    # A single identifier means this value is the one requested.
    if len(name) == 1:
        return val_doc

    # More identifiers remain: descend if the value is a namespace.
    if isinstance(val_doc, NamespaceDoc):
        return _find_in_namespace(name[1:], val_doc)

    # Nothing left to try.
    raise ImportError('Could not find value')
442
443 -def _get_filename(identifier, path=None):
444 if path is UNKNOWN: path = None 445 try: 446 fp, filename, (s,m,typ) = imp.find_module(identifier, path) 447 if fp is not None: fp.close() 448 except ImportError: 449 raise ImportError, 'No Python source file found.' 450 451 if typ == imp.PY_SOURCE: 452 return