1
2
3
4
5
6
7
8
9 """
10 Extract API documentation about python objects by parsing their source
11 code.
12
The function L{parse_docs()}, which provides the main interface
of this module, reads and parses the Python source code for a
module, and uses it to create an L{APIDoc} object containing
the API documentation for the variables and values defined in
that module.
18
19 Currently, C{parse_docs()} extracts documentation from the following
20 source code constructions:
21
22 - module docstring
23 - import statements
24 - class definition blocks
25 - function definition blocks
26 - assignment statements
27 - simple assignment statements
28 - assignment statements with multiple C{'='}s
29 - assignment statements with unpacked left-hand sides
30 - assignment statements that wrap a function in classmethod
31 or staticmethod.
32 - assignment to special variables __path__, __all__, and
33 __docformat__.
34 - delete statements
35
36 C{parse_docs()} does not yet support the following source code
37 constructions:
38
39 - assignment statements that create properties
40
By default, C{parse_docs()} will explore the contents of top-level
C{try} and C{if} blocks.  If desired, C{parse_docs()} can also
be configured to explore the contents of C{while} and C{for} blocks.
44 (See the configuration constants, below.)
45
46 @todo: Make it possible to extend the functionality of C{parse_docs()},
47 by replacing process_line with a dispatch table that can be
48 customized (similarly to C{docintrospector.register_introspector()}).
49 """
__docformat__ = 'epytext en'  # markup language used by this module's docstrings
51
52
53
54
55
56
import codecs
import imp
import os, os.path, sys
import re
import token, tokenize
import __builtin__, exceptions

import epydoc.docintrospecter
from epydoc.apidoc import *
from epydoc.compat import *
from epydoc.util import *
73
74
75
76
77
79 """
80 An exception that is used to signify that C{docparser} encountered
81 syntactically invalid Python code while processing a Python source
82 file.
83 """
84
_moduledoc_cache = {}
"""A cache of C{ModuleDoc}s that we've already created.
C{_moduledoc_cache} is a dictionary mapping from filenames to
C{ModuleDoc} objects.
@type: C{dict}"""
90
91
92
93
94
95
#////////////////////////////////////////////////////////////
# Configuration constants: which block types the parser descends into
#////////////////////////////////////////////////////////////

PARSE_TRY_BLOCKS = True
"""Should the contents of C{try} blocks be examined?"""
PARSE_EXCEPT_BLOCKS = True
"""Should the contents of C{except} blocks be examined?"""
PARSE_FINALLY_BLOCKS = True
"""Should the contents of C{finally} blocks be examined?"""
PARSE_IF_BLOCKS = True
"""Should the contents of C{if} blocks be examined?"""
PARSE_ELSE_BLOCKS = True
"""Should the contents of C{else} and C{elif} blocks be examined?"""
PARSE_WHILE_BLOCKS = False
"""Should the contents of C{while} blocks be examined?"""
PARSE_FOR_BLOCKS = False
"""Should the contents of C{for} blocks be examined?"""
110
111
#////////////////////////////////////////////////////////////
# Configuration constants: import, decorator & base-class handling
#////////////////////////////////////////////////////////////

IMPORT_HANDLING = 'link'
"""What should C{docparser} do when it encounters an import
statement?
  - C{'link'}: Create variabledoc objects with imported_from pointers
    to the source object.
  - C{'parse'}: Parse the imported file, to find the actual
    documentation for the imported object.  (This will fall back
    to the 'link' behavior if the imported file can't be parsed,
    e.g., if it's a builtin.)
"""

IMPORT_STAR_HANDLING = 'parse'
"""When C{docparser} encounters a C{'from M{m} import *'}
statement, and is unable to parse C{M{m}} (either because
L{IMPORT_HANDLING}=C{'link'}, or because parsing failed), how
should it determine the list of identifiers exported by C{M{m}}?
  - C{'ignore'}: ignore the import statement, and don't create
    any new variables.
  - C{'parse'}: parse it to find a list of the identifiers that it
    exports.  (This will fall back to the 'ignore' behavior if the
    imported file can't be parsed, e.g., if it's a builtin.)
  - C{'introspect'}: import the module and introspect it (using C{dir})
    to find a list of the identifiers that it exports.  (This will
    fall back to the 'ignore' behavior if the imported file can't
    be parsed, e.g., if it's a builtin.)
"""

DEFAULT_DECORATOR_BEHAVIOR = 'transparent'
"""When C{DocParser} encounters an unknown decorator, what should
it do to the documentation of the decorated function?
  - C{'transparent'}: leave the function's documentation as-is.
  - C{'opaque'}: replace the function's documentation with an
    empty C{ValueDoc} object, reflecting the fact that we have no
    knowledge about what value the decorator returns.
"""

PUBLIC_DECORATOR_APPENDS_TO_ALL = True
"""If true, then the @public decorator will append the function's
name to the module's __all__ variable."""

BASE_HANDLING = 'parse'
"""What should C{docparser} do when it encounters a base class that
was imported from another module?
  - C{'link'}: Create a valuedoc with a C{proxy_for} pointer to the
    base class.
  - C{'parse'}: Parse the file containing the base class, to find
    the actual documentation for it.  (This will fall back to the
    'link' behavior if the imported file can't be parsed, e.g., if
    it's a builtin.)
"""

#////////////////////////////////////////////////////////////
# Configuration constants: comment docstrings & grouping markers
#////////////////////////////////////////////////////////////

COMMENT_DOCSTRING_MARKER = '#:'
"""The prefix used to mark comments that contain attribute
docstrings for variables."""

START_GROUP_MARKER = '#{'
"""The prefix used to mark a comment that starts a group.  This marker
should be followed (on the same line) by the name of the group.
Following a start-group comment, all variables defined at the same
indentation level will be assigned to this group name, until the
parser reaches the end of the file, a matching end-group comment, or
another start-group comment at the same indentation level.
"""

END_GROUP_MARKER = '#}'
"""The prefix used to mark a comment that ends a group.  See
L{START_GROUP_MARKER}."""
181
182
183
184
185
def parse_docs(filename=None, name=None, context=None, is_script=False):
    """
    Generate the API documentation for a specified object by
    parsing Python source files, and return it as a L{ValueDoc}.
    The object to generate documentation for may be specified
    using the C{filename} parameter I{or} the C{name} parameter.
    (It is an error to specify both a filename and a name; or to
    specify neither a filename nor a name).

    @param filename: The name of the file that contains the python
        source code for a package, module, or script.  If
        C{filename} is specified, then C{parse} will return a
        C{ModuleDoc} describing its contents.
    @param name: The fully-qualified python dotted name of any
        value (including packages, modules, classes, and
        functions).  C{parse_docs()} will automatically figure out
        which module(s) it needs to parse in order to find the
        documentation for the specified object.
    @param context: The API documentation for the package that
        contains C{filename}.  If no context is given, then
        C{filename} is assumed to contain a top-level module or
        package.  It is an error to specify a C{context} if the
        C{name} argument is used.
    @rtype: L{ValueDoc}
    """
    # Always introspect __builtin__ & exceptions first, since values
    # in those modules can't be found by parsing.
    epydoc.docintrospecter.introspect_docs(__builtin__)
    epydoc.docintrospecter.introspect_docs(exceptions)

    # Case 1: input is a dotted name -- delegate to _find().
    if filename is None and name is not None:
        if context:
            raise ValueError("context should only be specified together "
                             "with filename, not with name.")
        name = DottedName(name)
        val_doc = _find(name)
        if val_doc.canonical_name is UNKNOWN:
            val_doc.canonical_name = name
        return val_doc

    # Case 2: input is a filename -- build a ModuleDoc for it.
    elif filename is not None and name is None:
        # Prefer a python source version of the file, if available.
        if not is_script:
            try: filename = py_src_filename(filename)
            except ValueError, e: raise ImportError('%s' % e)

        # Check the cache, first.
        if filename in _moduledoc_cache:
            return _moduledoc_cache[filename]

        log.info("Parsing %s" % filename)

        # If no context was given, check whether the file lives in a
        # package directory, and parse the containing package(s).
        if context is None and not is_script:
            basedir = os.path.split(filename)[0]
            name = os.path.splitext(os.path.split(filename)[1])[0]
            if name == '__init__':
                basedir, name = os.path.split(basedir)
            context = _parse_package(basedir)

        # Figure out the canonical name of the module we're parsing.
        if not is_script:
            module_name, is_pkg = _get_module_name(filename, context)
        else:
            module_name = DottedName(munge_script_name(filename))
            is_pkg = False

        # Create a new ModuleDoc for the module, and add it to the
        # cache *before* processing, so recursive imports terminate.
        module_doc = ModuleDoc(canonical_name=module_name, variables={},
                               sort_spec=[], imports=[],
                               filename=filename, package=context,
                               is_package=is_pkg, submodules=[],
                               docs_extracted_by='parser')
        module_doc.defining_module = module_doc
        _moduledoc_cache[filename] = module_doc

        # For packages, the default __path__ is the package directory.
        if is_pkg:
            module_doc.path = [os.path.split(module_doc.filename)[0]]

        # Register this module with its parent package, if any.
        if context is not None:
            context.submodules.append(module_doc)

        # Tokenize & process the contents of the module's source file.
        try:
            process_file(module_doc)
        except tokenize.TokenError, e:
            msg, (srow, scol) = e.args
            raise ParseError('Error during parsing: %s '
                             '(%s, line %d, char %d)' %
                             (msg, module_doc.filename, srow, scol))
        except (IndentationError, UnicodeDecodeError), e:
            raise ParseError('Error during parsing: %s (%s)' %
                             (e, module_doc.filename))

        # Handle any special variables (__all__, __docformat__, etc.).
        handle_special_module_vars(module_doc)

        return module_doc
    else:
        raise ValueError("Expected exactly one of the following "
                         "arguments: name, filename")
300
302 """
303 If the given directory is a package directory, then parse its
304 __init__.py file (and the __init__.py files of all ancestor
305 packages); and return its C{ModuleDoc}.
306 """
307 if not is_package_dir(package_dir):
308 return None
309 parent_dir = os.path.split(package_dir)[0]
310 parent_doc = _parse_package(parent_dir)
311 package_file = os.path.join(package_dir, '__init__')
312 return parse_docs(filename=package_file, context=parent_doc)
313
314
315
352
360
361
362
363
364
def _find(name, package_doc=None):
    """
    Return the API documentation for the object whose name is
    C{name}.  C{package_doc}, if specified, is the API
    documentation for the package containing the named object.
    """
    # Decide which path to search for the module containing the
    # named object.
    if package_doc is None:
        path = None
    elif package_doc.path is not UNKNOWN:
        path = package_doc.path
    else:
        path = [os.path.split(package_doc.filename)[0]]

    # Parse the module containing the name's first identifier.
    filename = _get_filename(name[0], path)
    module_doc = parse_docs(filename, context=package_doc)

    # If the name has just one identifier, then the module we just
    # parsed is the object we're looking for.
    if len(name) == 1: return module_doc

    # Otherwise, look for the rest of the name inside the module's
    # namespace (but ignore variables that merely link to imported
    # submodules of the same name).
    if not _is_submodule_import_var(module_doc, name[1]):
        try: return _find_in_namespace(name[1:], module_doc)
        except ImportError: pass

    # If it isn't a variable, it may live in a submodule.
    if module_doc.is_package:
        return _find(name[1:], module_doc)

    # Not a variable and not a submodule: give up.
    raise ImportError('Could not find value')
402
404 """
405 Return true if C{var_name} is the name of a variable in
406 C{module_doc} that just contains an C{imported_from} link to a
407 submodule of the same name. (I.e., is a variable created when
408 a package imports one of its own submodules.)
409 """
410 var_doc = module_doc.variables.get(var_name)
411 full_var_name = DottedName(module_doc.canonical_name, var_name)
412 return (var_doc is not None and
413 var_doc.imported_from == full_var_name)
414
442
444 if path is UNKNOWN: path = None
445 try:
446 fp, filename, (s,m,typ) = imp.find_module(identifier, path)
447 if fp is not None: fp.close()
448 except ImportError:
449 raise ImportError, 'No Python source file found.'
450
451 if typ == imp.PY_SOURCE:
452 return filename
453 elif typ == imp.PY_COMPILED:
454
455 filename = re.sub('.py\w$', '.py', filename)
456 if not os.path.exists(filename):
457 raise ImportError, 'No Python source file found.'
458 return filename
459 elif typ == imp.PKG_DIRECTORY:
460 filename = os.path.join(filename, '__init__.py')
461 if not os.path.exists(filename):
462 filename = os.path.join(filename, '__init__.pyw')
463 if not os.path.exists(filename):
464 raise ImportError, 'No package file found.'
465 return filename
466 elif typ == imp.C_BUILTIN:
467 raise ImportError, 'No Python source file for builtin modules.'
468 elif typ == imp.C_EXTENSION:
469 raise ImportError, 'No Python source file for c extensions.'
470 else:
471 raise ImportError, 'No Python source file found.'
472
473
474
475
476
478 """
479 Read the given C{ModuleDoc}'s file, and add variables
480 corresponding to any objects defined in that file. In
481 particular, read and tokenize C{module_doc.filename}, and
482 process each logical line using L{process_line()}.
483 """
484
485 lineno = None
486
487
488 line_toks = []
489
490
491
492
493
494
495 parent_docs = [module_doc]
496
497
498
499
500
501 prev_line_doc = module_doc
502
503
504
505
506 comments = []
507
508
509
510
511 decorators = []
512
513
514
515
516 groups = [None]
517
518
519
520
521
522
523
524
525 start_group = None
526
527
528 encoding = get_module_encoding(module_doc.filename)
529
530
531 try:
532 module_file = codecs.open(module_doc.filename, 'rU', encoding)
533 except LookupError:
534 log.warning("Unknown encoding %r for %s; using the default"
535 "encoding instead (iso-8859-1)" %
536 (encoding, module_doc.filename))
537 encoding = 'iso-8859-1'
538 module_file = codecs.open(module_doc.filename, 'rU', encoding)
539 tok_iter = tokenize.generate_tokens(module_file.readline)
540 for toktype, toktext, (srow,scol), (erow,ecol), line_str in tok_iter:
541
542 if (toktype == token.ERRORTOKEN and
543 (toktext == u'\ufeff' or
544 toktext.encode(encoding) == '\xef\xbb\xbf')):
545 pass
546
547
548 elif toktype == token.ERRORTOKEN:
549 raise ParseError('Error during parsing: invalid syntax '
550 '(%s, line %d, char %d: %r)' %
551 (module_doc.filename, srow, scol, toktext))
552
553
554 elif toktype == token.INDENT:
555 if prev_line_doc is None:
556 parent_docs.append(parent_docs[-1])
557 else:
558 parent_docs.append(prev_line_doc)
559 groups.append(None)
560
561
562 elif toktype == token.DEDENT:
563 if line_toks == []:
564 parent_docs.pop()
565 groups.pop()
566 else:
567
568
569
570 pass
571
572
573
574
575
576 elif toktype == tokenize.NL:
577 if comments and not line_toks:
578 log.warning('Ignoring docstring comment block followed by '
579 'a blank line in %r on line %r' %
580 (module_doc.filename, srow-1))
581 comments = []
582
583
584 elif toktype == tokenize.COMMENT:
585 if toktext.startswith(COMMENT_DOCSTRING_MARKER):
586 comment_line = toktext[len(COMMENT_DOCSTRING_MARKER):].rstrip()
587 if comment_line.startswith(" "):
588 comment_line = comment_line[1:]
589 comments.append( [comment_line, srow])
590 elif toktext.startswith(START_GROUP_MARKER):
591 start_group = toktext[len(START_GROUP_MARKER):].strip()
592 elif toktext.startswith(END_GROUP_MARKER):
593 for i in range(len(groups)-1, -1, -1):
594 if groups[i]:
595 groups[i] = None
596 break
597 else:
598 log.warning("Got group end marker without a corresponding "
599 "start marker in %r on line %r" %
600 (module_doc.filename, srow))
601
602
603
604
605
606
607 elif toktype != token.NEWLINE and toktype != token.ENDMARKER:
608 if lineno is None: lineno = srow
609 if toktype == token.STRING:
610 str_prefixes = re.match('[^\'"]*', toktext).group()
611 if 'u' not in str_prefixes:
612 s = toktext.encode(encoding)
613 toktext = decode_with_backslashreplace(s)
614 line_toks.append( (toktype, toktext) )
615
616
617 elif line_toks and line_toks[0] == (token.OP, '@'):
618 decorators.append(shallow_parse(line_toks))
619 line_toks = []
620
621
622 elif line_toks == []:
623 pass
624
625
626 else:
627 if start_group:
628 groups[-1] = start_group
629 start_group = None
630
631 if parent_docs[-1] != 'skip_block':
632 try:
633 prev_line_doc = process_line(
634 shallow_parse(line_toks), parent_docs, prev_line_doc,
635 lineno, comments, decorators, encoding)
636 except ParseError, e:
637 raise ParseError('Error during parsing: invalid '
638 'syntax (%s, line %d) -- %s' %
639 (module_doc.filename, lineno, e))
640 except KeyboardInterrupt, e: raise
641 except Exception, e:
642 log.error('Internal error during parsing (%s, line '
643 '%s):\n%s' % (module_doc.filename, lineno, e))
644 raise
645
646
647 if groups[-1] and prev_line_doc not in (None, 'skip_block'):
648 if isinstance(prev_line_doc, VariableDoc):
649
650
651
652
653
654
655
656
657
658 if prev_line_doc.container is not UNKNOWN:
659 add_to_group(prev_line_doc.container,
660 prev_line_doc, groups[-1])
661 elif isinstance(parent_docs[-1], NamespaceDoc):
662 add_to_group(parent_docs[-1], prev_line_doc,
663 groups[-1])
664 else:
665 prev_line_doc = None
666
667
668 line_toks = []
669 lineno = None
670 comments = []
671 decorators = []
672
689
691 """Detect the idiomatic trick C{if __name__ == "__main__":}"""
692 return (len(line) == 5
693 and line[1][1] == '__name__'
694 and line[0][1] == 'if'
695 and line[2][1] == '=='
696 and line[4][1] == ':'
697 and line[3][1][1:-1] == '__main__')
698
699
700
701
702
704 """
705 Given a flat list of tokens, return a nested tree structure
706 (called a X{token tree}), whose leaves are identical to the
707 original list, but whose structure reflects the structure
708 implied by the grouping tokens (i.e., parenthases, braces, and
709 brackets). If the parenthases, braces, and brackets do not
710 match, or are not balanced, then raise a ParseError.
711
712 Assign some structure to a sequence of structure (group parens).
713 """
714 stack = [[]]
715 parens = []
716 for tok in line_toks:
717 toktype, toktext = tok
718 if toktext in ('(','[','{'):
719 parens.append(tok)
720 stack.append([tok])
721 elif toktext in ('}',']',')'):
722 if not parens:
723 raise ParseError('Unbalanced parens')
724 left_paren = parens.pop()[1]
725 if left_paren+toktext not in ('()', '[]', '{}'):
726 raise ParseError('Mismatched parens')
727 lst = stack.pop()
728 lst.append(tok)
729 stack[-1].append(lst)
730 else:
731 stack[-1].append(tok)
732 if len(stack) != 1 or len(parens) != 0:
733 raise ParseError('Unbalanced parens')
734 return stack[0]
735
736
737
738
739
740
def process_line(line, parent_docs, prev_line_doc, lineno,
                 comments, decorators, encoding):
    """
    Dispatch one logical line (a token tree) to the appropriate
    C{process_*} handler and return that handler's result.  The
    order of the checks below defines the dispatch priority and
    must not be rearranged.
    @return: C{new-doc}, C{decorator}..?
    """
    args = (line, parent_docs, prev_line_doc, lineno,
            comments, decorators, encoding)

    if not line:
        return None
    if (token.OP, ':') in line[:-1]:
        return process_one_line_block(*args)
    if (token.OP, ';') in line:
        return process_multi_stmt(*args)
    head = line[0]
    if head in ((token.NAME, 'def'), (token.OP, '@')):
        return process_funcdef(*args)
    if head == (token.NAME, 'class'):
        return process_classdef(*args)
    if head == (token.NAME, 'import'):
        return process_import(*args)
    if head == (token.NAME, 'from'):
        return process_from_import(*args)
    if head == (token.NAME, 'del'):
        return process_del(*args)
    if len(line) == 1 and head[0] == token.STRING:
        return process_docstring(*args)
    if (token.OP, '=') in line:
        return process_assignment(*args)
    if head[0] == token.NAME and head[1] in CONTROL_FLOW_KEYWORDS:
        return process_control_flow_line(*args)
    if head == (token.NAME, '__all__') and is_append_to_all(line):
        return process_append_to_all(*args)
    return None
778
779
780
781
782
783
# Keywords that introduce control-flow blocks; process_line() routes
# lines beginning with one of these to process_control_flow_line().
CONTROL_FLOW_KEYWORDS = [
    'if', 'elif', 'else', 'while', 'for', 'try', 'except', 'finally']
789
820
821
822
823
824
825
826
827
def process_import(line, parent_docs, prev_line_doc, lineno,
                   comments, decorators, encoding):
    """
    The line handler for C{import} statements.  One imported
    variable is created in the current namespace for each
    comma-separated item, honoring any C{as} alias.
    @return: C{None}
    """
    # Imports only matter inside a namespace (module or class).
    if not isinstance(parent_docs[-1], NamespaceDoc): return

    for item in split_on(line[1:], (token.OP, ',')):
        pieces = split_on(item, (token.NAME, 'as'))
        num_pieces = len(pieces)
        if num_pieces == 1:
            # "import a.b" -- no alias.
            _import_var(parse_dotted_name(pieces[0]), parent_docs)
        elif num_pieces == 2:
            # "import a.b as c" -- the alias must be a bare name.
            if len(pieces[1]) != 1:
                raise ParseError('Expected identifier after "as"')
            _import_var_as(parse_dotted_name(pieces[0]),
                           parse_name(pieces[1][0]), parent_docs)
        else:
            raise ParseError('Multiple "as" tokens in import')
847
def process_from_import(line, parent_docs, prev_line_doc, lineno,
                        comments, decorators, encoding):
    """
    The line handler for C{from M{m} import ...} statements,
    covering C{import *}, plain imports, and C{as}-aliased imports.
    @return: C{None}
    @raise ParseError: If the statement is malformed.
    """
    if not isinstance(parent_docs[-1], NamespaceDoc): return

    pieces = split_on(line[1:], (token.NAME, 'import'))
    if len(pieces) != 2 or not pieces[0] or not pieces[1]:
        raise ParseError("Bad from-import")
    lhs, rhs = pieces

    # The RHS may be parenthesized ("from m import (a, b)"); strip
    # the parens so it looks like the unparenthesized form.
    if (len(rhs) == 1 and isinstance(rhs[0], list) and
        rhs[0][0] == (token.OP, '(') and rhs[0][-1] == (token.OP, ')')):
        rhs = rhs[0][1:-1]

    # "from __future__ import ..." has no effect on the API docs.
    if lhs == [(token.NAME, '__future__')]:
        return

    # from <lhs> import *
    elif rhs == [(token.OP, '*')]:
        src_name = parse_dotted_name(lhs)
        _process_fromstar_import(src_name, parent_docs)

    # from <lhs> import <identifier-list>
    else:
        # Resolve the source name relative to the current namespace.
        src_name = parse_dotted_name(lhs,
            parent_name=parent_docs[-1].canonical_name)
        parts = split_on(rhs, (token.OP, ','))
        for part in parts:
            # from m import x
            if len(part) == 1:
                var_name = parse_name(part[0])
                _import_var_as(DottedName(src_name, var_name),
                               var_name, parent_docs)

            # from m import x as y
            elif len(part) == 3 and part[1] == (token.NAME, 'as'):
                orig_name = parse_name(part[0])
                var_name = parse_name(part[2])
                _import_var_as(DottedName(src_name, orig_name),
                               var_name, parent_docs)

            else:
                # Bug fix: the exception used to be constructed but
                # never raised, silently accepting malformed imports.
                raise ParseError("Bad from-import")
894
896 """
897 Handle a statement of the form:
898 >>> from <src> import *
899
900 If L{IMPORT_HANDLING} is C{'parse'}, then first try to parse
901 the module C{M{<src>}}, and copy all of its exported variables
902 to C{parent_docs[-1]}.
903
904 Otherwise, try to determine the names of the variables exported by
905 C{M{<src>}}, and create a new variable for each export. If
906 L{IMPORT_STAR_HANDLING} is C{'parse'}, then the list of exports if
907 found by parsing C{M{<src>}}; if it is C{'introspect'}, then the
908 list of exports is found by importing and introspecting
909 C{M{<src>}}.
910 """
911
912 if not isinstance(parent_docs[-1], NamespaceDoc): return
913
914
915 src = _global_name(src, parent_docs)
916
917
918 parent_docs[0].imports.append(src)
919
920
921
922
923 if (IMPORT_HANDLING == 'parse' or
924 IMPORT_STAR_HANDLING == 'parse'):
925 try: module_doc = _find(src)
926 except ImportError: module_doc = None
927 if isinstance(module_doc, ModuleDoc):
928 for name, imp_var in module_doc.variables.items():
929
930
931
932 if (imp_var.is_public and
933 not (name.startswith('__') and name.endswith('__'))):
934 var_doc = _add_import_var(DottedName(src, name), name,
935 parent_docs[-1])
936 if IMPORT_HANDLING == 'parse':
937 var_doc.value = imp_var.value
938
939
940
941 if IMPORT_STAR_HANDLING == 'introspect':
942 try: module = __import__(str(src), {}, {}, [0])
943 except: return
944 if module is None: return
945 if hasattr(module, '__all__'):
946 names = list(module.__all__)
947 else:
948 names = [n for n in dir(module) if not n.startswith('_')]
949 for name in names:
950 _add_import_var(DottedName(src, name), name, parent_docs[-1])
951
953 """
954 Handle a statement of the form:
955 >>> import <name>
956
957 If L{IMPORT_HANDLING} is C{'parse'}, then first try to find
958 the value by parsing; and create an appropriate variable in
959 parentdoc.
960
961 Otherwise, add a variable for the imported variable. (More than
962 one variable may be created for cases like C{'import a.b'}, where
963 we need to create a variable C{'a'} in parentdoc containing a
964 proxy module; and a variable C{'b'} in the proxy module.
965 """
966
967 if not isinstance(parent_docs[-1], NamespaceDoc): return
968
969
970 src = _global_name(name, parent_docs)
971 src_prefix = src[:len(src)-len(name)]
972
973
974 parent_docs[0].imports.append(name)
975
976
977
978
979 if IMPORT_HANDLING == 'parse':
980
981 try: val_doc = _find(src)
982 except ImportError: val_doc = None
983 if val_doc is not None:
984
985
986
987 top_mod = src_prefix+name[0]
988 var_doc = _add_import_var(top_mod, name[0], parent_docs[-1])
989 var_doc.value = _find(DottedName(name[0]))
990 return
991
992
993
994
995
996
997 container = parent_docs[-1]
998 for i, identifier in enumerate(name[:-1]):
999 if (identifier not in container.variables or
1000 not isinstance(container.variables[identifier], ModuleDoc)):
1001 var_doc = _add_import_var(name[:i+1], identifier, container)
1002 var_doc.value = ModuleDoc(variables={}, sort_spec=[],
1003 proxy_for=src_prefix+name[:i+1],
1004 submodules={},
1005 docs_extracted_by='parser')
1006 container = container.variables[identifier].value
1007
1008
1009 _add_import_var(src, name[-1], container)
1010
1012 """
1013 Handle a statement of the form:
1014 >>> import src as name
1015
1016 If L{IMPORT_HANDLING} is C{'parse'}, then first try to find
1017 the value by parsing; and create an appropriate variable in
1018 parentdoc.
1019
1020 Otherwise, create a variables with its C{imported_from} attribute
1021 pointing to the imported object.
1022 """
1023
1024 if not isinstance(parent_docs[-1], NamespaceDoc): return
1025
1026
1027 src = _global_name(src, parent_docs)
1028
1029
1030 parent_docs[0].imports.append(src)
1031
1032 if IMPORT_HANDLING == 'parse':
1033
1034 try: val_doc = _find(src)
1035 except ImportError: val_doc = None
1036 if val_doc is not None:
1037 var_doc = VariableDoc(name=name, value=val_doc,
1038 is_imported=True, is_alias=False,
1039 imported_from=src,
1040 docs_extracted_by='parser')
1041 set_variable(parent_docs[-1], var_doc)
1042 return
1043
1044
1045
1046
1047 _add_import_var(src, name, parent_docs[-1])
1048
1058
1060 """
1061 If the given name is package-local (relative to the current
1062 context, as determined by C{parent_docs}), then convert it
1063 to a global name.
1064 """
1065
1066 if parent_docs[0].is_package:
1067 package = parent_docs[0]
1068 else:
1069 package = parent_docs[0].package
1070
1071
1072
1073
1074 while package not in (None, UNKNOWN):
1075 try:
1076 fp = imp.find_module(name[0], package.path)[0]
1077 if fp is not None: fp.close()
1078 except ImportError:
1079
1080 package = package.package
1081 continue
1082
1083 return package.canonical_name + name
1084
1085
1086
1087 return name
1088
1089
1090
1091
1092
def process_assignment(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for assignment statements: simple assignments,
    multi-target assignments (C{a = b = c}), and assignments with
    complex (e.g. tuple-unpacking) left-hand sides.  Creates
    C{VariableDoc}s in the appropriate parent namespace (or, for
    C{self.x = ...} inside a method, in the enclosing class).
    @return: the new C{VariableDoc} for a simple single-target
        assignment; C{None} otherwise.
    """
    # Divide the statement at each '=' sign; everything but the last
    # piece is a left-hand side.
    pieces = split_on(line, (token.OP, '='))

    lhs_pieces = pieces[:-1]
    rhs = pieces[-1]

    # Is this a "self.x = ..." instance-variable assignment?
    is_instvar = lhs_is_instvar(lhs_pieces, parent_docs)

    # If it's not an instance var and we're not in a namespace, then
    # it's just a function-local variable -- ignore it.
    if not (is_instvar or isinstance(parent_docs[-1], NamespaceDoc)):
        return None

    # Evaluate the right-hand side (instance vars get no value).
    if not is_instvar:
        rhs_val, is_alias = rhs_to_valuedoc(rhs, parent_docs, lineno)
    else:
        rhs_val, is_alias = UNKNOWN, False

    # Process the targets right-to-left; after the first (rightmost)
    # target is bound, the remaining targets become aliases (see the
    # `is_alias = True` at the bottom of the loop).
    lhs_pieces.reverse()
    for lhs in lhs_pieces:
        # Try to interpret the target as a simple dotted name.
        try: lhs_name = parse_dotted_name(lhs)
        except: lhs_name = None
        if lhs_name is not None:
            lhs_parent = get_lhs_parent(lhs_name, parent_docs)
            if lhs_parent is None: continue

            # __slots__ is not an ordinary variable; skip it.
            if lhs_name[-1] == '__slots__':
                continue

            # A class's __metaclass__ is recorded on the ClassDoc
            # itself, not as a variable.
            if (lhs_name[-1] == '__metaclass__' and
                isinstance(parent_docs[-1], ClassDoc)):
                parent_docs[-1].metaclass = rhs_val
                continue

            # Create the VariableDoc for this target.
            var_doc = VariableDoc(name=lhs_name[-1], value=rhs_val,
                                  is_imported=False, is_alias=is_alias,
                                  is_instvar=is_instvar,
                                  docs_extracted_by='parser')

            # A "#:" comment docstring only applies to a single-target
            # assignment.
            if len(lhs_pieces) == 1:
                add_docstring_from_comments(var_doc, comments)

            # Add the variable to its namespace -- except for an
            # instance variable without a comment docstring, which is
            # only recorded once a string docstring follows it (see
            # process_docstring()).
            if (not is_instvar) or comments:
                set_variable(lhs_parent, var_doc, True)

            # For a simple single-target assignment, return the new
            # VariableDoc so a following docstring can attach to it.
            if (len(lhs_pieces) == 1 and
                (len(lhs_name) == 1 or is_instvar)):
                return var_doc

        # Otherwise the target is a complex expression (e.g. a
        # tuple-unpacking pattern); create an UNKNOWN-valued variable
        # for each dotted name it contains.
        else:
            for lhs_name in dotted_names_in(lhs_pieces):
                lhs_parent = get_lhs_parent(lhs_name, parent_docs)
                if lhs_parent is None: continue
                var_doc = VariableDoc(name=lhs_name[-1],
                                      is_imported=False,
                                      is_alias=is_alias,
                                      is_instvar=is_instvar,
                                      docs_extracted_by='parser')
                set_variable(lhs_parent, var_doc, True)

        # All targets after the first processed one are aliases for
        # the same value.
        is_alias = True
1184
1185
def lhs_is_instvar(lhs_pieces, parent_docs):
    """
    Return true if the assignment target is an instance-variable
    assignment of the form C{<self>.<name> = ...}, where C{<self>} is
    the enclosing method's first positional argument and the method
    is defined inside a class.
    """
    # (The original `def` line was lost from this dump; the name and
    # two-argument signature are confirmed by the call in
    # process_assignment().)
    if not isinstance(parent_docs[-1], RoutineDoc):
        return False

    # The LHS must be exactly "<posarg0>.<name>".
    posargs = parent_docs[-1].posargs
    if posargs is UNKNOWN: return False
    if not (len(lhs_pieces)==1 and len(posargs) > 0 and
            len(lhs_pieces[0]) == 3 and
            lhs_pieces[0][0] == (token.NAME, posargs[0]) and
            lhs_pieces[0][1] == (token.OP, '.') and
            lhs_pieces[0][2][0] == token.NAME):
        return False

    # Walk outward from the routine; succeed only if the first
    # non-routine ancestor encountered is a class.
    for i in range(len(parent_docs)-1, -1, -1):
        if isinstance(parent_docs[i], ClassDoc):
            return True
        elif parent_docs[i] != parent_docs[-1]:
            return False
    return False
1208
1210
def rhs_to_valuedoc(rhs, parent_docs, lineno):
    """
    Convert the right-hand side of an assignment (a token tree) into
    an API doc value.
    @return: A tuple C{(val_doc, is_alias)}.
    """
    # (The original `def` line was lost from this dump; the name and
    # three-argument signature are confirmed by the call in
    # process_assignment() and the recursive call below.)

    # If the RHS is a simple dotted name that we can look up, then
    # the assignment is an alias for an existing value.
    try:
        rhs_name = parse_dotted_name(rhs)
        rhs_val = lookup_value(rhs_name, parent_docs)
        if rhs_val is not None and rhs_val is not UNKNOWN:
            return rhs_val, True
    except ParseError:
        pass

    # If the RHS is a single call "f(<args>)" whose argument evaluates
    # to a routine, treat it as a decorator application (e.g.
    # "f = staticmethod(f)").
    if (len(rhs)==2 and rhs[0][0] == token.NAME and
        isinstance(rhs[1], list)):
        arg_val, _ = rhs_to_valuedoc(rhs[1][1:-1], parent_docs, lineno)
        if isinstance(arg_val, RoutineDoc):
            doc = apply_decorator(DottedName(rhs[0][1]), arg_val,
                                  parent_docs, lineno)
            doc.canonical_name = UNKNOWN
            doc.parse_repr = pp_toktree(rhs)
            return doc, False

    # Otherwise, the RHS is an opaque expression; record its text.
    return GenericValueDoc(parse_repr=pp_toktree(rhs), toktree=rhs,
                           defining_module=parent_docs[0],
                           docs_extracted_by='parser'), False
1234
def get_lhs_parent(lhs_name, parent_docs):
    """
    Return the API doc for the namespace in which the assignment
    target C{lhs_name} should be created, or C{None} if it can't be
    found.
    """
    # (The original `def` line was lost from this dump; the name and
    # two-argument signature are confirmed by the calls in
    # process_assignment().)
    assert isinstance(lhs_name, DottedName)

    # For instance variables ("self.x = ..." in a method), the parent
    # is the nearest enclosing class, not the method itself.
    if isinstance(parent_docs[-1], RoutineDoc):
        for i in range(len(parent_docs)-1, -1, -1):
            if isinstance(parent_docs[i], ClassDoc):
                return parent_docs[i]
        else:
            raise ValueError("%r is not a namespace or method" %
                             parent_docs[-1])

    # A plain name is created in the current namespace.
    if len(lhs_name) == 1:
        return parent_docs[-1]

    # A dotted name is created in the value of its container.
    return lookup_value(lhs_name.container(), parent_docs)
1253
1254
1255
1256
1257
1260 """
1261 The line handler for single-line blocks, such as:
1262
1263 >>> def f(x): return x*2
1264
1265 This handler calls L{process_line} twice: once for the tokens
1266 up to and including the colon, and once for the remaining
1267 tokens. The comment docstring is applied to the first line
1268 only.
1269 @return: C{None}
1270 """
1271 i = line.index((token.OP, ':'))
1272 doc1 = process_line(line[:i+1], parent_docs, prev_line_doc,
1273 lineno, comments, decorators, encoding)
1274 doc2 = process_line(line[i+1:], parent_docs+[doc1],
1275 doc1, lineno, None, [], encoding)
1276 return doc1
1277
1278
1279
1280
1281
def process_multi_stmt(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for semicolon-separated statements, such as:

        >>> x=1; y=2; z=3

    Each sub-statement is dispatched through L{process_line} in
    turn; the comment docstring is not passed on to any of them.
    @return: C{None}
    """
    for stmt in split_on(line, (token.OP, ';')):
        if not stmt:
            continue
        prev_line_doc = process_line(stmt, parent_docs, prev_line_doc,
                                     lineno, None, decorators, encoding)
        # Decorators apply only to the first sub-statement.
        decorators = []
    return None
1301
1302
1303
1304
1305
def process_del(line, parent_docs, prev_line_doc, lineno,
                comments, decorators, encoding):
    """
    The line handler for delete statements, such as:

        >>> del x, y.z

    Every dotted name found in the (possibly nested) target list is
    removed from the current namespace via L{del_variable}; complex
    targets such as C{x[3]} are ignored.
    @return: C{None}
    """
    # Deletion is only meaningful inside a namespace.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    targets = split_on(line[1:], (token.OP, ','))
    for dotted in dotted_names_in(targets):
        del_variable(namespace, dotted)

    return None
1327
1328
1329
1330
1331
def process_docstring(line, parent_docs, prev_line_doc, lineno,
                      comments, decorators, encoding):
    """
    The line handler for bare string literals. If
    C{prev_line_doc} is not C{None}, then the string literal is
    added to that C{APIDoc} as a docstring. If it already has a
    docstring (from comment docstrings), then the new docstring
    will be appended to the old one.
    @return: C{prev_line_doc} if the docstring caused an instance
        variable to be registered on its enclosing class; C{None}
        otherwise.
    """
    # A docstring can only attach to the doc produced by the
    # immediately preceding line.
    if prev_line_doc is None: return
    docstring = parse_string(line)

    # Under Python 2, parse_string() may hand back a byte string;
    # decode it with the module's declared encoding so that the
    # docstring is stored as unicode.
    if isinstance(docstring, str):
        try:
            docstring = docstring.decode(encoding)
        except UnicodeDecodeError:
            # Fall back to a lossless backslash-escaped rendering
            # rather than discarding the docstring entirely.
            docstring = decode_with_backslashreplace(docstring)
            log.warning("Parsing %s (line %s): %s docstring is not a "
                        "unicode string, but it contains non-ascii data." %
                        (parent_docs[0].filename, lineno,
                         prev_line_doc.canonical_name))

    # If the preceding line defined a variable that is flagged as an
    # instance variable and has no docstring yet, promote it onto the
    # nearest enclosing ClassDoc, so this docstring is recorded as
    # class-level API documentation.
    added_instvar = False
    if (isinstance(prev_line_doc, VariableDoc) and
        prev_line_doc.is_instvar and
        prev_line_doc.docstring in (None, UNKNOWN)):
        # Scan outwards through parent_docs for the closest ClassDoc.
        for i in range(len(parent_docs)-1, -1, -1):
            if isinstance(parent_docs[i], ClassDoc):
                set_variable(parent_docs[i], prev_line_doc, True)
                added_instvar = True
                break

    # A real (string) docstring takes precedence over any
    # comment-docstring recorded earlier; warn that the comment
    # version is being discarded.
    if prev_line_doc.docstring not in (None, UNKNOWN):
        name = prev_line_doc.canonical_name
        if name is UNKNOWN and isinstance(prev_line_doc, VariableDoc):
            name = prev_line_doc.name
        log.warning("Parsing %s (line %s): %s has both a comment-docstring "
                    "and a normal (string) docstring; ignoring the comment-"
                    "docstring." % (parent_docs[0].filename, lineno, name))

    prev_line_doc.docstring = docstring
    prev_line_doc.docstring_lineno = lineno

    # NOTE(review): returning the VariableDoc presumably lets the
    # caller chain it as the next line's prev_line_doc -- confirm
    # against process_line's dispatch loop.
    if added_instvar:
        return prev_line_doc
1394
1395
1396
1397
1398
1399
def process_funcdef(line, parent_docs, prev_line_doc, lineno,
                    comments, decorators, encoding):
    """
    The line handler for function declaration lines, such as:

    >>> def f(a, b=22, (c,d)):

    This handler creates and initializes a new C{VariableDoc}
    containing a C{RoutineDoc}, adds the C{VariableDoc} to the
    containing namespace, and returns the C{RoutineDoc}.
    @raise ParseError: If the line does not have the expected
        C{def <name> (<args>) :} token shape.
    """
    # Check syntax: the token tree must be [def, name, arglist, ':'].
    if len(line) != 4 or line[3] != (token.OP, ':'):
        raise ParseError("Bad function definition line")

    # If we're not in a namespace, then ignore the definition.
    parent_doc = parent_docs[-1]
    if not isinstance(parent_doc, NamespaceDoc): return

    # Get the function's name.
    func_name = parse_name(line[1])
    canonical_name = DottedName(parent_doc.canonical_name, func_name)

    # Create the function's RoutineDoc.
    func_doc = RoutineDoc(canonical_name=canonical_name,
                          defining_module=parent_docs[0],
                          lineno=lineno, docs_extracted_by='parser')

    # Extract the function's signature from its argument list.
    init_arglist(func_doc, line[2])

    # Look for a docstring in the comments preceding the def line.
    add_docstring_from_comments(func_doc, comments)

    # Record the pretty-printed decorator expressions, then apply the
    # decorators bottom-up, as Python itself would.  Iterate over a
    # reversed *view* rather than calling decorators.reverse(): the
    # original in-place reversal mutated the caller's list as a side
    # effect.
    func_doc.decorators = [pp_toktree(deco[1:]) for deco in decorators]
    for decorator in reversed(decorators):
        try:
            deco_name = parse_dotted_name(decorator[1:])
        except ParseError:
            deco_name = None
        # Build a representation such as "staticmethod(pkg.f)" for the
        # decorated value, when we know enough to construct one.
        if func_doc.canonical_name is not UNKNOWN:
            deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]),
                                    func_doc.canonical_name)
        elif func_doc.parse_repr not in (None, UNKNOWN):
            deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]),
                                    func_doc.parse_repr)
        else:
            deco_repr = UNKNOWN
        func_doc = apply_decorator(deco_name, func_doc, parent_docs, lineno)
        func_doc.parse_repr = deco_repr

    # Add a variable for the function to the containing namespace.
    var_doc = VariableDoc(name=func_name, value=func_doc,
                          is_imported=False, is_alias=False,
                          docs_extracted_by='parser')
    set_variable(parent_doc, var_doc)

    # Return the new RoutineDoc (possibly wrapped by decorators).
    return func_doc
1468
1487
1489 if not isinstance(arglist, list) or arglist[0] != (token.OP, '('):
1490 raise ParseError("Bad argument list")
1491
1492
1493 func_doc.posargs = []
1494 func_doc.posarg_defaults = []
1495 func_doc.vararg = None
1496 func_doc.kwarg = None
1497
1498
1499 args = split_on(arglist[1:-1], (token.OP, ','))
1500
1501
1502 if args and args[-1][0] == (token.OP, '**'):
1503 if len(args[-1]) != 2 or args[-1][1][0] != token.NAME:
1504 raise ParseError("Expected name after ** in argument list")
1505 func_doc.kwarg = args[-1][1][1]
1506 args.pop()
1507
1508
1509 if args and args[-1][0] == (token.OP, '*'):
1510 if len(args[-1]) != 2 or args[-1][1][0] != token.NAME:
1511 raise ParseError("Expected name after * in argument list")
1512 func_doc.vararg = args[-1][1][1]
1513 args.pop()
1514
1515
1516 for arg in args:
1517 func_doc.posargs.append(parse_funcdef_arg(arg[0]))
1518 if len(arg) == 1:
1519 func_doc.posarg_defaults.append(None)
1520 elif arg[1] != (token.OP, '=') or len(arg) == 2:
1521 raise ParseError("Bad argument list")
1522 else:
1523 default_repr = pp_toktree(arg[2:], 'tight')
1524 default_val = GenericValueDoc(parse_repr=default_repr,
1525 docs_extracted_by='parser')
1526 func_doc.posarg_defaults.append(default_val)
1527
1528
1529
1530
1531
def process_classdef(line, parent_docs, prev_line_doc, lineno,
                     comments, decorators, encoding):
    """
    The line handler for class declaration lines, such as:

    >>> class Foo(Bar, Baz):

    This handler creates and initializes a new C{VariableDoc}
    containing a C{ClassDoc}, adds the C{VariableDoc} to the
    containing namespace, and returns the C{ClassDoc}.
    @raise ParseError: If the line does not have the expected
        C{class <name> [(<bases>)] :} token shape.
    """
    # Check syntax: [class, name, (bases)?, ':'].
    if len(line)<3 or len(line)>4 or line[-1] != (token.OP, ':'):
        raise ParseError("Bad class definition line")

    # If we're not in a namespace, then ignore the definition.
    parent_doc = parent_docs[-1]
    if not isinstance(parent_doc, NamespaceDoc): return

    # Get the class's name.
    class_name = parse_name(line[1])
    canonical_name = DottedName(parent_doc.canonical_name, class_name)

    # Create the class's ClassDoc & VariableDoc.
    class_doc = ClassDoc(variables={}, sort_spec=[],
                         bases=[], subclasses=[],
                         canonical_name=canonical_name,
                         defining_module=parent_docs[0],
                         docs_extracted_by='parser')
    var_doc = VariableDoc(name=class_name, value=class_doc,
                          is_imported=False, is_alias=False,
                          docs_extracted_by='parser')

    # Resolve the base classes, if a base list is present.
    if len(line) == 4:
        if (not isinstance(line[2], list) or
            line[2][0] != (token.OP, '(')):
            raise ParseError("Expected base list")
        try:
            for base_name in parse_classdef_bases(line[2]):
                class_doc.bases.append(find_base(base_name, parent_docs))
        # The exception value was never used; don't bind it.  (This
        # also avoids the Python-2-only "except X, e" syntax.)
        except ParseError:
            log.warning("Parsing %s (line %s): Unable to extract "
                        "the base list for class '%s'." %
                        (parent_docs[0].filename, lineno, canonical_name))
            class_doc.bases = UNKNOWN
    else:
        class_doc.bases = []

    # Register the new class as a subclass of each known base.
    if class_doc.bases is not UNKNOWN:
        for basedoc in class_doc.bases:
            if isinstance(basedoc, ClassDoc):
                basedoc.subclasses.append(class_doc)

    # Look for a docstring in the comments preceding the class line.
    add_docstring_from_comments(class_doc, comments)

    # Add the class to the containing namespace.
    set_variable(parent_doc, var_doc)

    return class_doc
1594
1598
1600 assert isinstance(name, DottedName)
1601
1602
1603 base_var = lookup_variable(name, parent_docs)
1604 if base_var is None:
1605
1606
1607
1608 if len(name) > 1:
1609 src = lookup_name(name[0], parent_docs)
1610 if (src is not None and
1611 src.imported_from not in (None, UNKNOWN)):
1612 base_src = DottedName(src.imported_from, name[1:])
1613 base_var = VariableDoc(name=name[-1], is_imported=True,
1614 is_alias=False, imported_from=base_src,
1615 docs_extracted_by='parser')
1616
1617
1618
1619
1620 if base_var is None:
1621 return _proxy_base(parse_repr=str(name))
1622
1623
1624
1625 if base_var.value is not UNKNOWN:
1626 return base_var.value
1627
1628
1629
1630
1631 if base_var.imported_from not in (None, UNKNOWN):
1632 if BASE_HANDLING == 'parse':
1633 old_sys_path = sys.path
1634 try:
1635 dirname = os.path.split(parent_docs[0].filename)[0]
1636 sys.path = [dirname] + sys.path
1637 try:
1638 return parse_docs(name=str(base_var.imported_from))
1639 except ParseError:
1640 log.info('Unable to parse base', base_var.imported_from)
1641 except ImportError:
1642 log.info('Unable to find base', base_var.imported_from)
1643 finally:
1644 sys.path = old_sys_path
1645
1646
1647
1648 return _proxy_base(proxy_for=base_var.imported_from)
1649 else:
1650 return _proxy_base(parse_repr=str(name))
1651
1652
1653
1654
1655
def process_append_to_all(line, parent_docs, prev_line_doc, lineno,
                          comments, decorators, encoding):
    """
    The line handler for __all__.append() lines; either of:

    >>> __all__.append('name')
    >>> __all__ += ['name']

    This handler looks up the value of the variable C{__all__} in
    parent_docs; and if it is found, and has a list-of-strings value,
    the handler appends the new name.
    """
    # is_append_to_all() guarantees that the last token-tree element
    # contains the string token for the name being appended.
    str_tok = line[-1][1]
    assert str_tok[0] == token.STRING
    append_to_all(str_tok[1], parent_docs, lineno)
1671
1673 all_var = lookup_name('__all__', parent_docs)
1674
1675 error = None
1676 if all_var is None or all_var.value in (None, UNKNOWN):
1677 error = "variable __all__ not found."
1678 else:
1679 try:
1680
1681 parse_string_list(all_var.value.toktree, True)
1682
1683
1684 if len(all_var.value.toktree[0]) > 2:
1685 all_var.value.toktree[0].insert(-1, (token.OP, ','))
1686 all_var.value.toktree[0].insert(-1, (token.STRING, name))
1687 all_var.value.parse_repr = pp_toktree(all_var.value.toktree)
1688 except ParseError:
1689 error = "unable to parse the contents of __all__"
1690
1691 if error:
1692 log.warning("Parsing %s (line %s): while processing an __all__"
1693 ".append() statement or @public decorator: %s" %
1694 (parent_docs[0].filename, lineno, error))
1695
1697 """
1698 Check if a line is an __all__.append line()
1699 @see: L{process_append_to_all}
1700 """
1701
1702 if (len(line) == 4 and line[0] == (token.NAME, '__all__') and
1703 line[1] == (token.OP, '.') and line[2] == (token.NAME, 'append') and
1704 isinstance(line[3], list) and len(line[3]) == 3 and
1705 line[3][0] == (token.OP, '(') and line[3][1][0] == token.STRING):
1706 return True
1707
1708
1709 if (len(line) == 3 and line[0] == (token.NAME, '__all__') and
1710 line[1] == (token.OP, '+=') and isinstance(line[2], list) and
1711 len(line[2]) == 3 and line[2][0][1] in '[(' and
1712 line[2][1][0] == token.STRING):
1713 return True
1714
1715
1716
1717
1718
1720 """
1721 Return a list of all simple dotted names in the given
1722 expression.
1723 """
1724 names = []
1725 while elt_list:
1726 elt = elt_list.pop()
1727 if len(elt) == 1 and isinstance(elt[0], list):
1728
1729 elt_list.extend(split_on(elt[0][1:-1], (token.OP, ',')))
1730 else:
1731 try:
1732 names.append(parse_dotted_name(elt))
1733 except ParseError:
1734 pass
1735 return names
1736
1738 """
1739 If the given token tree element is a name token, then return
1740 that name as a string. Otherwise, raise ParseError.
1741 @param strip_parens: If true, then if elt is a single name
1742 enclosed in parenthases, then return that name.
1743 """
1744 if strip_parens and isinstance(elt, list):
1745 while (isinstance(elt, list) and len(elt) == 3 and
1746 elt[0] == (token.OP, '(') and
1747 elt[-1] == (token.OP, ')')):
1748 elt = elt[1]
1749 if isinstance(elt, list) or elt[0] != token.NAME:
1750 raise ParseError("Bad name")
1751 return elt[1]
1752
1754 """
1755 @param parent_name: canonical name of referring module, to resolve
1756 relative imports.
1757 @type parent_name: L{DottedName}
1758 @bug: does not handle 'x.(y).z'
1759 """
1760 if len(elt_list) == 0: raise ParseError("Bad dotted name")
1761
1762
1763
1764
1765 while (isinstance(elt_list[0], list) and
1766 len(elt_list[0]) >= 3 and
1767 elt_list[0][0] == (token.OP, '(') and
1768 elt_list[0][-1] == (token.OP, ')')):
1769 elt_list[:1] = elt_list[0][1:-1]
1770
1771
1772 prefix_name = None
1773 if parent_name is not None and elt_list[0][-1] == '.':
1774 items = 1
1775 while len(elt_list) > items and elt_list[items][-1] == '.':
1776 items += 1
1777
1778 elt_list = elt_list[items:]
1779 prefix_name = parent_name[:-items]
1780
1781
1782 if not elt_list:
1783 if prefix_name == []:
1784 raise ParseError("Attempted relative import in non-package, "
1785 "or beyond toplevel package")
1786 return prefix_name
1787
1788 if len(elt_list) % 2 != 1: raise ParseError("Bad dotted name")
1789 name = DottedName(parse_name(elt_list[0], True))
1790 if prefix_name is not None:
1791 name = prefix_name + name
1792
1793 for i in range(2, len(elt_list), 2):
1794 dot, identifier = elt_list[i-1], elt_list[i]
1795 if dot != (token.OP, '.'):
1796 raise ParseError("Bad dotted name")
1797 name = DottedName(name, parse_name(identifier, True))
1798 return name
1799
1801
1802 result = [[]]
1803 for elt in elt_list:
1804 if elt == split_tok:
1805 if result[-1] == []: raise ParseError("Empty element from split")
1806 result.append([])
1807 else:
1808 result[-1].append(elt)
1809 if result[-1] == []: result.pop()
1810 return result
1811
1813 """
1814 If the given tree token element contains a valid function
1815 definition argument (i.e., an identifier token or nested list
1816 of identifiers), then return a corresponding string identifier
1817 or nested list of string identifiers. Otherwise, raise a
1818 ParseError.
1819 """
1820 if isinstance(elt, list):
1821 if elt[0] == (token.OP, '('):
1822 if len(elt) == 3:
1823 return parse_funcdef_arg(elt[1])
1824 else:
1825 return [parse_funcdef_arg(e)
1826 for e in elt[1:-1]
1827 if e != (token.OP, ',')]
1828 else:
1829 raise ParseError("Bad argument -- expected name or tuple")
1830 elif elt[0] == token.NAME:
1831 return elt[1]
1832 else:
1833 raise ParseError("Bad argument -- expected name or tuple")
1834
1836 """
1837 If the given tree token element contains a valid base list
1838 (that contains only dotted names), then return a corresponding
1839 list of L{DottedName}s. Otherwise, raise a ParseError.
1840
1841 @bug: Does not handle either of::
1842 - class A( (base.in.parens) ): pass
1843 - class B( (lambda:calculated.base)() ): pass
1844 """
1845 if (not isinstance(elt, list) or
1846 elt[0] != (token.OP, '(')):
1847 raise ParseError("Bad base list")
1848
1849 return [parse_dotted_name(n)
1850 for n in split_on(elt[1:-1], (token.OP, ','))]
1851
1852
1854 """
1855 If the given list of tree token elements contains a
1856 comma-separated list of dotted names, then return a
1857 corresponding list of L{DottedName} objects. Otherwise, raise
1858 ParseError.
1859 """
1860 names = []
1861
1862 state = 0
1863 for elt in elt_list:
1864
1865 if state == 0:
1866
1867 if isinstance(elt, tuple) and elt[0] == token.NAME:
1868 names.append(DottedName(elt[1]))
1869 state = 1
1870 else:
1871 raise ParseError("Expected a name")
1872
1873 elif state == 1:
1874 if elt == (token.OP, '.'):
1875 state = 2
1876 elif elt == (token.OP, ','):
1877 state = 0
1878 else:
1879 raise ParseError("Expected '.' or ',' or end of list")
1880
1881 elif state == 2:
1882 if isinstance(elt, tuple) and elt[0] == token.NAME:
1883 names[-1] = DottedName(names[-1], elt[1])
1884 state = 1
1885 else:
1886 raise ParseError("Expected a name")
1887 if state == 2:
1888 raise ParseError("Expected a name")
1889 return names
1890
1892 if len(elt_list) == 1 and elt_list[0][0] == token.STRING:
1893
1894
1895 return eval(elt_list[0][1])
1896 else:
1897 raise ParseError("Expected a string")
1898
1899
1901 if (len(elt_list) == 1 and isinstance(elt_list[0], list) and
1902 elt_list[0][0][1] in ('(', '[')):
1903 elt_list = elt_list[0][1:-1]
1904 elif require_sequence:
1905 raise ParseError("Expected a sequence")
1906
1907 string_list = []
1908 for string_elt in split_on(elt_list, (token.OP, ',')):
1909 string_list.append(parse_string(string_elt))
1910
1911 return string_list
1912
1913
1914
1915
1916
1917 -def set_variable(namespace, var_doc, preserve_docstring=False):
1951
1965
1966
1967
1968
1969
1971 """
1972 Find and return the documentation for the variable named by
1973 the given identifier.
1974
1975 @rtype: L{VariableDoc} or C{None}
1976 """
1977
1978
1979
1980
1981
1982 if not isinstance(identifier, basestring):
1983 raise TypeError('identifier must be a string')
1984
1985
1986 if isinstance(parent_docs[-1], NamespaceDoc):
1987 if identifier in parent_docs[-1].variables:
1988 return parent_docs[-1].variables[identifier]
1989
1990
1991 if isinstance(parent_docs[0], NamespaceDoc):
1992 if identifier in parent_docs[0].variables:
1993 return parent_docs[0].variables[identifier]
1994
1995
1996 builtins = epydoc.docintrospecter.introspect_docs(__builtin__)
1997 if isinstance(builtins, NamespaceDoc):
1998 if identifier in builtins.variables:
1999 return builtins.variables[identifier]
2000
2001
2002 return None
2003
2005 assert isinstance(dotted_name, DottedName)
2006
2007 if len(dotted_name) == 1:
2008 return lookup_name(dotted_name[0], parent_docs)
2009
2010
2011
2012
2013 else:
2014 parent = lookup_value(dotted_name[:-1], parent_docs)
2015 if (isinstance(parent, NamespaceDoc) and
2016 dotted_name[-1] in parent.variables):
2017 return parent.variables[dotted_name[-1]]
2018 else:
2019 return None
2020
2022 """
2023 Find and return the documentation for the value contained in
2024 the variable with the given name in the current namespace.
2025 """
2026 assert isinstance(dotted_name, DottedName)
2027 var_doc = lookup_name(dotted_name[0], parent_docs)
2028
2029 for i in range(1, len(dotted_name)):
2030 if var_doc is None: return None
2031
2032 if isinstance(var_doc.value, NamespaceDoc):
2033 var_dict = var_doc.value.variables
2034 elif (var_doc.value is UNKNOWN and
2035 var_doc.imported_from not in (None, UNKNOWN)):
2036 src_name = var_doc.imported_from + dotted_name[i:]
2037
2038 return GenericValueDoc(proxy_for=src_name,
2039 parse_repr=str(dotted_name),
2040 docs_extracted_by='parser')
2041 else:
2042 return None
2043
2044 var_doc = var_dict.get(dotted_name[i])
2045
2046 if var_doc is None: return None
2047 return var_doc.value
2048
2049
2050
2051
2052
2057
2058
2059
2060
2061
2063
2064 if (s2=='' or s1=='' or
2065 s1 in ('-','`') or s2 in ('}',']',')','`',':') or
2066 s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or
2067 (s2[0] == '(' and s1[-1] not in (',','='))):
2068 return '%s%s' % (s1,s2)
2069 elif (spacing=='tight' and
2070 s1[-1] in '+-*/=,' or s2[0] in '+-*/=,'):
2071 return '%s%s' % (s1, s2)
2072 else:
2073 return '%s %s' % (s1, s2)
2074
2076 s1 = pieces[-1]
2077 s2 = piece
2078
2079 if (s2=='' or s1=='' or
2080 s1 in ('-','`') or s2 in ('}',']',')','`',':') or
2081 s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or
2082 (s2[0] == '(' and s1[-1] not in (',','='))):
2083 pass
2084 elif (spacing=='tight' and
2085 s1[-1] in '+-*/=,' or s2[0] in '+-*/=,'):
2086 pass
2087 else:
2088 pieces.append(' ')
2089
2090 pieces.append(piece)
2091
def pp_toktree(elts, spacing='normal', indent=0):
    """
    Pretty-print the given token tree as source text, delegating to
    L{_pp_toktree} to accumulate the output pieces.
    """
    out = ['']
    _pp_toktree(elts, spacing, indent, out)
    return ''.join(out)
2096
2098 add_piece = _pp_toktree_add_piece
2099
2100 for elt in elts:
2101
2102 if elt == (token.NAME, 'class') or elt == (token.NAME, 'def'):
2103 add_piece(spacing, pieces, '\n%s' % (' '*indent))
2104
2105 if isinstance(elt, tuple):
2106 if elt[0] == token.NEWLINE:
2107 add_piece(spacing, pieces, ' '+elt[1])
2108 add_piece(spacing, pieces, '\n%s' % (' '*indent))
2109 elif elt[0] == token.INDENT:
2110 add_piece(spacing, pieces, ' ')
2111 indent += 1
2112 elif elt[0] == token.DEDENT:
2113 assert pieces[-1] == ' '
2114 pieces.pop()
2115 indent -= 1
2116 elif elt[0] == tokenize.COMMENT:
2117 add_piece(spacing, pieces, elt[1].rstrip() + '\n')
2118 add_piece(' '*indent)
2119 else:
2120 add_piece(spacing, pieces, elt[1])
2121 else:
2122 _pp_toktree(elt, spacing, indent, pieces)
2123
2124
2125
2126
2127
2129 """
2130 @see: U{PEP 263<http://www.python.org/peps/pep-0263.html>}
2131 """
2132 module_file = open(filename, 'rU')
2133 try:
2134 lines = [module_file.readline() for i in range(2)]
2135 if lines[0].startswith('\xef\xbb\xbf'):
2136 return 'utf-8'
2137 else:
2138 for line in lines:
2139 m = re.search("coding[:=]\s*([-\w.]+)", line)
2140 if m: return m.group(1)
2141
2142
2143 return 'iso-8859-1'
2144 finally:
2145 module_file.close()
2146
2178
2180 """
2181 @return: a flat list containing the leaves of the given nested
2182 list.
2183 @param lst: The nested list that should be flattened.
2184 """
2185 if out is None: out = []
2186 for elt in lst:
2187 if isinstance(elt, (list, tuple)):
2188 flatten(elt, out)
2189 else:
2190 out.append(elt)
2191 return out
2192