Package epydoc :: Module docstringparser
[hide private]
[frames] | no frames]

Source Code for Module epydoc.docstringparser

   1  # epydoc -- Docstring processing 
   2  # 
   3  # Copyright (C) 2005 Edward Loper 
   4  # Author: Edward Loper <edloper@loper.org> 
   5  # URL: <http://epydoc.sf.net> 
   6  # 
   7  # $Id: docstringparser.py 1715 2008-02-13 19:19:47Z edloper $ 
   8   
   9  """ 
  10  Parse docstrings and handle any fields it defines, such as C{@type} 
  11  and C{@author}.  Fields are used to describe specific information 
  12  about an object.  There are two classes of fields: X{simple fields} 
  13  and X{special fields}. 
  14   
  15  Simple fields are fields that get stored directly in an C{APIDoc}'s 
  16  metadata dictionary, without any special processing.  The set of 
  17  simple fields is defined by the list L{STANDARD_FIELDS}, whose 
  18  elements are L{DocstringField}s. 
  19   
  20  Special fields are fields that perform some sort of processing on the 
  21  C{APIDoc}, or add information to attributes other than the metadata 
  22  dictionary.  Special fields are handled by field handler 
  23  functions, which are registered using L{register_field_handler}. 
  24  """ 
  25  __docformat__ = 'epytext en' 
  26   
  27   
  28  ###################################################################### 
  29  ## Imports 
  30  ###################################################################### 
  31   
  32  import re, sys 
  33  from epydoc import markup 
  34  from epydoc.markup import epytext 
  35  from epydoc.apidoc import * 
  36  from epydoc.docintrospecter import introspect_docstring_lineno 
  37  from epydoc.util import py_src_filename 
  38  from epydoc import log 
  39  import epydoc.docparser 
  40  import __builtin__, exceptions 
  41   
  42  ###################################################################### 
  43  # Docstring Fields 
  44  ###################################################################### 
  45   
class DocstringField:
    """
    A simple docstring field, which can be used to describe specific
    information about an object, such as its author or its version.
    Simple docstring fields are fields that take no arguments, and
    are displayed as simple sections.

    @ivar tags: The set of tags that can be used to identify this
        field.
    @ivar singular: The label that should be used to identify this
        field in the output, if the field contains one value.
    @ivar plural: The label that should be used to identify this
        field in the output, if the field contains multiple values.
    @ivar short: If true, then multiple values should be combined
        into a single comma-delimited list.  If false, then
        multiple values should be listed separately in a bulleted
        list.
    @ivar multivalue: If true, then multiple values may be given
        for this field; if false, then this field can only take a
        single value, and a warning should be issued if it is
        redefined.
    @ivar takes_arg: If true, then this field expects an argument;
        and a separate field section will be constructed for each
        argument value.  The label (and plural label) should include
        a '%s' to mark where the argument's string rep should be
        added.
    @ivar varnames: The names of the variables (e.g. C{__version__})
        that L{add_metadata_from_var} looks up in order to supply a
        value for this field.  Always a list; empty if none were given.
    """

    def __init__(self, tags, label, plural=None,
                 short=0, multivalue=1, takes_arg=0,
                 varnames=None):
        """
        @param tags: A single tag string, or a list or tuple of tag
            strings.
        @param label: The singular label.
        @param plural: The plural label; defaults to C{label}.
        @param short: Stored as C{self.short}.
        @param multivalue: Stored as C{self.multivalue}.
        @param takes_arg: Stored as C{self.takes_arg}.
        @param varnames: A list of variable names, or C{None} for no
            variable names.
        @raise TypeError: If C{tags} is neither a string nor a list
            or tuple.
        """
        # isinstance() rather than an exact type() match, so that
        # subclasses of list, tuple and str are accepted as well.
        if isinstance(tags, (list, tuple)):
            self.tags = tuple(tags)
        elif isinstance(tags, str):
            self.tags = (tags,)
        else:
            raise TypeError('Bad tags: %s' % (tags,))
        self.singular = label
        if plural is None:
            self.plural = label
        else:
            self.plural = plural
        self.multivalue = multivalue
        self.short = short
        self.takes_arg = takes_arg
        self.varnames = varnames or []

    def __cmp__(self, other):
        """Order fields by their C{tags} tuple; a field compares as
        less than any object that is not a C{DocstringField}."""
        if not isinstance(other, DocstringField):
            return -1
        return cmp(self.tags, other.tags)

    def __hash__(self):
        """Hash on C{tags}, the same attribute L{__cmp__} compares."""
        return hash(self.tags)

    def __repr__(self):
        """Identify the field by the first of its tags."""
        return '<Field: %s>' % self.tags[0]

# Each entry below is DocstringField(tags, singular_label[, plural_label],
# ...): the first argument lists every tag that selects the field, and
# `varnames` lists the variables add_metadata_from_var() consults for a
# value.
STANDARD_FIELDS = [
    #: A list of the standard simple fields accepted by epydoc.  This
    #: list can be augmented at run-time by a docstring with the special
    #: C{@deffield} field.  The order in which fields are listed here
    #: determines the order in which they will be displayed in the
    #: output.

    # If it's deprecated, put that first.
    DocstringField(['deprecated', 'depreciated'],
                   'Deprecated', multivalue=0, varnames=['__deprecated__']),

    # Status info
    DocstringField(['version'], 'Version', multivalue=0,
                   varnames=['__version__']),
    DocstringField(['date'], 'Date', multivalue=0,
                   varnames=['__date__']),
    DocstringField(['status'], 'Status', multivalue=0),

    # Bibliographic Info
    DocstringField(['author', 'authors'], 'Author', 'Authors', short=1,
                   varnames=['__author__', '__authors__']),
    DocstringField(['contact'], 'Contact', 'Contacts', short=1,
                   varnames=['__contact__']),
    DocstringField(['organization', 'org'],
                   'Organization', 'Organizations'),
    DocstringField(['copyright', '(c)'], 'Copyright', multivalue=0,
                   varnames=['__copyright__']),
    DocstringField(['license'], 'License', multivalue=0,
                   varnames=['__license__']),

    # Various warnings etc.
    DocstringField(['bug'], 'Bug', 'Bugs'),
    DocstringField(['warning', 'warn'], 'Warning', 'Warnings'),
    DocstringField(['attention'], 'Attention'),
    DocstringField(['note'], 'Note', 'Notes'),

    # Formal conditions
    DocstringField(['requires', 'require', 'requirement'], 'Requires'),
    DocstringField(['precondition', 'precond'],
                   'Precondition', 'Preconditions'),
    DocstringField(['postcondition', 'postcond'],
                   'Postcondition', 'Postconditions'),
    DocstringField(['invariant'], 'Invariant'),

    # When was it introduced (version # or date)
    DocstringField(['since'], 'Since', multivalue=0),

    # Changes made
    DocstringField(['change', 'changed'], 'Change Log'),

    # Crossreferences
    DocstringField(['see', 'seealso'], 'See Also', short=1),

    # Future Work
    DocstringField(['todo'], 'To Do', takes_arg=True),

    # Permissions (used by zope-based projects)
    DocstringField(['permission', 'permissions'], 'Permission', 'Permissions')
    ]

######################################################################
#{ Docstring Parsing
######################################################################

DEFAULT_DOCFORMAT = 'epytext'
"""The name of the default markup languge used to process docstrings."""

# [xx] keep track of which ones we've already done, in case we're
# asked to process one twice?  e.g., for @include we might have to
# parse the included docstring earlier than we might otherwise..??

def _apply_fields(api_doc, docindex, fields, field_warnings):
    """
    Run L{process_field()} on every field in C{fields}, updating
    C{api_doc}.  A C{ValueError} raised for a field is not propagated:
    its message is appended to C{field_warnings} and processing
    continues with the next field.
    """
    for field in fields:
        try:
            process_field(api_doc, docindex, field.tag(),
                          field.arg(), field.body())
        except ValueError:
            # sys.exc_info() rather than "except ValueError, e", so
            # that this statement parses on any Python version.
            field_warnings.append(str(sys.exc_info()[1]))

def parse_docstring(api_doc, docindex, suppress_warnings=()):
    """
    Process the given C{APIDoc}'s docstring.  In particular, populate
    the C{APIDoc}'s C{descr} and C{summary} attributes, and add any
    information provided by fields in the docstring.

    @param api_doc: The C{APIDoc} whose docstring should be processed.
        If its C{metadata} is already set, the call returns without
        doing anything (apart from a debug message).
    @param docindex: A DocIndex, used to find the containing
        module (to look up the docformat); and to find any
        user docfields defined by containing objects.
    @param suppress_warnings: A set of objects for which docstring
        warnings should be suppressed.  It is only tested for
        membership, never modified; the default is an immutable empty
        tuple so that no mutable object is shared between calls.
    """
    # A docstring is only processed once.  A second visit is expected
    # for __init__ (the class docstring parses it ahead of time, see
    # below), so only other objects get a debug message.
    if api_doc.metadata is not UNKNOWN:
        if not (isinstance(api_doc, RoutineDoc)
                and api_doc.canonical_name[-1] == '__init__'):
            log.debug("%s's docstring processed twice" %
                      api_doc.canonical_name)
        return

    initialize_api_doc(api_doc)

    # If there's no docstring, then check for special variables (e.g.,
    # __version__), and then return -- there's nothing else to do.
    if (api_doc.docstring in (None, UNKNOWN)):
        if isinstance(api_doc, NamespaceDoc):
            for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
                add_metadata_from_var(api_doc, field)
        return

    # Remove leading indentation from the docstring.
    api_doc.docstring = unindent_docstring(api_doc.docstring)

    # Decide which docformat is used by this module.
    docformat = get_docformat(api_doc, docindex)

    # A list of markup errors from parsing.
    parse_errors = []

    # Extract a signature from the docstring, if it has one.  This
    # overrides any signature we got via introspection/parsing.
    if isinstance(api_doc, RoutineDoc):
        parse_function_signature(api_doc, None, docformat, parse_errors)

    # Parse the docstring.  Any errors encountered are stored as
    # `ParseError` objects in the errors list.
    parsed_docstring = markup.parse(api_doc.docstring, docformat,
                                    parse_errors)

    # Divide the docstring into a description and a list of
    # fields.
    descr, fields = parsed_docstring.split_fields(parse_errors)
    api_doc.descr = descr

    field_warnings = []

    # Handle the constructor fields that have been defined in the class
    # docstring. This code assumes that a class docstring is parsed before
    # the same class __init__ docstring.
    if isinstance(api_doc, ClassDoc):

        # Parse ahead the __init__ docstring for this class
        initvar = api_doc.variables.get('__init__')
        if initvar and isinstance(initvar.value, RoutineDoc):
            init_api_doc = initvar.value
            parse_docstring(init_api_doc, docindex, suppress_warnings)

            parse_function_signature(init_api_doc, api_doc,
                                     docformat, parse_errors)
            # Moves the constructor-related fields out of `fields`.
            init_fields = split_init_fields(fields, field_warnings)

            # Process fields
            _apply_fields(init_api_doc, docindex, init_fields,
                          field_warnings)

    # Process fields
    _apply_fields(api_doc, docindex, fields, field_warnings)

    # Check to make sure that all type parameters correspond to
    # some documented parameter.
    check_type_fields(api_doc, field_warnings)

    # Check for special variables (e.g., __version__)
    if isinstance(api_doc, NamespaceDoc):
        for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
            add_metadata_from_var(api_doc, field)

    # Extract a summary
    if api_doc.summary is None and api_doc.descr is not None:
        api_doc.summary, api_doc.other_docs = api_doc.descr.summary()

    # If the summary is empty, but the return field is not, then use
    # the return field to generate a summary description.
    if (isinstance(api_doc, RoutineDoc) and api_doc.summary is None and
        api_doc.return_descr is not None):
        s, o = api_doc.return_descr.summary()
        api_doc.summary = RETURN_PDS + s
        api_doc.other_docs = o

    # [XX] Make sure we don't have types/param descrs for unknown
    # vars/params?

    # Report any errors that occurred
    if api_doc in suppress_warnings:
        if parse_errors or field_warnings:
            log.info("Suppressing docstring warnings for %s, since it "
                     "is not included in the documented set." %
                     api_doc.canonical_name)
    else:
        report_errors(api_doc, docindex, parse_errors, field_warnings)
286
def add_metadata_from_var(api_doc, field):
    """
    Supply metadata for C{field} from the variables named in
    C{field.varnames} (e.g. C{__version__}), if C{api_doc} defines one
    of them and no metadata for C{field} has been recorded yet.

    Each value found is appended to C{api_doc.metadata} as a
    C{(field, varname, parsed_value)} tuple.  A variable that was used
    this way is removed from C{api_doc.variables} (and from
    C{api_doc.sort_spec}) unless it has a docstring of its own.

    @param api_doc: The documentation object to update.
    @param field: The L{DocstringField} to look up values for.
    """
    for varname in field.varnames:
        # Nothing to do for names that api_doc doesn't define.
        if varname not in api_doc.variables:
            continue

        # This test sits inside the loop, rather than before it,
        # because this point is expected to be reached rarely.  It
        # only runs more than once for fields with several varnames,
        # which currently is only 'author'.
        for entry in api_doc.metadata:
            if field == entry[0]:
                return # We already have a value for this metadata.

        var_doc = api_doc.variables[varname]
        val_doc = var_doc.value
        if val_doc is UNKNOWN:
            continue
        found = []

        # First choice: the value object itself.
        ok_types = (basestring, int, float, bool, type(None))
        pyval = val_doc.pyval
        if pyval is not UNKNOWN:
            if isinstance(pyval, ok_types):
                found = [pyval]
            elif field.multivalue and isinstance(pyval, (tuple, list)):
                # A sequence is only accepted if every element is
                # one of the simple types.
                rejected = [item for item in pyval
                            if not isinstance(item, ok_types)]
                if not rejected:
                    found = list(pyval)

        # Second choice: the parse tree.  Failures here are ignored
        # on purpose (best effort), except for a user interrupt.
        elif val_doc.toktree is not UNKNOWN:
            try:
                found = [epydoc.docparser.parse_string(val_doc.toktree)]
            except KeyboardInterrupt:
                raise
            except:
                pass
            if field.multivalue and not found:
                try:
                    found = epydoc.docparser.parse_string_list(
                        val_doc.toktree)
                except KeyboardInterrupt:
                    raise
                except:
                    pass

        # Record every value that was found.
        for raw in found:
            if isinstance(raw, str):
                text = decode_with_backslashreplace(raw)
            else:
                text = unicode(raw)
            parsed = epytext.ParsedEpytextDocstring(
                epytext.parse_as_para(text), inline=True)
            api_doc.metadata.append( (field, varname, parsed) )

        # Drop the variable itself (unless it's documented).
        if var_doc.docstring in (None, UNKNOWN):
            del api_doc.variables[varname]
            if api_doc.sort_spec is not UNKNOWN:
                try:
                    api_doc.sort_spec.remove(varname)
                except ValueError:
                    pass
345
def initialize_api_doc(api_doc):
    """A helper function for L{parse_docstring()} that initializes
    the attributes that C{parse_docstring()} will write to.

    Only attributes whose current value is C{UNKNOWN} are touched;
    which attributes are considered depends on the type of
    C{api_doc}."""
    # (attribute name, initial value) pairs, in the order in which
    # they are applied.  The list is rebuilt on every call, so each
    # api_doc gets fresh list/dict objects of its own.
    defaults = [('descr', None),
                ('summary', None),
                ('metadata', [])]
    if isinstance(api_doc, RoutineDoc):
        defaults.extend([('arg_descrs', []),
                         ('arg_types', {}),
                         ('return_descr', None),
                         ('return_type', None),
                         ('exception_descrs', [])])
    if isinstance(api_doc, (VariableDoc, PropertyDoc)):
        defaults.append(('type_descr', None))
    if isinstance(api_doc, NamespaceDoc):
        defaults.extend([('group_specs', []),
                         ('sort_spec', [])])

    for attr_name, initial in defaults:
        if getattr(api_doc, attr_name) is UNKNOWN:
            setattr(api_doc, attr_name, initial)
374
def split_init_fields(fields, warnings):
    """
    Remove the fields related to the constructor from a class docstring
    fields list.

    Fields without an argument are left in C{fields} untouched.  Fields
    with an argument are taken out of C{fields}, grouped by argument,
    and then redistributed group by group:

      - the first field whose tag is in C{VARIABLE_TAGS} goes back into
        C{fields}; any further one only produces a warning;
      - the first field whose tag is in C{PARAMETER_TAGS} goes into the
        returned list; any further one only produces a warning;
      - fields whose tag is in C{EXCEPTION_TAGS} go into the returned
        list;
      - a C{type} field is attached to the variable and/or the parameter
        of its group (see the comments in the body), and is silently
        dropped if its group contains neither;
      - fields with any other tag go back into C{fields}.

    Fields that go back into C{fields} are appended at its end, so
    they end up after the fields that have no argument.

    @param fields: The fields to process. The list will be modified in place
    @type fields: C{list} of L{markup.Field}
    @param warnings: A list to emit processing warnings
    @type warnings: C{list}
    @return: The C{fields} items to be applied to the C{__init__} method
    @rtype: C{list} of L{markup.Field}
    """
    init_fields = []

    # Split fields in lists according to their argument, keeping order.
    arg_fields = {}
    args_order = []
    i = 0
    while i < len(fields):
        field = fields[i]

        # gather together all the fields with the same arg
        if field.arg() is not None:
            # pop(i) shifts the remaining fields down, so `i` must not
            # be advanced in this branch.
            arg_fields.setdefault(field.arg(), []).append(fields.pop(i))
            # An arg is recorded once per field, so args_order may
            # contain repeats; the loop below copes with that.
            args_order.append(field.arg())
        else:
            i += 1

    # Now check that for each argument there is at most a single variable
    # and a single parameter, and at most a single type for each of them.
    for arg in args_order:
        # The group is removed from arg_fields when first visited, so a
        # repeated arg yields None here and is skipped.
        ff = arg_fields.pop(arg, None)
        if ff is None:
            continue

        # var/par: the variable and parameter fields chosen for this arg;
        # tvar/tpar: the type fields chosen for each of them.
        var = tvar = par = tpar = None
        for field in ff:
            if field.tag() in VARIABLE_TAGS:
                if var is None:
                    var = field
                    fields.append(field)
                else:
                    warnings.append(
                        "There is more than one variable named '%s'"
                        % arg)
            elif field.tag() in PARAMETER_TAGS:
                if par is None:
                    par = field
                    init_fields.append(field)
                else:
                    warnings.append(
                        "There is more than one parameter named '%s'"
                        % arg)

            elif field.tag() == 'type':
                if var is None and par is None:
                    # type before obj
                    # (tentatively assigned to both; which one it is
                    # really emitted for is decided after the loop)
                    tvar = tpar = field
                else:
                    # Only fill in a type that has not been set yet.
                    if var is not None and tvar is None:
                        tvar = field
                    if par is not None and tpar is None:
                        tpar = field

            elif field.tag() in EXCEPTION_TAGS:
                init_fields.append(field)

            else: # Unexpected field
                fields.append(field)

        # Put selected types into the proper output lists
        if tvar is not None:
            if var is not None:
                fields.append(tvar)
            else:
                pass # [xx] warn about type w/o object?

        if tpar is not None:
            if par is not None:
                init_fields.append(tpar)
            else:
                pass # [xx] warn about type w/o object?

    return init_fields
459
def report_errors(api_doc, docindex, parse_errors, field_warnings):
    """A helper function for L{parse_docstring()} that reports any
    markup warnings and field warnings that we encountered while
    processing C{api_doc}'s docstring.

    @param api_doc: The object whose docstring was processed.
    @param docindex: Not used by this function.
    @param parse_errors: The markup errors to report; each must provide
        C{descr()}, C{linenum()} and C{set_linenum_offset()}.
    @param field_warnings: A list of warning message strings.
    """
    if not parse_errors and not field_warnings: return

    # Get the name of the item containing the error, and the
    # filename of its containing module.
    name = api_doc.canonical_name
    module = api_doc.defining_module
    # Same (None, UNKNOWN) guard as the builtins check below, so that a
    # module of None cannot reach the `.filename` lookup.
    if (module not in (None, UNKNOWN) and
        module.filename not in (None, UNKNOWN)):
        try:
            filename = py_src_filename(module.filename)
        except KeyboardInterrupt:
            # Never swallow a user interrupt.
            raise
        except:
            # Best effort: fall back to the module's own filename.
            filename = module.filename
    else:
        filename = '??'

    # [xx] Don't report markup errors for standard builtins.
    # n.b. that we must use 'is' to compare pyvals here -- if we use
    # 'in' or '==', then a user __cmp__ method might raise an
    # exception, or lie.
    if isinstance(api_doc, ValueDoc) and api_doc != module:
        if module not in (None, UNKNOWN) and module.pyval is exceptions:
            return
        for builtin_val in __builtin__.__dict__.values():
            if builtin_val is api_doc.pyval:
                return

    # Get the start line of the docstring containing the error.
    startline = api_doc.docstring_lineno
    if startline in (None, UNKNOWN):
        startline = introspect_docstring_lineno(api_doc)
        if startline in (None, UNKNOWN):
            startline = None

    # Display a block header.
    header = 'File %s, ' % filename
    if startline is not None:
        header += 'line %d, ' % startline
    header += 'in %s' % name
    log.start_block(header)

    # Display all parse errors.  But first, combine any errors
    # with duplicate description messages.
    if startline is None:
        # remove dups, but keep original order:
        dups = {}
        for error in parse_errors:
            message = error.descr()
            if message not in dups:
                log.docstring_warning(message)
                dups[message] = 1
    else:
        # Combine line number fields for dup messages:
        messages = {} # maps message -> list of linenum
        for error in parse_errors:
            error.set_linenum_offset(startline)
            message = error.descr()
            messages.setdefault(message, []).append(error.linenum())
        # Report the messages in order of their first line number.
        message_items = messages.items()
        message_items.sort(lambda a,b:cmp(min(a[1]), min(b[1])))
        for message, linenums in message_items:
            linenums = [n for n in linenums if n is not None]
            if len(linenums) == 0:
                log.docstring_warning(message)
            elif len(linenums) == 1:
                log.docstring_warning("Line %s: %s" % (linenums[0], message))
            else:
                linenums = ', '.join(['%s' % l for l in linenums])
                log.docstring_warning("Lines %s: %s" % (linenums, message))

    # Display all field warnings.
    for warning in field_warnings:
        log.docstring_warning(warning)

    # End the message block.
    log.end_block()

RETURN_PDS = markup.parse('Returns:', markup='epytext')
"""A ParsedDocstring containing the text 'Returns'.  This is used to
construct summary descriptions for routines that have empty C{descr},
but non-empty C{return_descr}."""
# Flag the first child of the parsed tree as inline.
RETURN_PDS._tree.children[0].attribs['inline'] = True

######################################################################
#{ Field Processing Error Messages
######################################################################

# Templates for the ValueError messages raised while processing
# fields; each is filled in with the % operator.
# NOTE(review): the first three are not referenced by any code visible
# in this part of the file; presumably they are used by _check() --
# confirm.
UNEXPECTED_ARG = '%r did not expect an argument'
EXPECTED_ARG = '%r expected an argument'
EXPECTED_SINGLE_ARG = '%r expected a single argument'
# Filled in with the field tag.
BAD_CONTEXT = 'Invalid context for %r'
# Filled in with the field tag, or with a short description of what
# was defined twice (e.g. 'return value type').
REDEFINED = 'Redefinition of %s'
# Filled in with the field tag.
UNKNOWN_TAG = 'Unknown field tag %r'
# Filled in with (tag, quoted parameter name or names).
BAD_PARAM = '@%s for unknown parameter %s'

######################################################################
#{ Field Processing
######################################################################

def process_field(api_doc, docindex, tag, arg, descr):
    """
    Process a single field, and use it to update C{api_doc}.

    The tag is resolved in this order: a handler registered with
    L{register_field_handler} (a special field); then the first field
    in L{STANDARD_FIELDS} followed by the user-defined fields from
    L{user_docfields} whose C{tags} contain C{tag} (a simple field).
    A simple field is recorded by appending C{(field, arg, descr)} to
    C{api_doc.metadata}.

    @param tag: The field's tag, such as C{'author'}
    @param arg: The field's optional argument
    @param descr: The description following the field tag and
        argument.
    @raise ValueError: If a problem was encountered while processing
        the field, or if the tag is not known at all.  The
        C{ValueError}'s string argument is an explanation of the
        problem, which should be displayed as a warning message.
    """
    # Special fields: hand over to the registered handler.
    try:
        handler = _field_dispatch_table[tag]
    except KeyError:
        pass
    else:
        handler(api_doc, docindex, tag, arg, descr)
        return

    # Simple fields: the standard ones first, then user-defined ones.
    for candidate in STANDARD_FIELDS + user_docfields(api_doc, docindex):
        if tag not in candidate.tags:
            continue
        # [xx] check if it's redefined if it's not multivalue??
        if not candidate.takes_arg:
            _check(api_doc, tag, arg, expect_arg=False)
        api_doc.metadata.append((candidate, arg, descr))
        return

    # Nothing claimed the tag, so report a warning.
    raise ValueError(UNKNOWN_TAG % tag)
596
def user_docfields(api_doc, docindex):
    """
    Return a list of user defined fields that can be used for the
    given object.  This list is taken from the given C{api_doc}, and
    any of its containing C{NamespaceDoc}s.

    The fields of C{api_doc} itself come first, followed by those of
    its ancestors, from the innermost ancestor to the outermost.

    @note: We assume here that a parent's docstring will always be
        parsed before its childrens'.  This is indeed the case when we
        are called via L{docbuilder.build_doc_index()}.  If a child's
        docstring is parsed before its parents, then its parent won't
        yet have had its C{extra_docstring_fields} attribute
        initialized.
    """
    collected = []

    # Fields defined by `api_doc` itself.
    if api_doc.extra_docstring_fields not in (None, UNKNOWN):
        collected += api_doc.extra_docstring_fields

    # Fields defined by the ancestors of `api_doc`: every proper,
    # non-empty prefix of its canonical name, longest prefix first.
    for prefix_len in reversed(range(1, len(api_doc.canonical_name))):
        ancestor = docindex.get_valdoc(api_doc.canonical_name[:prefix_len])
        if ancestor is None:
            continue
        if ancestor.extra_docstring_fields not in (None, UNKNOWN):
            collected += ancestor.extra_docstring_fields

    return collected

# Maps a field tag to the handler function registered for it.
_field_dispatch_table = {}

def register_field_handler(handler, *field_tags):
    """
    Register the given field handler function for processing any
    of the given field tags.  A tag that already has a handler is
    re-bound to the new one.  Field handler functions should
    have the following signature:

        >>> def field_handler(api_doc, docindex, tag, arg, descr):
        ...     '''update api_doc in response to the field.'''

    Where C{api_doc} is the documentation object to update;
    C{docindex} is a L{DocIndex} that can be used to look up the
    documentation for related objects; C{tag} is the field tag that
    was used; C{arg} is the optional argument; and C{descr} is the
    description following the field tag and argument.
    """
    _field_dispatch_table.update(
        [(field_tag, handler) for field_tag in field_tags])
640 641 ###################################################################### 642 #{ Field Handler Functions 643 ###################################################################### 644
def process_summary_field(api_doc, docindex, tag, arg, descr):
    """Store C{descr} in C{api_doc.summary}.

    The field is validated with C{_check()} as taking no argument.

    @raise ValueError: If C{api_doc.summary} has already been set.
    """
    _check(api_doc, tag, arg, expect_arg=False)
    if api_doc.summary is None:
        api_doc.summary = descr
    else:
        raise ValueError(REDEFINED % tag)
651
def process_include_field(api_doc, docindex, tag, arg, descr):
    """Copy the docstring contents from the object named in C{descr}.

    This is not implemented yet: once the field has passed C{_check()}
    (it takes no argument), a C{ValueError} is always raised.

    @raise ValueError: Always.
    """
    _check(api_doc, tag, arg, expect_arg=False)
    # Open design questions -- the options are:
    #   a. just append the descr to our own
    #   b. append descr and update metadata
    #   c. append descr and process all fields.
    # In any case, any errors we may find should be marked as coming
    # from an imported docstring.

    # How does this interact with documentation inheritance??
    message = '%s not implemented yet' % tag
    raise ValueError(message)
664
def process_undocumented_field(api_doc, docindex, tag, arg, descr):
    """Remove any documentation for the variables named in C{descr}.

    Every identifier listed in C{descr} is used as a pattern in which
    C{*} matches any run of characters.  Matching variables are removed
    from C{api_doc.variables} and C{api_doc.sort_spec}.  If C{api_doc}
    is a module, matching submodules are removed from its C{submodules}
    list as well, and every element of C{docindex.root} that one of
    them dominates is removed from C{docindex.root}.
    """
    _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=False)
    for ident in _descr_to_identifiers(descr):
        pattern = re.compile('^%s$' % ident.replace('*', '(.*)'))

        # Remove the matching variables from `variables`.  The names
        # are collected first, so that the dictionary is not modified
        # while it is being walked.
        doomed = [var_name for var_name in api_doc.variables.keys()
                  if pattern.match(var_name)]
        for var_name in doomed:
            api_doc.variables.pop(var_name, None)
            if api_doc.sort_spec is not UNKNOWN:
                try:
                    api_doc.sort_spec.remove(var_name)
                except ValueError:
                    pass

        # For modules, remove any submodules that match the pattern.
        if not isinstance(api_doc, ModuleDoc):
            continue
        removed = set([m for m in api_doc.submodules
                       if pattern.match(m.canonical_name[-1])])
        if not removed:
            continue

        # Remove the indicated submodules from this module.
        api_doc.submodules = [m for m in api_doc.submodules
                              if m not in removed]

        # Remove everything below the indicated submodules from the
        # docindex root.  E.g., if module x declares y to be
        # undocumented, then x.y.z should also be undocumented.
        for elt in docindex.root[:]:
            for m in removed:
                if m.canonical_name.dominates(elt.canonical_name):
                    docindex.root.remove(elt)
693
def process_group_field(api_doc, docindex, tag, arg, descr):
    """Define a group named C{arg} containing the variables whose
    names are listed in C{descr}.

    The group is recorded by appending C{(arg, identifiers)} to
    C{api_doc.group_specs}.  C{_check()} is called with
    C{context=NamespaceDoc} and C{expect_arg=True}."""
    _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=True)
    group_spec = (arg, _descr_to_identifiers(descr))
    api_doc.group_specs.append(group_spec)
699 # [xx] should this also set sort order? 700
def process_deffield_field(api_doc, docindex, tag, arg, descr):
    """Define a new custom field.

    The field is built from C{arg} and C{descr} by
    C{_descr_to_docstring_field()}, gets C{__<arg>__} added to its
    C{varnames}, and is appended to C{api_doc.extra_docstring_fields}
    (which is created first if it is still C{UNKNOWN}).

    @raise ValueError: If the field could not be built; the message
        is prefixed with C{'Bad <tag>: '}.
    """
    _check(api_doc, tag, arg, expect_arg=True)
    if api_doc.extra_docstring_fields is UNKNOWN:
        api_doc.extra_docstring_fields = []
    try:
        new_field = _descr_to_docstring_field(arg, descr)
        new_field.varnames.append("__%s__" % arg)
        api_doc.extra_docstring_fields.append(new_field)
    except ValueError:
        # sys.exc_info() rather than "except ValueError, e", so that
        # this statement parses on any Python version.
        reason = sys.exc_info()[1]
        raise ValueError('Bad %s: %s' % (tag, reason))
712
def process_raise_field(api_doc, docindex, tag, arg, descr):
    """Record the fact that C{api_doc} can raise the exception named
    C{arg} in C{api_doc.exception_descrs}.

    The exception is stored as a C{DottedName} if C{arg} is a valid
    dotted name, and as the plain C{arg} value otherwise.  C{_check()}
    is called with C{context=RoutineDoc} and C{expect_arg='single'}."""
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg='single')
    try:
        exc_name = DottedName(arg, strict=True)
    except DottedName.InvalidDottedName:
        # Not a valid dotted name; keep the argument as it was given.
        exc_name = arg
    api_doc.exception_descrs.append( (exc_name, descr) )
720
def process_sort_field(api_doc, docindex, tag, arg, descr):
    """Put the identifiers listed in C{descr} at the front of
    C{api_doc.sort_spec}.

    C{_check()} is called with C{context=NamespaceDoc} and
    C{expect_arg=False}."""
    _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=False)
    leading = _descr_to_identifiers(descr)
    api_doc.sort_spec = leading + api_doc.sort_spec
724 725 # [xx] should I notice when they give a type for an unknown var?
def process_type_field(api_doc, docindex, tag, arg, descr):
    """Record the type description C{descr}; what it describes
    depends on the kind of C{api_doc}.

    @raise ValueError: If a type was already recorded for the same
        target, or if C{api_doc} is none of the kinds handled below.
    """
    # In namespace, "@type var: ..." describes the type of a var.
    if isinstance(api_doc, NamespaceDoc):
        _check(api_doc, tag, arg, expect_arg='single')
        set_var_type(api_doc, arg, descr)
        return

    # For variables & properties, "@type: ..." describes the variable.
    if isinstance(api_doc, (VariableDoc, PropertyDoc)):
        _check(api_doc, tag, arg, expect_arg=False)
        if api_doc.type_descr is not None:
            raise ValueError(REDEFINED % tag)
        api_doc.type_descr = descr
        return

    # For routines, "@type param: ..." describes a parameter.
    if isinstance(api_doc, RoutineDoc):
        _check(api_doc, tag, arg, expect_arg='single')
        if arg in api_doc.arg_types:
            raise ValueError(REDEFINED % ('type for '+arg))
        api_doc.arg_types[arg] = descr
        return

    raise ValueError(BAD_CONTEXT % tag)
748
def process_var_field(api_doc, docindex, tag, arg, descr):
    """Use C{descr} as the description of every variable named in
    C{arg}.

    C{arg} may name several variables, separated by C{:}, C{;},
    C{,} or a space.  C{_check()} is called with C{context=ModuleDoc}
    and C{expect_arg=True}."""
    _check(api_doc, tag, arg, context=ModuleDoc, expect_arg=True)
    var_names = re.split('[:;, ] *', arg)
    for var_name in var_names:
        set_var_descr(api_doc, var_name, descr)
753
def process_cvar_field(api_doc, docindex, tag, arg, descr):
    """Handle a class-variable field, either inside the docstring of
    a variable that belongs to a class, or inside a class docstring.

    In both cases the variables concerned get C{is_instvar} set to
    C{False}."""
    within_class_variable = (isinstance(api_doc, VariableDoc) and
                             isinstance(api_doc.container, ClassDoc))

    # The usual case: @cvar is used in a class, and names one or more
    # of its variables.
    if not within_class_variable:
        _check(api_doc, tag, arg, context=ClassDoc, expect_arg=True)
        for var_name in re.split('[:;, ] *', arg):
            set_var_descr(api_doc, var_name, descr)
            api_doc.variables[var_name].is_instvar = False
        return

    # If @cvar is used *within* a variable, then use it as the
    # variable's description, and treat the variable as a class var.
    _check(api_doc, tag, arg, expect_arg=False)
    api_doc.is_instvar = False
    api_doc.descr = markup.ConcatenatedDocstring(api_doc.descr, descr)
    summary, other_docs = descr.summary()
    api_doc.summary = summary
    api_doc.other_docs = other_docs
770
def process_ivar_field(api_doc, docindex, tag, arg, descr):
    """Handle an instance-variable field, either inside the docstring
    of a variable that belongs to a class, or inside a class
    docstring.

    In both cases the variables concerned get C{is_instvar} set to
    C{True}."""
    within_class_variable = (isinstance(api_doc, VariableDoc) and
                             isinstance(api_doc.container, ClassDoc))

    # The usual case: @ivar is used in a class, and names one or more
    # of its variables.
    if not within_class_variable:
        _check(api_doc, tag, arg, context=ClassDoc, expect_arg=True)
        for var_name in re.split('[:;, ] *', arg):
            set_var_descr(api_doc, var_name, descr)
            api_doc.variables[var_name].is_instvar = True
        return

    # If @ivar is used *within* a variable, then use it as the
    # variable's description, and treat the variable as an instvar.
    _check(api_doc, tag, arg, expect_arg=False)
    # require that there be no other descr?
    api_doc.is_instvar = True
    api_doc.descr = markup.ConcatenatedDocstring(api_doc.descr, descr)
    summary, other_docs = descr.summary()
    api_doc.summary = summary
    api_doc.other_docs = other_docs
788 789 # [xx] '@return: foo' used to get used as a descr if no other 790 # descr was present. is that still true?
def process_return_field(api_doc, docindex, tag, arg, descr):
    """Store C{descr} in C{api_doc.return_descr}.

    C{_check()} is called with C{context=RoutineDoc} and
    C{expect_arg=False}.

    @raise ValueError: If a return value description has already
        been set.
    """
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=False)
    if api_doc.return_descr is None:
        api_doc.return_descr = descr
    else:
        raise ValueError(REDEFINED % 'return value description')
796
def process_rtype_field(api_doc, docindex, tag, arg, descr):
    """Store C{descr} as the return type of a routine
    (C{return_type}), or as the type of a property (C{type_descr}).

    @raise ValueError: If the type has already been set.
    """
    _check(api_doc, tag, arg,
           context=(RoutineDoc, PropertyDoc), expect_arg=False)

    # Routines: the field describes the type of the return value.
    if isinstance(api_doc, RoutineDoc):
        if api_doc.return_type is not None:
            raise ValueError(REDEFINED % 'return value type')
        api_doc.return_type = descr
        return

    # Properties: the field describes the type of the property.
    if isinstance(api_doc, PropertyDoc):
        _check(api_doc, tag, arg, expect_arg=False)
        if api_doc.type_descr is not None:
            raise ValueError(REDEFINED % tag)
        api_doc.type_descr = descr
810
def process_arg_field(api_doc, docindex, tag, arg, descr):
    """Record C{descr} as the description of the parameter(s) named
    in C{arg}, by appending C{(names, descr)} to
    C{api_doc.arg_descrs}.

    The description is recorded before the names are validated, so
    it is kept even if the validation fails.

    @raise ValueError: If the signature of C{api_doc} is known and
        one of the names is not among its arguments.
    """
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=True)
    names = re.split('[:;, ] *', arg)
    api_doc.arg_descrs.append( (names, descr) )

    # Check to make sure that the documented parameter(s) are
    # actually part of the function signature -- unless that
    # signature is ['...'] or UNKNOWN.
    signature_args = api_doc.all_args()
    if signature_args in (['...'], UNKNOWN):
        return
    unknown_names = []
    for name in names:
        if name not in signature_args:
            unknown_names.append('"%s"' % name)
    if unknown_names:
        raise ValueError(BAD_PARAM % (tag, ', '.join(unknown_names)))
822
def process_kwarg_field(api_doc, docindex, tag, arg, descr):
    """Record C{descr} as the description of the keyword parameter(s)
    named in C{arg}, by appending C{(names, descr)} to
    C{api_doc.arg_descrs}.

    Unlike L{process_arg_field}, the names are not compared with the
    signature of C{api_doc}."""
    # [xx] these should -not- be checked if they exist..
    # and listed separately or not??
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=True)
    names = re.split('[:;, ] *', arg)
    entry = (names, descr)
    api_doc.arg_descrs.append(entry)

# Register each special-field handler together with every tag (alias)
# under which that field may be written in a docstring.  Each entry is
# (handler, tag, tag, ...), registered in the order listed.
for _handler_and_tags in (
    (process_group_field, 'group'),
    (process_deffield_field, 'deffield', 'newfield'),
    (process_sort_field, 'sort'),
    (process_summary_field, 'summary'),
    (process_undocumented_field, 'undocumented'),
    (process_include_field, 'include'),
    (process_var_field, 'var', 'variable'),
    (process_type_field, 'type'),
    (process_cvar_field, 'cvar', 'cvariable'),
    (process_ivar_field, 'ivar', 'ivariable'),
    (process_return_field, 'return', 'returns'),
    (process_rtype_field, 'rtype', 'returntype'),
    (process_arg_field, 'arg', 'argument', 'parameter', 'param'),
    (process_kwarg_field, 'kwarg', 'keyword', 'kwparam'),
    (process_raise_field, 'raise', 'raises', 'except', 'exception'),
    ):
    register_field_handler(*_handler_and_tags)
# Don't leave the loop variable behind in the module namespace.
del _handler_and_tags

# Tags related to function parameters
PARAMETER_TAGS = ('arg', 'argument', 'parameter', 'param',
                  'kwarg', 'keyword', 'kwparam')

# Tags related to variables in a class
VARIABLE_TAGS = ('cvar', 'cvariable', 'ivar', 'ivariable')

# Tags related to exceptions
EXCEPTION_TAGS = ('raise', 'raises', 'except', 'exception')

######################################################################
#{ Helper Functions
######################################################################

def check_type_fields(api_doc, field_warnings):
    """
    Verify that every parameter which was given a type corresponds to
    a parameter that is either part of the routine's signature or was
    given a description.  For each one that is not, a warning message
    is appended to C{field_warnings}.  Objects other than
    C{RoutineDoc}s are ignored.
    """
    if not isinstance(api_doc, RoutineDoc):
        return

    for arg in api_doc.arg_types:
        # Typed parameters that appear in the signature are fine.
        if arg in api_doc.all_args():
            continue
        # Otherwise, accept the parameter if some description field
        # mentions it.
        described = False
        for args, descr in api_doc.arg_descrs:
            if arg in args:
                described = True
                break
        if not described:
            field_warnings.append(BAD_PARAM % ('type', '"%s"' % arg))
873
def set_var_descr(api_doc, ident, descr):
    """
    Set the description of the variable named C{ident} in
    C{api_doc.variables} to C{descr}, creating a new C{VariableDoc}
    for it if the container has no such variable yet.  If the variable
    has no summary, one is derived from the new description.

    @raise ValueError: If the variable already has a description.
    """
    if ident in api_doc.variables:
        var_doc = api_doc.variables[ident]
    else:
        # The variable is only known from the docstring; create it.
        var_doc = VariableDoc(
            container=api_doc, name=ident,
            canonical_name=api_doc.canonical_name+ident)
        api_doc.variables[ident] = var_doc

    if var_doc.descr not in (None, UNKNOWN):
        raise ValueError(REDEFINED % ('description for '+ident))
    var_doc.descr = descr

    # Only fill in the summary if nothing has provided one already.
    if var_doc.summary in (None, UNKNOWN):
        var_doc.summary, var_doc.other_docs = var_doc.descr.summary()
886
def set_var_type(api_doc, ident, descr):
    """
    Set the type description of the variable named C{ident} in
    C{api_doc.variables} to C{descr}, creating a new C{VariableDoc}
    for it if the container has no such variable yet.

    @raise ValueError: If the variable already has a type description.
    """
    if ident in api_doc.variables:
        var_doc = api_doc.variables[ident]
    else:
        # The variable is only known from the docstring; create it.
        var_doc = VariableDoc(
            container=api_doc, name=ident,
            canonical_name=api_doc.canonical_name+ident)
        api_doc.variables[ident] = var_doc

    if var_doc.type_descr not in (None, UNKNOWN):
        raise ValueError(REDEFINED % ('type for '+ident))
    var_doc.type_descr = descr
897
898 -def _check(api_doc, tag, arg, context=None, expect_arg=None):
899 if context is not None: 900 if not isinstance(api_doc, context): 901 raise ValueError(BAD_CONTEXT % tag) 902 if expect_arg is not None: 903 if expect_arg == True: 904 if arg is None: 905 raise ValueError(EXPECTED_ARG % tag) 906 elif expect_arg == False: 907 if arg is not None: 908 raise ValueError(UNEXPECTED_ARG % tag) 909 elif expect_arg == 'single': 910 if (arg is None or ' ' in arg): 911 raise ValueError(EXPECTED_SINGLE_ARG % tag) 912 else: 913 assert 0, 'bad value for expect_arg'
914
def get_docformat(api_doc, docindex):
    """
    Return the name of the markup language that should be used to
    parse the API documentation for the given object.

    The name is taken from the C{docformat} of the module that defines
    C{api_doc}; if that module or its docformat is unknown,
    C{DEFAULT_DOCFORMAT} is used instead.  The result is lower-cased
    and reduced to its first word (dropping any region code such as
    the C{en} in C{'epytext en'}).

    @param api_doc: The object whose docformat should be looked up.
    @param docindex: Not used by this function.
    @return: The markup language name; C{DEFAULT_DOCFORMAT} if the
        docformat value cannot be normalized.
    """
    # Find the module that defines api_doc.
    module = api_doc.defining_module
    # Look up its docformat.
    if module is not UNKNOWN and module.docformat not in (None, UNKNOWN):
        docformat = module.docformat
    else:
        docformat = DEFAULT_DOCFORMAT
    # Convert to lower case & strip region codes.  A docformat that is
    # not a string, or that is empty, falls back to the default; a
    # bare "except:" is avoided so that KeyboardInterrupt and
    # SystemExit are not swallowed here.
    try:
        return docformat.lower().split()[0]
    except Exception:
        return DEFAULT_DOCFORMAT
930
def unindent_docstring(docstring):
    """
    Remove the common indentation from a docstring.

    Tabs are expanded, leading whitespace is stripped from the first
    line, and the smallest indentation found on any non-blank line
    after the first is removed from all of those later lines.
    Trailing empty lines are dropped; leading ones are kept.

    @param docstring: The raw docstring; may be C{None} or empty.
    @return: The unindented docstring (C{''} if C{docstring} is
        empty or C{None}).
    """
    # [xx] copied from inspect.getdoc(); we can't use inspect.getdoc()
    # itself, since it expects an object, not a string.

    if not docstring: return ''
    lines = docstring.expandtabs().split('\n')

    # Find the indentation of every non-blank line after the first
    # line.  (Collecting them in a list avoids needing a "largest
    # integer" sentinel such as sys.maxint.)
    indents = [len(line) - len(line.lstrip())
               for line in lines[1:] if line.lstrip()]

    # Remove indentation.
    lines[0] = lines[0].lstrip()
    if indents:
        margin = min(indents)
        lines[1:] = [line[margin:] for line in lines[1:]]

    # Remove any trailing (but not leading!) blank lines.
    while lines and not lines[-1]:
        lines.pop()
    return '\n'.join(lines)
956 957 _IDENTIFIER_LIST_REGEXP = re.compile(r'^[\w.\*]+([\s,:;]\s*[\w.\*]+)*$')
958 -def _descr_to_identifiers(descr):
959 """ 960 Given a C{ParsedDocstring} that contains a list of identifiers, 961 return a list of those identifiers. This is used by fields such 962 as C{@group} and C{@sort}, which expect lists of identifiers as 963 their values. To extract the identifiers, the docstring is first 964 converted to plaintext, and then split. The plaintext content of 965 the docstring must be a a list of identifiers, separated by 966 spaces, commas, colons, or semicolons. 967 968 @rtype: C{list} of C{string} 969 @return: A list of the identifier names contained in C{descr}. 970 @type descr: L{markup.ParsedDocstring} 971 @param descr: A C{ParsedDocstring} containing a list of 972 identifiers. 973 @raise ValueError: If C{descr} does not contain a valid list of 974 identifiers. 975 """ 976 idents = descr.to_plaintext(None).strip() 977 idents = re.sub(r'\s+', ' ', idents) 978 if not _IDENTIFIER_LIST_REGEXP.match(idents): 979 raise ValueError, 'Bad Identifier list: %r' % idents 980 rval = re.split('[:;, ] *', idents) 981 return rval
982
983 -def _descr_to_docstring_field(arg, descr):
984 tags = [s.lower() for s in re.split('[:;, ] *', arg)] 985 descr = descr.to_plaintext(None).strip() 986 args = re.split('[:;,] *', descr) 987 if len(args) == 0 or len(args) > 3: 988 raise ValueError, 'Wrong number of arguments' 989 singular = args[0] 990 if len(args) >= 2: plural = args[1] 991 else: plural = None 992 short = 0 993 if len(args) >= 3: 994 if args[2] == 'short': short = 1 995 else: raise ValueError('Bad arg 2 (expected "short")') 996 return DocstringField(tags, singular, plural, short)

######################################################################
#{ Function Signature Extraction
######################################################################

# The pattern is anchored at the start of the docstring and is built
# from the pieces below.  Named groups: 'self' (optional class name
# before the dot), 'func' (function name), 'params' (everything
# between the parentheses) and 'return' (text after '->').
# [XX] todo: add optional type modifiers?
_SIGNATURE_RE = re.compile(
    # Class name (for builtin methods)
    r'^\s*((?P<self>\w+)\.)?' +
    # The function name (must match exactly) [XX] not anymore!
    r'(?P<func>\w+)' +
    # The parameters: a comma-separated list in which each name may
    # be prefixed by '*' or '**', followed by '=default', and
    # surrounded by '[' ... ']' brackets.
    r'\((?P<params>(\s*\[?\s*\*{0,2}[\w\-\.]+(\s*=.+?)?'+
    r'(\s*\[?\s*,\s*\]?\s*\*{0,2}[\w\-\.]+(\s*=.+?)?)*\]*)?)\s*\)' +
    # The return value (optional)
    r'(\s*(->)\s*(?P<return>\S.*?))?'+
    # The end marker: a newline, a '--' or '<=>' separator surrounded
    # by whitespace, the end of the string, or a period followed by
    # whitespace.
    r'\s*(\n|\s+(--|<=+>)\s+|$|\.\s+|\.\n)')
"""A regular expression that is used to extract signatures from
docstrings."""

def parse_function_signature(func_doc, doc_source, docformat, parse_errors):
    """
    Construct the signature for a builtin function or method from
    its docstring.  If the docstring uses the standard convention
    of including a signature in the first line of the docstring
    (and formats that signature according to standard
    conventions), then it will be used to extract a signature, and
    the signature text is removed from the docstring.  Otherwise,
    C{func_doc} and the docstring are left unchanged.

    @param func_doc: The target object where to store parsed signature. Also
        container of the docstring to parse if doc_source is C{None}
    @type func_doc: L{RoutineDoc}
    @param doc_source: Contains the docstring to parse. If C{None}, parse
        L{func_doc} docstring instead
    @type doc_source: L{APIDoc}
    @param docformat: The markup language used to parse the return
        type found in the signature.
    @param parse_errors: Passed on to C{markup.parse} when parsing
        the return type.
    @rtype: C{bool}
    @return: C{True} if a signature was found and stored in
        C{func_doc}; C{False} otherwise.
    """
    if doc_source is None:
        doc_source = func_doc

    # If there's no docstring, then don't do anything.
    if not doc_source.docstring: return False

    m = _SIGNATURE_RE.match(doc_source.docstring)
    if m is None: return False

    # Do I want to be this strict?
    # Notice that __init__ must match the class name instead, if the signature
    # comes from the class docstring
#     if not (m.group('func') == func_doc.canonical_name[-1] or
#             '_'+m.group('func') == func_doc.canonical_name[-1]):
#         log.warning("Not extracting function signature from %s's "
#                     "docstring, since the name doesn't match." %
#                     func_doc.canonical_name)
#         return False

    params = m.group('params')
    rtype = m.group('return')
    selfparam = m.group('self')

    # Figure out which parameters are optional.  Each "[...]" group
    # is replaced by its contents, with "=..." appended to every
    # parameter name inside it.  This is done before func_doc is
    # modified, so that a malformed bracket structure leaves func_doc
    # untouched.
    if params:
        while '[' in params or ']' in params:
            m2 = re.match(r'(.*)\[([^\[\]]+)\](.*)', params)
            if not m2: return False
            (start, mid, end) = m2.groups()
            mid = re.sub(r'((,|^)\s*[\w\-\.]+)', r'\1=...', mid)
            params = start+mid+end

        # An optional parameter that already had an explicit default
        # now reads "name=...=default"; drop the literal "..."
        # placeholder.  (The dots are escaped so that only the
        # placeholder is matched.)
        params = re.sub(r'=\.\.\.=', r'=', params)

    # Extract the parameters from the signature.
    func_doc.posargs = []
    func_doc.vararg = None
    func_doc.kwarg = None
    if func_doc.posarg_defaults is UNKNOWN:
        func_doc.posarg_defaults = []
    if params:
        for name in params.split(','):
            if '=' in name:
                (name, default_repr) = name.split('=',1)
                default = GenericValueDoc(parse_repr=default_repr)
            else:
                default = None
            name = name.strip()
            if name == '...':
                func_doc.vararg = '...'
            elif name.startswith('**'):
                func_doc.kwarg = name[2:]
            elif name.startswith('*'):
                func_doc.vararg = name[1:]
            else:
                func_doc.posargs.append(name)
                # Keep posarg_defaults aligned with posargs: extend
                # it if it is too short, otherwise only overwrite an
                # existing entry when the signature gives a default.
                if len(func_doc.posarg_defaults) < len(func_doc.posargs):
                    func_doc.posarg_defaults.append(default)
                elif default is not None:
                    argnum = len(func_doc.posargs)-1
                    func_doc.posarg_defaults[argnum] = default

    # Extract the return type/value from the signature
    if rtype:
        func_doc.return_type = markup.parse(rtype, docformat, parse_errors,
                                            inline=True)

    # Add the self parameter, if it was specified.
    if selfparam:
        func_doc.posargs.insert(0, selfparam)
        func_doc.posarg_defaults.insert(0, None)

    # Remove the signature from the docstring.
    doc_source.docstring = doc_source.docstring[m.end():]

    # We found a signature.
    return True
1111