
Source Code for Module epydoc.markup.epytext

   1  # 
   2  # epytext.py: epydoc formatted docstring parsing 
   3  # Edward Loper 
   4  # 
   5  # Created [04/10/01 12:00 AM] 
   6  # $Id: epytext.py 1803 2008-02-27 00:32:35Z edloper $ 
   7  # 
   8   
   9  """ 
  10  Parser for epytext strings.  Epytext is a lightweight markup whose 
  11  primary intended application is Python documentation strings.  This 
  12  parser converts Epytext strings to a simple DOM-like representation 
  13  (encoded as a tree of L{Element} objects and strings).  Epytext 
  14  strings can contain the following X{structural blocks}: 
  15   
  16      - X{epytext}: The top-level element of the DOM tree. 
  17      - X{para}: A paragraph of text.  Paragraphs contain no newlines,  
  18        and all spaces are soft. 
  19      - X{section}: A section or subsection. 
  20      - X{field}: A tagged field.  These fields provide information 
  21        about specific aspects of a Python object, such as the 
  22        description of a function's parameter, or the author of a 
  23        module. 
   24      - X{literalblock}: A block of literal text.  This text is
   25        displayed verbatim, as it would appear in plaintext.  The
   26        parser removes the appropriate amount of leading whitespace
   27        from each line in the literal block.
   28      - X{doctestblock}: A block containing sample Python code,
   29        formatted according to the specifications of the C{doctest}
   30        module.
  31      - X{ulist}: An unordered list. 
  32      - X{olist}: An ordered list. 
  33      - X{li}: A list item.  This tag is used both for unordered list 
  34        items and for ordered list items. 
  35   
  36  Additionally, the following X{inline regions} may be used within 
  37  C{para} blocks: 
  38       
  39      - X{code}:   Source code and identifiers. 
  40      - X{math}:   Mathematical expressions. 
  41      - X{index}:  A term which should be included in an index, if one 
  42                   is generated. 
  43      - X{italic}: Italicized text. 
  44      - X{bold}:   Bold-faced text. 
   45      - X{uri}:    A Uniform Resource Identifier (URI) or Uniform
   46                   Resource Locator (URL).
  47      - X{link}:   A Python identifier which should be hyperlinked to 
  48                   the named object's documentation, when possible. 
  49   
   50  The returned DOM tree will conform to the following Document Type
   51  Definition::
  52   
  53     <!ENTITY % colorized '(code | math | index | italic | 
  54                            bold | uri | link | symbol)*'> 
  55   
  56     <!ELEMENT epytext ((para | literalblock | doctestblock | 
  57                        section | ulist | olist)*, fieldlist?)> 
  58   
  59     <!ELEMENT para (#PCDATA | %colorized;)*> 
  60   
   61     <!ELEMENT section (para | literalblock | doctestblock |
   62                        section | ulist | olist)+>
  63   
  64     <!ELEMENT fieldlist (field+)> 
   65     <!ELEMENT field (tag, arg?, (para | literalblock | doctestblock |
   66                                  ulist | olist)+)>
  67     <!ELEMENT tag (#PCDATA)> 
  68     <!ELEMENT arg (#PCDATA)> 
  69      
  70     <!ELEMENT literalblock (#PCDATA | %colorized;)*> 
  71     <!ELEMENT doctestblock (#PCDATA)> 
  72   
  73     <!ELEMENT ulist (li+)> 
  74     <!ELEMENT olist (li+)> 
  75     <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+> 
  76     <!ATTLIST li bullet NMTOKEN #IMPLIED> 
  77     <!ATTLIST olist start NMTOKEN #IMPLIED> 
  78   
  79     <!ELEMENT uri     (name, target)> 
  80     <!ELEMENT link    (name, target)> 
  81     <!ELEMENT name    (#PCDATA | %colorized;)*> 
  82     <!ELEMENT target  (#PCDATA)> 
  83      
  84     <!ELEMENT code    (#PCDATA | %colorized;)*> 
  85     <!ELEMENT math    (#PCDATA | %colorized;)*> 
  86     <!ELEMENT italic  (#PCDATA | %colorized;)*> 
  87     <!ELEMENT bold    (#PCDATA | %colorized;)*> 
   88     <!ELEMENT indexed (#PCDATA | %colorized;)*>
  89     <!ATTLIST code style CDATA #IMPLIED> 
  90   
  91     <!ELEMENT symbol (#PCDATA)> 
  92   
   93  @var SYMBOLS: A list of the escape symbols that are supported
   94        by epydoc.  The following symbols are currently supported:
  95  <<<SYMBOLS>>> 
  96  """ 
  97  # Note: the symbol list is appended to the docstring automatically, 
  98  # below. 
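
# Usage sketch (editorial addition, not part of the original module): a
# small epytext docstring exercising the blocks described above, parsed
# into an Element tree.  The name `sample' is purely illustrative.
#
#   from epydoc.markup import epytext
#
#   sample = '''Compute a weighted sum.
#
#   @param weights: The list of weights.
#   @param values: The list of values.
#   @return: The weighted sum.
#   '''
#   errors = []
#   tree = epytext.parse(sample, errors)
#   print tree.tag                  # 'epytext'
#   print errors                    # ParseError objects, if any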
  99   
 100  __docformat__ = 'epytext en' 
 101   
  102  # Code organization:
 103  #   1. parse() 
 104  #   2. tokenize() 
 105  #   3. colorize() 
 106  #   4. helpers 
 107  #   5. testing 
 108   
 109  import re, string, types, sys, os.path 
 110  from epydoc.markup import * 
 111  from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex 
 112  from epydoc.markup.doctest import doctest_to_html, doctest_to_latex 
 113   
 114  ################################################## 
 115  ## DOM-Like Encoding 
 116  ################################################## 
 117   
  118  class Element:
  119      """
  120      A very simple DOM-like representation for parsed epytext
  121      documents.  Each epytext document is encoded as a tree whose nodes
  122      are L{Element} objects, and whose leaves are C{string}s.  Each
  123      node is marked by a I{tag} and zero or more I{attributes}.  Each
  124      attribute is a mapping from a string key to a string value.
  125      """
  126      def __init__(self, tag, *children, **attribs):
  127          self.tag = tag
  128          """A string tag indicating the type of this element.
  129          @type: C{string}"""
  130
  131          self.children = list(children)
  132          """A list of the children of this element.
  133          @type: C{list} of (C{string} or C{Element})"""
  134
  135          self.attribs = attribs
  136          """A dictionary mapping attribute names to attribute values
  137          for this element.
  138          @type: C{dict} from C{string} to C{string}"""
  139
  140      def __str__(self):
  141          """
  142          Return a string representation of this element, using XML
  143          notation.
  144          @bug: Doesn't escape '<' or '&' or '>'.
  145          """
  146          attribs = ''.join([' %s=%r' % t for t in self.attribs.items()])
  147          return ('<%s%s>' % (self.tag, attribs) +
  148                  ''.join([str(child) for child in self.children]) +
  149                  '</%s>' % self.tag)
  150
  151      def __repr__(self):
  152          attribs = ''.join([', %s=%r' % t for t in self.attribs.items()])
  153          args = ''.join([', %r' % c for c in self.children])
  154          return 'Element(%s%s%s)' % (self.tag, args, attribs)
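
# Illustrative sketch (editorial addition): Element trees can also be
# built directly.  Children may be strings or nested Elements, and
# keyword arguments become attributes.
#
#   para = Element('para', 'See the ', Element('code', 'parse'), ' function.')
#   root = Element('epytext', para)
#   print str(root)    # <epytext><para>See the <code>parse</code> function.</para></epytext>
#   print repr(para)   # Element(para, 'See the ', Element(code, 'parse'), ' function.')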
155 156 ################################################## 157 ## Constants 158 ################################################## 159 160 # The possible heading underline characters, listed in order of 161 # heading depth. 162 _HEADING_CHARS = "=-~" 163 164 # Escape codes. These should be needed very rarely. 165 _ESCAPES = {'lb':'{', 'rb': '}'} 166 167 # Symbols. These can be generated via S{...} escapes. 168 SYMBOLS = [ 169 # Arrows 170 '<-', '->', '^', 'v', 171 172 # Greek letters 173 'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 174 'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu', 175 'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma', 176 'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega', 177 'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 178 'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu', 179 'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma', 180 'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega', 181 182 # HTML character entities 183 'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr', 184 'lArr', 'rArr', 'uArr', 'dArr', 'hArr', 185 'copy', 'times', 'forall', 'exist', 'part', 186 'empty', 'isin', 'notin', 'ni', 'prod', 'sum', 187 'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup', 188 'int', 'there4', 'sim', 'cong', 'asymp', 'ne', 189 'equiv', 'le', 'ge', 'sub', 'sup', 'nsub', 190 'sube', 'supe', 'oplus', 'otimes', 'perp', 191 192 # Alternate (long) names 193 'infinity', 'integral', 'product', 194 '>=', '<=', 195 ] 196 # Convert to a dictionary, for quick lookup 197 _SYMBOLS = {} 198 for symbol in SYMBOLS: _SYMBOLS[symbol] = 1 199 200 # Add symbols to the docstring. 201 symblist = ' ' 202 symblist += ';\n '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol) 203 for symbol in SYMBOLS]) 204 __doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist) 205 del symbol, symblist 206 207 # Tags for colorizing text. 208 _COLORIZING_TAGS = { 209 'C': 'code', 210 'M': 'math', 211 'X': 'indexed', 212 'I': 'italic', 213 'B': 'bold', 214 'U': 'uri', 215 'L': 'link', # A Python identifier that should be linked to 216 'E': 'escape', # escapes characters or creates symbols 217 'S': 'symbol', 218 'G': 'graph', 219 } 220 221 # Which tags can use "link syntax" (e.g., U{Python<www.python.org>})? 222 _LINK_COLORIZING_TAGS = ['link', 'uri'] 223 224 ################################################## 225 ## Structuring (Top Level) 226 ################################################## 227
228 -def parse(str, errors = None):
229 """ 230 Return a DOM tree encoding the contents of an epytext string. Any 231 errors generated during parsing will be stored in C{errors}. 232 233 @param str: The epytext string to parse. 234 @type str: C{string} 235 @param errors: A list where any errors generated during parsing 236 will be stored. If no list is specified, then fatal errors 237 will generate exceptions, and non-fatal errors will be 238 ignored. 239 @type errors: C{list} of L{ParseError} 240 @return: a DOM tree encoding the contents of an epytext string. 241 @rtype: C{Element} 242 @raise ParseError: If C{errors} is C{None} and an error is 243 encountered while parsing. 244 """ 245 # Initialize errors list. 246 if errors == None: 247 errors = [] 248 raise_on_error = 1 249 else: 250 raise_on_error = 0 251 252 # Preprocess the string. 253 str = re.sub('\015\012', '\012', str) 254 str = string.expandtabs(str) 255 256 # Tokenize the input string. 257 tokens = _tokenize(str, errors) 258 259 # Have we encountered a field yet? 260 encountered_field = 0 261 262 # Create an document to hold the epytext. 263 doc = Element('epytext') 264 265 # Maintain two parallel stacks: one contains DOM elements, and 266 # gives the ancestors of the current block. The other contains 267 # indentation values, and gives the indentation of the 268 # corresponding DOM elements. An indentation of "None" reflects 269 # an unknown indentation. However, the indentation must be 270 # greater than, or greater than or equal to, the indentation of 271 # the prior element (depending on what type of DOM element it 272 # corresponds to). No 2 consecutive indent_stack values will be 273 # ever be "None." Use initial dummy elements in the stack, so we 274 # don't have to worry about bounds checking. 275 stack = [None, doc] 276 indent_stack = [-1, None] 277 278 for token in tokens: 279 # Uncomment this for debugging: 280 #print ('%s: %s\n%s: %s\n' % 281 # (''.join(['%-11s' % (t and t.tag) for t in stack]), 282 # token.tag, ''.join(['%-11s' % i for i in indent_stack]), 283 # token.indent)) 284 285 # Pop any completed blocks off the stack. 286 _pop_completed_blocks(token, stack, indent_stack) 287 288 # If Token has type PARA, colorize and add the new paragraph 289 if token.tag == Token.PARA: 290 _add_para(doc, token, stack, indent_stack, errors) 291 292 # If Token has type HEADING, add the new section 293 elif token.tag == Token.HEADING: 294 _add_section(doc, token, stack, indent_stack, errors) 295 296 # If Token has type LBLOCK, add the new literal block 297 elif token.tag == Token.LBLOCK: 298 stack[-1].children.append(token.to_dom(doc)) 299 300 # If Token has type DTBLOCK, add the new doctest block 301 elif token.tag == Token.DTBLOCK: 302 stack[-1].children.append(token.to_dom(doc)) 303 304 # If Token has type BULLET, add the new list/list item/field 305 elif token.tag == Token.BULLET: 306 _add_list(doc, token, stack, indent_stack, errors) 307 else: 308 assert 0, 'Unknown token type: '+token.tag 309 310 # Check if the DOM element we just added was a field.. 311 if stack[-1].tag == 'field': 312 encountered_field = 1 313 elif encountered_field == 1: 314 if len(stack) <= 3: 315 estr = ("Fields must be the final elements in an "+ 316 "epytext string.") 317 errors.append(StructuringError(estr, token.startline)) 318 319 # Graphs use inline markup (G{...}) but are really block-level 320 # elements; so "raise" any graphs we generated. This is a bit of 321 # a hack, but the alternative is to define a new markup for 322 # block-level elements, which I'd rather not do. 
(See sourceforge 323 # bug #1673017.) 324 for child in doc.children: 325 _raise_graphs(child, doc) 326 327 # If there was an error, then signal it! 328 if len([e for e in errors if e.is_fatal()]) > 0: 329 if raise_on_error: 330 raise errors[0] 331 else: 332 return None 333 334 # Return the top-level epytext DOM element. 335 return doc
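
# Error-handling sketch (editorial addition): as documented above, passing
# an explicit list collects errors, while passing no list makes fatal
# errors raise ParseError.
#
#   errors = []
#   tree = parse('An unmatched } brace.', errors)
#   print tree                      # None -- a fatal error was recorded
#   for e in errors:
#       print e.is_fatal(), e
#
#   # With errors=None, the same input raises the first ParseError:
#   # parse('An unmatched } brace.')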
336
337 -def _raise_graphs(tree, parent):
338 # Recurse to children. 339 have_graph_child = False 340 for elt in tree.children: 341 if isinstance(elt, Element): 342 _raise_graphs(elt, tree) 343 if elt.tag == 'graph': have_graph_child = True 344 345 block = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li') 346 if have_graph_child and tree.tag not in block: 347 child_index = 0 348 parent_index = parent.children.index(tree) 349 for elt in tree.children: 350 if isinstance(elt, Element) and elt.tag == 'graph': 351 # We found a graph: splice it into the parent. 352 left = tree.children[:child_index] 353 right = tree.children[child_index+1:] 354 parent.children[parent_index:parent_index+1] = [ 355 Element(tree.tag, *left, **tree.attribs), 356 elt, 357 Element(tree.tag, *right, **tree.attribs)] 358 child_index = 0 359 parent_index += 2 360 tree = parent.children[parent_index] 361 else: 362 child_index += 1
363
364 -def _pop_completed_blocks(token, stack, indent_stack):
365 """ 366 Pop any completed blocks off the stack. This includes any 367 blocks that we have dedented past, as well as any list item 368 blocks that we've dedented to. The top element on the stack 369 should only be a list if we're about to start a new list 370 item (i.e., if the next token is a bullet). 371 """ 372 indent = token.indent 373 if indent != None: 374 while (len(stack) > 2): 375 pop = 0 376 377 # Dedent past a block 378 if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1 379 elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1 380 381 # Dedent to a list item, if it is follwed by another list 382 # item with the same indentation. 383 elif (token.tag == 'bullet' and indent==indent_stack[-2] and 384 stack[-1].tag in ('li', 'field')): pop=1 385 386 # End of a list (no more list items available) 387 elif (stack[-1].tag in ('ulist', 'olist') and 388 (token.tag != 'bullet' or token.contents[-1] == ':')): 389 pop=1 390 391 # Pop the block, if it's complete. Otherwise, we're done. 392 if pop == 0: return 393 stack.pop() 394 indent_stack.pop()
395
396 -def _add_para(doc, para_token, stack, indent_stack, errors):
397 """Colorize the given paragraph, and add it to the DOM tree.""" 398 # Check indentation, and update the parent's indentation 399 # when appropriate. 400 if indent_stack[-1] == None: 401 indent_stack[-1] = para_token.indent 402 if para_token.indent == indent_stack[-1]: 403 # Colorize the paragraph and add it. 404 para = _colorize(doc, para_token, errors) 405 if para_token.inline: 406 para.attribs['inline'] = True 407 stack[-1].children.append(para) 408 else: 409 estr = "Improper paragraph indentation." 410 errors.append(StructuringError(estr, para_token.startline))
411
412 -def _add_section(doc, heading_token, stack, indent_stack, errors):
413 """Add a new section to the DOM tree, with the given heading.""" 414 if indent_stack[-1] == None: 415 indent_stack[-1] = heading_token.indent 416 elif indent_stack[-1] != heading_token.indent: 417 estr = "Improper heading indentation." 418 errors.append(StructuringError(estr, heading_token.startline)) 419 420 # Check for errors. 421 for tok in stack[2:]: 422 if tok.tag != "section": 423 estr = "Headings must occur at the top level." 424 errors.append(StructuringError(estr, heading_token.startline)) 425 break 426 if (heading_token.level+2) > len(stack): 427 estr = "Wrong underline character for heading." 428 errors.append(StructuringError(estr, heading_token.startline)) 429 430 # Pop the appropriate number of headings so we're at the 431 # correct level. 432 stack[heading_token.level+2:] = [] 433 indent_stack[heading_token.level+2:] = [] 434 435 # Colorize the heading 436 head = _colorize(doc, heading_token, errors, 'heading') 437 438 # Add the section's and heading's DOM elements. 439 sec = Element("section") 440 stack[-1].children.append(sec) 441 stack.append(sec) 442 sec.children.append(head) 443 indent_stack.append(None)
444
445 -def _add_list(doc, bullet_token, stack, indent_stack, errors):
446 """ 447 Add a new list item or field to the DOM tree, with the given 448 bullet or field tag. When necessary, create the associated 449 list. 450 """ 451 # Determine what type of bullet it is. 452 if bullet_token.contents[-1] == '-': 453 list_type = 'ulist' 454 elif bullet_token.contents[-1] == '.': 455 list_type = 'olist' 456 elif bullet_token.contents[-1] == ':': 457 list_type = 'fieldlist' 458 else: 459 raise AssertionError('Bad Bullet: %r' % bullet_token.contents) 460 461 # Is this a new list? 462 newlist = 0 463 if stack[-1].tag != list_type: 464 newlist = 1 465 elif list_type == 'olist' and stack[-1].tag == 'olist': 466 old_listitem = stack[-1].children[-1] 467 old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1] 468 new_bullet = bullet_token.contents.split('.')[:-1] 469 if (new_bullet[:-1] != old_bullet[:-1] or 470 int(new_bullet[-1]) != int(old_bullet[-1])+1): 471 newlist = 1 472 473 # Create the new list. 474 if newlist: 475 if stack[-1].tag is 'fieldlist': 476 # The new list item is not a field list item (since this 477 # is a new list); but it's indented the same as the field 478 # list. This either means that they forgot to indent the 479 # list, or they are trying to put something after the 480 # field list. The first one seems more likely, so we'll 481 # just warn about that (to avoid confusion). 482 estr = "Lists must be indented." 483 errors.append(StructuringError(estr, bullet_token.startline)) 484 if stack[-1].tag in ('ulist', 'olist', 'fieldlist'): 485 stack.pop() 486 indent_stack.pop() 487 488 if (list_type != 'fieldlist' and indent_stack[-1] is not None and 489 bullet_token.indent == indent_stack[-1]): 490 # Ignore this error if there's text on the same line as 491 # the comment-opening quote -- epydoc can't reliably 492 # determine the indentation for that line. 493 if bullet_token.startline != 1 or bullet_token.indent != 0: 494 estr = "Lists must be indented." 495 errors.append(StructuringError(estr, bullet_token.startline)) 496 497 if list_type == 'fieldlist': 498 # Fieldlist should be at the top-level. 499 for tok in stack[2:]: 500 if tok.tag != "section": 501 estr = "Fields must be at the top level." 502 errors.append( 503 StructuringError(estr, bullet_token.startline)) 504 break 505 stack[2:] = [] 506 indent_stack[2:] = [] 507 508 # Add the new list. 509 lst = Element(list_type) 510 stack[-1].children.append(lst) 511 stack.append(lst) 512 indent_stack.append(bullet_token.indent) 513 if list_type == 'olist': 514 start = bullet_token.contents.split('.')[:-1] 515 if start != '1': 516 lst.attribs["start"] = start[-1] 517 518 # Fields are treated somewhat specially: A "fieldlist" 519 # node is created to make the parsing simpler, but fields 520 # are adjoined directly into the "epytext" node, not into 521 # the "fieldlist" node. 522 if list_type == 'fieldlist': 523 li = Element("field") 524 token_words = bullet_token.contents[1:-1].split(None, 1) 525 tag_elt = Element("tag") 526 tag_elt.children.append(token_words[0]) 527 li.children.append(tag_elt) 528 529 if len(token_words) > 1: 530 arg_elt = Element("arg") 531 arg_elt.children.append(token_words[1]) 532 li.children.append(arg_elt) 533 else: 534 li = Element("li") 535 if list_type == 'olist': 536 li.attribs["bullet"] = bullet_token.contents 537 538 # Add the bullet. 539 stack[-1].children.append(li) 540 stack.append(li) 541 indent_stack.append(None)
542 543 ################################################## 544 ## Tokenization 545 ################################################## 546
547 -class Token:
548 """ 549 C{Token}s are an intermediate data structure used while 550 constructing the structuring DOM tree for a formatted docstring. 551 There are five types of C{Token}: 552 553 - Paragraphs 554 - Literal blocks 555 - Doctest blocks 556 - Headings 557 - Bullets 558 559 The text contained in each C{Token} is stored in the 560 C{contents} variable. The string in this variable has been 561 normalized. For paragraphs, this means that it has been converted 562 into a single line of text, with newline/indentation replaced by 563 single spaces. For literal blocks and doctest blocks, this means 564 that the appropriate amount of leading whitespace has been removed 565 from each line. 566 567 Each C{Token} has an indentation level associated with it, 568 stored in the C{indent} variable. This indentation level is used 569 by the structuring procedure to assemble hierarchical blocks. 570 571 @type tag: C{string} 572 @ivar tag: This C{Token}'s type. Possible values are C{Token.PARA} 573 (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK} 574 (doctest block), C{Token.HEADINGC}, and C{Token.BULLETC}. 575 576 @type startline: C{int} 577 @ivar startline: The line on which this C{Token} begins. This 578 line number is only used for issuing errors. 579 580 @type contents: C{string} 581 @ivar contents: The normalized text contained in this C{Token}. 582 583 @type indent: C{int} or C{None} 584 @ivar indent: The indentation level of this C{Token} (in 585 number of leading spaces). A value of C{None} indicates an 586 unknown indentation; this is used for list items and fields 587 that begin with one-line paragraphs. 588 589 @type level: C{int} or C{None} 590 @ivar level: The heading-level of this C{Token} if it is a 591 heading; C{None}, otherwise. Valid heading levels are 0, 1, 592 and 2. 593 594 @type inline: C{bool} 595 @ivar inline: If True, the element is an inline level element, comparable 596 to an HTML C{<span>} tag. Else, it is a block level element, comparable 597 to an HTML C{<div>}. 598 599 @type PARA: C{string} 600 @cvar PARA: The C{tag} value for paragraph C{Token}s. 601 @type LBLOCK: C{string} 602 @cvar LBLOCK: The C{tag} value for literal C{Token}s. 603 @type DTBLOCK: C{string} 604 @cvar DTBLOCK: The C{tag} value for doctest C{Token}s. 605 @type HEADING: C{string} 606 @cvar HEADING: The C{tag} value for heading C{Token}s. 607 @type BULLET: C{string} 608 @cvar BULLET: The C{tag} value for bullet C{Token}s. This C{tag} 609 value is also used for field tag C{Token}s, since fields 610 function syntactically the same as list items. 611 """ 612 # The possible token types. 613 PARA = "para" 614 LBLOCK = "literalblock" 615 DTBLOCK = "doctestblock" 616 HEADING = "heading" 617 BULLET = "bullet" 618
619 - def __init__(self, tag, startline, contents, indent, level=None, 620 inline=False):
621 """ 622 Create a new C{Token}. 623 624 @param tag: The type of the new C{Token}. 625 @type tag: C{string} 626 @param startline: The line on which the new C{Token} begins. 627 @type startline: C{int} 628 @param contents: The normalized contents of the new C{Token}. 629 @type contents: C{string} 630 @param indent: The indentation of the new C{Token} (in number 631 of leading spaces). A value of C{None} indicates an 632 unknown indentation. 633 @type indent: C{int} or C{None} 634 @param level: The heading-level of this C{Token} if it is a 635 heading; C{None}, otherwise. 636 @type level: C{int} or C{None} 637 @param inline: Is this C{Token} inline as a C{<span>}?. 638 @type inline: C{bool} 639 """ 640 self.tag = tag 641 self.startline = startline 642 self.contents = contents 643 self.indent = indent 644 self.level = level 645 self.inline = inline
646
647 - def __repr__(self):
648 """ 649 @rtype: C{string} 650 @return: the formal representation of this C{Token}. 651 C{Token}s have formal representaitons of the form:: 652 <Token: para at line 12> 653 """ 654 return '<Token: %s at line %s>' % (self.tag, self.startline)
655
656 - def to_dom(self, doc):
657 """ 658 @return: a DOM representation of this C{Token}. 659 @rtype: L{Element} 660 """ 661 e = Element(self.tag) 662 e.children.append(self.contents) 663 return e
664 665 # Construct regular expressions for recognizing bullets. These are 666 # global so they don't have to be reconstructed each time we tokenize 667 # a docstring. 668 _ULIST_BULLET = '[-]( +|$)' 669 _OLIST_BULLET = '(\d+[.])+( +|$)' 670 _FIELD_BULLET = '@\w+( [^{}:\n]+)?:' 671 _BULLET_RE = re.compile(_ULIST_BULLET + '|' + 672 _OLIST_BULLET + '|' + 673 _FIELD_BULLET) 674 _LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET) 675 _FIELD_BULLET_RE = re.compile(_FIELD_BULLET) 676 del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET 677
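
# Illustrative sketch (editorial addition): the three bullet patterns
# above distinguish unordered items, ordered items, and field tags.
#
#   for line in ['- an unordered item',
#                '1.2. a nested ordered item',
#                '@param x: a field']:
#       print (bool(_BULLET_RE.match(line)),
#              bool(_LIST_BULLET_RE.match(line)),
#              bool(_FIELD_BULLET_RE.match(line)))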
678 -def _tokenize_doctest(lines, start, block_indent, tokens, errors):
679 """ 680 Construct a L{Token} containing the doctest block starting at 681 C{lines[start]}, and append it to C{tokens}. C{block_indent} 682 should be the indentation of the doctest block. Any errors 683 generated while tokenizing the doctest block will be appended to 684 C{errors}. 685 686 @param lines: The list of lines to be tokenized 687 @param start: The index into C{lines} of the first line of the 688 doctest block to be tokenized. 689 @param block_indent: The indentation of C{lines[start]}. This is 690 the indentation of the doctest block. 691 @param errors: A list where any errors generated during parsing 692 will be stored. If no list is specified, then errors will 693 generate exceptions. 694 @return: The line number of the first line following the doctest 695 block. 696 697 @type lines: C{list} of C{string} 698 @type start: C{int} 699 @type block_indent: C{int} 700 @type tokens: C{list} of L{Token} 701 @type errors: C{list} of L{ParseError} 702 @rtype: C{int} 703 """ 704 # If they dedent past block_indent, keep track of the minimum 705 # indentation. This is used when removing leading indentation 706 # from the lines of the doctest block. 707 min_indent = block_indent 708 709 linenum = start + 1 710 while linenum < len(lines): 711 # Find the indentation of this line. 712 line = lines[linenum] 713 indent = len(line) - len(line.lstrip()) 714 715 # A blank line ends doctest block. 716 if indent == len(line): break 717 718 # A Dedent past block_indent is an error. 719 if indent < block_indent: 720 min_indent = min(min_indent, indent) 721 estr = 'Improper doctest block indentation.' 722 errors.append(TokenizationError(estr, linenum)) 723 724 # Go on to the next line. 725 linenum += 1 726 727 # Add the token, and return the linenum after the token ends. 728 contents = [line[min_indent:] for line in lines[start:linenum]] 729 contents = '\n'.join(contents) 730 tokens.append(Token(Token.DTBLOCK, start, contents, block_indent)) 731 return linenum
732
733 -def _tokenize_literal(lines, start, block_indent, tokens, errors):
734 """ 735 Construct a L{Token} containing the literal block starting at 736 C{lines[start]}, and append it to C{tokens}. C{block_indent} 737 should be the indentation of the literal block. Any errors 738 generated while tokenizing the literal block will be appended to 739 C{errors}. 740 741 @param lines: The list of lines to be tokenized 742 @param start: The index into C{lines} of the first line of the 743 literal block to be tokenized. 744 @param block_indent: The indentation of C{lines[start]}. This is 745 the indentation of the literal block. 746 @param errors: A list of the errors generated by parsing. Any 747 new errors generated while will tokenizing this paragraph 748 will be appended to this list. 749 @return: The line number of the first line following the literal 750 block. 751 752 @type lines: C{list} of C{string} 753 @type start: C{int} 754 @type block_indent: C{int} 755 @type tokens: C{list} of L{Token} 756 @type errors: C{list} of L{ParseError} 757 @rtype: C{int} 758 """ 759 linenum = start + 1 760 while linenum < len(lines): 761 # Find the indentation of this line. 762 line = lines[linenum] 763 indent = len(line) - len(line.lstrip()) 764 765 # A Dedent to block_indent ends the literal block. 766 # (Ignore blank likes, though) 767 if len(line) != indent and indent <= block_indent: 768 break 769 770 # Go on to the next line. 771 linenum += 1 772 773 # Add the token, and return the linenum after the token ends. 774 contents = [line[block_indent+1:] for line in lines[start:linenum]] 775 contents = '\n'.join(contents) 776 contents = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents) 777 tokens.append(Token(Token.LBLOCK, start, contents, block_indent)) 778 return linenum
779
780 -def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
781 """ 782 Construct L{Token}s for the bullet and the first paragraph of the 783 list item (or field) starting at C{lines[start]}, and append them 784 to C{tokens}. C{bullet_indent} should be the indentation of the 785 list item. Any errors generated while tokenizing will be 786 appended to C{errors}. 787 788 @param lines: The list of lines to be tokenized 789 @param start: The index into C{lines} of the first line of the 790 list item to be tokenized. 791 @param bullet_indent: The indentation of C{lines[start]}. This is 792 the indentation of the list item. 793 @param errors: A list of the errors generated by parsing. Any 794 new errors generated while will tokenizing this paragraph 795 will be appended to this list. 796 @return: The line number of the first line following the list 797 item's first paragraph. 798 799 @type lines: C{list} of C{string} 800 @type start: C{int} 801 @type bullet_indent: C{int} 802 @type tokens: C{list} of L{Token} 803 @type errors: C{list} of L{ParseError} 804 @rtype: C{int} 805 """ 806 linenum = start + 1 807 para_indent = None 808 doublecolon = lines[start].rstrip()[-2:] == '::' 809 810 # Get the contents of the bullet. 811 para_start = _BULLET_RE.match(lines[start], bullet_indent).end() 812 bcontents = lines[start][bullet_indent:para_start].strip() 813 814 while linenum < len(lines): 815 # Find the indentation of this line. 816 line = lines[linenum] 817 indent = len(line) - len(line.lstrip()) 818 819 # "::" markers end paragraphs. 820 if doublecolon: break 821 if line.rstrip()[-2:] == '::': doublecolon = 1 822 823 # A blank line ends the token 824 if indent == len(line): break 825 826 # Dedenting past bullet_indent ends the list item. 827 if indent < bullet_indent: break 828 829 # A line beginning with a bullet ends the token. 830 if _BULLET_RE.match(line, indent): break 831 832 # If this is the second line, set the paragraph indentation, or 833 # end the token, as appropriate. 834 if para_indent == None: para_indent = indent 835 836 # A change in indentation ends the token 837 if indent != para_indent: break 838 839 # Go on to the next line. 840 linenum += 1 841 842 # Add the bullet token. 843 tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent, 844 inline=True)) 845 846 # Add the paragraph token. 847 pcontents = ([lines[start][para_start:].strip()] + 848 [line.strip() for line in lines[start+1:linenum]]) 849 pcontents = ' '.join(pcontents).strip() 850 if pcontents: 851 tokens.append(Token(Token.PARA, start, pcontents, para_indent, 852 inline=True)) 853 854 # Return the linenum after the paragraph token ends. 855 return linenum
856
857 -def _tokenize_para(lines, start, para_indent, tokens, errors):
858 """ 859 Construct a L{Token} containing the paragraph starting at 860 C{lines[start]}, and append it to C{tokens}. C{para_indent} 861 should be the indentation of the paragraph . Any errors 862 generated while tokenizing the paragraph will be appended to 863 C{errors}. 864 865 @param lines: The list of lines to be tokenized 866 @param start: The index into C{lines} of the first line of the 867 paragraph to be tokenized. 868 @param para_indent: The indentation of C{lines[start]}. This is 869 the indentation of the paragraph. 870 @param errors: A list of the errors generated by parsing. Any 871 new errors generated while will tokenizing this paragraph 872 will be appended to this list. 873 @return: The line number of the first line following the 874 paragraph. 875 876 @type lines: C{list} of C{string} 877 @type start: C{int} 878 @type para_indent: C{int} 879 @type tokens: C{list} of L{Token} 880 @type errors: C{list} of L{ParseError} 881 @rtype: C{int} 882 """ 883 linenum = start + 1 884 doublecolon = 0 885 while linenum < len(lines): 886 # Find the indentation of this line. 887 line = lines[linenum] 888 indent = len(line) - len(line.lstrip()) 889 890 # "::" markers end paragraphs. 891 if doublecolon: break 892 if line.rstrip()[-2:] == '::': doublecolon = 1 893 894 # Blank lines end paragraphs 895 if indent == len(line): break 896 897 # Indentation changes end paragraphs 898 if indent != para_indent: break 899 900 # List bullets end paragraphs 901 if _BULLET_RE.match(line, indent): break 902 903 # Check for mal-formatted field items. 904 if line[indent] == '@': 905 estr = "Possible mal-formatted field item." 906 errors.append(TokenizationError(estr, linenum, is_fatal=0)) 907 908 # Go on to the next line. 909 linenum += 1 910 911 contents = [line.strip() for line in lines[start:linenum]] 912 913 # Does this token look like a heading? 914 if ((len(contents) < 2) or 915 (contents[1][0] not in _HEADING_CHARS) or 916 (abs(len(contents[0])-len(contents[1])) > 5)): 917 looks_like_heading = 0 918 else: 919 looks_like_heading = 1 920 for char in contents[1]: 921 if char != contents[1][0]: 922 looks_like_heading = 0 923 break 924 925 if looks_like_heading: 926 if len(contents[0]) != len(contents[1]): 927 estr = ("Possible heading typo: the number of "+ 928 "underline characters must match the "+ 929 "number of heading characters.") 930 errors.append(TokenizationError(estr, start, is_fatal=0)) 931 else: 932 level = _HEADING_CHARS.index(contents[1][0]) 933 tokens.append(Token(Token.HEADING, start, 934 contents[0], para_indent, level)) 935 return start+2 936 937 # Add the paragraph token, and return the linenum after it ends. 938 contents = ' '.join(contents) 939 tokens.append(Token(Token.PARA, start, contents, para_indent)) 940 return linenum
941
942 -def _tokenize(str, errors):
943 """ 944 Split a given formatted docstring into an ordered list of 945 C{Token}s, according to the epytext markup rules. 946 947 @param str: The epytext string 948 @type str: C{string} 949 @param errors: A list where any errors generated during parsing 950 will be stored. If no list is specified, then errors will 951 generate exceptions. 952 @type errors: C{list} of L{ParseError} 953 @return: a list of the C{Token}s that make up the given string. 954 @rtype: C{list} of L{Token} 955 """ 956 tokens = [] 957 lines = str.split('\n') 958 959 # Scan through the lines, determining what @type of token we're 960 # dealing with, and tokenizing it, as appropriate. 961 linenum = 0 962 while linenum < len(lines): 963 # Get the current line and its indentation. 964 line = lines[linenum] 965 indent = len(line)-len(line.lstrip()) 966 967 if indent == len(line): 968 # Ignore blank lines. 969 linenum += 1 970 continue 971 elif line[indent:indent+4] == '>>> ': 972 # blocks starting with ">>> " are doctest block tokens. 973 linenum = _tokenize_doctest(lines, linenum, indent, 974 tokens, errors) 975 elif _BULLET_RE.match(line, indent): 976 # blocks starting with a bullet are LI start tokens. 977 linenum = _tokenize_listart(lines, linenum, indent, 978 tokens, errors) 979 if tokens[-1].indent != None: 980 indent = tokens[-1].indent 981 else: 982 # Check for mal-formatted field items. 983 if line[indent] == '@': 984 estr = "Possible mal-formatted field item." 985 errors.append(TokenizationError(estr, linenum, is_fatal=0)) 986 987 # anything else is either a paragraph or a heading. 988 linenum = _tokenize_para(lines, linenum, indent, tokens, errors) 989 990 # Paragraph tokens ending in '::' initiate literal blocks. 991 if (tokens[-1].tag == Token.PARA and 992 tokens[-1].contents[-2:] == '::'): 993 tokens[-1].contents = tokens[-1].contents[:-1] 994 linenum = _tokenize_literal(lines, linenum, indent, tokens, errors) 995 996 return tokens
997 998 999 ################################################## 1000 ## Inline markup ("colorizing") 1001 ################################################## 1002 1003 # Assorted regular expressions used for colorizing. 1004 _BRACE_RE = re.compile('{|}') 1005 _TARGET_RE = re.compile('^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$') 1006
1007 -def _colorize(doc, token, errors, tagName='para'):
1008 """ 1009 Given a string containing the contents of a paragraph, produce a 1010 DOM C{Element} encoding that paragraph. Colorized regions are 1011 represented using DOM C{Element}s, and text is represented using 1012 DOM C{Text}s. 1013 1014 @param errors: A list of errors. Any newly generated errors will 1015 be appended to this list. 1016 @type errors: C{list} of C{string} 1017 1018 @param tagName: The element tag for the DOM C{Element} that should 1019 be generated. 1020 @type tagName: C{string} 1021 1022 @return: a DOM C{Element} encoding the given paragraph. 1023 @returntype: C{Element} 1024 """ 1025 str = token.contents 1026 linenum = 0 1027 1028 # Maintain a stack of DOM elements, containing the ancestors of 1029 # the text currently being analyzed. New elements are pushed when 1030 # "{" is encountered, and old elements are popped when "}" is 1031 # encountered. 1032 stack = [Element(tagName)] 1033 1034 # This is just used to make error-reporting friendlier. It's a 1035 # stack parallel to "stack" containing the index of each element's 1036 # open brace. 1037 openbrace_stack = [0] 1038 1039 # Process the string, scanning for '{' and '}'s. start is the 1040 # index of the first unprocessed character. Each time through the 1041 # loop, we process the text from the first unprocessed character 1042 # to the next open or close brace. 1043 start = 0 1044 while 1: 1045 match = _BRACE_RE.search(str, start) 1046 if match == None: break 1047 end = match.start() 1048 1049 # Open braces start new colorizing elements. When preceeded 1050 # by a capital letter, they specify a colored region, as 1051 # defined by the _COLORIZING_TAGS dictionary. Otherwise, 1052 # use a special "literal braces" element (with tag "litbrace"), 1053 # and convert them to literal braces once we find the matching 1054 # close-brace. 1055 if match.group() == '{': 1056 if (end>0) and 'A' <= str[end-1] <= 'Z': 1057 if (end-1) > start: 1058 stack[-1].children.append(str[start:end-1]) 1059 if str[end-1] not in _COLORIZING_TAGS: 1060 estr = "Unknown inline markup tag." 1061 errors.append(ColorizingError(estr, token, end-1)) 1062 stack.append(Element('unknown')) 1063 else: 1064 tag = _COLORIZING_TAGS[str[end-1]] 1065 stack.append(Element(tag)) 1066 else: 1067 if end > start: 1068 stack[-1].children.append(str[start:end]) 1069 stack.append(Element('litbrace')) 1070 openbrace_stack.append(end) 1071 stack[-2].children.append(stack[-1]) 1072 1073 # Close braces end colorizing elements. 1074 elif match.group() == '}': 1075 # Check for (and ignore) unbalanced braces. 1076 if len(stack) <= 1: 1077 estr = "Unbalanced '}'." 1078 errors.append(ColorizingError(estr, token, end)) 1079 start = end + 1 1080 continue 1081 1082 # Add any remaining text. 1083 if end > start: 1084 stack[-1].children.append(str[start:end]) 1085 1086 # Special handling for symbols: 1087 if stack[-1].tag == 'symbol': 1088 if (len(stack[-1].children) != 1 or 1089 not isinstance(stack[-1].children[0], basestring)): 1090 estr = "Invalid symbol code." 1091 errors.append(ColorizingError(estr, token, end)) 1092 else: 1093 symb = stack[-1].children[0] 1094 if symb in _SYMBOLS: 1095 # It's a symbol 1096 stack[-2].children[-1] = Element('symbol', symb) 1097 else: 1098 estr = "Invalid symbol code." 1099 errors.append(ColorizingError(estr, token, end)) 1100 1101 # Special handling for escape elements: 1102 if stack[-1].tag == 'escape': 1103 if (len(stack[-1].children) != 1 or 1104 not isinstance(stack[-1].children[0], basestring)): 1105 estr = "Invalid escape code." 
1106 errors.append(ColorizingError(estr, token, end)) 1107 else: 1108 escp = stack[-1].children[0] 1109 if escp in _ESCAPES: 1110 # It's an escape from _ESCPAES 1111 stack[-2].children[-1] = _ESCAPES[escp] 1112 elif len(escp) == 1: 1113 # It's a single-character escape (eg E{.}) 1114 stack[-2].children[-1] = escp 1115 else: 1116 estr = "Invalid escape code." 1117 errors.append(ColorizingError(estr, token, end)) 1118 1119 # Special handling for literal braces elements: 1120 if stack[-1].tag == 'litbrace': 1121 stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}'] 1122 1123 # Special handling for graphs: 1124 if stack[-1].tag == 'graph': 1125 _colorize_graph(doc, stack[-1], token, end, errors) 1126 1127 # Special handling for link-type elements: 1128 if stack[-1].tag in _LINK_COLORIZING_TAGS: 1129 _colorize_link(doc, stack[-1], token, end, errors) 1130 1131 # Pop the completed element. 1132 openbrace_stack.pop() 1133 stack.pop() 1134 1135 start = end+1 1136 1137 # Add any final text. 1138 if start < len(str): 1139 stack[-1].children.append(str[start:]) 1140 1141 if len(stack) != 1: 1142 estr = "Unbalanced '{'." 1143 errors.append(ColorizingError(estr, token, openbrace_stack[-1])) 1144 1145 return stack[0]
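
# Illustrative sketch (editorial addition): _colorize is internal; it turns
# the inline markup in a single Token's contents into a DOM element.
#
#   errors = []
#   doc = Element('epytext')
#   tok = Token(Token.PARA, 0, 'Some B{bold} text.', 0)
#   para = _colorize(doc, tok, errors)
#   print para    # <para>Some <bold>bold</bold> text.</para>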
1146 1147 GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph'] 1148
1149 -def _colorize_graph(doc, graph, token, end, errors):
1150 """ 1151 Eg:: 1152 G{classtree} 1153 G{classtree x, y, z} 1154 G{importgraph} 1155 """ 1156 bad_graph_spec = False 1157 1158 children = graph.children[:] 1159 graph.children = [] 1160 1161 if len(children) != 1 or not isinstance(children[0], basestring): 1162 bad_graph_spec = "Bad graph specification" 1163 else: 1164 pieces = children[0].split(None, 1) 1165 graphtype = pieces[0].replace(':','').strip().lower() 1166 if graphtype in GRAPH_TYPES: 1167 if len(pieces) == 2: 1168 if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*', pieces[1]): 1169 args = pieces[1].replace(',', ' ').replace(':','').split() 1170 else: 1171 bad_graph_spec = "Bad graph arg list" 1172 else: 1173 args = [] 1174 else: 1175 bad_graph_spec = ("Bad graph type %s -- use one of %s" % 1176 (pieces[0], ', '.join(GRAPH_TYPES))) 1177 1178 if bad_graph_spec: 1179 errors.append(ColorizingError(bad_graph_spec, token, end)) 1180 graph.children.append('none') 1181 graph.children.append('') 1182 return 1183 1184 graph.children.append(graphtype) 1185 for arg in args: 1186 graph.children.append(arg)
1187 1235 1236 ################################################## 1237 ## Formatters 1238 ################################################## 1239
1240 -def to_epytext(tree, indent=0, seclevel=0):
1241 """ 1242 Convert a DOM document encoding epytext back to an epytext string. 1243 This is the inverse operation from L{parse}. I.e., assuming there 1244 are no errors, the following is true: 1245 - C{parse(to_epytext(tree)) == tree} 1246 1247 The inverse is true, except that whitespace, line wrapping, and 1248 character escaping may be done differently. 1249 - C{to_epytext(parse(str)) == str} (approximately) 1250 1251 @param tree: A DOM document encoding of an epytext string. 1252 @type tree: C{Element} 1253 @param indent: The indentation for the string representation of 1254 C{tree}. Each line of the returned string will begin with 1255 C{indent} space characters. 1256 @type indent: C{int} 1257 @param seclevel: The section level that C{tree} appears at. This 1258 is used to generate section headings. 1259 @type seclevel: C{int} 1260 @return: The epytext string corresponding to C{tree}. 1261 @rtype: C{string} 1262 """ 1263 if isinstance(tree, basestring): 1264 str = re.sub(r'\{', '\0', tree) 1265 str = re.sub(r'\}', '\1', str) 1266 return str 1267 1268 if tree.tag == 'epytext': indent -= 2 1269 if tree.tag == 'section': seclevel += 1 1270 variables = [to_epytext(c, indent+2, seclevel) for c in tree.children] 1271 childstr = ''.join(variables) 1272 1273 # Clean up for literal blocks (add the double "::" back) 1274 childstr = re.sub(':(\s*)\2', '::\\1', childstr) 1275 1276 if tree.tag == 'para': 1277 str = wordwrap(childstr, indent)+'\n' 1278 str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str) 1279 str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str) 1280 str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str) 1281 str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str) 1282 str = re.sub('\0', 'E{lb}', str) 1283 str = re.sub('\1', 'E{rb}', str) 1284 return str 1285 elif tree.tag == 'li': 1286 bullet = tree.attribs.get('bullet') or '-' 1287 return indent*' '+ bullet + ' ' + childstr.lstrip() 1288 elif tree.tag == 'heading': 1289 str = re.sub('\0', 'E{lb}',childstr) 1290 str = re.sub('\1', 'E{rb}', str) 1291 uline = len(childstr)*_HEADING_CHARS[seclevel-1] 1292 return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n' 1293 elif tree.tag == 'doctestblock': 1294 str = re.sub('\0', '{', childstr) 1295 str = re.sub('\1', '}', str) 1296 lines = [' '+indent*' '+line for line in str.split('\n')] 1297 return '\n'.join(lines) + '\n\n' 1298 elif tree.tag == 'literalblock': 1299 str = re.sub('\0', '{', childstr) 1300 str = re.sub('\1', '}', str) 1301 lines = [(indent+1)*' '+line for line in str.split('\n')] 1302 return '\2' + '\n'.join(lines) + '\n\n' 1303 elif tree.tag == 'field': 1304 numargs = 0 1305 while tree.children[numargs+1].tag == 'arg': numargs += 1 1306 tag = variables[0] 1307 args = variables[1:1+numargs] 1308 body = variables[1+numargs:] 1309 str = (indent)*' '+'@'+variables[0] 1310 if args: str += '(' + ', '.join(args) + ')' 1311 return str + ':\n' + ''.join(body) 1312 elif tree.tag == 'target': 1313 return '<%s>' % childstr 1314 elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext', 1315 'section', 'olist', 'ulist', 'name'): 1316 return childstr 1317 elif tree.tag == 'symbol': 1318 return 'E{%s}' % childstr 1319 elif tree.tag == 'graph': 1320 return 'G{%s}' % ' '.join(variables) 1321 else: 1322 for (tag, name) in _COLORIZING_TAGS.items(): 1323 if name == tree.tag: 1324 return '%s{%s}' % (tag, childstr) 1325 raise ValueError('Unknown DOM element %r' % tree.tag)
1326 1327 SYMBOL_TO_PLAINTEXT = { 1328 'crarr': '\\', 1329 } 1330
1331 -def to_plaintext(tree, indent=0, seclevel=0):
1332 """ 1333 Convert a DOM document encoding epytext to a string representation. 1334 This representation is similar to the string generated by 1335 C{to_epytext}, but C{to_plaintext} removes inline markup, prints 1336 escaped characters in unescaped form, etc. 1337 1338 @param tree: A DOM document encoding of an epytext string. 1339 @type tree: C{Element} 1340 @param indent: The indentation for the string representation of 1341 C{tree}. Each line of the returned string will begin with 1342 C{indent} space characters. 1343 @type indent: C{int} 1344 @param seclevel: The section level that C{tree} appears at. This 1345 is used to generate section headings. 1346 @type seclevel: C{int} 1347 @return: The epytext string corresponding to C{tree}. 1348 @rtype: C{string} 1349 """ 1350 if isinstance(tree, basestring): return tree 1351 1352 if tree.tag == 'section': seclevel += 1 1353 1354 # Figure out the child indent level. 1355 if tree.tag == 'epytext': cindent = indent 1356 elif tree.tag == 'li' and tree.attribs.get('bullet'): 1357 cindent = indent + 1 + len(tree.attribs.get('bullet')) 1358 else: 1359 cindent = indent + 2 1360 variables = [to_plaintext(c, cindent, seclevel) for c in tree.children] 1361 childstr = ''.join(variables) 1362 1363 if tree.tag == 'para': 1364 return wordwrap(childstr, indent)+'\n' 1365 elif tree.tag == 'li': 1366 # We should be able to use getAttribute here; but there's no 1367 # convenient way to test if an element has an attribute.. 1368 bullet = tree.attribs.get('bullet') or '-' 1369 return indent*' ' + bullet + ' ' + childstr.lstrip() 1370 elif tree.tag == 'heading': 1371 uline = len(childstr)*_HEADING_CHARS[seclevel-1] 1372 return ((indent-2)*' ' + childstr + '\n' + 1373 (indent-2)*' ' + uline + '\n') 1374 elif tree.tag == 'doctestblock': 1375 lines = [(indent+2)*' '+line for line in childstr.split('\n')] 1376 return '\n'.join(lines) + '\n\n' 1377 elif tree.tag == 'literalblock': 1378 lines = [(indent+1)*' '+line for line in childstr.split('\n')] 1379 return '\n'.join(lines) + '\n\n' 1380 elif tree.tag == 'fieldlist': 1381 return childstr 1382 elif tree.tag == 'field': 1383 numargs = 0 1384 while tree.children[numargs+1].tag == 'arg': numargs += 1 1385 tag = variables[0] 1386 args = variables[1:1+numargs] 1387 body = variables[1+numargs:] 1388 str = (indent)*' '+'@'+variables[0] 1389 if args: str += '(' + ', '.join(args) + ')' 1390 return str + ':\n' + ''.join(body) 1391 elif tree.tag == 'uri': 1392 if len(variables) != 2: raise ValueError('Bad URI ') 1393 elif variables[0] == variables[1]: return '<%s>' % variables[1] 1394 else: return '%r<%s>' % (variables[0], variables[1]) 1395 elif tree.tag == 'link': 1396 if len(variables) != 2: raise ValueError('Bad Link') 1397 return '%s' % variables[0] 1398 elif tree.tag in ('olist', 'ulist'): 1399 # [xx] always use condensed lists. 1400 ## Use a condensed list if each list item is 1 line long. 1401 #for child in variables: 1402 # if child.count('\n') > 2: return childstr 1403 return childstr.replace('\n\n', '\n')+'\n' 1404 elif tree.tag == 'symbol': 1405 return '%s' % SYMBOL_TO_PLAINTEXT.get(childstr, childstr) 1406 elif tree.tag == 'graph': 1407 return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:])) 1408 else: 1409 # Assume that anything else can be passed through. 1410 return childstr
1411
1412 -def to_debug(tree, indent=4, seclevel=0):
1413 """ 1414 Convert a DOM document encoding epytext back to an epytext string, 1415 annotated with extra debugging information. This function is 1416 similar to L{to_epytext}, but it adds explicit information about 1417 where different blocks begin, along the left margin. 1418 1419 @param tree: A DOM document encoding of an epytext string. 1420 @type tree: C{Element} 1421 @param indent: The indentation for the string representation of 1422 C{tree}. Each line of the returned string will begin with 1423 C{indent} space characters. 1424 @type indent: C{int} 1425 @param seclevel: The section level that C{tree} appears at. This 1426 is used to generate section headings. 1427 @type seclevel: C{int} 1428 @return: The epytext string corresponding to C{tree}. 1429 @rtype: C{string} 1430 """ 1431 if isinstance(tree, basestring): 1432 str = re.sub(r'\{', '\0', tree) 1433 str = re.sub(r'\}', '\1', str) 1434 return str 1435 1436 if tree.tag == 'section': seclevel += 1 1437 variables = [to_debug(c, indent+2, seclevel) for c in tree.children] 1438 childstr = ''.join(variables) 1439 1440 # Clean up for literal blocks (add the double "::" back) 1441 childstr = re.sub(':( *\n \|\n)\2', '::\\1', childstr) 1442 1443 if tree.tag == 'para': 1444 str = wordwrap(childstr, indent-6, 69)+'\n' 1445 str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str) 1446 str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str) 1447 str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str) 1448 str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str) 1449 str = re.sub('\0', 'E{lb}', str) 1450 str = re.sub('\1', 'E{rb}', str) 1451 lines = str.rstrip().split('\n') 1452 lines[0] = ' P>|' + lines[0] 1453 lines[1:] = [' |'+l for l in lines[1:]] 1454 return '\n'.join(lines)+'\n |\n' 1455 elif tree.tag == 'li': 1456 bullet = tree.attribs.get('bullet') or '-' 1457 return ' LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip() 1458 elif tree.tag in ('olist', 'ulist'): 1459 return 'LIST>|'+(indent-4)*' '+childstr[indent+2:] 1460 elif tree.tag == 'heading': 1461 str = re.sub('\0', 'E{lb}', childstr) 1462 str = re.sub('\1', 'E{rb}', str) 1463 uline = len(childstr)*_HEADING_CHARS[seclevel-1] 1464 return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' + 1465 ' |'+(indent-8)*' ' + uline + '\n') 1466 elif tree.tag == 'doctestblock': 1467 str = re.sub('\0', '{', childstr) 1468 str = re.sub('\1', '}', str) 1469 lines = [' |'+(indent-4)*' '+line for line in str.split('\n')] 1470 lines[0] = 'DTST>'+lines[0][5:] 1471 return '\n'.join(lines) + '\n |\n' 1472 elif tree.tag == 'literalblock': 1473 str = re.sub('\0', '{', childstr) 1474 str = re.sub('\1', '}', str) 1475 lines = [' |'+(indent-5)*' '+line for line in str.split('\n')] 1476 lines[0] = ' LIT>'+lines[0][5:] 1477 return '\2' + '\n'.join(lines) + '\n |\n' 1478 elif tree.tag == 'field': 1479 numargs = 0 1480 while tree.children[numargs+1].tag == 'arg': numargs += 1 1481 tag = variables[0] 1482 args = variables[1:1+numargs] 1483 body = variables[1+numargs:] 1484 str = ' FLD>|'+(indent-6)*' '+'@'+variables[0] 1485 if args: str += '(' + ', '.join(args) + ')' 1486 return str + ':\n' + ''.join(body) 1487 elif tree.tag == 'target': 1488 return '<%s>' % childstr 1489 elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext', 1490 'section', 'olist', 'ulist', 'name'): 1491 return childstr 1492 elif tree.tag == 'symbol': 1493 return 'E{%s}' % childstr 1494 elif tree.tag == 'graph': 1495 return 'G{%s}' % ' '.join(variables) 1496 else: 1497 for (tag, name) in _COLORIZING_TAGS.items(): 1498 if name == tree.tag: 1499 return '%s{%s}' % 
(tag, childstr) 1500 raise ValueError('Unknown DOM element %r' % tree.tag)
1501
def to_rst(tree, indent=0, seclevel=0, wrap_startindex=0):
    """
    Convert a DOM document encoding epytext into a reStructuredText
    markup string.  (Because rst is fairly loosely defined, it is
    possible that this function will produce incorrect output in some
    cases.)

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The reStructuredText string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, basestring):
        # escaped characters: \ * ` _ | [ ]
        s = tree.replace('\\', '\\\\')
        return re.sub(r'(\b[*`|\[]|[*`|\]]\b)', r'\\\1', s)
    elif tree.tag == 'para':
        # We have to do a bunch of escaping to ensure that the output
        # won't be interpreted as various rst constructs.
        s = ''
        for child in tree.children:
            childstr = to_rst(child, indent, seclevel)
            if ( s[-1:] in ('*','`','|') and
                 childstr[:1] not in (' ','\n','.',',',';',"'",'"',')',':') ):
                s += r'\ ' # eg "`foo`s" -> "`foo`\ s"
            s += childstr
        s = wordwrap(s, indent, 75, wrap_startindex)+'\n'
        # might look like bulleted lists:
        s = re.sub(r'(?m)^(\s*)([\*\-\+])', r'\1\\\2', s)
        # might look like enumerated lists:
        s = re.sub(r'(?m)^(\s*)(\d+\.|\#\.)', r'\1\\\2', s)
        # might look like section titles:
        s = re.sub(r'(?m)^(\s*)(([^a-zA-Z0-9\s])\3\3*\s*$)', r'\1\\\2', s)
        # might look like field lists or option lists or line blocks
        s = re.sub(r'(?m)^(\s*)([:\-\/\|])', r'\1\\\2', s)
        # might look like literal block start:
        s = re.sub(r'(?m)(::\s*)$', r'\\\1', s)
        # might look like a doctest block:
        s = re.sub(r'(?m)^(\s*)(>>>)$', r'\1\\\2', s)
        # might look like a table or transition:
        s = re.sub(r'(?m)^(\s*)([+=-])', r'\1\\\2', s)
        # might look like a directive or footnote:
        s = re.sub(r'(?m)^(\s*)(\.\.)', r'\1\\\2', s)
        return s
    elif tree.tag == 'doctestblock':
        for c in tree.children: assert isinstance(c, basestring)
        childstr = ''.join(tree.children)
        return '\n\n%s\n\n' % '\n'.join([' '*indent+line
                                         for line in childstr.split('\n')])
    elif tree.tag == 'literalblock':
        for c in tree.children: assert isinstance(c, basestring)
        childstr = ''.join(tree.children)
        return '\n\n::\n\n%s\n\n' % '\n'.join([' '*(indent+1)+line
                                               for line in childstr.split('\n')])
    elif tree.tag == 'link':
        name = to_rst(tree.children[0], indent, seclevel).strip()
        target = ''.join(tree.children[1].children).strip()
        if target == re.sub(r'\(.*\)$', '', name).replace('\_', '_'):
            return '`%s`' % target
        else:
            return '`%s <%s>`' % (name, target)
    elif tree.tag == 'uri':
        name = to_rst(tree.children[0], indent, seclevel).strip()
        target = ''.join(tree.children[1].children).strip()
        if target == name and name.startswith('http://'):
            return target
        else:
            return '`%s <%s>`__' % (name, target)
    elif tree.tag == 'target':
        return '<%s>' % ''.join(tree.children)
    elif tree.tag == 'symbol':
        return '|%s|' % ''.join(tree.children) # [XX!!]
    else:
        if tree.tag == 'li':
            bullet = tree.attribs.get('bullet') or '-'
            # bullets like "2.9." are not supported by rst:
            bullet = re.sub(r'^(\d+.)*(\d+.)$', r'\2', bullet)
            child_indent = indent + len(bullet) + 1
            child_wrap_startindex = child_indent
        elif tree.tag == 'field':
            tt = to_debug(tree)
            tagname = ''.join(tree.children.pop(0).children)
            args = []
            while tree.children and tree.children[0].tag == 'arg':
                args.append(''.join(tree.children.pop(0).children))
            child_indent = max(indent, wrap_startindex) + 4
            tag = ':%s%s: ' % (tagname, ''.join([' %s' % arg for arg in args]))
            child_wrap_startindex = indent+len(tag)
        else:
            child_indent = indent
            child_wrap_startindex = wrap_startindex

        if tree.children:
            childstrs = ([to_rst(tree.children[0], child_indent,
                                 seclevel, child_wrap_startindex)] +
                         [to_rst(c, indent, seclevel)
                          for c in tree.children[1:]])
        else:
            childstrs = []
        childstr = ''.join(childstrs)

        if tree.tag in ('fieldlist', 'olist', 'ulist'):
            tight = True
            for item in childstrs[:-1]:
                if '\n' in item.strip():
                    tight = False
            if tight:
                return '\n'.join([item.rstrip() for item in childstrs])+'\n\n'
            else:
                return childstr+'\n'
        elif tree.tag == 'field':
            return '%s%s%s' % ((indent-wrap_startindex)*' ', tag, childstr)
        elif tree.tag == 'li':
            return '%s%s %s' % ((indent-wrap_startindex)*' ', bullet, childstr)
        elif tree.tag == 'graph':
            return '\n\n(GRAPH: %s)\n\n' % childstr
        elif tree.tag == 'heading':
            uline = len(childstr)*_HEADING_CHARS[seclevel-1]
            return '%s%s\n%s%s\n' % (indent*' ', childstr, indent*' ', uline)
        elif tree.tag in ('tag', 'arg', 'section', 'name'):
            return childstr
        elif tree.tag == 'code':
            if [c for c in tree.children if not isinstance(c, basestring)]:
                # this is probably not right!
                print 'Warning: Generating "``%s``"' % childstr
                return '``%s``' % childstr
            else:
                return '``%s``' % ''.join(tree.children)
        elif tree.tag == 'math' or tree.tag == 'italic':
            return '*%s*' % childstr
        elif tree.tag == 'indexed':
            return '`%s`:term:' % childstr
        elif tree.tag == 'bold':
            return '**%s**' % childstr
        elif tree.tag == 'epytext':
            # this may munge literal blocks.  oh well.
            childstr = re.sub('\n{3,}', '\n\n', childstr)
            childstr = re.sub(':\s*\n+::\n', '::\n', childstr)
            return childstr.rstrip()+'\n'
        else:
            raise ValueError('Unknown DOM element %r' % tree.tag)
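
# Usage sketch (editorial illustration, not part of the original module):
# to_rst() renders the Element tree produced by this module's parse()
# function as reStructuredText.  The input string and the indicated output
# are examples only; the exact output may differ slightly.
#
#     errors = []
#     tree = parse("A sentence with B{bold} text.", errors)
#     rst = to_rst(tree)      # roughly: "A sentence with **bold** text."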

##################################################
## Top-Level Wrapper function
##################################################
SCRWIDTH = 75

def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: C{Element}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    errors = []
    confused = 0
    try:
        val = parse(str, errors)
        warnings = [e for e in errors if not e.is_fatal()]
        errors = [e for e in errors if e.is_fatal()]
    except:
        confused = 1

    if not show_warnings: warnings = []
    warnings.sort()
    errors.sort()
    if warnings:
        print >>stream, '='*SCRWIDTH
        print >>stream, "WARNINGS"
        print >>stream, '-'*SCRWIDTH
        for warning in warnings:
            print >>stream, warning.as_warning()
        print >>stream, '='*SCRWIDTH
    if errors and show_errors:
        if not warnings: print >>stream, '='*SCRWIDTH
        print >>stream, "ERRORS"
        print >>stream, '-'*SCRWIDTH
        for error in errors:
            print >>stream, error
        print >>stream, '='*SCRWIDTH

    if confused: raise
    elif errors: raise SyntaxError('Encountered Errors')
    else: return val
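
# Usage sketch (editorial illustration, not part of the original module):
# pparse() prints any warnings and errors to the given stream and raises
# SyntaxError when a fatal error is encountered.
#
#     try:
#         tree = pparse("Some epytext docstring.")
#     except SyntaxError:
#         tree = None      # fall back, e.g. to parse_as_literal()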

##################################################
## Parse Errors
##################################################

class TokenizationError(ParseError):
    """
    An error generated while tokenizing a formatted documentation
    string.
    """

class StructuringError(ParseError):
    """
    An error generated while structuring a formatted documentation
    string.
    """

class ColorizingError(ParseError):
    """
    An error generated while colorizing a paragraph.
    """
    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.

        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occurred
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occurred.
        @type charnum: C{int}
        """
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum

    CONTEXT_RANGE = 20
    def descr(self):
        RANGE = self.CONTEXT_RANGE
        if self.charnum <= RANGE:
            left = self.token.contents[0:self.charnum]
        else:
            left = '...'+self.token.contents[self.charnum-RANGE:self.charnum]
        if (len(self.token.contents)-self.charnum) <= RANGE:
            right = self.token.contents[self.charnum:]
        else:
            right = (self.token.contents[self.charnum:self.charnum+RANGE]
                     + '...')
        return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))

##################################################
## Convenience parsers
##################################################

def parse_as_literal(str):
    """
    Return a DOM document matching the epytext DTD, containing a
    single literal block.  That literal block will include the
    contents of the given string.  This method is typically used as a
    fall-back when the parser fails.

    @param str: The string which should be enclosed in a literal
        block.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single literal
        block.
    @rtype: C{Element}
    """
    return Element('epytext', Element('literalblock', str))

def parse_as_para(str):
    """
    Return a DOM document matching the epytext DTD, containing a
    single paragraph.  That paragraph will include the contents of the
    given string.  This can be used to wrap some forms of
    automatically generated information (such as type names) in
    paragraphs.

    @param str: The string which should be enclosed in a paragraph.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single paragraph.
    @rtype: C{Element}
    """
    return Element('epytext', Element('para', str))
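
# Usage sketch (editorial illustration, not part of the original module):
# both convenience parsers wrap a plain string in a minimal DOM document,
# which can then be rendered like any other parse result.
#
#     doc1 = parse_as_literal("raw text\n  kept verbatim")
#     doc2 = parse_as_para("str of int")
#     text = to_plaintext(doc2)   # to_plaintext() is defined earlier in
#                                 # this module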

#################################################
## SUPPORT FOR EPYDOC
#################################################

def parse_docstring(docstring, errors, **options):
    """
    Parse the given docstring, which is formatted using epytext; and
    return a C{ParsedDocstring} representation of its contents.

    @param docstring: The docstring to parse
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  Unknown options are ignored.
        Currently, no extra options are defined.
    @rtype: L{ParsedDocstring}
    """
    return ParsedEpytextDocstring(parse(docstring, errors), **options)
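
# Usage sketch (editorial illustration, not part of the original module):
# parse_docstring() is the entry point used for epydoc support; parse errors
# are appended to the caller-supplied list rather than raised.
#
#     errors = []
#     pd = parse_docstring("Return the B{absolute} value.", errors)
#     if errors:
#         pass   # inspect the ParseError objects here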

class ParsedEpytextDocstring(ParsedDocstring):
    SYMBOL_TO_HTML = {
        # Symbols
        '<-': '&larr;', '->': '&rarr;', '^': '&uarr;', 'v': '&darr;',

        # Greek letters
        'alpha': '&alpha;', 'beta': '&beta;', 'gamma': '&gamma;',
        'delta': '&delta;', 'epsilon': '&epsilon;', 'zeta': '&zeta;',
        'eta': '&eta;', 'theta': '&theta;', 'iota': '&iota;',
        'kappa': '&kappa;', 'lambda': '&lambda;', 'mu': '&mu;',
        'nu': '&nu;', 'xi': '&xi;', 'omicron': '&omicron;',
        'pi': '&pi;', 'rho': '&rho;', 'sigma': '&sigma;',
        'tau': '&tau;', 'upsilon': '&upsilon;', 'phi': '&phi;',
        'chi': '&chi;', 'psi': '&psi;', 'omega': '&omega;',
        'Alpha': '&Alpha;', 'Beta': '&Beta;', 'Gamma': '&Gamma;',
        'Delta': '&Delta;', 'Epsilon': '&Epsilon;', 'Zeta': '&Zeta;',
        'Eta': '&Eta;', 'Theta': '&Theta;', 'Iota': '&Iota;',
        'Kappa': '&Kappa;', 'Lambda': '&Lambda;', 'Mu': '&Mu;',
        'Nu': '&Nu;', 'Xi': '&Xi;', 'Omicron': '&Omicron;',
        'Pi': '&Pi;', 'Rho': '&Rho;', 'Sigma': '&Sigma;',
        'Tau': '&Tau;', 'Upsilon': '&Upsilon;', 'Phi': '&Phi;',
        'Chi': '&Chi;', 'Psi': '&Psi;', 'Omega': '&Omega;',

        # HTML character entities
        'larr': '&larr;', 'rarr': '&rarr;', 'uarr': '&uarr;',
        'darr': '&darr;', 'harr': '&harr;', 'crarr': '&crarr;',
        'lArr': '&lArr;', 'rArr': '&rArr;', 'uArr': '&uArr;',
        'dArr': '&dArr;', 'hArr': '&hArr;',
        'copy': '&copy;', 'times': '&times;', 'forall': '&forall;',
        'exist': '&exist;', 'part': '&part;',
        'empty': '&empty;', 'isin': '&isin;', 'notin': '&notin;',
        'ni': '&ni;', 'prod': '&prod;', 'sum': '&sum;',
        'prop': '&prop;', 'infin': '&infin;', 'ang': '&ang;',
        'and': '&and;', 'or': '&or;', 'cap': '&cap;', 'cup': '&cup;',
        'int': '&int;', 'there4': '&there4;', 'sim': '&sim;',
        'cong': '&cong;', 'asymp': '&asymp;', 'ne': '&ne;',
        'equiv': '&equiv;', 'le': '&le;', 'ge': '&ge;',
        'sub': '&sub;', 'sup': '&sup;', 'nsub': '&nsub;',
        'sube': '&sube;', 'supe': '&supe;', 'oplus': '&oplus;',
        'otimes': '&otimes;', 'perp': '&perp;',

        # Alternate (long) names
        'infinity': '&infin;', 'integral': '&int;', 'product': '&prod;',
        '<=': '&le;', '>=': '&ge;',
        }

    SYMBOL_TO_LATEX = {
        # Symbols
        '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)',
        '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)',

        # Greek letters (use lower case when upcase not available)
        'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma': r'\(\gamma\)',
        'delta': r'\(\delta\)', 'epsilon': r'\(\epsilon\)',
        'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)',
        'theta': r'\(\theta\)', 'iota': r'\(\iota\)',
        'kappa': r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)',
        'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)',
        'pi': r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)',
        'tau': r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)',
        'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega': r'\(\omega\)',

        'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma': r'\(\Gamma\)',
        'Delta': r'\(\Delta\)', 'Epsilon': r'\(\epsilon\)',
        'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)',
        'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)',
        'Kappa': r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)',
        'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)',
        'Pi': r'\(\Pi\)', 'Rho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)',
        'Tau': r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)',
        'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega': r'\(\Omega\)',

        # HTML character entities
        'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)',
        'uarr': r'\(\uparrow\)', 'darr': r'\(\downarrow\)',
        'harr': r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)',
        'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)',
        'uArr': r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)',
        'hArr': r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}',
        'times': r'\(\times\)', 'forall': r'\(\forall\)',
        'exist': r'\(\exists\)', 'part': r'\(\partial\)',
        'empty': r'\(\emptyset\)', 'isin': r'\(\in\)',
        'notin': r'\(\notin\)', 'ni': r'\(\ni\)',
        'prod': r'\(\prod\)', 'sum': r'\(\sum\)',
        'prop': r'\(\propto\)', 'infin': r'\(\infty\)',
        'ang': r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)',
        'cap': r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)',
        'there4': r'\(\therefore\)', 'sim': r'\(\sim\)',
        'cong': r'\(\cong\)', 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)',
        'equiv': r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)',
        'sub': r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)',
        'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)',
        'oplus': r'\(\oplus\)', 'otimes': r'\(\otimes\)',
        'perp': r'\(\perp\)',

        # Alternate (long) names
        'infinity': r'\(\infty\)', 'integral': r'\(\int\)',
        'product': r'\(\prod\)',
        '<=': r'\(\le\)', '>=': r'\(\ge\)',
        }

    def __init__(self, dom_tree, **options):
        self._tree = dom_tree
        # Caching:
        self._html = self._latex = self._plaintext = None
        self._terms = None
        # inline option -- mark top-level children as inline.
        if options.get('inline') and self._tree is not None:
            for elt in self._tree.children:
                elt.attribs['inline'] = True

    def __str__(self):
        return str(self._tree)

    def to_html(self, docstring_linker, directory=None, docindex=None,
                context=None, **options):
        if self._html is not None: return self._html
        if self._tree is None: return ''
        indent = options.get('indent', 0)
        self._html = self._to_html(self._tree, docstring_linker, directory,
                                   docindex, context, indent)
        return self._html

    def to_latex(self, docstring_linker, directory=None, docindex=None,
                 context=None, **options):
        if self._latex is not None: return self._latex
        if self._tree is None: return ''
        indent = options.get('indent', 0)
        self._hyperref = options.get('hyperref', 1)
        self._latex = self._to_latex(self._tree, docstring_linker, directory,
                                     docindex, context, indent)
        return self._latex

    def to_plaintext(self, docstring_linker, **options):
        # [XX] don't cache -- different options might be used!!
        #if self._plaintext is not None: return self._plaintext
        if self._tree is None: return ''
        if 'indent' in options:
            self._plaintext = to_plaintext(self._tree,
                                           indent=options['indent'])
        else:
            self._plaintext = to_plaintext(self._tree)
        return self._plaintext
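
    # Usage sketch (editorial illustration, not part of the original module):
    # to_plaintext() does not consult the linker argument, so None can be
    # passed for a quick conversion.
    #
    #     pd = parse_docstring("One B{short} sentence.", [])
    #     text = pd.to_plaintext(None)
    #     text_indented = pd.to_plaintext(None, indent=4)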

    def _index_term_key(self, tree):
        str = to_plaintext(tree)
        str = re.sub(r'\s\s+', '-', str)
        return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)

    def _to_html(self, tree, linker, directory, docindex, context,
                 indent=0, seclevel=0):
        if isinstance(tree, basestring):
            return plaintext_to_html(tree)

        if tree.tag == 'epytext': indent -= 2
        if tree.tag == 'section': seclevel += 1

        # Process the variables first.
        variables = [self._to_html(c, linker, directory, docindex, context,
                                   indent+2, seclevel)
                     for c in tree.children]

        # Construct the HTML string for the variables.
        childstr = ''.join(variables)

        # Perform the appropriate action for the DOM tree type.
        if tree.tag == 'para':
            return wordwrap(
                (tree.attribs.get('inline') and '%s' or '<p>%s</p>') % childstr,
                indent)
        elif tree.tag == 'code':
            style = tree.attribs.get('style')
            if style:
                return '<code class="%s">%s</code>' % (style, childstr)
            else:
                return '<code>%s</code>' % childstr
        elif tree.tag == 'uri':
            return ('<a href="%s" target="_top">%s</a>' %
                    (variables[1], variables[0]))
        elif tree.tag == 'link':
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '<i>%s</i>' % childstr
        elif tree.tag == 'math':
            return '<i class="math">%s</i>' % childstr
        elif tree.tag == 'indexed':
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
            #term_key = self._index_term_key(tree)
            #return linker.translate_indexterm(childstr, term_key)
        elif tree.tag == 'bold':
            return '<b>%s</b>' % childstr
        elif tree.tag == 'ulist':
            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
        elif tree.tag == 'olist':
            start = tree.attribs.get('start') or ''
            return ('%s<ol start="%s">\n%s%s</ol>\n' %
                    (indent*' ', start, childstr, indent*' '))
        elif tree.tag == 'li':
            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
        elif tree.tag == 'heading':
            return ('%s<h%s class="heading">%s</h%s>\n' %
                    ((indent-2)*' ', seclevel, childstr, seclevel))
        elif tree.tag == 'literalblock':
            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_html(tree.children[0].strip())
        elif tree.tag == 'fieldlist':
            raise AssertionError("There should not be any field lists left")
        elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                          'name', 'target', 'html'):
            return childstr
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            if directory is None: return ''
            # Generate the graph.
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            # Write the graph.
            return graph.to_html(directory)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tag)

    #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph']
    def _build_graph(self, graph_type, graph_args, linker,
                     docindex, context):
        # Generate the graph
        if graph_type == 'classtree':
            from epydoc.apidoc import ClassDoc
            if graph_args:
                bases = [docindex.find(name, context)
                         for name in graph_args]
            elif isinstance(context, ClassDoc):
                bases = [context]
            else:
                log.warning("Could not construct class tree: you must "
                            "specify one or more base classes.")
                return None
            from epydoc.docwriter.dotgraph import class_tree_graph
            return class_tree_graph(bases, linker, context)
        elif graph_type == 'packagetree':
            from epydoc.apidoc import ModuleDoc
            if graph_args:
                packages = [docindex.find(name, context)
                            for name in graph_args]
            elif isinstance(context, ModuleDoc):
                packages = [context]
            else:
                log.warning("Could not construct package tree: you must "
                            "specify one or more root packages.")
                return None
            from epydoc.docwriter.dotgraph import package_tree_graph
            return package_tree_graph(packages, linker, context)
        elif graph_type == 'importgraph':
            from epydoc.apidoc import ModuleDoc
            modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
            from epydoc.docwriter.dotgraph import import_graph
            return import_graph(modules, docindex, linker, context)
        elif graph_type == 'callgraph':
            if graph_args:
                docs = [docindex.find(name, context) for name in graph_args]
                docs = [doc for doc in docs if doc is not None]
            else:
                docs = [context]
            from epydoc.docwriter.dotgraph import call_graph
            return call_graph(docs, docindex, linker, context)
        else:
            log.warning("Unknown graph type %s" % graph_type)

    def _to_latex(self, tree, linker, directory, docindex, context,
                  indent=0, seclevel=0, breakany=0):
        if isinstance(tree, basestring):
            return plaintext_to_latex(tree, breakany=breakany)

        if tree.tag == 'section': seclevel += 1

        # Figure out the child indent level.
        if tree.tag == 'epytext': cindent = indent
        else: cindent = indent + 2
        variables = [self._to_latex(c, linker, directory, docindex,
                                    context, cindent, seclevel, breakany)
                     for c in tree.children]
        childstr = ''.join(variables)

        if tree.tag == 'para':
            return wordwrap(childstr, indent)+'\n'
        elif tree.tag == 'code':
            return '\\texttt{%s}' % childstr
        elif tree.tag == 'uri':
            if len(variables) != 2: raise ValueError('Bad URI ')
            if self._hyperref:
                # ~ and # should not be escaped in the URI.
                uri = tree.children[1].children[0]
                uri = uri.replace('{\\textasciitilde}', '~')
                uri = uri.replace('\\#', '#')
                if variables[0] == variables[1]:
                    return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
                else:
                    return ('%s\\footnote{\\href{%s}{%s}}' %
                            (variables[0], uri, variables[1]))
            else:
                if variables[0] == variables[1]:
                    return '\\textit{%s}' % variables[1]
                else:
                    return '%s\\footnote{%s}' % (variables[0], variables[1])
        elif tree.tag == 'link':
            if len(variables) != 2: raise ValueError('Bad Link')
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'math':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'indexed':
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tag == 'bold':
            return '\\textbf{%s}' % childstr
        elif tree.tag == 'li':
            return indent*' ' + '\\item ' + childstr.lstrip()
        elif tree.tag == 'heading':
            sec = ('\\EpydocUser' +
                   ('%ssection' % ('sub'*(min(seclevel,3)-1))).capitalize())
            return (' '*(indent-2) + '%s{%s}\n\n' % (sec, childstr.strip()))
        elif tree.tag == 'doctestblock':
            return doctest_to_latex(tree.children[0].strip())
        elif tree.tag == 'literalblock':
            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
        elif tree.tag == 'fieldlist':
            return indent*' '+'{omitted fieldlist}\n'
        elif tree.tag == 'olist':
            return (' '*indent + '\\begin{enumerate}\n\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
                    childstr +
                    ' '*indent + '\\end{enumerate}\n\n')
        elif tree.tag == 'ulist':
            return (' '*indent + '\\begin{itemize}\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
                    childstr +
                    ' '*indent + '\\end{itemize}\n\n')
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            if directory is None: return ''
            # Generate the graph.
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            # Write the graph.
            return graph.to_latex(directory)
        else:
            # Assume that anything else can be passed through.
            return childstr

    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')

    def summary(self):
        if self._tree is None: return self, False
        tree = self._tree
        doc = Element('epytext')

        # Find the first paragraph.
        variables = tree.children
        while (len(variables) > 0) and (variables[0].tag != 'para'):
            if variables[0].tag in ('section', 'ulist', 'olist', 'li'):
                variables = variables[0].children
            else:
                variables = variables[1:]

        # Special case: if the docstring contains a single literal block,
        # then try extracting the summary from it.
        if (len(variables) == 0 and len(tree.children) == 1 and
            tree.children[0].tag == 'literalblock'):
            str = re.split(r'\n\s*(\n|$).*',
                           tree.children[0].children[0], 1)[0]
            variables = [Element('para')]
            variables[0].children.append(str)

        # If we didn't find a paragraph, return an empty epytext.
        if len(variables) == 0: return ParsedEpytextDocstring(doc), False

        # Is there anything else, excluding tags, after the first variable?
        long_docs = False
        for var in variables[1:]:
            if isinstance(var, Element) and var.tag == 'fieldlist':
                continue
            long_docs = True
            break

        # Extract the first sentence.
        parachildren = variables[0].children
        para = Element('para', inline=True)
        doc.children.append(para)
        for parachild in parachildren:
            if isinstance(parachild, basestring):
                m = self._SUMMARY_RE.match(parachild)
                if m:
                    para.children.append(m.group(1))
                    long_docs |= parachild is not parachildren[-1]
                    if not long_docs:
                        other = parachild[m.end():]
                        if other and not other.isspace():
                            long_docs = True
                    return ParsedEpytextDocstring(doc), long_docs
            para.children.append(parachild)

        return ParsedEpytextDocstring(doc), long_docs
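
    # Usage sketch (editorial illustration, not part of the original module):
    # summary() returns a (first-sentence docstring, more-content flag) pair.
    #
    #     pd = parse_docstring("First sentence.  More detail follows.", [])
    #     short, is_long = pd.summary()
    #     # is_long is True here, since text remains after the first sentence.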

    def split_fields(self, errors=None):
        if self._tree is None: return (self, ())
        tree = Element(self._tree.tag, *self._tree.children,
                       **self._tree.attribs)
        fields = []

        if (tree.children and
            tree.children[-1].tag == 'fieldlist' and
            tree.children[-1].children):
            field_nodes = tree.children[-1].children
            del tree.children[-1]

            for field in field_nodes:
                # Get the tag
                tag = field.children[0].children[0].lower()
                del field.children[0]

                # Get the argument.
                if field.children and field.children[0].tag == 'arg':
                    arg = field.children[0].children[0]
                    del field.children[0]
                else:
                    arg = None

                # Process the field.
                field.tag = 'epytext'
                fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))

        # Save the remaining docstring as the description..
        if tree.children and tree.children[0].children:
            return ParsedEpytextDocstring(tree), fields
        else:
            return None, fields
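
    # Usage sketch (editorial illustration, not part of the original module):
    # split_fields() separates the trailing field list from the body of the
    # docstring.
    #
    #     pd = parse_docstring("Do a thing.\n\n@param x: the input\n", [])
    #     body, fields = pd.split_fields()
    #     # fields is a list of Field(tag, arg, value) objects; here a single
    #     # Field with tag 'param' and arg 'x'.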

    def index_terms(self):
        if self._terms is None:
            self._terms = []
            self._index_terms(self._tree, self._terms)
        return self._terms

    def _index_terms(self, tree, terms):
        if tree is None or isinstance(tree, basestring):
            return

        if tree.tag == 'indexed':
            term = Element('epytext', *tree.children, **tree.attribs)
            terms.append(ParsedEpytextDocstring(term))

        # Look for index items in child nodes.
        for child in tree.children:
            self._index_terms(child, terms)