1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9  """ 
  10  Parser for epytext strings.  Epytext is a lightweight markup whose 
  11  primary intended application is Python documentation strings.  This 
  12  parser converts Epytext strings to a simple DOM-like representation 
  13  (encoded as a tree of L{Element} objects and strings).  Epytext 
  14  strings can contain the following X{structural blocks}: 
  15   
  16      - X{epytext}: The top-level element of the DOM tree. 
  17      - X{para}: A paragraph of text.  Paragraphs contain no newlines,  
  18        and all spaces are soft. 
  19      - X{section}: A section or subsection. 
  20      - X{field}: A tagged field.  These fields provide information 
  21        about specific aspects of a Python object, such as the 
  22        description of a function's parameter, or the author of a 
  23        module. 
  24      - X{literalblock}: A block of literal text.  This text should be 
  25        displayed as it would be displayed in plaintext.  The 
  26        parser removes the appropriate amount of leading whitespace  
  27        from each line in the literal block. 
  28      - X{doctestblock}: A block containing sample python code, 
  29        formatted according to the specifications of the C{doctest} 
  30        module. 
  31      - X{ulist}: An unordered list. 
  32      - X{olist}: An ordered list. 
  33      - X{li}: A list item.  This tag is used both for unordered list 
  34        items and for ordered list items. 
  35   
  36  Additionally, the following X{inline regions} may be used within 
  37  C{para} blocks: 
  38       
  39      - X{code}:   Source code and identifiers. 
  40      - X{math}:   Mathematical expressions. 
  41      - X{index}:  A term which should be included in an index, if one 
  42                   is generated. 
  43      - X{italic}: Italicized text. 
  44      - X{bold}:   Bold-faced text. 
  45      - X{uri}:    A Universal Resource Indicator (URI) or Universal 
  46                   Resource Locator (URL) 
  47      - X{link}:   A Python identifier which should be hyperlinked to 
  48                   the named object's documentation, when possible. 
  49   
  50  The returned DOM tree will conform to the following Document Type 
  51  Description:: 
  52   
  53     <!ENTITY % colorized '(code | math | index | italic | 
  54                            bold | uri | link | symbol)*'> 
  55   
  56     <!ELEMENT epytext ((para | literalblock | doctestblock | 
  57                        section | ulist | olist)*, fieldlist?)> 
  58   
  59     <!ELEMENT para (#PCDATA | %colorized;)*> 
  60   
  61     <!ELEMENT section (para | literalblock | doctestblock | 
  62                        section | ulist | olist)+> 
  63   
  64     <!ELEMENT fieldlist (field+)> 
  65     <!ELEMENT field (tag, arg?, (para | literalblock | doctestblock | 
  66                                  ulist | olist)+)> 
  67     <!ELEMENT tag (#PCDATA)> 
  68     <!ELEMENT arg (#PCDATA)> 
  69      
  70     <!ELEMENT literalblock (#PCDATA | %colorized;)*> 
  71     <!ELEMENT doctestblock (#PCDATA)> 
  72   
  73     <!ELEMENT ulist (li+)> 
  74     <!ELEMENT olist (li+)> 
  75     <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+> 
  76     <!ATTLIST li bullet NMTOKEN #IMPLIED> 
  77     <!ATTLIST olist start NMTOKEN #IMPLIED> 
  78   
  79     <!ELEMENT uri     (name, target)> 
  80     <!ELEMENT link    (name, target)> 
  81     <!ELEMENT name    (#PCDATA | %colorized;)*> 
  82     <!ELEMENT target  (#PCDATA)> 
  83      
  84     <!ELEMENT code    (#PCDATA | %colorized;)*> 
  85     <!ELEMENT math    (#PCDATA | %colorized;)*> 
  86     <!ELEMENT italic  (#PCDATA | %colorized;)*> 
  87     <!ELEMENT bold    (#PCDATA | %colorized;)*> 
  88     <!ELEMENT indexed (#PCDATA | %colorized;)> 
  89     <!ATTLIST code style CDATA #IMPLIED> 
  90   
  91     <!ELEMENT symbol (#PCDATA)> 
  92   
  93  @var SYMBOLS: A list of the escape symbols that are supported 
  94        by epydoc.  Currently the following symbols are supported: 
  95  <<<SYMBOLS>>> 
  96  """ 
  97   
  98   
  99   
 100  __docformat__ = 'epytext en' 
 101   
 102   
 103   
 104   
 105   
 106   
 107   
 108   
 109  import re, string, types, sys, os.path 
 110  from epydoc.markup import * 
 111  from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex 
 112  from epydoc.markup.doctest import doctest_to_html, doctest_to_latex 
 113   
 114   
 115   
 116   
 117   
 119      """ 
 120      A very simple DOM-like representation for parsed epytext 
 121      documents.  Each epytext document is encoded as a tree whose nodes 
 122      are L{Element} objects, and whose leaves are C{string}s.  Each 
 123      node is marked by a I{tag} and zero or more I{attributes}.  Each 
 124      attribute is a mapping from a string key to a string value. 
 125      """ 
 126 -    def __init__(self, tag, *children, **attribs): 
  127          self.tag = tag 
 128          """A string tag indicating the type of this element. 
 129          @type: C{string}""" 
 130           
 131          self.children = list(children) 
 132          """A list of the children of this element. 
 133          @type: C{list} of (C{string} or C{Element})""" 
 134           
 135          self.attribs = attribs 
 136          """A dictionary mapping attribute names to attribute values 
 137          for this element. 
 138          @type: C{dict} from C{string} to C{string}""" 
  139   
 141          """ 
 142          Return a string representation of this element, using XML 
 143          notation. 
 144          @bug: Doesn't escape '<' or '&' or '>'. 
 145          """ 
 146          attribs = ''.join([' %s=%r' % t for t in self.attribs.items()]) 
 147          return ('<%s%s>' % (self.tag, attribs) + 
 148                  ''.join([str(child) for child in self.children]) + 
 149                  '</%s>' % self.tag) 
  150   
 152          attribs = ''.join([', %s=%r' % t for t in self.attribs.items()]) 
 153          args = ''.join([', %r' % c for c in self.children]) 
 154          return 'Element(%s%s%s)' % (self.tag, args, attribs) 
   155   
 156   
 157   
 158   
 159   
 160   
 161   
 162  _HEADING_CHARS = "=-~" 
 163   
 164   
 165  _ESCAPES = {'lb':'{', 'rb': '}'} 
 166   
 167   
 168  SYMBOLS = [ 
 169       
 170      '<-', '->', '^', 'v',  
 171   
 172       
 173      'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',   
 174      'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',   
 175      'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',   
 176      'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega', 
 177      'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',   
 178      'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',   
 179      'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',   
 180      'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega', 
 181       
 182       
 183      'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr', 
 184      'lArr', 'rArr', 'uArr', 'dArr', 'hArr',  
 185      'copy', 'times', 'forall', 'exist', 'part', 
 186      'empty', 'isin', 'notin', 'ni', 'prod', 'sum', 
 187      'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup', 
 188      'int', 'there4', 'sim', 'cong', 'asymp', 'ne', 
 189      'equiv', 'le', 'ge', 'sub', 'sup', 'nsub', 
 190      'sube', 'supe', 'oplus', 'otimes', 'perp', 
 191   
 192       
 193      'infinity', 'integral', 'product', 
 194      '>=', '<=',  
 195      ] 
 196   
 197  _SYMBOLS = {} 
 198  for symbol in SYMBOLS: _SYMBOLS[symbol] = 1 
 199   
 200   
 201  symblist = '      ' 
 202  symblist += ';\n      '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol) 
 203                                for symbol in SYMBOLS]) 
 204  __doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist) 
 205  del symbol, symblist 
 206   
 207   
 208  _COLORIZING_TAGS = { 
 209      'C': 'code', 
 210      'M': 'math', 
 211      'X': 'indexed', 
 212      'I': 'italic',  
 213      'B': 'bold', 
 214      'U': 'uri', 
 215      'L': 'link',        
 216      'E': 'escape',      
 217      'S': 'symbol', 
 218      'G': 'graph', 
 219      } 
 220   
 221   
 222  _LINK_COLORIZING_TAGS = ['link', 'uri'] 
 223   
 224   
 225   
 226   
 227   
 228 -def parse(str, errors = None): 
  229      """ 
 230      Return a DOM tree encoding the contents of an epytext string.  Any 
 231      errors generated during parsing will be stored in C{errors}. 
 232   
 233      @param str: The epytext string to parse. 
 234      @type str: C{string} 
 235      @param errors: A list where any errors generated during parsing 
 236          will be stored.  If no list is specified, then fatal errors 
 237          will generate exceptions, and non-fatal errors will be 
 238          ignored. 
 239      @type errors: C{list} of L{ParseError} 
 240      @return: a DOM tree encoding the contents of an epytext string. 
 241      @rtype: C{Element} 
 242      @raise ParseError: If C{errors} is C{None} and an error is 
 243          encountered while parsing. 
 244      """ 
 245       
 246      if errors == None: 
 247          errors = [] 
 248          raise_on_error = 1 
 249      else: 
 250          raise_on_error = 0 
 251   
 252       
 253      str = re.sub('\015\012', '\012', str) 
 254      str = string.expandtabs(str) 
 255   
 256       
 257      tokens = _tokenize(str, errors) 
 258   
 259       
 260      encountered_field = 0 
 261   
 262       
 263      doc = Element('epytext') 
 264   
 265       
 266       
 267       
 268       
 269       
 270       
 271       
 272       
 273       
 274       
 275      stack = [None, doc] 
 276      indent_stack = [-1, None] 
 277   
 278      for token in tokens: 
 279           
 280           
 281           
 282           
 283           
 284           
 285           
 286          _pop_completed_blocks(token, stack, indent_stack) 
 287   
 288           
 289          if token.tag == Token.PARA: 
 290              _add_para(doc, token, stack, indent_stack, errors) 
 291                        
 292           
 293          elif token.tag == Token.HEADING: 
 294              _add_section(doc, token, stack, indent_stack, errors) 
 295   
 296           
 297          elif token.tag == Token.LBLOCK: 
 298              stack[-1].children.append(token.to_dom(doc)) 
 299   
 300           
 301          elif token.tag == Token.DTBLOCK: 
 302              stack[-1].children.append(token.to_dom(doc)) 
 303   
 304           
 305          elif token.tag == Token.BULLET: 
 306              _add_list(doc, token, stack, indent_stack, errors) 
 307          else: 
 308              assert 0, 'Unknown token type: '+token.tag 
 309   
 310           
 311          if stack[-1].tag == 'field': 
 312              encountered_field = 1 
 313          elif encountered_field == 1: 
 314              if len(stack) <= 3: 
 315                  estr = ("Fields must be the final elements in an "+ 
 316                          "epytext string.") 
 317                  errors.append(StructuringError(estr, token.startline)) 
 318   
 319       
 320       
 321       
 322       
 323       
 324      for child in doc.children: 
 325          _raise_graphs(child, doc) 
 326   
 327       
 328      if len([e for e in errors if e.is_fatal()]) > 0: 
 329          if raise_on_error: 
 330              raise errors[0] 
 331          else: 
 332              return None 
 333           
 334       
 335      return doc 
  336   
 338       
 339      have_graph_child = False 
 340      for elt in tree.children: 
 341          if isinstance(elt, Element): 
 342              _raise_graphs(elt, tree) 
 343              if elt.tag == 'graph': have_graph_child = True 
 344   
 345      block = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li') 
 346      if have_graph_child and tree.tag not in block: 
 347          child_index = 0 
 348          parent_index = parent.children.index(tree) 
 349          for elt in tree.children: 
 350              if isinstance(elt, Element) and elt.tag == 'graph': 
 351                   
 352                  left = tree.children[:child_index] 
 353                  right = tree.children[child_index+1:] 
 354                  parent.children[parent_index:parent_index+1] = [ 
 355                      Element(tree.tag, *left, **tree.attribs), 
 356                      elt, 
 357                      Element(tree.tag, *right, **tree.attribs)] 
 358                  child_index = 0 
 359                  parent_index += 2 
 360                  tree = parent.children[parent_index] 
 361              else: 
 362                  child_index += 1 
  363   
 365      """ 
 366      Pop any completed blocks off the stack.  This includes any 
 367      blocks that we have dedented past, as well as any list item 
 368      blocks that we've dedented to.  The top element on the stack  
 369      should only be a list if we're about to start a new list 
 370      item (i.e., if the next token is a bullet). 
 371      """ 
 372      indent = token.indent 
 373      if indent != None: 
 374          while (len(stack) > 2): 
 375              pop = 0 
 376               
 377               
 378              if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1 
 379              elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1 
 380   
 381               
 382               
 383              elif (token.tag == 'bullet' and indent==indent_stack[-2] and  
 384                    stack[-1].tag in ('li', 'field')): pop=1 
 385   
 386               
 387              elif (stack[-1].tag in ('ulist', 'olist') and 
 388                    (token.tag != 'bullet' or token.contents[-1] == ':')): 
 389                  pop=1 
 390   
 391               
 392              if pop == 0: return 
 393              stack.pop() 
 394              indent_stack.pop() 
  395   
 396 -def _add_para(doc, para_token, stack, indent_stack, errors): 
  397      """Colorize the given paragraph, and add it to the DOM tree.""" 
 398       
 399       
 400      if indent_stack[-1] == None: 
 401          indent_stack[-1] = para_token.indent 
 402      if para_token.indent == indent_stack[-1]: 
 403           
 404          para = _colorize(doc, para_token, errors) 
 405          if para_token.inline: 
 406              para.attribs['inline'] = True 
 407          stack[-1].children.append(para) 
 408      else: 
 409          estr = "Improper paragraph indentation." 
 410          errors.append(StructuringError(estr, para_token.startline)) 
  411   
 412 -def _add_section(doc, heading_token, stack, indent_stack, errors): 
  413      """Add a new section to the DOM tree, with the given heading.""" 
 414      if indent_stack[-1] == None: 
 415          indent_stack[-1] = heading_token.indent 
 416      elif indent_stack[-1] != heading_token.indent: 
 417          estr = "Improper heading indentation." 
 418          errors.append(StructuringError(estr, heading_token.startline)) 
 419   
 420       
 421      for tok in stack[2:]: 
 422          if tok.tag != "section": 
 423              estr = "Headings must occur at the top level." 
 424              errors.append(StructuringError(estr, heading_token.startline)) 
 425              break 
 426      if (heading_token.level+2) > len(stack): 
 427          estr = "Wrong underline character for heading." 
 428          errors.append(StructuringError(estr, heading_token.startline)) 
 429   
 430       
 431       
 432      stack[heading_token.level+2:] = [] 
 433      indent_stack[heading_token.level+2:] = [] 
 434   
 435       
 436      head = _colorize(doc, heading_token, errors, 'heading') 
 437   
 438       
 439      sec = Element("section") 
 440      stack[-1].children.append(sec) 
 441      stack.append(sec) 
 442      sec.children.append(head) 
 443      indent_stack.append(None) 
  444           
 445 -def _add_list(doc, bullet_token, stack, indent_stack, errors): 
  446      """ 
 447      Add a new list item or field to the DOM tree, with the given 
 448      bullet or field tag.  When necessary, create the associated 
 449      list. 
 450      """ 
 451       
 452      if bullet_token.contents[-1] == '-': 
 453          list_type = 'ulist' 
 454      elif bullet_token.contents[-1] == '.': 
 455          list_type = 'olist' 
 456      elif bullet_token.contents[-1] == ':': 
 457          list_type = 'fieldlist' 
 458      else: 
 459          raise AssertionError('Bad Bullet: %r' % bullet_token.contents) 
 460   
 461       
 462      newlist = 0 
 463      if stack[-1].tag != list_type: 
 464          newlist = 1 
 465      elif list_type == 'olist' and stack[-1].tag == 'olist': 
 466          old_listitem = stack[-1].children[-1] 
 467          old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1] 
 468          new_bullet = bullet_token.contents.split('.')[:-1] 
 469          if (new_bullet[:-1] != old_bullet[:-1] or 
 470              int(new_bullet[-1]) != int(old_bullet[-1])+1): 
 471              newlist = 1 
 472   
 473       
 474      if newlist: 
 475          if stack[-1].tag is 'fieldlist': 
 476               
 477               
 478               
 479               
 480               
 481               
 482              estr = "Lists must be indented." 
 483              errors.append(StructuringError(estr, bullet_token.startline)) 
 484          if stack[-1].tag in ('ulist', 'olist', 'fieldlist'): 
 485              stack.pop() 
 486              indent_stack.pop() 
 487   
 488          if (list_type != 'fieldlist' and indent_stack[-1] is not None and 
 489              bullet_token.indent == indent_stack[-1]): 
 490               
 491               
 492               
 493              if bullet_token.startline != 1 or bullet_token.indent != 0: 
 494                  estr = "Lists must be indented." 
 495                  errors.append(StructuringError(estr, bullet_token.startline)) 
 496   
 497          if list_type == 'fieldlist': 
 498               
 499              for tok in stack[2:]: 
 500                  if tok.tag != "section": 
 501                      estr = "Fields must be at the top level." 
 502                      errors.append( 
 503                          StructuringError(estr, bullet_token.startline)) 
 504                      break 
 505              stack[2:] = [] 
 506              indent_stack[2:] = [] 
 507   
 508           
 509          lst = Element(list_type) 
 510          stack[-1].children.append(lst) 
 511          stack.append(lst) 
 512          indent_stack.append(bullet_token.indent) 
 513          if list_type == 'olist': 
 514              start = bullet_token.contents.split('.')[:-1] 
 515              if start != '1': 
 516                  lst.attribs["start"] = start[-1] 
 517   
 518       
 519       
 520       
 521       
 522      if list_type == 'fieldlist': 
 523          li = Element("field") 
 524          token_words = bullet_token.contents[1:-1].split(None, 1) 
 525          tag_elt = Element("tag") 
 526          tag_elt.children.append(token_words[0]) 
 527          li.children.append(tag_elt) 
 528   
 529          if len(token_words) > 1: 
 530              arg_elt = Element("arg") 
 531              arg_elt.children.append(token_words[1]) 
 532              li.children.append(arg_elt) 
 533      else: 
 534          li = Element("li") 
 535          if list_type == 'olist': 
 536              li.attribs["bullet"] = bullet_token.contents 
 537   
 538       
 539      stack[-1].children.append(li) 
 540      stack.append(li) 
 541      indent_stack.append(None) 
  542   
 543   
 544   
 545   
 546   
 548      """ 
 549      C{Token}s are an intermediate data structure used while 
 550      constructing the structuring DOM tree for a formatted docstring. 
 551      There are five types of C{Token}: 
 552       
 553          - Paragraphs 
 554          - Literal blocks 
 555          - Doctest blocks 
 556          - Headings 
 557          - Bullets 
 558   
 559      The text contained in each C{Token} is stored in the 
 560      C{contents} variable.  The string in this variable has been 
 561      normalized.  For paragraphs, this means that it has been converted  
 562      into a single line of text, with newline/indentation replaced by 
 563      single spaces.  For literal blocks and doctest blocks, this means 
 564      that the appropriate amount of leading whitespace has been removed  
 565      from each line. 
 566   
 567      Each C{Token} has an indentation level associated with it, 
 568      stored in the C{indent} variable.  This indentation level is used 
 569      by the structuring procedure to assemble hierarchical blocks. 
 570   
 571      @type tag: C{string} 
 572      @ivar tag: This C{Token}'s type.  Possible values are C{Token.PARA}  
 573          (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK} 
 574          (doctest block), C{Token.HEADINGC}, and C{Token.BULLETC}. 
 575           
 576      @type startline: C{int} 
 577      @ivar startline: The line on which this C{Token} begins.  This  
 578          line number is only used for issuing errors. 
 579   
 580      @type contents: C{string} 
 581      @ivar contents: The normalized text contained in this C{Token}. 
 582       
 583      @type indent: C{int} or C{None} 
 584      @ivar indent: The indentation level of this C{Token} (in 
 585          number of leading spaces).  A value of C{None} indicates an 
 586          unknown indentation; this is used for list items and fields 
 587          that begin with one-line paragraphs. 
 588           
 589      @type level: C{int} or C{None} 
 590      @ivar level: The heading-level of this C{Token} if it is a 
 591          heading; C{None}, otherwise.  Valid heading levels are 0, 1, 
 592          and 2. 
 593   
 594      @type inline: C{bool} 
 595      @ivar inline: If True, the element is an inline level element, comparable 
 596          to an HTML C{<span>} tag. Else, it is a block level element, comparable 
 597          to an HTML C{<div>}. 
 598   
 599      @type PARA: C{string} 
 600      @cvar PARA: The C{tag} value for paragraph C{Token}s. 
 601      @type LBLOCK: C{string} 
 602      @cvar LBLOCK: The C{tag} value for literal C{Token}s. 
 603      @type DTBLOCK: C{string} 
 604      @cvar DTBLOCK: The C{tag} value for doctest C{Token}s. 
 605      @type HEADING: C{string} 
 606      @cvar HEADING: The C{tag} value for heading C{Token}s. 
 607      @type BULLET: C{string} 
 608      @cvar BULLET: The C{tag} value for bullet C{Token}s.  This C{tag} 
 609          value is also used for field tag C{Token}s, since fields 
 610          function syntactically the same as list items. 
 611      """ 
 612       
 613      PARA = "para" 
 614      LBLOCK = "literalblock" 
 615      DTBLOCK = "doctestblock" 
 616      HEADING = "heading" 
 617      BULLET = "bullet" 
 618   
 619 -    def __init__(self, tag, startline, contents, indent, level=None, 
 620                   inline=False): 
  621          """ 
 622          Create a new C{Token}. 
 623   
 624          @param tag: The type of the new C{Token}. 
 625          @type tag: C{string} 
 626          @param startline: The line on which the new C{Token} begins. 
 627          @type startline: C{int} 
 628          @param contents: The normalized contents of the new C{Token}. 
 629          @type contents: C{string} 
 630          @param indent: The indentation of the new C{Token} (in number 
 631              of leading spaces).  A value of C{None} indicates an 
 632              unknown indentation. 
 633          @type indent: C{int} or C{None} 
 634          @param level: The heading-level of this C{Token} if it is a 
 635              heading; C{None}, otherwise. 
 636          @type level: C{int} or C{None} 
 637          @param inline: Is this C{Token} inline as a C{<span>}?. 
 638          @type inline: C{bool} 
 639          """ 
 640          self.tag = tag 
 641          self.startline = startline 
 642          self.contents = contents 
 643          self.indent = indent 
 644          self.level = level 
 645          self.inline = inline 
  646   
 648          """ 
 649          @rtype: C{string} 
 650          @return: the formal representation of this C{Token}. 
 651              C{Token}s have formal representaitons of the form::  
 652                  <Token: para at line 12> 
 653          """ 
 654          return '<Token: %s at line %s>' % (self.tag, self.startline) 
  655   
 657          """ 
 658          @return: a DOM representation of this C{Token}. 
 659          @rtype: L{Element} 
 660          """ 
 661          e = Element(self.tag) 
 662          e.children.append(self.contents) 
 663          return e 
   664   
 665   
 666   
 667   
 668  _ULIST_BULLET = '[-]( +|$)' 
 669  _OLIST_BULLET = '(\d+[.])+( +|$)' 
 670  _FIELD_BULLET = '@\w+( [^{}:\n]+)?:' 
 671  _BULLET_RE = re.compile(_ULIST_BULLET + '|' + 
 672                          _OLIST_BULLET + '|' + 
 673                          _FIELD_BULLET) 
 674  _LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET) 
 675  _FIELD_BULLET_RE = re.compile(_FIELD_BULLET) 
 676  del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET 
 677   
 679      """ 
 680      Construct a L{Token} containing the doctest block starting at 
 681      C{lines[start]}, and append it to C{tokens}.  C{block_indent} 
 682      should be the indentation of the doctest block.  Any errors 
 683      generated while tokenizing the doctest block will be appended to 
 684      C{errors}. 
 685   
 686      @param lines: The list of lines to be tokenized 
 687      @param start: The index into C{lines} of the first line of the 
 688          doctest block to be tokenized. 
 689      @param block_indent: The indentation of C{lines[start]}.  This is 
 690          the indentation of the doctest block. 
 691      @param errors: A list where any errors generated during parsing 
 692          will be stored.  If no list is specified, then errors will  
 693          generate exceptions. 
 694      @return: The line number of the first line following the doctest 
 695          block. 
 696           
 697      @type lines: C{list} of C{string} 
 698      @type start: C{int} 
 699      @type block_indent: C{int} 
 700      @type tokens: C{list} of L{Token} 
 701      @type errors: C{list} of L{ParseError} 
 702      @rtype: C{int} 
 703      """ 
 704       
 705       
 706       
 707      min_indent = block_indent 
 708   
 709      linenum = start + 1 
 710      while linenum < len(lines): 
 711           
 712          line = lines[linenum] 
 713          indent = len(line) - len(line.lstrip()) 
 714           
 715           
 716          if indent == len(line): break 
 717           
 718           
 719          if indent < block_indent: 
 720              min_indent = min(min_indent, indent) 
 721              estr = 'Improper doctest block indentation.' 
 722              errors.append(TokenizationError(estr, linenum)) 
 723   
 724           
 725          linenum += 1 
 726   
 727       
 728      contents = [line[min_indent:] for line in lines[start:linenum]] 
 729      contents = '\n'.join(contents) 
 730      tokens.append(Token(Token.DTBLOCK, start, contents, block_indent)) 
 731      return linenum 
  732   
 734      """ 
 735      Construct a L{Token} containing the literal block starting at 
 736      C{lines[start]}, and append it to C{tokens}.  C{block_indent} 
 737      should be the indentation of the literal block.  Any errors 
 738      generated while tokenizing the literal block will be appended to 
 739      C{errors}. 
 740   
 741      @param lines: The list of lines to be tokenized 
 742      @param start: The index into C{lines} of the first line of the 
 743          literal block to be tokenized. 
 744      @param block_indent: The indentation of C{lines[start]}.  This is 
 745          the indentation of the literal block. 
 746      @param errors: A list of the errors generated by parsing.  Any 
 747          new errors generated while tokenizing this literal block 
 748          will be appended to this list. 
 749      @return: The line number of the first line following the literal 
 750          block.  
 751           
 752      @type lines: C{list} of C{string} 
 753      @type start: C{int} 
 754      @type block_indent: C{int} 
 755      @type tokens: C{list} of L{Token} 
 756      @type errors: C{list} of L{ParseError} 
 757      @rtype: C{int} 
 758      """ 
 759      linenum = start + 1 
 760      while linenum < len(lines): 
 761           
 762          line = lines[linenum] 
 763          indent = len(line) - len(line.lstrip()) 
 764   
 765           
 766           
 767          if len(line) != indent and indent <= block_indent: 
 768              break 
 769           
 770           
 771          linenum += 1 
 772   
 773       
 774      contents = [line[block_indent+1:] for line in lines[start:linenum]] 
 775      contents = '\n'.join(contents) 
 776      contents = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents) 
 777      tokens.append(Token(Token.LBLOCK, start, contents, block_indent)) 
 778      return linenum 
  779   
 781      """ 
 782      Construct L{Token}s for the bullet and the first paragraph of the 
 783      list item (or field) starting at C{lines[start]}, and append them 
 784      to C{tokens}.  C{bullet_indent} should be the indentation of the 
 785      list item.  Any errors generated while tokenizing will be 
 786      appended to C{errors}. 
 787   
 788      @param lines: The list of lines to be tokenized 
 789      @param start: The index into C{lines} of the first line of the 
 790          list item to be tokenized. 
 791      @param bullet_indent: The indentation of C{lines[start]}.  This is 
 792          the indentation of the list item. 
 793      @param errors: A list of the errors generated by parsing.  Any 
 794          new errors generated while tokenizing this list item 
 795          will be appended to this list. 
 796      @return: The line number of the first line following the list 
 797          item's first paragraph. 
 798           
 799      @type lines: C{list} of C{string} 
 800      @type start: C{int} 
 801      @type bullet_indent: C{int} 
 802      @type tokens: C{list} of L{Token} 
 803      @type errors: C{list} of L{ParseError} 
 804      @rtype: C{int} 
 805      """ 
 806      linenum = start + 1 
 807      para_indent = None 
 808      doublecolon = lines[start].rstrip()[-2:] == '::' 
 809   
 810       
 811      para_start = _BULLET_RE.match(lines[start], bullet_indent).end() 
 812      bcontents = lines[start][bullet_indent:para_start].strip() 
 813       
 814      while linenum < len(lines): 
 815           
 816          line = lines[linenum] 
 817          indent = len(line) - len(line.lstrip()) 
 818   
 819           
 820          if doublecolon: break 
 821          if line.rstrip()[-2:] == '::': doublecolon = 1 
 822   
 823           
 824          if indent == len(line): break 
 825   
 826           
 827          if indent < bullet_indent: break 
 828           
 829           
 830          if _BULLET_RE.match(line, indent): break 
 831           
 832           
 833           
 834          if para_indent == None: para_indent = indent 
 835   
 836           
 837          if indent != para_indent: break 
 838   
 839           
 840          linenum += 1 
 841   
 842       
 843      tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent, 
 844                          inline=True)) 
 845   
 846       
 847      pcontents = ([lines[start][para_start:].strip()] +  
 848                   [line.strip() for line in lines[start+1:linenum]]) 
 849      pcontents = ' '.join(pcontents).strip() 
 850      if pcontents: 
 851          tokens.append(Token(Token.PARA, start, pcontents, para_indent, 
 852                              inline=True)) 
 853   
 854       
 855      return linenum 
  856   
 858      """ 
 859      Construct a L{Token} containing the paragraph starting at 
 860      C{lines[start]}, and append it to C{tokens}.  C{para_indent} 
 861      should be the indentation of the paragraph .  Any errors 
 862      generated while tokenizing the paragraph will be appended to 
 863      C{errors}. 
 864   
 865      @param lines: The list of lines to be tokenized 
 866      @param start: The index into C{lines} of the first line of the 
 867          paragraph to be tokenized. 
 868      @param para_indent: The indentation of C{lines[start]}.  This is 
 869          the indentation of the paragraph. 
 870      @param errors: A list of the errors generated by parsing.  Any 
 871          new errors generated while will tokenizing this paragraph 
 872          will be appended to this list. 
 873      @return: The line number of the first line following the 
 874          paragraph.  
 875           
 876      @type lines: C{list} of C{string} 
 877      @type start: C{int} 
 878      @type para_indent: C{int} 
 879      @type tokens: C{list} of L{Token} 
 880      @type errors: C{list} of L{ParseError} 
 881      @rtype: C{int} 
 882      """ 
 883      linenum = start + 1 
 884      doublecolon = 0 
 885      while linenum < len(lines): 
 886           
 887          line = lines[linenum] 
 888          indent = len(line) - len(line.lstrip()) 
 889   
 890           
 891          if doublecolon: break 
 892          if line.rstrip()[-2:] == '::': doublecolon = 1 
 893   
 894           
 895          if indent == len(line): break 
 896   
 897           
 898          if indent != para_indent: break 
 899   
 900           
 901          if _BULLET_RE.match(line, indent): break 
 902   
 903           
 904          if line[indent] == '@': 
 905              estr = "Possible mal-formatted field item." 
 906              errors.append(TokenizationError(estr, linenum, is_fatal=0)) 
 907               
 908           
 909          linenum += 1 
 910   
 911      contents = [line.strip() for line in lines[start:linenum]] 
 912       
 913       
 914      if ((len(contents) < 2) or 
 915          (contents[1][0] not in _HEADING_CHARS) or 
 916          (abs(len(contents[0])-len(contents[1])) > 5)): 
 917          looks_like_heading = 0 
 918      else: 
 919          looks_like_heading = 1 
 920          for char in contents[1]: 
 921              if char != contents[1][0]: 
 922                  looks_like_heading = 0 
 923                  break 
 924   
 925      if looks_like_heading: 
 926          if len(contents[0]) != len(contents[1]): 
 927              estr = ("Possible heading typo: the number of "+ 
 928                      "underline characters must match the "+ 
 929                      "number of heading characters.") 
 930              errors.append(TokenizationError(estr, start, is_fatal=0)) 
 931          else: 
 932              level = _HEADING_CHARS.index(contents[1][0]) 
 933              tokens.append(Token(Token.HEADING, start, 
 934                                  contents[0], para_indent, level)) 
 935              return start+2 
 936                    
 937       
 938      contents = ' '.join(contents) 
 939      tokens.append(Token(Token.PARA, start, contents, para_indent)) 
 940      return linenum 
  941           
 943      """ 
 944      Split a given formatted docstring into an ordered list of 
 945      C{Token}s, according to the epytext markup rules. 
 946   
 947      @param str: The epytext string 
 948      @type str: C{string} 
 949      @param errors: A list where any errors generated during parsing 
 950          will be stored.  If no list is specified, then errors will  
 951          generate exceptions. 
 952      @type errors: C{list} of L{ParseError} 
 953      @return: a list of the C{Token}s that make up the given string. 
 954      @rtype: C{list} of L{Token} 
 955      """ 
 956      tokens = [] 
 957      lines = str.split('\n') 
 958   
 959       
 960       
 961      linenum = 0 
 962      while linenum < len(lines): 
 963           
 964          line = lines[linenum] 
 965          indent = len(line)-len(line.lstrip()) 
 966   
 967          if indent == len(line): 
 968               
 969              linenum += 1 
 970              continue 
 971          elif line[indent:indent+4] == '>>> ': 
 972               
 973              linenum = _tokenize_doctest(lines, linenum, indent, 
 974                                          tokens, errors) 
 975          elif _BULLET_RE.match(line, indent): 
 976               
 977              linenum = _tokenize_listart(lines, linenum, indent, 
 978                                          tokens, errors) 
 979              if tokens[-1].indent != None: 
 980                  indent = tokens[-1].indent 
 981          else: 
 982               
 983              if line[indent] == '@': 
 984                  estr = "Possible mal-formatted field item." 
 985                  errors.append(TokenizationError(estr, linenum, is_fatal=0)) 
 986               
 987               
 988              linenum = _tokenize_para(lines, linenum, indent, tokens, errors) 
 989   
 990           
 991          if (tokens[-1].tag == Token.PARA and 
 992              tokens[-1].contents[-2:] == '::'): 
 993              tokens[-1].contents = tokens[-1].contents[:-1] 
 994              linenum = _tokenize_literal(lines, linenum, indent, tokens, errors) 
 995   
 996      return tokens 
  997   
 998   
 999   
1000   
1001   
1002   
1003   
# Matches a single open or close brace.
_BRACE_RE = re.compile(r'{|}')
# Splits "text <target>" into (text, target); an optional "URI:" or
# "URL:" prefix inside the angle brackets is not part of the target.
_TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
1006   
def _colorize(doc, token, errors, tagName='para'):
    """
    Given a token containing the contents of a paragraph, produce a
    DOM C{Element} encoding that paragraph.  Colorized regions are
    represented using DOM C{Element}s, and text is represented using
    strings.

    @param doc: Passed through unchanged to L{_colorize_graph} and
        L{_colorize_link}.
    @param token: The token whose C{contents} string should be
        colorized.  It is also attached to any errors generated.
    @type token: L{Token}
    @param errors: A list of errors.  Any newly generated errors will
        be appended to this list.
    @type errors: C{list} of L{ColorizingError}

    @param tagName: The element tag for the DOM C{Element} that should
        be generated.
    @type tagName: C{string}

    @return: a DOM C{Element} encoding the given paragraph.
    @returntype: C{Element}
    """
    text = token.contents

    # Stack of the elements that are currently open.  The bottom
    # element is the one that will be returned; every brace-delimited
    # region pushes a new element on top of it.
    stack = [Element(tagName)]

    # Parallel stack holding the index of the open brace for each
    # element in C{stack}; used to report unbalanced open braces.
    openbrace_stack = [0]

    # Process the string one brace at a time.  C{start} is the index
    # of the first character that has not yet been added to any
    # element; C{end} is the index of the brace being processed.
    start = 0
    while 1:
        match = _BRACE_RE.search(text, start)
        if match is None: break
        end = match.start()

        # Open braces start new regions.
        if match.group() == '{':
            if (end > 0) and 'A' <= text[end-1] <= 'Z':
                # A capital letter directly before the brace is the
                # markup tag; it is not part of the preceding text.
                if (end-1) > start:
                    stack[-1].children.append(text[start:end-1])
                if text[end-1] not in _COLORIZING_TAGS:
                    estr = "Unknown inline markup tag."
                    errors.append(ColorizingError(estr, token, end-1))
                    stack.append(Element('unknown'))
                else:
                    tag = _COLORIZING_TAGS[text[end-1]]
                    stack.append(Element(tag))
            else:
                # A brace with no tag letter is a literal brace pair.
                if end > start:
                    stack[-1].children.append(text[start:end])
                stack.append(Element('litbrace'))
            openbrace_stack.append(end)
            stack[-2].children.append(stack[-1])

        # Close braces end the innermost open region.
        elif match.group() == '}':
            # Report (and otherwise ignore) unbalanced close braces.
            if len(stack) <= 1:
                estr = "Unbalanced '}'."
                errors.append(ColorizingError(estr, token, end))
                start = end + 1
                continue

            closing = stack[-1]
            parent = stack[-2]

            # Add any remaining text to the region being closed.
            if end > start:
                closing.children.append(text[start:end])

            # Symbols: replace the region with a symbol element.
            if closing.tag == 'symbol':
                if (len(closing.children) != 1 or
                    not isinstance(closing.children[0], basestring)):
                    estr = "Invalid symbol code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    symb = closing.children[0]
                    if symb in _SYMBOLS:
                        parent.children[-1] = Element('symbol', symb)
                    else:
                        estr = "Invalid symbol code."
                        errors.append(ColorizingError(estr, token, end))

            # Escapes: replace the region with the escaped text.
            if closing.tag == 'escape':
                if (len(closing.children) != 1 or
                    not isinstance(closing.children[0], basestring)):
                    estr = "Invalid escape code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    escp = closing.children[0]
                    if escp in _ESCAPES:
                        # A named escape.
                        parent.children[-1] = _ESCAPES[escp]
                    elif len(escp) == 1:
                        # A single character stands for itself.
                        parent.children[-1] = escp
                    else:
                        estr = "Invalid escape code."
                        errors.append(ColorizingError(estr, token, end))

            # Literal braces: splice the contents back into the
            # parent, surrounded by the braces themselves.
            if closing.tag == 'litbrace':
                parent.children[-1:] = ['{'] + closing.children + ['}']

            # Graphs and links get their contents normalized.
            if closing.tag == 'graph':
                _colorize_graph(doc, closing, token, end, errors)

            if closing.tag in _LINK_COLORIZING_TAGS:
                _colorize_link(doc, closing, token, end, errors)

            # The region is now closed.
            openbrace_stack.pop()
            stack.pop()

        start = end+1

    # Add any text that follows the last brace.
    if start < len(text):
        stack[-1].children.append(text[start:])

    # Anything still open at this point was never closed.
    if len(stack) != 1:
        estr = "Unbalanced '{'."
        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))

    return stack[0]
 1146   
GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']

def _colorize_graph(doc, graph, token, end, errors):
    """
    Normalize the children of a C{graph} element, in place.  On
    success the element's children become the graph type followed by
    one string per argument.  On failure an error is appended to
    C{errors} and the children become C{['none', '']}.

    Eg::
      G{classtree}
      G{classtree x, y, z}
      G{importgraph}

    @param doc: Unused by this function.
    @param graph: The C{graph} element to normalize.  Its only child
        must be a single string holding the graph specification.
    @param token: The token being colorized; attached to any error.
    @param end: The index of the closing brace; attached to any error.
    @param errors: A list to which any new error is appended.
    """
    bad_graph_spec = None
    args = []

    children = graph.children[:]
    graph.children = []

    if len(children) != 1 or not isinstance(children[0], basestring):
        bad_graph_spec = "Bad graph specification"
    else:
        pieces = children[0].split(None, 1)
        if not pieces:
            # A whitespace-only specification has no graph type.
            bad_graph_spec = "Bad graph specification"
        else:
            graphtype = pieces[0].replace(':','').strip().lower()
            if graphtype in GRAPH_TYPES:
                if len(pieces) == 2:
                    # Arguments may be separated by commas and/or
                    # whitespace; colons are dropped.
                    args = pieces[1].replace(',', ' ').replace(':','').split()
                    # Each argument is checked on its own, which keeps
                    # the check linear in the input length.
                    if [arg for arg in args
                        if not re.match(r'[\w\.]+$', arg)]:
                        bad_graph_spec = "Bad graph arg list"
            else:
                bad_graph_spec = ("Bad graph type %s -- use one of %s" %
                                  (pieces[0], ', '.join(GRAPH_TYPES)))

    if bad_graph_spec:
        errors.append(ColorizingError(bad_graph_spec, token, end))
        graph.children.append('none')
        graph.children.append('')
        return

    graph.children.append(graphtype)
    for arg in args:
        graph.children.append(arg)
 1187   
def _colorize_link(doc, link, token, end, errors):
    """
    Normalize the children of a link-like element (C{link.tag} is
    checked against C{'uri'} and C{'link'}), in place.  On success
    the element's children become a C{name} element followed by a
    C{target} element.  On failure an error is appended to C{errors}
    and the children are left unchanged.

    @param doc: Unused by this function.
    @param link: The element to normalize.  Its last child must be a
        string, which may end with an explicit C{<target>}.
    @param token: The token being colorized; attached to any error.
    @param end: The index of the closing brace; attached to any error.
    @param errors: A list to which any new error is appended.
    """
    variables = link.children[:]

    # The last child must be a string; that is where the target is.
    if len(variables)==0 or not isinstance(variables[-1], basestring):
        estr = "Bad %s target." % link.tag
        errors.append(ColorizingError(estr, token, end))
        return

    # Did they provide an explicit target?
    match2 = _TARGET_RE.match(variables[-1])
    if match2:
        (text, target) = match2.groups()
        variables[-1] = text
    # Otherwise the name doubles as the target, which is only possible
    # when the name is a single plain string.
    elif len(variables) == 1:
        target = variables[0]
    else:
        estr = "Bad %s target." % link.tag
        errors.append(ColorizingError(estr, token, end))
        return

    # Construct the name element.
    name_elt = Element('name', *variables)

    # Clean up the target: remove all whitespace, then apply the
    # tag-specific rules.
    target = re.sub(r'\s', '', target)
    if link.tag=='uri':
        # Targets without a scheme get "mailto:" if they look like an
        # address, and "http://" otherwise.
        if not re.match(r'\w+:', target):
            if re.match(r'\w+@(\w+)(\.\w+)*', target):
                target = 'mailto:' + target
            else:
                target = 'http://'+target
    elif link.tag=='link':
        # Remove a trailing argument list, then require a dotted name.
        target = re.sub(r'\(.*\)$', '', target)
        if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
            estr = "Bad link target."
            errors.append(ColorizingError(estr, token, end))
            return

    # Construct the target element.
    target_elt = Element('target', target)

    # Replace the children with the name and target elements.
    link.children = [name_elt, target_elt]
 1235   
1236   
1237   
1238   
1239   
def to_epytext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string.
    This is the inverse operation from L{parse}.  I.e., assuming there
    are no errors, the following is true:
        - C{parse(to_epytext(tree)) == tree}

    The inverse is true, except that whitespace, line wrapping, and
    character escaping may be done differently.
        - C{to_epytext(parse(str)) == str} (approximately)

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    @raise ValueError: If C{tree} has a tag that is not recognized.
    """
    if isinstance(tree, basestring):
        # Braces in text are temporarily encoded as the control
        # characters \0 and \1, so that they can be told apart from
        # the braces of inline markup.  The enclosing block decides
        # how to decode them.
        return tree.replace('{', '\0').replace('}', '\1')

    if tree.tag == 'epytext': indent -= 2
    if tree.tag == 'section': seclevel += 1
    variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # A literal block marks its start with \2.  Merge that marker
    # with the colon ending the preceding paragraph, to give "::".
    childstr = re.sub(':(\\s*)\x02', '::\\1', childstr)

    if tree.tag == 'para':
        text = wordwrap(childstr, indent)+'\n'
        # Escape anything that would otherwise be read as block
        # structure when the output is parsed again.
        text = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', text)
        text = re.sub(r'((^|\n)\s*)-', r'\1E{-}', text)
        text = re.sub(r'((^|\n)\s*)@', r'\1E{@}', text)
        text = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', text)
        text = text.replace('\0', 'E{lb}')
        text = text.replace('\1', 'E{rb}')
        return text
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' '+ bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        text = childstr.replace('\0', 'E{lb}')
        text = text.replace('\1', 'E{rb}')
        # The underline must be as long as the heading text that is
        # actually written, i.e. after the brace escapes are expanded.
        uline = len(text)*_HEADING_CHARS[seclevel-1]
        return (indent-2)*' ' + text + '\n' + (indent-2)*' '+uline+'\n'
    elif tree.tag == 'doctestblock':
        text = childstr.replace('\0', '{')
        text = text.replace('\1', '}')
        lines = ['  '+indent*' '+line for line in text.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        text = childstr.replace('\0', '{')
        text = text.replace('\1', '}')
        lines = [(indent+1)*' '+line for line in text.split('\n')]
        return '\2' + '\n'.join(lines) + '\n\n'
    elif tree.tag == 'field':
        # The children are the tag, then zero or more args, then the
        # body.  Count the args without running off the end of the
        # list when the field has no body.
        numargs = 0
        while (numargs+1 < len(tree.children) and
               getattr(tree.children[numargs+1], 'tag', None) == 'arg'):
            numargs += 1
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        text = (indent)*' '+'@'+variables[0]
        if args: text += '(' + ', '.join(args) + ')'
        return text + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: look up the tag letter for this element.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
    raise ValueError('Unknown DOM element %r' % tree.tag)
 1326   
# Plaintext replacements for symbols; symbols that are not listed
# here are rendered as their own name.
SYMBOL_TO_PLAINTEXT = {
    'crarr': '\\',
    }

def to_plaintext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext to a string representation.
    This representation is similar to the string generated by
    C{to_epytext}, but C{to_plaintext} removes inline markup, prints
    escaped characters in unescaped form, etc.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The plaintext string corresponding to C{tree}.
    @rtype: C{string}
    @raise ValueError: If a C{uri} or C{link} element does not have
        exactly two children.
    """
    if isinstance(tree, basestring): return tree

    if tree.tag == 'section': seclevel += 1

    # Work out the indentation of the children: list items indent
    # their contents past the bullet, and the top-level element adds
    # no indentation at all.
    if tree.tag == 'epytext': cindent = indent
    elif tree.tag == 'li' and tree.attribs.get('bullet'):
        cindent = indent + 1 + len(tree.attribs.get('bullet'))
    else:
        cindent = indent + 2
    variables = [to_plaintext(c, cindent, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    if tree.tag == 'para':
        return wordwrap(childstr, indent)+'\n'
    elif tree.tag == 'li':
        # The contents were already indented; replace the indentation
        # of the first line with the bullet.
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' ' + bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ((indent-2)*' ' + childstr + '\n' +
                (indent-2)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        lines = [(indent+2)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        lines = [(indent+1)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'fieldlist':
        return childstr
    elif tree.tag == 'field':
        # The children are the tag, then zero or more args, then the
        # body.  Count the args without running off the end of the
        # list when the field has no body.
        numargs = 0
        while (numargs+1 < len(tree.children) and
               getattr(tree.children[numargs+1], 'tag', None) == 'arg'):
            numargs += 1
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        text = (indent)*' '+'@'+variables[0]
        if args: text += '(' + ', '.join(args) + ')'
        return text + ':\n' + ''.join(body)
    elif tree.tag == 'uri':
        if len(variables) != 2: raise ValueError('Bad URI ')
        elif variables[0] == variables[1]: return '<%s>' % variables[1]
        else: return '%r<%s>' % (variables[0], variables[1])
    elif tree.tag == 'link':
        if len(variables) != 2: raise ValueError('Bad Link')
        return '%s' % variables[0]
    elif tree.tag in ('olist', 'ulist'):
        # Collapse the blank lines between items, and end the list
        # with a single blank line.
        return childstr.replace('\n\n', '\n')+'\n'
    elif tree.tag == 'symbol':
        return '%s' % SYMBOL_TO_PLAINTEXT.get(childstr, childstr)
    elif tree.tag == 'graph':
        return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
    else:
        # Anything else (e.g. inline markup) is rendered as the plain
        # text of its children.
        return childstr
 1411   
def to_debug(tree, indent=4, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string,
    annotated with extra debugging information.  This function is
    similar to L{to_epytext}, but it adds explicit information about
    where different blocks begin, along the left margin.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  This includes the width of the left margin that
        holds the block markers.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The annotated epytext string corresponding to C{tree}.
    @rtype: C{string}
    @raise ValueError: If C{tree} has a tag that is not recognized.
    """
    if isinstance(tree, basestring):
        # Braces in text are temporarily encoded as the control
        # characters \0 and \1, so that they can be told apart from
        # the braces of inline markup.  The enclosing block decides
        # how to decode them.
        return tree.replace('{', '\0').replace('}', '\1')

    if tree.tag == 'section': seclevel += 1
    variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # A literal block marks its start with \2.  Merge that marker
    # with the colon ending the preceding paragraph, to give "::".
    childstr = re.sub(':( *\n     \\|\n)\x02', '::\\1', childstr)

    if tree.tag == 'para':
        text = wordwrap(childstr, indent-6, 69)+'\n'
        # Escape anything that would otherwise be read as block
        # structure when the output is parsed again.
        text = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', text)
        text = re.sub(r'((^|\n)\s*)-', r'\1E{-}', text)
        text = re.sub(r'((^|\n)\s*)@', r'\1E{@}', text)
        text = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', text)
        text = text.replace('\0', 'E{lb}')
        text = text.replace('\1', 'E{rb}')
        # Mark the first line, and draw the margin on the others.
        lines = text.rstrip().split('\n')
        lines[0] = '   P>|' + lines[0]
        lines[1:] = ['     |'+l for l in lines[1:]]
        return '\n'.join(lines)+'\n     |\n'
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return '  LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
    elif tree.tag in ('olist', 'ulist'):
        return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
    elif tree.tag == 'heading':
        text = childstr.replace('\0', 'E{lb}')
        text = text.replace('\1', 'E{rb}')
        # The underline must be as long as the heading text that is
        # actually written, i.e. after the brace escapes are expanded.
        uline = len(text)*_HEADING_CHARS[seclevel-1]
        return ('SEC'+repr(seclevel)+'>|'+(indent-8)*' ' + text + '\n' +
                '     |'+(indent-8)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        text = childstr.replace('\0', '{')
        text = text.replace('\1', '}')
        lines = ['     |'+(indent-4)*' '+line for line in text.split('\n')]
        lines[0] = 'DTST>'+lines[0][5:]
        return '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'literalblock':
        text = childstr.replace('\0', '{')
        text = text.replace('\1', '}')
        lines = ['     |'+(indent-5)*' '+line for line in text.split('\n')]
        lines[0] = ' LIT>'+lines[0][5:]
        return '\2' + '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'field':
        # The children are the tag, then zero or more args, then the
        # body.  Count the args without running off the end of the
        # list when the field has no body.
        numargs = 0
        while (numargs+1 < len(tree.children) and
               getattr(tree.children[numargs+1], 'tag', None) == 'arg'):
            numargs += 1
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        text = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
        if args: text += '(' + ', '.join(args) + ')'
        return text + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: look up the tag letter for this element.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
    raise ValueError('Unknown DOM element %r' % tree.tag)
 1501   
def to_rst(tree, indent=0, seclevel=0, wrap_startindex=0):
    """
    Convert a DOM document encoding epytext into a reStructuredText
    markup string.  (Because rst is fairly loosely defined, it is
    possible that this function will produce incorrect output in some
    cases.)  C{tree} is not modified.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @param wrap_startindex: Passed to C{wordwrap} as its fourth
        argument when wrapping a paragraph (presumably the column at
        which the paragraph's first line starts -- confirm against
        C{wordwrap}).  It is also subtracted from C{indent} when
        writing the marker of a field or list item.
    @type wrap_startindex: C{int}
    @return: The reStructuredText string corresponding to C{tree}.
    @rtype: C{string}
    @raise ValueError: If C{tree} has a tag that is not recognized.
    """
    if isinstance(tree, basestring):
        # Escape backslashes, and any markup start/end character that
        # sits at a word boundary.
        s = tree.replace('\\', '\\\\')
        return re.sub(r'(\b[*`|\[]|[*`|\]]\b)', r'\\\1', s)
    elif tree.tag == 'para':
        # Join the children, inserting an escaped space wherever an
        # inline-markup end character is directly followed by a
        # character other than whitespace or punctuation.
        s = ''
        for child in tree.children:
            childstr = to_rst(child, indent, seclevel)
            if ( s[-1:] in ('*','`','|') and
                 childstr[:1] not in (' ','\n','.',',',';',"'",'"',')',':') ):
                s += r'\ '
            s += childstr
        s = wordwrap(s, indent, 75, wrap_startindex)+'\n'
        # Escape line starts that would be read as bullets.
        s = re.sub(r'(?m)^(\s*)([\*\-\+])', r'\1\\\2', s)
        # Escape line starts that would be read as enumerators.
        s = re.sub(r'(?m)^(\s*)(\d+\.|\#\.)', r'\1\\\2', s)
        # Escape lines made of one repeated punctuation character.
        s = re.sub(r'(?m)^(\s*)(([^a-zA-Z0-9\s])\3\3*\s*$)', r'\1\\\2', s)
        # Escape line starts of ":", "-", "/" or "|".
        s = re.sub(r'(?m)^(\s*)([:\-\/\|])', r'\1\\\2', s)
        # Escape a "::" at the end of a line.
        s = re.sub(r'(?m)(::\s*)$', r'\\\1', s)
        # Escape a line consisting of ">>>".
        s = re.sub(r'(?m)^(\s*)(>>>)$', r'\1\\\2', s)
        # Escape line starts of "+", "=" or "-".
        s = re.sub(r'(?m)^(\s*)([+=-])', r'\1\\\2', s)
        # Escape line starts of "..".
        s = re.sub(r'(?m)^(\s*)(\.\.)', r'\1\\\2', s)
        return s
    elif tree.tag == 'doctestblock':
        for c in tree.children: assert isinstance(c, basestring)
        childstr = ''.join(tree.children)
        return '\n\n%s\n\n' % '\n'.join([' '*indent+line
                                         for line in childstr.split('\n')])
    elif tree.tag == 'literalblock':
        for c in tree.children: assert isinstance(c, basestring)
        childstr = ''.join(tree.children)
        return '\n\n::\n\n%s\n\n' % '\n'.join([' '*(indent+1)+line
                                           for line in childstr.split('\n')])
    elif tree.tag == 'link':
        name = to_rst(tree.children[0], indent, seclevel).strip()
        target = ''.join(tree.children[1].children).strip()
        # Use the short form when the name (minus any argument list
        # and underscore escapes) is the target itself.
        if target == re.sub(r'\(.*\)$', '', name).replace('\\_', '_'):
            return '`%s`' % target
        else:
            return '`%s <%s>`' % (name, target)
    elif tree.tag == 'uri':
        name = to_rst(tree.children[0], indent, seclevel).strip()
        target = ''.join(tree.children[1].children).strip()
        if target == name and name.startswith('http://'):
            return target
        else:
            return '`%s <%s>`__' % (name, target)
    elif tree.tag == 'target':
        return '<%s>' % ''.join(tree.children)
    elif tree.tag == 'symbol':
        return '|%s|' % ''.join(tree.children)
    else:
        # The children to render.  For fields this is narrowed to the
        # body below; the tree itself is never modified.
        children = tree.children

        # Headings take their underline character from the nesting
        # depth of the sections that enclose them.
        if tree.tag == 'section': seclevel += 1

        if tree.tag == 'li':
            bullet = tree.attribs.get('bullet') or '-'
            # For a multi-level enumerator, keep only the last level.
            bullet = re.sub(r'^(\d+.)*(\d+.)$', r'\2', bullet)
            child_indent = indent + len(bullet) + 1
            child_wrap_startindex = child_indent
        elif tree.tag == 'field':
            # The children are the tag, then zero or more args, then
            # the body.
            tagname = ''.join(children[0].children)
            numargs = 0
            while (numargs+1 < len(children) and
                   getattr(children[numargs+1], 'tag', None) == 'arg'):
                numargs += 1
            args = [''.join(c.children) for c in children[1:1+numargs]]
            children = children[1+numargs:]
            child_indent = max(indent, wrap_startindex) + 4
            tag = ':%s%s: ' % (tagname, ''.join([' %s' % arg for arg in args]))
            child_wrap_startindex = indent+len(tag)
        else:
            child_indent = indent
            child_wrap_startindex = wrap_startindex

        # Only the first child follows the marker on the same line,
        # so only it gets the adjusted indentation and start index.
        if children:
            childstrs = ([to_rst(children[0], child_indent,
                                 seclevel, child_wrap_startindex)] +
                         [to_rst(c, indent, seclevel)
                          for c in children[1:]])
        else:
            childstrs = []
        childstr = ''.join(childstrs)

        if tree.tag in ('fieldlist', 'olist', 'ulist'):
            # A list is written tightly unless one of its items
            # (other than the last) spans several lines.
            tight = True
            for item in childstrs[:-1]:
                if '\n' in item.strip():
                    tight = False
            if tight:
                return '\n'.join([item.rstrip() for item in childstrs])+'\n\n'
            else:
                return childstr+'\n'
        elif tree.tag == 'field':
            return '%s%s%s' % ((indent-wrap_startindex)*' ', tag, childstr)
        elif tree.tag == 'li':
            return '%s%s %s' % ((indent-wrap_startindex)*' ', bullet, childstr)
        elif tree.tag == 'graph':
            return '\n\n(GRAPH: %s)\n\n' % childstr
        elif tree.tag == 'heading':
            uline = len(childstr)*_HEADING_CHARS[seclevel-1]
            return '%s%s\n%s%s\n' % (indent*' ', childstr, indent*' ', uline)
        elif tree.tag in ('tag', 'arg', 'section', 'name'):
            return childstr
        elif tree.tag == 'code':
            if [c for c in tree.children if not isinstance(c, basestring)]:
                # Nested markup inside an inline literal is written
                # out as-is; warn about it.
                print('Warning: Generating "``%s``"' % childstr)
                return '``%s``' % childstr
            else:
                return '``%s``' % ''.join(tree.children)
        elif tree.tag == 'math' or tree.tag == 'italic':
            return '*%s*' % childstr
        elif tree.tag == 'indexed':
            return '`%s`:term:' % childstr
        elif tree.tag == 'bold':
            return '**%s**' % childstr
        elif tree.tag == 'epytext':
            # Collapse runs of blank lines, and merge a literal-block
            # marker with the colon ending the preceding paragraph.
            childstr = re.sub('\n{3,}', '\n\n', childstr)
            childstr = re.sub(':\\s*\n+::\n', '::\n', childstr)
            return childstr.rstrip()+'\n'
        else:
            raise ValueError('Unknown DOM element %r' % tree.tag)
 1650   
1651   
1652   
1653   
1654   
# Width, in characters, of the separator rules printed by pparse().
SCRWIDTH = 75


def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    If C{parse} itself raises an unexpected exception, whatever
    warnings and errors were collected up to that point are still
    displayed, and the original exception is then re-raised.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: C{Element}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    def emit(line):
        # One line of output, newline-terminated.  Written with
        # stream.write() so the syntax is valid on every Python version.
        stream.write('%s\n' % (line,))

    def report(collected):
        # Split the collected errors by severity, print them, and
        # return the list of fatal ones.
        warnings = [e for e in collected if not e.is_fatal()]
        fatal = [e for e in collected if e.is_fatal()]
        if not show_warnings:
            warnings = []
        warnings.sort()
        fatal.sort()
        if warnings:
            emit('='*SCRWIDTH)
            emit("WARNINGS")
            emit('-'*SCRWIDTH)
            for warning in warnings:
                emit(warning.as_warning())
            emit('='*SCRWIDTH)
        if fatal and show_errors:
            # The warnings section already ended with a rule; only
            # open a new one if nothing was printed before.
            if not warnings:
                emit('='*SCRWIDTH)
            emit("ERRORS")
            emit('-'*SCRWIDTH)
            for error in fatal:
                emit(error)
            emit('='*SCRWIDTH)
        return fatal

    errors = []
    try:
        val = parse(str, errors)
    except Exception:
        # Re-raise from inside the handler, so the exception that
        # propagates is always the one parse() raised.
        report(errors)
        raise

    if report(errors):
        raise SyntaxError('Encountered Errors')
    return val
 1707   
1708   
1709   
1710   
1711   
1713      """ 
1714      An error generated while tokenizing a formatted documentation 
1715      string. 
1716      """ 
 1717   
1719      """ 
1720      An error generated while structuring a formatted documentation 
1721      string. 
1722      """ 
 1723   
1725      """ 
1726      An error generated while colorizing a paragraph. 
1727      """ 
    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.
        
        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occurred.
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occurred.
        @type charnum: C{int}
        @param is_fatal: Forwarded unchanged to C{ParseError.__init__}
            (presumably whether the error is fatal -- confirm against
            L{ParseError}).
        """
        # The base class is given the token's starting line
        # (token.startline) rather than the token itself.
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum
 1743   
1744      CONTEXT_RANGE = 20 
1746          RANGE = self.CONTEXT_RANGE 
1747          if self.charnum <= RANGE: 
1748              left = self.token.contents[0:self.charnum] 
1749          else: 
1750              left = '...'+self.token.contents[self.charnum-RANGE:self.charnum] 
1751          if (len(self.token.contents)-self.charnum) <= RANGE: 
1752              right = self.token.contents[self.charnum:] 
1753          else: 
1754              right = (self.token.contents[self.charnum:self.charnum+RANGE] 
1755                       + '...') 
1756          return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left))) 
  1757                   
1758   
1759   
1760   
1761   
1763      """ 
1764      Return a DOM document matching the epytext DTD, containing a 
1765      single literal block.  That literal block will include the 
1766      contents of the given string.  This method is typically used as a 
1767      fall-back when the parser fails. 
1768   
1769      @param str: The string which should be enclosed in a literal 
1770          block. 
1771      @type str: C{string} 
1772       
1773      @return: A DOM document containing C{str} in a single literal 
1774          block. 
1775      @rtype: C{Element} 
1776      """ 
1777      return Element('epytext', Element('literalblock', str)) 
 1778   
1780      """ 
1781      Return a DOM document matching the epytext DTD, containing a 
1782      single paragraph.  That paragraph will include the contents of the 
1783      given string.  This can be used to wrap some forms of 
1784      automatically generated information (such as type names) in 
1785      paragraphs. 
1786   
1787      @param str: The string which should be enclosed in a paragraph. 
1788      @type str: C{string} 
1789       
1790      @return: A DOM document containing C{str} in a single paragraph. 
1791      @rtype: C{Element} 
1792      """ 
1793      return Element('epytext', Element('para', str)) 
 1794   
1795   
1796   
1797   
1798   
1800      """ 
1801      Parse the given docstring, which is formatted using epytext; and 
1802      return a C{ParsedDocstring} representation of its contents. 
1803      @param docstring: The docstring to parse 
1804      @type docstring: C{string} 
1805      @param errors: A list where any errors generated during parsing 
1806          will be stored. 
1807      @type errors: C{list} of L{ParseError} 
1808      @param options: Extra options.  Unknown options are ignored. 
1809          Currently, no extra options are defined. 
1810      @rtype: L{ParsedDocstring} 
1811      """ 
1812      return ParsedEpytextDocstring(parse(docstring, errors), **options) 
 1813       
1814 -class ParsedEpytextDocstring(ParsedDocstring): 
    # Maps an epytext symbol name to the text emitted for it in HTML
    # output.  Names missing from this table are rendered by _to_html
    # as "[name]".
    SYMBOL_TO_HTML = {
        # Arrow shorthands
        '<-': '←', '->': '→', '^': '↑', 'v': '↓',
    
        # Greek letters (lower-case, then upper-case)
        'alpha': 'α', 'beta': 'β', 'gamma': 'γ',
        'delta': 'δ', 'epsilon': 'ε', 'zeta': 'ζ',  
        'eta': 'η', 'theta': 'θ', 'iota': 'ι', 
        'kappa': 'κ', 'lambda': 'λ', 'mu': 'μ',  
        'nu': 'ν', 'xi': 'ξ', 'omicron': 'ο',  
        'pi': 'π', 'rho': 'ρ', 'sigma': 'σ',  
        'tau': 'τ', 'upsilon': 'υ', 'phi': 'φ',  
        'chi': 'χ', 'psi': 'ψ', 'omega': 'ω',
        'Alpha': 'Α', 'Beta': 'Β', 'Gamma': 'Γ',
        'Delta': 'Δ', 'Epsilon': 'Ε', 'Zeta': 'Ζ',  
        'Eta': 'Η', 'Theta': 'Θ', 'Iota': 'Ι', 
        'Kappa': 'Κ', 'Lambda': 'Λ', 'Mu': 'Μ',  
        'Nu': 'Ν', 'Xi': 'Ξ', 'Omicron': 'Ο',  
        'Pi': 'Π', 'Rho': 'Ρ', 'Sigma': 'Σ',  
        'Tau': 'Τ', 'Upsilon': 'Υ', 'Phi': 'Φ',  
        'Chi': 'Χ', 'Psi': 'Ψ', 'Omega': 'Ω',
    
        # Arrows, math operators and other named symbols
        'larr': '←', 'rarr': '→', 'uarr': '↑',
        'darr': '↓', 'harr': '↔', 'crarr': '↵',
        'lArr': '⇐', 'rArr': '⇒', 'uArr': '⇑',
        'dArr': '⇓', 'hArr': '⇔', 
        'copy': '©', 'times': '×', 'forall': '∀',
        'exist': '∃', 'part': '∂',
        'empty': '∅', 'isin': '∈', 'notin': '∉',
        'ni': '∋', 'prod': '∏', 'sum': '∑',
        'prop': '∝', 'infin': '∞', 'ang': '∠',
        'and': '∧', 'or': '∨', 'cap': '∩', 'cup': '∪',
        'int': '∫', 'there4': '∴', 'sim': '∼',
        'cong': '≅', 'asymp': '≈', 'ne': '≠',
        'equiv': '≡', 'le': '≤', 'ge': '≥',
        'sub': '⊂', 'sup': '⊃', 'nsub': '⊄',
        'sube': '⊆', 'supe': '⊇', 'oplus': '⊕',
        'otimes': '⊗', 'perp': '⊥',
    
        # Long-form and comparison-operator aliases
        'infinity': '∞', 'integral': '∫', 'product': '∏',
        '<=': '≤', '>=': '≥',
        }
1859       
1860      SYMBOL_TO_LATEX = { 
1861           
1862          '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)', 
1863          '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)', 
1864       
1865           
1866   
1867          'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma': 
1868          r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon': 
1869          r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)', 
1870          'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa': 
1871          r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)', 
1872          'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi': 
1873          r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau': 
1874          r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)', 
1875          'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega': 
1876          r'\(\omega\)', 
1877           
1878          'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma': 
1879          r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon': 
1880          r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)', 
1881          'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa': 
1882          r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)', 
1883          'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi': 
1884          r'\(\Pi\)', 'ho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau': 
1885          r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)', 
1886          'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega': 
1887          r'\(\Omega\)', 
1888       
1889           
1890          'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr': 
1891          r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr': 
1892          r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)', 
1893          'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr': 
1894          r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr': 
1895          r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}', 
1896          'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist': 
1897          r'\(\exists\)', 'part': r'\(\partial\)', 'empty': 
1898          r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)', 
1899          'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)', 
1900          'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang': 
1901          r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap': 
1902          r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4': 
1903          r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)', 
1904          'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv': 
1905          r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub': 
1906          r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)', 
1907          'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus': 
1908          r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)', 
1909       
1910           
1911          'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product': 
1912          r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)', 
1913          } 
1914       
1915 -    def __init__(self, dom_tree, **options): 
 1916          self._tree = dom_tree 
1917           
1918          self._html = self._latex = self._plaintext = None 
1919          self._terms = None 
1920           
1921          if options.get('inline') and self._tree is not None: 
1922              for elt in self._tree.children: 
1923                  elt.attribs['inline'] = True 
 1924   
    def __str__(self):
        """
        Return C{str()} of the wrapped DOM tree.
        """
        return str(self._tree)
 1927           
1928 -    def to_html(self, docstring_linker, directory=None, docindex=None, 
1929                  context=None, **options): 
 1930          if self._html is not None: return self._html 
1931          if self._tree is None: return '' 
1932          indent = options.get('indent', 0) 
1933          self._html = self._to_html(self._tree, docstring_linker, directory,  
1934                                     docindex, context, indent) 
1935          return self._html 
 1936   
1937 -    def to_latex(self, docstring_linker, directory=None, docindex=None, 
1938                   context=None, **options): 
 1939          if self._latex is not None: return self._latex 
1940          if self._tree is None: return '' 
1941          indent = options.get('indent', 0) 
1942          self._hyperref = options.get('hyperref', 1) 
1943          self._latex = self._to_latex(self._tree, docstring_linker, directory, 
1944                                       docindex, context, indent) 
1945          return self._latex 
 1946   
1947 -    def to_plaintext(self, docstring_linker, **options): 
 1948           
1949           
1950          if self._tree is None: return '' 
1951          if 'indent' in options: 
1952              self._plaintext = to_plaintext(self._tree, 
1953                                             indent=options['indent']) 
1954          else: 
1955              self._plaintext = to_plaintext(self._tree) 
1956          return self._plaintext 
 1957   
1958 -    def _index_term_key(self, tree): 
 1959          str = to_plaintext(tree) 
1960          str = re.sub(r'\s\s+', '-', str) 
1961          return "index-"+re.sub("[^a-zA-Z0-9]", "_", str) 
 1962   
1963 -    def _to_html(self, tree, linker, directory, docindex, context, 
1964                   indent=0, seclevel=0): 
 1965          if isinstance(tree, basestring): 
1966              return plaintext_to_html(tree) 
1967   
1968          if tree.tag == 'epytext': indent -= 2 
1969          if tree.tag == 'section': seclevel += 1 
1970   
1971           
1972          variables = [self._to_html(c, linker, directory, docindex, context, 
1973                                     indent+2, seclevel) 
1974                      for c in tree.children] 
1975       
1976           
1977          childstr = ''.join(variables) 
1978       
1979           
1980          if tree.tag == 'para': 
1981              return wordwrap( 
1982                  (tree.attribs.get('inline') and '%s' or '<p>%s</p>') % childstr, 
1983                  indent) 
1984          elif tree.tag == 'code': 
1985              style = tree.attribs.get('style') 
1986              if style: 
1987                  return '<code class="%s">%s</code>' % (style, childstr) 
1988              else: 
1989                  return '<code>%s</code>' % childstr 
1990          elif tree.tag == 'uri': 
1991              return ('<a href="%s" target="_top">%s</a>' % 
1992                      (variables[1], variables[0])) 
1993          elif tree.tag == 'link': 
1994              return linker.translate_identifier_xref(variables[1], variables[0]) 
1995          elif tree.tag == 'italic': 
1996              return '<i>%s</i>' % childstr 
1997          elif tree.tag == 'math': 
1998              return '<i class="math">%s</i>' % childstr 
1999          elif tree.tag == 'indexed': 
2000              term = Element('epytext', *tree.children, **tree.attribs) 
2001              return linker.translate_indexterm(ParsedEpytextDocstring(term)) 
2002               
2003               
2004          elif tree.tag == 'bold': 
2005              return '<b>%s</b>' % childstr 
2006          elif tree.tag == 'ulist': 
2007              return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ') 
2008          elif tree.tag == 'olist': 
2009              start = tree.attribs.get('start') or '' 
2010              return ('%s<ol start="%s">\n%s%s</ol>\n' % 
2011                      (indent*' ', start, childstr, indent*' ')) 
2012          elif tree.tag == 'li': 
2013              return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ') 
2014          elif tree.tag == 'heading': 
2015              return ('%s<h%s class="heading">%s</h%s>\n' % 
2016                      ((indent-2)*' ', seclevel, childstr, seclevel)) 
2017          elif tree.tag == 'literalblock': 
2018              return '<pre class="literalblock">\n%s\n</pre>\n' % childstr 
2019          elif tree.tag == 'doctestblock': 
2020              return doctest_to_html(tree.children[0].strip()) 
2021          elif tree.tag == 'fieldlist': 
2022              raise AssertionError("There should not be any field lists left") 
2023          elif tree.tag in ('epytext', 'section', 'tag', 'arg', 
2024                                'name', 'target', 'html'): 
2025              return childstr 
2026          elif tree.tag == 'symbol': 
2027              symbol = tree.children[0] 
2028              return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol) 
2029          elif tree.tag == 'graph': 
2030              if directory is None: return '' 
2031               
2032              graph = self._build_graph(variables[0], variables[1:], linker, 
2033                                        docindex, context) 
2034              if not graph: return '' 
2035               
2036              return graph.to_html(directory) 
2037          else: 
2038              raise ValueError('Unknown epytext DOM element %r' % tree.tag) 
 2039   
2040       
2041 -    def _build_graph(self, graph_type, graph_args, linker,  
2042                       docindex, context): 
 2043           
2044          if graph_type == 'classtree': 
2045              from epydoc.apidoc import ClassDoc 
2046              if graph_args: 
2047                  bases = [docindex.find(name, context) 
2048                           for name in graph_args] 
2049              elif isinstance(context, ClassDoc): 
2050                  bases = [context] 
2051              else: 
2052                  log.warning("Could not construct class tree: you must " 
2053                              "specify one or more base classes.") 
2054                  return None 
2055              from epydoc.docwriter.dotgraph import class_tree_graph 
2056              return class_tree_graph(bases, linker, context) 
2057          elif graph_type == 'packagetree': 
2058              from epydoc.apidoc import ModuleDoc 
2059              if graph_args: 
2060                  packages = [docindex.find(name, context) 
2061                              for name in graph_args] 
2062              elif isinstance(context, ModuleDoc): 
2063                  packages = [context] 
2064              else: 
2065                  log.warning("Could not construct package tree: you must " 
2066                              "specify one or more root packages.") 
2067                  return None 
2068              from epydoc.docwriter.dotgraph import package_tree_graph 
2069              return package_tree_graph(packages, linker, context) 
2070          elif graph_type == 'importgraph': 
2071              from epydoc.apidoc import ModuleDoc 
2072              modules = [d for d in docindex.root if isinstance(d, ModuleDoc)] 
2073              from epydoc.docwriter.dotgraph import import_graph 
2074              return import_graph(modules, docindex, linker, context) 
2075   
2076          elif graph_type == 'callgraph': 
2077              if graph_args: 
2078                  docs = [docindex.find(name, context) for name in graph_args] 
2079                  docs = [doc for doc in docs if doc is not None] 
2080              else: 
2081                  docs = [context] 
2082              from epydoc.docwriter.dotgraph import call_graph 
2083              return call_graph(docs, docindex, linker, context) 
2084          else: 
2085              log.warning("Unknown graph type %s" % graph_type) 
 2086               
2087 -    def _to_latex(self, tree, linker, directory, docindex, context, 
2088                    indent=0, seclevel=0, breakany=0): 
 2089          if isinstance(tree, basestring): 
2090              return plaintext_to_latex(tree, breakany=breakany) 
2091   
2092          if tree.tag == 'section': seclevel += 1 
2093       
2094           
2095          if tree.tag == 'epytext': cindent = indent 
2096          else: cindent = indent + 2 
2097          variables = [self._to_latex(c, linker, directory, docindex, 
2098                                      context, cindent, seclevel, breakany) 
2099                      for c in tree.children] 
2100          childstr = ''.join(variables) 
2101       
2102          if tree.tag == 'para': 
2103              return wordwrap(childstr, indent)+'\n' 
2104          elif tree.tag == 'code': 
2105              return '\\texttt{%s}' % childstr 
2106          elif tree.tag == 'uri': 
2107              if len(variables) != 2: raise ValueError('Bad URI ') 
2108              if self._hyperref: 
2109                   
2110                  uri = tree.children[1].children[0] 
2111                  uri = uri.replace('{\\textasciitilde}', '~') 
2112                  uri = uri.replace('\\#', '#') 
2113                  if variables[0] == variables[1]: 
2114                      return '\\href{%s}{\\textit{%s}}' % (uri, variables[1]) 
2115                  else: 
2116                      return ('%s\\footnote{\\href{%s}{%s}}' % 
2117                              (variables[0], uri, variables[1])) 
2118              else: 
2119                  if variables[0] == variables[1]: 
2120                      return '\\textit{%s}' % variables[1] 
2121                  else: 
2122                      return '%s\\footnote{%s}' % (variables[0], variables[1]) 
2123          elif tree.tag == 'link': 
2124              if len(variables) != 2: raise ValueError('Bad Link') 
2125              return linker.translate_identifier_xref(variables[1], variables[0]) 
2126          elif tree.tag == 'italic': 
2127              return '\\textit{%s}' % childstr 
2128          elif tree.tag == 'math': 
2129              return '\\textit{%s}' % childstr 
2130          elif tree.tag == 'indexed': 
2131              term = Element('epytext', *tree.children, **tree.attribs) 
2132              return linker.translate_indexterm(ParsedEpytextDocstring(term)) 
2133          elif tree.tag == 'bold': 
2134              return '\\textbf{%s}' % childstr 
2135          elif tree.tag == 'li': 
2136              return indent*' ' + '\\item ' + childstr.lstrip() 
2137          elif tree.tag == 'heading': 
2138              sec = ('\\EpydocUser' + 
2139                     ('%ssection' % ('sub'*(min(seclevel,3)-1))).capitalize()) 
2140              return (' '*(indent-2) + '%s{%s}\n\n' % (sec, childstr.strip())) 
2141          elif tree.tag == 'doctestblock': 
2142              return doctest_to_latex(tree.children[0].strip()) 
2143          elif tree.tag == 'literalblock': 
2144              return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr 
2145          elif tree.tag == 'fieldlist': 
2146              return indent*' '+'{omitted fieldlist}\n' 
2147          elif tree.tag == 'olist': 
2148              return (' '*indent + '\\begin{enumerate}\n\n' +  
2149                      ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' + 
2150                      childstr + 
2151                      ' '*indent + '\\end{enumerate}\n\n') 
2152          elif tree.tag == 'ulist': 
2153              return (' '*indent + '\\begin{itemize}\n' + 
2154                      ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' + 
2155                      childstr + 
2156                      ' '*indent + '\\end{itemize}\n\n') 
2157          elif tree.tag == 'symbol': 
2158              symbol = tree.children[0] 
2159              return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol) 
2160          elif tree.tag == 'graph': 
2161              if directory is None: return '' 
2162               
2163              graph = self._build_graph(variables[0], variables[1:], linker, 
2164                                        docindex, context) 
2165              if not graph: return '' 
2166               
2167              return graph.to_latex(directory) 
2168          else: 
2169               
2170              return childstr 
 2171   
    # Matches the shortest leading run of text (newlines included)
    # that ends in a period followed by whitespace or end-of-string.
    # Group 1 is that text, including the period.
    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')
2173   
2174 -    def summary(self): 
 2175          if self._tree is None: return self, False 
2176          tree = self._tree 
2177          doc = Element('epytext') 
2178       
2179           
2180          variables = tree.children 
2181          while (len(variables) > 0) and (variables[0].tag != 'para'): 
2182              if variables[0].tag in ('section', 'ulist', 'olist', 'li'): 
2183                  variables = variables[0].children 
2184              else: 
2185                  variables = variables[1:] 
2186       
2187           
2188           
2189          if (len(variables) == 0 and len(tree.children) == 1 and 
2190              tree.children[0].tag == 'literalblock'): 
2191              str = re.split(r'\n\s*(\n|$).*', 
2192                             tree.children[0].children[0], 1)[0] 
2193              variables = [Element('para')] 
2194              variables[0].children.append(str) 
2195       
2196           
2197          if len(variables) == 0: return ParsedEpytextDocstring(doc), False 
2198       
2199           
2200          long_docs = False 
2201          for var in variables[1:]: 
2202              if isinstance(var, Element) and var.tag == 'fieldlist': 
2203                  continue 
2204              long_docs = True 
2205              break 
2206           
2207           
2208          parachildren = variables[0].children 
2209          para = Element('para', inline=True) 
2210          doc.children.append(para) 
2211          for parachild in parachildren: 
2212              if isinstance(parachild, basestring): 
2213                  m = self._SUMMARY_RE.match(parachild) 
2214                  if m: 
2215                      para.children.append(m.group(1)) 
2216                      long_docs |= parachild is not parachildren[-1] 
2217                      if not long_docs: 
2218                          other = parachild[m.end():] 
2219                          if other and not other.isspace(): 
2220                              long_docs = True 
2221                      return ParsedEpytextDocstring(doc), long_docs 
2222              para.children.append(parachild) 
2223   
2224          return ParsedEpytextDocstring(doc), long_docs 
 2225   
2226 -    def split_fields(self, errors=None): 
 2227          if self._tree is None: return (self, ()) 
2228          tree = Element(self._tree.tag, *self._tree.children, 
2229                         **self._tree.attribs) 
2230          fields = [] 
2231   
2232          if (tree.children and 
2233              tree.children[-1].tag == 'fieldlist' and 
2234              tree.children[-1].children): 
2235              field_nodes = tree.children[-1].children 
2236              del tree.children[-1] 
2237   
2238              for field in field_nodes: 
2239                   
2240                  tag = field.children[0].children[0].lower() 
2241                  del field.children[0] 
2242   
2243                   
2244                  if field.children and field.children[0].tag == 'arg': 
2245                      arg = field.children[0].children[0] 
2246                      del field.children[0] 
2247                  else: 
2248                      arg = None 
2249   
2250                   
2251                  field.tag = 'epytext' 
2252                  fields.append(Field(tag, arg, ParsedEpytextDocstring(field))) 
2253   
2254           
2255          if tree.children and tree.children[0].children: 
2256              return ParsedEpytextDocstring(tree), fields 
2257          else: 
2258              return None, fields 
 2259   
2260       
2261 -    def index_terms(self): 
 2262          if self._terms is None: 
2263              self._terms = [] 
2264              self._index_terms(self._tree, self._terms) 
2265          return self._terms 
 2266   
2267 -    def _index_terms(self, tree, terms): 
 2268          if tree is None or isinstance(tree, basestring): 
2269              return 
2270           
2271          if tree.tag == 'indexed': 
2272              term = Element('epytext', *tree.children, **tree.attribs) 
2273              terms.append(ParsedEpytextDocstring(term)) 
2274   
2275           
2276          for child in tree.children: 
2277              self._index_terms(child, terms) 
  2278