1
2
3
4
5
6
7
8
9 """
10 Parser for epytext strings. Epytext is a lightweight markup whose
11 primary intended application is Python documentation strings. This
12 parser converts Epytext strings to a simple DOM-like representation
13 (encoded as a tree of L{Element} objects and strings). Epytext
14 strings can contain the following X{structural blocks}:
15
16 - X{epytext}: The top-level element of the DOM tree.
17 - X{para}: A paragraph of text. Paragraphs contain no newlines,
18 and all spaces are soft.
19 - X{section}: A section or subsection.
20 - X{field}: A tagged field. These fields provide information
21 about specific aspects of a Python object, such as the
22 description of a function's parameter, or the author of a
23 module.
24 - X{literalblock}: A block of literal text. This text should be
25 displayed as it would be displayed in plaintext. The
26 parser removes the appropriate amount of leading whitespace
27 from each line in the literal block.
28 - X{doctestblock}: A block containing sample python code,
29 formatted according to the specifications of the C{doctest}
30 module.
31 - X{ulist}: An unordered list.
32 - X{olist}: An ordered list.
33 - X{li}: A list item. This tag is used both for unordered list
34 items and for ordered list items.
35
36 Additionally, the following X{inline regions} may be used within
37 C{para} blocks:
38
39 - X{code}: Source code and identifiers.
40 - X{math}: Mathematical expressions.
41 - X{index}: A term which should be included in an index, if one
42 is generated.
43 - X{italic}: Italicized text.
44 - X{bold}: Bold-faced text.
45 - X{uri}: A Universal Resource Indicator (URI) or Universal
46 Resource Locator (URL)
47 - X{link}: A Python identifier which should be hyperlinked to
48 the named object's documentation, when possible.
49
The returned DOM tree will conform to the following Document Type
51 Description::
52
53 <!ENTITY % colorized '(code | math | index | italic |
54 bold | uri | link | symbol)*'>
55
56 <!ELEMENT epytext ((para | literalblock | doctestblock |
57 section | ulist | olist)*, fieldlist?)>
58
59 <!ELEMENT para (#PCDATA | %colorized;)*>
60
61 <!ELEMENT section (para | listblock | doctestblock |
62 section | ulist | olist)+>
63
64 <!ELEMENT fieldlist (field+)>
<!ELEMENT field (tag, arg?, (para | listblock | doctestblock |
ulist | olist)+)>
67 <!ELEMENT tag (#PCDATA)>
68 <!ELEMENT arg (#PCDATA)>
69
70 <!ELEMENT literalblock (#PCDATA | %colorized;)*>
71 <!ELEMENT doctestblock (#PCDATA)>
72
73 <!ELEMENT ulist (li+)>
74 <!ELEMENT olist (li+)>
75 <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+>
76 <!ATTLIST li bullet NMTOKEN #IMPLIED>
77 <!ATTLIST olist start NMTOKEN #IMPLIED>
78
79 <!ELEMENT uri (name, target)>
80 <!ELEMENT link (name, target)>
81 <!ELEMENT name (#PCDATA | %colorized;)*>
82 <!ELEMENT target (#PCDATA)>
83
84 <!ELEMENT code (#PCDATA | %colorized;)*>
85 <!ELEMENT math (#PCDATA | %colorized;)*>
86 <!ELEMENT italic (#PCDATA | %colorized;)*>
87 <!ELEMENT bold (#PCDATA | %colorized;)*>
88 <!ELEMENT indexed (#PCDATA | %colorized;)>
89 <!ATTLIST code style CDATA #IMPLIED>
90
91 <!ELEMENT symbol (#PCDATA)>
92
@var SYMBOLS: A list of the escape symbols that are supported
94 by epydoc. Currently the following symbols are supported:
95 <<<SYMBOLS>>>
96 """
97
98
99
100 __docformat__ = 'epytext en'
101
102
103
104
105
106
107
108
109 import re, string, types, sys, os.path
110 from epydoc.markup import *
111 from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex
112 from epydoc.markup.doctest import doctest_to_html, doctest_to_latex
113
114
115
116
117
class Element:
    """
    A very simple DOM-like representation for parsed epytext
    documents.  Each epytext document is encoded as a tree whose nodes
    are L{Element} objects, and whose leaves are C{string}s.  Each
    node is marked by a I{tag} and zero or more I{attributes}.  Each
    attribute is a mapping from a string key to a string value.
    """
    def __init__(self, tag, *children, **attribs):
        """
        Create a new element.

        @param tag: The tag indicating the type of this element.
        @param children: The initial children of this element, in
            order.  They are copied into a new list.
        @param attribs: The attributes of this element, given as
            keyword arguments.
        """
        self.tag = tag
        """A string tag indicating the type of this element.
        @type: C{string}"""

        self.children = list(children)
        """A list of the children of this element.
        @type: C{list} of (C{string} or C{Element})"""

        self.attribs = attribs
        """A dictionary mapping attribute names to attribute values
        for this element.
        @type: C{dict} from C{string} to C{string}"""

    def __str__(self):
        """
        Return a string representation of this element, using XML
        notation.
        @bug: Doesn't escape '<' or '&' or '>'.
        """
        attribs = ''.join([' %s=%r' % t for t in self.attribs.items()])
        return ('<%s%s>' % (self.tag, attribs) +
                ''.join([str(child) for child in self.children]) +
                '</%s>' % self.tag)

    def __repr__(self):
        """
        Return a constructor-like representation of this element, of
        the form C{Element(tag, child, ..., key=value, ...)}.  The tag
        itself is shown without quotes.
        """
        attribs = ''.join([', %s=%r' % t for t in self.attribs.items()])
        args = ''.join([', %r' % c for c in self.children])
        return 'Element(%s%s%s)' % (self.tag, args, attribs)
155
156
157
158
159
160
161
# The possible heading underline characters.  A character's index in
# this string is used as the heading level.
_HEADING_CHARS = "=-~"

# Escape codes, usable via E{...}.
_ESCAPES = {'lb':'{', 'rb': '}'}

# Symbols, usable via S{...}.
SYMBOLS = [
    # Arrows
    '<-', '->', '^', 'v',

    # Greek letters
    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
    'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
    'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
    'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
    'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
    'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
    'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
    'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',

    # Named arrows, operators and relations
    'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
    'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
    'copy', 'times', 'forall', 'exist', 'part',
    'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
    'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
    'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
    'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
    'sube', 'supe', 'oplus', 'otimes', 'perp',

    # Longer spellings and ASCII forms
    'infinity', 'integral', 'product',
    '>=', '<=',
    ]
# Convert to a dictionary, for quick membership tests.
_SYMBOLS = {}
for symbol in SYMBOLS: _SYMBOLS[symbol] = 1

# Substitute the symbol list into the module docstring.  __doc__ is
# None when docstrings are stripped (e.g. "python -OO"), so only do
# the substitution when a docstring is actually present.
symblist = ' '
symblist += ';\n '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol)
                         for symbol in SYMBOLS])
if __doc__ is not None:
    __doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
del symbol, symblist

# Maps the capital letter that precedes "{" to the tag of the inline
# element it introduces.
_COLORIZING_TAGS = {
    'C': 'code',
    'M': 'math',
    'X': 'indexed',
    'I': 'italic',
    'B': 'bold',
    'U': 'uri',
    'L': 'link',       # A Python identifier that should be linked to
    'E': 'escape',     # escapes characters or creates symbols
    'S': 'symbol',
    'G': 'graph',
    }

# Tags of the inline elements that get link post-processing.
_LINK_COLORIZING_TAGS = ['link', 'uri']
223
224
225
226
227
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string.  Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string,
        or C{None} if C{errors} was given and a fatal error occurred.
    @rtype: C{Element}
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize the errors list.
    if errors is None:
        errors = []
        raise_on_error = 1
    else:
        raise_on_error = 0

    # Preprocess the string: normalize CR-LF line ends and expand tabs.
    str = re.sub('\015\012', '\012', str)
    str = str.expandtabs()

    # Get a list of structuring tokens.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?
    encountered_field = 0

    # Create the top-level element that holds the epytext.
    doc = Element('epytext')

    # Maintain two parallel stacks: one holds the DOM elements that
    # are ancestors of the current block; the other holds the
    # indentation of each of those elements (None = not yet known).
    # Both start with a dummy bottom entry so that index -2 is always
    # valid.
    stack = [None, doc]
    indent_stack = [-1, None]

    for token in tokens:
        # Pop any completed blocks off the stack.
        _pop_completed_blocks(token, stack, indent_stack)

        if token.tag == Token.PARA:
            # Colorize and add the new paragraph.
            _add_para(doc, token, stack, indent_stack, errors)
        elif token.tag == Token.HEADING:
            # Add the new section.
            _add_section(doc, token, stack, indent_stack, errors)
        elif token.tag in (Token.LBLOCK, Token.DTBLOCK):
            # Literal and doctest blocks are added as-is.
            stack[-1].children.append(token.to_dom(doc))
        elif token.tag == Token.BULLET:
            # Start a new list item or field.
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            raise AssertionError('Unknown token type: '+token.tag)

        # Check whether the element we just added was a field; once a
        # field has been seen, nothing else may follow at the top level.
        if stack[-1].tag == 'field':
            encountered_field = 1
        elif encountered_field == 1:
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "+
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # Graphs are written with inline markup (G{...}); move any graph
    # elements up out of the non-block elements that contain them.
    for child in doc.children:
        _raise_graphs(child, doc)

    # If there was a fatal error, then signal it.  Only fatal errors
    # are considered here: the list may also hold non-fatal warnings,
    # which must not be raised in place of the real error.
    fatal_errors = [e for e in errors if e.is_fatal()]
    if fatal_errors:
        if raise_on_error:
            raise fatal_errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    return doc
336
338
def _raise_graphs(tree, parent):
    """
    Move any C{graph} elements found directly inside C{tree} up into
    C{parent}, splitting C{tree} around each one.  Elements whose tag
    is a block container (section, list, list item, field) keep their
    graph children in place.

    @param tree: The element to process.  Its element children are
        processed recursively first.
    @param parent: The element whose C{children} list contains
        C{tree}; it is modified in place.
    """
    # Recurse to children.
    have_graph_child = False
    for elt in tree.children:
        if isinstance(elt, Element):
            _raise_graphs(elt, tree)
            if elt.tag == 'graph': have_graph_child = True

    block = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li')
    if have_graph_child and tree.tag not in block:
        child_index = 0
        parent_index = parent.children.index(tree)
        # NOTE: this keeps iterating over the *original* children list
        # even though "tree" is rebound below; child_index is counted
        # relative to the most recently created right-hand element.
        for elt in tree.children:
            if isinstance(elt, Element) and elt.tag == 'graph':
                # We found a graph: replace tree, in the parent, by
                # (left part, graph, right part).
                left = tree.children[:child_index]
                right = tree.children[child_index+1:]
                parent.children[parent_index:parent_index+1] = [
                    Element(tree.tag, *left, **tree.attribs),
                    elt,
                    Element(tree.tag, *right, **tree.attribs)]
                # Continue with the right-hand part.
                child_index = 0
                parent_index += 2
                tree = parent.children[parent_index]
            else:
                child_index += 1
363
def _pop_completed_blocks(token, stack, indent_stack):
    """
    Pop any completed blocks off the stack.  This includes any
    blocks that we have dedented past, as well as any list item
    blocks that we've dedented to.  The top element on the stack
    should only be a list if we're about to start a new list
    item (i.e., if the next token is a bullet).

    @param token: The token that is about to be added.  Nothing is
        popped if its C{indent} is C{None}.
    @param stack: The stack of open DOM elements; modified in place.
    @param indent_stack: The indentations of the elements in
        C{stack}; modified in place.
    """
    indent = token.indent
    if indent is None:
        return

    # The two bottom entries of the stack are never popped.
    while len(stack) > 2:
        top_indent = indent_stack[-1]

        if top_indent is not None and indent < top_indent:
            # Dedent past a block whose indentation is known.
            pop = 1
        elif top_indent is None and indent < indent_stack[-2]:
            # Dedent past a block whose indentation is not yet known.
            pop = 1
        elif (token.tag == 'bullet' and indent == indent_stack[-2] and
              stack[-1].tag in ('li', 'field')):
            # Dedent to a list item that is followed by another list
            # item with the same indentation.
            pop = 1
        elif (stack[-1].tag in ('ulist', 'olist') and
              (token.tag != 'bullet' or token.contents[-1] == ':')):
            # End of a list: the next token is not a list bullet.
            pop = 1
        else:
            pop = 0

        # Pop the block if it's complete.  Otherwise, we're done.
        if pop == 0: return
        stack.pop()
        indent_stack.pop()
395
def _add_para(doc, para_token, stack, indent_stack, errors):
    """
    Colorize the given paragraph, and add it to the DOM tree.

    @param doc: The top-level document element (passed through to the
        colorizer).
    @param para_token: The paragraph token to add.
    @param stack: The stack of open DOM elements; the paragraph is
        appended to the children of the top element.
    @param indent_stack: The indentations of the elements in
        C{stack}.  If the top entry is unknown (C{None}), it is set to
        the paragraph's indentation.
    @param errors: A list to which a L{StructuringError} is appended
        if the paragraph's indentation does not match its container.
    """
    # If the container's indentation is not known yet, this paragraph
    # defines it.
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent

    if para_token.indent != indent_stack[-1]:
        estr = "Improper paragraph indentation."
        errors.append(StructuringError(estr, para_token.startline))
        return

    # Colorize the paragraph and add it.
    para = _colorize(doc, para_token, errors)
    if para_token.inline:
        para.attribs['inline'] = True
    stack[-1].children.append(para)
411
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """
    Add a new section to the DOM tree, with the given heading.

    @param doc: The top-level document element (passed through to the
        colorizer).
    @param heading_token: The heading token; its C{level} selects how
        deep the new section is nested.
    @param stack: The stack of open DOM elements; truncated to the
        heading's level and then extended with the new section.
    @param indent_stack: The indentations of the elements in
        C{stack}; kept parallel to C{stack}.
    @param errors: A list to which any L{StructuringError}s are
        appended.
    """
    if indent_stack[-1] is None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        estr = "Improper heading indentation."
        errors.append(StructuringError(estr, heading_token.startline))

    # Check for errors: every open element above the document must be
    # a section, and the heading may be at most one level deeper than
    # the currently open sections.
    for tok in stack[2:]:
        if tok.tag != "section":
            estr = "Headings must occur at the top level."
            errors.append(StructuringError(estr, heading_token.startline))
            break
    if (heading_token.level+2) > len(stack):
        estr = "Wrong underline character for heading."
        errors.append(StructuringError(estr, heading_token.startline))

    # Pop the appropriate number of sections so we're at the
    # correct level.
    stack[heading_token.level+2:] = []
    indent_stack[heading_token.level+2:] = []

    # Colorize the heading.
    head = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.  The section's own
    # indentation is not known until its first block is added.
    sec = Element("section")
    stack[-1].children.append(sec)
    stack.append(sec)
    sec.children.append(head)
    indent_stack.append(None)
444
def _add_list(doc, bullet_token, stack, indent_stack, errors):
    """
    Add a new list item or field to the DOM tree, with the given
    bullet or field tag.  When necessary, create the associated
    list.

    @param doc: The top-level document element (not used here).
    @param bullet_token: The bullet token.  The last character of its
        contents selects the list type: C{'-'} for an unordered list,
        C{'.'} for an ordered list, C{':'} for a field.
    @param stack: The stack of open DOM elements; modified in place.
    @param indent_stack: The indentations of the elements in
        C{stack}; modified in place.
    @param errors: A list to which any L{StructuringError}s are
        appended.
    @raise AssertionError: If the bullet's last character is not one
        of the three recognized markers.
    """
    # Determine what type of bullet it is.
    if bullet_token.contents[-1] == '-':
        list_type = 'ulist'
    elif bullet_token.contents[-1] == '.':
        list_type = 'olist'
    elif bullet_token.contents[-1] == ':':
        list_type = 'fieldlist'
    else:
        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)

    # Is this a new list?
    newlist = 0
    if stack[-1].tag != list_type:
        newlist = 1
    elif list_type == 'olist':
        # An ordered list continues only if the new bullet has the same
        # prefix as the previous one and the next number in sequence.
        old_listitem = stack[-1].children[-1]
        old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
        new_bullet = bullet_token.contents.split('.')[:-1]
        if (new_bullet[:-1] != old_bullet[:-1] or
            int(new_bullet[-1]) != int(old_bullet[-1])+1):
            newlist = 1

    # Create the new list.
    if newlist:
        if stack[-1].tag == 'fieldlist':
            # A non-field list is starting while a field list is still
            # the innermost open block; report it as an unindented
            # list.
            estr = "Lists must be indented."
            errors.append(StructuringError(estr, bullet_token.startline))
        if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
            stack.pop()
            indent_stack.pop()

        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
            bullet_token.indent == indent_stack[-1]):
            # The error is suppressed for an unindented bullet that
            # starts on line 1.
            if bullet_token.startline != 1 or bullet_token.indent != 0:
                estr = "Lists must be indented."
                errors.append(StructuringError(estr, bullet_token.startline))

        if list_type == 'fieldlist':
            # Field lists should be at the top level.
            for tok in stack[2:]:
                if tok.tag != "section":
                    estr = "Fields must be at the top level."
                    errors.append(
                        StructuringError(estr, bullet_token.startline))
                    break
            stack[2:] = []
            indent_stack[2:] = []

        # Add the new list.
        lst = Element(list_type)
        stack[-1].children.append(lst)
        stack.append(lst)
        indent_stack.append(bullet_token.indent)
        if list_type == 'olist':
            start = bullet_token.contents.split('.')[:-1]
            # NOTE(review): "start" is a list, so this comparison with
            # a string is always true and the attribute is always set.
            # Left unchanged because the consumers of "start" are not
            # visible here -- confirm before tightening it.
            if start != '1':
                lst.attribs["start"] = start[-1]

    # Create the list item (or field) element.
    if list_type == 'fieldlist':
        li = Element("field")
        # Strip the leading "@" and trailing ":" and split the tag
        # from its optional argument.
        token_words = bullet_token.contents[1:-1].split(None, 1)
        tag_elt = Element("tag")
        tag_elt.children.append(token_words[0])
        li.children.append(tag_elt)

        if len(token_words) > 1:
            arg_elt = Element("arg")
            arg_elt.children.append(token_words[1])
            li.children.append(arg_elt)
    else:
        li = Element("li")
        if list_type == 'olist':
            li.attribs["bullet"] = bullet_token.contents

    # Add the item; its indentation is not known until its first block
    # is added.
    stack[-1].children.append(li)
    stack.append(li)
    indent_stack.append(None)
542
543
544
545
546
class Token:
    """
    C{Token}s are an intermediate data structure used while
    constructing the structuring DOM tree for a formatted docstring.
    There are five types of C{Token}:

        - Paragraphs
        - Literal blocks
        - Doctest blocks
        - Headings
        - Bullets

    The text contained in each C{Token} is stored in the
    C{contents} variable.  The string in this variable has been
    normalized.  For paragraphs, this means that it has been converted
    into a single line of text, with newline/indentation replaced by
    single spaces.  For literal blocks and doctest blocks, this means
    that the appropriate amount of leading whitespace has been removed
    from each line.

    Each C{Token} has an indentation level associated with it,
    stored in the C{indent} variable.  This indentation level is used
    by the structuring procedure to assemble hierarchical blocks.

    @type tag: C{string}
    @ivar tag: This C{Token}'s type.  Possible values are C{Token.PARA}
        (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK}
        (doctest block), C{Token.HEADING}, and C{Token.BULLET}.

    @type startline: C{int}
    @ivar startline: The line on which this C{Token} begins.  This
        line number is only used for issuing errors.

    @type contents: C{string}
    @ivar contents: The normalized text contained in this C{Token}.

    @type indent: C{int} or C{None}
    @ivar indent: The indentation level of this C{Token} (in
        number of leading spaces).  A value of C{None} indicates an
        unknown indentation; this is used for list items and fields
        that begin with one-line paragraphs.

    @type level: C{int} or C{None}
    @ivar level: The heading-level of this C{Token} if it is a
        heading; C{None}, otherwise.  Valid heading levels are 0, 1,
        and 2.

    @type inline: C{bool}
    @ivar inline: If True, the element is an inline level element,
        comparable to an HTML C{<span>} tag.  Else, it is a block
        level element, comparable to an HTML C{<div>}.

    @type PARA: C{string}
    @cvar PARA: The C{tag} value for paragraph C{Token}s.
    @type LBLOCK: C{string}
    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
    @type DTBLOCK: C{string}
    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
    @type HEADING: C{string}
    @cvar HEADING: The C{tag} value for heading C{Token}s.
    @type BULLET: C{string}
    @cvar BULLET: The C{tag} value for bullet C{Token}s.  This C{tag}
        value is also used for field tag C{Token}s, since fields
        function syntactically the same as list items.
    """
    # The possible token types.
    PARA = "para"
    LBLOCK = "literalblock"
    DTBLOCK = "doctestblock"
    HEADING = "heading"
    BULLET = "bullet"

    def __init__(self, tag, startline, contents, indent, level=None,
                 inline=False):
        """
        Create a new C{Token}.

        @param tag: The type of the new C{Token}.
        @type tag: C{string}
        @param startline: The line on which the new C{Token} begins.
        @type startline: C{int}
        @param contents: The normalized contents of the new C{Token}.
        @type contents: C{string}
        @param indent: The indentation of the new C{Token} (in number
            of leading spaces).  A value of C{None} indicates an
            unknown indentation.
        @type indent: C{int} or C{None}
        @param level: The heading-level of this C{Token} if it is a
            heading; C{None}, otherwise.
        @type level: C{int} or C{None}
        @param inline: Is this C{Token} inline as a C{<span>}?.
        @type inline: C{bool}
        """
        self.tag = tag
        self.startline = startline
        self.contents = contents
        self.indent = indent
        self.level = level
        self.inline = inline

    def __repr__(self):
        """
        @rtype: C{string}
        @return: the formal representation of this C{Token}.
            C{Token}s have formal representations of the form::
                <Token: para at line 12>
        """
        return '<Token: %s at line %s>' % (self.tag, self.startline)

    def to_dom(self, doc):
        """
        @param doc: The document the element is created for (not used
            by this implementation).
        @return: a DOM representation of this C{Token}: an element
            with this token's tag whose only child is its contents.
        @rtype: L{Element}
        """
        e = Element(self.tag)
        e.children.append(self.contents)
        return e
664
665
666
667
# Regular expressions for recognizing bullets, compiled once at import
# time.  Raw strings are used so that "\d" and "\w" reach the regex
# engine as written instead of relying on Python passing unknown
# string escapes through.
_ULIST_BULLET = r'[-]( +|$)'
_OLIST_BULLET = r'(\d+[.])+( +|$)'
_FIELD_BULLET = r'@\w+( [^{}:\n]+)?:'
# Any kind of bullet: unordered item, ordered item, or field tag.
_BULLET_RE = re.compile(_ULIST_BULLET + '|' +
                        _OLIST_BULLET + '|' +
                        _FIELD_BULLET)
# List item bullets only.
_LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET)
# Field tags only.
_FIELD_BULLET_RE = re.compile(_FIELD_BULLET)
del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET
677
def _tokenize_doctest(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the doctest block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the doctest block.  Any errors
    generated while tokenizing the doctest block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        doctest block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the doctest block.
    @param tokens: The list to which the new token is appended.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this block will be
        appended to this list.
    @return: The line number of the first line following the doctest
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    # If a line dedents past block_indent, remember the smallest
    # indentation seen; that much leading whitespace is removed from
    # every line of the block.
    min_indent = block_indent

    linenum = start + 1
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # A blank line ends the doctest block.
        if indent == len(line): break

        # A dedent past block_indent is an error, but the line is
        # still included in the block.
        if indent < block_indent:
            min_indent = min(min_indent, indent)
            estr = 'Improper doctest block indentation.'
            errors.append(TokenizationError(estr, linenum))

        # Go on to the next line.
        linenum += 1

    # Add the token, and return the linenum after the token ends.
    contents = [line[min_indent:] for line in lines[start:linenum]]
    contents = '\n'.join(contents)
    tokens.append(Token(Token.DTBLOCK, start, contents, block_indent))
    return linenum
732
def _tokenize_literal(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the literal block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the literal block.  Any errors
    generated while tokenizing the literal block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        literal block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the literal block.
    @param tokens: The list to which the new token is appended.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this block will be
        appended to this list.
    @return: The line number of the first line following the literal
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # A non-blank line that is indented no further than
        # block_indent ends the literal block.  (Blank lines do not.)
        if len(line) != indent and indent <= block_indent:
            break

        # Go on to the next line.
        linenum += 1

    # Remove block_indent+1 leading characters from every line, then
    # trim blank lines from the start and end of the block.
    contents = [line[block_indent+1:] for line in lines[start:linenum]]
    contents = '\n'.join(contents)
    contents = re.sub(r'(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents)
    tokens.append(Token(Token.LBLOCK, start, contents, block_indent))
    return linenum
779
def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
    """
    Construct L{Token}s for the bullet and the first paragraph of the
    list item (or field) starting at C{lines[start]}, and append them
    to C{tokens}.  C{bullet_indent} should be the indentation of the
    list item.  Any errors generated while tokenizing will be
    appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        list item to be tokenized.
    @param bullet_indent: The indentation of C{lines[start]}.  This is
        the indentation of the list item.
    @param tokens: The list to which the new tokens are appended.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the list
        item's first paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type bullet_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    para_indent = None
    doublecolon = lines[start].rstrip()[-2:] == '::'

    # Get the contents of the bullet.
    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
    bcontents = lines[start][bullet_indent:para_start].strip()

    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" markers end paragraphs.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # A blank line ends the token.
        if indent == len(line): break

        # Dedenting past bullet_indent ends the list item.
        if indent < bullet_indent: break

        # A line beginning with a bullet ends the token.
        if _BULLET_RE.match(line, indent): break

        # The first continuation line fixes the paragraph indentation.
        if para_indent is None: para_indent = indent

        # A change in indentation ends the token.
        if indent != para_indent: break

        # Go on to the next line.
        linenum += 1

    # Add the bullet token.
    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent,
                        inline=True))

    # Add the paragraph token, if the item has any text.  Its
    # indentation is None when the paragraph fits on the bullet line.
    pcontents = ([lines[start][para_start:].strip()] +
                 [line.strip() for line in lines[start+1:linenum]])
    pcontents = ' '.join(pcontents).strip()
    if pcontents:
        tokens.append(Token(Token.PARA, start, pcontents, para_indent,
                            inline=True))

    # Return the linenum after the paragraph token ends.
    return linenum
856
def _tokenize_para(lines, start, para_indent, tokens, errors):
    """
    Construct a L{Token} containing the paragraph starting at
    C{lines[start]}, and append it to C{tokens}.  C{para_indent}
    should be the indentation of the paragraph.  If the paragraph's
    second line is an underline of the same length as its first line,
    a heading token is produced instead.  Any errors generated while
    tokenizing the paragraph will be appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        paragraph to be tokenized.
    @param para_indent: The indentation of C{lines[start]}.  This is
        the indentation of the paragraph.
    @param tokens: The list to which the new token is appended.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the
        paragraph (or following the underline, for a heading).

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type para_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    doublecolon = 0
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" markers end paragraphs.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # Blank lines end paragraphs.
        if indent == len(line): break

        # Indentation changes end paragraphs.
        if indent != para_indent: break

        # List bullets end paragraphs.
        if _BULLET_RE.match(line, indent): break

        # Check for mal-formatted field items.
        if line[indent] == '@':
            estr = "Possible mal-formatted field item."
            errors.append(TokenizationError(estr, linenum, is_fatal=0))

        # Go on to the next line.
        linenum += 1

    contents = [line.strip() for line in lines[start:linenum]]

    # Does this token look like a heading?  The second line must start
    # with a heading character, differ in length from the first line
    # by at most 5, and consist of a single repeated character.
    if ((len(contents) < 2) or
        (contents[1][0] not in _HEADING_CHARS) or
        (abs(len(contents[0])-len(contents[1])) > 5)):
        looks_like_heading = 0
    else:
        looks_like_heading = 1
        for char in contents[1]:
            if char != contents[1][0]:
                looks_like_heading = 0
                break

    if looks_like_heading:
        if len(contents[0]) != len(contents[1]):
            # Warn, and treat the lines as an ordinary paragraph.
            estr = ("Possible heading typo: the number of "+
                    "underline characters must match the "+
                    "number of heading characters.")
            errors.append(TokenizationError(estr, start, is_fatal=0))
        else:
            level = _HEADING_CHARS.index(contents[1][0])
            tokens.append(Token(Token.HEADING, start,
                                contents[0], para_indent, level))
            # Only the heading and its underline are consumed.
            return start+2

    # Add the paragraph token, and return the linenum after it ends.
    contents = ' '.join(contents)
    tokens.append(Token(Token.PARA, start, contents, para_indent))
    return linenum
941
def _tokenize(str, errors):
    """
    Split a given formatted docstring into an ordered list of
    C{Token}s, according to the epytext markup rules.

    @param str: The epytext string
    @type str: C{string}
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing will be appended to
        this list.
    @type errors: C{list} of L{ParseError}
    @return: a list of the C{Token}s that make up the given string.
    @rtype: C{list} of L{Token}
    """
    tokens = []
    lines = str.split('\n')

    # Scan through the lines, determining what type of token each
    # block is, and tokenizing it as appropriate.
    linenum = 0
    while linenum < len(lines):
        # Get the current line and its indentation.
        line = lines[linenum]
        indent = len(line)-len(line.lstrip())

        if indent == len(line):
            # Ignore blank lines.
            linenum += 1
            continue
        elif line[indent:indent+4] == '>>> ':
            # Blocks starting with ">>> " are doctest block tokens.
            linenum = _tokenize_doctest(lines, linenum, indent,
                                        tokens, errors)
        elif _BULLET_RE.match(line, indent):
            # Blocks starting with a bullet are list item tokens.
            linenum = _tokenize_listart(lines, linenum, indent,
                                        tokens, errors)
            # Use the item paragraph's indentation (when known) for
            # any literal block that follows it.
            if tokens[-1].indent is not None:
                indent = tokens[-1].indent
        else:
            # Check for mal-formatted field items.
            if line[indent] == '@':
                estr = "Possible mal-formatted field item."
                errors.append(TokenizationError(estr, linenum, is_fatal=0))

            # Anything else is either a paragraph or a heading.
            linenum = _tokenize_para(lines, linenum, indent, tokens, errors)

        # Paragraph tokens ending in '::' initiate literal blocks; one
        # of the two colons is dropped from the paragraph text.
        if (tokens[-1].tag == Token.PARA and
            tokens[-1].contents[-2:] == '::'):
            tokens[-1].contents = tokens[-1].contents[:-1]
            linenum = _tokenize_literal(lines, linenum, indent, tokens, errors)

    return tokens
997
998
999
1000
1001
1002
1003
# Matches a single open or close brace.
_BRACE_RE = re.compile('{|}')
# Matches "text <target>", where the target may carry a "URI:" or
# "URL:" prefix.  Group 1 is the text, group 2 the target.  A raw
# string is used so that "\s" reaches the regex engine as written.
_TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
1006
def _colorize(doc, token, errors, tagName='para'):
    """
    Given a token holding the text of a paragraph, produce a DOM
    C{Element} encoding that paragraph.  Colorized regions are
    represented using DOM C{Element}s, and text is represented using
    strings.

    @param doc: The top-level document element (passed through to the
        graph and link helpers).
    @param token: The token whose C{contents} should be colorized.
    @type token: L{Token}

    @param errors: A list of errors.  Any newly generated errors will
        be appended to this list.
    @type errors: C{list} of L{ParseError}

    @param tagName: The element tag for the DOM C{Element} that should
        be generated.
    @type tagName: C{string}

    @return: a DOM C{Element} encoding the given paragraph.
    @rtype: C{Element}
    """
    text = token.contents

    # Maintain a stack of DOM elements, containing the ancestors of
    # the text currently being analyzed.  New elements are pushed when
    # "{" is encountered, and old elements are popped when "}" is
    # encountered.
    stack = [Element(tagName)]

    # A stack parallel to "stack", holding the index of each element's
    # open brace.  It is only used for error reporting.
    openbrace_stack = [0]

    # Process the string, scanning for '{' and '}'s.  start is the
    # index of the first unprocessed character.  Each time through the
    # loop, we process the text from the first unprocessed character
    # to the next open or close brace.
    start = 0
    while 1:
        match = _BRACE_RE.search(text, start)
        if match is None: break
        end = match.start()

        # Open braces start new colorizing elements.  When preceded
        # by a capital letter, they specify a colored region, as
        # defined by the _COLORIZING_TAGS dictionary.  Otherwise,
        # use a temporary "litbrace" element, which is converted to
        # literal braces once the matching close brace is found.
        if match.group() == '{':
            if (end>0) and 'A' <= text[end-1] <= 'Z':
                if (end-1) > start:
                    stack[-1].children.append(text[start:end-1])
                if text[end-1] not in _COLORIZING_TAGS:
                    estr = "Unknown inline markup tag."
                    errors.append(ColorizingError(estr, token, end-1))
                    stack.append(Element('unknown'))
                else:
                    tag = _COLORIZING_TAGS[text[end-1]]
                    stack.append(Element(tag))
            else:
                if end > start:
                    stack[-1].children.append(text[start:end])
                stack.append(Element('litbrace'))
            openbrace_stack.append(end)
            stack[-2].children.append(stack[-1])

        # Close braces end colorizing elements.
        elif match.group() == '}':
            # Check for (and ignore) unbalanced braces.
            if len(stack) <= 1:
                estr = "Unbalanced '}'."
                errors.append(ColorizingError(estr, token, end))
                start = end + 1
                continue

            # Add any remaining text.
            if end > start:
                stack[-1].children.append(text[start:end])

            # Special handling for symbols: the element must contain
            # exactly one string, naming a known symbol.
            if stack[-1].tag == 'symbol':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid symbol code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    symb = stack[-1].children[0]
                    if symb in _SYMBOLS:
                        # It's a symbol.
                        stack[-2].children[-1] = Element('symbol', symb)
                    else:
                        estr = "Invalid symbol code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for escape elements: replace the
            # element, in its parent, by the text it stands for.
            if stack[-1].tag == 'escape':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid escape code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    escp = stack[-1].children[0]
                    if escp in _ESCAPES:
                        # It's a named escape from _ESCAPES.
                        stack[-2].children[-1] = _ESCAPES[escp]
                    elif len(escp) == 1:
                        # It's a single-character escape (eg E{.}).
                        stack[-2].children[-1] = escp
                    else:
                        estr = "Invalid escape code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for literal braces elements: splice the
            # contents, wrapped in braces, into the parent.
            if stack[-1].tag == 'litbrace':
                stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}']

            # Special handling for graphs.
            if stack[-1].tag == 'graph':
                _colorize_graph(doc, stack[-1], token, end, errors)

            # Special handling for link-type elements.
            if stack[-1].tag in _LINK_COLORIZING_TAGS:
                _colorize_link(doc, stack[-1], token, end, errors)

            # Pop the completed element.
            openbrace_stack.pop()
            stack.pop()

        start = end+1

    # Add any final text.
    if start < len(text):
        stack[-1].children.append(text[start:])

    if len(stack) != 1:
        estr = "Unbalanced '{'."
        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))

    return stack[0]
1146
1147 GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']
1148
1150 """
1151 Eg::
1152 G{classtree}
1153 G{classtree x, y, z}
1154 G{importgraph}
1155 """
1156 bad_graph_spec = False
1157
1158 children = graph.children[:]
1159 graph.children = []
1160
1161 if len(children) != 1 or not isinstance(children[0], basestring):
1162 bad_graph_spec = "Bad graph specification"
1163 else:
1164 pieces = children[0].split(None, 1)
1165 graphtype = pieces[0].replace(':','').strip().lower()
1166 if graphtype in GRAPH_TYPES:
1167 if len(pieces) == 2:
1168 if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*', pieces[1]):
1169 args = pieces[1].replace(',', ' ').replace(':','').split()
1170 else:
1171 bad_graph_spec = "Bad graph arg list"
1172 else:
1173 args = []
1174 else:
1175 bad_graph_spec = ("Bad graph type %s -- use one of %s" %
1176 (pieces[0], ', '.join(GRAPH_TYPES)))
1177
1178 if bad_graph_spec:
1179 errors.append(ColorizingError(bad_graph_spec, token, end))
1180 graph.children.append('none')
1181 graph.children.append('')
1182 return
1183
1184 graph.children.append(graphtype)
1185 for arg in args:
1186 graph.children.append(arg)
1187
def _colorize_link(doc, link, token, end, errors):
    """
    Split the contents of a link-like region into a display name and
    a target, and replace C{link.children} with a C{name} element
    followed by a C{target} element.

    The target is either given explicitly at the end of the last text
    child (as recognized by C{_TARGET_RE}), or, when the region holds
    a single text child, is that text itself.  If no target can be
    determined, or if the target of a C{link} element is not a dotted
    Python identifier, then a L{ColorizingError} is appended to
    C{errors} and C{link} is left unchanged.

    @param doc: Not used by this function.
    @param link: The element to process; its C{tag} selects the
        target clean-up rules (C{'uri'} or C{'link'}).
    @param token: The token that contains the region.  It is only
        used to construct error objects.
    @param end: The character index that is reported in any error.
    @type end: C{int}
    @param errors: A list that any generated error is appended to.
    @type errors: C{list}
    """
    variables = link.children[:]

    # The target can only be found in trailing text.
    if len(variables) == 0 or not isinstance(variables[-1], basestring):
        estr = "Bad %s target." % link.tag
        errors.append(ColorizingError(estr, token, end))
        return

    match2 = _TARGET_RE.match(variables[-1])
    if match2:
        # Explicit target: keep only the display text in the name.
        (text, target) = match2.groups()
        variables[-1] = text
    elif len(variables) == 1:
        # Implicit target: the text doubles as the target.
        target = variables[0]
    else:
        estr = "Bad %s target." % link.tag
        errors.append(ColorizingError(estr, token, end))
        return

    name_elt = Element('name', *variables)

    # Targets never contain whitespace.
    target = re.sub(r'\s', '', target)
    if link.tag == 'uri':
        # If no scheme was given, then assume mailto for things that
        # look like an e-mail address, and http for everything else.
        if not re.match(r'\w+:', target):
            # The local part may contain '.', '+' and '-' (for
            # example first.last@example.com).
            if re.match(r'[\w.+-]+@(\w+)(\.\w+)*', target):
                target = 'mailto:' + target
            else:
                target = 'http://' + target
    elif link.tag == 'link':
        # Drop a trailing argument list, so that "f()" links to "f".
        target = re.sub(r'\(.*\)$', '', target)
        if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
            estr = "Bad link target."
            errors.append(ColorizingError(estr, token, end))
            return

    target_elt = Element('target', target)
    link.children = [name_elt, target_elt]
1235
1236
1237
1238
1239
def to_epytext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string.
    This is the inverse operation from L{parse}.  I.e., assuming there
    are no errors, the following is true:
        - C{parse(to_epytext(tree)) == tree}

    The inverse is true, except that whitespace, line wrapping, and
    character escaping may be done differently.
        - C{to_epytext(parse(str)) == str} (approximately)

    When C{tree} is a string, the returned string still contains the
    internal placeholder characters that stand for literal braces;
    they are only turned back into braces or escapes once the
    enclosing paragraph, heading or block is rendered.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    @raise ValueError: If C{tree} contains an unknown element.
    """
    # Text: hide literal braces behind '\0' and '\1', so that they
    # can be told apart from the braces of generated inline markup.
    if isinstance(tree, basestring):
        return tree.replace('{', '\0').replace('}', '\1')

    if tree.tag == 'epytext': indent -= 2
    if tree.tag == 'section': seclevel += 1
    variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # A rendered literal block starts with the marker '\2'.  Use it
    # to put back the second colon of the "::" that introduced it.
    childstr = re.sub(':(\\s*)\x02', r'::\1', childstr)

    if tree.tag == 'para':
        text = wordwrap(childstr, indent)+'\n'
        # Escape anything that would otherwise be read as a list
        # bullet, a field tag, or a literal block introducer.
        text = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', text)
        text = re.sub(r'((^|\n)\s*)-', r'\1E{-}', text)
        text = re.sub(r'((^|\n)\s*)@', r'\1E{@}', text)
        text = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', text)
        return text.replace('\0', 'E{lb}').replace('\1', 'E{rb}')
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' ' + bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        text = childstr.replace('\0', 'E{lb}').replace('\1', 'E{rb}')
        # The underline has to be as long as the heading as it is
        # written out, i.e. after the brace escapes were expanded.
        uline = len(text)*_HEADING_CHARS[seclevel-1]
        return (indent-2)*' ' + text + '\n' + (indent-2)*' ' + uline + '\n'
    elif tree.tag == 'doctestblock':
        text = childstr.replace('\0', '{').replace('\1', '}')
        # Two columns deeper than the block's own indentation.
        lines = [(indent+2)*' ' + line for line in text.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        text = childstr.replace('\0', '{').replace('\1', '}')
        lines = [(indent+1)*' ' + line for line in text.split('\n')]
        # The leading '\2' lets the parent restore the "::".
        return '\2' + '\n'.join(lines) + '\n\n'
    elif tree.tag == 'field':
        # Children are: the tag, any number of args, then the body.
        numargs = 0
        while (numargs+1 < len(tree.children) and
               tree.children[numargs+1].tag == 'arg'):
            numargs += 1
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        text = indent*' ' + '@' + variables[0]
        if args: text += '(' + ', '.join(args) + ')'
        return text + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: look up the letter that produces this tag.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
    raise ValueError('Unknown DOM element %r' % tree.tag)
1326
SYMBOL_TO_PLAINTEXT = {
    'crarr': '\\',
    }

def to_plaintext(tree, indent=0, seclevel=0):
    """
    Render a DOM document encoding epytext as plain text.  The result
    resembles the output of C{to_epytext}, but inline markup is
    dropped and escaped characters appear in their unescaped form.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The plaintext string corresponding to C{tree}.
    @rtype: C{string}
    @raise ValueError: If a C{uri} or C{link} element does not have
        exactly two children.
    """
    if isinstance(tree, basestring):
        return tree

    kind = tree.tag
    if kind == 'section':
        seclevel += 1

    # Children of a list item line up behind its bullet; the top-level
    # element adds no indentation; everything else adds two columns.
    if kind == 'epytext':
        child_indent = indent
    elif kind == 'li' and tree.attribs.get('bullet'):
        child_indent = indent + 1 + len(tree.attribs.get('bullet'))
    else:
        child_indent = indent + 2
    variables = [to_plaintext(child, child_indent, seclevel)
                 for child in tree.children]
    joined = ''.join(variables)
    pad = indent * ' '

    if kind == 'para':
        return wordwrap(joined, indent) + '\n'

    if kind == 'li':
        return pad + (tree.attribs.get('bullet') or '-') + ' ' + joined.lstrip()

    if kind == 'heading':
        margin = (indent-2) * ' '
        rule = len(joined) * _HEADING_CHARS[seclevel-1]
        return margin + joined + '\n' + margin + rule + '\n'

    if kind in ('doctestblock', 'literalblock'):
        # Doctest blocks are shifted two columns, literal blocks one.
        if kind == 'doctestblock':
            prefix = (indent+2) * ' '
        else:
            prefix = (indent+1) * ' '
        shifted = [prefix + line for line in joined.split('\n')]
        return '\n'.join(shifted) + '\n\n'

    if kind == 'fieldlist':
        return joined

    if kind == 'field':
        # Children are: the tag, any number of args, then the body.
        nargs = 0
        while tree.children[nargs+1].tag == 'arg':
            nargs += 1
        header = pad + '@' + variables[0]
        arglist = variables[1:1+nargs]
        if arglist:
            header += '(' + ', '.join(arglist) + ')'
        return header + ':\n' + ''.join(variables[1+nargs:])

    if kind == 'uri':
        if len(variables) != 2:
            raise ValueError('Bad URI ')
        if variables[0] == variables[1]:
            return '<%s>' % variables[1]
        return '%r<%s>' % (variables[0], variables[1])

    if kind == 'link':
        if len(variables) != 2:
            raise ValueError('Bad Link')
        return '%s' % variables[0]

    if kind in ('olist', 'ulist'):
        # Lists are always condensed: no blank lines between items.
        return joined.replace('\n\n', '\n') + '\n'

    if kind == 'symbol':
        return '%s' % SYMBOL_TO_PLAINTEXT.get(joined, joined)

    if kind == 'graph':
        return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))

    # Anything else is passed through unchanged.
    return joined
1411
def to_debug(tree, indent=4, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string,
    annotated with extra debugging information.  This function is
    similar to L{to_epytext}, but it adds explicit information about
    where different blocks begin, along the left margin.

    The left margin is six columns wide.  It ends in a vertical bar,
    and it names the block that starts on that line (C{P}, C{LI},
    C{LIST}, C{SEC}I{n}, C{DTST}, C{LIT} or C{FLD}).

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    @raise ValueError: If C{tree} contains an unknown element.
    """
    # Text: hide literal braces behind '\0' and '\1', so that they
    # can be told apart from the braces of generated inline markup.
    if isinstance(tree, basestring):
        return tree.replace('{', '\0').replace('}', '\1')

    # The margin of a line on which no block starts.  Every block
    # marker below has this same width (six columns).
    margin = '     |'

    if tree.tag == 'section': seclevel += 1
    variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # A rendered literal block starts with the marker '\2'.  Use it
    # to put back the second colon of the "::" that introduced it;
    # the paragraph before it ends with an empty margin line.
    childstr = re.sub(':( *\n     \\|\n)\x02', r'::\1', childstr)

    if tree.tag == 'para':
        text = wordwrap(childstr, indent-6, 69)+'\n'
        # Escape anything that would otherwise be read as a list
        # bullet, a field tag, or a literal block introducer.
        text = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', text)
        text = re.sub(r'((^|\n)\s*)-', r'\1E{-}', text)
        text = re.sub(r'((^|\n)\s*)@', r'\1E{@}', text)
        text = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', text)
        text = text.replace('\0', 'E{lb}').replace('\1', 'E{rb}')
        lines = text.rstrip().split('\n')
        lines[0] = '   P>|' + lines[0]
        lines[1:] = [margin + l for l in lines[1:]]
        return '\n'.join(lines) + '\n' + margin + '\n'
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        # childstr[6:] drops the margin of the item's first child.
        return ('  LI>|' + (indent-6)*' ' + bullet + ' ' +
                childstr[6:].lstrip())
    elif tree.tag in ('olist', 'ulist'):
        return 'LIST>|' + (indent-4)*' ' + childstr[indent+2:]
    elif tree.tag == 'heading':
        text = childstr.replace('\0', 'E{lb}').replace('\1', 'E{rb}')
        # The underline has to be as long as the heading as it is
        # written out, i.e. after the brace escapes were expanded.
        uline = len(text)*_HEADING_CHARS[seclevel-1]
        return ('SEC' + repr(seclevel) + '>|' + (indent-8)*' ' + text + '\n' +
                margin + (indent-8)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        text = childstr.replace('\0', '{').replace('\1', '}')
        lines = [margin + (indent-4)*' ' + line for line in text.split('\n')]
        # Replace the five blanks of the first margin by the marker.
        lines[0] = 'DTST>' + lines[0][5:]
        return '\n'.join(lines) + '\n' + margin + '\n'
    elif tree.tag == 'literalblock':
        text = childstr.replace('\0', '{').replace('\1', '}')
        lines = [margin + (indent-5)*' ' + line for line in text.split('\n')]
        lines[0] = ' LIT>' + lines[0][5:]
        # The leading '\2' lets the parent restore the "::".
        return '\2' + '\n'.join(lines) + '\n' + margin + '\n'
    elif tree.tag == 'field':
        # Children are: the tag, any number of args, then the body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        text = ' FLD>|' + (indent-6)*' ' + '@' + variables[0]
        if args: text += '(' + ', '.join(args) + ')'
        return text + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: look up the letter that produces this tag.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
    raise ValueError('Unknown DOM element %r' % tree.tag)
1501
def to_rst(tree, indent=0, seclevel=0, wrap_startindex=0):
    """
    Convert a DOM document encoding epytext into a reStructuredText
    markup string.  (Because rst is fairly loosely defined, it is
    possible that this function will produce incorrect output in some
    cases.)

    C{tree} itself is not modified.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @param wrap_startindex: Passed on to C{wordwrap} as the start
        index of the first line of a paragraph; it is also used to
        position field tags and list bullets.
    @type wrap_startindex: C{int}
    @return: The reStructuredText string corresponding to C{tree}.
    @rtype: C{string}
    @raise ValueError: If C{tree} contains an unknown element.
    """
    if isinstance(tree, basestring):
        # Backslash-escape backslashes, and any inline markup
        # character that touches a word.
        s = tree.replace('\\', '\\\\')
        return re.sub(r'(\b[*`|\[]|[*`|\]]\b)', r'\\\1', s)
    elif tree.tag == 'para':
        # Join the pieces.  Where inline markup is directly followed
        # by more text, separate the two with an escaped space.
        s = ''
        for child in tree.children:
            childstr = to_rst(child, indent, seclevel)
            if ( s[-1:] in ('*','`','|') and
                 childstr[:1] not in (' ','\n','.',',',';',"'",'"',')',':') ):
                s += r'\ '
            s += childstr
        s = wordwrap(s, indent, 75, wrap_startindex)+'\n'
        # Escape line starts (and ends) that rst would interpret:
        # bullets
        s = re.sub(r'(?m)^(\s*)([\*\-\+])', r'\1\\\2', s)
        # enumerators
        s = re.sub(r'(?m)^(\s*)(\d+\.|\#\.)', r'\1\\\2', s)
        # lines that consist of one repeated punctuation character
        s = re.sub(r'(?m)^(\s*)(([^a-zA-Z0-9\s])\3\3*\s*$)', r'\1\\\2', s)
        # field, option and line block markers
        s = re.sub(r'(?m)^(\s*)([:\-\/\|])', r'\1\\\2', s)
        # a trailing "::"
        s = re.sub(r'(?m)(::\s*)$', r'\\\1', s)
        # a bare doctest prompt
        s = re.sub(r'(?m)^(\s*)(>>>)$', r'\1\\\2', s)
        # table borders
        s = re.sub(r'(?m)^(\s*)([+=-])', r'\1\\\2', s)
        # explicit markup
        s = re.sub(r'(?m)^(\s*)(\.\.)', r'\1\\\2', s)
        return s
    elif tree.tag == 'doctestblock':
        for c in tree.children: assert isinstance(c, basestring)
        childstr = ''.join(tree.children)
        return '\n\n%s\n\n' % '\n'.join([' '*indent+line
                                        for line in childstr.split('\n')])
    elif tree.tag == 'literalblock':
        for c in tree.children: assert isinstance(c, basestring)
        childstr = ''.join(tree.children)
        return '\n\n::\n\n%s\n\n' % '\n'.join([' '*(indent+1)+line
                                              for line in childstr.split('\n')])
    elif tree.tag == 'link':
        name = to_rst(tree.children[0], indent, seclevel).strip()
        target = ''.join(tree.children[1].children).strip()
        # Use the short form if the name is just the target, possibly
        # followed by an argument list.
        if target == re.sub(r'\(.*\)$', '', name).replace('\\_', '_'):
            return '`%s`' % target
        else:
            return '`%s <%s>`' % (name, target)
    elif tree.tag == 'uri':
        name = to_rst(tree.children[0], indent, seclevel).strip()
        target = ''.join(tree.children[1].children).strip()
        if target == name and name.startswith('http://'):
            return target
        else:
            return '`%s <%s>`__' % (name, target)
    elif tree.tag == 'target':
        return '<%s>' % ''.join(tree.children)
    elif tree.tag == 'symbol':
        return '|%s|' % ''.join(tree.children)
    else:
        # Work on a copy of the child list, so that consuming the tag
        # and args of a field does not modify the caller's tree.
        children = list(tree.children)

        # Headings are underlined according to the depth of the
        # section that contains them.
        if tree.tag == 'section':
            seclevel += 1

        if tree.tag == 'li':
            bullet = tree.attribs.get('bullet') or '-'
            # Keep only the last component of a nested enumerator.
            bullet = re.sub(r'^(\d+.)*(\d+.)$', r'\2', bullet)
            child_indent = indent + len(bullet) + 1
            child_wrap_startindex = child_indent
        elif tree.tag == 'field':
            tagname = ''.join(children.pop(0).children)
            args = []
            while children and children[0].tag == 'arg':
                args.append(''.join(children.pop(0).children))
            child_indent = max(indent, wrap_startindex) + 4
            tag = ':%s%s: ' % (tagname, ''.join([' %s' % arg for arg in args]))
            child_wrap_startindex = indent+len(tag)
        else:
            child_indent = indent
            child_wrap_startindex = wrap_startindex

        # Only the first child follows the bullet or field tag.
        if children:
            childstrs = ([to_rst(children[0], child_indent,
                                 seclevel, child_wrap_startindex)] +
                         [to_rst(c, indent, seclevel)
                          for c in children[1:]])
        else:
            childstrs = []
        childstr = ''.join(childstrs)

        if tree.tag in ('fieldlist', 'olist', 'ulist'):
            # The list is tight if no item, except possibly the last
            # one, spans several lines.
            tight = True
            for item in childstrs[:-1]:
                if '\n' in item.strip():
                    tight = False
            if tight:
                return '\n'.join([item.rstrip() for item in childstrs])+'\n\n'
            else:
                return childstr+'\n'
        elif tree.tag == 'field':
            return '%s%s%s' % ((indent-wrap_startindex)*' ', tag, childstr)
        elif tree.tag == 'li':
            return '%s%s %s' % ((indent-wrap_startindex)*' ', bullet, childstr)
        elif tree.tag == 'graph':
            return '\n\n(GRAPH: %s)\n\n' % childstr
        elif tree.tag == 'heading':
            uline = len(childstr)*_HEADING_CHARS[seclevel-1]
            return '%s%s\n%s%s\n' % (indent*' ', childstr, indent*' ', uline)
        elif tree.tag in ('tag', 'arg', 'section', 'name'):
            return childstr
        elif tree.tag == 'code':
            if [c for c in children if not isinstance(c, basestring)]:
                # Markup nested inside an inline literal; report that
                # the converted markup is emitted inside the literal.
                sys.stdout.write('Warning: Generating "``%s``"\n' % childstr)
                return '``%s``' % childstr
            else:
                # Plain text inside a literal needs no escaping.
                return '``%s``' % ''.join(children)
        elif tree.tag == 'math' or tree.tag == 'italic':
            return '*%s*' % childstr
        elif tree.tag == 'indexed':
            return '`%s`:term:' % childstr
        elif tree.tag == 'bold':
            return '**%s**' % childstr
        elif tree.tag == 'epytext':
            # Collapse excess blank lines, and merge a paragraph that
            # ends in ":" with the "::" of a following literal block.
            childstr = re.sub('\n{3,}', '\n\n', childstr)
            childstr = re.sub(':\\s*\n+::\n', '::\n', childstr)
            return childstr.rstrip()+'\n'
        else:
            raise ValueError('Unknown DOM element %r' % tree.tag)
1650
1651
1652
1653
1654
SCRWIDTH = 75
def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    If the parser itself raises an exception, then the warnings and
    errors that were collected up to that point are still displayed,
    and the exception is then propagated unchanged.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: C{Element}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    def emit(line):
        # One line of report output (C{str} is shadowed by the
        # parameter, so format with '%s' instead of calling it).
        stream.write('%s\n' % (line,))

    collected = []
    warnings = []
    errors = []
    try:
        val = parse(str, collected)
    finally:
        # This runs whether or not parse() raised, so the report is
        # complete in both cases; an exception from parse() resumes
        # propagating, with its original traceback, afterwards.
        errors = [e for e in collected if e.is_fatal()]
        if show_warnings:
            warnings = [e for e in collected if not e.is_fatal()]
        warnings.sort()
        errors.sort()

        if warnings:
            emit('='*SCRWIDTH)
            emit("WARNINGS")
            emit('-'*SCRWIDTH)
            for warning in warnings:
                emit(warning.as_warning())
            emit('='*SCRWIDTH)
        if errors and show_errors:
            if not warnings: emit('='*SCRWIDTH)
            emit("ERRORS")
            emit('-'*SCRWIDTH)
            for error in errors:
                emit(error)
            emit('='*SCRWIDTH)

    if errors: raise SyntaxError('Encountered Errors')
    return val
1707
1708
1709
1710
1711
1713 """
1714 An error generated while tokenizing a formatted documentation
1715 string.
1716 """
1717
1719 """
1720 An error generated while structuring a formatted documentation
1721 string.
1722 """
1723
1725 """
1726 An error generated while colorizing a paragraph.
1727 """
    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.

        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occurred.  Its
            C{startline} is passed on to C{ParseError.__init__}.
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occurred.
        @type charnum: C{int}
        @param is_fatal: Passed on unchanged to
            C{ParseError.__init__}.
        """
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum
1743
1744 CONTEXT_RANGE = 20
1746 RANGE = self.CONTEXT_RANGE
1747 if self.charnum <= RANGE:
1748 left = self.token.contents[0:self.charnum]
1749 else:
1750 left = '...'+self.token.contents[self.charnum-RANGE:self.charnum]
1751 if (len(self.token.contents)-self.charnum) <= RANGE:
1752 right = self.token.contents[self.charnum:]
1753 else:
1754 right = (self.token.contents[self.charnum:self.charnum+RANGE]
1755 + '...')
1756 return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))
1757
1758
1759
1760
1761
1763 """
1764 Return a DOM document matching the epytext DTD, containing a
1765 single literal block. That literal block will include the
1766 contents of the given string. This method is typically used as a
1767 fall-back when the parser fails.
1768
1769 @param str: The string which should be enclosed in a literal
1770 block.
1771 @type str: C{string}
1772
1773 @return: A DOM document containing C{str} in a single literal
1774 block.
1775 @rtype: C{Element}
1776 """
1777 return Element('epytext', Element('literalblock', str))
1778
1780 """
1781 Return a DOM document matching the epytext DTD, containing a
1782 single paragraph. That paragraph will include the contents of the
1783 given string. This can be used to wrap some forms of
1784 automatically generated information (such as type names) in
1785 paragraphs.
1786
1787 @param str: The string which should be enclosed in a paragraph.
1788 @type str: C{string}
1789
1790 @return: A DOM document containing C{str} in a single paragraph.
1791 @rtype: C{Element}
1792 """
1793 return Element('epytext', Element('para', str))
1794
1795
1796
1797
1798
1800 """
1801 Parse the given docstring, which is formatted using epytext; and
1802 return a C{ParsedDocstring} representation of its contents.
1803 @param docstring: The docstring to parse
1804 @type docstring: C{string}
1805 @param errors: A list where any errors generated during parsing
1806 will be stored.
1807 @type errors: C{list} of L{ParseError}
1808 @param options: Extra options. Unknown options are ignored.
1809 Currently, no extra options are defined.
1810 @rtype: L{ParsedDocstring}
1811 """
1812 return ParsedEpytextDocstring(parse(docstring, errors), **options)
1813
1814 -class ParsedEpytextDocstring(ParsedDocstring):
1815 SYMBOL_TO_HTML = {
1816
1817 '<-': '←', '->': '→', '^': '↑', 'v': '↓',
1818
1819
1820 'alpha': 'α', 'beta': 'β', 'gamma': 'γ',
1821 'delta': 'δ', 'epsilon': 'ε', 'zeta': 'ζ',
1822 'eta': 'η', 'theta': 'θ', 'iota': 'ι',
1823 'kappa': 'κ', 'lambda': 'λ', 'mu': 'μ',
1824 'nu': 'ν', 'xi': 'ξ', 'omicron': 'ο',
1825 'pi': 'π', 'rho': 'ρ', 'sigma': 'σ',
1826 'tau': 'τ', 'upsilon': 'υ', 'phi': 'φ',
1827 'chi': 'χ', 'psi': 'ψ', 'omega': 'ω',
1828 'Alpha': 'Α', 'Beta': 'Β', 'Gamma': 'Γ',
1829 'Delta': 'Δ', 'Epsilon': 'Ε', 'Zeta': 'Ζ',
1830 'Eta': 'Η', 'Theta': 'Θ', 'Iota': 'Ι',
1831 'Kappa': 'Κ', 'Lambda': 'Λ', 'Mu': 'Μ',
1832 'Nu': 'Ν', 'Xi': 'Ξ', 'Omicron': 'Ο',
1833 'Pi': 'Π', 'Rho': 'Ρ', 'Sigma': 'Σ',
1834 'Tau': 'Τ', 'Upsilon': 'Υ', 'Phi': 'Φ',
1835 'Chi': 'Χ', 'Psi': 'Ψ', 'Omega': 'Ω',
1836
1837
1838 'larr': '←', 'rarr': '→', 'uarr': '↑',
1839 'darr': '↓', 'harr': '↔', 'crarr': '↵',
1840 'lArr': '⇐', 'rArr': '⇒', 'uArr': '⇑',
1841 'dArr': '⇓', 'hArr': '⇔',
1842 'copy': '©', 'times': '×', 'forall': '∀',
1843 'exist': '∃', 'part': '∂',
1844 'empty': '∅', 'isin': '∈', 'notin': '∉',
1845 'ni': '∋', 'prod': '∏', 'sum': '∑',
1846 'prop': '∝', 'infin': '∞', 'ang': '∠',
1847 'and': '∧', 'or': '∨', 'cap': '∩', 'cup': '∪',
1848 'int': '∫', 'there4': '∴', 'sim': '∼',
1849 'cong': '≅', 'asymp': '≈', 'ne': '≠',
1850 'equiv': '≡', 'le': '≤', 'ge': '≥',
1851 'sub': '⊂', 'sup': '⊃', 'nsub': '⊄',
1852 'sube': '⊆', 'supe': '⊇', 'oplus': '⊕',
1853 'otimes': '⊗', 'perp': '⊥',
1854
1855
1856 'infinity': '∞', 'integral': '∫', 'product': '∏',
1857 '<=': '≤', '>=': '≥',
1858 }
1859
1860 SYMBOL_TO_LATEX = {
1861
1862 '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)',
1863 '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)',
1864
1865
1866
1867 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma':
1868 r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon':
1869 r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)',
1870 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa':
1871 r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)',
1872 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi':
1873 r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau':
1874 r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)',
1875 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega':
1876 r'\(\omega\)',
1877
1878 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma':
1879 r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon':
1880 r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)',
1881 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa':
1882 r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)',
1883 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi':
1884 r'\(\Pi\)', 'ho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau':
1885 r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)',
1886 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega':
1887 r'\(\Omega\)',
1888
1889
1890 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr':
1891 r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr':
1892 r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)',
1893 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr':
1894 r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr':
1895 r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}',
1896 'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist':
1897 r'\(\exists\)', 'part': r'\(\partial\)', 'empty':
1898 r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)',
1899 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)',
1900 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang':
1901 r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap':
1902 r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4':
1903 r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)',
1904 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv':
1905 r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub':
1906 r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)',
1907 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus':
1908 r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)',
1909
1910
1911 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product':
1912 r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)',
1913 }
1914
1915 - def __init__(self, dom_tree, **options):
1916 self._tree = dom_tree
1917
1918 self._html = self._latex = self._plaintext = None
1919 self._terms = None
1920
1921 if options.get('inline') and self._tree is not None:
1922 for elt in self._tree.children:
1923 elt.attribs['inline'] = True
1924
    def __str__(self):
        """
        @return: The string representation of the wrapped DOM tree.
        """
        return str(self._tree)
1927
1928 - def to_html(self, docstring_linker, directory=None, docindex=None,
1929 context=None, **options):
1930 if self._html is not None: return self._html
1931 if self._tree is None: return ''
1932 indent = options.get('indent', 0)
1933 self._html = self._to_html(self._tree, docstring_linker, directory,
1934 docindex, context, indent)
1935 return self._html
1936
1937 - def to_latex(self, docstring_linker, directory=None, docindex=None,
1938 context=None, **options):
1939 if self._latex is not None: return self._latex
1940 if self._tree is None: return ''
1941 indent = options.get('indent', 0)
1942 self._hyperref = options.get('hyperref', 1)
1943 self._latex = self._to_latex(self._tree, docstring_linker, directory,
1944 docindex, context, indent)
1945 return self._latex
1946
1947 - def to_plaintext(self, docstring_linker, **options):
1948
1949
1950 if self._tree is None: return ''
1951 if 'indent' in options:
1952 self._plaintext = to_plaintext(self._tree,
1953 indent=options['indent'])
1954 else:
1955 self._plaintext = to_plaintext(self._tree)
1956 return self._plaintext
1957
1958 - def _index_term_key(self, tree):
1959 str = to_plaintext(tree)
1960 str = re.sub(r'\s\s+', '-', str)
1961 return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)
1962
1963 - def _to_html(self, tree, linker, directory, docindex, context,
1964 indent=0, seclevel=0):
1965 if isinstance(tree, basestring):
1966 return plaintext_to_html(tree)
1967
1968 if tree.tag == 'epytext': indent -= 2
1969 if tree.tag == 'section': seclevel += 1
1970
1971
1972 variables = [self._to_html(c, linker, directory, docindex, context,
1973 indent+2, seclevel)
1974 for c in tree.children]
1975
1976
1977 childstr = ''.join(variables)
1978
1979
1980 if tree.tag == 'para':
1981 return wordwrap(
1982 (tree.attribs.get('inline') and '%s' or '<p>%s</p>') % childstr,
1983 indent)
1984 elif tree.tag == 'code':
1985 style = tree.attribs.get('style')
1986 if style:
1987 return '<code class="%s">%s</code>' % (style, childstr)
1988 else:
1989 return '<code>%s</code>' % childstr
1990 elif tree.tag == 'uri':
1991 return ('<a href="%s" target="_top">%s</a>' %
1992 (variables[1], variables[0]))
1993 elif tree.tag == 'link':
1994 return linker.translate_identifier_xref(variables[1], variables[0])
1995 elif tree.tag == 'italic':
1996 return '<i>%s</i>' % childstr
1997 elif tree.tag == 'math':
1998 return '<i class="math">%s</i>' % childstr
1999 elif tree.tag == 'indexed':
2000 term = Element('epytext', *tree.children, **tree.attribs)
2001 return linker.translate_indexterm(ParsedEpytextDocstring(term))
2002
2003
2004 elif tree.tag == 'bold':
2005 return '<b>%s</b>' % childstr
2006 elif tree.tag == 'ulist':
2007 return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
2008 elif tree.tag == 'olist':
2009 start = tree.attribs.get('start') or ''
2010 return ('%s<ol start="%s">\n%s%s</ol>\n' %
2011 (indent*' ', start, childstr, indent*' '))
2012 elif tree.tag == 'li':
2013 return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
2014 elif tree.tag == 'heading':
2015 return ('%s<h%s class="heading">%s</h%s>\n' %
2016 ((indent-2)*' ', seclevel, childstr, seclevel))
2017 elif tree.tag == 'literalblock':
2018 return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
2019 elif tree.tag == 'doctestblock':
2020 return doctest_to_html(tree.children[0].strip())
2021 elif tree.tag == 'fieldlist':
2022 raise AssertionError("There should not be any field lists left")
2023 elif tree.tag in ('epytext', 'section', 'tag', 'arg',
2024 'name', 'target', 'html'):
2025 return childstr
2026 elif tree.tag == 'symbol':
2027 symbol = tree.children[0]
2028 return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
2029 elif tree.tag == 'graph':
2030 if directory is None: return ''
2031
2032 graph = self._build_graph(variables[0], variables[1:], linker,
2033 docindex, context)
2034 if not graph: return ''
2035
2036 return graph.to_html(directory)
2037 else:
2038 raise ValueError('Unknown epytext DOM element %r' % tree.tag)
2039
2040
def _build_graph(self, graph_type, graph_args, linker,
                 docindex, context):
    """
    Build the graph object requested by a C{graph} element.

    @param graph_type: The kind of graph to build: C{'classtree'},
        C{'packagetree'}, C{'importgraph'} or C{'callgraph'}.
    @param graph_args: Names to resolve with
        C{docindex.find(name, context)}.  May be empty, in which case
        C{context} itself is used where that makes sense.
    @param linker: Passed through to the graph constructor.
    @param docindex: Index used to resolve C{graph_args}; its C{root}
        is scanned for modules when building an import graph.
    @param context: The documented object the graph element belongs
        to; also passed through to the graph constructor.
    @return: The object returned by the matching builder in
        C{epydoc.docwriter.dotgraph}, or C{None} (after logging a
        warning) if the graph cannot be constructed.
    """
    def find_all(names):
        # find() may return None for a name it cannot resolve; drop
        # those entries so the graph builders only ever receive docs.
        found = [docindex.find(name, context) for name in names]
        return [doc for doc in found if doc is not None]

    if graph_type == 'classtree':
        from epydoc.apidoc import ClassDoc
        if graph_args:
            bases = find_all(graph_args)
            if not bases:
                log.warning("Could not construct class tree: none of "
                            "the specified base classes were found.")
                return None
        elif isinstance(context, ClassDoc):
            bases = [context]
        else:
            log.warning("Could not construct class tree: you must "
                        "specify one or more base classes.")
            return None
        from epydoc.docwriter.dotgraph import class_tree_graph
        return class_tree_graph(bases, linker, context)

    elif graph_type == 'packagetree':
        from epydoc.apidoc import ModuleDoc
        if graph_args:
            packages = find_all(graph_args)
            if not packages:
                log.warning("Could not construct package tree: none of "
                            "the specified root packages were found.")
                return None
        elif isinstance(context, ModuleDoc):
            packages = [context]
        else:
            log.warning("Could not construct package tree: you must "
                        "specify one or more root packages.")
            return None
        from epydoc.docwriter.dotgraph import package_tree_graph
        return package_tree_graph(packages, linker, context)

    elif graph_type == 'importgraph':
        from epydoc.apidoc import ModuleDoc
        # The import graph always covers every module at the index root;
        # graph_args is not consulted for this graph type.
        modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
        from epydoc.docwriter.dotgraph import import_graph
        return import_graph(modules, docindex, linker, context)

    elif graph_type == 'callgraph':
        if graph_args:
            docs = find_all(graph_args)
        else:
            docs = [context]
        from epydoc.docwriter.dotgraph import call_graph
        return call_graph(docs, docindex, linker, context)

    else:
        log.warning("Unknown graph type %s" % graph_type)
        return None
2086
def _to_latex(self, tree, linker, directory, docindex, context,
              indent=0, seclevel=0, breakany=0):
    """
    Recursively render an epytext DOM node as LaTeX source.

    @param tree: An element of the DOM tree, or a string leaf.
    @param linker: Object used to translate cross-references
        (C{translate_identifier_xref}) and index terms
        (C{translate_indexterm}).
    @param directory: Handed to the graph's C{to_latex} method; if
        C{None}, graph elements render as the empty string.
    @param docindex: Passed through to L{_build_graph}.
    @param context: Passed through to L{_build_graph}.
    @param indent: Number of spaces of indentation for this node.
    @param seclevel: Current section nesting depth.
    @param breakany: Passed through to C{plaintext_to_latex} when
        rendering string leaves.
    @return: The LaTeX text for C{tree}.
    @raise ValueError: If a C{uri} or C{link} element does not have
        exactly two children.
    """
    # String leaves are converted directly; there is nothing to recurse into.
    if isinstance(tree, basestring):
        return plaintext_to_latex(tree, breakany=breakany)

    tag = tree.tag
    if tag == 'section':
        seclevel += 1

    # Everything below the root element is indented two extra columns.
    if tag == 'epytext':
        child_indent = indent
    else:
        child_indent = indent + 2

    # Children are always rendered first, whatever the tag turns out to be.
    rendered = []
    for child in tree.children:
        rendered.append(self._to_latex(child, linker, directory, docindex,
                                       context, child_indent, seclevel,
                                       breakany))
    childstr = ''.join(rendered)
    pad = ' ' * indent

    # Inline elements that simply wrap their content in one command.
    inline_templates = {
        'code': '\\texttt{%s}',
        'italic': '\\textit{%s}',
        'math': '\\textit{%s}',
        'bold': '\\textbf{%s}',
    }
    if tag in inline_templates:
        return inline_templates[tag] % childstr

    if tag == 'para':
        return wordwrap(childstr, indent) + '\n'

    if tag == 'uri':
        if len(rendered) != 2:
            raise ValueError('Bad URI ')
        label, target = rendered
        if self._hyperref:
            # The href target is built from the second child's own text
            # rather than from its rendered form.
            uri = tree.children[1].children[0]
            uri = uri.replace('{\\textasciitilde}', '~')
            uri = uri.replace('\\#', '#')
            if label == target:
                return '\\href{%s}{\\textit{%s}}' % (uri, target)
            return ('%s\\footnote{\\href{%s}{%s}}' %
                    (label, uri, target))
        if label == target:
            return '\\textit{%s}' % target
        return '%s\\footnote{%s}' % (label, target)

    if tag == 'link':
        if len(rendered) != 2:
            raise ValueError('Bad Link')
        return linker.translate_identifier_xref(rendered[1], rendered[0])

    if tag == 'indexed':
        # Re-wrap the children as a standalone docstring for the linker.
        term = Element('epytext', *tree.children, **tree.attribs)
        return linker.translate_indexterm(ParsedEpytextDocstring(term))

    if tag == 'li':
        return pad + '\\item ' + childstr.lstrip()

    if tag == 'heading':
        # Nesting deeper than three levels is clamped to "subsub".
        depth = min(seclevel, 3) - 1
        command = '\\EpydocUser' + ('sub' * depth + 'section').capitalize()
        return ' ' * (indent - 2) + '%s{%s}\n\n' % (command,
                                                    childstr.strip())

    if tag == 'doctestblock':
        return doctest_to_latex(tree.children[0].strip())

    if tag == 'literalblock':
        return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr

    if tag == 'fieldlist':
        return pad + '{omitted fieldlist}\n'

    if tag == 'olist':
        return ''.join([pad, '\\begin{enumerate}\n\n',
                        pad, '\\setlength{\\parskip}{0.5ex}\n',
                        childstr,
                        pad, '\\end{enumerate}\n\n'])

    if tag == 'ulist':
        return ''.join([pad, '\\begin{itemize}\n',
                        pad, '\\setlength{\\parskip}{0.6ex}\n',
                        childstr,
                        pad, '\\end{itemize}\n\n'])

    if tag == 'symbol':
        symbol = tree.children[0]
        return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)

    if tag == 'graph':
        if directory is None:
            return ''
        # First rendered child is the graph type; the rest are its arguments.
        graph = self._build_graph(rendered[0], rendered[1:], linker,
                                  docindex, context)
        if not graph:
            return ''
        return graph.to_latex(directory)

    # Any other element is passed through as its rendered children.
    return childstr
2171
# Used by summary() to pick out the first sentence of a text fragment.
# Group 1 captures any leading whitespace plus the shortest run of
# characters (newlines included) that ends in a period; that period must
# be followed by whitespace or by the end of the string.
_SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')
2173
def summary(self):
    """
    Extract a one-sentence summary from this docstring.

    The summary is taken from the first C{para} element, found by
    walking the tree and descending into C{section}, C{ulist},
    C{olist} and C{li} elements.  If no paragraph is found and the
    docstring consists of a single C{literalblock}, the text of that
    block up to its first blank line is used instead.

    @return: A pair C{(summary, long_docs)}.  C{summary} is a
        C{ParsedEpytextDocstring} holding the paragraph content up to
        and including the first sentence-ending period (or the whole
        paragraph if there is none).  C{long_docs} is true if there
        is further content beyond that summary, not counting a
        C{fieldlist}.  If this docstring has no tree, C{(self, False)}
        is returned.
    """
    if self._tree is None:
        return self, False
    tree = self._tree
    doc = Element('epytext')

    # Find the first paragraph, descending into containers on the way.
    candidates = tree.children
    while (len(candidates) > 0) and (candidates[0].tag != 'para'):
        if candidates[0].tag in ('section', 'ulist', 'olist', 'li'):
            candidates = candidates[0].children
        else:
            candidates = candidates[1:]

    # Special case: a docstring that is a single literal block.  Use the
    # text before its first blank line as a synthetic paragraph.
    if (len(candidates) == 0 and len(tree.children) == 1 and
        tree.children[0].tag == 'literalblock'):
        first_chunk = re.split(r'\n\s*(\n|$).*',
                               tree.children[0].children[0], 1)[0]
        candidates = [Element('para')]
        candidates[0].children.append(first_chunk)

    # No paragraph at all: the summary is an empty epytext document.
    if len(candidates) == 0:
        return ParsedEpytextDocstring(doc), False

    # Anything after the first paragraph, other than a field list,
    # means there is more documentation than the summary shows.
    long_docs = False
    for sibling in candidates[1:]:
        if isinstance(sibling, Element) and sibling.tag == 'fieldlist':
            continue
        long_docs = True
        break

    # Copy paragraph children up to the end of the first sentence.
    parachildren = candidates[0].children
    para = Element('para', inline=True)
    doc.children.append(para)
    last_index = len(parachildren) - 1
    for index, parachild in enumerate(parachildren):
        if isinstance(parachild, basestring):
            m = self._SUMMARY_RE.match(parachild)
            if m:
                para.children.append(m.group(1))
                # Compare positions, not object identity: two equal
                # strings may be the very same object, which would make
                # an identity test mistake an earlier child for the last.
                if index != last_index:
                    long_docs = True
                if not long_docs:
                    # Non-blank text after the sentence also counts.
                    other = parachild[m.end():]
                    if other and not other.isspace():
                        long_docs = True
                return ParsedEpytextDocstring(doc), long_docs
        para.children.append(parachild)

    return ParsedEpytextDocstring(doc), long_docs
2225
def split_fields(self, errors=None):
    """
    Split this docstring into its description and its fields.

    The stored tree is not modified: the description and every field
    body are built as new elements, so this method can safely be
    called more than once on the same docstring.

    @param errors: Accepted for interface compatibility; not used by
        this implementation.
    @return: A pair C{(description, fields)}.  C{description} is a
        C{ParsedEpytextDocstring} for everything except the trailing
        field list, or C{None} if nothing is left.  C{fields} is a
        list of C{Field} objects, one per entry of the field list.
        If this docstring has no tree, C{(self, ())} is returned.
    """
    if self._tree is None:
        return (self, ())

    # Work on a copy of the root so that removing the field list does
    # not affect the tree stored on this object.
    tree = Element(self._tree.tag, *self._tree.children,
                   **self._tree.attribs)
    fields = []

    if (tree.children and
        tree.children[-1].tag == 'fieldlist' and
        tree.children[-1].children):
        field_nodes = tree.children[-1].children
        del tree.children[-1]

        for field in field_nodes:
            # Consume from a copy of the child list; the field node
            # itself is still referenced by self._tree.
            parts = list(field.children)

            # The first child holds the tag name.
            tag = parts.pop(0).children[0].lower()

            # An optional 'arg' child follows the tag.
            if parts and parts[0].tag == 'arg':
                arg = parts.pop(0).children[0]
            else:
                arg = None

            # Whatever remains is the field body, wrapped as a
            # standalone epytext document.
            body = Element('epytext', *parts, **field.attribs)
            fields.append(Field(tag, arg, ParsedEpytextDocstring(body)))

    # The remaining tree is the description, unless it is empty.
    if tree.children and tree.children[0].children:
        return ParsedEpytextDocstring(tree), fields
    else:
        return None, fields
2259
2260
def index_terms(self):
    """
    Return the list of index terms found in this docstring.

    The list is computed on first use by walking the tree with
    C{_index_terms} and is then cached in C{self._terms}.

    @return: The cached list of index terms.
    """
    if self._terms is not None:
        return self._terms

    # Store the list before filling it, then populate it in place.
    collected = []
    self._terms = collected
    self._index_terms(self._tree, collected)
    return self._terms
2266
def _index_terms(self, tree, terms):
    """
    Append every index term at or below C{tree} to C{terms}.

    @param tree: An element of the DOM tree, a string leaf, or
        C{None}.  Strings and C{None} contribute nothing.
    @param terms: List that receives one C{ParsedEpytextDocstring} per
        C{indexed} element, in pre-order.
    """
    is_leaf = tree is None or isinstance(tree, basestring)
    if not is_leaf:
        if tree.tag == 'indexed':
            # Wrap the indexed element's content as its own document.
            terms.append(ParsedEpytextDocstring(
                Element('epytext', *tree.children, **tree.attribs)))

        # Keep walking: the children are visited for every element.
        for node in tree.children:
            self._index_terms(node, terms)
2278