1
2
3
4
5
6
7
8
9 """
10 Parser for epytext strings. Epytext is a lightweight markup whose
11 primary intended application is Python documentation strings. This
12 parser converts Epytext strings to a simple DOM-like representation
13 (encoded as a tree of L{Element} objects and strings). Epytext
14 strings can contain the following X{structural blocks}:
15
16 - X{epytext}: The top-level element of the DOM tree.
17 - X{para}: A paragraph of text. Paragraphs contain no newlines,
18 and all spaces are soft.
19 - X{section}: A section or subsection.
20 - X{field}: A tagged field. These fields provide information
21 about specific aspects of a Python object, such as the
22 description of a function's parameter, or the author of a
23 module.
24 - X{literalblock}: A block of literal text. This text should be
25 displayed as it would be displayed in plaintext. The
26 parser removes the appropriate amount of leading whitespace
27 from each line in the literal block.
28 - X{doctestblock}: A block containing sample python code,
29 formatted according to the specifications of the C{doctest}
30 module.
31 - X{ulist}: An unordered list.
32 - X{olist}: An ordered list.
33 - X{li}: A list item. This tag is used both for unordered list
34 items and for ordered list items.
35
36 Additionally, the following X{inline regions} may be used within
37 C{para} blocks:
38
39 - X{code}: Source code and identifiers.
40 - X{math}: Mathematical expressions.
41 - X{index}: A term which should be included in an index, if one
42 is generated.
43 - X{italic}: Italicized text.
44 - X{bold}: Bold-faced text.
45 - X{uri}: A Universal Resource Indicator (URI) or Universal
46 Resource Locator (URL)
47 - X{link}: A Python identifier which should be hyperlinked to
48 the named object's documentation, when possible.
49
50 The returned DOM tree will conform to the following Document Type
51 Description::
52
53 <!ENTITY % colorized '(code | math | index | italic |
54 bold | uri | link | symbol)*'>
55
56 <!ELEMENT epytext ((para | literalblock | doctestblock |
57 section | ulist | olist)*, fieldlist?)>
58
59 <!ELEMENT para (#PCDATA | %colorized;)*>
60
61 <!ELEMENT section (para | literalblock | doctestblock |
62 section | ulist | olist)+>
63
64 <!ELEMENT fieldlist (field+)>
65 <!ELEMENT field (tag, arg?, (para | literalblock | doctestblock |
66 ulist | olist)+)>
67 <!ELEMENT tag (#PCDATA)>
68 <!ELEMENT arg (#PCDATA)>
69
70 <!ELEMENT literalblock (#PCDATA | %colorized;)*>
71 <!ELEMENT doctestblock (#PCDATA)>
72
73 <!ELEMENT ulist (li+)>
74 <!ELEMENT olist (li+)>
75 <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+>
76 <!ATTLIST li bullet NMTOKEN #IMPLIED>
77 <!ATTLIST olist start NMTOKEN #IMPLIED>
78
79 <!ELEMENT uri (name, target)>
80 <!ELEMENT link (name, target)>
81 <!ELEMENT name (#PCDATA | %colorized;)*>
82 <!ELEMENT target (#PCDATA)>
83
84 <!ELEMENT code (#PCDATA | %colorized;)*>
85 <!ELEMENT math (#PCDATA | %colorized;)*>
86 <!ELEMENT italic (#PCDATA | %colorized;)*>
87 <!ELEMENT bold (#PCDATA | %colorized;)*>
88 <!ELEMENT indexed (#PCDATA | %colorized;)>
89 <!ATTLIST code style CDATA #IMPLIED>
90
91 <!ELEMENT symbol (#PCDATA)>
92
93 @var SYMBOLS: A list of the escape symbols that are supported
94 by epydoc. Currently the following symbols are supported:
95 <<<SYMBOLS>>>
96 """
97
98
99
# Markup language (and natural language) used by this module's docstrings.
__docformat__ = "epytext en"
101
102
103
104
105
106
107
108
109 import re, string, types, sys, os.path
110 from epydoc.markup import *
111 from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex
112 from epydoc.markup.doctest import doctest_to_html, doctest_to_latex
113
114
115
116
117
class Element:
    """
    A very simple DOM-like representation for parsed epytext
    documents.  Each epytext document is encoded as a tree whose nodes
    are L{Element} objects, and whose leaves are C{string}s.  Each
    node is marked by a I{tag} and zero or more I{attributes}.  Each
    attribute is a mapping from a string key to a string value.
    """
    def __init__(self, tag, *children, **attribs):
        """
        Create a new element.

        @param tag: The string tag identifying the type of the element.
        @param children: The element's children, in document order;
            they are copied into a fresh list.
        @param attribs: The element's attributes, given as keyword
            arguments.
        """
        self.tag = tag
        """A string tag indicating the type of this element.
        @type: C{string}"""

        self.children = list(children)
        """A list of the children of this element.
        @type: C{list} of (C{string} or C{Element})"""

        self.attribs = attribs
        """A dictionary mapping attribute names to attribute values
        for this element.
        @type: C{dict} from C{string} to C{string}"""

    def __str__(self):
        """
        Return a string representation of this element, using XML
        notation.
        @bug: Doesn't escape '<' or '&' or '>'.
        """
        attribs = ''.join([' %s=%r' % (key, value)
                           for (key, value) in self.attribs.items()])
        body = ''.join([str(child) for child in self.children])
        return '<%s%s>%s</%s>' % (self.tag, attribs, body, self.tag)

    def __repr__(self):
        """
        Return a constructor-style representation of this element, of
        the form C{Element(tag, child1, ..., name=value, ...)}.
        """
        # The tag is formatted with %r so that it is quoted exactly like
        # the children are; the result then reads as a valid call to the
        # constructor.
        attribs = ''.join([', %s=%r' % (key, value)
                           for (key, value) in self.attribs.items()])
        # Wrap each child in a 1-tuple so that '%' never tries to unpack
        # the child itself.
        args = ''.join([', %r' % (child,) for child in self.children])
        return 'Element(%r%s%s)' % (self.tag, args, attribs)
155
156
157
158
159
160
161
# Characters recognised as heading underlines.
# NOTE(review): presumably listed in order of heading depth -- confirm
# against the tokenizer, which is not visible here.
_HEADING_CHARS = "=-~"

# Escape names and the literal character each one stands for.
_ESCAPES = {
    'lb': '{',
    'rb': '}',
}
166
167
# Names of the symbols supported by epydoc (see the @var SYMBOLS entry in
# the module docstring).
SYMBOLS = [
    # Arrows
    '<-', '->', '^', 'v',

    # Greek letters (lower case, then upper case)
    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
    'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
    'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
    'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
    'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
    'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
    'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
    'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',

    # Entity-style names
    'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
    'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
    'copy', 'times', 'forall', 'exist', 'part',
    'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
    'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
    'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
    'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
    'sube', 'supe', 'oplus', 'otimes', 'perp',

    # Alternate (long) names and comparison operators
    'infinity', 'integral', 'product',
    '>=', '<=',
]

# Dictionary form of SYMBOLS, for quick membership tests.  The loop
# variable is deliberately left bound: the docstring-substitution code
# further down deletes it.
_SYMBOLS = {}
for symbol in SYMBOLS:
    _SYMBOLS[symbol] = 1
199
200
# Splice one list entry per supported symbol into the module docstring,
# replacing the <<<SYMBOLS>>> placeholder, then remove the temporaries
# from the module namespace.
_entry_format = ' - C{E{S}{%s}}=S{%s}'
symblist = ' ' + ';\n '.join([_entry_format % (symbol, symbol)
                              for symbol in SYMBOLS])
__doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
del symbol, symblist, _entry_format
206
207
# Maps each single-letter inline markup tag to the name of the element
# it produces.
_COLORIZING_TAGS = {
    'C': 'code',
    'M': 'math',
    'X': 'indexed',
    'I': 'italic',
    'B': 'bold',
    'U': 'uri',
    'L': 'link',
    'E': 'escape',
    'S': 'symbol',
    'G': 'graph',
}

# The element names, among those above, that denote links.
_LINK_COLORIZING_TAGS = ['link', 'uri']
223
224
225
226
227
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string.  Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string,
        or C{None} if C{errors} was given and a fatal error was
        encountered.
    @rtype: C{Element}
    @raise ParseError: If C{errors} is C{None} and a fatal error is
        encountered while parsing.  The first fatal error is raised.
    """
    # Initialize the errors list.  Without a caller-supplied list there
    # is nowhere to report errors, so fatal ones are raised instead.
    if errors is None:
        errors = []
        raise_on_error = True
    else:
        raise_on_error = False

    # Preprocess the string: normalize CR-LF line endings and expand
    # tabs to spaces.
    str = str.replace('\015\012', '\012')
    str = str.expandtabs()

    # Tokenize the input string.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?
    encountered_field = False

    # Create the document element; every block is added beneath it.
    doc = Element('epytext')

    # stack holds the chain of currently open elements (the document
    # element is at index 1, below a None placeholder); indent_stack
    # holds the indentation recorded for the element at the same
    # position, with None meaning "not known yet" (it is filled in when
    # the first paragraph or heading is added to that element).
    stack = [None, doc]
    indent_stack = [-1, None]

    for token in tokens:
        # Close any blocks that this token's indentation or type shows
        # to be complete.
        _pop_completed_blocks(token, stack, indent_stack)

        # Dispatch on the token type.
        if token.tag == Token.PARA:
            _add_para(doc, token, stack, indent_stack, errors)
        elif token.tag == Token.HEADING:
            _add_section(doc, token, stack, indent_stack, errors)
        elif token.tag == Token.LBLOCK:
            stack[-1].children.append(token.to_dom(doc))
        elif token.tag == Token.DTBLOCK:
            stack[-1].children.append(token.to_dom(doc))
        elif token.tag == Token.BULLET:
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            assert 0, 'Unknown token type: '+token.tag

        # Once a field has been seen, report any later token that leaves
        # the stack at depth three or less without a field on top.
        if stack[-1].tag == 'field':
            encountered_field = True
        elif encountered_field:
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "+
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # Graphs are written with inline markup but are moved out of the
    # elements that contain them; see _raise_graphs.
    for child in doc.children:
        _raise_graphs(child, doc)

    # If there was a fatal error, then signal it.  The first *fatal*
    # error is raised: the first recorded error may be a non-fatal one.
    fatal_errors = [e for e in errors if e.is_fatal()]
    if fatal_errors:
        if raise_on_error:
            raise fatal_errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    return doc
336
338
def _raise_graphs(tree, parent):
    """
    Move C{graph} elements out of the non-block elements that contain
    them.  Whenever C{tree} is not a block-level container and has a
    C{graph} child, C{tree} is replaced in C{parent.children} by three
    elements: a copy of C{tree} holding the children before the graph,
    the graph itself, and a copy of C{tree} holding the children after
    it.  This is repeated for every graph child.

    @param tree: The element to process; its children are processed
        recursively first.
    @param parent: The element whose C{children} list contains C{tree}.
    """
    # Handle the children first, noting whether any of them is a graph.
    found_graph = False
    for node in tree.children:
        if not isinstance(node, Element):
            continue
        _raise_graphs(node, tree)
        if node.tag == 'graph':
            found_graph = True

    # Block-level containers may hold graphs directly; nothing to do.
    block_tags = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li')
    if not found_graph or tree.tag in block_tags:
        return

    offset = 0
    slot = parent.children.index(tree)
    # This walks the *original* child list; 'tree' is rebound below to
    # the right-hand remainder, whose children are the ones still to be
    # visited, so 'offset' always indexes into the current 'tree'.
    for node in tree.children:
        if isinstance(node, Element) and node.tag == 'graph':
            before = tree.children[:offset]
            after = tree.children[offset+1:]
            parent.children[slot:slot+1] = [
                Element(tree.tag, *before, **tree.attribs),
                node,
                Element(tree.tag, *after, **tree.attribs)]
            offset = 0
            slot += 2
            tree = parent.children[slot]
        else:
            offset += 1
363
def _pop_completed_blocks(token, stack, indent_stack):
    """
    Pop any completed blocks off the stack.  This includes any
    blocks that we have dedented past, as well as any list item
    blocks that we've dedented to.  The top element on the stack
    should only be a list if we're about to start a new list
    item (i.e., if the next token is a bullet).

    C{stack} and C{indent_stack} are modified in place; the two bottom
    entries are never popped.  Tokens whose C{indent} is C{None} leave
    both stacks untouched.
    """
    indent = token.indent
    if indent is None:
        return

    while len(stack) > 2:
        top = stack[-1]
        top_indent = indent_stack[-1]

        # Dedent past a block.  When the top block's own indentation is
        # not known yet, compare against the block below it.
        if top_indent is not None:
            dedented = indent < top_indent
        else:
            dedented = indent < indent_stack[-2]

        complete = (
            dedented
            # Dedent to a list item that is followed by another bullet
            # with the same indentation.
            or (token.tag == 'bullet' and indent == indent_stack[-2]
                and top.tag in ('li', 'field'))
            # End of a list: the next token is not a bullet, or it is a
            # bullet ending in ':'.
            or (top.tag in ('ulist', 'olist')
                and (token.tag != 'bullet' or token.contents[-1] == ':')))

        # Pop the block if it's complete; otherwise, we're done.
        if not complete:
            return
        stack.pop()
        indent_stack.pop()
395
def _add_para(doc, para_token, stack, indent_stack, errors):
    """
    Colorize the given paragraph, and add it to the DOM tree.

    The paragraph becomes a child of the element on top of C{stack}.
    If that element has no recorded indentation yet, the paragraph's
    indentation is recorded for it; otherwise the paragraph must match
    the recorded indentation, or a L{StructuringError} is appended to
    C{errors} and the paragraph is dropped.
    """
    # The first block added to a container fixes its indentation.
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent

    if para_token.indent != indent_stack[-1]:
        errors.append(StructuringError("Improper paragraph indentation.",
                                       para_token.startline))
        return

    # Colorize the paragraph and add it.
    para = _colorize(doc, para_token, errors)
    if para_token.inline:
        para.attribs['inline'] = True
    stack[-1].children.append(para)
411
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """
    Add a new section to the DOM tree, with the given heading.

    Structural problems (bad indentation, a heading nested inside a
    non-section element, or a heading level deeper than the current
    nesting allows) are appended to C{errors} as L{StructuringError}s;
    the section is still created.  C{stack} and C{indent_stack} are
    trimmed to the heading's level and the new section is pushed on
    top, with its indentation not yet known.
    """
    def report(message):
        errors.append(StructuringError(message, heading_token.startline))

    # The first block added to a container fixes its indentation.
    if indent_stack[-1] is None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        report("Improper heading indentation.")

    # Everything open above the document element must be a section.
    non_sections = [elt for elt in stack[2:] if elt.tag != "section"]
    if non_sections:
        report("Headings must occur at the top level.")

    # Stack depth at which a heading of this level belongs.
    depth = heading_token.level + 2
    if depth > len(stack):
        report("Wrong underline character for heading.")

    # Pop the appropriate number of headings so we're at the
    # correct level.
    del stack[depth:]
    del indent_stack[depth:]

    # Colorize the heading.
    head = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.
    sec = Element("section", head)
    stack[-1].children.append(sec)
    stack.append(sec)
    indent_stack.append(None)
444
445 -def _add_list(doc, bullet_token, stack, indent_stack, errors):
446 """
447 Add a new list item or field to the DOM tree, with the given
448 bullet or field tag. When necessary, create the associated
449 list.
450 """
451
452 if bullet_token.contents[-1] == '-':
453 list_type = 'ulist'
454 elif bullet_token.contents[-1] == '.':
455 list_type = 'olist'
456 elif bullet_token.contents[-1] == ':':
457 list_type = 'fieldlist'
458 else:
459 raise AssertionError('Bad Bullet: %r' % bullet_token.contents)
460
461
462 newlist = 0
463 if stack[-1].tag != list_type:
464 newlist = 1
465 elif list_type == 'olist' and stack[-1].tag == 'olist':
466 old_listitem = stack[-1].children[-1]
467 old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
468 new_bullet = bullet_token.contents.split('.')[:-1]
469 if (new_bullet[:-1] != old_bullet[:-1] or
470 int(new_bullet[-1]) != int(old_bullet[-1])+1):
471 newlist = 1
472
473
474 if newlist:
475 if stack[-1].tag is 'fieldlist':
476
477
478
479
480
481
482 estr = "Lists must be indented."
483 errors.append(StructuringError(estr, bullet_token.startline))
484 if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
485 stack.pop()
486 indent_stack.pop()
487
488 if (list_type != 'fieldlist' and indent_stack[-1] is not None and
489 bullet_token.indent == indent_stack[-1]):
490
491