1
2
3
4
5
6
7
8
9
10 """
11 Markup language support for docstrings. Each submodule defines a
12 parser for a single markup language. These parsers convert an
13 object's docstring to a L{ParsedDocstring}, a standard intermediate
14 representation that can be used to generate output.
15 C{ParsedDocstring}s support the following operations:
16 - output generation (L{to_plaintext()<ParsedDocstring.to_plaintext>},
17 L{to_html()<ParsedDocstring.to_html>}, and
18 L{to_latex()<ParsedDocstring.to_latex>}).
19 - Summarization (L{summary()<ParsedDocstring.summary>}).
20 - Field extraction (L{split_fields()<ParsedDocstring.split_fields>}).
21 - Index term extraction (L{index_terms()<ParsedDocstring.index_terms>}).
22
23 The L{parse()} function provides a single interface to the
24 C{epydoc.markup} package: it takes a docstring and the name of a
25 markup language; delegates to the appropriate parser; and returns the
26 parsed docstring (along with any errors or warnings that were
27 generated).
28
29 The C{ParsedDocstring} output generation methods (C{to_M{format}()})
30 use a L{DocstringLinker} to link the docstring output with the rest of
31 the documentation that epydoc generates. C{DocstringLinker}s are
32 currently responsible for translating two kinds of crossreference:
33 - index terms (L{translate_indexterm()
34 <DocstringLinker.translate_indexterm>}).
35 - identifier crossreferences (L{translate_identifier_xref()
36 <DocstringLinker.translate_identifier_xref>}).
37
38 A parsed docstring's fields can be extracted using the
39 L{ParsedDocstring.split_fields()} method. This method divides a
40 docstring into its main body and a list of L{Field}s, each of which
41 encodes a single field. Each field's body is encoded as a
42 C{ParsedDocstring}.
43
44 Markup errors are represented using L{ParseError}s. These exception
45 classes record information about the cause, location, and severity of
46 each error.
47
48 @sort: parse, ParsedDocstring, Field, DocstringLinker
49 @group Errors and Warnings: ParseError
50 @group Utility Functions: parse_type_of
51 @var SCRWIDTH: The default width with which text will be wrapped
52 when formatting the output of the parser.
53 @type SCRWIDTH: C{int}
54 @var _parse_warnings: Used by L{_parse_warn}.
55 """
56 __docformat__ = 'epytext en'
57
58 import re, types, sys
59 from epydoc import log
60 from epydoc.util import plaintext_to_html, plaintext_to_latex
61 import epydoc
62 from epydoc.compat import *
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Registry of the known markup languages.  Keys are lower-case markup
# names.  Each value is either the dotted name of the module that
# provides a ``parse_docstring`` function for that language (a string),
# or a parse function itself.  parse() swaps a module-name string for the
# imported function the first time that markup language is used.
_markup_language_registry = {
    'restructuredtext': 'epydoc.markup.restructuredtext',
    'epytext': 'epydoc.markup.epytext',
    'plaintext': 'epydoc.markup.plaintext',
    'javadoc': 'epydoc.markup.javadoc',
}
86
88 """
89 Register a new markup language named C{name}, which can be parsed
90 by the function C{parse_function}.
91
92 @param name: The name of the markup language. C{name} should be a
93 simple identifier, such as C{'epytext'} or C{'restructuredtext'}.
94 Markup language names are case insensitive.
95
96 @param parse_function: A function which can be used to parse the
97 markup language, and returns a L{ParsedDocstring}. It should
98 have the following signature:
99
100 >>> def parse(s, errors):
101 ... 'returns a ParsedDocstring'
102
103 Where:
104 - C{s} is the string to parse. (C{s} will be a unicode
105 string.)
106 - C{errors} is a list; any errors that are generated
107 during docstring parsing should be appended to this
108 list (as L{ParseError} objects).
109 """
110 _markup_language_registry[name.lower()] = parse_function
111
# Lower-cased names of the markup languages whose parser parse() has
# been asked to run so far.
MARKUP_LANGUAGES_USED = set()
113
def parse(docstring, markup='plaintext', errors=None, **options):
    """
    Parse the given docstring, and use it to construct a
    C{ParsedDocstring}.  If the markup language cannot be used (bad
    or unsupported name, import failure, internal parser error) or
    the parser reports a fatal C{ParseError}, the docstring is
    parsed as plaintext instead.

    @type docstring: C{string}
    @param docstring: The docstring to encode.
    @type markup: C{string}
    @param markup: The name of the markup language that is used by
        the docstring.  If the markup language is not supported, then
        the docstring will be treated as plaintext.  The markup name
        is case-insensitive.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra keyword arguments; passed through unchanged
        to the markup language's parse function.
    @rtype: L{ParsedDocstring}
    @return: A L{ParsedDocstring} that encodes the contents of
        C{docstring}.
    @raise ParseError: If C{errors} is C{None} and the markup parser
        reports a fatal error.
    """
    # Remember whether the caller supplied an error list *before*
    # substituting our own: without one, fatal errors are raised.
    raise_on_error = (errors is None)
    if errors is None:
        errors = []

    # Markup language names are case insensitive.
    markup = markup.lower()

    # Sanity check on the name.  (Only the start of the name is
    # checked; the registry lookup below rejects anything unknown.)
    if not re.match(r'\w+', markup):
        _parse_warn('Bad markup language name %r. Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, **options)

    if markup not in _markup_language_registry:
        _parse_warn('Unsupported markup language %r. Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, **options)

    parse_docstring = _markup_language_registry[markup]

    # A string entry names the module that provides the parser.  Import
    # it now and cache the function in the registry for later calls.
    # ``__import__`` is used rather than ``exec`` so that the registry
    # string is never executed as code, and so that the lookup does not
    # depend on ``exec`` being able to rebind a local variable.
    if isinstance(parse_docstring, basestring):
        module_name = parse_docstring
        try:
            module = __import__(module_name, {}, {}, ['parse_docstring'])
            if not hasattr(module, 'parse_docstring'):
                # Mirror what ``from <module> import parse_docstring``
                # does when the name is missing.
                raise ImportError('cannot import name parse_docstring')
            parse_docstring = module.parse_docstring
        except ImportError:
            # sys.exc_info() keeps this valid on every Python version
            # (neither ``except X, e`` nor ``except X as e`` is).
            e = sys.exc_info()[1]
            _parse_warn('Error importing %s for markup language %s: %s' %
                        (module_name, markup, e))
            return _parse_as_plaintext(docstring, errors, **options)
        _markup_language_registry[markup] = parse_docstring

    # Record that this markup language was actually used.
    MARKUP_LANGUAGES_USED.add(markup)

    # Run the parser.  An unexpected exception inside the parser is
    # logged and the docstring is treated as plaintext, unless
    # epydoc.DEBUG is set, in which case the exception propagates.
    try:
        parsed_docstring = parse_docstring(docstring, errors, **options)
    except KeyboardInterrupt:
        raise
    except Exception:
        if epydoc.DEBUG:
            raise
        e = sys.exc_info()[1]
        log.error('Internal error while parsing a docstring: %s; '
                  'treating docstring as plaintext' % e)
        return _parse_as_plaintext(docstring, errors, **options)

    # Fatal errors either propagate (no error list was supplied) or
    # demote the docstring to plaintext.
    fatal_errors = [err for err in errors if err.is_fatal()]
    if fatal_errors:
        if raise_on_error:
            raise fatal_errors[0]
        return _parse_as_plaintext(docstring, errors, **options)

    return parsed_docstring


def _parse_as_plaintext(docstring, errors, **options):
    """
    Fallback used by L{parse()}: parse C{docstring} with the plaintext
    markup parser, passing C{errors} and C{options} straight through.
    """
    # Imported at call time, exactly as the original inline fallbacks
    # did (presumably so the submodule is not loaded while this package
    # is still being initialised -- NOTE(review): confirm).
    import epydoc.markup.plaintext as plaintext
    return plaintext.parse_docstring(docstring, errors, **options)
194
195
# State used by _parse_warn (per the module docstring); starts out empty.
_parse_warnings = {}
206
207
208
209
211 """
212 A standard intermediate representation for parsed docstrings that
213 can be used to generate output. Parsed docstrings are produced by
214 markup parsers (such as L{epytext.parse} or L{javadoc.parse}).
215 C{ParsedDocstring}s support several kinds of operation:
216 - output generation (L{to_plaintext()}, L{to_html()}, and
217 L{to_latex()}).
218 - Summarization (L{summary()}).
219 - Field extraction (L{split_fields()}).
220 - Index term extraction (L{index_terms()}).
221
222 The output generation methods (C{to_M{format}()}) use a
223 L{DocstringLinker} to link the docstring output with the rest
224 of the documentation that epydoc generates.
225
226 Subclassing
227 ===========
228 The only method that a subclass is I{required} to implement is
229 L{to_plaintext()}; but it is often useful to override the other
230 methods. The default behavior of each method is described below:
231 - C{to_I{format}}: Calls C{to_plaintext}, and uses the string it
232 returns to generate verbatim output.
233 - C{summary}: Returns C{self} (i.e., the entire docstring).
234 - C{split_fields}: Returns C{(self, [])} (i.e., extracts no
235 fields).
236 - C{index_terms}: Returns C{[]} (i.e., extracts no index terms).
237
238 If and when epydoc adds more output formats, new C{to_I{format}}
239 methods will be added to this base class; but they will always
240 be given a default implementation.
241 """
243 """
244 Split this docstring into its body and its fields.
245
246 @return: A tuple C{(M{body}, M{fields})}, where C{M{body}} is
247 the main body of this docstring, and C{M{fields}} is a list
248 of its fields. If the resulting body is empty, return
249 C{None} for the body.
250 @rtype: C{(L{ParsedDocstring}, list of L{Field})}
251 @param errors: A list where any errors generated during
252 splitting will be stored. If no list is specified, then
253 errors will be ignored.
254 @type errors: C{list} of L{ParseError}
255 """
256
257 return self, []
258
260 """
261 @return: A pair consisting of a short summary of this docstring and a
262 boolean value indicating whether there is further documentation
263 in addition to the summary. Typically, the summary consists of the
264 first sentence of the docstring.
265 @rtype: (L{ParsedDocstring}, C{bool})
266 """
267
268 return self, False
269
271 """
272 @return: A new parsed docstring containing the concatenation
273 of this docstring and C{other}.
274 @raise ValueError: If the two parsed docstrings are
275 incompatible.
276 """
277 return ConcatenatedDocstring(self, other)
278
280
def to_html(self, docstring_linker, **options):
    """
    Render this docstring as HTML.

    This default implementation produces verbatim output: the
    plaintext rendering is escaped with C{plaintext_to_html} and
    wrapped in a C{<pre class="literalblock">} element.

    @param docstring_linker: An HTML translator for crossreference
        links into and out of the docstring; handed on to
        L{to_plaintext()}.
    @type docstring_linker: L{DocstringLinker}
    @param options: Any extra options for the output.  They are
        accepted but not used by this implementation (they are not
        forwarded to L{to_plaintext()}).
    @return: An HTML fragment that encodes this docstring.
    @rtype: C{string}
    """
    text = self.to_plaintext(docstring_linker)
    escaped = plaintext_to_html(text)
    return '<pre class="literalblock">\n%s\n</pre>\n' % escaped
296
def to_latex(self, docstring_linker, **options):
    """
    Render this docstring as LaTeX.

    This default implementation produces verbatim output: the
    plaintext rendering is escaped with C{plaintext_to_latex} and
    wrapped in an C{alltt} environment.

    @param docstring_linker: A LaTeX translator for crossreference
        links into and out of the docstring; handed on to
        L{to_plaintext()}.
    @type docstring_linker: L{DocstringLinker}
    @param options: Any extra options for the output.  They are
        accepted but not used by this implementation (they are not
        forwarded to L{to_plaintext()}).
    @return: A LaTeX fragment that encodes this docstring.
    @rtype: C{string}
    """
    text = self.to_plaintext(docstring_linker)
    escaped = plaintext_to_latex(text)
    return '\\begin{alltt}\n%s\\end{alltt}\n\n' % escaped
312
def to_plaintext(self, docstring_linker, **options):
    """
    Translate this docstring to plaintext.

    This base implementation always raises C{NotImplementedError}.

    @param docstring_linker: A plaintext translator for
        crossreference links into and out of the docstring.
    @type docstring_linker: L{DocstringLinker}
    @param options: Any extra options for the output.  Unknown
        options are ignored.
    @return: A plaintext fragment that encodes this docstring.
    @rtype: C{string}
    @raise NotImplementedError: Always, in this base implementation.
    """
    # Call form rather than ``raise Class, 'msg'``: the call form is
    # valid on every Python version and matches the other
    # NotImplementedError raises in this module.
    raise NotImplementedError('ParsedDocstring.to_plaintext()')
326
328 """
329 @return: The list of index terms that are defined in this
330 docstring. Each of these items will be added to the index
331 page of the documentation.
332 @rtype: C{list} of C{ParsedDocstring}
333 """
334
335 return []
336
337
338
339
def __init__(self, *parsed_docstrings):
    """
    Remember the docstrings that make up this concatenation.

    @param parsed_docstrings: The docstrings to concatenate, in
        order.  Entries that are C{None} are dropped.
    """
    kept = []
    for candidate in parsed_docstrings:
        if candidate is not None:
            kept.append(candidate)
    self._parsed_docstrings = kept
344
346 bodies = []
347 fields = []
348 for doc in self._parsed_docstrings:
349 b,f = doc.split_fields()
350 bodies.append(b)
351 fields.extend(f)
352
353 return ConcatenatedDocstring(*bodies), fields
354
356 return self._parsed_docstrings[0].summary()
357
def to_html(self, docstring_linker, **options):
    """
    Translate every wrapped docstring to HTML, in order, and return
    the fragments joined into one string.

    @param docstring_linker: Passed unchanged to each wrapped
        docstring's C{to_html()} method.
    @param options: Extra output options; forwarded unchanged to
        each wrapped docstring.
    @return: The concatenated HTML output (the empty string when
        there are no wrapped docstrings).
    @rtype: C{string}
    """
    # str.join builds the result in one pass instead of re-copying
    # the accumulated string on every ``+=``.
    return ''.join([doc.to_html(docstring_linker, **options)
                    for doc in self._parsed_docstrings])
363
def to_latex(self, docstring_linker, **options):
    """
    Translate every wrapped docstring to LaTeX, in order, and return
    the fragments joined into one string.

    @param docstring_linker: Passed unchanged to each wrapped
        docstring's C{to_latex()} method.
    @param options: Extra output options; forwarded unchanged to
        each wrapped docstring.
    @return: The concatenated LaTeX output (the empty string when
        there are no wrapped docstrings).
    @rtype: C{string}
    """
    # str.join builds the result in one pass instead of re-copying
    # the accumulated string on every ``+=``.
    return ''.join([doc.to_latex(docstring_linker, **options)
                    for doc in self._parsed_docstrings])
369
def to_plaintext(self, docstring_linker, **options):
    """
    Translate every wrapped docstring to plaintext, in order, and
    return the fragments joined into one string.

    @param docstring_linker: Passed unchanged to each wrapped
        docstring's C{to_plaintext()} method.
    @param options: Extra output options; forwarded unchanged to
        each wrapped docstring.
    @return: The concatenated plaintext output (the empty string
        when there are no wrapped docstrings).
    @rtype: C{string}
    """
    # str.join builds the result in one pass instead of re-copying
    # the accumulated string on every ``+=``.
    return ''.join([doc.to_plaintext(docstring_linker, **options)
                    for doc in self._parsed_docstrings])
375
377 terms = []
378 for doc in self._parsed_docstrings:
379 terms += doc.index_terms()
380 return terms
381
382
383
384
386 """
387 The contents of a docstring's field. Docstring fields are used
388 to describe specific aspects of an object, such as a parameter of
389 a function or the author of a module. Each field consists of a
390 tag, an optional argument, and a body:
391 - The tag specifies the type of information that the field
392 encodes.
393 - The argument specifies the object that the field describes.
394 The argument may be C{None} or a C{string}.
395 - The body contains the field's information.
396
397 Tags are automatically downcased and stripped; and arguments are
398 automatically stripped.
399 """
401 self._tag = tag.lower().strip()
402 if arg is None: self._arg = None
403 else: self._arg = arg.strip()
404 self._body = body
405
407 """
408 @return: This field's tag.
409 @rtype: C{string}
410 """
411 return self._tag
412
414 """
415 @return: This field's argument, or C{None} if this field has
416 no argument.
417 @rtype: C{string} or C{None}
418 """
419 return self._arg
420
422 """
423 @return: This field's body.
424 @rtype: L{ParsedDocstring}
425 """
426 return self._body
427
429 if self._arg is None:
430 return '<Field @%s: ...>' % self._tag
431 else:
432 return '<Field @%s %s: ...>' % (self._tag, self._arg)
433
434
435
436
438 """
439 A translator for crossreference links into and out of a
440 C{ParsedDocstring}. C{DocstringLinker} is used by
441 C{ParsedDocstring} to convert these crossreference links into
442 appropriate output formats. For example,
443 C{ParsedDocstring.to_html} expects a C{DocstringLinker} that
444 converts crossreference links to HTML.
445 """
447 """
448 Translate an index term to the appropriate output format. The
449 output will typically include a crossreference anchor.
450
451 @type indexterm: L{ParsedDocstring}
452 @param indexterm: The index term to translate.
453 @rtype: C{string}
454 @return: The translated index term.
455 """
456 raise NotImplementedError('DocstringLinker.translate_indexterm()')
457
459 """
460 Translate a crossreference link to a Python identifier to the
461 appropriate output format. The output will typically include
462 a reference or pointer to the crossreference target.
463
464 @type identifier: C{string}
465 @param identifier: The name of the Python identifier that
466 should be linked to.
467 @type label: C{string} or C{None}
468 @param label: The label that should be used for the identifier,
469 if it's different from the name of the identifier. This
470 should be expressed in the target markup language -- e.g.
471 for latex, "_"s should be escaped.
472 @rty