1
2
3
4
5
6
7
8
9
10 """
11 Markup language support for docstrings. Each submodule defines a
12 parser for a single markup language. These parsers convert an
13 object's docstring to a L{ParsedDocstring}, a standard intermediate
14 representation that can be used to generate output.
15 C{ParsedDocstring}s support the following operations:
16 - output generation (L{to_plaintext()<ParsedDocstring.to_plaintext>},
17 L{to_html()<ParsedDocstring.to_html>}, and
18 L{to_latex()<ParsedDocstring.to_latex>}).
19 - Summarization (L{summary()<ParsedDocstring.summary>}).
20 - Field extraction (L{split_fields()<ParsedDocstring.split_fields>}).
21 - Index term extraction (L{index_terms()<ParsedDocstring.index_terms>}).
22
23 The L{parse()} function provides a single interface to the
24 C{epydoc.markup} package: it takes a docstring and the name of a
25 markup language; delegates to the appropriate parser; and returns the
26 parsed docstring (along with any errors or warnings that were
27 generated).
28
29 The C{ParsedDocstring} output generation methods (C{to_M{format}()})
30 use a L{DocstringLinker} to link the docstring output with the rest of
31 the documentation that epydoc generates. C{DocstringLinker}s are
32 currently responsible for translating two kinds of crossreference:
33 - index terms (L{translate_indexterm()
34 <DocstringLinker.translate_indexterm>}).
35 - identifier crossreferences (L{translate_identifier_xref()
36 <DocstringLinker.translate_identifier_xref>}).
37
38 A parsed docstring's fields can be extracted using the
39 L{ParsedDocstring.split_fields()} method. This method divides a
40 docstring into its main body and a list of L{Field}s, each of which
41 encodes a single field. The field's bodies are encoded as
42 C{ParsedDocstring}s.
43
44 Markup errors are represented using L{ParseError}s. These exception
45 classes record information about the cause, location, and severity of
46 each error.
47
48 @sort: parse, ParsedDocstring, Field, DocstringLinker
49 @group Errors and Warnings: ParseError
50 @group Utility Functions: parse_type_of
51 @var SCRWIDTH: The default width with which text will be wrapped
52 when formatting the output of the parser.
53 @type SCRWIDTH: C{int}
54 @var _parse_warnings: Used by L{_parse_warn}.
55 """
56 __docformat__ = 'epytext en'
57
58 import re, types, sys
59 from epydoc import log
60 from epydoc.util import plaintext_to_html, plaintext_to_latex
61 import epydoc
62 from epydoc.compat import *
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Registry of the known markup languages, keyed by lower-case name.
# A value is either a parse function or, for parsers that have not
# been loaded yet, the dotted name of a module that provides a
# C{parse_docstring} function; parse() imports such a module on first
# use and stores the function back into this table.
_markup_language_registry = dict(
    restructuredtext='epydoc.markup.restructuredtext',
    epytext='epydoc.markup.epytext',
    plaintext='epydoc.markup.plaintext',
    javadoc='epydoc.markup.javadoc',
)
86
88 """
89 Register a new markup language named C{name}, which can be parsed
90 by the function C{parse_function}.
91
92 @param name: The name of the markup language. C{name} should be a
93 simple identifier, such as C{'epytext'} or C{'restructuredtext'}.
94 Markup language names are case insensitive.
95
96 @param parse_function: A function which can be used to parse the
97 markup language, and returns a L{ParsedDocstring}. It should
98 have the following signature:
99
100 >>> def parse(s, errors):
101 ... 'returns a ParsedDocstring'
102
103 Where:
104 - C{s} is the string to parse. (C{s} will be a unicode
105 string.)
106 - C{errors} is a list; any errors that are generated
107 during docstring parsing should be appended to this
108 list (as L{ParseError} objects).
109 """
110 _markup_language_registry[name.lower()] = parse_function
111
# Lower-cased names of the markup languages for which parse() found a
# usable parser; parse() adds the name just before invoking the parser.
MARKUP_LANGUAGES_USED = set()
113
def parse(docstring, markup='plaintext', errors=None, **options):
    """
    Parse the given docstring, and use it to construct a
    C{ParsedDocstring}.  The docstring is parsed as plaintext instead
    if the markup language is unknown, if its parser cannot be
    imported, if the parser fails with an internal error, or if the
    parser reports any fatal C{ParseError}s (and C{errors} was given).

    @type docstring: C{string}
    @param docstring: The docstring to encode.
    @type markup: C{string}
    @param markup: The name of the markup language that is used by
        the docstring.  If the markup language is not supported, then
        the docstring will be treated as plaintext.  The markup name
        is case-insensitive.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra keyword arguments; they are passed through
        unchanged to the markup language's parse function.
    @rtype: L{ParsedDocstring}
    @return: A L{ParsedDocstring} that encodes the contents of
        C{docstring}.
    @raise ParseError: If C{errors} is C{None} and a fatal error is
        reported while parsing.
    """
    # Remember whether the caller asked for errors to be collected
    # before replacing a missing list with a private one.
    raise_on_error = (errors is None)
    if errors is None:
        errors = []

    def parse_as_plaintext():
        # Shared fallback for every failure path below.  The plaintext
        # parser is imported lazily, at the point where it is needed.
        import epydoc.markup.plaintext as plaintext
        return plaintext.parse_docstring(docstring, errors, **options)

    # Normalize the markup language name.
    markup = markup.lower()

    # Sanity-check the name.  re.match() only anchors at the start, so
    # this rejects names that do not begin with a word character; the
    # registry lookup below performs the real validation.
    if not re.match(r'\w+', markup):
        _parse_warn('Bad markup language name %r.  Treating '
                    'docstrings as plaintext.' % markup)
        return parse_as_plaintext()

    # Is the markup language supported?
    if markup not in _markup_language_registry:
        _parse_warn('Unsupported markup language %r.  Treating '
                    'docstrings as plaintext.' % markup)
        return parse_as_plaintext()

    # Get the parse function.
    parse_docstring = _markup_language_registry[markup]

    # A string entry is the dotted name of a module that has not been
    # loaded yet: import it, fetch its parse_docstring function, and
    # cache the function in the registry for subsequent calls.
    if isinstance(parse_docstring, basestring):
        module_name = parse_docstring
        try:
            # A non-empty fromlist makes __import__ return the leaf
            # module rather than the top-level package.
            module = __import__(module_name, globals(), locals(),
                                ['parse_docstring'])
            if not hasattr(module, 'parse_docstring'):
                # Mirror what "from module import name" would raise.
                raise ImportError('cannot import name parse_docstring')
            parse_docstring = module.parse_docstring
        except ImportError as e:
            _parse_warn('Error importing %s for markup language %s: %s' %
                        (module_name, markup, e))
            return parse_as_plaintext()
        _markup_language_registry[markup] = parse_docstring

    # Keep track of which markup languages have been used so far.
    MARKUP_LANGUAGES_USED.add(markup)

    # Parse the docstring.  An unexpected exception inside the parser
    # is logged and answered with the plaintext fallback, unless
    # epydoc.DEBUG is set, in which case it propagates.
    try:
        parsed_docstring = parse_docstring(docstring, errors, **options)
    except KeyboardInterrupt:
        raise
    except Exception as e:
        if epydoc.DEBUG:
            raise
        log.error('Internal error while parsing a docstring: %s; '
                  'treating docstring as plaintext' % e)
        return parse_as_plaintext()

    # Fatal errors either propagate (no error list was supplied) or
    # cause the docstring to be re-parsed as plaintext.
    fatal_errors = [err for err in errors if err.is_fatal()]
    if fatal_errors:
        if raise_on_error:
            raise fatal_errors[0]
        return parse_as_plaintext()

    return parsed_docstring
194
195
# Bookkeeping dictionary used by _parse_warn (see the @var entry in the
# module docstring).
_parse_warnings = {}
206
207
208
209
211 """
212 A standard intermediate representation for parsed docstrings that
213 can be used to generate output. Parsed docstrings are produced by
214 markup parsers (such as L{epytext.parse} or L{javadoc.parse}).
215 C{ParsedDocstring}s support several kinds of operation:
216 - output generation (L{to_plaintext()}, L{to_html()}, and
217 L{to_latex()}).
218 - Summarization (L{summary()}).
219 - Field extraction (L{split_fields()}).
220 - Index term extraction (L{index_terms()}).
221
222 The output generation methods (C{to_M{format}()}) use a
223 L{DocstringLinker} to link the docstring output with the rest
224 of the documentation that epydoc generates.
225
226 Subclassing
227 ===========
228 The only method that a subclass is I{required} to implement is
229 L{to_plaintext()}; but it is often useful to override the other
230 methods. The default behavior of each method is described below:
231 - C{to_I{format}}: Calls C{to_plaintext}, and uses the string it
232 returns to generate verbatim output.
233 - C{summary}: Returns C{self} (i.e., the entire docstring).
234 - C{split_fields}: Returns C{(self, [])} (i.e., extracts no
235 fields).
236 - C{index_terms}: Returns C{[]} (i.e., extracts no index terms).
237
238 If and when epydoc adds more output formats, new C{to_I{format}}
239 methods will be added to this base class; but they will always
240 be given a default implementation.
241 """
243 """
244 Split this docstring into its body and its fields.
245
246 @return: A tuple C{(M{body}, M{fields})}, where C{M{body}} is
247 the main body of this docstring, and C{M{fields}} is a list
248 of its fields. If the resulting body is empty, return
249 C{None} for the body.
250 @rtype: C{(L{ParsedDocstring}, list of L{Field})}
251 @param errors: A list where any errors generated during
252 splitting will be stored. If no list is specified, then
253 errors will be ignored.
254 @type errors: C{list} of L{ParseError}
255 """
256
257 return self, []
258
260 """
261 @return: A pair consisting of a short summary of this docstring and a
262 boolean value indicating whether there is further documentation
263 in addition to the summary. Typically, the summary consists of the
264 first sentence of the docstring.
265 @rtype: (L{ParsedDocstring}, C{bool})
266 """
267
268 return self, False
269
271 """
272 @return: A new parsed docstring containing the concatenation
273 of this docstring and C{other}.
274 @raise ValueError: If the two parsed docstrings are
275 incompatible.
276 """
277 return ConcatenatedDocstring(self, other)
278
280
def to_html(self, docstring_linker, **options):
    """
    Render this docstring as an HTML fragment.

    This default implementation produces verbatim output: it calls
    L{to_plaintext()} with the given linker, passes the result
    through C{plaintext_to_html}, and wraps it in a literal-block
    C{pre} element.  The extra C{options} are accepted but are not
    forwarded to C{to_plaintext}.

    @param docstring_linker: An HTML translator for crossreference
        links into and out of the docstring.
    @type docstring_linker: L{DocstringLinker}
    @param options: Any extra options for the output.  Unknown
        options are ignored.
    @return: An HTML fragment that encodes this docstring.
    @rtype: C{string}
    """
    verbatim_text = self.to_plaintext(docstring_linker)
    html_body = plaintext_to_html(verbatim_text)
    return '<pre class="literalblock">\n%s\n</pre>\n' % html_body
296
def to_latex(self, docstring_linker, **options):
    """
    Render this docstring as a LaTeX fragment.

    This default implementation produces verbatim output: it calls
    L{to_plaintext()} with the given linker, passes the result
    through C{plaintext_to_latex}, and wraps it in an C{alltt}
    environment.  The extra C{options} are accepted but are not
    forwarded to C{to_plaintext}.

    @param docstring_linker: A LaTeX translator for crossreference
        links into and out of the docstring.
    @type docstring_linker: L{DocstringLinker}
    @param options: Any extra options for the output.  Unknown
        options are ignored.
    @return: A LaTeX fragment that encodes this docstring.
    @rtype: C{string}
    """
    verbatim_text = self.to_plaintext(docstring_linker)
    latex_body = plaintext_to_latex(verbatim_text)
    return '\\begin{alltt}\n%s\\end{alltt}\n\n' % latex_body
312
def to_plaintext(self, docstring_linker, **options):
    """
    Translate this docstring to plaintext.

    This base implementation is a placeholder: it always raises
    C{NotImplementedError}.

    @param docstring_linker: A plaintext translator for
        crossreference links into and out of the docstring.
    @type docstring_linker: L{DocstringLinker}
    @param options: Any extra options for the output.  Unknown
        options are ignored.
    @return: A plaintext fragment that encodes this docstring.
    @rtype: C{string}
    @raise NotImplementedError: Always, in this implementation.
    """
    # Call-style raise, matching the other NotImplementedError raises
    # in this file; the "raise Class, value" form is Python 2 only.
    raise NotImplementedError('ParsedDocstring.to_plaintext()')
326
328 """
329 @return: The list of index terms that are defined in this
330 docstring. Each of these items will be added to the index
331 page of the documentation.
332 @rtype: C{list} of C{ParsedDocstring}
333 """
334
335 return []
336
337
338
339
def __init__(self, *parsed_docstrings):
    """
    Store the given parsed docstrings, in order, skipping any
    argument that is C{None}.

    @param parsed_docstrings: The docstrings to combine; C{None}
        entries are dropped.
    """
    kept = []
    for candidate in parsed_docstrings:
        if candidate is not None:
            kept.append(candidate)
    self._parsed_docstrings = kept
344
346 bodies = []
347 fields = []
348 for doc in self._parsed_docstrings:
349 b,f = doc.split_fields()
350 bodies.append(b)
351 fields.extend(f)
352
353 return ConcatenatedDocstring(*bodies), fields
354
356 return self._parsed_docstrings[0].summary()
357
def to_html(self, docstring_linker, **options):
    """
    Translate each contained docstring to HTML and return the
    fragments joined together, in order.

    @param docstring_linker: Passed to every contained docstring's
        C{to_html} method.
    @param options: Extra output options; forwarded unchanged to
        every contained docstring.
    @return: The concatenated HTML (the empty string if there are no
        contained docstrings).
    @rtype: C{string}
    """
    # Join once instead of growing the string with repeated "+=".
    return ''.join([doc.to_html(docstring_linker, **options)
                    for doc in self._parsed_docstrings])
363
def to_latex(self, docstring_linker, **options):
    """
    Translate each contained docstring to LaTeX and return the
    fragments joined together, in order.

    @param docstring_linker: Passed to every contained docstring's
        C{to_latex} method.
    @param options: Extra output options; forwarded unchanged to
        every contained docstring.
    @return: The concatenated LaTeX (the empty string if there are
        no contained docstrings).
    @rtype: C{string}
    """
    # Join once instead of growing the string with repeated "+=".
    return ''.join([doc.to_latex(docstring_linker, **options)
                    for doc in self._parsed_docstrings])
369
def to_plaintext(self, docstring_linker, **options):
    """
    Translate each contained docstring to plaintext and return the
    fragments joined together, in order.

    @param docstring_linker: Passed to every contained docstring's
        C{to_plaintext} method.
    @param options: Extra output options; forwarded unchanged to
        every contained docstring.
    @return: The concatenated plaintext (the empty string if there
        are no contained docstrings).
    @rtype: C{string}
    """
    # Join once instead of growing the string with repeated "+=".
    return ''.join([doc.to_plaintext(docstring_linker, **options)
                    for doc in self._parsed_docstrings])
375
377 terms = []
378 for doc in self._parsed_docstrings:
379 terms += doc.index_terms()
380 return terms
381
382
383
384
386 """
387 The contents of a docstring's field. Docstring fields are used
388 to describe specific aspects of an object, such as a parameter of
389 a function or the author of a module. Each field consists of a
390 tag, an optional argument, and a body:
391 - The tag specifies the type of information that the field
392 encodes.
393 - The argument specifies the object that the field describes.
394 The argument may be C{None} or a C{string}.
395 - The body contains the field's information.
396
397 Tags are automatically downcased and stripped; and arguments are
398 automatically stripped.
399 """
401 self._tag = tag.lower().strip()
402 if arg is None: self._arg = None
403 else: self._arg = arg.strip()
404 self._body = body
405
407 """
408 @return: This field's tag.
409 @rtype: C{string}
410 """
411 return self._tag
412
414 """
415 @return: This field's argument, or C{None} if this field has
416 no argument.
417 @rtype: C{string} or C{None}
418 """
419 return self._arg
420
422 """
423 @return: This field's body.
424 @rtype: L{ParsedDocstring}
425 """
426 return self._body
427
429 if self._arg is None:
430 return '<Field @%s: ...>' % self._tag
431 else:
432 return '<Field @%s %s: ...>' % (self._tag, self._arg)
433
434
435
436
438 """
439 A translator for crossreference links into and out of a
440 C{ParsedDocstring}. C{DocstringLinker} is used by
441 C{ParsedDocstring} to convert these crossreference links into
442 appropriate output formats. For example,
443 C{ParsedDocstring.to_html} expects a C{DocstringLinker} that
444 converts crossreference links to HTML.
445 """
447 """
448 Translate an index term to the appropriate output format. The
449 output will typically include a crossreference anchor.
450
451 @type indexterm: L{ParsedDocstring}
452 @param indexterm: The index term to translate.
453 @rtype: C{string}
454 @return: The translated index term.
455 """
456 raise NotImplementedError('DocstringLinker.translate_indexterm()')
457
459 """
460 Translate a crossreference link to a Python identifier to the
461 appropriate output format. The output will typically include
462 a reference or pointer to the crossreference target.
463
464 @type identifier: C{string}
465 @param identifier: The name of the Python identifier that
466 should be linked to.
467 @type label: C{string} or C{None}
468 @param label: The label that should be used for the identifier,
469 if it's different from the name of the identifier. This
470 should be expressed in the target markup language -- e.g.
471 for latex, "_"s should be escaped.
472 @rtype: C{string}
473 @return: The translated crossreference link.
474 """
475 raise NotImplementedError('DocstringLinker.translate_xref()')
476
478 """
479 Given an identifier, return a URL pointing at that identifier.
480 This is used to create hyperlinks in dotgraphs. This method
481 is *optional* -- i.e., it may raise NotImplementedError
482 """
483 raise NotImplementedError('DocstringLinker.url_for()')
484
485
486
487
488
489
491 """
492 The base class for errors generated while parsing docstrings.
493
494 @ivar _linenum: The line on which the error occurred within the
495 docstring. The linenum of the first line is 0.
496 @type _linenum: C{int}
497 @ivar _offset: The line number where the docstring begins. This
498 offset is added to C{_linenum} when displaying the line number
499 of the error. Default value: 1.
500 @type _offset: C{int}
501 @ivar _descr: A description of the error.
502 @type _descr: C{string}
503 @ivar _fatal: True if this is a fatal error.
504 @type _fatal: C{boolean}
505 """
def __init__(self, descr, linenum=None, is_fatal=1):
    """
    Record the description, location and severity of a parse error.
    The line number offset starts out as 1.

    @type descr: C{string}
    @param descr: A description of the error.
    @type linenum: C{int}
    @param linenum: The line on which the error occurred within
        the docstring.  The linenum of the first line is 0.
    @type is_fatal: C{boolean}
    @param is_fatal: True if this is a fatal error.
    """
    # The offset is not a constructor argument; it always starts at 1.
    self._offset = 1
    self._descr, self._linenum, self._fatal = descr, linenum, is_fatal
520
522 """
523 @return: true if this is a fatal error. If an error is fatal,
524 then epydoc should ignore the output of the parser, and
525 parse the docstring as plaintext.
526 @rtype: C{boolean}
527 """
528 return self._fatal
529
531 """
532 @return: The line number on which the error occurred (including
533 any offset). If the line number is unknown, then return
534 C{None}.
535 @rtype: C{int} or C{None}
536 """
537 if self._linenum is None: return None
538 else: return self._offset + self._linenum
539
541 """
542 Set the line number offset for this error. This offset is the
543 line number where the docstring begins. This offset is added
544 to C{_linenum} when displaying the line number of the error.
545
546 @param offset: The new line number offset.
547 @type offset: C{int}
548 @rtype: C{None}
549 """
550 self._offset = offset
551
554
556 """
557 Return a string representation of this C{ParseError}. This
558 multi-line string contains a description of the error, and
559 specifies where it occurred.
560
561 @return: the informal representation of this C{ParseError}.
562 @rtype: C{string}
563 """
564 if self._linenum is not None:
565 return 'Line %s: %s' % (self._linenum+self._offset, self.descr())
566 else:
567 return self.descr()
568
570 """
571 Return the formal representation of this C{ParseError}.
572 C{ParseError}s have formal representations of the form::
573 <ParseError on line 12>
574
575 @return: the formal representation of this C{ParseError}.
576 @rtype: C{string}
577 """
578 if self._linenum is None:
579 return '<ParseError on line %d' % self._offset
580 else:
581 return '<ParseError on line %d>' % (self._linenum+self._offset)
582
584 """
585 Compare two C{ParseError}s, based on their line number.
586 - Return -1 if C{self.linenum<other.linenum}
587 - Return +1 if C{self.linenum>other.linenum}
588 - Return 0 if C{self.linenum==other.linenum}.
589 The return value is undefined if C{other} is not a
590 ParseError.
591
592 @rtype: C{int}
593 """
594 if not isinstance(other, ParseError): return -1000
595 return cmp(self._linenum+self._offset,
596 other._linenum+other._offset)
597
598
599
600
601
602
604 """
605 @return: A C{ParsedDocstring} that encodes the type of the given
606 object.
607 @rtype: L{ParsedDocstring}
608 @param obj: The object whose type should be returned as DOM document.
609 @type obj: any
610 """
611
612 from epydoc.markup.epytext import ParsedEpytextDocstring
613 from xml.dom.minidom import Document
614 doc = Document()
615 epytext = doc.createElement('epytext')
616 para = doc.createElement('para')
617 doc.appendChild(epytext)
618 epytext.appendChild(para)
619
620 if type(obj) is types.InstanceType:
621 link = doc.createElement('link')
622 name = doc.createElement('name')
623 target = doc.createElement('target')
624 para.appendChild(link)
625 link.appendChild(name)
626 link.appendChild(target)
627 name.appendChild(doc.createTextNode(str(obj.__class__.__name__)))
628 target.appendChild(doc.createTextNode(str(obj.__class__)))
629 else:
630 code = doc.createElement('code')
631 para.appendChild(code)
632 code.appendChild(doc.createTextNode(type(obj).__name__))
633 return ParsedEpytextDocstring(doc)
634