1
2
3
4
5
6
7
8
"""
Epydoc parser for U{Javadoc<http://java.sun.com/j2se/javadoc/>}
docstrings. Javadoc is an HTML-based markup language that was
developed for documenting Java APIs with inline comments. It consists
of raw HTML, augmented by Javadoc tags. There are two types of
Javadoc tag:

  - X{Javadoc block tags} correspond to Epydoc fields. They are
    marked by starting a line with a string of the form \"C{@M{tag}
    [M{arg}]}\", where C{M{tag}} indicates the type of block, and
    C{M{arg}} is an optional argument. (For fields that take
    arguments, Javadoc assumes that the single word immediately
    following the tag is an argument; multi-word arguments cannot be
    used with javadoc.)

  - X{inline Javadoc tags} are used for inline markup. In particular,
    epydoc uses them for crossreference links between documentation.
    Inline tags may appear anywhere in the text, and have the form
    \"C{{@M{tag} M{[args...]}}}\", where C{M{tag}} indicates the
    type of inline markup, and C{M{args}} are optional arguments.

Epydoc supports all Javadoc tags, I{except}:
  - C{{@docRoot}}, which gives the (relative) URL of the generated
    documentation's root.
  - C{{@inheritDoc}}, which copies the documentation of the nearest
    overridden object. This can be used to combine the documentation
    of the overridden object with the documentation of the
    overriding object.
  - C{@serial}, C{@serialField}, and C{@serialData} which describe the
    serialization (pickling) of an object.
  - C{{@value}}, which copies the value of a constant.

@warning: Epydoc only supports HTML output for Javadoc docstrings.
"""
# Markup language used by the docstrings in this module.
__docformat__ = 'epytext en'
44
45
46 import re
47 from xml.dom.minidom import *
48 from epydoc.markup import *
49
# NOTE(review): the signature line was absent from the listing this file
# was recovered from; it is reconstructed from the parameters documented
# below -- confirm the name and signature against the upstream module.
def parse_docstring(docstring, errors, **options):
    """
    Parse the given docstring, which is formatted using Javadoc, and
    return a C{ParsedDocstring} representation of its contents.

    @param docstring: The docstring to parse
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options. Unknown options are ignored.
        Currently, no extra options are defined.
    @rtype: L{ParsedDocstring}
    """
    return ParsedJavadocDocstring(docstring, errors)
64
"""
An encoded version of a Javadoc docstring. Since Javadoc is a
fairly simple markup language, we don't do any processing in
advance; instead, we wait to split fields or resolve
crossreference links until we need to.

@group Field Splitting: split_fields, _ARG_FIELDS, _FIELD_RE
@cvar _ARG_FIELDS: A list of the fields that take arguments.
    Since Javadoc doesn't mark arguments in any special way, we
    must consult this list to decide whether the first word of a
    field is an argument or not.
@cvar _FIELD_RE: A regular expression used to search for Javadoc
    block tags.

@group HTML Output: to_html, _LINK_SPLIT_RE, _LINK_RE
@cvar _LINK_SPLIT_RE: A regular expression used to search for
    Javadoc inline tags.
@cvar _LINK_RE: A regular expression used to process Javadoc
    inline tags.
"""
def __init__(self, docstring, errors=None):
    """
    Create a new C{ParsedJavadocDocstring}.

    The raw text is stored unchanged; the only work done up front is
    validating its inline links.

    @param docstring: The docstring that should be used to
        construct this C{ParsedJavadocDocstring}.
    @type docstring: C{string}
    @param errors: A list where any errors generated during
        parsing will be stored. If no list is given, then
        all errors are ignored.
    @type errors: C{list} of L{ParseError}
    """
    self._docstring = docstring
    if errors is None:
        # Throwaway list: the caller did not ask for error reports.
        errors = []
    self._check_links(errors)
101
102
103
104
105
# Block tags whose first word is an argument rather than body text.
# Javadoc does not mark arguments in any special way, so this list is
# consulted to decide how to split a field.
_ARG_FIELDS = ('group variable var type cvariable cvar ivariable '
               'ivar param '
               'parameter arg argument raise raises exception '
               'except deffield newfield keyword kwarg kwparam').split()

# Matches a block tag ("@tag") at the start of a line, followed by one
# whitespace character or the end of the text.  The terminator is
# written as an alternation because "$" inside a character class is a
# literal dollar sign, not an end-of-text anchor.
_FIELD_RE = re.compile(r'(^\s*\@\w+(?:\s|$))', re.MULTILINE)
111
112
114
115
116
# NOTE(review): the signature line was absent from the listing this file
# was recovered from.  The name comes from the class docstring's
# "Field Splitting" group; the optional ``errors`` parameter is assumed
# from the epydoc ParsedDocstring interface -- confirm against upstream.
def split_fields(self, errors=None):
    """
    Split this docstring into its description and its block-tag
    fields.

    The text is divided at every match of C{_FIELD_RE}. For tags
    listed in C{_ARG_FIELDS}, the first word of the field text becomes
    the field's argument and the rest its body; for all other tags the
    whole field text is the body and the argument is C{None}.

    C{@see} bodies are normalised: a body wrapped in matching quotes
    has the quotes removed, a body starting with C{<} is left alone,
    and anything else is wrapped in an inline C{{@link ...}} tag.

    @param errors: Not used by this implementation.
    @return: A tuple C{(description, fields)}. C{description} is a
        C{ParsedJavadocDocstring} for the text before the first block
        tag, or C{None} if that text is empty or only whitespace.
        C{fields} is a list of C{Field} objects, in docstring order.
    """
    # re.split with one capturing group yields
    # [description, tag, text, tag, text, ...].
    pieces = self._FIELD_RE.split(self._docstring)

    fields = []
    for raw_tag, raw_body in zip(pieces[1::2], pieces[2::2]):
        # Drop surrounding whitespace and the leading "@".
        tag = raw_tag.strip()[1:]

        if tag in self._ARG_FIELDS:
            # Pad so that a missing argument or body becomes ''.
            arg, body = (raw_body.strip().split(None, 1) + ['', ''])[:2]
        else:
            arg, body = None, raw_body

        if tag == 'see':
            # Inspect the stripped text: the raw body usually ends with
            # a newline, which would otherwise hide a closing quote.
            text = body.strip()
            if not text:
                pass
            elif text[0] in '"\'':
                if len(text) > 1 and text[-1] == text[0]:
                    body = text[1:-1]
            elif text[0] != '<':
                body = '{@link %s}' % text

        fields.append(Field(tag, arg, ParsedJavadocDocstring(body)))

    if pieces[0].strip():
        return (ParsedJavadocDocstring(pieces[0]), fields)
    return (None, fields)
154
155
156
157
158
# Finds inline {@link ...} / {@linkplain ...} tags.  The single capturing
# group makes re.split() keep each tag as an odd-indexed piece.
_LINK_SPLIT_RE = re.compile(r'({@link(?:plain)?\s[^}]+})')

# Picks one inline link apart: group 1 is the target identifier, group 2
# the optional label (with its leading whitespace).  DOTALL lets a label
# continue over a line break, which _LINK_SPLIT_RE already accepts.
_LINK_RE = re.compile(r'{@link(?:plain)?\s+'   # opening brace and tag
                      r'([\w#.]+)'              # target identifier
                      r'(?:\([^\)]*\))?'        # optional signature
                      r'(\s+.*)?'               # optional label
                      r'}', re.DOTALL)
162
163
def to_html(self, docstring_linker, **options):
    """
    Render this docstring as HTML.

    The text between inline links is emitted unchanged. Each inline
    C{{@link ...}} tag is replaced by the result of
    C{docstring_linker.translate_identifier_xref(target, label)}.
    Tags that C{_LINK_RE} cannot parse are left out of the output.

    @param docstring_linker: An object providing
        C{translate_identifier_xref(target, label)}.
    @param options: Extra options; not used by this implementation.
    @return: The HTML text.
    """
    translate_xref = docstring_linker.translate_identifier_xref

    chunks = []
    # Splitting on a capturing pattern puts plain text at even indices
    # and link tags at odd indices.
    for index, piece in enumerate(self._LINK_SPLIT_RE.split(self._docstring)):
        if index % 2 == 0:
            chunks.append(piece)
            continue

        match = self._LINK_RE.match(piece)
        if match is None:
            continue
        target, label = match.groups()

        # Convert Javadoc "#member" / "Class#member" notation into a
        # dotted name, and drop any parameter list.
        if target.startswith('#'):
            target = target[1:]
        target = re.sub(r'\(.*\)', '', target.replace('#', '.'))

        # Use the target as the label when none was given, including
        # when the label is only whitespace (e.g. "{@link Foo }").
        label = label.strip() if label else ''
        chunks.append(translate_xref(target, label or target))

    return ''.join(chunks)
198
# NOTE(review): the signature line was absent from the listing this file
# was recovered from; it is reconstructed from the call
# ``self._check_links(errors)`` in the constructor.
def _check_links(self, errors):
    """
    Make sure that all C{{@link}}s are valid. We need a separate
    method for this because we want to do this at parse time, not
    html output time. Any errors found are appended to C{errors}.

    @param errors: A list to which a non-fatal L{ParseError} is
        appended for every inline link that C{_LINK_RE} rejects.
    """
    linenum = 0
    for index, piece in enumerate(self._LINK_SPLIT_RE.split(self._docstring)):
        # Odd-indexed pieces are the link tags themselves.
        if index % 2 == 1 and not self._LINK_RE.match(piece):
            errors.append(ParseError('Bad link %r' % piece, linenum,
                                     is_fatal=0))
        # Count newlines after reporting, so an error carries the line
        # on which its link starts.
        linenum += piece.count('\n')
212
213
214
215
216
217
218
def to_plaintext(self, docstring_linker, **options):
    """
    Return this docstring as plain text.

    No conversion is performed: the stored docstring is returned
    exactly as it was given, markup included.

    @param docstring_linker: Not used by this implementation.
    @param options: Extra options; not used by this implementation.
    @return: The original docstring text.
    """
    return self._docstring
221
# Matches the shortest leading run of text that ends in a period which is
# followed by whitespace or the end of the text.  Group 1 is that run,
# period included.
_SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')
223
224
226
# NOTE(review): the signature line was absent from the listing this file
# was recovered from; the name ``summary`` is assumed from the epydoc
# ParsedDocstring interface and from C{_SUMMARY_RE} -- confirm upstream.
def summary(self):
    """
    Extract a short summary from this docstring.

    Lines whose first non-blank character is C{@} are discarded
    first. If the remaining text matches C{_SUMMARY_RE}, the summary
    is its first group. Otherwise the summary is the first line of the
    text, with C{'...'} appended when more text follows it.

    @return: A tuple C{(summary, other)}, where C{summary} is a
        C{ParsedJavadocDocstring} and C{other} is a boolean that is
        true when the docstring has non-whitespace text beyond the
        summary.
    """
    doc = '\n'.join(row for row in self._docstring.split('\n')
                    if not row.lstrip().startswith('@'))

    match = self._SUMMARY_RE.match(doc)
    if match:
        remainder = doc[match.end():]
        return (ParsedJavadocDocstring(match.group(1)),
                bool(remainder.strip()))

    # No sentence-ending period: fall back to the first line.
    first_line, _, remainder = doc.strip('\n').partition('\n')
    # Only claim that more text exists when the rest is not blank.
    has_more = bool(remainder.strip())
    if has_more:
        first_line += '...'
    return ParsedJavadocDocstring(first_line), has_more
246
247
248
249
250
251