1
2
3
4
5
6
7
8
9 """
10 Classes for encoding API documentation about Python programs.
11 These classes are used as a common representation for combining
12 information derived from introspection and from parsing.
13
14 The API documentation for a Python program is encoded using a graph of
15 L{APIDoc} objects, each of which encodes information about a single
16 Python variable or value. C{APIDoc} has two direct subclasses:
17 L{VariableDoc}, for documenting variables; and L{ValueDoc}, for
18 documenting values. The C{ValueDoc} class is subclassed further, to
19 define the different pieces of information that should be recorded
20 about each value type:
21
22 G{classtree: APIDoc}
23
24 The distinction between variables and values is intentionally made
25 explicit. This allows us to distinguish information about a variable
26 itself (such as whether it should be considered 'public' in its
27 containing namespace) from information about the value it contains
28 (such as what type the value has). This distinction is also important
29 because several variables can contain the same value: each variable
30 should be described by a separate C{VariableDoc}; but we only need one
31 C{ValueDoc}, since they share a single value.
32
33 @todo: Add a cache to canonical name lookup?
34 """
35 __docformat__ = 'epytext en'
36
37
38
39
40
41 import types, re, os.path, pickle
42 from epydoc import log
43 import epydoc
44 import __builtin__
45 from epydoc.compat import *
46 from epydoc.util import decode_with_backslashreplace, py_src_filename
47 import epydoc.markup.pyval_repr
48
49
50
51
52
class DottedName:
    """
    A sequence of identifiers, separated by periods, used to name a
    Python variable, value, or argument.  The identifiers that make up
    a dotted name can be accessed using the indexing operator:

        >>> name = DottedName('epydoc', 'apidoc', 'DottedName')
        >>> str(name)
        'epydoc.apidoc.DottedName'
        >>> name[1]
        'apidoc'
    """
    UNREACHABLE = "??"
    # Raw string: the pattern relies on the regex escapes \w and \d, which
    # are not valid *string* escapes.
    _IDENTIFIER_RE = re.compile(r"""(?x)
        (%s |             # UNREACHABLE marker, or..
         (script-)?       # Prefix: script (not a module)
         \w+              # Identifier (yes, identifiers starting with a
                          # digit are allowed. See SF bug #1649347)
         '?)              # Suffix: submodule that is shadowed by a var
        (-\d+)?           # Suffix: unreachable vals with the same name
        $"""
        % re.escape(UNREACHABLE))

    class InvalidDottedName(ValueError):
        """
        An exception raised by the DottedName constructor when one of
        its arguments is not a valid dotted name.
        """

    _ok_identifiers = set()
    """A cache of identifier strings that have already been checked
    against _IDENTIFIER_RE.  It is shared by all instances (it lives
    on the class), and holds individual identifiers, not whole dotted
    strings."""

    def __init__(self, *pieces, **options):
        """
        Construct a new dotted name from the given sequence of pieces,
        each of which can be either a C{string} or a C{DottedName}.
        Each piece is divided into a sequence of identifiers, and
        these sequences are combined together (in order) to form the
        identifier sequence for the new C{DottedName}.  If a piece
        contains a string, then it is divided into substrings by
        splitting on periods, and each substring is checked to see if
        it is a valid identifier.

        As an optimization, C{pieces} may also contain a single tuple
        of values.  In that case, that tuple will be used as the
        C{DottedName}'s identifiers; it will I{not} be checked to
        see if it's valid.

        @kwparam strict: if true, then raise an L{InvalidDottedName}
            if the given name is invalid.  Otherwise a warning is
            logged and the suspicious identifier is used anyway.
        @raise InvalidDottedName: If no pieces are given, or if
            C{strict} is true and an identifier is invalid.
        @raise TypeError: If a piece is neither a string nor a
            C{DottedName}.
        """
        # Fast path: a single tuple is trusted and used as-is.
        if len(pieces) == 1 and isinstance(pieces[0], tuple):
            self._identifiers = pieces[0]
            return
        if len(pieces) == 0:
            raise DottedName.InvalidDottedName('Empty DottedName')
        identifiers = []
        for piece in pieces:
            if isinstance(piece, DottedName):
                identifiers.extend(piece._identifiers)
            elif isinstance(piece, basestring):
                for subpiece in piece.split('.'):
                    # The cache must be keyed on the individual
                    # identifier.  Keying it on the whole piece would
                    # mark the piece as checked after its first
                    # identifier, so the remaining ones would never
                    # be validated.
                    if subpiece not in self._ok_identifiers:
                        if not self._IDENTIFIER_RE.match(subpiece):
                            if options.get('strict'):
                                raise DottedName.InvalidDottedName(
                                    'Bad identifier %r' % (piece,))
                            else:
                                log.warning("Identifier %r looks suspicious; "
                                            "using it anyway." % piece)
                        # Remembered even when suspicious, so that the
                        # warning is only issued once per identifier.
                        self._ok_identifiers.add(subpiece)
                    identifiers.append(subpiece)
            else:
                raise TypeError('Bad identifier %r: expected '
                                'DottedName or str' % (piece,))
        self._identifiers = tuple(identifiers)

    def __repr__(self):
        """
        Return a constructor-style representation, with one quoted
        argument per identifier.
        """
        idents = [repr(ident) for ident in self._identifiers]
        return 'DottedName(' + ', '.join(idents) + ')'

    def __str__(self):
        """
        Return the dotted name as a string formed by joining its
        identifiers with periods:

            >>> str(DottedName('epydoc', 'apidoc', 'DottedName'))
            'epydoc.apidoc.DottedName'
        """
        return '.'.join(self._identifiers)

    def __add__(self, other):
        """
        Return a new C{DottedName} whose identifier sequence is formed
        by adding C{other}'s identifier sequence to C{self}'s.
        C{other} may be a string, a C{DottedName}, or an iterable of
        those.
        """
        if isinstance(other, (basestring, DottedName)):
            return DottedName(self, other)
        else:
            return DottedName(self, *other)

    def __radd__(self, other):
        """
        Return a new C{DottedName} whose identifier sequence is formed
        by adding C{self}'s identifier sequence to C{other}'s.
        C{other} may be a string, a C{DottedName}, or an iterable of
        those.
        """
        if isinstance(other, (basestring, DottedName)):
            return DottedName(other, self)
        else:
            return DottedName(*(list(other)+[self]))

    def __getitem__(self, i):
        """
        Return the C{i}th identifier in this C{DottedName}.  If C{i} is
        a non-empty slice, then return a C{DottedName} built from the
        identifiers selected by the slice.  If C{i} is an empty slice,
        return an empty list (since empty C{DottedName}s are not valid).
        """
        if isinstance(i, slice):
            # Index with the slice object itself so that a step, if
            # given, is honoured instead of being silently dropped.
            pieces = self._identifiers[i]
            if pieces:
                # A tuple argument takes the constructor's unchecked
                # fast path; these identifiers were already accepted.
                return DottedName(pieces)
            return []
        return self._identifiers[i]

    def __hash__(self):
        """Hash on the identifier tuple, consistently with comparison."""
        return hash(self._identifiers)

    def __cmp__(self, other):
        """
        Compare this dotted name to C{other}.  Two dotted names are
        considered equal if their identifier subsequences are equal.
        Ordering between dotted names is lexicographic, in order of
        identifier from left to right.  Any object that is not a
        C{DottedName} compares as greater than C{self}.
        """
        if not isinstance(other, DottedName):
            return -1
        return cmp(self._identifiers, other._identifiers)

    def __len__(self):
        """
        Return the number of identifiers in this dotted name.
        """
        return len(self._identifiers)

    def container(self):
        """
        Return the DottedName formed by removing the last identifier
        from this dotted name's identifier sequence.  If this dotted
        name only has one name in its identifier sequence, return
        C{None} instead.
        """
        if len(self._identifiers) == 1:
            return None
        else:
            return DottedName(*self._identifiers[:-1])

    def dominates(self, name, strict=False):
        """
        Return true if this dotted name is equal to a prefix of
        C{name}.  If C{strict} is true, then also require that
        C{self!=name}.

            >>> DottedName('a.b').dominates(DottedName('a.b.c.d'))
            True
        """
        len_self = len(self._identifiers)
        len_name = len(name._identifiers)

        if (len_self > len_name) or (strict and len_self == len_name):
            return False

        # The first clause is implied by the second; it is a cheap
        # early-out that avoids building the slice in the common case
        # where the names differ in their first identifier.
        return ((self._identifiers[0] == name._identifiers[0]) and
                self._identifiers == name._identifiers[:len_self])

    def contextualize(self, context):
        """
        If C{self} and C{context} share a common ancestor, then return
        a name for C{self}, relative to that ancestor.  If they do not
        share a common ancestor (or if C{context} is C{UNKNOWN}), then
        simply return C{self}.

        This is used to generate shorter versions of dotted names in
        cases where users can infer the intended target from the
        context.  The result always keeps at least one identifier.

        @type context: L{DottedName}
        @rtype: L{DottedName}
        """
        # `context[1:]` is an empty list once the context is exhausted,
        # which the `not context` test below treats as "no context".
        if context is UNKNOWN or not context or len(self) <= 1:
            return self
        if self[0] == context[0]:
            # Strip the shared leading identifier and recurse.
            return self[1:].contextualize(context[1:])
        else:
            return self
266
267
268
269
270
class _Sentinel:
    """
    A unique value that won't compare equal to any other value.  This
    class is used to create L{UNKNOWN}.

    A sentinel deliberately refuses to be used in a boolean context,
    so that code such as C{if attrib:} fails loudly instead of
    silently treating an unknown value as true.
    """
    def __init__(self, name):
        """
        @param name: The label shown in this sentinel's C{repr} and in
            its error messages.
        """
        self.name = name

    def __repr__(self):
        """Return the sentinel's name in angle brackets."""
        return '<%s>' % self.name

    def __nonzero__(self):
        """
        Reject truth-testing.

        @raise ValueError: Always.
        """
        raise ValueError('Sentinel value <%s> can not be used as a boolean' %
                         self.name)

    # Python 3 looks up __bool__ rather than __nonzero__; without this
    # alias the guard above would never run there.
    __bool__ = __nonzero__
283
284 UNKNOWN = _Sentinel('UNKNOWN')
285 """A special value used to indicate that a given piece of
286 information about an object is unknown. This is used as the
287 default value for all instance variables."""
288
289
290
291
292
294 """
295 API documentation information for a single element of a Python
296 program. C{APIDoc} itself is an abstract base class; subclasses
297 are used to specify what information should be recorded about each
298 type of program element. In particular, C{APIDoc} has two direct
299 subclasses, C{VariableDoc} for documenting variables and
300 C{ValueDoc} for documenting values; and the C{ValueDoc} class is
301 subclassed further for different value types.
302
303 Each C{APIDoc} subclass specifies the set of attributes that
304 should be used to record information about the corresponding
305 program element type. The default value for each attribute is
306 stored in the class; these default values can then be overridden
307 with instance variables. Most attributes use the special value
308 L{UNKNOWN} as their default value, to indicate that the correct
309 value for that attribute has not yet been determined. This makes
310 it easier to merge two C{APIDoc} objects that are documenting the
311 same element (in particular, to merge information about an element
312 that was derived from parsing with information that was derived
313 from introspection).
314
315 For all attributes with boolean values, use only the constants
316 C{True} and C{False} to designate true and false. In particular,
317 do I{not} use other values that evaluate as true or false, such as
318 C{2} or C{()}. This restriction makes it easier to handle
319 C{UNKNOWN} values. For example, to test if a boolean attribute is
320 C{True} or C{UNKNOWN}, use 'C{attrib in (True, UNKNOWN)}' or
321 'C{attrib is not False}'.
322
323 Two C{APIDoc} objects describing the same object can be X{merged},
324 using the method L{merge_and_overwrite(other)}. After two
325 C{APIDoc}s are merged, any changes to one will be reflected in the
326 other. This is accomplished by setting the two C{APIDoc} objects
327 to use a shared instance dictionary. See the documentation for
328 L{merge_and_overwrite} for more information, and some important
329 caveats about hashing.
330 """
331
332 docstring = UNKNOWN
333 """@ivar: The documented item's docstring.
334 @type: C{string} or C{None}"""
335
336 docstring_lineno = UNKNOWN
337 """@ivar: The line number on which the documented item's docstring
338 begins.
339 @type: C{int}"""
340
341
342
343 descr = UNKNOWN
344 """@ivar: A description of the documented item, extracted from its
345 docstring.
346 @type: L{ParsedDocstring<epydoc.markup.ParsedDocstring>}"""
347
348 summary = UNKNOWN
349 """@ivar: A summary description of the documented item, extracted from
350 its docstring.
351 @type: L{ParsedDocstring<epydoc.markup.ParsedDocstring>}"""
352
353 other_docs = UNKNOWN
354 """@ivar: A flag indicating if the entire L{docstring} body (except tags
355 if any) is entirely included in the L{summary}.
356 @type: C{bool}"""
357
358 metadata = UNKNOWN
359 """@ivar: Metadata about the documented item, extracted from fields in
360 its docstring. I{Currently} this is encoded as a list of tuples
361 C{(field, arg, descr)}. But that may change.
362 @type: C{(str, str, L{ParsedDocstring<markup.ParsedDocstring>})}"""
363
364 extra_docstring_fields = UNKNOWN
365 """@ivar: A list of new docstring fields tags that are defined by the
366 documented item's docstring. These new field tags can be used by
367 this item or by any item it contains.
368 @type: L{DocstringField <epydoc.docstringparser.DocstringField>}"""
369
370
371
372 docs_extracted_by = UNKNOWN
373 """@ivar: Information about where the information contained by this
374 C{APIDoc} came from. Can be one of C{'parser'},
375 C{'introspector'}, or C{'both'}.
376 @type: C{str}"""
377
378
def __init__(self, **kwargs):
    """
    Construct a new C{APIDoc} object.  Keyword arguments may be
    used to initialize the new C{APIDoc}'s attributes.

    The keyword names are only validated when L{epydoc.DEBUG} is
    true; otherwise they are stored without any check.

    @raise TypeError: If a keyword argument is specified that does
        not correspond to a valid attribute for this (sub)class of
        C{APIDoc}.  Names starting with an underscore are exempt.
    """
    if epydoc.DEBUG:
        for key in kwargs:
            # startswith() (rather than key[0]) keeps an empty key from
            # raising IndexError, and matches the test used when
            # attributes are set in debug mode.
            if not key.startswith('_') and not hasattr(self.__class__, key):
                raise TypeError('%s got unexpected arg %r' %
                                (self.__class__.__name__, key))
    # Write straight into the instance dictionary, bypassing any
    # __setattr__ hook.
    self.__dict__.update(kwargs)
394
def _debug_setattr(self, attr, val):
    """
    Modify an C{APIDoc}'s attribute.  This is used when
    L{epydoc.DEBUG} is true, to make sure we don't accidentally
    set any inappropriate attributes on C{APIDoc} objects.

    @param attr: The name of the attribute to set.
    @param val: The new value.
    @raise AttributeError: If C{attr} is not a valid attribute for
        this (sub)class of C{APIDoc}.  (C{attr} is considered a
        valid attribute iff C{hasattr(self, attr)} is already true,
        which includes every attribute defined by C{self.__class__}.)
    """
    # Private names are never validated; defer to the default
    # behaviour for them.
    if attr.startswith('_'):
        return object.__setattr__(self, attr, val)
    if not hasattr(self, attr):
        raise AttributeError('%s does not define attribute %r' %
                             (self.__class__.__name__, attr))
    # Store directly in the instance dictionary.
    self.__dict__[attr] = val
414
415 if epydoc.DEBUG:
416 __setattr__ = _debug_setattr
417
def __repr__(self):
    """
    Return a short representation consisting of the name of this
    object's class in angle brackets, e.g. C{<APIDoc>}.
    """
    return '<%s>' % self.__class__.__name__
420
def pp(self, doublespace=0, depth=5, exclude=(), include=()):
    """
    Return a pretty-printed string representation for the
    information contained in this C{APIDoc}.

    This is a thin wrapper: C{self} and the four options are passed
    on, positionally and in this order, to C{pp_apidoc}, which does
    the formatting.  The meaning of each option is defined there.
    """
    return pp_apidoc(self, doublespace, depth, exclude, include)
427 __str__ = pp
428
def specialize_to(self, cls):
    """
    Change C{self}'s class to C{cls}.  C{cls} must be a subclass
    of C{self}'s current class.  For example, if a generic
    C{ValueDoc} was created for a value, and it is determined that
    the value is a routine, you can update its class with:

        >>> valdoc.specialize_to(RoutineDoc)

    @param cls: The class to switch to.
    @raise ValueError: If C{cls} is not a subclass of C{self}'s
        current class.
    """
    if not issubclass(cls, self.__class__):
        raise ValueError('Can not specialize to %r' % cls)
    # Update the class.
    self.__class__ = cls
    # Update the class of every object in this object's merge set, so
    # that they all keep the same class.
    if self.__mergeset is not None:
        for apidoc in self.__mergeset:
            apidoc.__class__ = cls
    # Re-run the (new) class's constructor with the current instance
    # state, so that any initialization it performs takes effect.
    self.__init__(**self.__dict__)
449
450 __has_been_hashed = False
451 """True iff L{self.__hash__()} has ever been called."""
452
456
def __cmp__(self, other):
    """
    Compare this C{APIDoc} to C{other}.

    Two C{APIDoc}s are equal only if they use the very same
    instance dictionary.  Otherwise they are ordered by
    C{canonical_name}.

    @return: C{0} if both objects share one instance dictionary;
        C{-1} if C{other} is not an C{APIDoc}; otherwise the result
        of comparing the canonical names, except that equal names
        give C{-1} rather than C{0}.
    """
    if not isinstance(other, APIDoc): return -1
    if self.__dict__ is other.__dict__: return 0
    name_cmp = cmp(self.canonical_name, other.canonical_name)
    # Distinct objects that happen to have the same canonical name
    # must never compare as equal, so report "less than" instead.
    if name_cmp == 0: return -1
    else: return name_cmp
463