1
2
3
4
5
6
7
8
9 """
10 Syntax highlighter for Python values. Currently provides special
11 colorization support for:
12
13 - lists, tuples, sets, frozensets, dicts
14 - numbers
15 - strings
16 - compiled regexps
17
18 The highlighter also takes care of line-wrapping, and automatically
19 stops generating repr output as soon as it has exceeded the specified
20 number of lines (which should make it faster than pprint for large
21 values). It does I{not} bother to do automatic cycle detection,
22 because maxlines is typically around 5, so it's really not worth it.
23
24 The syntax-highlighted output is encoded using a
25 L{ParsedEpytextDocstring}, which can then be used to generate output in
26 a variety of formats.
27 """
28 __docformat__ = 'epytext en'
29
30
31
32
33
34 import types, re
35 import epydoc.apidoc
36 from epydoc.util import decode_with_backslashreplace
37 from epydoc.util import plaintext_to_html, plaintext_to_latex
38 from epydoc.compat import *
39 import sre_parse, sre_constants
40
41 from epydoc.markup.epytext import Element, ParsedEpytextDocstring
42
# The type object shared by every compiled regular expression.  It is
# looked up once so that the check below does not depend on what a
# particular interpreter happens to call that type.
_RE_PATTERN_TYPE = type(re.compile(''))

def is_re_pattern(pyval):
    """
    Return true if C{pyval} is a compiled regular expression object.

    The value's type is compared against the type that C{re.compile()}
    actually returns, rather than against a hard-coded type name: the
    name is an interpreter implementation detail, and an unrelated
    class may happen to share it.

    @param pyval: The value to test.
    @rtype: C{bool}
    """
    # An exact type test (instead of isinstance) never consults the
    # value's own C{__class__} attribute, so arbitrary introspected
    # objects cannot interfere with the check.
    return type(pyval) is _RE_PATTERN_TYPE
45
47 """
48 An object uesd to keep track of the current state of the pyval
49 colorizer. The L{mark()}/L{restore()} methods can be used to set
50 a backup point, and restore back to that backup point. This is
51 used by several colorization methods that first try colorizing
52 their object on a single line (setting linebreakok=False); and
53 then fall back on a multi-line output if that fails. The L{score}
54 variable is used to keep track of a 'score', reflecting how good
55 we think this repr is. E.g., unhelpful values like '<Foo instance
56 at 0x12345>' get low scores. If the score is too low, we'll use
57 the parse-derived repr instead.
58 """
60 self.result = []
61 self.charpos = 0
62 self.lineno = 1
63 self.linebreakok = True
64
65
66 self.score = 0
67
69 return (len(self.result), self.charpos,
70 self.lineno, self.linebreakok, self.score)
71
73 n, self.charpos, self.lineno, self.linebreakok, self.score = mark
74 del self.result[n:]
75
77 """A control-flow exception that is raised when PyvalColorizer
78 exceeds the maximum number of allowed lines."""
79
81 """A control-flow exception that is raised when PyvalColorizer
82 generates a string containing a newline, but the state object's
83 linebreakok variable is False."""
84
86 """
87 @ivar score: A score, evaluating how good this repr is.
88 @ivar is_complete: True if this colorized repr completely describes
89 the object.
90 """
91 - def __init__(self, tree, score, is_complete):
95
96 -def colorize_pyval(pyval, parse_repr=None, min_score=None,
97 linelen=75, maxlines=5, linebreakok=True, sort=True):
100
102 """
103 Syntax highlighter for Python values.
104 """
105
106 - def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
107 self.linelen = linelen
108 self.maxlines = maxlines
109 self.linebreakok = linebreakok
110 self.sort = sort
111
112
113
114
115
# Style names attached to the pieces of a colorized Python value.  A
# tag of None means the text is emitted as-is, without being wrapped
# in a styled element.
GROUP_TAG = 'variable-group'
COMMA_TAG = COLON_TAG = 'variable-op'
CONST_TAG = NUMBER_TAG = None
QUOTE_TAG = 'variable-quote'
STRING_TAG = 'variable-string'

# Style names attached to the pieces of a colorized regular expression.
RE_CHAR_TAG = None
RE_GROUP_TAG = 're-group'
RE_REF_TAG = 're-ref'
RE_OP_TAG = 're-op'
RE_FLAGS_TAG = 're-flags'

# Marker elements appended to the output: truncated output, a wrapped
# line, and a value whose representation could not be produced.
ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
LINEWRAP = Element('symbol', u'crarr')
UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')

# Matches reprs of the form '<... at 0x...>'; values whose repr
# matches are penalized in the score.
GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

# If true, unicode strings are colorized with the 'unicode-escape'
# encoding; otherwise they are output without any encoding step.
ESCAPE_UNICODE = False
137
138
139
140
141
def colorize(self, pyval, parse_repr=None, min_score=None):
    """
    Colorize a value, falling back on its parse-derived
    representation when the value itself is unavailable or scores
    too low.

    @param pyval: The value to colorize, or C{UNKNOWN}.
    @param parse_repr: The parse-derived representation of the
        value, or C{None} or C{UNKNOWN} if there is none.
    @param min_score: If given, the lowest score for which the
        colorized C{pyval} is still used when C{parse_repr} is
        available.
    @return: A L{ColorizedPyvalRepr} describing the given pyval.
    """
    UNKNOWN = epydoc.apidoc.UNKNOWN

    # Start from a fresh state that honours our line-break setting.
    state = _ColorizerState()
    state.linebreakok = self.linebreakok

    is_complete = True
    try:
        if pyval is UNKNOWN:
            if parse_repr in (None, UNKNOWN):
                state.result.append(PyvalColorizer.UNKNOWN_REPR)
            else:
                self._output(parse_repr, None, state)
        else:
            self._colorize(pyval, state)
    except (_Maxlines, _Linebreak):
        # The output had to be cut short: mark it with an ellipsis.
        is_complete = False
        if self.linebreakok:
            state.result += ['\n', self.ELLIPSIS]
        else:
            if state.result[-1] is self.LINEWRAP:
                state.result.pop()
            # NOTE(review): the trim is applied whether or not a
            # LINEWRAP marker was popped -- confirm this nesting.
            self._trim_result(state.result, 3)
            state.result.append(self.ELLIPSIS)

    # A low-scoring repr is replaced by the parse-derived one.
    if pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN):
        if min_score is not None and state.score < min_score:
            return self.colorize(UNKNOWN, parse_repr)

    tree = Element('epytext', *state.result)
    return ColorizedPyvalRepr(tree, state.score, is_complete)
177
def _colorize(self, pyval, state):
    """
    Append a colorized representation of C{pyval} to C{state},
    dispatching on the exact type of the value.

    Every call adds one to C{state.score}.  A value whose C{repr()}
    raises costs 100 points and is shown as L{UNKNOWN_REPR}; a value
    whose repr matches L{GENERIC_OBJECT_RE} costs 5 points.

    @param pyval: The value to colorize.
    @param state: The L{_ColorizerState} that receives the output.
    """
    kind = type(pyval)
    state.score += 1

    # Constants and numbers.
    if pyval is None or pyval is True or pyval is False:
        self._output(unicode(pyval), self.CONST_TAG, state)
        return
    if kind in (int, float, long, types.ComplexType):
        self._output(unicode(pyval), self.NUMBER_TAG, state)
        return

    # Strings.
    if kind is str:
        self._colorize_str(pyval, state, '', 'string-escape')
        return
    if kind is unicode:
        codec = None
        if self.ESCAPE_UNICODE:
            codec = 'unicode-escape'
        self._colorize_str(pyval, state, 'u', codec)
        return

    # Containers: one line if possible, several lines otherwise.
    if kind is list:
        self._multiline(self._colorize_iter, pyval, state, '[', ']')
        return
    if kind is tuple:
        self._multiline(self._colorize_iter, pyval, state, '(', ')')
        return
    if kind is set:
        self._multiline(self._colorize_iter, self._sort(pyval),
                        state, 'set([', '])')
        return
    if kind is frozenset:
        self._multiline(self._colorize_iter, self._sort(pyval),
                        state, 'frozenset([', '])')
        return
    if kind is dict:
        self._multiline(self._colorize_dict, self._sort(pyval.items()),
                        state, '{', '}')
        return

    # Compiled regular expressions.
    if is_re_pattern(pyval):
        self._colorize_re(pyval, state)
        return

    # Anything else: fall back on repr(), which may itself fail.
    try:
        text = repr(pyval)
        if not isinstance(text, (str, unicode)):
            text = unicode(text)
    except KeyboardInterrupt:
        raise
    except:
        state.score -= 100
        state.result.append(self.UNKNOWN_REPR)
        return

    if self.GENERIC_OBJECT_RE.match(text):
        state.score -= 5
    self._output(text, None, state)
226
228 if not self.sort: return items
229 try: return sorted(items)
230 except KeyboardInterrupt: raise
231 except: return items
232
234 while num_chars > 0:
235 if not result: return
236 if isinstance(result[-1], Element):
237 assert len(result[-1].children) == 1
238 trim = min(num_chars, len(result[-1].children[0]))
239 result[-1].children[0] = result[-1].children[0][:-trim]
240 if not result[-1].children[0]: result.pop()
241 num_chars -= trim
242 else:
243 trim = min(num_chars, len(result[-1]))
244 result[-1] = result[-1][:-trim]
245 if not result[-1]: result.pop()
246 num_chars -= trim
247
248
249
250
251
253 """
254 Helper for container-type colorizers. First, try calling
255 C{func(pyval, state, *args)} with linebreakok set to false;
256 and if that fails, then try again with it set to true.
257 """
258 linebreakok = state.linebreakok
259 mark = state.mark()
260
261 try:
262 state.linebreakok = False
263 func(pyval, state, *args)
264 state.linebreakok = linebreakok
265
266 except _Linebreak:
267 if not linebreakok:
268 raise
269 state.restore(mark)
270 func(pyval, state, *args)
271
273 self._output(prefix, self.GROUP_TAG, state)
274 indent = state.charpos
275 for i, elt in enumerate(pyval):
276 if i>=1:
277 if state.linebreakok:
278 self._output(',', self.COMMA_TAG, state)
279 self._output('\n'+' '*indent, None, state)
280 else:
281 self._output(', ', self.COMMA_TAG, state)
282 self._colorize(elt, state)
283 self._output(suffix, self.GROUP_TAG, state)
284
286 self._output(prefix, self.GROUP_TAG, state)
287 indent = state.charpos
288 for i, (key, val) in enumerate(items):
289 if i>=1:
290 if state.linebreakok:
291 self._output(',', self.COMMA_TAG, state)
292 self._output('\n'+' '*indent, None, state)
293 else:
294 self._output(', ', self.COMMA_TAG, state)
295 self._colorize(key, state)
296 self._output(': ', self.COLON_TAG, state)
297 self._colorize(val, state)
298 self._output(suffix, self.GROUP_TAG, state)
299
301
302 if '\n' in pyval and state.linebreakok: quote = "'''"
303 else: quote = "'"
304
305 if state.linebreakok:
306 lines = pyval.split('\n')
307 else:
308 lines = [pyval]
309
310 self._output(prefix+quote, self.QUOTE_TAG, state)
311
312 for i, line in enumerate(lines):
313 if i>0: self._output('\n', None, state)
314 if encoding: line = line.encode(encoding)
315 self._output(line, self.STRING_TAG, state)
316
317 self._output(quote, self.QUOTE_TAG, state)
318
320
def _colorize_re(self, pyval, state):
    """
    Colorize a compiled regular expression as
    C{re.compile(r'...')}, with its flags written inline at the
    start of the pattern.

    @param pyval: The compiled regular expression.
    @param state: The L{_ColorizerState} that receives the output.
    """
    pattern = pyval.pattern
    flags = pyval.flags

    # Byte-string patterns are decoded before they are parsed.
    if isinstance(pattern, str):
        pattern = decode_with_backslashreplace(pattern)

    # Parse the pattern, and invert the parser's name->number group
    # table so that groups can be looked up by number.
    tree = sre_parse.parse(pattern, flags)
    names_by_number = {}
    for name, number in tree.pattern.groupdict.items():
        names_by_number[number] = name

    self._output("re.compile(r'", None, state)
    self._colorize_re_flags(tree.pattern.flags, state)
    self._colorize_re_tree(tree, state, True, names_by_number)
    self._output("')", None, state)
334
336 if flags:
337 flags = [c for (c,n) in sorted(sre_parse.FLAGS.items())
338 if (n&flags)]
339 flags = '(?%s)' % ''.join(flags)
340 self._output(flags, self.RE_FLAGS_TAG, state)
341
343 assert noparen in (True, False)
344 if len(tree) > 1 and not noparen:
345 self._output('(', self.RE_GROUP_TAG, state)
346 for elt in tree:
347 op = elt[0]
348 args = elt[1]
349
350 if op == sre_constants.LITERAL:
351 c = unichr(args)
352
353 if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
354 elif c == '\t': c = '\\t'
355 elif c == '\r': c = '\\r'
356 elif c == '\n': c = '\\n'
357 elif c == '\f': c = '\\f'
358 elif c == '\v': c = '\\v'
359 elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
360 elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
361 elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
362 self._output(c, self.RE_CHAR_TAG, state)
363
364 elif op == sre_constants.ANY:
365 self._output('.', self.RE_CHAR_TAG, state)
366
367 elif op == sre_constants.BRANCH:
368 if args[0] is not None:
369 raise ValueError('Branch expected None arg but got %s'
370 % args[0])
371 for i, item in enumerate(args[1]):
372 if i > 0:
373 self._output('|', self.RE_OP_TAG, state)
374 self._colorize_re_tree(item, state, True, groups)
375
376 elif op == sre_constants.IN:
377 if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
378 self._colorize_re_tree(args, state, False, groups)
379 else:
380 self._output('[', self.RE_GROUP_TAG, state)
381 self._colorize_re_tree(args, state, True, groups)
382 self._output(']', self.RE_GROUP_TAG, state)
383
384 elif op == sre_constants.CATEGORY:
385 if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
386 elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
387 elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
388 elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
389 elif args == sre_constants.CATEGORY_WORD: val = r'\w'
390 elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
391 else: raise ValueError('Unknown category %s' % args)
392 self._output(val, self.RE_CHAR_TAG, state)
393
394 elif op == sre_constants.AT:
395 if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
396 elif args == sre_constants.AT_BEGINNING: val = r'^'
397 elif args == sre_constants.AT_END: val = r'$'
398 elif args == sre_constants.AT_BOUNDARY: val = r'\b'
399 elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
400 elif args == sre_constants.AT_END_STRING: val = r'\Z'
401 else: raise ValueError('Unknown position %s' % args)
402 self._output(val, self.RE_CHAR_TAG, state)
403
404 elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
405 minrpt = args[0]
406 maxrpt = args[1]
407 if maxrpt == sre_constants.MAXREPEAT:
408 if minrpt == 0: val = '*'
409 elif minrpt == 1: val = '+'
410 else: val = '{%d,}' % (minrpt)
411 elif minrpt == 0:
412 if maxrpt == 1: val = '?'
413 else: val = '{,%d}' % (maxrpt)
414 elif minrpt == maxrpt:
415 val = '{%d}' % (maxrpt)
416 else:
417 val = '{%d,%d}' % (minrpt, maxrpt)
418 if op == sre_constants.MIN_REPEAT:
419 val += '?'
420
421 self._colorize_re_tree(args[2], state, False, groups)
422 self._output(val, self.RE_OP_TAG, state)
423
424 elif op == sre_constants.SUBPATTERN:
425 if args[0] is None:
426 self._output('(?:', self.RE_GROUP_TAG, state)
427 elif args[0] in groups:
428 self._output('(?P<', self.RE_GROUP_TAG, state)
429 self._output(groups[args[0]], self.RE_REF_TAG, state)
430 self._output('>', self.RE_GROUP_TAG, state)
431 elif isinstance(args[0], (int, long)):
432
433 self._output('(', self.RE_GROUP_TAG, state)
434 else:
435 self._output('(?P<', self.RE_GROUP_TAG, state)
436 self._output(args[0], self.RE_REF_TAG, state)
437 self._output('>', self.RE_GROUP_TAG, state)
438 self._colorize_re_tree(args[1], state, True, groups)
439 self._output(')', self.RE_GROUP_TAG, state)
440
441 elif op == sre_constants.GROUPREF:
442 self._output('\\%d' % args, self.RE_REF_TAG, state)
443
444 elif op == sre_constants.RANGE:
445 self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
446 state, False, groups )
447 self._output('-', self.RE_OP_TAG, state)
448 self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
449 state, False, groups )
450
451 elif op == sre_constants.NEGATE:
452 self._output('^', self.RE_OP_TAG, state)
453
454 elif op == sre_constants.ASSERT:
455 if args[0] > 0:
456 self._output('(?=', self.RE_GROUP_TAG, state)
457 else:
458 self._output('(?<=', self.RE_GROUP_TAG, state)
459 self._colorize_re_tree(args[1], state, True, groups)
460 self._output(')', self.RE_GROUP_TAG, state)
461
462 elif op == sre_constants.ASSERT_NOT:
463 if args[0] > 0:
464 self._output('(?!', self.RE_GROUP_TAG, state)
465 else:
466 self._output('(?<!', self.RE_GROUP_TAG, state)
467 self._colorize_re_tree(args[1], state, True, groups)
468 self._output(')', self.RE_GROUP_TAG, state)
469
470 elif op == sre_constants.NOT_LITERAL:
471 self._output('[^', self.RE_GROUP_TAG, state)
472 self._colorize_re_tree( ((sre_constants.LITERAL, args),),
473 state, False, groups )
474 self._output(']', self.RE_GROUP_TAG, state)
475 else:
476 log.error("Error colorizing regexp: unknown elt %r" % elt)
477 if len(tree) > 1 and not noparen:
478 self._output(')', self.RE_GROUP_TAG, state)
479
480
481
482
483
def _output(self, s, tag, state):
    """
    Append the string C{s} to the result list of C{state}, wrapping
    its text in a C{code} element styled with C{tag} when a tag is
    given.  Text that would run past C{self.linelen} is line-wrapped:
    the part that fits is followed by a L{LINEWRAP} marker and the
    rest continues on a new line.

    @param s: The text to output.
    @param tag: The style to attach to the text, or a false value
        to output the text without a styled element.
    @param state: The L{_ColorizerState} that receives the output.
    @raise _Maxlines: If a new line would exceed C{self.maxlines}.
    @raise _Linebreak: If a new line is needed but the state does
        not allow line breaks.
    """
    # Byte strings are decoded before any further processing.
    if isinstance(s, str):
        s = decode_with_backslashreplace(s)

    # Every piece after the first starts a new output line.  Wrapping
    # a piece queues its overflow as the piece that comes next.
    pieces = s.split('\n')
    index = 0
    while index < len(pieces):
        piece = pieces[index]

        if index > 0:
            if (state.lineno+1) > self.maxlines:
                raise _Maxlines()
            if not state.linebreakok:
                raise _Linebreak()
            state.result.append(u'\n')
            state.lineno += 1
            state.charpos = 0

        wrapped = state.charpos + len(piece) > self.linelen
        if wrapped:
            split = self.linelen-state.charpos
            pieces.insert(index+1, piece[split:])
            piece = piece[:split]
        else:
            state.charpos += len(piece)

        if tag:
            piece = Element('code', piece, style=tag)
        state.result.append(piece)
        if wrapped:
            state.result.append(self.LINEWRAP)

        index += 1
532