epydoc.markup.pyval_repr

102 """ 103 Syntax highlighter for Python values. 104 """ 105

106 - def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):

107 self.linelen = linelen 108 self.maxlines = maxlines 109 self.linebreakok = linebreakok 110 self.sort = sort

#////////////////////////////////////////////////////////////
# Colorization Tags & other constants
#////////////////////////////////////////////////////////////

# Style names attached to emitted text.  A tag of None means the
# text is added to the result untagged (see the `if tag:` test in
# `_output`).
GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
COMMA_TAG = 'variable-op'        # The "," that separates elements
COLON_TAG = 'variable-op'        # The ":" in dictionaries
CONST_TAG = None                 # None, True, False
NUMBER_TAG = None                # ints, floats, etc
QUOTE_TAG = 'variable-quote'     # Quotes around strings.
STRING_TAG = 'variable-string'   # Body of string literals

# Style names used when colorizing a compiled regular expression.
RE_CHAR_TAG = None
RE_GROUP_TAG = 're-group'
RE_REF_TAG = 're-ref'
RE_OP_TAG = 're-op'
RE_FLAGS_TAG = 're-flags'

# Marker elements appended to the result list:
#  - ELLIPSIS: added by `colorize` when the output was cut short.
#  - LINEWRAP: added by `_output` after a segment that was wrapped.
#  - UNKNOWN_REPR: used when no representation could be produced.
ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
LINEWRAP = Element('symbol', u'crarr')
UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')

# Matches reprs of the form "<... at 0x...>"; `_colorize` lowers the
# score of a value whose repr matches.
GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

ESCAPE_UNICODE = False # should we escape non-ascii unicode chars?

#////////////////////////////////////////////////////////////
# Entry Point
#////////////////////////////////////////////////////////////

def colorize(self, pyval, parse_repr=None, min_score=None):
    """
    Colorize C{pyval}, falling back to C{parse_repr} when needed.

    @param pyval: The value to colorize, or C{epydoc.apidoc.UNKNOWN}.
    @param parse_repr: A textual representation used when C{pyval}
        is UNKNOWN, or when colorizing C{pyval} scores below
        C{min_score}.  C{None} and UNKNOWN both mean "not available".
    @param min_score: If not C{None}, the lowest acceptable score
        for the representation built from C{pyval}.
    @return: A L{ColorizedPyvalRepr} describing the given pyval.
    """
    UNKNOWN = epydoc.apidoc.UNKNOWN
    # Create an object to keep track of the colorization.
    state = _ColorizerState()
    state.linebreakok = self.linebreakok
    # Colorize the value.  If we reach maxlines, then add on an
    # ellipsis marker and call it a day.
    try:
        if pyval is not UNKNOWN:
            self._colorize(pyval, state)
        elif parse_repr not in (None, UNKNOWN):
            self._output(parse_repr, None, state)
        else:
            # Looked up through self, like every other constant in
            # this class, so a subclass override is honoured.
            state.result.append(self.UNKNOWN_REPR)
        is_complete = True
    except (_Maxlines, _Linebreak):
        if self.linebreakok:
            state.result.append('\n')
            state.result.append(self.ELLIPSIS)
        else:
            # Drop a dangling line-wrap marker, then make room for
            # the three characters of the ellipsis.  The emptiness
            # guard avoids an IndexError on an empty result list.
            if state.result and state.result[-1] is self.LINEWRAP:
                state.result.pop()
            self._trim_result(state.result, 3)
            state.result.append(self.ELLIPSIS)
        is_complete = False
    # If we didn't score high enough, then try again using only the
    # parsed representation.
    if (pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
        and min_score is not None and state.score < min_score):
        return self.colorize(UNKNOWN, parse_repr)
    # Put it all together.
    tree = Element('epytext', *state.result)
    return ColorizedPyvalRepr(tree, state.score, is_complete)

177

def _colorize(self, pyval, state):
    """
    Emit a colorized representation of C{pyval} into C{state}.

    Dispatch is on the exact type of the value (subclasses fall
    through to the generic C{repr()} path).  Every call adds one to
    C{state.score}; a failing C{repr()} subtracts 100 and a repr
    matching C{GENERIC_OBJECT_RE} subtracts 5.
    """
    kind = type(pyval)
    state.score += 1

    # Singleton constants.
    if pyval is None or pyval is True or pyval is False:
        self._output(unicode(pyval), self.CONST_TAG, state)
        return

    # Numbers.
    if kind in (int, float, long, types.ComplexType):
        self._output(unicode(pyval), self.NUMBER_TAG, state)
        return

    # Byte strings and unicode strings.
    if kind is str:
        self._colorize_str(pyval, state, '', 'string-escape')
        return
    if kind is unicode:
        codec = None
        if self.ESCAPE_UNICODE:
            codec = 'unicode-escape'
        self._colorize_str(pyval, state, 'u', codec)
        return

    # Containers.
    if kind is list:
        self._multiline(self._colorize_iter, pyval, state, '[', ']')
        return
    if kind is tuple:
        self._multiline(self._colorize_iter, pyval, state, '(', ')')
        return
    if kind is set:
        self._multiline(self._colorize_iter, self._sort(pyval),
                        state, 'set([', '])')
        return
    if kind is frozenset:
        self._multiline(self._colorize_iter, self._sort(pyval),
                        state, 'frozenset([', '])')
        return
    if kind is dict:
        self._multiline(self._colorize_dict, self._sort(pyval.items()),
                        state, '{', '}')
        return

    # Compiled regular expressions.
    if is_re_pattern(pyval):
        self._colorize_re(pyval, state)
        return

    # Anything else: fall back on repr(), which may itself fail for
    # arbitrary objects.
    try:
        text = repr(pyval)
        if not isinstance(text, (str, unicode)):
            text = unicode(text)
    except KeyboardInterrupt:
        raise
    except:
        state.score -= 100
        state.result.append(self.UNKNOWN_REPR)
        return

    if self.GENERIC_OBJECT_RE.match(text):
        state.score -= 5
    self._output(text, None, state)

226

227 - def _sort(self, items):

228 if not self.sort: return items 229 try: return sorted(items) 230 except KeyboardInterrupt: raise 231 except: return items

232

def _trim_result(self, result, num_chars):
    """
    Remove up to C{num_chars} characters from the end of C{result},
    modifying the list in place.

    Entries are either plain strings or C{Element}s with exactly one
    child string.  An entry whose text becomes empty is removed from
    the list.  Trimming stops early if the list runs out.
    """
    remaining = num_chars
    while remaining > 0 and result:
        tail = result[-1]
        wrapped = isinstance(tail, Element)
        if wrapped:
            assert len(tail.children) == 1
            text = tail.children[0]
        else:
            text = tail

        cut = min(remaining, len(text))
        kept = text[:-cut]

        # Write the shortened text back where it came from.
        if wrapped:
            tail.children[0] = kept
        else:
            result[-1] = kept
        if not kept:
            result.pop()
        remaining -= cut

247 248 #//////////////////////////////////////////////////////////// 249 # Object Colorization Functions 250 #//////////////////////////////////////////////////////////// 251

252 - def _multiline(self, func, pyval, state, *args):

253 """ 254 Helper for container-type colorizers. First, try calling 255 C{func(pyval, state, *args)} with linebreakok set to false; 256 and if that fails, then try again with it set to true. 257 """ 258 linebreakok = state.linebreakok 259 mark = state.mark() 260 261 try: 262 state.linebreakok = False 263 func(pyval, state, *args) 264 state.linebreakok = linebreakok 265 266 except _Linebreak: 267 if not linebreakok: 268 raise 269 state.restore(mark) 270 func(pyval, state, *args)

271

272 - def _colorize_iter(self, pyval, state, prefix, suffix):

273 self._output(prefix, self.GROUP_TAG, state) 274 indent = state.charpos 275 for i, elt in enumerate(pyval): 276 if i>=1: 277 if state.linebreakok: 278 self._output(',', self.COMMA_TAG, state) 279 self._output('\n'+' '*indent, None, state) 280 else: 281 self._output(', ', self.COMMA_TAG, state) 282 self._colorize(elt, state) 283 self._output(suffix, self.GROUP_TAG, state)

284

285 - def _colorize_dict(self, items, state, prefix, suffix):

286 self._output(prefix, self.GROUP_TAG, state) 287 indent = state.charpos 288 for i, (key, val) in enumerate(items): 289 if i>=1: 290 if state.linebreakok: 291 self._output(',', self.COMMA_TAG, state) 292 self._output('\n'+' '*indent, None, state) 293 else: 294 self._output(', ', self.COMMA_TAG, state) 295 self._colorize(key, state) 296 self._output(': ', self.COLON_TAG, state) 297 self._colorize(val, state) 298 self._output(suffix, self.GROUP_TAG, state)

299

300 - def _colorize_str(self, pyval, state, prefix, encoding):

301 # Decide which quote to use. 302 if '\n' in pyval and state.linebreakok: quote = "'''" 303 else: quote = "'" 304 # Divide the string into lines. 305 if state.linebreakok: 306 lines = pyval.split('\n') 307 else: 308 lines = [pyval] 309 # Open quote. 310 self._output(prefix+quote, self.QUOTE_TAG, state) 311 # Body 312 for i, line in enumerate(lines): 313 if i>0: self._output('\n', None, state) 314 if encoding: line = line.encode(encoding) 315 self._output(line, self.STRING_TAG, state) 316 # Close quote. 317 self._output(quote, self.QUOTE_TAG, state)

318

def _colorize_re(self, pyval, state):
    """
    Colorize a compiled regular expression as
    C{re.compile(r'...')}, with its flags shown as an inline
    C{(?...)} prefix.
    """
    # Extract the flag & pattern from the regexp.
    pat = pyval.pattern
    flags = pyval.flags
    # If the pattern is a string, decode it to unicode.
    if isinstance(pat, str):
        pat = decode_with_backslashreplace(pat)
    # Parse the regexp pattern.
    tree = sre_parse.parse(pat, flags)
    # Invert the name->number table so group names can be looked up
    # by group number.
    groups = {}
    for name, num in tree.pattern.groupdict.items():
        groups[num] = name
    # Colorize it!
    self._output("re.compile(r'", None, state)
    self._colorize_re_flags(tree.pattern.flags, state)
    self._colorize_re_tree(tree, state, True, groups)
    self._output("')", None, state)

334

335 - def _colorize_re_flags(self, flags, state):

336 if flags: 337 flags = [c for (c,n) in sorted(sre_parse.FLAGS.items()) 338 if (n&flags)] 339 flags = '(?%s)' % ''.join(flags) 340 self._output(flags, self.RE_FLAGS_TAG, state)

341

342 - def _colorize_re_tree(self, tree, state, noparen, groups):

343 assert noparen in (True, False) 344 if len(tree) > 1 and not noparen: 345 self._output('(', self.RE_GROUP_TAG, state) 346 for elt in tree: 347 op = elt[0] 348 args = elt[1] 349 350 if op == sre_constants.LITERAL: 351 c = unichr(args) 352 # Add any appropriate escaping. 353 if c in '.^$\\*+?{}[]|()\'': c = '\\'+c 354 elif c == '\t': c = '\\t' 355 elif c == '\r': c = '\\r' 356 elif c == '\n': c = '\\n' 357 elif c == '\f': c = '\\f' 358 elif c == '\v': c = '\\v' 359 elif ord(c) > 0xffff: c = r'\U%08x' % ord(c) 360 elif ord(c) > 0xff: c = r'\u%04x' % ord(c) 361 elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c) 362 self._output(c, self.RE_CHAR_TAG, state) 363 364 elif op == sre_constants.ANY: 365 self._output('.', self.RE_CHAR_TAG, state) 366 367 elif op == sre_constants.BRANCH: 368 if args[0] is not None: 369 raise ValueError('Branch expected None arg but got %s' 370 % args[0]) 371 for i, item in enumerate(args[1]): 372 if i > 0: 373 self._output('|', self.RE_OP_TAG, state) 374 self._colorize_re_tree(item, state, True, groups) 375 376 elif op == sre_constants.IN: 377 if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY): 378 self._colorize_re_tree(args, state, False, groups) 379 else: 380 self._output('[', self.RE_GROUP_TAG, state) 381 self._colorize_re_tree(args, state, True, groups) 382 self._output(']', self.RE_GROUP_TAG, state) 383 384 elif op == sre_constants.CATEGORY: 385 if args == sre_constants.CATEGORY_DIGIT: val = r'\d' 386 elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D' 387 elif args == sre_constants.CATEGORY_SPACE: val = r'\s' 388 elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S' 389 elif args == sre_constants.CATEGORY_WORD: val = r'\w' 390 elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W' 391 else: raise ValueError('Unknown category %s' % args) 392 self._output(val, self.RE_CHAR_TAG, state) 393 394 elif op == sre_constants.AT: 395 if args == sre_constants.AT_BEGINNING_STRING: val = r'\A' 396 elif args == 
sre_constants.AT_BEGINNING: val = r'^' 397 elif args == sre_constants.AT_END: val = r'$' 398 elif args == sre_constants.AT_BOUNDARY: val = r'\b' 399 elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B' 400 elif args == sre_constants.AT_END_STRING: val = r'\Z' 401 else: raise ValueError('Unknown position %s' % args) 402 self._output(val, self.RE_CHAR_TAG, state) 403 404 elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT): 405 minrpt = args[0] 406 maxrpt = args[1] 407 if maxrpt == sre_constants.MAXREPEAT: 408 if minrpt == 0: val = '*' 409 elif minrpt == 1: val = '+' 410 else: val = '{%d,}' % (minrpt) 411 elif minrpt == 0: 412 if maxrpt == 1: val = '?' 413 else: val = '{,%d}' % (maxrpt) 414 elif minrpt == maxrpt: 415 val = '{%d}' % (maxrpt) 416 else: 417 val = '{%d,%d}' % (minrpt, maxrpt) 418 if op == sre_constants.MIN_REPEAT: 419 val += '?' 420 421 self._colorize_re_tree(args[2], state, False, groups) 422 self._output(val, self.RE_OP_TAG, state) 423 424 elif op == sre_constants.SUBPATTERN: 425 if args[0] is None: 426 self._output('(?:', self.RE_GROUP_TAG, state) 427 elif args[0] in groups: 428 self._output('(?P<', self.RE_GROUP_TAG, state) 429 self._output(groups[args[0]], self.RE_REF_TAG, state) 430 self._output('>', self.RE_GROUP_TAG, state) 431 elif isinstance(args[0], (int, long)): 432 # This is cheating: 433 self._output('(', self.RE_GROUP_TAG, state) 434 else: 435 self._output('(?P<', self.RE_GROUP_TAG, state) 436 self._output(args[0], self.RE_REF_TAG, state) 437 self._output('>', self.RE_GROUP_TAG, state) 438 self._colorize_re_tree(args[1], state, True, groups) 439 self._output(')', self.RE_GROUP_TAG, state) 440 441 elif op == sre_constants.GROUPREF: 442 self._output('\\%d' % args, self.RE_REF_TAG, state) 443 444 elif op == sre_constants.RANGE: 445 self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),), 446 state, False, groups ) 447 self._output('-', self.RE_OP_TAG, state) 448 self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),), 
449 state, False, groups ) 450 451 elif op == sre_constants.NEGATE: 452 self._output('^', self.RE_OP_TAG, state) 453 454 elif op == sre_constants.ASSERT: 455 if args[0] > 0: 456 self._output('(?=', self.RE_GROUP_TAG, state) 457 else: 458 self._output('(?<=', self.RE_GROUP_TAG, state) 459 self._colorize_re_tree(args[1], state, True, groups) 460 self._output(')', self.RE_GROUP_TAG, state) 461 462 elif op == sre_constants.ASSERT_NOT: 463 if args[0] > 0: 464 self._output('(?!', self.RE_GROUP_TAG, state) 465 else: 466 self._output('(?<!', self.RE_GROUP_TAG, state) 467 self._colorize_re_tree(args[1], state, True, groups) 468 self._output(')', self.RE_GROUP_TAG, state) 469 470 elif op == sre_constants.NOT_LITERAL: 471 self._output('[^', self.RE_GROUP_TAG, state) 472 self._colorize_re_tree( ((sre_constants.LITERAL, args),), 473 state, False, groups ) 474 self._output(']', self.RE_GROUP_TAG, state) 475 else: 476 log.error("Error colorizing regexp: unknown elt %r" % elt) 477 if len(tree) > 1 and not noparen: 478 self._output(')', self.RE_GROUP_TAG, state)

479 480 #//////////////////////////////////////////////////////////// 481 # Output function 482 #//////////////////////////////////////////////////////////// 483

def _output(self, s, tag, state):
    """
    Add the string `s` to the result list, tagging its contents
    with tag `tag`.  Any lines that go beyond `self.linelen` will
    be line-wrapped.  If the total number of lines exceeds
    `self.maxlines`, then raise a `_Maxlines` exception.  If a new
    line has to be started while `state.linebreakok` is false, raise
    a `_Linebreak` exception instead.
    """
    # Make sure the string is unicode.
    if isinstance(s, str):
        s = decode_with_backslashreplace(s)

    # Work list of line segments.  The first entry continues the
    # current line; every later entry starts a new line.  Wrapping
    # pushes the overflow back onto this list, so it can grow while
    # we walk it.
    pending = s.split('\n')
    index = 0
    while index < len(pending):
        chunk = pending[index]

        # Every segment but the first begins a new line.
        if index > 0:
            if (state.lineno+1) > self.maxlines:
                raise _Maxlines()
            if not state.linebreakok:
                raise _Linebreak()
            state.result.append(u'\n')
            state.lineno += 1
            state.charpos = 0

        room = self.linelen - state.charpos
        if len(chunk) <= room:
            # The segment fits on the current line: tag it and
            # store it.
            state.charpos += len(chunk)
            if tag:
                chunk = Element('code', chunk, style=tag)
            state.result.append(chunk)
        else:
            # The segment is too long: keep what fits, mark the
            # wrap, and queue the remainder as the next segment.
            # (We'll go to the beginning of the next line at the
            # start of the next pass through the loop.)
            pending.insert(index+1, chunk[room:])
            head = chunk[:room]
            if tag:
                head = Element('code', head, style=tag)
            state.result += [head, self.LINEWRAP]

        index += 1

Source Code for Module epydoc.markup.pyval_repr