1
2
3
4
5
6
7
8
"""
Syntax highlighting for doctest blocks.  This module defines two
functions, L{doctest_to_html()} and L{doctest_to_latex()}, which can
be used to perform syntax highlighting on doctest blocks.  It also
defines the more general L{DoctestColorizer} class, whose
C{colorize_doctest()} method can be used to do syntax highlighting on
doctest blocks with other output formats.  (Both C{doctest_to_html()}
and C{doctest_to_latex()} are implemented by calling
C{colorize_doctest()} on a colorizer subclass.)
"""
18 __docformat__ = 'epytext en'
19
20 import re
21 from epydoc.util import plaintext_to_html, plaintext_to_latex
22
# Public names of this module: the two convenience functions, the
# colorizer base class, and its XML, HTML and LaTeX subclasses.
__all__ = ['doctest_to_html', 'doctest_to_latex',
           'DoctestColorizer', 'XMLDoctestColorizer',
           'HTMLDoctestColorizer', 'LaTeXDoctestColorizer']
26
"""
Perform syntax highlighting on the given doctest string, and
return the resulting HTML code.  This code consists of a C{<pre>}
block with class=py-doctest.  Syntax highlighting is performed
using the following css classes:

  - C{py-prompt} -- the Python PS1 prompt (>>>)
  - C{py-more} -- the Python PS2 prompt (...)
  - C{py-keyword} -- a Python keyword (for, if, etc.)
  - C{py-builtin} -- a Python builtin name (abs, dir, etc.)
  - C{py-string} -- a string literal
  - C{py-comment} -- a comment
  - C{py-except} -- an exception traceback (up to the next >>>)
  - C{py-output} -- the output from a doctest block.
  - C{py-defname} -- the name of a function or class defined by
    a C{def} or C{class} statement.

@param s: The doctest string to colorize.
@return: The value returned by calling C{colorize_doctest(s)} on a
    new L{HTMLDoctestColorizer}.
"""
# Every call creates a fresh colorizer instance.
return HTMLDoctestColorizer().colorize_doctest(s)
46
r"""
Perform syntax highlighting on the given doctest string, and
return the resulting LaTeX code.  This code consists of an
C{alltt} environment.  Syntax highlighting is performed using
the following new latex commands, which must be defined externally:

  - C{\pysrcprompt} -- the Python PS1 prompt (>>>)
  - C{\pysrcmore} -- the Python PS2 prompt (...)
  - C{\pysrckeyword} -- a Python keyword (for, if, etc.)
  - C{\pysrcbuiltin} -- a Python builtin name (abs, dir, etc.)
  - C{\pysrcstring} -- a string literal
  - C{\pysrccomment} -- a comment
  - C{\pysrcexcept} -- an exception traceback (up to the next >>>)
  - C{\pysrcoutput} -- the output from a doctest block.
  - C{\pysrcdefname} -- the name of a function or class defined by
    a C{def} or C{class} statement.

@param s: The doctest string to colorize.
@return: The value returned by calling C{colorize_doctest(s)} on a
    new L{LaTeXDoctestColorizer}.
"""
# The docstring above is a raw string because the LaTeX command names
# start with a backslash; its text is otherwise unchanged.
return LaTeXDoctestColorizer().colorize_doctest(s)
65
"""
An abstract base class for performing syntax highlighting on
doctest blocks and other bits of Python code.  Subclasses should
provide definitions for:

  - The L{markup()} method, which takes a substring and a tag, and
    returns a colorized version of the substring.
  - The L{PREFIX} and L{SUFFIX} variables, which will be added
    to the beginning and end of the strings returned by
    L{colorize_codeblock} and L{colorize_doctest}.
"""

#: A string that is added to the beginning of the strings returned
#: by L{colorize_codeblock} and L{colorize_doctest}.  It is C{None}
#: here; subclasses are expected to supply a real value.
PREFIX = None

#: A string that is added to the end of the strings returned by
#: L{colorize_codeblock} and L{colorize_doctest}.  It is C{None}
#: here; subclasses are expected to supply a real value.
SUFFIX = None

#: The string used to re-join lines that were split on C{'\n'} while
#: colorizing.
NEWLINE = '\n'
91
92
93
#: The Python keywords that are highlighted with the C{keyword} tag.
#: Every fragment except the last ends with a space: adjacent string
#: literals are concatenated before C{split()} runs, so without that
#: separator the last word of one row and the first word of the next
#: would fuse into a single bogus entry (e.g. C{raiseassert}) and both
#: real keywords would be lost.
_KEYWORDS = ("and del for is raise "
             "assert elif from lambda return "
             "break else global not try "
             "class except if or while "
             "continue exec import pass yield "
             "def finally in print as").split()
100
101
#: The names of the Python builtins that are highlighted with the
#: C{builtin} tag (dunder names are excluded).  C{__builtins__} is an
#: implementation detail that may be either the builtins module or
#: that module's C{__dict__}; calling C{dir()} on the dict form would
#: list C{dict} method names instead of builtin names, so both forms
#: are handled explicitly.  The result is sorted, as C{dir()} output is.
_BUILTINS = sorted(_BI for _BI in (__builtins__
                                   if isinstance(__builtins__, dict)
                                   else vars(__builtins__))
                   if not _BI.startswith('__'))
104
105
#: A regexp fragment that matches any word in L{_KEYWORDS}; each
#: alternative is wrapped in C{\b} word boundaries.
_KEYWORD_GRP = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS])

#: A regexp fragment that matches any name in L{_BUILTINS} as a whole
#: word, provided it is not immediately preceded by a dot.
_BUILTIN_GRP = (r'(?<!\.)(?:%s)' % '|'.join([r'\b%s\b' % _BI
                                              for _BI in _BUILTINS]))

#: A regexp fragment that matches string literals.  It has four
#: alternatives, in this order: triple-double-quoted, double-quoted,
#: triple-single-quoted and single-quoted.
_STRING_GRP = '|'.join(
    [r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))',
     r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"])

#: A regexp fragment that matches a comment: a C{#} and the shortest
#: run of characters after it that ends where C{$} matches.
_COMMENT_GRP = '(#.*?$)'

#: A regexp fragment that matches the PS1 prompt: optional leading
#: spaces/tabs, C{>>>}, then a space or tab (or the end of the line).
_PROMPT1_GRP = r'^[ \t]*>>>(?:[ \t]|$)'

#: A regexp fragment that matches the PS2 prompt: optional leading
#: spaces/tabs, C{...}, then a space or tab (or the end of the line).
_PROMPT2_GRP = r'^[ \t]*\.\.\.(?:[ \t]|$)'

#: A regexp fragment that matches C{def} or C{class}, the spaces/tabs
#: after it, and the name that follows.
_DEFINE_GRP = r'\b(?:def|class)[ \t]+\w+'
128
129
#: A regexp that matches either kind of doctest prompt (PS1 or PS2).
PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1_GRP, _PROMPT2_GRP),
                       re.MULTILINE | re.DOTALL)

#: A regexp that matches the PS2 prompt only.
PROMPT2_RE = re.compile('(%s)' % _PROMPT2_GRP,
                        re.MULTILINE | re.DOTALL)

#: A regexp that matches an (optionally indented) traceback header
#: line.  Because of C{re.DOTALL}, the trailing C{.*} also consumes
#: everything that follows the header.
EXCEPT_RE = re.compile(r'^[ \t]*Traceback \(most recent call last\):.*',
                       re.DOTALL | re.MULTILINE)

#: A regexp that matches a C{# doctest:} directive comment.
DOCTEST_DIRECTIVE_RE = re.compile(r'#[ \t]*doctest:.*')

#: The regexp used to split Python source into tokens.  Group 1 is the
#: (shortest possible) run of text before the next token; group 2 is
#: the token, which is also captured by exactly one of the named
#: groups STRING, COMMENT, DEFINE, KEYWORD, BUILTIN, PROMPT1, PROMPT2
#: or EOS (the end of the string).
DOCTEST_RE = re.compile(
    r'(.*?)((?P<STRING>%s)|(?P<COMMENT>%s)|(?P<DEFINE>%s)|'
    r'(?P<KEYWORD>%s)|(?P<BUILTIN>%s)|'
    r'(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|(?P<EOS>\Z))' % (
    _STRING_GRP, _COMMENT_GRP, _DEFINE_GRP, _KEYWORD_GRP, _BUILTIN_GRP,
    _PROMPT1_GRP, _PROMPT2_GRP), re.MULTILINE | re.DOTALL)

#: The regexp used to find doctest examples in a string.  Each match
#: has a C{source} group (the PS1 line and any PS2 lines), an
#: C{indent} group (the spaces before the PS1 prompt) and a C{want}
#: group (the lines that follow the source, up to the next blank line
#: or PS1 line).  Written with C{re.VERBOSE}, so whitespace outside
#: character classes and C{#} comments inside the pattern are ignored.
DOCTEST_EXAMPLE_RE = re.compile(r'''
    # Source consists of a PS1 line followed by zero or more PS2 lines.
    (?P<source>
        (?:^(?P<indent> [ ]*) >>> .*) # PS1 line
        (?:\n [ ]* \.\.\. .*)* # PS2 lines
        \n?)
    # Want consists of any non-blank lines that do not start with PS1.
    (?P<want> (?:(?![ ]*$) # Not a blank line
                 (?![ ]*>>>) # Not a line starting with PS1
                 .*$\n? # But any other line
              )*)
    ''', re.MULTILINE | re.VERBOSE)
168
"""
Colorize a string containing Python code.  Do not add the
L{PREFIX} and L{SUFFIX} strings to the returned value.  This
method is intended for generating syntax-highlighted strings
that are appropriate for inclusion as inline expressions.

@param s: The string of Python code to colorize.
@return: C{s} with every match of L{DOCTEST_RE} replaced by the
    value that L{subfunc} returns for that match.
"""
return self.DOCTEST_RE.sub(self.subfunc, s)
177
179 """
180 Colorize a string containing only Python code. This method
181 differs from L{colorize_doctest} in that it will not search
182 for doctest prompts when deciding how to colorize the string.
183 """
184 body = self.DOCTEST_RE.sub(self.subfunc, s)
185 return self.PREFIX + body + self.SUFFIX
186
188 """
189 Colorize a string containing one or more doctest examples.
190 """
191 output = []
192 charno = 0
193 for m in self.DOCTEST_EXAMPLE_RE.finditer(s):
194
195 pysrc, want = m.group('source', 'want')
196
197 output.append(self.NEWLINE.join(s[charno:m.start()].split('\n')))
198
199 output.append(self.DOCTEST_RE.sub(self.subfunc, pysrc))
200
201 if want:
202 if self.EXCEPT_RE.match(want):
203 output += self.NEWLINE.join([self.markup(line, 'except')
204 for line in want.split('\n')])
205 else:
206 output += self.NEWLINE.join([self.markup(line, 'output')
207 for line in want.split('\n')])
208
209 charno = m.end()
210
211 output.append(self.NEWLINE.join(s[charno:].split('\n')))
212
213 return self.PREFIX + ''.join(output) + self.SUFFIX
214
216 other, text = match.group(1, 2)
217
218 if other:
219 other = self.NEWLINE.join([self.markup(line, 'other')
220 for line in other.split('\n')])
221
222 if match.group('PROMPT1'):
223 return other + self.markup(text, 'prompt')
224 elif match.group('PROMPT2'):
225 return other + self.markup(text, 'more')
226 elif match.group('KEYWORD'):
227 return other + self.markup(text, 'keyword')
228 elif match.group('BUILTIN'):
229 return other + self.markup(text, 'builtin')
230 elif match.group('COMMENT'):
231 return other + self.markup(text, 'comment')
232 elif match.group('STRING') and '\n' not in text:
233 return other + self.markup(text, 'string')
234 elif match.group('STRING'):
235
236
237 pieces = []
238 for line in text.split('\n'):
239 if self.PROMPT2_RE.match(line):
240 if len(line) > 4:
241 pieces.append(self.markup(line[:4], 'more') +
242 self.markup(line[4:], 'string'))
243 else:
244 pieces.append(self.markup(line[:4], 'more'))
245 elif line:
246 pieces.append(self.markup(line, 'string'))
247 else:
248 pieces.append('')
249 return other + self.NEWLINE.join(pieces)
250 elif match.group('DEFINE'):
251 m = re.match('(?P<def>\w+)(?P<space>\s+)(?P<name>\w+)', text)
252 return other + (self.markup(m.group('def'), 'keyword') +
253 self.markup(m.group('space'), 'other') +
254 self.markup(m.group('name'), 'defname'))
255 elif match.group('EOS') is not None:
256 return other
257 else:
258 assert 0, 'Unexpected match!'
259
"""
Apply syntax highlighting to a single substring from a doctest
block.  C{s} is the substring, and C{tag} is the tag that
should be applied to the substring.  C{tag} will be one of the
following strings:

  - C{prompt} -- the Python PS1 prompt (>>>)
  - C{more} -- the Python PS2 prompt (...)
  - C{keyword} -- a Python keyword (for, if, etc.)
  - C{builtin} -- a Python builtin name (abs, dir, etc.)
  - C{string} -- a string literal
  - C{comment} -- a comment
  - C{except} -- an exception traceback (up to the next >>>)
  - C{output} -- the output from a doctest block.
  - C{defname} -- the name of a function or class defined by
    a C{def} or C{class} statement.
  - C{other} -- anything else (does *not* include output.)

@param s: The substring to mark up.
@param tag: The tag to apply to C{s}; one of the strings listed
    above.
@return: The marked-up version of C{s}.
@raise AssertionError: Always, in this implementation; it only
    marks the method as abstract.
"""
raise AssertionError("Abstract method")
280
"""
A subclass of DoctestColorizer that generates XML-like output.
This class is mainly intended to be used for testing purposes.
"""
#: The colorized text is wrapped in a C{<colorized>} element; the
#: opening and the closing tag are each followed by a newline.
PREFIX = '<colorized>\n'
SUFFIX = '</colorized>\n'
289 s = s.replace('&', '&').replace('<', '<').replace('>', '>')
290 if tag == 'other': return s
291 else: return '<%s>%s</%s>' % (tag, s, tag)
292
"""A subclass of DoctestColorizer that generates HTML output."""
#: The colorized text is wrapped in a C{<pre>} element with the css
#: class C{py-doctest}; the opening and the closing tag are each
#: followed by a newline.
PREFIX = '<pre class="py-doctest">\n'
SUFFIX = '</pre>\n'
303
"""A subclass of DoctestColorizer that generates LaTeX output."""
#: The colorized text is wrapped in an C{alltt} environment.  There
#: is no newline after the opening command, but there is one after
#: the closing command.
PREFIX = ('\\begin{alltt}')
SUFFIX = '\\end{alltt}\n'
#: Lines are joined with two backslash characters (the LaTeX line
#: break) instead of a newline character.
NEWLINE = '\\\\'
314