diff options
Diffstat (limited to 'pycco/main.py')
-rw-r--r-- | pycco/main.py | 355 |
1 files changed, 182 insertions, 173 deletions
diff --git a/pycco/main.py b/pycco/main.py index 787a5d2..e95ad73 100644 --- a/pycco/main.py +++ b/pycco/main.py @@ -1,4 +1,8 @@ #!/usr/bin/env python +from __future__ import print_function + +# This module contains all of our static resources. +from pycco_resources import pycco_template, css as pycco_css """ "**Pycco**" is a Python port of [Docco](http://jashkenas.github.com/docco/): @@ -17,7 +21,7 @@ If you install Pycco, you can run it from the command-line: This will generate linked HTML documentation for the named source files, saving it into a `docs` folder by default. -The [source for Pycco](https://github.com/fitzgen/pycco) is available on GitHub, +The [source for Pycco](https://github.com/pycco-docs/pycco) is available on GitHub, and released under the MIT license. To install Pycco, simply @@ -26,15 +30,16 @@ To install Pycco, simply Or, to install the latest source - git clone git://github.com/fitzgen/pycco.git + git clone git://github.com/pycco-docs/pycco.git cd pycco python setup.py install """ # === Main Documentation Generation Functions === + def generate_documentation(source, outdir=None, preserve_paths=True, - language=None): + language=None, encoding="utf8"): """ Generate the documentation for a source file by reading it in, splitting it up into comment/code sections, highlighting them for the appropriate @@ -43,13 +48,21 @@ def generate_documentation(source, outdir=None, preserve_paths=True, if not outdir: raise TypeError("Missing the required 'outdir' keyword argument.") - code = open(source, "r").read() - language = get_language(source, code, language=language) - sections = parse(source, code, language) - highlight(source, sections, language, preserve_paths=preserve_paths, outdir=outdir) - return generate_html(source, sections, preserve_paths=preserve_paths, outdir=outdir) + code = open(source, "rb").read().decode(encoding) + return _generate_documentation(source, code, outdir, preserve_paths, language) -def parse(source, code, language): + +def _generate_documentation(file_path, code, outdir, preserve_paths, language): + """ + Helper function to allow documentation generation without file handling. + """ + language = get_language(file_path, code, language=language) + sections = parse(code, language) + highlight(sections, language, preserve_paths=preserve_paths, outdir=outdir) + return generate_html(file_path, sections, preserve_paths=preserve_paths, outdir=outdir) + + +def parse(code, language): """ Given a string of source code, parse out each comment and the code that follows it, and create an individual **section** for it. @@ -76,7 +89,6 @@ def parse(source, code, language): lines.pop(linenum) break - def save(docs, code): if docs or code: sections.append({ @@ -86,50 +98,67 @@ def parse(source, code, language): # Setup the variables to get ready to check for multiline comments multi_line = False - multi_line_delimiters = [language.get("multistart"), language.get("multiend")] + multi_string = False + multistart, multiend = language.get("multistart"), language.get("multiend") + comment_matcher = language['comment_matcher'] for line in lines: - + process_as_code = False # Only go into multiline comments section when one of the delimiters is # found to be at the start of a line - if all(multi_line_delimiters) and any([line.lstrip().startswith(delim) or line.rstrip().endswith(delim) for delim in multi_line_delimiters]): - if not multi_line: - multi_line = True - - else: + if multistart and multiend \ + and any(line.lstrip().startswith(delim) or line.rstrip().endswith(delim) + for delim in (multistart, multiend)): + multi_line = not multi_line + + if multi_line \ + and line.strip().endswith(multiend) \ + and len(line.strip()) > len(multiend): multi_line = False - if (multi_line - and line.strip().endswith(language.get("multiend")) - and len(line.strip()) > len(language.get("multiend"))): - multi_line = False + if not line.strip().startswith(multistart) and not multi_line \ + or multi_string: - # Get rid of the delimiters so that they aren't in the final docs - line = line.replace(language["multistart"], '') - line = line.replace(language["multiend"], '') - docs_text += line.strip() + '\n' - indent_level = re.match("\s*", line).group(0) + process_as_code = True - if has_code and docs_text.strip(): - save(docs_text, code_text[:-1]) - code_text = code_text.split('\n')[-1] - has_code = docs_text = '' + if multi_string: + multi_line = False + multi_string = False + else: + multi_string = True + + else: + # Get rid of the delimiters so that they aren't in the final + # docs + line = line.replace(multistart, '') + line = line.replace(multiend, '') + docs_text += line.strip() + '\n' + indent_level = re.match("\s*", line).group(0) + + if has_code and docs_text.strip(): + save(docs_text, code_text[:-1]) + code_text = code_text.split('\n')[-1] + has_code = docs_text = '' elif multi_line: # Remove leading spaces - if re.match(r' {%d}' % len(indent_level), line): + if re.match(r' {{{:d}}}'.format(len(indent_level)), line): docs_text += line[len(indent_level):] + '\n' else: docs_text += line + '\n' - elif re.match(language["comment_matcher"], line): + elif re.match(comment_matcher, line): if has_code: save(docs_text, code_text) has_code = docs_text = code_text = '' - docs_text += re.sub(language["comment_matcher"], "", line) + "\n" + docs_text += re.sub(comment_matcher, "", line) + "\n" else: - if code_text and any([line.lstrip().startswith(x) for x in ['class ', 'def ', '@']]): + process_as_code = True + + if process_as_code: + if code_text and any(line.lstrip().startswith(x) + for x in ['class ', 'def ', '@']): if not code_text.lstrip().startswith("@"): save(docs_text, code_text) code_text = has_code = docs_text = '' @@ -137,14 +166,14 @@ def parse(source, code, language): has_code = True code_text += line + '\n' - save(docs_text, code_text) return sections # === Preprocessing the comments === -def preprocess(comment, section_nr, preserve_paths=True, outdir=None): + +def preprocess(comment, preserve_paths=True, outdir=None): """ Add cross-references before having the text processed by markdown. It's possible to reference another file, like this : `[[main.py]]` which renders @@ -157,6 +186,7 @@ def preprocess(comment, section_nr, preserve_paths=True, outdir=None): if not outdir: raise TypeError("Missing the required 'outdir' keyword argument.") + def sanitize_section_name(name): return "-".join(name.lower().strip().split(" ")) @@ -164,33 +194,37 @@ def preprocess(comment, section_nr, preserve_paths=True, outdir=None): # Check if the match contains an anchor if '#' in match.group(1): name, anchor = match.group(1).split('#') - return " [%s](%s#%s)" % (name, - path.basename(destination(name, - preserve_paths=preserve_paths, - outdir=outdir)), - anchor) + return " [{}]({}#{})".format(name, + path.basename(destination(name, + preserve_paths=preserve_paths, + outdir=outdir)), + anchor) else: - return " [%s](%s)" % (match.group(1), - path.basename(destination(match.group(1), - preserve_paths=preserve_paths, - outdir=outdir))) + return " [{}]({})".format(match.group(1), + path.basename(destination(match.group(1), + preserve_paths=preserve_paths, + outdir=outdir))) def replace_section_name(match): - return '%(lvl)s <span id="%(id)s" href="%(id)s">%(name)s</span>' % { - "lvl" : re.sub('=', '#', match.group(1)), - "id" : sanitize_section_name(match.group(2)), - "name" : match.group(2) - } + """ + Replace equals-sign-formatted section names with anchor links. + """ + return '{lvl} <span id="{id}" href="{id}">{name}</span>'.format( + lvl=re.sub('=', '#', match.group(1)), + id=sanitize_section_name(match.group(2)), + name=match.group(2) + ) comment = re.sub('^([=]+)([^=]+)[=]*\s*$', replace_section_name, comment) - comment = re.sub('[^`]\[\[(.+?)\]\]', replace_crossref, comment) + comment = re.sub('(?<!`)\[\[(.+?)\]\]', replace_crossref, comment) return comment # === Highlighting the source code === -def highlight(source, sections, language, preserve_paths=True, outdir=None): + +def highlight(sections, language, preserve_paths=True, outdir=None): """ Highlights a single chunk of code using the **Pygments** module, and runs the text of its corresponding comment through **Markdown**. @@ -215,14 +249,18 @@ def highlight(source, sections, language, preserve_paths=True, outdir=None): docs_text = unicode(section["docs_text"]) except UnicodeError: docs_text = unicode(section["docs_text"].decode('utf-8')) + except NameError: + docs_text = section['docs_text'] section["docs_html"] = markdown(preprocess(docs_text, - i, preserve_paths=preserve_paths, outdir=outdir)) section["num"] = i + return sections + # === HTML Code generation === + def generate_html(source, sections, preserve_paths=True, outdir=None): """ Once all of the code is finished highlighting, we can generate the HTML file @@ -245,82 +283,25 @@ def generate_html(source, sections, preserve_paths=True, outdir=None): sect["code_html"] = re.sub(r"\{\{", r"__DOUBLE_OPEN_STACHE__", sect["code_html"]) rendered = pycco_template({ - "title" : title, - "stylesheet" : csspath, - "sections" : sections, - "source" : source, - "path" : path, - "destination" : destination - }) - - return re.sub(r"__DOUBLE_OPEN_STACHE__", "{{", rendered).encode("utf-8") - -# === Sitemap Generation === -def generate_index(files, outdir): - - css_path = path.join(outdir, "pycco.css") - - sections = [] - - def add_file(entry, path, tree): - node, subpath = path[0], path[1:] - if not node in tree: - tree[node] = {} - - if subpath: - add_file(entry, subpath, tree[node]) - - else: - tree[node]['entry'] = entry - - tree = {} - for file_path in files: - entry = { - 'path': file_path, - 'relpath': path.relpath(file_path, outdir) - } - - add_file(entry=entry, path=entry['relpath'].split(path.sep), tree=tree) - - def generate_tree_html(tree): - items = [] - for node, subtree in tree.items(): - if 'entry' in subtree: - html = '<li><a href="%s">%s</a></li>' % (subtree['entry']['relpath'], node) - - else: - html = '<dl><dt>%s</dt><dd><ul>%s</ul></dd></dl>' % (node, generate_tree_html(subtree)) - - items.append(html) - - return ''.join(items) - - sections.append({'docs_html': generate_tree_html(tree)}) - - rendered = pycco_template({ - "title" : 'Index', - "stylesheet" : css_path, - "sections" : sections, - "source" : '', - "path" : path, - "destination" : destination + "title": title, + "stylesheet": csspath, + "sections": sections, + "source": source, }) return re.sub(r"__DOUBLE_OPEN_STACHE__", "{{", rendered).encode("utf-8") # === Helpers & Setup === -# This module contains all of our static resources. -import pycco_resources - # Import our external dependencies. import optparse import os import pygments -import pystache import re import sys import time +import pycco.generate_index as generate_index + from markdown import markdown from os import path from pygments import lexers, formatters @@ -329,45 +310,43 @@ from pygments import lexers, formatters # the name of the Pygments lexer and the symbol that indicates a comment. To # add another language to Pycco's repertoire, add it here. languages = { - ".coffee": { "name": "coffee-script", "symbol": "#", - "multistart": '###', "multiend": '###' }, - - ".pl": { "name": "perl", "symbol": "#" }, + ".coffee": {"name": "coffee-script", "symbol": "#", + "multistart": '###', "multiend": '###'}, - ".sql": { "name": "sql", "symbol": "--" }, + ".pl": {"name": "perl", "symbol": "#"}, - ".c": { "name": "c", "symbol": "//", - "multistart": "/*", "multiend": "*/"}, + ".sql": {"name": "sql", "symbol": "--"}, - ".h": { "name": "c", "symbol": "//", - "multistart": "/*", "multiend": "*/"}, + ".c": {"name": "c", "symbol": "//", + "multistart": "/*", "multiend": "*/"}, + ".h": {"name": "c", "symbol": "//", + "multistart": "/*", "multiend": "*/"}, - ".cpp": { "name": "cpp", "symbol": "//"}, + ".cpp": {"name": "cpp", "symbol": "//"}, - ".cl": { "name": "c", "symbol": "//", - "multistart": "/*", "multiend": "*/"}, + ".cl": {"name": "c", "symbol": "//", + "multistart": "/*", "multiend": "*/"}, - ".js": { "name": "javascript", "symbol": "//", - "multistart": "/*", "multiend": "*/"}, + ".js": {"name": "javascript", "symbol": "//", + "multistart": "/*", "multiend": "*/"}, + ".rb": {"name": "ruby", "symbol": "#", + "multistart": "=begin", "multiend": "=end"}, - ".rb": { "name": "ruby", "symbol": "#", - "multistart": "=begin", "multiend": "=end"}, + ".py": {"name": "python", "symbol": "#", + "multistart": '"""', "multiend": '"""'}, - ".py": { "name": "python", "symbol": "#", - "multistart": '"""', "multiend": '"""' }, + ".scm": {"name": "scheme", "symbol": ";;", + "multistart": "#|", "multiend": "|#"}, - ".scm": { "name": "scheme", "symbol": ";;", - "multistart": "#|", "multiend": "|#"}, + ".lua": {"name": "lua", "symbol": "--", + "multistart": "--[[", "multiend": "--]]"}, - ".lua": { "name": "lua", "symbol": "--", - "multistart": "--[[", "multiend": "--]]"}, + ".erl": {"name": "erlang", "symbol": "%%"}, - ".erl": { "name": "erlang", "symbol": "%%" }, + ".tcl": {"name": "tcl", "symbol": "#"}, - ".tcl": { "name": "tcl", "symbol": "#" }, - - ".hs": { "name": "haskell", "symbol": "--", - "multistart": "{-", "multiend": "-}"}, + ".hs": {"name": "haskell", "symbol": "--", + "multistart": "{-", "multiend": "-}"}, } # Build out the appropriate matchers and delimiters for each language. @@ -386,6 +365,7 @@ for ext, l in languages.items(): # Get the Pygments Lexer for this language. l["lexer"] = lexers.get_lexer_by_name(l["name"]) + def get_language(source, code, language=None): """Get the current language we're documenting, based on the extension.""" @@ -396,16 +376,23 @@ def get_language(source, code, language=None): else: raise ValueError("Unknown forced language: " + language) - m = re.match(r'.*(\..+)', os.path.basename(source)) + m = re.match(r'.*(\..+)', os.path.basename(source)) if source else None if m and m.group(1) in languages: return languages[m.group(1)] else: - lang = lexers.guess_lexer(code).name.lower() - for l in languages.values(): - if l["name"] == lang: - return l - else: - raise ValueError("Can't figure out the language! of %s" % source) + try: + lang = lexers.guess_lexer(code).name.lower() + for l in languages.values(): + if l["name"] == lang: + return l + else: + raise ValueError() + except ValueError: + # If pygments can't find any lexers, it will raise its own + # subclass of ValueError. We will catch it and raise ours + # for consistency. + raise ValueError("Can't figure out the language!") + def destination(filepath, preserve_paths=True, outdir=None): """ @@ -422,7 +409,14 @@ def destination(filepath, preserve_paths=True, outdir=None): name = filename if preserve_paths: name = path.join(dirname, name) - return path.join(outdir, "%s.html" % name) + dest = path.join(outdir, u"{}.html".format(name)) + # If `join` is passed an absolute path, it will ignore any earlier path + # elements. We will force outdir to the beginning of the path to avoid + # writing outside our destination. + if not dest.startswith(outdir): + dest = outdir + os.sep + dest + return dest + def shift(list, default): """ @@ -435,20 +429,26 @@ def shift(list, default): except IndexError: return default -def ensure_directory(directory): - """Ensure that the destination directory exists.""" +def remove_control_chars(s): + # Sanitization regexp copied from + # http://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python + from pycco.compat import pycco_unichr + control_chars = ''.join(map(pycco_unichr, list(range(0, 32)) + list(range(127, 160)))) + control_char_re = re.compile(u'[{}]'.format(re.escape(control_chars))) + return control_char_re.sub('', s) + + +def ensure_directory(directory): + """ + Sanitize directory string and ensure that the destination directory exists. + """ + directory = remove_control_chars(directory) if not os.path.isdir(directory): os.makedirs(directory) -def template(source): - return lambda context: pystache.render(source, context) - -# Create the template that we will use to generate the Pycco HTML page. -pycco_template = template(pycco_resources.html) + return directory -# The CSS styles we'd like to apply to the documentation. -pycco_styles = pycco_resources.css # The start of each Pygments highlight block. highlight_start = "<div class=\"highlight\"><pre>" @@ -456,11 +456,12 @@ highlight_start = "<div class=\"highlight\"><pre>" # The end of each Pygments highlight block. highlight_end = "</pre></div>" -def process(sources, preserve_paths=True, outdir=None, language=None, index=False): + +def process(sources, preserve_paths=True, outdir=None, language=None, encoding="utf8", index=False): """For each source file passed as argument, generate the documentation.""" if not outdir: - raise TypeError("Missing the required 'outdir' keyword argument.") + raise TypeError("Missing the required 'directory' keyword argument.") # Make a copy of sources given on the command line. `main()` needs the # original list when monitoring for changed files. @@ -468,16 +469,15 @@ def process(sources, preserve_paths=True, outdir=None, language=None, index=Fals # Proceed to generating the documentation. if sources: - ensure_directory(outdir) - css = open(path.join(outdir, "pycco.css"), "w") - css.write(pycco_styles) + outdir = ensure_directory(outdir) + css = open(path.join(outdir, "pycco.css"), "wb") + css.write(pycco_css.encode(encoding)) css.close() generated_files = [] def next_file(): s = sources.pop(0) - print "pycco = %s ->" % s, dest = destination(s, preserve_paths=preserve_paths, outdir=outdir) try: @@ -485,10 +485,13 @@ def process(sources, preserve_paths=True, outdir=None, language=None, index=Fals except OSError: pass - with open(dest, "w") as f: - f.write(generate_documentation(s, preserve_paths=preserve_paths, outdir=outdir, - language=language)) - print dest + with open(dest, "wb") as f: + f.write(generate_documentation(s, preserve_paths=preserve_paths, + outdir=outdir, + language=language, + encoding=encoding)) + + print("pycco: {} -> {}".format(s, dest)) generated_files.append(dest) if sources: @@ -496,8 +499,8 @@ def process(sources, preserve_paths=True, outdir=None, language=None, index=Fals next_file() if index: - with open(path.join(outdir, "index.html"), "w") as f: - f.write(generate_index(generated_files, outdir)) + with open(path.join(outdir, "index.html"), "wb") as f: + f.write(generate_index.generate_index(generated_files, outdir)) __all__ = ("process", "generate_documentation") @@ -516,7 +519,9 @@ def monitor(sources, opts): for source in sources) class RegenerateHandler(watchdog.events.FileSystemEventHandler): + """A handler for recompiling files which triggered watchdog events""" + def on_modified(self, event): """Regenerate documentation for a file which triggered an event""" # Re-generate documentation from a source file if it was listed on @@ -567,8 +572,12 @@ def main(): help='Generate an index.html document with sitemap content') opts, sources = parser.parse_args() + if opts.outdir == '': + outdir = '.' + else: + outdir = opts.outdir - process(sources, outdir=opts.outdir, preserve_paths=opts.paths, + process(sources, outdir=outdir, preserve_paths=opts.paths, language=opts.language, index=opts.generate_index) # If the -w / --watch option was present, monitor the source directories |