summary | refs | log | tree | commit | diff
path: root/pycco
diff options
context:
space:
mode:
Diffstat (limited to 'pycco')
-rw-r--r-- pycco/compat.py 10
-rw-r--r-- pycco/generate_index.py 77
-rw-r--r-- pycco/main.py 355
3 files changed, 269 insertions, 173 deletions
diff --git a/pycco/compat.py b/pycco/compat.py
new file mode 100644
index 0000000..68233a4
--- /dev/null
+++ b/pycco/compat.py
@@ -0,0 +1,10 @@
+try:
+ pycco_unichr = unichr
+except NameError:
+ pycco_unichr = chr
+
+def compat_items(d):
+ try:
+ return d.iteritems()
+ except AttributeError:
+ return d.items()
diff --git a/pycco/generate_index.py b/pycco/generate_index.py
new file mode 100644
index 0000000..2ddf301
--- /dev/null
+++ b/pycco/generate_index.py
@@ -0,0 +1,77 @@
+"""
+This is the module responsible for automatically generating an HTML index of
+all documentation files generated by Pycco.
+"""
+import re
+from os import path
+
+from pycco.compat import compat_items
+from pycco_resources import pycco_template
+
+
+__all__ = ('generate_index',)
+
+
+def build_tree(file_paths, outdir):
+ tree = {}
+ for file_path in file_paths:
+ entry = {
+ 'path': file_path,
+ 'relpath': path.relpath(file_path, outdir)
+ }
+ path_steps = entry['relpath'].split(path.sep)
+ add_file(entry, path_steps, tree)
+
+ return tree
+
+
+def add_file(entry, path_steps, tree):
+ """
+ :param entry: A dictionary containing a path to a documentation file, and a
+ relative path to the same file.
+ :param path_steps: A list of steps in a file path to look within.
+ """
+ node, subpath = path_steps[0], path_steps[1:]
+ if node not in tree:
+ tree[node] = {}
+
+ if subpath:
+ add_file(entry, subpath, tree[node])
+
+ else:
+ tree[node]['entry'] = entry
+
+
+def generate_tree_html(tree):
+ """
+ Given a tree representing HTML file paths, return an HTML table plotting
+ those paths.
+ """
+ items = []
+ for node, subtree in sorted(compat_items(tree)):
+ if 'entry' in subtree:
+ html = '<li><a href="{}">{}</a></li>'.format(subtree['entry']['relpath'], node)
+ else:
+ html = '<dl><dt>{}</dt><dd><ul>{}</ul></dd></dl>'.format(node, generate_tree_html(subtree))
+
+ items.append(html)
+
+ return ''.join(items)
+
+
+def generate_index(files, outdir):
+ """
+ Given a list of generated documentation files, generate HTML to display
+ index of all files.
+ """
+ tree = build_tree(files, outdir)
+ css_path = path.join(outdir, "pycco.css")
+
+ rendered = pycco_template({
+ "title": 'Index',
+ "stylesheet": css_path,
+ "sections": {'docs_html': generate_tree_html(tree)},
+ "source": '',
+ })
+
+ return re.sub(r"__DOUBLE_OPEN_STACHE__", "{{", rendered).encode("utf-8")
diff --git a/pycco/main.py b/pycco/main.py
index 787a5d2..e95ad73 100644
--- a/pycco/main.py
+++ b/pycco/main.py
@@ -1,4 +1,8 @@
#!/usr/bin/env python
+from __future__ import print_function
+
+# This module contains all of our static resources.
+from pycco_resources import pycco_template, css as pycco_css
"""
"**Pycco**" is a Python port of [Docco](http://jashkenas.github.com/docco/):
@@ -17,7 +21,7 @@ If you install Pycco, you can run it from the command-line:
This will generate linked HTML documentation for the named source files,
saving it into a `docs` folder by default.
-The [source for Pycco](https://github.com/fitzgen/pycco) is available on GitHub,
+The [source for Pycco](https://github.com/pycco-docs/pycco) is available on GitHub,
and released under the MIT license.
To install Pycco, simply
@@ -26,15 +30,16 @@ To install Pycco, simply
Or, to install the latest source
- git clone git://github.com/fitzgen/pycco.git
+ git clone git://github.com/pycco-docs/pycco.git
cd pycco
python setup.py install
"""
# === Main Documentation Generation Functions ===
+
def generate_documentation(source, outdir=None, preserve_paths=True,
- language=None):
+ language=None, encoding="utf8"):
"""
Generate the documentation for a source file by reading it in, splitting it
up into comment/code sections, highlighting them for the appropriate
@@ -43,13 +48,21 @@ def generate_documentation(source, outdir=None, preserve_paths=True,
if not outdir:
raise TypeError("Missing the required 'outdir' keyword argument.")
- code = open(source, "r").read()
- language = get_language(source, code, language=language)
- sections = parse(source, code, language)
- highlight(source, sections, language, preserve_paths=preserve_paths, outdir=outdir)
- return generate_html(source, sections, preserve_paths=preserve_paths, outdir=outdir)
+ code = open(source, "rb").read().decode(encoding)
+ return _generate_documentation(source, code, outdir, preserve_paths, language)
-def parse(source, code, language):
+
+def _generate_documentation(file_path, code, outdir, preserve_paths, language):
+ """
+ Helper function to allow documentation generation without file handling.
+ """
+ language = get_language(file_path, code, language=language)
+ sections = parse(code, language)
+ highlight(sections, language, preserve_paths=preserve_paths, outdir=outdir)
+ return generate_html(file_path, sections, preserve_paths=preserve_paths, outdir=outdir)
+
+
+def parse(code, language):
"""
Given a string of source code, parse out each comment and the code that
follows it, and create an individual **section** for it.
@@ -76,7 +89,6 @@ def parse(source, code, language):
lines.pop(linenum)
break
-
def save(docs, code):
if docs or code:
sections.append({
@@ -86,50 +98,67 @@ def parse(source, code, language):
# Setup the variables to get ready to check for multiline comments
multi_line = False
- multi_line_delimiters = [language.get("multistart"), language.get("multiend")]
+ multi_string = False
+ multistart, multiend = language.get("multistart"), language.get("multiend")
+ comment_matcher = language['comment_matcher']
for line in lines:
-
+ process_as_code = False
# Only go into multiline comments section when one of the delimiters is
# found to be at the start of a line
- if all(multi_line_delimiters) and any([line.lstrip().startswith(delim) or line.rstrip().endswith(delim) for delim in multi_line_delimiters]):
- if not multi_line:
- multi_line = True
-
- else:
+ if multistart and multiend \
+ and any(line.lstrip().startswith(delim) or line.rstrip().endswith(delim)
+ for delim in (multistart, multiend)):
+ multi_line = not multi_line
+
+ if multi_line \
+ and line.strip().endswith(multiend) \
+ and len(line.strip()) > len(multiend):
multi_line = False
- if (multi_line
- and line.strip().endswith(language.get("multiend"))
- and len(line.strip()) > len(language.get("multiend"))):
- multi_line = False
+ if not line.strip().startswith(multistart) and not multi_line \
+ or multi_string:
- # Get rid of the delimiters so that they aren't in the final docs
- line = line.replace(language["multistart"], '')
- line = line.replace(language["multiend"], '')
- docs_text += line.strip() + '\n'
- indent_level = re.match("\s*", line).group(0)
+ process_as_code = True
- if has_code and docs_text.strip():
- save(docs_text, code_text[:-1])
- code_text = code_text.split('\n')[-1]
- has_code = docs_text = ''
+ if multi_string:
+ multi_line = False
+ multi_string = False
+ else:
+ multi_string = True
+
+ else:
+ # Get rid of the delimiters so that they aren't in the final
+ # docs
+ line = line.replace(multistart, '')
+ line = line.replace(multiend, '')
+ docs_text += line.strip() + '\n'
+ indent_level = re.match("\s*", line).group(0)
+
+ if has_code and docs_text.strip():
+ save(docs_text, code_text[:-1])
+ code_text = code_text.split('\n')[-1]
+ has_code = docs_text = ''
elif multi_line:
# Remove leading spaces
- if re.match(r' {%d}' % len(indent_level), line):
+ if re.match(r' {{{:d}}}'.format(len(indent_level)), line):
docs_text += line[len(indent_level):] + '\n'
else:
docs_text += line + '\n'
- elif re.match(language["comment_matcher"], line):
+ elif re.match(comment_matcher, line):
if has_code:
save(docs_text, code_text)
has_code = docs_text = code_text = ''
- docs_text += re.sub(language["comment_matcher"], "", line) + "\n"
+ docs_text += re.sub(comment_matcher, "", line) + "\n"
else:
- if code_text and any([line.lstrip().startswith(x) for x in ['class ', 'def ', '@']]):
+ process_as_code = True
+
+ if process_as_code:
+ if code_text and any(line.lstrip().startswith(x)
+ for x in ['class ', 'def ', '@']):
if not code_text.lstrip().startswith("@"):
save(docs_text, code_text)
code_text = has_code = docs_text = ''
@@ -137,14 +166,14 @@ def parse(source, code, language):
has_code = True
code_text += line + '\n'
-
save(docs_text, code_text)
return sections
# === Preprocessing the comments ===
-def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
+
+def preprocess(comment, preserve_paths=True, outdir=None):
"""
Add cross-references before having the text processed by markdown. It's
possible to reference another file, like this : `[[main.py]]` which renders
@@ -157,6 +186,7 @@ def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
if not outdir:
raise TypeError("Missing the required 'outdir' keyword argument.")
+
def sanitize_section_name(name):
return "-".join(name.lower().strip().split(" "))
@@ -164,33 +194,37 @@ def preprocess(comment, section_nr, preserve_paths=True, outdir=None):
# Check if the match contains an anchor
if '#' in match.group(1):
name, anchor = match.group(1).split('#')
- return " [%s](%s#%s)" % (name,
- path.basename(destination(name,
- preserve_paths=preserve_paths,
- outdir=outdir)),
- anchor)
+ return " [{}]({}#{})".format(name,
+ path.basename(destination(name,
+ preserve_paths=preserve_paths,
+ outdir=outdir)),
+ anchor)
else:
- return " [%s](%s)" % (match.group(1),
- path.basename(destination(match.group(1),
- preserve_paths=preserve_paths,
- outdir=outdir)))
+ return " [{}]({})".format(match.group(1),
+ path.basename(destination(match.group(1),
+ preserve_paths=preserve_paths,
+ outdir=outdir)))
def replace_section_name(match):
- return '%(lvl)s <span id="%(id)s" href="%(id)s">%(name)s</span>' % {
- "lvl" : re.sub('=', '#', match.group(1)),
- "id" : sanitize_section_name(match.group(2)),
- "name" : match.group(2)
- }
+ """
+ Replace equals-sign-formatted section names with anchor links.
+ """
+ return '{lvl} <span id="{id}" href="{id}">{name}</span>'.format(
+ lvl=re.sub('=', '#', match.group(1)),
+ id=sanitize_section_name(match.group(2)),
+ name=match.group(2)
+ )
comment = re.sub('^([=]+)([^=]+)[=]*\s*$', replace_section_name, comment)
- comment = re.sub('[^`]\[\[(.+?)\]\]', replace_crossref, comment)
+ comment = re.sub('(?<!`)\[\[(.+?)\]\]', replace_crossref, comment)
return comment
# === Highlighting the source code ===
-def highlight(source, sections, language, preserve_paths=True, outdir=None):
+
+def highlight(sections, language, preserve_paths=True, outdir=None):
"""
Highlights a single chunk of code using the **Pygments** module, and runs
the text of its corresponding comment through **Markdown**.
@@ -215,14 +249,18 @@ def highlight(source, sections, language, preserve_paths=True, outdir=None):
docs_text = unicode(section["docs_text"])
except UnicodeError:
docs_text = unicode(section["docs_text"].decode('utf-8'))
+ except NameError:
+ docs_text = section['docs_text']
section["docs_html"] = markdown(preprocess(docs_text,
- i,
preserve_paths=preserve_paths,
outdir=outdir))
section["num"] = i
+ return sections
+
# === HTML Code generation ===
+
def generate_html(source, sections, preserve_paths=True, outdir=None):
"""
Once all of the code is finished highlighting, we can generate the HTML file
@@ -245,82 +283,25 @@ def generate_html(source, sections, preserve_paths=True, outdir=None):
sect["code_html"] = re.sub(r"\{\{", r"__DOUBLE_OPEN_STACHE__", sect["code_html"])
rendered = pycco_template({
- "title" : title,
- "stylesheet" : csspath,
- "sections" : sections,
- "source" : source,
- "path" : path,
- "destination" : destination
- })
-
- return re.sub(r"__DOUBLE_OPEN_STACHE__", "{{", rendered).encode("utf-8")
-
-# === Sitemap Generation ===
-def generate_index(files, outdir):
-
- css_path = path.join(outdir, "pycco.css")
-
- sections = []
-
- def add_file(entry, path, tree):
- node, subpath = path[0], path[1:]
- if not node in tree:
- tree[node] = {}
-
- if subpath:
- add_file(entry, subpath, tree[node])
-
- else:
- tree[node]['entry'] = entry
-
- tree = {}
- for file_path in files:
- entry = {
- 'path': file_path,
- 'relpath': path.relpath(file_path, outdir)
- }
-
- add_file(entry=entry, path=entry['relpath'].split(path.sep), tree=tree)
-
- def generate_tree_html(tree):
- items = []
- for node, subtree in tree.items():
- if 'entry' in subtree:
- html = '<li><a href="%s">%s</a></li>' % (subtree['entry']['relpath'], node)
-
- else:
- html = '<dl><dt>%s</dt><dd><ul>%s</ul></dd></dl>' % (node, generate_tree_html(subtree))
-
- items.append(html)
-
- return ''.join(items)
-
- sections.append({'docs_html': generate_tree_html(tree)})
-
- rendered = pycco_template({
- "title" : 'Index',
- "stylesheet" : css_path,
- "sections" : sections,
- "source" : '',
- "path" : path,
- "destination" : destination
+ "title": title,
+ "stylesheet": csspath,
+ "sections": sections,
+ "source": source,
})
return re.sub(r"__DOUBLE_OPEN_STACHE__", "{{", rendered).encode("utf-8")
# === Helpers & Setup ===
-# This module contains all of our static resources.
-import pycco_resources
-
# Import our external dependencies.
import optparse
import os
import pygments
-import pystache
import re
import sys
import time
+import pycco.generate_index as generate_index
+
from markdown import markdown
from os import path
from pygments import lexers, formatters
@@ -329,45 +310,43 @@ from pygments import lexers, formatters
# the name of the Pygments lexer and the symbol that indicates a comment. To
# add another language to Pycco's repertoire, add it here.
languages = {
- ".coffee": { "name": "coffee-script", "symbol": "#",
- "multistart": '###', "multiend": '###' },
-
- ".pl": { "name": "perl", "symbol": "#" },
+ ".coffee": {"name": "coffee-script", "symbol": "#",
+ "multistart": '###', "multiend": '###'},
- ".sql": { "name": "sql", "symbol": "--" },
+ ".pl": {"name": "perl", "symbol": "#"},
- ".c": { "name": "c", "symbol": "//",
- "multistart": "/*", "multiend": "*/"},
+ ".sql": {"name": "sql", "symbol": "--"},
- ".h": { "name": "c", "symbol": "//",
- "multistart": "/*", "multiend": "*/"},
+ ".c": {"name": "c", "symbol": "//",
+ "multistart": "/*", "multiend": "*/"},
+ ".h": {"name": "c", "symbol": "//",
+ "multistart": "/*", "multiend": "*/"},
- ".cpp": { "name": "cpp", "symbol": "//"},
+ ".cpp": {"name": "cpp", "symbol": "//"},
- ".cl": { "name": "c", "symbol": "//",
- "multistart": "/*", "multiend": "*/"},
+ ".cl": {"name": "c", "symbol": "//",
+ "multistart": "/*", "multiend": "*/"},
- ".js": { "name": "javascript", "symbol": "//",
- "multistart": "/*", "multiend": "*/"},
+ ".js": {"name": "javascript", "symbol": "//",
+ "multistart": "/*", "multiend": "*/"},
+ ".rb": {"name": "ruby", "symbol": "#",
+ "multistart": "=begin", "multiend": "=end"},
- ".rb": { "name": "ruby", "symbol": "#",
- "multistart": "=begin", "multiend": "=end"},
+ ".py": {"name": "python", "symbol": "#",
+ "multistart": '"""', "multiend": '"""'},
- ".py": { "name": "python", "symbol": "#",
- "multistart": '"""', "multiend": '"""' },
+ ".scm": {"name": "scheme", "symbol": ";;",
+ "multistart": "#|", "multiend": "|#"},
- ".scm": { "name": "scheme", "symbol": ";;",
- "multistart": "#|", "multiend": "|#"},
+ ".lua": {"name": "lua", "symbol": "--",
+ "multistart": "--[[", "multiend": "--]]"},
- ".lua": { "name": "lua", "symbol": "--",
- "multistart": "--[[", "multiend": "--]]"},
+ ".erl": {"name": "erlang", "symbol": "%%"},
- ".erl": { "name": "erlang", "symbol": "%%" },
+ ".tcl": {"name": "tcl", "symbol": "#"},
- ".tcl": { "name": "tcl", "symbol": "#" },
-
- ".hs": { "name": "haskell", "symbol": "--",
- "multistart": "{-", "multiend": "-}"},
+ ".hs": {"name": "haskell", "symbol": "--",
+ "multistart": "{-", "multiend": "-}"},
}
# Build out the appropriate matchers and delimiters for each language.
@@ -386,6 +365,7 @@ for ext, l in languages.items():
# Get the Pygments Lexer for this language.
l["lexer"] = lexers.get_lexer_by_name(l["name"])
+
def get_language(source, code, language=None):
"""Get the current language we're documenting, based on the extension."""
@@ -396,16 +376,23 @@ def get_language(source, code, language=None):
else:
raise ValueError("Unknown forced language: " + language)
- m = re.match(r'.*(\..+)', os.path.basename(source))
+ m = re.match(r'.*(\..+)', os.path.basename(source)) if source else None
if m and m.group(1) in languages:
return languages[m.group(1)]
else:
- lang = lexers.guess_lexer(code).name.lower()
- for l in languages.values():
- if l["name"] == lang:
- return l
- else:
- raise ValueError("Can't figure out the language! of %s" % source)
+ try:
+ lang = lexers.guess_lexer(code).name.lower()
+ for l in languages.values():
+ if l["name"] == lang:
+ return l
+ else:
+ raise ValueError()
+ except ValueError:
+ # If pygments can't find any lexers, it will raise its own
+ # subclass of ValueError. We will catch it and raise ours
+ # for consistency.
+ raise ValueError("Can't figure out the language!")
+
def destination(filepath, preserve_paths=True, outdir=None):
"""
@@ -422,7 +409,14 @@ def destination(filepath, preserve_paths=True, outdir=None):
name = filename
if preserve_paths:
name = path.join(dirname, name)
- return path.join(outdir, "%s.html" % name)
+ dest = path.join(outdir, u"{}.html".format(name))
+ # If `join` is passed an absolute path, it will ignore any earlier path
+ # elements. We will force outdir to the beginning of the path to avoid
+ # writing outside our destination.
+ if not dest.startswith(outdir):
+ dest = outdir + os.sep + dest
+ return dest
+
def shift(list, default):
"""
@@ -435,20 +429,26 @@ def shift(list, default):
except IndexError:
return default
-def ensure_directory(directory):
- """Ensure that the destination directory exists."""
+def remove_control_chars(s):
+ # Sanitization regexp copied from
+ # http://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
+ from pycco.compat import pycco_unichr
+ control_chars = ''.join(map(pycco_unichr, list(range(0, 32)) + list(range(127, 160))))
+ control_char_re = re.compile(u'[{}]'.format(re.escape(control_chars)))
+ return control_char_re.sub('', s)
+
+
+def ensure_directory(directory):
+ """
+ Sanitize directory string and ensure that the destination directory exists.
+ """
+ directory = remove_control_chars(directory)
if not os.path.isdir(directory):
os.makedirs(directory)
-def template(source):
- return lambda context: pystache.render(source, context)
-
-# Create the template that we will use to generate the Pycco HTML page.
-pycco_template = template(pycco_resources.html)
+ return directory
-# The CSS styles we'd like to apply to the documentation.
-pycco_styles = pycco_resources.css
# The start of each Pygments highlight block.
highlight_start = "<div class=\"highlight\"><pre>"
@@ -456,11 +456,12 @@ highlight_start = "<div class=\"highlight\"><pre>"
# The end of each Pygments highlight block.
highlight_end = "</pre></div>"
-def process(sources, preserve_paths=True, outdir=None, language=None, index=False):
+
+def process(sources, preserve_paths=True, outdir=None, language=None, encoding="utf8", index=False):
"""For each source file passed as argument, generate the documentation."""
if not outdir:
- raise TypeError("Missing the required 'outdir' keyword argument.")
+ raise TypeError("Missing the required 'directory' keyword argument.")
# Make a copy of sources given on the command line. `main()` needs the
# original list when monitoring for changed files.
@@ -468,16 +469,15 @@ def process(sources, preserve_paths=True, outdir=None, language=None, index=Fals
# Proceed to generating the documentation.
if sources:
- ensure_directory(outdir)
- css = open(path.join(outdir, "pycco.css"), "w")
- css.write(pycco_styles)
+ outdir = ensure_directory(outdir)
+ css = open(path.join(outdir, "pycco.css"), "wb")
+ css.write(pycco_css.encode(encoding))
css.close()
generated_files = []
def next_file():
s = sources.pop(0)
- print "pycco = %s ->" % s,
dest = destination(s, preserve_paths=preserve_paths, outdir=outdir)
try:
@@ -485,10 +485,13 @@ def process(sources, preserve_paths=True, outdir=None, language=None, index=Fals
except OSError:
pass
- with open(dest, "w") as f:
- f.write(generate_documentation(s, preserve_paths=preserve_paths, outdir=outdir,
- language=language))
- print dest
+ with open(dest, "wb") as f:
+ f.write(generate_documentation(s, preserve_paths=preserve_paths,
+ outdir=outdir,
+ language=language,
+ encoding=encoding))
+
+ print("pycco: {} -> {}".format(s, dest))
generated_files.append(dest)
if sources:
@@ -496,8 +499,8 @@ def process(sources, preserve_paths=True, outdir=None, language=None, index=Fals
next_file()
if index:
- with open(path.join(outdir, "index.html"), "w") as f:
- f.write(generate_index(generated_files, outdir))
+ with open(path.join(outdir, "index.html"), "wb") as f:
+ f.write(generate_index.generate_index(generated_files, outdir))
__all__ = ("process", "generate_documentation")
@@ -516,7 +519,9 @@ def monitor(sources, opts):
for source in sources)
class RegenerateHandler(watchdog.events.FileSystemEventHandler):
+
"""A handler for recompiling files which triggered watchdog events"""
+
def on_modified(self, event):
"""Regenerate documentation for a file which triggered an event"""
# Re-generate documentation from a source file if it was listed on
@@ -567,8 +572,12 @@ def main():
help='Generate an index.html document with sitemap content')
opts, sources = parser.parse_args()
+ if opts.outdir == '':
+ outdir = '.'
+ else:
+ outdir = opts.outdir
- process(sources, outdir=opts.outdir, preserve_paths=opts.paths,
+ process(sources, outdir=outdir, preserve_paths=opts.paths,
language=opts.language, index=opts.generate_index)
# If the -w / --watch option was present, monitor the source directories