diff options
Diffstat (limited to 'libgo/go/go/doc/comment.go')
-rw-r--r-- | libgo/go/go/doc/comment.go | 357 |
1 files changed, 357 insertions, 0 deletions
diff --git a/libgo/go/go/doc/comment.go b/libgo/go/go/doc/comment.go new file mode 100644 index 0000000000..9ff0bd536a --- /dev/null +++ b/libgo/go/go/doc/comment.go @@ -0,0 +1,357 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Godoc comment extraction and comment -> HTML formatting. + +package doc + +import ( + "go/ast" + "io" + "regexp" + "strings" + "template" // for htmlEscape +) + + +func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' } + + +func stripTrailingWhitespace(s string) string { + i := len(s) + for i > 0 && isWhitespace(s[i-1]) { + i-- + } + return s[0:i] +} + + +// CommentText returns the text of comment, +// with the comment markers - //, /*, and */ - removed. +func CommentText(comment *ast.CommentGroup) string { + if comment == nil { + return "" + } + comments := make([]string, len(comment.List)) + for i, c := range comment.List { + comments[i] = string(c.Text) + } + + lines := make([]string, 0, 10) // most comments are less than 10 lines + for _, c := range comments { + // Remove comment markers. + // The parser has given us exactly the comment text. + switch c[1] { + case '/': + //-style comment + c = c[2:] + // Remove leading space after //, if there is one. + // TODO(gri) This appears to be necessary in isolated + // cases (bignum.RatFromString) - why? + if len(c) > 0 && c[0] == ' ' { + c = c[1:] + } + case '*': + /*-style comment */ + c = c[2 : len(c)-2] + } + + // Split on newlines. + cl := strings.Split(c, "\n", -1) + + // Walk lines, stripping trailing white space and adding to list. + for _, l := range cl { + lines = append(lines, stripTrailingWhitespace(l)) + } + } + + // Remove leading blank lines; convert runs of + // interior blank lines to a single blank line. + n := 0 + for _, line := range lines { + if line != "" || n > 0 && lines[n-1] != "" { + lines[n] = line + n++ + } + } + lines = lines[0:n] + + // Add final "" entry to get trailing newline from Join. + if n > 0 && lines[n-1] != "" { + lines = append(lines, "") + } + + return strings.Join(lines, "\n") +} + + +// Split bytes into lines. +func split(text []byte) [][]byte { + // count lines + n := 0 + last := 0 + for i, c := range text { + if c == '\n' { + last = i + 1 + n++ + } + } + if last < len(text) { + n++ + } + + // split + out := make([][]byte, n) + last = 0 + n = 0 + for i, c := range text { + if c == '\n' { + out[n] = text[last : i+1] + last = i + 1 + n++ + } + } + if last < len(text) { + out[n] = text[last:] + } + + return out +} + + +var ( + ldquo = []byte("“") + rdquo = []byte("”") +) + +// Escape comment text for HTML. If nice is set, +// also turn `` into “ and '' into ”. +func commentEscape(w io.Writer, s []byte, nice bool) { + last := 0 + if nice { + for i := 0; i < len(s)-1; i++ { + ch := s[i] + if ch == s[i+1] && (ch == '`' || ch == '\'') { + template.HTMLEscape(w, s[last:i]) + last = i + 2 + switch ch { + case '`': + w.Write(ldquo) + case '\'': + w.Write(rdquo) + } + i++ // loop will add one more + } + } + } + template.HTMLEscape(w, s[last:]) +} + + +const ( + // Regexp for Go identifiers + identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this + + // Regexp for URLs + protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):` + hostPart = `[a-zA-Z0-9_@\-]+` + filePart = `[a-zA-Z0-9_?%#~&/\-+=]+` + urlRx = protocol + `//` + // http:// + hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ + filePart + `([:.,]` + filePart + `)*` +) + +var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`) + +var ( + html_a = []byte(`<a href="`) + html_aq = []byte(`">`) + html_enda = []byte("</a>") + html_i = []byte("<i>") + html_endi = []byte("</i>") + html_p = []byte("<p>\n") + html_endp = []byte("</p>\n") + html_pre = []byte("<pre>") + html_endpre = []byte("</pre>\n") +) + + +// Emphasize and escape a line of text for HTML. URLs are converted into links; +// if the URL also appears in the words map, the link is taken from the map (if +// the corresponding map value is the empty string, the URL is not converted +// into a link). Go identifiers that appear in the words map are italicized; if +// the corresponding map value is not the empty string, it is considered a URL +// and the word is converted into a link. If nice is set, the remaining text's +// appearance is improved where it makes sense (e.g., `` is turned into “ +// and '' into ”). +func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) { + for { + m := matchRx.FindSubmatchIndex(line) + if m == nil { + break + } + // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx) + + // write text before match + commentEscape(w, line[0:m[0]], nice) + + // analyze match + match := line[m[0]:m[1]] + url := "" + italics := false + if words != nil { + url, italics = words[string(match)] + } + if m[2] < 0 { + // didn't match against first parenthesized sub-regexp; must be match against urlRx + if !italics { + // no alternative URL in words list, use match instead + url = string(match) + } + italics = false // don't italicize URLs + } + + // write match + if len(url) > 0 { + w.Write(html_a) + template.HTMLEscape(w, []byte(url)) + w.Write(html_aq) + } + if italics { + w.Write(html_i) + } + commentEscape(w, match, nice) + if italics { + w.Write(html_endi) + } + if len(url) > 0 { + w.Write(html_enda) + } + + // advance + line = line[m[1]:] + } + commentEscape(w, line, nice) +} + + +func indentLen(s []byte) int { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + return i +} + + +func isBlank(s []byte) bool { return len(s) == 0 || (len(s) == 1 && s[0] == '\n') } + + +func commonPrefix(a, b []byte) []byte { + i := 0 + for i < len(a) && i < len(b) && a[i] == b[i] { + i++ + } + return a[0:i] +} + + +func unindent(block [][]byte) { + if len(block) == 0 { + return + } + + // compute maximum common white prefix + prefix := block[0][0:indentLen(block[0])] + for _, line := range block { + if !isBlank(line) { + prefix = commonPrefix(prefix, line[0:indentLen(line)]) + } + } + n := len(prefix) + + // remove + for i, line := range block { + if !isBlank(line) { + block[i] = line[n:] + } + } +} + + +// Convert comment text to formatted HTML. +// The comment was prepared by DocReader, +// so it is known not to have leading, trailing blank lines +// nor to have trailing spaces at the end of lines. +// The comment markers have already been removed. +// +// Turn each run of multiple \n into </p><p> +// Turn each run of indented lines into a <pre> block without indent. +// +// URLs in the comment text are converted into links; if the URL also appears +// in the words map, the link is taken from the map (if the corresponding map +// value is the empty string, the URL is not converted into a link). +// +// Go identifiers that appear in the words map are italicized; if the corresponding +// map value is not the empty string, it is considered a URL and the word is converted +// into a link. +func ToHTML(w io.Writer, s []byte, words map[string]string) { + inpara := false + + close := func() { + if inpara { + w.Write(html_endp) + inpara = false + } + } + open := func() { + if !inpara { + w.Write(html_p) + inpara = true + } + } + + lines := split(s) + unindent(lines) + for i := 0; i < len(lines); { + line := lines[i] + if isBlank(line) { + // close paragraph + close() + i++ + continue + } + if indentLen(line) > 0 { + // close paragraph + close() + + // count indented or blank lines + j := i + 1 + for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { + j++ + } + // but not trailing blank lines + for j > i && isBlank(lines[j-1]) { + j-- + } + block := lines[i:j] + i = j + + unindent(block) + + // put those lines in a pre block + w.Write(html_pre) + for _, line := range block { + emphasize(w, line, nil, false) // no nice text formatting + } + w.Write(html_endpre) + continue + } + // open paragraph + open() + emphasize(w, lines[i], words, true) // nice text formatting + i++ + } + close() +} |