diff options
Diffstat (limited to 'src/go/doc/comment.go')
-rw-r--r-- | src/go/doc/comment.go | 480 |
1 files changed, 480 insertions, 0 deletions
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Godoc comment extraction and comment -> HTML formatting.

// Replacement text for doubled quote characters; see commentEscape.
var (
	ldquo = []byte("“")
	rdquo = []byte("”")
)

// commentEscape writes text to w, escaped for HTML. If nice is set,
// it also turns `` into “ and '' into ”.
// Errors returned by w are not checked.
func commentEscape(w io.Writer, text string, nice bool) {
	last := 0 // start of the text that has not been written yet
	if nice {
		for i := 0; i < len(text)-1; i++ {
			ch := text[i]
			if ch == text[i+1] && (ch == '`' || ch == '\'') {
				// flush the pending text, then emit the typographic quote
				template.HTMLEscape(w, []byte(text[last:i]))
				last = i + 2
				switch ch {
				case '`':
					w.Write(ldquo)
				case '\'':
					w.Write(rdquo)
				}
				i++ // loop will add one more
			}
		}
	}
	template.HTMLEscape(w, []byte(text[last:]))
}

const (
	// Regexp for Go identifiers
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs
	protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
	hostPart = `[a-zA-Z0-9_@\-]+`
	filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
	urlRx    = `(` + protocol + `)://` + // http://
		hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
		filePart + `([:.,]` + filePart + `)*`
)

// matchRx matches either a URL (first parenthesized sub-regexp) or a Go
// identifier (last parenthesized sub-regexp).
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)

// HTML fragments emitted by the formatting functions.
var (
	html_a      = []byte(`<a href="`)
	html_aq     = []byte(`">`)
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
	html_h      = []byte(`<h3 id="`)
	html_hq     = []byte(`">`)
	html_endh   = []byte("</h3>\n")
)

// pairedParensPrefixLen returns the length of the longest prefix of s
// containing paired parentheses.
func pairedParensPrefixLen(s string) int {
	parens := 0
	l := len(s)
	for i, ch := range s {
		switch ch {
		case '(':
			if parens == 0 {
				// remember where the outermost group opens in case it never closes
				l = i
			}
			parens++
		case ')':
			parens--
			if parens == 0 {
				// outermost group closed; everything seen so far is paired
				l = len(s)
			} else if parens < 0 {
				// closing parenthesis without an opening one
				return i
			}
		}
	}
	return l
}

// Emphasize and escape a line of text for HTML. URLs are converted into links;
// if the URL also appears in the words map, the link is taken from the map (if
// the corresponding map value is the empty string, the URL is not converted
// into a link). Go identifiers that appear in the words map are italicized; if
// the corresponding map value is not the empty string, it is considered a URL
// and the word is converted into a link. If nice is set, the remaining text's
// appearance is improved where it makes sense (e.g., `` is turned into “
// and '' into ”).
func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
	for {
		m := matchRx.FindStringSubmatchIndex(line)
		if m == nil {
			break
		}
		// m[0:2] spans the whole match; m[2:4] spans the first parenthesized
		// sub-regexp of matchRx (urlRx) and is negative for identifier matches.

		// write text before match
		start := m[0]
		commentEscape(w, line[:start], nice)

		// adjust match if necessary
		match := line[start:m[1]]
		if n := pairedParensPrefixLen(match); n < len(match) {
			// match contains unpaired parentheses (rare);
			// redo matching with shortened line for correct indices
			m = matchRx.FindStringSubmatchIndex(line[:start+n])
			if m == nil || m[0] != start {
				// The shortened text does not match at the same position.
				// Emit the original match as plain text and move past it so
				// that the loop always makes progress.
				commentEscape(w, match, nice)
				line = line[start+len(match):]
				continue
			}
			// The shortened text may match less than its full length (for
			// instance only the protocol, as an identifier). Take the match
			// from the new indices so that the text written below and the
			// amount the line advances by always agree.
			match = line[start:m[1]]
		}

		// analyze match
		url := ""
		italics := false
		if words != nil {
			url, italics = words[match]
		}
		if m[2] >= 0 {
			// match against first parenthesized sub-regexp; must be match against urlRx
			if !italics {
				// no alternative URL in words list, use match instead
				url = match
			}
			italics = false // don't italicize URLs
		}

		// write match
		if len(url) > 0 {
			w.Write(html_a)
			template.HTMLEscape(w, []byte(url))
			w.Write(html_aq)
		}
		if italics {
			w.Write(html_i)
		}
		commentEscape(w, match, nice)
		if italics {
			w.Write(html_endi)
		}
		if len(url) > 0 {
			w.Write(html_enda)
		}

		// advance
		line = line[m[1]:]
	}
	commentEscape(w, line, nice)
}

// indentLen returns the number of leading space and tab bytes of s.
func indentLen(s string) int {
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	return i
}

// isBlank reports whether s is empty or consists of a single newline.
func isBlank(s string) bool {
	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
}

// commonPrefix returns the longest prefix (compared byte by byte)
// shared by a and b.
func commonPrefix(a, b string) string {
	i := 0
	for i < len(a) && i < len(b) && a[i] == b[i] {
		i++
	}
	return a[0:i]
}

// unindent removes, in place, the longest leading white space prefix
// common to all non-blank lines of block. Blank lines are left unchanged.
func unindent(block []string) {
	if len(block) == 0 {
		return
	}

	// compute maximum common white prefix
	prefix := block[0][0:indentLen(block[0])]
	for _, line := range block {
		if !isBlank(line) {
			prefix = commonPrefix(prefix, line[0:indentLen(line)])
		}
	}
	n := len(prefix)

	// remove
	for i, line := range block {
		if !isBlank(line) {
			block[i] = line[n:]
		}
	}
}

// heading returns the trimmed line if it passes as a section heading;
// otherwise it returns the empty string.
+func heading(line string) string { + line = strings.TrimSpace(line) + if len(line) == 0 { + return "" + } + + // a heading must start with an uppercase letter + r, _ := utf8.DecodeRuneInString(line) + if !unicode.IsLetter(r) || !unicode.IsUpper(r) { + return "" + } + + // it must end in a letter or digit: + r, _ = utf8.DecodeLastRuneInString(line) + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + return "" + } + + // exclude lines with illegal characters + if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 { + return "" + } + + // allow "'" for possessive "'s" only + for b := line; ; { + i := strings.IndexRune(b, '\'') + if i < 0 { + break + } + if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { + return "" // not followed by "s " + } + b = b[i+2:] + } + + return line +} + +type op int + +const ( + opPara op = iota + opHead + opPre +) + +type block struct { + op op + lines []string +} + +var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`) + +func anchorID(line string) string { + // Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols. + return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_") +} + +// ToHTML converts comment text to formatted HTML. +// The comment was prepared by DocReader, +// so it is known not to have leading, trailing blank lines +// nor to have trailing spaces at the end of lines. +// The comment markers have already been removed. +// +// Each span of unindented non-blank lines is converted into +// a single paragraph. There is one exception to the rule: a span that +// consists of a single line, is followed by another paragraph span, +// begins with a capital letter, and contains no punctuation +// is formatted as a heading. +// +// A span of indented lines is converted into a <pre> block, +// with the common indent prefix removed. 
+// +// URLs in the comment text are converted into links; if the URL also appears +// in the words map, the link is taken from the map (if the corresponding map +// value is the empty string, the URL is not converted into a link). +// +// Go identifiers that appear in the words map are italicized; if the corresponding +// map value is not the empty string, it is considered a URL and the word is converted +// into a link. +func ToHTML(w io.Writer, text string, words map[string]string) { + for _, b := range blocks(text) { + switch b.op { + case opPara: + w.Write(html_p) + for _, line := range b.lines { + emphasize(w, line, words, true) + } + w.Write(html_endp) + case opHead: + w.Write(html_h) + id := "" + for _, line := range b.lines { + if id == "" { + id = anchorID(line) + w.Write([]byte(id)) + w.Write(html_hq) + } + commentEscape(w, line, true) + } + if id == "" { + w.Write(html_hq) + } + w.Write(html_endh) + case opPre: + w.Write(html_pre) + for _, line := range b.lines { + emphasize(w, line, nil, false) + } + w.Write(html_endpre) + } + } +} + +func blocks(text string) []block { + var ( + out []block + para []string + + lastWasBlank = false + lastWasHeading = false + ) + + close := func() { + if para != nil { + out = append(out, block{opPara, para}) + para = nil + } + } + + lines := strings.SplitAfter(text, "\n") + unindent(lines) + for i := 0; i < len(lines); { + line := lines[i] + if isBlank(line) { + // close paragraph + close() + i++ + lastWasBlank = true + continue + } + if indentLen(line) > 0 { + // close paragraph + close() + + // count indented or blank lines + j := i + 1 + for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { + j++ + } + // but not trailing blank lines + for j > i && isBlank(lines[j-1]) { + j-- + } + pre := lines[i:j] + i = j + + unindent(pre) + + // put those lines in a pre block + out = append(out, block{opPre, pre}) + lastWasHeading = false + continue + } + + if lastWasBlank && !lastWasHeading && i+2 < len(lines) && 
+ isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { + // current line is non-blank, surrounded by blank lines + // and the next non-blank line is not indented: this + // might be a heading. + if head := heading(line); head != "" { + close() + out = append(out, block{opHead, []string{head}}) + i += 2 + lastWasHeading = true + continue + } + } + + // open paragraph + lastWasBlank = false + lastWasHeading = false + para = append(para, lines[i]) + i++ + } + close() + + return out +} + +// ToText prepares comment text for presentation in textual output. +// It wraps paragraphs of text to width or fewer Unicode code points +// and then prefixes each line with the indent. In preformatted sections +// (such as program text), it prefixes each non-blank line with preIndent. +func ToText(w io.Writer, text string, indent, preIndent string, width int) { + l := lineWrapper{ + out: w, + width: width, + indent: indent, + } + for _, b := range blocks(text) { + switch b.op { + case opPara: + // l.write will add leading newline if required + for _, line := range b.lines { + l.write(line) + } + l.flush() + case opHead: + w.Write(nl) + for _, line := range b.lines { + l.write(line + "\n") + } + l.flush() + case opPre: + w.Write(nl) + for _, line := range b.lines { + if isBlank(line) { + w.Write([]byte("\n")) + } else { + w.Write([]byte(preIndent)) + w.Write([]byte(line)) + } + } + } + } +} + +type lineWrapper struct { + out io.Writer + printed bool + width int + indent string + n int + pendSpace int +} + +var nl = []byte("\n") +var space = []byte(" ") + +func (l *lineWrapper) write(text string) { + if l.n == 0 && l.printed { + l.out.Write(nl) // blank line before new paragraph + } + l.printed = true + + for _, f := range strings.Fields(text) { + w := utf8.RuneCountInString(f) + // wrap if line is too long + if l.n > 0 && l.n+l.pendSpace+w > l.width { + l.out.Write(nl) + l.n = 0 + l.pendSpace = 0 + } + if l.n == 0 { + l.out.Write([]byte(l.indent)) + } + 
l.out.Write(space[:l.pendSpace]) + l.out.Write([]byte(f)) + l.n += l.pendSpace + w + l.pendSpace = 1 + } +} + +func (l *lineWrapper) flush() { + if l.n == 0 { + return + } + l.out.Write(nl) + l.pendSpace = 0 + l.n = 0 +} |