// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Godoc comment extraction and comment -> HTML formatting. package doc import ( "io" "regexp" "strings" "text/template" // for HTMLEscape "unicode" "unicode/utf8" ) var ( ldquo = []byte("“") rdquo = []byte("”") ) // Escape comment text for HTML. If nice is set, // also turn `` into “ and '' into ”. func commentEscape(w io.Writer, text string, nice bool) { last := 0 if nice { for i := 0; i < len(text)-1; i++ { ch := text[i] if ch == text[i+1] && (ch == '`' || ch == '\'') { template.HTMLEscape(w, []byte(text[last:i])) last = i + 2 switch ch { case '`': w.Write(ldquo) case '\'': w.Write(rdquo) } i++ // loop will add one more } } } template.HTMLEscape(w, []byte(text[last:])) } const ( // Regexp for Go identifiers identRx = `[\pL_][\pL_0-9]*` // Regexp for URLs protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero` hostPart = `[a-zA-Z0-9_@\-]+` filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen urlRx = `(` + protocol + `)://` + // http:// hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ filePart + `([:.,]` + filePart + `)*` ) var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) var ( html_a = []byte(``) html_enda = []byte("") html_i = []byte("") html_endi = []byte("") html_p = []byte("
\n") html_endp = []byte("
\n") html_pre = []byte("") html_endpre = []byte("\n") html_h = []byte(`
block, // with the common indent prefix removed. // // URLs in the comment text are converted into links; if the URL also appears // in the words map, the link is taken from the map (if the corresponding map // value is the empty string, the URL is not converted into a link). // // Go identifiers that appear in the words map are italicized; if the corresponding // map value is not the empty string, it is considered a URL and the word is converted // into a link. func ToHTML(w io.Writer, text string, words map[string]string) { for _, b := range blocks(text) { switch b.op { case opPara: w.Write(html_p) for _, line := range b.lines { emphasize(w, line, words, true) } w.Write(html_endp) case opHead: w.Write(html_h) id := "" for _, line := range b.lines { if id == "" { id = anchorID(line) w.Write([]byte(id)) w.Write(html_hq) } commentEscape(w, line, true) } if id == "" { w.Write(html_hq) } w.Write(html_endh) case opPre: w.Write(html_pre) for _, line := range b.lines { emphasize(w, line, nil, false) } w.Write(html_endpre) } } } func blocks(text string) []block { var ( out []block para []string lastWasBlank = false lastWasHeading = false ) close := func() { if para != nil { out = append(out, block{opPara, para}) para = nil } } lines := strings.SplitAfter(text, "\n") unindent(lines) for i := 0; i < len(lines); { line := lines[i] if isBlank(line) { // close paragraph close() i++ lastWasBlank = true continue } if indentLen(line) > 0 { // close paragraph close() // count indented or blank lines j := i + 1 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { j++ } // but not trailing blank lines for j > i && isBlank(lines[j-1]) { j-- } pre := lines[i:j] i = j unindent(pre) // put those lines in a pre block out = append(out, block{opPre, pre}) lastWasHeading = false continue } if lastWasBlank && !lastWasHeading && i+2 < len(lines) && isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { // current line is non-blank, surrounded by blank lines // and the next non-blank line is not indented: this // might be a heading. if head := heading(line); head != "" { close() out = append(out, block{opHead, []string{head}}) i += 2 lastWasHeading = true continue } } // open paragraph lastWasBlank = false lastWasHeading = false para = append(para, lines[i]) i++ } close() return out } // ToText prepares comment text for presentation in textual output. // It wraps paragraphs of text to width or fewer Unicode code points // and then prefixes each line with the indent. In preformatted sections // (such as program text), it prefixes each non-blank line with preIndent. func ToText(w io.Writer, text string, indent, preIndent string, width int) { l := lineWrapper{ out: w, width: width, indent: indent, } for _, b := range blocks(text) { switch b.op { case opPara: // l.write will add leading newline if required for _, line := range b.lines { l.write(line) } l.flush() case opHead: w.Write(nl) for _, line := range b.lines { l.write(line + "\n") } l.flush() case opPre: w.Write(nl) for _, line := range b.lines { if isBlank(line) { w.Write([]byte("\n")) } else { w.Write([]byte(preIndent)) w.Write([]byte(line)) } } } } } type lineWrapper struct { out io.Writer printed bool width int indent string n int pendSpace int } var nl = []byte("\n") var space = []byte(" ") func (l *lineWrapper) write(text string) { if l.n == 0 && l.printed { l.out.Write(nl) // blank line before new paragraph } l.printed = true for _, f := range strings.Fields(text) { w := utf8.RuneCountInString(f) // wrap if line is too long if l.n > 0 && l.n+l.pendSpace+w > l.width { l.out.Write(nl) l.n = 0 l.pendSpace = 0 } if l.n == 0 { l.out.Write([]byte(l.indent)) } l.out.Write(space[:l.pendSpace]) l.out.Write([]byte(f)) l.n += l.pendSpace + w l.pendSpace = 1 } } func (l *lineWrapper) flush() { if l.n == 0 { return } l.out.Write(nl) l.pendSpace = 0 l.n = 0 }