summaryrefslogtreecommitdiff
path: root/misc/linkcheck
diff options
context:
space:
mode:
authorAndrew Gerrand <adg@golang.org>2013-10-25 17:31:02 +0300
committerAndrew Gerrand <adg@golang.org>2013-10-25 17:31:02 +0300
commit329aa0cdcdbbf852c5b01c33046faa4cc2d4ba59 (patch)
tree8be371fb37b223af8c21cd8a4c31cc12a0222d55 /misc/linkcheck
parentfb7ccddb0795c0e6f1a17866509fcfb63d6f069d (diff)
downloadgo-329aa0cdcdbbf852c5b01c33046faa4cc2d4ba59.tar.gz
misc/linkcheck: better redirect handling, use meaningful exit code
Prevent linkcheck from following redirects that lead beyond the outside the root URL. Return a non-zero exit code when there are problems. Some minor refactoring for clarity. R=golang-dev, bradfitz CC=golang-dev https://codereview.appspot.com/14425049
Diffstat (limited to 'misc/linkcheck')
-rw-r--r--misc/linkcheck/linkcheck.go90
1 files changed, 58 insertions, 32 deletions
diff --git a/misc/linkcheck/linkcheck.go b/misc/linkcheck/linkcheck.go
index 01e9879a1..d9bfd2f76 100644
--- a/misc/linkcheck/linkcheck.go
+++ b/misc/linkcheck/linkcheck.go
@@ -8,11 +8,13 @@
package main
import (
+ "errors"
"flag"
"fmt"
"io/ioutil"
"log"
"net/http"
+ "os"
"regexp"
"strings"
"sync"
@@ -101,49 +103,71 @@ func crawl(url string, sourceURL string) {
func addProblem(url, errmsg string) {
msg := fmt.Sprintf("Error on %s: %s (from %s)", url, errmsg, linkSources[url])
- log.Print(msg)
+ if *verbose {
+ log.Print(msg)
+ }
problems = append(problems, msg)
}
func crawlLoop() {
for url := range urlq {
- res, err := http.Get(url)
- if err != nil {
- addProblem(url, fmt.Sprintf("Error fetching: %v", err))
- wg.Done()
- continue
+ if err := doCrawl(url); err != nil {
+ addProblem(url, err.Error())
}
- if res.StatusCode != 200 {
- addProblem(url, fmt.Sprintf("Status code = %d", res.StatusCode))
- wg.Done()
- continue
- }
- slurp, err := ioutil.ReadAll(res.Body)
- res.Body.Close()
+ }
+}
+
+func doCrawl(url string) error {
+ defer wg.Done()
+
+ req, err := http.NewRequest("GET", url, nil)
+ if err != nil {
+ return err
+ }
+ res, err := http.DefaultTransport.RoundTrip(req)
+ if err != nil {
+ return err
+ }
+ // Handle redirects.
+ if res.StatusCode/100 == 3 {
+ newURL, err := res.Location()
if err != nil {
- log.Fatalf("Error reading %s body: %v", url, err)
+ return fmt.Errorf("resolving redirect: %v", err)
}
- if *verbose {
- log.Printf("Len of %s: %d", url, len(slurp))
+ if !strings.HasPrefix(newURL.String(), *root) {
+ // Skip off-site redirects.
+ return nil
}
- body := string(slurp)
- for _, ref := range localLinks(body) {
- if *verbose {
- log.Printf(" links to %s", ref)
- }
- dest := *root + ref
- linkSources[dest] = append(linkSources[dest], url)
- crawl(dest, url)
+ crawl(newURL.String(), url)
+ return nil
+ }
+ if res.StatusCode != 200 {
+ return errors.New(res.Status)
+ }
+ slurp, err := ioutil.ReadAll(res.Body)
+ res.Body.Close()
+ if err != nil {
+ log.Fatalf("Error reading %s body: %v", url, err)
+ }
+ if *verbose {
+ log.Printf("Len of %s: %d", url, len(slurp))
+ }
+ body := string(slurp)
+ for _, ref := range localLinks(body) {
+ if *verbose {
+ log.Printf(" links to %s", ref)
}
- for _, id := range pageIDs(body) {
- if *verbose {
- log.Printf(" url %s has #%s", url, id)
- }
- fragExists[urlFrag{url, id}] = true
+ dest := *root + ref
+ linkSources[dest] = append(linkSources[dest], url)
+ crawl(dest, url)
+ }
+ for _, id := range pageIDs(body) {
+ if *verbose {
+ log.Printf(" url %s has #%s", url, id)
}
-
- wg.Done()
+ fragExists[urlFrag{url, id}] = true
}
+ return nil
}
func main() {
@@ -151,7 +175,6 @@ func main() {
go crawlLoop()
crawl(*root, "")
- crawl(*root+"/doc/go1.1.html", "")
wg.Wait()
close(urlq)
@@ -164,4 +187,7 @@ func main() {
for _, s := range problems {
fmt.Println(s)
}
+ if len(problems) > 0 {
+ os.Exit(1)
+ }
}