From 595a93ee2c89be666725aa793a013179537d98b0 Mon Sep 17 00:00:00 2001
From: Ben Bodenmiller
Date: Mon, 17 Aug 2015 23:11:16 -0700
Subject: disallow irrelevant pages by default in robots

Update default robots.txt rules to disallow irrelevant pages that search
engines should not care about. This will still allow important pages
like the files, commit details, merge requests, issues, comments, etc.
to be crawled.
---
 CHANGELOG         |  1 +
 public/robots.txt | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 1683b9ac094..0ed952086d5 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
 Please view this file on the master branch, on stable branches it's out of date.
 
 v 7.14.0 (unreleased)
+  - Update default robots.txt rules to disallow crawling of irrelevant pages (Ben Bodenmiller)
   - Upgrade gitlab_git to 7.2.14 to ignore CRLFs in .gitmodules (Stan Hu)
   - Provide more feedback what went wrong if HipChat service failed test (Stan Hu)
   - Fix bug where backslashes in inline diffs could be dropped (Stan Hu)
diff --git a/public/robots.txt b/public/robots.txt
index 085187fa58b..528f421083e 100644
--- a/public/robots.txt
+++ b/public/robots.txt
@@ -1,5 +1,66 @@
-# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
+# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
 #
 # To ban all spiders from the entire site uncomment the next two lines:
 # User-Agent: *
 # Disallow: /
+
+User-Agent: *
+
+# Add a 1 second delay between successive requests to the same server, limits resources used by crawler
+# Only some crawlers respect this setting, e.g. Googlebot does not
+# Crawl-delay: 1
+
+# Based on details in https://gitlab.com/gitlab-org/gitlab-ce/blob/master/config/routes.rb, https://gitlab.com/gitlab-org/gitlab-ce/blob/master/spec/routing, and using application
+Disallow: /autocomplete/users
+Disallow: /search
+Disallow: /api
+Disallow: /admin
+Disallow: /profile
+Disallow: /dashboard
+Disallow: /projects/new
+Disallow: /groups/new
+Disallow: /groups/*/edit
+Disallow: /users
+
+# Global snippets ("/s/" needs its trailing slash: a bare "/s" is a prefix rule that would block every path starting with "s")
+Disallow: /s/
+Disallow: /snippets/new
+Disallow: /snippets/*/edit
+Disallow: /snippets/*/raw
+
+# Project details
+Disallow: /*/*.git
+Disallow: /*/*/fork/new
+Disallow: /*/*/repository/archive*
+Disallow: /*/*/activity
+Disallow: /*/*/new
+Disallow: /*/*/edit
+Disallow: /*/*/raw
+Disallow: /*/*/blame
+Disallow: /*/*/commits/*/*
+Disallow: /*/*/commit
+Disallow: /*/*/compare
+Disallow: /*/*/branches/new
+Disallow: /*/*/tags/new
+Disallow: /*/*/network
+Disallow: /*/*/graphs
+Disallow: /*/*/milestones/new
+Disallow: /*/*/milestones/*/edit
+Disallow: /*/*/issues/new
+Disallow: /*/*/issues/*/edit
+Disallow: /*/*/merge_requests/new
+Disallow: /*/*/merge_requests/*.patch
+Disallow: /*/*/merge_requests/*.diff
+Disallow: /*/*/merge_requests/*/edit
+Disallow: /*/*/merge_requests/*/diffs
+Disallow: /*/*/project_members/import
+Disallow: /*/*/labels/new
+Disallow: /*/*/labels/*/edit
+Disallow: /*/*/wikis/*/edit
+Disallow: /*/*/snippets/new
+Disallow: /*/*/snippets/*/edit
+Disallow: /*/*/snippets/*/raw
+Disallow: /*/*/deploy_keys
+Disallow: /*/*/hooks
+Disallow: /*/*/services
+Disallow: /*/*/protected_branches
-- 
cgit v1.2.1