summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDouwe Maan <douwe@selenight.nl>2017-05-19 19:46:40 -0500
committerDouwe Maan <douwe@selenight.nl>2017-05-23 20:38:24 -0500
commit4345bb8c507a11af694617187dea14284f48fb96 (patch)
tree20bc96bf6f90f8654492fb4e8b5cb1108e3d131d /lib
parent3cfcbcf35badfdb21244f7f16c8640cd83b49205 (diff)
downloadgitlab-ce-4345bb8c507a11af694617187dea14284f48fb96.tar.gz
Fix ambiguous routing issues by teaching router about reserved words
Diffstat (limited to 'lib')
-rw-r--r--lib/api/repositories.rb2
-rw-r--r--lib/api/v3/repositories.rb2
-rw-r--r--lib/constraints/group_url_constrainer.rb6
-rw-r--r--lib/constraints/project_url_constrainer.rb4
-rw-r--r--lib/constraints/user_url_constrainer.rb6
-rw-r--r--lib/gitlab/etag_caching/router.rb2
-rw-r--r--lib/gitlab/path_regex.rb264
-rw-r--r--lib/gitlab/regex.rb80
8 files changed, 278 insertions, 88 deletions
diff --git a/lib/api/repositories.rb b/lib/api/repositories.rb
index 8f16e532ecb..14d2bff9cb5 100644
--- a/lib/api/repositories.rb
+++ b/lib/api/repositories.rb
@@ -85,7 +85,7 @@ module API
optional :sha, type: String, desc: 'The commit sha of the archive to be downloaded'
optional :format, type: String, desc: 'The archive format'
end
- get ':id/repository/archive', requirements: { format: Gitlab::Regex.archive_formats_regex } do
+ get ':id/repository/archive', requirements: { format: Gitlab::PathRegex.archive_formats_regex } do
begin
send_git_archive user_project.repository, ref: params[:sha], format: params[:format]
rescue
diff --git a/lib/api/v3/repositories.rb b/lib/api/v3/repositories.rb
index e4d14bc8168..0eaa0de2eef 100644
--- a/lib/api/v3/repositories.rb
+++ b/lib/api/v3/repositories.rb
@@ -72,7 +72,7 @@ module API
optional :sha, type: String, desc: 'The commit sha of the archive to be downloaded'
optional :format, type: String, desc: 'The archive format'
end
- get ':id/repository/archive', requirements: { format: Gitlab::Regex.archive_formats_regex } do
+ get ':id/repository/archive', requirements: { format: Gitlab::PathRegex.archive_formats_regex } do
begin
send_git_archive user_project.repository, ref: params[:sha], format: params[:format]
rescue
diff --git a/lib/constraints/group_url_constrainer.rb b/lib/constraints/group_url_constrainer.rb
index 5f379756c11..6fc1d56d7a0 100644
--- a/lib/constraints/group_url_constrainer.rb
+++ b/lib/constraints/group_url_constrainer.rb
@@ -1,9 +1,9 @@
class GroupUrlConstrainer
def matches?(request)
- id = request.params[:id]
+ full_path = request.params[:group_id] || request.params[:id]
- return false unless DynamicPathValidator.valid?(id)
+ return false unless DynamicPathValidator.valid_group_path?(full_path)
- Group.find_by_full_path(id, follow_redirects: request.get?).present?
+ Group.find_by_full_path(full_path, follow_redirects: request.get?).present?
end
end
diff --git a/lib/constraints/project_url_constrainer.rb b/lib/constraints/project_url_constrainer.rb
index 6f542f63f98..4c0aee6c48f 100644
--- a/lib/constraints/project_url_constrainer.rb
+++ b/lib/constraints/project_url_constrainer.rb
@@ -2,9 +2,9 @@ class ProjectUrlConstrainer
def matches?(request)
namespace_path = request.params[:namespace_id]
project_path = request.params[:project_id] || request.params[:id]
- full_path = namespace_path + '/' + project_path
+ full_path = [namespace_path, project_path].join('/')
- return false unless DynamicPathValidator.valid?(full_path)
+ return false unless DynamicPathValidator.valid_project_path?(full_path)
Project.find_by_full_path(full_path, follow_redirects: request.get?).present?
end
diff --git a/lib/constraints/user_url_constrainer.rb b/lib/constraints/user_url_constrainer.rb
index 28159dc0dec..d16ae7f3f40 100644
--- a/lib/constraints/user_url_constrainer.rb
+++ b/lib/constraints/user_url_constrainer.rb
@@ -1,5 +1,9 @@
class UserUrlConstrainer
def matches?(request)
- User.find_by_full_path(request.params[:username], follow_redirects: request.get?).present?
+ full_path = request.params[:username]
+
+ return false unless DynamicPathValidator.valid_user_path?(full_path)
+
+ User.find_by_full_path(full_path, follow_redirects: request.get?).present?
end
end
diff --git a/lib/gitlab/etag_caching/router.rb b/lib/gitlab/etag_caching/router.rb
index ba31041d0c1..7d03209684a 100644
--- a/lib/gitlab/etag_caching/router.rb
+++ b/lib/gitlab/etag_caching/router.rb
@@ -10,7 +10,7 @@ module Gitlab
# - Ending in `issues/id`/realtime_changes` for the `issue_title` route
USED_IN_ROUTES = %w[noteable issue notes issues realtime_changes
commit pipelines merge_requests new].freeze
- RESERVED_WORDS = DynamicPathValidator::WILDCARD_ROUTES - USED_IN_ROUTES
+ RESERVED_WORDS = Gitlab::PathRegex::ILLEGAL_PROJECT_PATH_WORDS - USED_IN_ROUTES
RESERVED_WORDS_REGEX = Regexp.union(*RESERVED_WORDS)
ROUTES = [
Gitlab::EtagCaching::Router::Route.new(
diff --git a/lib/gitlab/path_regex.rb b/lib/gitlab/path_regex.rb
new file mode 100644
index 00000000000..1ee0f2b3998
--- /dev/null
+++ b/lib/gitlab/path_regex.rb
@@ -0,0 +1,264 @@
+module Gitlab
+ module PathRegex
+ extend self
+
+ # All routes that appear on the top level must be listed here.
+ # This will make sure that groups cannot be created with these names
+ # as these routes would be masked by the paths already in place.
+ #
+ # Example:
+ # /api/api-project
+ #
+ # the path `api` shouldn't be allowed because it would be masked by `api/*`
+ #
+ TOP_LEVEL_ROUTES = %w[
+ -
+ .well-known
+ abuse_reports
+ admin
+ all
+ api
+ assets
+ autocomplete
+ ci
+ dashboard
+ explore
+ files
+ groups
+ health_check
+ help
+ hooks
+ import
+ invites
+ issues
+ jwt
+ koding
+ member
+ merge_requests
+ new
+ notes
+ notification_settings
+ oauth
+ profile
+ projects
+ public
+ repository
+ robots.txt
+ s
+ search
+ sent_notifications
+ services
+ snippets
+ teams
+ u
+ unicorn_test
+ unsubscribes
+ uploads
+ users
+ ].freeze
+
+ # This list should contain all words following `/*namespace_id/:project_id` in
+ # routes that contain a second wildcard.
+ #
+ # Example:
+ # /*namespace_id/:project_id/badges/*ref/build
+ #
+ # If `badges` was allowed as a project/group name, we would not be able to access the
+ # `badges` route for those projects:
+ #
+ # Consider a namespace with path `foo/bar` and a project called `badges`.
+ # The route to the build badge would then be `/foo/bar/badges/badges/master/build.svg`
+ #
+ # When accessing this path the route would be matched to the `badges` path
+ # with the following params:
+ # - namespace_id: `foo`
+ # - project_id: `bar`
+ # - ref: `badges/master`
+ #
+ # Failing to find the project, this would result in a 404.
+ #
+ # By rejecting `badges` the router can _count_ on the fact that `badges` will
+ # be preceded by the `namespace/project`.
+ PROJECT_WILDCARD_ROUTES = %w[
+ badges
+ blame
+ blob
+ builds
+ commits
+ create
+ create_dir
+ edit
+ environments/folders
+ files
+ find_file
+ gitlab-lfs/objects
+ info/lfs/objects
+ new
+ preview
+ raw
+ refs
+ tree
+ update
+ wikis
+ ].freeze
+
+ # These are all the paths that follow `/groups/*id/ or `/groups/*group_id`
+ # We need to reject these because we have a `/groups/*id` page that is the same
+ # as the `/*id`.
+ #
+ # If we would allow a subgroup to be created with the name `activity` then
+ # this group would not be accessible through `/groups/parent/activity` since
+ # this would map to the activity-page of its parent.
+ GROUP_ROUTES = %w[
+ activity
+ analytics
+ audit_events
+ avatar
+ edit
+ group_members
+ hooks
+ issues
+ labels
+ ldap
+ ldap_group_links
+ merge_requests
+ milestones
+ notification_setting
+ pipeline_quota
+ projects
+ subgroups
+ ].freeze
+
+ ILLEGAL_PROJECT_PATH_WORDS = PROJECT_WILDCARD_ROUTES
+ ILLEGAL_GROUP_PATH_WORDS = (PROJECT_WILDCARD_ROUTES | GROUP_ROUTES).freeze
+
+ # The namespace regex is used in JavaScript to validate usernames in the "Register" form. However, Javascript
+ # does not support the negative lookbehind assertion (?<!) that disallows usernames ending in `.git` and `.atom`.
+ # Since this is a non-trivial problem to solve in Javascript (heavily complicate the regex, modify view code to
+ # allow non-regex validations, etc), `NAMESPACE_FORMAT_REGEX_JS` serves as a Javascript-compatible version of
+ # `NAMESPACE_FORMAT_REGEX`, with the negative lookbehind assertion removed. This means that the client-side validation
+ # will pass for usernames ending in `.atom` and `.git`, but will be caught by the server-side validation.
+ PATH_REGEX_STR = '[a-zA-Z0-9_\.][a-zA-Z0-9_\-\.]*'.freeze
+ NAMESPACE_FORMAT_REGEX_JS = PATH_REGEX_STR + '[a-zA-Z0-9_\-]|[a-zA-Z0-9_]'.freeze
+
+ NO_SUFFIX_REGEX = /(?<!\.git|\.atom)/.freeze
+ NAMESPACE_FORMAT_REGEX = /(?:#{NAMESPACE_FORMAT_REGEX_JS})#{NO_SUFFIX_REGEX}/.freeze
+ PROJECT_PATH_FORMAT_REGEX = /(?:#{PATH_REGEX_STR})#{NO_SUFFIX_REGEX}/.freeze
+ FULL_NAMESPACE_FORMAT_REGEX = %r{(#{NAMESPACE_FORMAT_REGEX}/)*#{NAMESPACE_FORMAT_REGEX}}.freeze
+
+ def root_namespace_route_regex
+ @root_namespace_route_regex ||= begin
+ illegal_words = Regexp.new(Regexp.union(TOP_LEVEL_ROUTES).source, Regexp::IGNORECASE)
+
+ single_line_regexp %r{
+ (?!(#{illegal_words})/)
+ #{NAMESPACE_FORMAT_REGEX}
+ }x
+ end
+ end
+
+ def full_namespace_route_regex
+ @full_namespace_route_regex ||= begin
+ illegal_words = Regexp.new(Regexp.union(ILLEGAL_GROUP_PATH_WORDS).source, Regexp::IGNORECASE)
+
+ single_line_regexp %r{
+ #{root_namespace_route_regex}
+ (?:
+ /
+ (?!#{illegal_words}/)
+ #{NAMESPACE_FORMAT_REGEX}
+ )*
+ }x
+ end
+ end
+
+ def project_route_regex
+ @project_route_regex ||= begin
+ illegal_words = Regexp.new(Regexp.union(ILLEGAL_PROJECT_PATH_WORDS).source, Regexp::IGNORECASE)
+
+ single_line_regexp %r{
+ (?!(#{illegal_words})/)
+ #{PROJECT_PATH_FORMAT_REGEX}
+ }x
+ end
+ end
+
+ def project_git_route_regex
+ @project_git_route_regex ||= /#{project_route_regex}\.git/.freeze
+ end
+
+ def root_namespace_path_regex
+ @root_namespace_path_regex ||= %r{\A#{root_namespace_route_regex}/\z}
+ end
+
+ def full_namespace_path_regex
+ @full_namespace_path_regex ||= %r{\A#{full_namespace_route_regex}/\z}
+ end
+
+ def project_path_regex
+ @project_path_regex ||= %r{\A#{project_route_regex}/\z}
+ end
+
+ def full_project_path_regex
+ @full_project_path_regex ||= %r{\A#{full_namespace_route_regex}/#{project_route_regex}/\z}
+ end
+
+ def full_namespace_format_regex
+ @namespace_format_regex ||= /A#{FULL_NAMESPACE_FORMAT_REGEX}\z/.freeze
+ end
+
+ def namespace_format_regex
+ @namespace_format_regex ||= /\A#{NAMESPACE_FORMAT_REGEX}\z/.freeze
+ end
+
+ def namespace_format_message
+ "can contain only letters, digits, '_', '-' and '.'. " \
+ "Cannot start with '-' or end in '.', '.git' or '.atom'." \
+ end
+
+ def project_path_format_regex
+ @project_path_format_regex ||= /\A#{PROJECT_PATH_FORMAT_REGEX}\z/.freeze
+ end
+
+ def project_path_format_message
+ "can contain only letters, digits, '_', '-' and '.'. " \
+ "Cannot start with '-', end in '.git' or end in '.atom'" \
+ end
+
+ def archive_formats_regex
+ # |zip|tar| tar.gz | tar.bz2 |
+ @archive_formats_regex ||= /(zip|tar|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)/.freeze
+ end
+
+ def git_reference_regex
+ # Valid git ref regex, see:
+ # https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
+
+ @git_reference_regex ||= single_line_regexp %r{
+ (?!
+ (?# doesn't begins with)
+ \/| (?# rule #6)
+ (?# doesn't contain)
+ .*(?:
+ [\/.]\.| (?# rule #1,3)
+ \/\/| (?# rule #6)
+ @\{| (?# rule #8)
+ \\ (?# rule #9)
+ )
+ )
+ [^\000-\040\177~^:?*\[]+ (?# rule #4-5)
+ (?# doesn't end with)
+ (?<!\.lock) (?# rule #1)
+ (?<![\/.]) (?# rule #6-7)
+ }x
+ end
+
+ private
+
+ def single_line_regexp(regex)
+ # Turns a multiline extended regexp into a single line one,
+ # beacuse `rake routes` breaks on multiline regexes.
+ Regexp.new(regex.source.gsub(/\(\?#.+?\)/ , '').gsub(/\s*/, ''), regex.options ^ Regexp::EXTENDED).freeze
+ end
+ end
+end
diff --git a/lib/gitlab/regex.rb b/lib/gitlab/regex.rb
index b7fef5dd068..e4d2a992470 100644
--- a/lib/gitlab/regex.rb
+++ b/lib/gitlab/regex.rb
@@ -2,39 +2,6 @@ module Gitlab
module Regex
extend self
- # The namespace regex is used in Javascript to validate usernames in the "Register" form. However, Javascript
- # does not support the negative lookbehind assertion (?<!) that disallows usernames ending in `.git` and `.atom`.
- # Since this is a non-trivial problem to solve in Javascript (heavily complicate the regex, modify view code to
- # allow non-regex validatiions, etc), `NAMESPACE_REGEX_STR_JS` serves as a Javascript-compatible version of
- # `NAMESPACE_REGEX_STR`, with the negative lookbehind assertion removed. This means that the client-side validation
- # will pass for usernames ending in `.atom` and `.git`, but will be caught by the server-side validation.
- PATH_REGEX_STR = '[a-zA-Z0-9_\.][a-zA-Z0-9_\-\.]*'.freeze
- NAMESPACE_REGEX_STR_JS = PATH_REGEX_STR + '[a-zA-Z0-9_\-]|[a-zA-Z0-9_]'.freeze
- NO_SUFFIX_REGEX_STR = '(?<!\.git|\.atom)'.freeze
- NAMESPACE_REGEX_STR = "(?:#{NAMESPACE_REGEX_STR_JS})#{NO_SUFFIX_REGEX_STR}".freeze
- PROJECT_REGEX_STR = "(?:#{PATH_REGEX_STR})#{NO_SUFFIX_REGEX_STR}".freeze
-
- # Same as NAMESPACE_REGEX_STR but allows `/` in the path.
- # So `group/subgroup` will match this regex but not NAMESPACE_REGEX_STR
- FULL_NAMESPACE_REGEX_STR = "(?:#{NAMESPACE_REGEX_STR}/)*#{NAMESPACE_REGEX_STR}".freeze
-
- def namespace_regex
- @namespace_regex ||= /\A#{NAMESPACE_REGEX_STR}\z/.freeze
- end
-
- def full_namespace_regex
- @full_namespace_regex ||= %r{\A#{FULL_NAMESPACE_REGEX_STR}\z}
- end
-
- def namespace_route_regex
- @namespace_route_regex ||= /#{NAMESPACE_REGEX_STR}/.freeze
- end
-
- def namespace_regex_message
- "can contain only letters, digits, '_', '-' and '.'. " \
- "Cannot start with '-' or end in '.', '.git' or '.atom'." \
- end
-
def namespace_name_regex
@namespace_name_regex ||= /\A[\p{Alnum}\p{Pd}_\. ]*\z/.freeze
end
@@ -52,23 +19,6 @@ module Gitlab
"It must start with letter, digit, emoji or '_'."
end
- def project_path_regex
- @project_path_regex ||= /\A#{PROJECT_REGEX_STR}\z/.freeze
- end
-
- def project_route_regex
- @project_route_regex ||= /#{PROJECT_REGEX_STR}/.freeze
- end
-
- def project_git_route_regex
- @project_route_git_regex ||= /#{PATH_REGEX_STR}\.git/.freeze
- end
-
- def project_path_regex_message
- "can contain only letters, digits, '_', '-' and '.'. " \
- "Cannot start with '-', end in '.git' or end in '.atom'" \
- end
-
def file_name_regex
@file_name_regex ||= /\A[[[:alnum:]]_\-\.\@\+]*\z/.freeze
end
@@ -77,36 +27,8 @@ module Gitlab
"can contain only letters, digits, '_', '-', '@', '+' and '.'."
end
- def archive_formats_regex
- # |zip|tar| tar.gz | tar.bz2 |
- @archive_formats_regex ||= /(zip|tar|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)/.freeze
- end
-
- def git_reference_regex
- # Valid git ref regex, see:
- # https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
-
- @git_reference_regex ||= %r{
- (?!
- (?# doesn't begins with)
- \/| (?# rule #6)
- (?# doesn't contain)
- .*(?:
- [\/.]\.| (?# rule #1,3)
- \/\/| (?# rule #6)
- @\{| (?# rule #8)
- \\ (?# rule #9)
- )
- )
- [^\000-\040\177~^:?*\[]+ (?# rule #4-5)
- (?# doesn't end with)
- (?<!\.lock) (?# rule #1)
- (?<![\/.]) (?# rule #6-7)
- }x.freeze
- end
-
def container_registry_reference_regex
- git_reference_regex
+ Gitlab::PathRegex.git_reference_regex
end
##