diff options
author | Douwe Maan <douwe@selenight.nl> | 2017-05-19 19:46:40 -0500 |
---|---|---|
committer | Douwe Maan <douwe@selenight.nl> | 2017-05-23 20:38:24 -0500 |
commit | 4345bb8c507a11af694617187dea14284f48fb96 (patch) | |
tree | 20bc96bf6f90f8654492fb4e8b5cb1108e3d131d | |
parent | 3cfcbcf35badfdb21244f7f16c8640cd83b49205 (diff) | |
download | gitlab-ce-4345bb8c507a11af694617187dea14284f48fb96.tar.gz |
Fix ambiguous routing issues by teaching router about reserved words
29 files changed, 793 insertions, 590 deletions
diff --git a/app/controllers/projects/refs_controller.rb b/app/controllers/projects/refs_controller.rb index 667f4870c7a..2a0b58fae7c 100644 --- a/app/controllers/projects/refs_controller.rb +++ b/app/controllers/projects/refs_controller.rb @@ -74,6 +74,6 @@ class Projects::RefsController < Projects::ApplicationController private def validate_ref_id - return not_found! if params[:id].present? && params[:id] !~ Gitlab::Regex.git_reference_regex + return not_found! if params[:id].present? && params[:id] !~ Gitlab::PathRegex.git_reference_regex end end diff --git a/app/models/project.rb b/app/models/project.rb index 65745fd6d37..29af57d7664 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -205,8 +205,8 @@ class Project < ActiveRecord::Base presence: true, dynamic_path: true, length: { maximum: 255 }, - format: { with: Gitlab::Regex.project_path_regex, - message: Gitlab::Regex.project_path_regex_message }, + format: { with: Gitlab::PathRegex.project_path_format_regex, + message: Gitlab::PathRegex.project_path_format_message }, uniqueness: { scope: :namespace_id } validates :namespace, presence: true @@ -380,11 +380,9 @@ class Project < ActiveRecord::Base end def reference_pattern - name_pattern = Gitlab::Regex::FULL_NAMESPACE_REGEX_STR - %r{ - ((?<namespace>#{name_pattern})\/)? - (?<project>#{name_pattern}) + ((?<namespace>#{Gitlab::PathRegex::FULL_NAMESPACE_FORMAT_REGEX})\/)? + (?<project>#{Gitlab::PathRegex::PROJECT_PATH_FORMAT_REGEX}) }x end diff --git a/app/models/user.rb b/app/models/user.rb index 837ab78228b..55614233230 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -367,7 +367,7 @@ class User < ActiveRecord::Base def reference_pattern %r{ #{Regexp.escape(reference_prefix)} - (?<user>#{Gitlab::Regex::FULL_NAMESPACE_REGEX_STR}) + (?<user>#{Gitlab::PathRegex::FULL_NAMESPACE_FORMAT_REGEX}) }x end diff --git a/app/validators/dynamic_path_validator.rb b/app/validators/dynamic_path_validator.rb index d992b0c3725..6819886ebf4 100644 --- a/app/validators/dynamic_path_validator.rb +++ b/app/validators/dynamic_path_validator.rb @@ -3,212 +3,45 @@ # Custom validator for GitLab path values. # These paths are assigned to `Namespace` (& `Group` as a subclass) & `Project` # -# Values are checked for formatting and exclusion from a list of reserved path +# Values are checked for formatting and exclusion from a list of illegal path # names. class DynamicPathValidator < ActiveModel::EachValidator - # All routes that appear on the top level must be listed here. - # This will make sure that groups cannot be created with these names - # as these routes would be masked by the paths already in place. - # - # Example: - # /api/api-project - # - # the path `api` shouldn't be allowed because it would be masked by `api/*` - # - TOP_LEVEL_ROUTES = %w[ - - - .well-known - abuse_reports - admin - all - api - assets - autocomplete - ci - dashboard - explore - files - groups - health_check - help - hooks - import - invites - issues - jwt - koding - member - merge_requests - new - notes - notification_settings - oauth - profile - projects - public - repository - robots.txt - s - search - sent_notifications - services - snippets - teams - u - unicorn_test - unsubscribes - uploads - users - ].freeze - - # This list should contain all words following `/*namespace_id/:project_id` in - # routes that contain a second wildcard. - # - # Example: - # /*namespace_id/:project_id/badges/*ref/build - # - # If `badges` was allowed as a project/group name, we would not be able to access the - # `badges` route for those projects: - # - # Consider a namespace with path `foo/bar` and a project called `badges`. - # The route to the build badge would then be `/foo/bar/badges/badges/master/build.svg` - # - # When accessing this path the route would be matched to the `badges` path - # with the following params: - # - namespace_id: `foo` - # - project_id: `bar` - # - ref: `badges/master` - # - # Failing to find the project, this would result in a 404. - # - # By rejecting `badges` the router can _count_ on the fact that `badges` will - # be preceded by the `namespace/project`. - WILDCARD_ROUTES = %w[ - badges - blame - blob - builds - commits - create - create_dir - edit - environments/folders - files - find_file - gitlab-lfs/objects - info/lfs/objects - new - preview - raw - refs - tree - update - wikis - ].freeze - - # These are all the paths that follow `/groups/*id/ or `/groups/*group_id` - # We need to reject these because we have a `/groups/*id` page that is the same - # as the `/*id`. - # - # If we would allow a subgroup to be created with the name `activity` then - # this group would not be accessible through `/groups/parent/activity` since - # this would map to the activity-page of it's parent. - GROUP_ROUTES = %w[ - activity - analytics - audit_events - avatar - edit - group_members - hooks - issues - labels - ldap - ldap_group_links - merge_requests - milestones - notification_setting - pipeline_quota - projects - subgroups - ].freeze - - CHILD_ROUTES = (WILDCARD_ROUTES | GROUP_ROUTES).freeze - - def self.without_reserved_wildcard_paths_regex - @without_reserved_wildcard_paths_regex ||= regex_excluding_child_paths(WILDCARD_ROUTES) - end - - def self.without_reserved_child_paths_regex - @without_reserved_child_paths_regex ||= regex_excluding_child_paths(CHILD_ROUTES) - end - - # This is used to validate a full path. - # It doesn't match paths - # - Starting with one of the top level words - # - Containing one of the child level words in the middle of a path - def self.regex_excluding_child_paths(child_routes) - reserved_top_level_words = Regexp.union(TOP_LEVEL_ROUTES) - not_starting_in_reserved_word = %r{\A/?(?!(#{reserved_top_level_words})(/|\z))} - - reserved_child_level_words = Regexp.union(child_routes) - not_containing_reserved_child = %r{(?!\S+/(#{reserved_child_level_words})(/|\z))} - - %r{#{not_starting_in_reserved_word} - #{not_containing_reserved_child} - #{Gitlab::Regex.full_namespace_regex}}x - end - - def self.valid?(path) - path =~ Gitlab::Regex.full_namespace_regex && !full_path_reserved?(path) - end - - def self.full_path_reserved?(path) - path = path.to_s.downcase - _project_part, namespace_parts = path.reverse.split('/', 2).map(&:reverse) - - wildcard_reserved?(path) || child_reserved?(namespace_parts) - end - - def self.child_reserved?(path) - return false unless path - - path !~ without_reserved_child_paths_regex - end + class << self + def valid_user_path?(path) + "#{path}/" =~ Gitlab::PathRegex.root_namespace_path_regex + end - def self.wildcard_reserved?(path) - return false unless path + def valid_group_path?(path) + "#{path}/" =~ Gitlab::PathRegex.full_namespace_path_regex + end - path !~ without_reserved_wildcard_paths_regex + def valid_project_path?(path) + "#{path}/" =~ Gitlab::PathRegex.full_project_path_regex + end end - delegate :full_path_reserved?, - :child_reserved?, - to: :class - - def path_reserved_for_record?(record, value) + def path_valid_for_record?(record, value) full_path = record.respond_to?(:full_path) ? record.full_path : value - # For group paths the entire path cannot contain a reserved child word - # The path doesn't contain the last `_project_part` so we need to validate - # if the entire path. - # Example: - # A *group* with full path `parent/activity` is reserved. - # A *project* with full path `parent/activity` is allowed. - if record.is_a? Group - child_reserved?(full_path) - else - full_path_reserved?(full_path) + return true unless full_path + + case record + when Project + self.class.valid_project_path?(full_path) + when Group + self.class.valid_group_path?(full_path) + else # User or non-Group Namespace + self.class.valid_user_path?(full_path) end end def validate_each(record, attribute, value) - unless value =~ Gitlab::Regex.namespace_regex - record.errors.add(attribute, Gitlab::Regex.namespace_regex_message) + unless value =~ Gitlab::PathRegex.namespace_format_regex + record.errors.add(attribute, Gitlab::PathRegex.namespace_format_message) return end - if path_reserved_for_record?(record, value) + unless path_valid_for_record?(record, value) record.errors.add(attribute, "#{value} is a reserved name") end end diff --git a/app/views/devise/shared/_signup_box.html.haml b/app/views/devise/shared/_signup_box.html.haml index a2f6a7ab1cb..d696577278d 100644 --- a/app/views/devise/shared/_signup_box.html.haml +++ b/app/views/devise/shared/_signup_box.html.haml @@ -8,7 +8,7 @@ = f.text_field :name, class: "form-control top", required: true, title: "This field is required." .username.form-group = f.label :username - = f.text_field :username, class: "form-control middle", pattern: Gitlab::Regex::NAMESPACE_REGEX_STR_JS, required: true, title: 'Please create a username with only alphanumeric characters.' + = f.text_field :username, class: "form-control middle", pattern: Gitlab::PathRegex::NAMESPACE_FORMAT_REGEX_JS, required: true, title: 'Please create a username with only alphanumeric characters.' %p.validation-error.hide Username is already taken. %p.validation-success.hide Username is available. %p.validation-pending.hide Checking username availability... diff --git a/app/views/shared/_group_form.html.haml b/app/views/shared/_group_form.html.haml index 90ae3f06a98..8d5b5129454 100644 --- a/app/views/shared/_group_form.html.haml +++ b/app/views/shared/_group_form.html.haml @@ -15,7 +15,7 @@ %strong= parent.full_path + '/' = f.text_field :path, placeholder: 'open-source', class: 'form-control', autofocus: local_assigns[:autofocus] || false, required: true, - pattern: Gitlab::Regex::NAMESPACE_REGEX_STR_JS, + pattern: Gitlab::PathRegex::NAMESPACE_FORMAT_REGEX_JS, title: 'Please choose a group path with no special characters.', "data-bind-in" => "#{'create_chat_team' if Gitlab.config.mattermost.enabled}" - if parent diff --git a/config/routes.rb b/config/routes.rb index 2584981bb04..846054e6917 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -1,6 +1,5 @@ require 'sidekiq/web' require 'sidekiq/cron/web' -require 'constraints/group_url_constrainer' Rails.application.routes.draw do concern :access_requestable do @@ -85,20 +84,6 @@ Rails.application.routes.draw do root to: "root#index" - # Since group show page is wildcard routing - # we want all other routing to be checked before matching this one - constraints(GroupUrlConstrainer.new) do - scope(path: '*id', - as: :group, - constraints: { id: Gitlab::Regex.namespace_route_regex, format: /(html|json|atom)/ }, - controller: :groups) do - get '/', action: :show - patch '/', action: :update - put '/', action: :update - delete '/', action: :destroy - end - end - draw :test if Rails.env.test? get '*unmatched_route', to: 'application#route_not_found' diff --git a/config/routes/admin.rb b/config/routes/admin.rb index 48993420ed9..6e34dd3a2eb 100644 --- a/config/routes/admin.rb +++ b/config/routes/admin.rb @@ -36,7 +36,7 @@ namespace :admin do scope(path: 'groups/*id', controller: :groups, - constraints: { id: Gitlab::Regex.namespace_route_regex, format: /(html|json|atom)/ }) do + constraints: { id: Gitlab::PathRegex.full_namespace_route_regex, format: /(html|json|atom)/ }) do scope(as: :group) do put :members_update @@ -68,10 +68,12 @@ namespace :admin do resources :projects, only: [:index] - scope(path: 'projects/*namespace_id', as: :namespace) do + scope(path: 'projects/*namespace_id', + as: :namespace, + constraints: { namespace_id: Gitlab::PathRegex.full_namespace_route_regex }) do resources(:projects, path: '/', - constraints: { id: Gitlab::Regex.project_route_regex }, + constraints: { id: Gitlab::PathRegex.project_route_regex }, only: [:show]) do member do diff --git a/config/routes/git_http.rb b/config/routes/git_http.rb index 42d874eeebc..a53c94326d4 100644 --- a/config/routes/git_http.rb +++ b/config/routes/git_http.rb @@ -1,5 +1,7 @@ -scope(path: '*namespace_id/:project_id', constraints: { format: nil }) do - scope(constraints: { project_id: Gitlab::Regex.project_git_route_regex }, module: :projects) do +scope(path: '*namespace_id/:project_id', + format: nil, + constraints: { namespace_id: Gitlab::PathRegex.full_namespace_route_regex }) do + scope(constraints: { project_id: Gitlab::PathRegex.project_git_route_regex }, module: :projects) do # Git HTTP clients ('git clone' etc.) scope(controller: :git_http) do get '/info/refs', action: :info_refs @@ -26,7 +28,7 @@ scope(path: '*namespace_id/:project_id', constraints: { format: nil }) do end # Redirect /group/project/info/refs to /group/project.git/info/refs - scope(constraints: { project_id: Gitlab::Regex.project_route_regex }) do + scope(constraints: { project_id: Gitlab::PathRegex.project_route_regex }) do # Allow /info/refs, /info/refs?service=git-upload-pack, and # /info/refs?service=git-receive-pack, but nothing else. # diff --git a/config/routes/group.rb b/config/routes/group.rb index 7b29e0e807c..11cdff55ed8 100644 --- a/config/routes/group.rb +++ b/config/routes/group.rb @@ -1,9 +1,11 @@ +require 'constraints/group_url_constrainer' + resources :groups, only: [:index, :new, :create] scope(path: 'groups/*group_id', module: :groups, as: :group, - constraints: { group_id: Gitlab::Regex.namespace_route_regex }) do + constraints: { group_id: Gitlab::PathRegex.full_namespace_route_regex }) do resources :group_members, only: [:index, :create, :update, :destroy], concerns: :access_requestable do post :resend_invite, on: :member delete :leave, on: :collection @@ -25,7 +27,7 @@ end scope(path: 'groups/*id', controller: :groups, - constraints: { id: Gitlab::Regex.namespace_route_regex, format: /(html|json|atom)/ }) do + constraints: { id: Gitlab::PathRegex.full_namespace_route_regex, format: /(html|json|atom)/ }) do get :edit, as: :edit_group get :issues, as: :issues_group get :merge_requests, as: :merge_requests_group @@ -34,3 +36,15 @@ scope(path: 'groups/*id', get :subgroups, as: :subgroups_group get '/', action: :show, as: :group_canonical end + +constraints(GroupUrlConstrainer.new) do + scope(path: '*id', + as: :group, + constraints: { id: Gitlab::PathRegex.full_namespace_route_regex, format: /(html|json|atom)/ }, + controller: :groups) do + get '/', action: :show + patch '/', action: :update + put '/', action: :update + delete '/', action: :destroy + end +end diff --git a/config/routes/project.rb b/config/routes/project.rb index 01b94f9f2b8..a2bc63a4734 100644 --- a/config/routes/project.rb +++ b/config/routes/project.rb @@ -5,9 +5,24 @@ resources :projects, only: [:index, :new, :create] draw :git_http constraints(ProjectUrlConstrainer.new) do - scope(path: '*namespace_id', as: :namespace) do + # If the route has a wildcard segment, the segment has a regex constraint, + # the segment is potentially followed by _another_ wildcard segment, and + # the `format` option is not set to false, we need to specify that + # regex constraint _outside_ of `constraints: {}`. + # + # Otherwise, Rails will overwrite the constraint with `/.+?/`, + # which breaks some of our wildcard routes like `/blob/*id` + # and `/tree/*id` that depend on the negative lookahead inside + # `Gitlab::PathRegex.full_namespace_route_regex`, which helps the router + # determine whether a certain path segment is part of `*namespace_id`, + # `:project_id`, or `*id`. + # + # See https://github.com/rails/rails/blob/v4.2.8/actionpack/lib/action_dispatch/routing/mapper.rb#L155 + scope(path: '*namespace_id', + as: :namespace, + namespace_id: Gitlab::PathRegex.full_namespace_route_regex) do scope(path: ':project_id', - constraints: { project_id: Gitlab::Regex.project_route_regex }, + constraints: { project_id: Gitlab::PathRegex.project_route_regex }, module: :projects, as: :project) do @@ -314,7 +329,7 @@ constraints(ProjectUrlConstrainer.new) do resources :runner_projects, only: [:create, :destroy] resources :badges, only: [:index] do collection do - scope '*ref', constraints: { ref: Gitlab::Regex.git_reference_regex } do + scope '*ref', constraints: { ref: Gitlab::PathRegex.git_reference_regex } do constraints format: /svg/ do get :build get :coverage @@ -337,7 +352,7 @@ constraints(ProjectUrlConstrainer.new) do resources(:projects, path: '/', - constraints: { id: Gitlab::Regex.project_route_regex }, + constraints: { id: Gitlab::PathRegex.project_route_regex }, only: [:edit, :show, :update, :destroy]) do member do put :transfer diff --git a/config/routes/repository.rb b/config/routes/repository.rb index 5cf37a06e97..11911636fa7 100644 --- a/config/routes/repository.rb +++ b/config/routes/repository.rb @@ -2,7 +2,7 @@ resource :repository, only: [:create] do member do - get 'archive', constraints: { format: Gitlab::Regex.archive_formats_regex } + get 'archive', constraints: { format: Gitlab::PathRegex.archive_formats_regex } end end @@ -24,7 +24,7 @@ scope format: false do member do # tree viewer logs - get 'logs_tree', constraints: { id: Gitlab::Regex.git_reference_regex } + get 'logs_tree', constraints: { id: Gitlab::PathRegex.git_reference_regex } # Directories with leading dots erroneously get rejected if git # ref regex used in constraints. Regex verification now done in controller. get 'logs_tree/*path', action: :logs_tree, as: :logs_file, format: false, constraints: { @@ -34,7 +34,7 @@ scope format: false do end end - scope constraints: { id: Gitlab::Regex.git_reference_regex } do + scope constraints: { id: Gitlab::PathRegex.git_reference_regex } do resources :network, only: [:show] resources :graphs, only: [:show] do diff --git a/config/routes/user.rb b/config/routes/user.rb index b064a15e802..df49a752f5e 100644 --- a/config/routes/user.rb +++ b/config/routes/user.rb @@ -11,19 +11,7 @@ devise_scope :user do get '/users/almost_there' => 'confirmations#almost_there' end -constraints(UserUrlConstrainer.new) do - # Get all keys of user - get ':username.keys' => 'profiles/keys#get_keys', constraints: { username: Gitlab::Regex.namespace_route_regex } - - scope(path: ':username', - as: :user, - constraints: { username: Gitlab::Regex.namespace_route_regex }, - controller: :users) do - get '/', action: :show - end -end - -scope(constraints: { username: Gitlab::Regex.namespace_route_regex }) do +scope(constraints: { username: Gitlab::PathRegex.root_namespace_route_regex }) do scope(path: 'users/:username', as: :user, controller: :users) do @@ -46,3 +34,15 @@ scope(constraints: { username: Gitlab::Regex.namespace_route_regex }) do get '/u/:username/snippets', to: redirect('/users/%{username}/snippets') get '/u/:username/contributed', to: redirect('/users/%{username}/contributed') end + +constraints(UserUrlConstrainer.new) do + # Get all keys of user + get ':username.keys' => 'profiles/keys#get_keys', constraints: { username: Gitlab::PathRegex.root_namespace_route_regex } + + scope(path: ':username', + as: :user, + constraints: { username: Gitlab::PathRegex.root_namespace_route_regex }, + controller: :users) do + get '/', action: :show + end +end diff --git a/doc/user/group/subgroups/index.md b/doc/user/group/subgroups/index.md index a4726673fc4..4ab7d65d5f7 100644 --- a/doc/user/group/subgroups/index.md +++ b/doc/user/group/subgroups/index.md @@ -71,9 +71,9 @@ structure. - You need to be an Owner of a group in order to be able to create a subgroup. For more information check the [permissions table][permissions]. - For a list of words that are not allowed to be used as group names see the - [`dynamic_path_validator.rb` file][reserved] under the `TOP_LEVEL_ROUTES`, `WILDCARD_ROUTES` and `GROUP_ROUTES` lists: + [`dynamic_path_validator.rb` file][reserved] under the `TOP_LEVEL_ROUTES`, `PROJECT_WILDCARD_ROUTES` and `GROUP_ROUTES` lists: - `TOP_LEVEL_ROUTES`: are names that are reserved as usernames or top level groups - - `WILDCARD_ROUTES`: are names that are reserved for child groups or projects. + - `PROJECT_WILDCARD_ROUTES`: are names that are reserved for child groups or projects. - `GROUP_ROUTES`: are names that are reserved for all groups or projects. To create a subgroup: diff --git a/lib/api/repositories.rb b/lib/api/repositories.rb index 8f16e532ecb..14d2bff9cb5 100644 --- a/lib/api/repositories.rb +++ b/lib/api/repositories.rb @@ -85,7 +85,7 @@ module API optional :sha, type: String, desc: 'The commit sha of the archive to be downloaded' optional :format, type: String, desc: 'The archive format' end - get ':id/repository/archive', requirements: { format: Gitlab::Regex.archive_formats_regex } do + get ':id/repository/archive', requirements: { format: Gitlab::PathRegex.archive_formats_regex } do begin send_git_archive user_project.repository, ref: params[:sha], format: params[:format] rescue diff --git a/lib/api/v3/repositories.rb b/lib/api/v3/repositories.rb index e4d14bc8168..0eaa0de2eef 100644 --- a/lib/api/v3/repositories.rb +++ b/lib/api/v3/repositories.rb @@ -72,7 +72,7 @@ module API optional :sha, type: String, desc: 'The commit sha of the archive to be downloaded' optional :format, type: String, desc: 'The archive format' end - get ':id/repository/archive', requirements: { format: Gitlab::Regex.archive_formats_regex } do + get ':id/repository/archive', requirements: { format: Gitlab::PathRegex.archive_formats_regex } do begin send_git_archive user_project.repository, ref: params[:sha], format: params[:format] rescue diff --git a/lib/constraints/group_url_constrainer.rb b/lib/constraints/group_url_constrainer.rb index 5f379756c11..6fc1d56d7a0 100644 --- a/lib/constraints/group_url_constrainer.rb +++ b/lib/constraints/group_url_constrainer.rb @@ -1,9 +1,9 @@ class GroupUrlConstrainer def matches?(request) - id = request.params[:id] + full_path = request.params[:group_id] || request.params[:id] - return false unless DynamicPathValidator.valid?(id) + return false unless DynamicPathValidator.valid_group_path?(full_path) - Group.find_by_full_path(id, follow_redirects: request.get?).present? + Group.find_by_full_path(full_path, follow_redirects: request.get?).present? end end diff --git a/lib/constraints/project_url_constrainer.rb b/lib/constraints/project_url_constrainer.rb index 6f542f63f98..4c0aee6c48f 100644 --- a/lib/constraints/project_url_constrainer.rb +++ b/lib/constraints/project_url_constrainer.rb @@ -2,9 +2,9 @@ class ProjectUrlConstrainer def matches?(request) namespace_path = request.params[:namespace_id] project_path = request.params[:project_id] || request.params[:id] - full_path = namespace_path + '/' + project_path + full_path = [namespace_path, project_path].join('/') - return false unless DynamicPathValidator.valid?(full_path) + return false unless DynamicPathValidator.valid_project_path?(full_path) Project.find_by_full_path(full_path, follow_redirects: request.get?).present? end diff --git a/lib/constraints/user_url_constrainer.rb b/lib/constraints/user_url_constrainer.rb index 28159dc0dec..d16ae7f3f40 100644 --- a/lib/constraints/user_url_constrainer.rb +++ b/lib/constraints/user_url_constrainer.rb @@ -1,5 +1,9 @@ class UserUrlConstrainer def matches?(request) - User.find_by_full_path(request.params[:username], follow_redirects: request.get?).present? + full_path = request.params[:username] + + return false unless DynamicPathValidator.valid_user_path?(full_path) + + User.find_by_full_path(full_path, follow_redirects: request.get?).present? end end diff --git a/lib/gitlab/etag_caching/router.rb b/lib/gitlab/etag_caching/router.rb index ba31041d0c1..7d03209684a 100644 --- a/lib/gitlab/etag_caching/router.rb +++ b/lib/gitlab/etag_caching/router.rb @@ -10,7 +10,7 @@ module Gitlab # - Ending in `issues/id`/realtime_changes` for the `issue_title` route USED_IN_ROUTES = %w[noteable issue notes issues realtime_changes commit pipelines merge_requests new].freeze - RESERVED_WORDS = DynamicPathValidator::WILDCARD_ROUTES - USED_IN_ROUTES + RESERVED_WORDS = Gitlab::PathRegex::ILLEGAL_PROJECT_PATH_WORDS - USED_IN_ROUTES RESERVED_WORDS_REGEX = Regexp.union(*RESERVED_WORDS) ROUTES = [ Gitlab::EtagCaching::Router::Route.new( diff --git a/lib/gitlab/path_regex.rb b/lib/gitlab/path_regex.rb new file mode 100644 index 00000000000..1ee0f2b3998 --- /dev/null +++ b/lib/gitlab/path_regex.rb @@ -0,0 +1,264 @@ +module Gitlab + module PathRegex + extend self + + # All routes that appear on the top level must be listed here. + # This will make sure that groups cannot be created with these names + # as these routes would be masked by the paths already in place. + # + # Example: + # /api/api-project + # + # the path `api` shouldn't be allowed because it would be masked by `api/*` + # + TOP_LEVEL_ROUTES = %w[ + - + .well-known + abuse_reports + admin + all + api + assets + autocomplete + ci + dashboard + explore + files + groups + health_check + help + hooks + import + invites + issues + jwt + koding + member + merge_requests + new + notes + notification_settings + oauth + profile + projects + public + repository + robots.txt + s + search + sent_notifications + services + snippets + teams + u + unicorn_test + unsubscribes + uploads + users + ].freeze + + # This list should contain all words following `/*namespace_id/:project_id` in + # routes that contain a second wildcard. + # + # Example: + # /*namespace_id/:project_id/badges/*ref/build + # + # If `badges` was allowed as a project/group name, we would not be able to access the + # `badges` route for those projects: + # + # Consider a namespace with path `foo/bar` and a project called `badges`. + # The route to the build badge would then be `/foo/bar/badges/badges/master/build.svg` + # + # When accessing this path the route would be matched to the `badges` path + # with the following params: + # - namespace_id: `foo` + # - project_id: `bar` + # - ref: `badges/master` + # + # Failing to find the project, this would result in a 404. + # + # By rejecting `badges` the router can _count_ on the fact that `badges` will + # be preceded by the `namespace/project`. + PROJECT_WILDCARD_ROUTES = %w[ + badges + blame + blob + builds + commits + create + create_dir + edit + environments/folders + files + find_file + gitlab-lfs/objects + info/lfs/objects + new + preview + raw + refs + tree + update + wikis + ].freeze + + # These are all the paths that follow `/groups/*id/ or `/groups/*group_id` + # We need to reject these because we have a `/groups/*id` page that is the same + # as the `/*id`. + # + # If we would allow a subgroup to be created with the name `activity` then + # this group would not be accessible through `/groups/parent/activity` since + # this would map to the activity-page of its parent. + GROUP_ROUTES = %w[ + activity + analytics + audit_events + avatar + edit + group_members + hooks + issues + labels + ldap + ldap_group_links + merge_requests + milestones + notification_setting + pipeline_quota + projects + subgroups + ].freeze + + ILLEGAL_PROJECT_PATH_WORDS = PROJECT_WILDCARD_ROUTES + ILLEGAL_GROUP_PATH_WORDS = (PROJECT_WILDCARD_ROUTES | GROUP_ROUTES).freeze + + # The namespace regex is used in JavaScript to validate usernames in the "Register" form. However, Javascript + # does not support the negative lookbehind assertion (?<!) that disallows usernames ending in `.git` and `.atom`. + # Since this is a non-trivial problem to solve in Javascript (heavily complicate the regex, modify view code to + # allow non-regex validations, etc), `NAMESPACE_FORMAT_REGEX_JS` serves as a Javascript-compatible version of + # `NAMESPACE_FORMAT_REGEX`, with the negative lookbehind assertion removed. This means that the client-side validation + # will pass for usernames ending in `.atom` and `.git`, but will be caught by the server-side validation. + PATH_REGEX_STR = '[a-zA-Z0-9_\.][a-zA-Z0-9_\-\.]*'.freeze + NAMESPACE_FORMAT_REGEX_JS = PATH_REGEX_STR + '[a-zA-Z0-9_\-]|[a-zA-Z0-9_]'.freeze + + NO_SUFFIX_REGEX = /(?<!\.git|\.atom)/.freeze + NAMESPACE_FORMAT_REGEX = /(?:#{NAMESPACE_FORMAT_REGEX_JS})#{NO_SUFFIX_REGEX}/.freeze + PROJECT_PATH_FORMAT_REGEX = /(?:#{PATH_REGEX_STR})#{NO_SUFFIX_REGEX}/.freeze + FULL_NAMESPACE_FORMAT_REGEX = %r{(#{NAMESPACE_FORMAT_REGEX}/)*#{NAMESPACE_FORMAT_REGEX}}.freeze + + def root_namespace_route_regex + @root_namespace_route_regex ||= begin + illegal_words = Regexp.new(Regexp.union(TOP_LEVEL_ROUTES).source, Regexp::IGNORECASE) + + single_line_regexp %r{ + (?!(#{illegal_words})/) + #{NAMESPACE_FORMAT_REGEX} + }x + end + end + + def full_namespace_route_regex + @full_namespace_route_regex ||= begin + illegal_words = Regexp.new(Regexp.union(ILLEGAL_GROUP_PATH_WORDS).source, Regexp::IGNORECASE) + + single_line_regexp %r{ + #{root_namespace_route_regex} + (?: + / + (?!#{illegal_words}/) + #{NAMESPACE_FORMAT_REGEX} + )* + }x + end + end + + def project_route_regex + @project_route_regex ||= begin + illegal_words = Regexp.new(Regexp.union(ILLEGAL_PROJECT_PATH_WORDS).source, Regexp::IGNORECASE) + + single_line_regexp %r{ + (?!(#{illegal_words})/) + #{PROJECT_PATH_FORMAT_REGEX} + }x + end + end + + def project_git_route_regex + @project_git_route_regex ||= /#{project_route_regex}\.git/.freeze + end + + def root_namespace_path_regex + @root_namespace_path_regex ||= %r{\A#{root_namespace_route_regex}/\z} + end + + def full_namespace_path_regex + @full_namespace_path_regex ||= %r{\A#{full_namespace_route_regex}/\z} + end + + def project_path_regex + @project_path_regex ||= %r{\A#{project_route_regex}/\z} + end + + def full_project_path_regex + @full_project_path_regex ||= %r{\A#{full_namespace_route_regex}/#{project_route_regex}/\z} + end + + def full_namespace_format_regex + @namespace_format_regex ||= /A#{FULL_NAMESPACE_FORMAT_REGEX}\z/.freeze + end + + def namespace_format_regex + @namespace_format_regex ||= /\A#{NAMESPACE_FORMAT_REGEX}\z/.freeze + end + + def namespace_format_message + "can contain only letters, digits, '_', '-' and '.'. " \ + "Cannot start with '-' or end in '.', '.git' or '.atom'." \ + end + + def project_path_format_regex + @project_path_format_regex ||= /\A#{PROJECT_PATH_FORMAT_REGEX}\z/.freeze + end + + def project_path_format_message + "can contain only letters, digits, '_', '-' and '.'. " \ + "Cannot start with '-', end in '.git' or end in '.atom'" \ + end + + def archive_formats_regex + # |zip|tar| tar.gz | tar.bz2 | + @archive_formats_regex ||= /(zip|tar|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)/.freeze + end + + def git_reference_regex + # Valid git ref regex, see: + # https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html + + @git_reference_regex ||= single_line_regexp %r{ + (?! + (?# doesn't begins with) + \/| (?# rule #6) + (?# doesn't contain) + .*(?: + [\/.]\.| (?# rule #1,3) + \/\/| (?# rule #6) + @\{| (?# rule #8) + \\ (?# rule #9) + ) + ) + [^\000-\040\177~^:?*\[]+ (?# rule #4-5) + (?# doesn't end with) + (?<!\.lock) (?# rule #1) + (?<![\/.]) (?# rule #6-7) + }x + end + + private + + def single_line_regexp(regex) + # Turns a multiline extended regexp into a single line one, + # beacuse `rake routes` breaks on multiline regexes. + Regexp.new(regex.source.gsub(/\(\?#.+?\)/ , '').gsub(/\s*/, ''), regex.options ^ Regexp::EXTENDED).freeze + end + end +end diff --git a/lib/gitlab/regex.rb b/lib/gitlab/regex.rb index b7fef5dd068..e4d2a992470 100644 --- a/lib/gitlab/regex.rb +++ b/lib/gitlab/regex.rb @@ -2,39 +2,6 @@ module Gitlab module Regex extend self - # The namespace regex is used in Javascript to validate usernames in the "Register" form. However, Javascript - # does not support the negative lookbehind assertion (?<!) that disallows usernames ending in `.git` and `.atom`. - # Since this is a non-trivial problem to solve in Javascript (heavily complicate the regex, modify view code to - # allow non-regex validatiions, etc), `NAMESPACE_REGEX_STR_JS` serves as a Javascript-compatible version of - # `NAMESPACE_REGEX_STR`, with the negative lookbehind assertion removed. This means that the client-side validation - # will pass for usernames ending in `.atom` and `.git`, but will be caught by the server-side validation. - PATH_REGEX_STR = '[a-zA-Z0-9_\.][a-zA-Z0-9_\-\.]*'.freeze - NAMESPACE_REGEX_STR_JS = PATH_REGEX_STR + '[a-zA-Z0-9_\-]|[a-zA-Z0-9_]'.freeze - NO_SUFFIX_REGEX_STR = '(?<!\.git|\.atom)'.freeze - NAMESPACE_REGEX_STR = "(?:#{NAMESPACE_REGEX_STR_JS})#{NO_SUFFIX_REGEX_STR}".freeze - PROJECT_REGEX_STR = "(?:#{PATH_REGEX_STR})#{NO_SUFFIX_REGEX_STR}".freeze - - # Same as NAMESPACE_REGEX_STR but allows `/` in the path. - # So `group/subgroup` will match this regex but not NAMESPACE_REGEX_STR - FULL_NAMESPACE_REGEX_STR = "(?:#{NAMESPACE_REGEX_STR}/)*#{NAMESPACE_REGEX_STR}".freeze - - def namespace_regex - @namespace_regex ||= /\A#{NAMESPACE_REGEX_STR}\z/.freeze - end - - def full_namespace_regex - @full_namespace_regex ||= %r{\A#{FULL_NAMESPACE_REGEX_STR}\z} - end - - def namespace_route_regex - @namespace_route_regex ||= /#{NAMESPACE_REGEX_STR}/.freeze - end - - def namespace_regex_message - "can contain only letters, digits, '_', '-' and '.'. " \ - "Cannot start with '-' or end in '.', '.git' or '.atom'." \ - end - def namespace_name_regex @namespace_name_regex ||= /\A[\p{Alnum}\p{Pd}_\. ]*\z/.freeze end @@ -52,23 +19,6 @@ module Gitlab "It must start with letter, digit, emoji or '_'." end - def project_path_regex - @project_path_regex ||= /\A#{PROJECT_REGEX_STR}\z/.freeze - end - - def project_route_regex - @project_route_regex ||= /#{PROJECT_REGEX_STR}/.freeze - end - - def project_git_route_regex - @project_route_git_regex ||= /#{PATH_REGEX_STR}\.git/.freeze - end - - def project_path_regex_message - "can contain only letters, digits, '_', '-' and '.'. " \ - "Cannot start with '-', end in '.git' or end in '.atom'" \ - end - def file_name_regex @file_name_regex ||= /\A[[[:alnum:]]_\-\.\@\+]*\z/.freeze end @@ -77,36 +27,8 @@ module Gitlab "can contain only letters, digits, '_', '-', '@', '+' and '.'." end - def archive_formats_regex - # |zip|tar| tar.gz | tar.bz2 | - @archive_formats_regex ||= /(zip|tar|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)/.freeze - end - - def git_reference_regex - # Valid git ref regex, see: - # https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html - - @git_reference_regex ||= %r{ - (?! - (?# doesn't begins with) - \/| (?# rule #6) - (?# doesn't contain) - .*(?: - [\/.]\.| (?# rule #1,3) - \/\/| (?# rule #6) - @\{| (?# rule #8) - \\ (?# rule #9) - ) - ) - [^\000-\040\177~^:?*\[]+ (?# rule #4-5) - (?# doesn't end with) - (?<!\.lock) (?# rule #1) - (?<![\/.]) (?# rule #6-7) - }x.freeze - end - def container_registry_reference_regex - git_reference_regex + Gitlab::PathRegex.git_reference_regex end ## diff --git a/spec/lib/gitlab/path_regex_spec.rb b/spec/lib/gitlab/path_regex_spec.rb new file mode 100644 index 00000000000..1eea710c80b --- /dev/null +++ b/spec/lib/gitlab/path_regex_spec.rb @@ -0,0 +1,384 @@ +# coding: utf-8 +require 'spec_helper' + +describe Gitlab::PathRegex, lib: true do + # Pass in a full path to remove the format segment: + # `/ci/lint(.:format)` -> `/ci/lint` + def without_format(path) + path.split('(', 2)[0] + end + + # Pass in a full path and get the last segment before a wildcard + # That's not a parameter + # `/*namespace_id/:project_id/builds/artifacts/*ref_name_and_path` + # -> 'builds/artifacts' + def path_before_wildcard(path) + path = path.gsub(STARTING_WITH_NAMESPACE, "") + path_segments = path.split('/').reject(&:empty?) + wildcard_index = path_segments.index { |segment| parameter?(segment) } + + segments_before_wildcard = path_segments[0..wildcard_index - 1] + + segments_before_wildcard.join('/') + end + + def parameter?(segment) + segment =~ /[*:]/ + end + + # If the path is reserved. Then no conflicting paths can# be created for any + # route using this reserved word. + # + # Both `builds/artifacts` & `build` are covered by reserving the word + # `build` + def wildcards_include?(path) + described_class::PROJECT_WILDCARD_ROUTES.include?(path) || + described_class::PROJECT_WILDCARD_ROUTES.include?(path.split('/').first) + end + + def failure_message(missing_words, constant_name, migration_helper) + missing_words = Array(missing_words) + <<-MSG + Found new routes that could cause conflicts with existing namespaced routes + for groups or projects. + + Add <#{missing_words.join(', ')}> to `Gitlab::PathRegex::#{constant_name} + to make sure no projects or namespaces can be created with those paths. + + To rename any existing records with those paths you can use the + `Gitlab::Database::RenameReservedpathsMigration::<VERSION>.#{migration_helper}` + migration helper. + + Make sure to make a note of the renamed records in the release blog post. + + MSG + end + + let(:all_routes) do + route_set = Rails.application.routes + routes_collection = route_set.routes + routes_array = routes_collection.routes + routes_array.map { |route| route.path.spec.to_s } + end + + let(:routes_without_format) { all_routes.map { |path| without_format(path) } } + + # Routes not starting with `/:` or `/*` + # all routes not starting with a param + let(:routes_not_starting_in_wildcard) { routes_without_format.select { |p| p !~ %r{^/[:*]} } } + + let(:top_level_words) do + routes_not_starting_in_wildcard.map do |route| + route.split('/')[1] + end.compact.uniq + end + + # All routes that start with a namespaced path, that have 1 or more + # path-segments before having another wildcard parameter. + # - Starting with paths: + # - `/*namespace_id/:project_id/` + # - `/*namespace_id/:id/` + # - Followed by one or more path-parts not starting with `:` or `*` + # - Followed by a path-part that includes a wildcard parameter `*` + # At the time of writing these routes match: http://rubular.com/r/Rv2pDE5Dvw + STARTING_WITH_NAMESPACE = %r{^/\*namespace_id/:(project_)?id} + NON_PARAM_PARTS = %r{[^:*][a-z\-_/]*} + ANY_OTHER_PATH_PART = %r{[a-z\-_/:]*} + WILDCARD_SEGMENT = %r{\*} + let(:namespaced_wildcard_routes) do + routes_without_format.select do |p| + p =~ %r{#{STARTING_WITH_NAMESPACE}/#{NON_PARAM_PARTS}/#{ANY_OTHER_PATH_PART}#{WILDCARD_SEGMENT}} + end + end + + # This will return all paths that are used in a namespaced route + # before another wildcard path: + # + # /*namespace_id/:project_id/builds/artifacts/*ref_name_and_path + # /*namespace_id/:project_id/info/lfs/objects/*oid + # /*namespace_id/:project_id/commits/*id + # /*namespace_id/:project_id/builds/:build_id/artifacts/file/*path + # -> ['builds/artifacts', 'info/lfs/objects', 'commits', 'artifacts/file'] + let(:all_wildcard_paths) do + namespaced_wildcard_routes.map do |route| + path_before_wildcard(route) + end.uniq + end + + STARTING_WITH_GROUP = %r{^/groups/\*(group_)?id/} + let(:group_routes) do + routes_without_format.select do |path| + path =~ STARTING_WITH_GROUP + end + end + + let(:paths_after_group_id) do + group_routes.map do |route| + route.gsub(STARTING_WITH_GROUP, '').split('/').first + end.uniq + end + + describe 'TOP_LEVEL_ROUTES' do + it 'includes all the top level namespaces' do + failure_block = lambda do + missing_words = top_level_words - described_class::TOP_LEVEL_ROUTES + failure_message(missing_words, 'TOP_LEVEL_ROUTES', 'rename_root_paths') + end + + expect(described_class::TOP_LEVEL_ROUTES) + .to include(*top_level_words), failure_block + end + end + + describe 'GROUP_ROUTES' do + it "don't contain a second wildcard" do + failure_block = lambda do + missing_words = paths_after_group_id - described_class::GROUP_ROUTES + failure_message(missing_words, 'GROUP_ROUTES', 'rename_child_paths') + end + + expect(described_class::GROUP_ROUTES) + .to include(*paths_after_group_id), failure_block + end + end + + describe 'PROJECT_WILDCARD_ROUTES' do + it 'includes all paths that can be used after a namespace/project path' do + aggregate_failures do + all_wildcard_paths.each do |path| + expect(wildcards_include?(path)) + .to be(true), failure_message(path, 'PROJECT_WILDCARD_ROUTES', 'rename_wildcard_paths') + end + end + end + end + + describe '.root_namespace_path_regex' do + subject { described_class.root_namespace_path_regex } + + it 'rejects top level routes' do + expect(subject).not_to match('admin/') + expect(subject).not_to match('api/') + expect(subject).not_to match('.well-known/') + end + + it 'accepts project wildcard routes' do + expect(subject).to match('blob/') + expect(subject).to match('edit/') + expect(subject).to match('wikis/') + end + + it 'accepts group routes' do + expect(subject).to match('activity/') + expect(subject).to match('group_members/') + expect(subject).to match('subgroups/') + end + + it 'is not case sensitive' do + expect(subject).not_to match('Users/') + end + + it 'does not allow extra slashes' do + expect(subject).not_to match('/blob/') + expect(subject).not_to match('blob//') + end + end + + describe '.full_namespace_path_regex' do + subject { described_class.full_namespace_path_regex } + + context 'at the top level' do + context 'when the final level' do + it 'rejects top level routes' do + expect(subject).not_to match('admin/') + expect(subject).not_to match('api/') + expect(subject).not_to match('.well-known/') + end + + it 'accepts project wildcard routes' do + expect(subject).to match('blob/') + expect(subject).to match('edit/') + expect(subject).to match('wikis/') + end + + it 'accepts group routes' do + expect(subject).to match('activity/') + expect(subject).to match('group_members/') + expect(subject).to match('subgroups/') + end + end + + context 'when more levels follow' do + it 'rejects top level routes' do + expect(subject).not_to match('admin/more/') + expect(subject).not_to match('api/more/') + expect(subject).not_to match('.well-known/more/') + end + + it 'accepts project wildcard routes' do + expect(subject).to match('blob/more/') + expect(subject).to match('edit/more/') + expect(subject).to match('wikis/more/') + expect(subject).to match('environments/folders/') + expect(subject).to match('info/lfs/objects/') + end + + it 'accepts group routes' do + expect(subject).to match('activity/more/') + expect(subject).to match('group_members/more/') + expect(subject).to match('subgroups/more/') + end + end + end + + context 'at the second level' do + context 'when the final level' do + it 'accepts top level routes' do + expect(subject).to match('root/admin/') + expect(subject).to match('root/api/') + expect(subject).to match('root/.well-known/') + end + + it 'rejects project wildcard routes' do + expect(subject).not_to match('root/blob/') + expect(subject).not_to match('root/edit/') + expect(subject).not_to match('root/wikis/') + expect(subject).not_to match('root/environments/folders/') + expect(subject).not_to match('root/info/lfs/objects/') + end + + it 'rejects group routes' do + expect(subject).not_to match('root/activity/') + expect(subject).not_to match('root/group_members/') + expect(subject).not_to match('root/subgroups/') + end + end + + context 'when more levels follow' do + it 'accepts top level routes' do + expect(subject).to match('root/admin/more/') + expect(subject).to match('root/api/more/') + expect(subject).to match('root/.well-known/more/') + end + + it 'rejects project wildcard routes' do + expect(subject).not_to match('root/blob/more/') + expect(subject).not_to match('root/edit/more/') + expect(subject).not_to match('root/wikis/more/') + expect(subject).not_to match('root/environments/folders/more/') + expect(subject).not_to match('root/info/lfs/objects/more/') + end + + it 'rejects group routes' do + expect(subject).not_to match('root/activity/more/') + expect(subject).not_to match('root/group_members/more/') + expect(subject).not_to match('root/subgroups/more/') + end + end + end + + it 'is not case sensitive' do + expect(subject).not_to match('root/Blob/') + end + + it 'does not allow extra slashes' do + expect(subject).not_to match('/root/admin/') + expect(subject).not_to match('root/admin//') + end + end + + describe '.project_path_regex' do + subject { described_class.project_path_regex } + + it 'accepts top level routes' do + expect(subject).to match('admin/') + expect(subject).to match('api/') + expect(subject).to match('.well-known/') + end + + it 'rejects project wildcard routes' do + expect(subject).not_to match('blob/') + expect(subject).not_to match('edit/') + expect(subject).not_to match('wikis/') + expect(subject).not_to match('environments/folders/') + expect(subject).not_to match('info/lfs/objects/') + end + + it 'accepts group routes' do + expect(subject).to match('activity/') + expect(subject).to match('group_members/') + expect(subject).to match('subgroups/') + end + + it 'is not case sensitive' do + expect(subject).not_to match('Blob/') + end + + it 'does not allow extra slashes' do + expect(subject).not_to match('/admin/') + expect(subject).not_to match('admin//') + end + end + + describe '.full_project_path_regex' do + subject { described_class.full_project_path_regex } + + it 'accepts top level routes' do + expect(subject).to match('root/admin/') + expect(subject).to match('root/api/') + expect(subject).to match('root/.well-known/') + end + + it 'rejects project wildcard routes' do + expect(subject).not_to match('root/blob/') + expect(subject).not_to match('root/edit/') + expect(subject).not_to match('root/wikis/') + expect(subject).not_to match('root/environments/folders/') + expect(subject).not_to match('root/info/lfs/objects/') + end + + it 'accepts group routes' do + expect(subject).to match('root/activity/') + expect(subject).to match('root/group_members/') + expect(subject).to match('root/subgroups/') + end + + it 'is not case sensitive' do + expect(subject).not_to match('root/Blob/') + end + + it 'does not allow extra slashes' do + expect(subject).not_to match('/root/admin/') + expect(subject).not_to match('root/admin//') + end + end + + describe '.namespace_format_regex' do + subject { described_class.namespace_format_regex } + + it { is_expected.to match('gitlab-ce') } + it { is_expected.to match('gitlab_git') } + it { is_expected.to match('_underscore.js') } + it { is_expected.to match('100px.com') } + it { is_expected.to match('gitlab.org') } + it { is_expected.not_to match('?gitlab') } + it { is_expected.not_to match('git lab') } + it { is_expected.not_to match('gitlab.git') } + it { is_expected.not_to match('gitlab.org.') } + it { is_expected.not_to match('gitlab.org/') } + it { is_expected.not_to match('/gitlab.org') } + it { is_expected.not_to match('gitlab git') } + end + + describe '.project_path_format_regex' do + subject { described_class.project_path_format_regex } + + it { is_expected.to match('gitlab-ce') } + it { is_expected.to match('gitlab_git') } + it { is_expected.to match('_underscore.js') } + it { is_expected.to match('100px.com') } + it { is_expected.not_to match('?gitlab') } + it { is_expected.not_to match('git lab') } + it { is_expected.not_to match('gitlab.git') } + end +end diff --git a/spec/lib/gitlab/regex_spec.rb b/spec/lib/gitlab/regex_spec.rb index 72e947f2cc2..0bee892fe0c 100644 --- a/spec/lib/gitlab/regex_spec.rb +++ b/spec/lib/gitlab/regex_spec.rb @@ -2,18 +2,6 @@ require 'spec_helper' describe Gitlab::Regex, lib: true do - describe '.project_path_regex' do - subject { described_class.project_path_regex } - - it { is_expected.to match('gitlab-ce') } - it { is_expected.to match('gitlab_git') } - it { is_expected.to match('_underscore.js') } - it { is_expected.to match('100px.com') } - it { is_expected.not_to match('?gitlab') } - it { is_expected.not_to match('git lab') } - it { is_expected.not_to match('gitlab.git') } - end - describe '.project_name_regex' do subject { described_class.project_name_regex } @@ -44,16 +32,4 @@ describe Gitlab::Regex, lib: true do it { is_expected.not_to match('9foo') } it { is_expected.not_to match('foo-') } end - - describe '.full_namespace_regex' do - subject { described_class.full_namespace_regex } - - it { is_expected.to match('gitlab.org') } - it { is_expected.to match('gitlab.org/gitlab-git') } - it { is_expected.not_to match('gitlab.org.') } - it { is_expected.not_to match('gitlab.org/') } - it { is_expected.not_to match('/gitlab.org') } - it { is_expected.not_to match('gitlab.git') } - it { is_expected.not_to match('gitlab git') } - end end diff --git a/spec/models/namespace_spec.rb b/spec/models/namespace_spec.rb index 8624616316c..ff5e7c350aa 100644 --- a/spec/models/namespace_spec.rb +++ b/spec/models/namespace_spec.rb @@ -37,7 +37,7 @@ describe Namespace, models: true do it 'rejects nested paths' do parent = create(:group, :nested, path: 'environments') - namespace = build(:project, path: 'folders', namespace: parent) + namespace = build(:group, path: 'folders', parent: parent) expect(namespace).not_to be_valid end @@ -238,8 +238,8 @@ describe Namespace, models: true do end context 'in sub-groups' do - let(:parent) { create(:namespace, path: 'parent') } - let(:child) { create(:namespace, parent: parent, path: 'child') } + let(:parent) { create(:group, path: 'parent') } + let(:child) { create(:group, parent: parent, path: 'child') } let!(:project) { create(:project_empty_repo, namespace: child) } let(:path_in_dir) { File.join(repository_storage_path, 'parent', 'child') } let(:deleted_path) { File.join('parent', "child+#{child.id}+deleted") } diff --git a/spec/models/project_services/kubernetes_service_spec.rb b/spec/models/project_services/kubernetes_service_spec.rb index c1c2f2a7219..8c0a8f657cb 100644 --- a/spec/models/project_services/kubernetes_service_spec.rb +++ b/spec/models/project_services/kubernetes_service_spec.rb @@ -100,7 +100,7 @@ describe KubernetesService, models: true, caching: true do it 'sets the namespace to the default' do expect(kube_namespace).not_to be_nil - expect(kube_namespace[:placeholder]).to match(/\A#{Gitlab::Regex::PATH_REGEX_STR}-\d+\z/) + expect(kube_namespace[:placeholder]).to match(/\A#{Gitlab::PathRegex::PATH_REGEX_STR}-\d+\z/) end end end @@ -187,7 +187,7 @@ describe KubernetesService, models: true, caching: true do kube_namespace = subject.predefined_variables.find { |h| h[:key] == 'KUBE_NAMESPACE' } expect(kube_namespace).not_to be_nil - expect(kube_namespace[:value]).to match(/\A#{Gitlab::Regex::PATH_REGEX_STR}-\d+\z/) + expect(kube_namespace[:value]).to match(/\A#{Gitlab::PathRegex::PATH_REGEX_STR}-\d+\z/) end end end diff --git a/spec/requests/api/users_spec.rb b/spec/requests/api/users_spec.rb index 4919ad19833..a2503dbeb69 100644 --- a/spec/requests/api/users_spec.rb +++ b/spec/requests/api/users_spec.rb @@ -287,7 +287,7 @@ describe API::Users do expect(json_response['message']['projects_limit']). to eq(['must be greater than or equal to 0']) expect(json_response['message']['username']). - to eq([Gitlab::Regex.namespace_regex_message]) + to eq([Gitlab::PathRegex.namespace_format_message]) end it "is not available for non admin users" do @@ -459,7 +459,7 @@ describe API::Users do expect(json_response['message']['projects_limit']). to eq(['must be greater than or equal to 0']) expect(json_response['message']['username']). - to eq([Gitlab::Regex.namespace_regex_message]) + to eq([Gitlab::PathRegex.namespace_format_message]) end it 'returns 400 if provider is missing for identity update' do diff --git a/spec/routing/project_routing_spec.rb b/spec/routing/project_routing_spec.rb index d5400bbaaf1..a391c046f92 100644 --- a/spec/routing/project_routing_spec.rb +++ b/spec/routing/project_routing_spec.rb @@ -462,6 +462,8 @@ describe 'project routing' do expect(get('/gitlab/gitlabhq/blob/master/app/models/compare.rb')).to route_to('projects/blob#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master/app/models/compare.rb') expect(get('/gitlab/gitlabhq/blob/master/app/models/diff.js')).to route_to('projects/blob#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master/app/models/diff.js') expect(get('/gitlab/gitlabhq/blob/master/files.scss')).to route_to('projects/blob#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master/files.scss') + expect(get('/gitlab/gitlabhq/blob/master/blob/index.js')).to route_to('projects/blob#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master/blob/index.js') + expect(get('/gitlab/gitlabhq/blob/blob/master/blob/index.js')).to route_to('projects/blob#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'blob/master/blob/index.js') end end @@ -470,6 +472,8 @@ describe 'project routing' do it 'to #show' do expect(get('/gitlab/gitlabhq/tree/master/app/models/project.rb')).to route_to('projects/tree#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master/app/models/project.rb') expect(get('/gitlab/gitlabhq/tree/master/files.scss')).to route_to('projects/tree#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master/files.scss') + expect(get('/gitlab/gitlabhq/tree/master/tree/files')).to route_to('projects/tree#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master/tree/files') + expect(get('/gitlab/gitlabhq/tree/tree/master/tree/files')).to route_to('projects/tree#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'tree/master/tree/files') end end diff --git a/spec/validators/dynamic_path_validator_spec.rb b/spec/validators/dynamic_path_validator_spec.rb index b114bfc1bca..5f998e78f07 100644 --- a/spec/validators/dynamic_path_validator_spec.rb +++ b/spec/validators/dynamic_path_validator_spec.rb @@ -3,246 +3,46 @@ require 'spec_helper' describe DynamicPathValidator do let(:validator) { described_class.new(attributes: [:path]) } - # Pass in a full path to remove the format segment: - # `/ci/lint(.:format)` -> `/ci/lint` - def without_format(path) - path.split('(', 2)[0] - end - - # Pass in a full path and get the last segment before a wildcard - # That's not a parameter - # `/*namespace_id/:project_id/builds/artifacts/*ref_name_and_path` - # -> 'builds/artifacts' - def path_before_wildcard(path) - path = path.gsub(STARTING_WITH_NAMESPACE, "") - path_segments = path.split('/').reject(&:empty?) - wildcard_index = path_segments.index { |segment| parameter?(segment) } - - segments_before_wildcard = path_segments[0..wildcard_index - 1] - - segments_before_wildcard.join('/') - end - - def parameter?(segment) - segment =~ /[*:]/ - end - - # If the path is reserved. Then no conflicting paths can# be created for any - # route using this reserved word. - # - # Both `builds/artifacts` & `build` are covered by reserving the word - # `build` - def wildcards_include?(path) - described_class::WILDCARD_ROUTES.include?(path) || - described_class::WILDCARD_ROUTES.include?(path.split('/').first) - end - - def failure_message(missing_words, constant_name, migration_helper) - missing_words = Array(missing_words) - <<-MSG - Found new routes that could cause conflicts with existing namespaced routes - for groups or projects. + describe '#path_valid_for_record?' do + context 'for project' do + it 'calls valid_project_path?' do + project = build(:project, path: 'activity') - Add <#{missing_words.join(', ')}> to `DynamicPathValidator::#{constant_name} - to make sure no projects or namespaces can be created with those paths. + expect(described_class).to receive(:valid_project_path?).with(project.full_path).and_call_original - To rename any existing records with those paths you can use the - `Gitlab::Database::RenameReservedpathsMigration::<VERSION>.#{migration_helper}` - migration helper. - - Make sure to make a note of the renamed records in the release blog post. - - MSG - end - - let(:all_routes) do - Rails.application.routes.routes.routes. - map { |r| r.path.spec.to_s } - end - - let(:routes_without_format) { all_routes.map { |path| without_format(path) } } - - # Routes not starting with `/:` or `/*` - # all routes not starting with a param - let(:routes_not_starting_in_wildcard) { routes_without_format.select { |p| p !~ %r{^/[:*]} } } - - let(:top_level_words) do - routes_not_starting_in_wildcard.map do |route| - route.split('/')[1] - end.compact.uniq - end - - # All routes that start with a namespaced path, that have 1 or more - # path-segments before having another wildcard parameter. - # - Starting with paths: - # - `/*namespace_id/:project_id/` - # - `/*namespace_id/:id/` - # - Followed by one or more path-parts not starting with `:` or `*` - # - Followed by a path-part that includes a wildcard parameter `*` - # At the time of writing these routes match: http://rubular.com/r/Rv2pDE5Dvw - STARTING_WITH_NAMESPACE = %r{^/\*namespace_id/:(project_)?id} - NON_PARAM_PARTS = %r{[^:*][a-z\-_/]*} - ANY_OTHER_PATH_PART = %r{[a-z\-_/:]*} - WILDCARD_SEGMENT = %r{\*} - let(:namespaced_wildcard_routes) do - routes_without_format.select do |p| - p =~ %r{#{STARTING_WITH_NAMESPACE}/#{NON_PARAM_PARTS}/#{ANY_OTHER_PATH_PART}#{WILDCARD_SEGMENT}} - end - end - - # This will return all paths that are used in a namespaced route - # before another wildcard path: - # - # /*namespace_id/:project_id/builds/artifacts/*ref_name_and_path - # /*namespace_id/:project_id/info/lfs/objects/*oid - # /*namespace_id/:project_id/commits/*id - # /*namespace_id/:project_id/builds/:build_id/artifacts/file/*path - # -> ['builds/artifacts', 'info/lfs/objects', 'commits', 'artifacts/file'] - let(:all_wildcard_paths) do - namespaced_wildcard_routes.map do |route| - path_before_wildcard(route) - end.uniq - end - - STARTING_WITH_GROUP = %r{^/groups/\*(group_)?id/} - let(:group_routes) do - routes_without_format.select do |path| - path =~ STARTING_WITH_GROUP - end - end - - let(:paths_after_group_id) do - group_routes.map do |route| - route.gsub(STARTING_WITH_GROUP, '').split('/').first - end.uniq - end - - describe 'TOP_LEVEL_ROUTES' do - it 'includes all the top level namespaces' do - failure_block = lambda do - missing_words = top_level_words - described_class::TOP_LEVEL_ROUTES - failure_message(missing_words, 'TOP_LEVEL_ROUTES', 'rename_root_paths') + expect(validator.path_valid_for_record?(project, 'activity')).to be_truthy end - - expect(described_class::TOP_LEVEL_ROUTES) - .to include(*top_level_words), failure_block end - end - describe 'GROUP_ROUTES' do - it "don't contain a second wildcard" do - failure_block = lambda do - missing_words = paths_after_group_id - described_class::GROUP_ROUTES - failure_message(missing_words, 'GROUP_ROUTES', 'rename_child_paths') - end + context 'for group' do + it 'calls valid_group_path?' do + group = build(:group, :nested, path: 'activity') - expect(described_class::GROUP_ROUTES) - .to include(*paths_after_group_id), failure_block - end - end + expect(described_class).to receive(:valid_group_path?).with(group.full_path).and_call_original - describe 'WILDCARD_ROUTES' do - it 'includes all paths that can be used after a namespace/project path' do - aggregate_failures do - all_wildcard_paths.each do |path| - expect(wildcards_include?(path)) - .to be(true), failure_message(path, 'WILDCARD_ROUTES', 'rename_wildcard_paths') - end + expect(validator.path_valid_for_record?(group, 'activity')).to be_falsey end end - end - describe '.without_reserved_wildcard_paths_regex' do - subject { described_class.without_reserved_wildcard_paths_regex } + context 'for user' do + it 'calls valid_user_path?' do + user = build(:user, username: 'activity') - it 'rejects paths starting with a reserved top level' do - expect(subject).not_to match('dashboard/hello/world') - expect(subject).not_to match('dashboard') - end + expect(described_class).to receive(:valid_user_path?).with(user.full_path).and_call_original - it 'matches valid paths with a toplevel word in a different place' do - expect(subject).to match('parent/dashboard/project-path') - end - - it 'rejects paths containing a wildcard reserved word' do - expect(subject).not_to match('hello/edit') - expect(subject).not_to match('hello/edit/in-the-middle') - expect(subject).not_to match('foo/bar1/refs/master/logs_tree') - end - - it 'matches valid paths' do - expect(subject).to match('parent/child/project-path') - end - end - - describe '.regex_excluding_child_paths' do - let(:subject) { described_class.without_reserved_child_paths_regex } - - it 'rejects paths containing a child reserved word' do - expect(subject).not_to match('hello/group_members') - expect(subject).not_to match('hello/activity/in-the-middle') - expect(subject).not_to match('foo/bar1/refs/master/logs_tree') - end - - it 'allows a child path on the top level' do - expect(subject).to match('activity/foo') - expect(subject).to match('avatar') - end - end - - describe ".valid?" do - it 'is not case sensitive' do - expect(described_class.valid?("Users")).to be_falsey - end - - it "isn't valid when the top level is reserved" do - test_path = 'u/should-be-a/reserved-word' - - expect(described_class.valid?(test_path)).to be_falsey - end - - it "isn't valid if any of the path segments is reserved" do - test_path = 'the-wildcard/wikis/is-not-allowed' - - expect(described_class.valid?(test_path)).to be_falsey - end - - it "is valid if the path doesn't contain reserved words" do - test_path = 'there-are/no-wildcards/in-this-path' - - expect(described_class.valid?(test_path)).to be_truthy - end - - it 'allows allows a child path on the last spot' do - test_path = 'there/can-be-a/project-called/labels' - - expect(described_class.valid?(test_path)).to be_truthy - end - - it 'rejects a child path somewhere else' do - test_path = 'there/can-be-no/labels/group' - - expect(described_class.valid?(test_path)).to be_falsey + expect(validator.path_valid_for_record?(user, 'activity')).to be_truthy + end end - it 'rejects paths that are in an incorrect format' do - test_path = 'incorrect/format.git' - - expect(described_class.valid?(test_path)).to be_falsey - end - end + context 'for user namespace' do + it 'calls valid_user_path?' do + user = create(:user, username: 'activity') + namespace = user.namespace - describe '#path_reserved_for_record?' do - it 'reserves a sub-group named activity' do - group = build(:group, :nested, path: 'activity') + expect(described_class).to receive(:valid_user_path?).with(namespace.full_path).and_call_original - expect(validator.path_reserved_for_record?(group, 'activity')).to be_truthy - end - - it "doesn't reserve a project called activity" do - project = build(:project, path: 'activity') - - expect(validator.path_reserved_for_record?(project, 'activity')).to be_falsey + expect(validator.path_valid_for_record?(namespace, 'activity')).to be_truthy + end end end @@ -252,7 +52,7 @@ describe DynamicPathValidator do validator.validate_each(group, :path, "Path with spaces, and comma's!") - expect(group.errors[:path]).to include(Gitlab::Regex.namespace_regex_message) + expect(group.errors[:path]).to include(Gitlab::PathRegex.namespace_format_message) end it 'adds a message when the path is not in the correct format' do |