diff options
-rw-r--r-- | app/models/application_setting.rb | 14 | ||||
-rw-r--r-- | app/models/audit_event.rb | 2 | ||||
-rw-r--r-- | app/models/ci/build.rb | 4 | ||||
-rw-r--r-- | app/models/ci/trigger_request.rb | 2 | ||||
-rw-r--r-- | app/models/diff_note.rb | 6 | ||||
-rw-r--r-- | app/models/event.rb | 2 | ||||
-rw-r--r-- | app/models/hooks/web_hook_log.rb | 6 | ||||
-rw-r--r-- | app/models/legacy_diff_note.rb | 2 | ||||
-rw-r--r-- | app/models/merge_request.rb | 2 | ||||
-rw-r--r-- | app/models/merge_request_diff.rb | 4 | ||||
-rw-r--r-- | app/models/personal_access_token.rb | 2 | ||||
-rw-r--r-- | app/models/project_import_data.rb | 2 | ||||
-rw-r--r-- | app/models/sent_notification.rb | 2 | ||||
-rw-r--r-- | app/models/service.rb | 2 | ||||
-rw-r--r-- | app/models/user.rb | 2 | ||||
-rw-r--r-- | doc/development/README.md | 1 | ||||
-rw-r--r-- | doc/development/serializing_data.md | 84 | ||||
-rw-r--r-- | rubocop/cop/activerecord_serialize.rb | 24 | ||||
-rw-r--r-- | rubocop/rubocop.rb | 1 | ||||
-rw-r--r-- | spec/rubocop/cop/activerecord_serialize_spec.rb | 33 |
20 files changed, 170 insertions, 27 deletions
diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb index 043f57241a3..3d12f3c306b 100644 --- a/app/models/application_setting.rb +++ b/app/models/application_setting.rb @@ -13,13 +13,13 @@ class ApplicationSetting < ActiveRecord::Base [\r\n] # any number of newline characters }x - serialize :restricted_visibility_levels - serialize :import_sources - serialize :disabled_oauth_sign_in_sources, Array - serialize :domain_whitelist, Array - serialize :domain_blacklist, Array - serialize :repository_storages - serialize :sidekiq_throttling_queues, Array + serialize :restricted_visibility_levels # rubocop:disable Cop/ActiverecordSerialize + serialize :import_sources # rubocop:disable Cop/ActiverecordSerialize + serialize :disabled_oauth_sign_in_sources, Array # rubocop:disable Cop/ActiverecordSerialize + serialize :domain_whitelist, Array # rubocop:disable Cop/ActiverecordSerialize + serialize :domain_blacklist, Array # rubocop:disable Cop/ActiverecordSerialize + serialize :repository_storages # rubocop:disable Cop/ActiverecordSerialize + serialize :sidekiq_throttling_queues, Array # rubocop:disable Cop/ActiverecordSerialize cache_markdown_field :sign_in_text cache_markdown_field :help_page_text diff --git a/app/models/audit_event.rb b/app/models/audit_event.rb index 967ffd46db0..46d412fbd72 100644 --- a/app/models/audit_event.rb +++ b/app/models/audit_event.rb @@ -1,5 +1,5 @@ class AuditEvent < ActiveRecord::Base - serialize :details, Hash + serialize :details, Hash # rubocop:disable Cop/ActiverecordSerialize belongs_to :user, foreign_key: :author_id diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 60b71ff0d93..0000ecc5bbf 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -19,8 +19,8 @@ module Ci ) end - serialize :options - serialize :yaml_variables, Gitlab::Serializer::Ci::Variables + serialize :options # rubocop:disable Cop/ActiverecordSerialize + serialize :yaml_variables, Gitlab::Serializer::Ci::Variables # rubocop:disable Cop/ActiverecordSerialize delegate :name, to: :project, prefix: true diff --git a/app/models/ci/trigger_request.rb b/app/models/ci/trigger_request.rb index 2b807731d0d..564334ad1ad 100644 --- a/app/models/ci/trigger_request.rb +++ b/app/models/ci/trigger_request.rb @@ -6,7 +6,7 @@ module Ci belongs_to :pipeline, foreign_key: :commit_id has_many :builds - serialize :variables + serialize :variables # rubocop:disable Cop/ActiverecordSerialize def user_variables return [] unless variables diff --git a/app/models/diff_note.rb b/app/models/diff_note.rb index 2a4cff37566..6cd0502069a 100644 --- a/app/models/diff_note.rb +++ b/app/models/diff_note.rb @@ -6,9 +6,9 @@ class DiffNote < Note NOTEABLE_TYPES = %w(MergeRequest Commit).freeze - serialize :original_position, Gitlab::Diff::Position - serialize :position, Gitlab::Diff::Position - serialize :change_position, Gitlab::Diff::Position + serialize :original_position, Gitlab::Diff::Position # rubocop:disable Cop/ActiverecordSerialize + serialize :position, Gitlab::Diff::Position # rubocop:disable Cop/ActiverecordSerialize + serialize :change_position, Gitlab::Diff::Position # rubocop:disable Cop/ActiverecordSerialize validates :original_position, presence: true validates :position, presence: true diff --git a/app/models/event.rb b/app/models/event.rb index e6fad46077a..46e89388bc1 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -26,7 +26,7 @@ class Event < ActiveRecord::Base belongs_to :target, polymorphic: true # For Hash only - serialize :data + serialize :data # rubocop:disable Cop/ActiverecordSerialize # Callbacks after_create :reset_project_activity diff --git a/app/models/hooks/web_hook_log.rb b/app/models/hooks/web_hook_log.rb index 2738b229d84..d73cfcf630d 100644 --- a/app/models/hooks/web_hook_log.rb +++ b/app/models/hooks/web_hook_log.rb @@ -1,9 +1,9 @@ class WebHookLog < ActiveRecord::Base belongs_to :web_hook - serialize :request_headers, Hash - serialize :request_data, Hash - serialize :response_headers, Hash + serialize :request_headers, Hash # rubocop:disable Cop/ActiverecordSerialize + serialize :request_data, Hash # rubocop:disable Cop/ActiverecordSerialize + serialize :response_headers, Hash # rubocop:disable Cop/ActiverecordSerialize validates :web_hook, presence: true diff --git a/app/models/legacy_diff_note.rb b/app/models/legacy_diff_note.rb index ebf8fb92ab5..7126de2d488 100644 --- a/app/models/legacy_diff_note.rb +++ b/app/models/legacy_diff_note.rb @@ -7,7 +7,7 @@ class LegacyDiffNote < Note include NoteOnDiff - serialize :st_diff + serialize :st_diff # rubocop:disable Cop/ActiverecordSerialize validates :line_code, presence: true, line_code: true diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 56fec3ca39c..4bccda8a732 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -21,7 +21,7 @@ class MergeRequest < ActiveRecord::Base belongs_to :assignee, class_name: "User" - serialize :merge_params, Hash + serialize :merge_params, Hash # rubocop:disable Cop/ActiverecordSerialize after_create :ensure_merge_request_diff, unless: :importing? after_update :reload_diff_if_branch_changed diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb index 1bd61c1d465..1c2f335f95e 100644 --- a/app/models/merge_request_diff.rb +++ b/app/models/merge_request_diff.rb @@ -11,8 +11,8 @@ class MergeRequestDiff < ActiveRecord::Base belongs_to :merge_request - serialize :st_commits - serialize :st_diffs + serialize :st_commits # rubocop:disable Cop/ActiverecordSerialize + serialize :st_diffs # rubocop:disable Cop/ActiverecordSerialize state_machine :state, initial: :empty do state :collected diff --git a/app/models/personal_access_token.rb b/app/models/personal_access_token.rb index e8b000ddad6..ae9f71e7747 100644 --- a/app/models/personal_access_token.rb +++ b/app/models/personal_access_token.rb @@ -3,7 +3,7 @@ class PersonalAccessToken < ActiveRecord::Base include TokenAuthenticatable add_authentication_token_field :token - serialize :scopes, Array + serialize :scopes, Array # rubocop:disable Cop/ActiverecordSerialize belongs_to :user diff --git a/app/models/project_import_data.rb b/app/models/project_import_data.rb index 331123a5a5b..e3cafd4d1c6 100644 --- a/app/models/project_import_data.rb +++ b/app/models/project_import_data.rb @@ -10,7 +10,7 @@ class ProjectImportData < ActiveRecord::Base insecure_mode: true, algorithm: 'aes-256-cbc' - serialize :data, JSON + serialize :data, JSON # rubocop:disable Cop/ActiverecordSerialize validates :project, presence: true diff --git a/app/models/sent_notification.rb b/app/models/sent_notification.rb index 0ae5864615a..eed3ca7e179 100644 --- a/app/models/sent_notification.rb +++ b/app/models/sent_notification.rb @@ -1,5 +1,5 @@ class SentNotification < ActiveRecord::Base - serialize :position, Gitlab::Diff::Position + serialize :position, Gitlab::Diff::Position # rubocop:disable Cop/ActiverecordSerialize belongs_to :project belongs_to :noteable, polymorphic: true diff --git a/app/models/service.rb b/app/models/service.rb index 8916f88076e..6a0b0a5c522 100644 --- a/app/models/service.rb +++ b/app/models/service.rb @@ -2,7 +2,7 @@ # and implement a set of methods class Service < ActiveRecord::Base include Sortable - serialize :properties, JSON + serialize :properties, JSON # rubocop:disable Cop/ActiverecordSerialize default_value_for :active, false default_value_for :push_events, true diff --git a/app/models/user.rb b/app/models/user.rb index 9e13e91536d..8114d0ff88e 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -40,7 +40,7 @@ class User < ActiveRecord::Base otp_secret_encryption_key: Gitlab::Application.secrets.otp_key_base devise :two_factor_backupable, otp_number_of_backup_codes: 10 - serialize :otp_backup_codes, JSON + serialize :otp_backup_codes, JSON # rubocop:disable Cop/ActiverecordSerialize devise :lockable, :recoverable, :rememberable, :trackable, :validatable, :omniauthable, :confirmable, :registerable diff --git a/doc/development/README.md b/doc/development/README.md index be013667684..af4131c4a8f 100644 --- a/doc/development/README.md +++ b/doc/development/README.md @@ -50,6 +50,7 @@ - [Adding database indexes](adding_database_indexes.md) - [Post Deployment Migrations](post_deployment_migrations.md) - [Foreign Keys & Associations](foreign_keys.md) +- [Serializing Data](serializing_data.md) ## i18n diff --git a/doc/development/serializing_data.md b/doc/development/serializing_data.md new file mode 100644 index 00000000000..2b56f48bc44 --- /dev/null +++ b/doc/development/serializing_data.md @@ -0,0 +1,84 @@ +# Serializing Data + +**Summary:** don't store serialized data in the database, use separate columns +and/or tables instead. + +Rails makes it possible to store serialized data in JSON, YAML or other formats. +Such a field can be defined as follows: + +```ruby +class Issue < ActiveRecord::Model + serialize :custom_fields +end +``` + +While it may be tempting to store serialized data in the database there are many +problems with this. This document will outline these problems and provide an +alternative. + +## Serialized Data Is Less Powerful + +When using a relational database you have the ability to query individual +fields, change the schema, index data and so forth. When you use serialized data +all of that becomes either very difficult or downright impossible. While +PostgreSQL does offer the ability to query JSON fields it is mostly meant for +very specialized use cases, and not for more general use. If you use YAML in +turn there's no way to query the data at all. + +## Waste Of Space + +Storing serialized data such as JSON or YAML will end up wasting a lot of space. +This is because these formats often include additional characters (e.g. double +quotes or newlines) besides the data that you are storing. + +## Difficult To Manage + +There comes a time where you will need to add a new field to the serialized +data, or change an existing one. Using serialized data this becomes difficult +and very time consuming as the only way of doing so is to re-write all the +stored values. To do so you would have to: + +1. Retrieve the data +1. Parse it into a Ruby structure +1. Mutate it +1. Serialize it back to a String +1. Store it in the database + +On the other hand, if one were to use regular columns adding a column would be +as easy as this: + +```sql +ALTER TABLE table_name ADD COLUMN column_name type; +``` + +Such a query would take very little to no time and would immediately apply to +all rows, without having to re-write large JSON or YAML structures. + +Finally, there comes a time when the JSON or YAML structure is no longer +sufficient and you need to migrate away from it. When storing only a few rows +this may not be a problem, but when storing millions of rows such a migration +can easily take hours or even days to complete. + +## Relational Databases Are Not Document Stores + +When storing data as JSON or YAML you're essentially using your database as if +it were a document store (e.g. MongoDB), except you're not using any of the +powerful features provided by a typical RDBMS _nor_ are you using any of the +features provided by a typical document store (e.g. the ability to index fields +of documents with variable fields). In other words, it's a waste. + +## Consistent Fields + +One argument sometimes made in favour of serialized data is having to store +widely varying fields and values. Sometimes this is truly the case, and then +perhaps it might make sense to use serialized data. However, in 99% of the cases +the fields and types stored tend to be the same for every row. Even if there is +a slight difference you can still use separate columns and just not set the ones +you don't need. + +## The Solution + +The solution is very simple: just use separate columns and/or separate tables. +This will allow you to use all the features provided by your database, it will +make it easier to manage and migrate the data, you'll conserve space, you can +index the data efficiently and so forth. diff --git a/rubocop/cop/activerecord_serialize.rb b/rubocop/cop/activerecord_serialize.rb new file mode 100644 index 00000000000..bfa0cff9a67 --- /dev/null +++ b/rubocop/cop/activerecord_serialize.rb @@ -0,0 +1,24 @@ +module RuboCop + module Cop + # Cop that prevents the use of `serialize` in ActiveRecord models. + class ActiverecordSerialize < RuboCop::Cop::Cop + MSG = 'Do not store serialized data in the database, use separate columns and/or tables instead'.freeze + + def on_send(node) + return unless in_models?(node) + + add_offense(node, :selector) if node.children[1] == :serialize + end + + def models_path + File.join(Dir.pwd, 'app', 'models') + end + + def in_models?(node) + path = node.location.expression.source_buffer.name + + path.start_with?(models_path) + end + end + end +end diff --git a/rubocop/rubocop.rb b/rubocop/rubocop.rb index b65efbc41f4..17d2bf6aa1c 100644 --- a/rubocop/rubocop.rb +++ b/rubocop/rubocop.rb @@ -1,5 +1,6 @@ require_relative 'cop/custom_error_class' require_relative 'cop/gem_fetcher' +require_relative 'cop/activerecord_serialize' require_relative 'cop/migration/add_column' require_relative 'cop/migration/add_column_with_default_to_large_table' require_relative 'cop/migration/add_concurrent_foreign_key' diff --git a/spec/rubocop/cop/activerecord_serialize_spec.rb b/spec/rubocop/cop/activerecord_serialize_spec.rb new file mode 100644 index 00000000000..a303b16d264 --- /dev/null +++ b/spec/rubocop/cop/activerecord_serialize_spec.rb @@ -0,0 +1,33 @@ +require 'spec_helper' +require 'rubocop' +require 'rubocop/rspec/support' +require_relative '../../../rubocop/cop/activerecord_serialize' + +describe RuboCop::Cop::ActiverecordSerialize do + include CopHelper + + subject(:cop) { described_class.new } + + context 'inside the app/models directory' do + it 'registers an offense when serialize is used' do + allow(cop).to receive(:in_models?).and_return(true) + + inspect_source(cop, 'serialize :foo') + + aggregate_failures do + expect(cop.offenses.size).to eq(1) + expect(cop.offenses.map(&:line)).to eq([1]) + end + end + end + + context 'outside the app/models directory' do + it 'does nothing' do + allow(cop).to receive(:in_models?).and_return(false) + + inspect_source(cop, 'serialize :foo') + + expect(cop.offenses).to be_empty + end + end +end |