summaryrefslogtreecommitdiff
path: root/lib/gitlab/background_migration/backfill_issue_search_data.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/background_migration/backfill_issue_search_data.rb')
-rw-r--r--lib/gitlab/background_migration/backfill_issue_search_data.rb63
1 files changed, 63 insertions, 0 deletions
diff --git a/lib/gitlab/background_migration/backfill_issue_search_data.rb b/lib/gitlab/background_migration/backfill_issue_search_data.rb
new file mode 100644
index 00000000000..ec206cbfd41
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_issue_search_data.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+# rubocop:disable Style/Documentation
+
+module Gitlab
+ module BackgroundMigration
+ # Backfills the new `issue_search_data` table, which contains
+ # the tsvector from the issue title and description.
+ class BackfillIssueSearchData
+ include Gitlab::Database::DynamicModelHelpers
+
+ def perform(start_id, stop_id, batch_table, batch_column, sub_batch_size, pause_ms)
+ define_batchable_model(batch_table, connection: ActiveRecord::Base.connection).where(batch_column => start_id..stop_id).each_batch(of: sub_batch_size) do |sub_batch|
+ update_search_data(sub_batch)
+
+ sleep(pause_ms * 0.001)
+ rescue ActiveRecord::StatementInvalid => e
+ raise unless e.cause.is_a?(PG::ProgramLimitExceeded) && e.message.include?('string is too long for tsvector')
+
+ update_search_data_individually(sub_batch, pause_ms)
+ end
+ end
+
+ private
+
+ def update_search_data(relation)
+ relation.klass.connection.execute(
+ <<~SQL
+ INSERT INTO issue_search_data (project_id, issue_id, search_vector, created_at, updated_at)
+ SELECT
+ project_id,
+ id,
+ setweight(to_tsvector('english', LEFT(title, 255)), 'A') || setweight(to_tsvector('english', LEFT(REGEXP_REPLACE(description, '[A-Za-z0-9+/@]{50,}', ' ', 'g'), 1048576)), 'B'),
+ NOW(),
+ NOW()
+ FROM issues
+ WHERE issues.id IN (#{relation.select(:id).to_sql})
+ ON CONFLICT DO NOTHING
+ SQL
+ )
+ end
+
+ def update_search_data_individually(relation, pause_ms)
+ relation.pluck(:id).each do |issue_id|
+ update_search_data(relation.klass.where(id: issue_id))
+
+ sleep(pause_ms * 0.001)
+ rescue ActiveRecord::StatementInvalid => e
+ raise unless e.cause.is_a?(PG::ProgramLimitExceeded) && e.message.include?('string is too long for tsvector')
+
+ logger.error(
+ message: 'Error updating search data: string is too long for tsvector',
+ class: relation.klass.name,
+ model_id: issue_id
+ )
+ end
+ end
+
+ def logger
+ @logger ||= Gitlab::BackgroundMigration::Logger.build
+ end
+ end
+ end
+end