summaryrefslogtreecommitdiff
path: root/lib/gitlab/background_migration/backfill_imported_issue_search_data.rb
blob: c95fed512c9e3a6fead5758d5e299a5676d0ab4b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# frozen_string_literal: true
# rubocop:disable Style/Documentation

module Gitlab
  module BackgroundMigration
    # Reschedules the backfill for the `issue_search_data` table for issues imported prior
    # to the fix for the imported issues search data bug.

    class BackfillImportedIssueSearchData < BatchedMigrationJob
      # LIMIT applied to the source relation of every INSERT ... SELECT statement.
      SUB_BATCH_SIZE = 1_000

      operation_name :update_search_data

      # Inserts search data for each sub-batch in one statement. If that statement
      # fails because a tsvector is too long, the rows of the sub-batch are retried
      # one by one; every other statement error is re-raised unchanged.
      def perform
        each_sub_batch do |batch|
          update_search_data(batch)
        rescue ActiveRecord::StatementInvalid => error
          raise unless tsvector_too_long?(error)

          update_search_data_individually(batch)
        end
      end

      private

      # Runs a single INSERT ... SELECT into `issue_search_data` for the rows of
      # `relation` (capped at SUB_BATCH_SIZE rows).
      #
      # The vector is the weight-'A' tsvector of the first 255 characters of the
      # title, concatenated with the weight-'B' tsvector of the description after
      # runs of 50+ characters from [A-Za-z0-9+/@] are replaced by a space and the
      # result is cut to 1048576 characters. Conflicting rows are skipped
      # (ON CONFLICT DO NOTHING).
      def update_search_data(relation)
        # Resolve the connection before building the SQL, mirroring the order in
        # which a `receiver.execute(args)` call evaluates its parts.
        db = ApplicationRecord.connection
        insert_sql = <<~SQL
          INSERT INTO issue_search_data
          SELECT
            project_id,
            id,
            NOW(),
            NOW(),
            setweight(to_tsvector('english', LEFT(title, 255)), 'A') || setweight(to_tsvector('english', LEFT(REGEXP_REPLACE(description, '[A-Za-z0-9+/@]{50,}', ' ', 'g'), 1048576)), 'B')
          FROM (#{relation.limit(SUB_BATCH_SIZE).to_sql}) issues
          ON CONFLICT DO NOTHING
        SQL

        db.execute(insert_sql)
      end

      # Fallback path: processes the ids of `relation` one at a time, pausing
      # `pause_ms` milliseconds after each successful insert. A row whose tsvector
      # is too long is logged and skipped; any other statement error is re-raised.
      def update_search_data_individually(relation)
        issue_ids = relation.pluck(:id)

        issue_ids.each do |issue_id|
          single_issue = relation.klass.where(id: issue_id)
          update_search_data(single_issue)
          sleep(pause_ms * 0.001)
        rescue ActiveRecord::StatementInvalid => error
          raise unless tsvector_too_long?(error)

          log_update_error(error, relation, issue_id)
        end
      end

      # True when the statement error was caused by PG::ProgramLimitExceeded and
      # its message reports an over-long tsvector string.
      def tsvector_too_long?(error)
        return false unless error.cause.is_a?(PG::ProgramLimitExceeded)

        error.message.include?('string is too long for tsvector')
      end

      # Records the failed row (model class name and id) together with the
      # database error message.
      def log_update_error(error, relation, issue_id)
        logger.error(
          message: "Error updating search data: #{error.message}",
          class: relation.klass.name,
          model_id: issue_id
        )
      end

      # Memoized background-migration logger.
      def logger
        @logger ||= Gitlab::BackgroundMigration::Logger.build
      end
    end
  end
end