summaryrefslogtreecommitdiff
path: root/db/post_migrate/20200305082754_remove_duplicate_labels_from_project.rb
blob: 33f8118534de0b43d53bfd39818a0a1daa0a6f7b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# frozen_string_literal: true

class RemoveDuplicateLabelsFromProject < ActiveRecord::Migration[6.0]
  DOWNTIME = false

  CREATE = 1
  RENAME = 2

  disable_ddl_transaction!

  class BackupLabel < Label
    self.table_name = 'backup_labels'
  end

  class Label < ApplicationRecord
    self.table_name = 'labels'
  end

  class Project < ApplicationRecord
    include EachBatch

    self.table_name = 'projects'
  end

  BATCH_SIZE = 100_000

  def up
    # Split to smaller chunks
    # Loop rather than background job, every 100,000
    # there are 45,000,000 projects in total
    Project.each_batch(of: BATCH_SIZE) do |batch|
      range = batch.pluck('MIN(id)', 'MAX(id)').first

      transaction do
        remove_full_duplicates(*range)
      end

      transaction do
        rename_partial_duplicates(*range)
      end
    end
  end

  def down
    Project.each_batch(of: BATCH_SIZE) do |batch|
      range = batch.pluck('MIN(id)', 'MAX(id)').first

      restore_renamed_labels(*range)
      restore_deleted_labels(*range)
    end
  end

  def remove_full_duplicates(start_id, stop_id)
    # Fields that are considered duplicate:
    # project_id title template description type color

    duplicate_labels = ApplicationRecord.connection.execute(<<-SQL.squish)
WITH data AS (
  SELECT labels.*,
  row_number() OVER (PARTITION BY labels.project_id, labels.title, labels.template, labels.description, labels.type, labels.color ORDER BY labels.id) AS row_number,
  #{CREATE} AS restore_action
  FROM labels
  WHERE labels.project_id BETWEEN #{start_id} AND #{stop_id}
  AND NOT EXISTS (SELECT * FROM board_labels WHERE board_labels.label_id = labels.id)
  AND NOT EXISTS (SELECT * FROM label_links WHERE label_links.label_id = labels.id)
  AND NOT EXISTS (SELECT * FROM label_priorities WHERE label_priorities.label_id = labels.id)
  AND NOT EXISTS (SELECT * FROM lists WHERE lists.label_id = labels.id)
  AND NOT EXISTS (SELECT * FROM resource_label_events WHERE resource_label_events.label_id = labels.id)
) SELECT * FROM data WHERE row_number > 1;
    SQL

    if duplicate_labels.any?
      # create backup records
      BackupLabel.insert_all!(duplicate_labels.map { |label| label.except("row_number") })

      Label.where(id: duplicate_labels.pluck("id")).delete_all
    end
  end

  def rename_partial_duplicates(start_id, stop_id)
    # We need to ensure that the new title (with `_duplicate#{ID}`) doesn't exceed the limit.
    # Truncate the original title (if needed) to 245 characters minus the length of the ID
    # then add `_duplicate#{ID}`

    soft_duplicates = ApplicationRecord.connection.execute(<<-SQL.squish)
WITH data AS (
  SELECT
     *,
     substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text as new_title,
     #{RENAME} AS restore_action,
     row_number() OVER (PARTITION BY project_id, title ORDER BY id) AS row_number
  FROM labels
  WHERE project_id BETWEEN #{start_id} AND #{stop_id}
) SELECT * FROM data WHERE row_number > 1;
    SQL

    if soft_duplicates.any?
      # create backup records
      BackupLabel.insert_all!(soft_duplicates.map { |label| label.except("row_number") })

      ApplicationRecord.connection.execute(<<-SQL.squish)
UPDATE labels SET title = substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text
WHERE labels.id IN (#{soft_duplicates.map { |dup| dup["id"] }.join(", ")});
      SQL
    end
  end

  def restore_renamed_labels(start_id, stop_id)
    # the backup label IDs are not incremental, they are copied directly from the Labels table
    ApplicationRecord.connection.execute(<<-SQL.squish)
WITH backups AS (
  SELECT id, title
  FROM backup_labels
  WHERE project_id BETWEEN #{start_id} AND #{stop_id} AND
  restore_action = #{RENAME}
) UPDATE labels SET title = backups.title
FROM backups
WHERE labels.id = backups.id;
    SQL
  end

  def restore_deleted_labels(start_id, stop_id)
    ActiveRecord::Base.connection.execute(<<-SQL.squish)
INSERT INTO labels
SELECT id, title, color, project_id, created_at, updated_at, template, description, description_html, type, group_id, cached_markdown_version FROM backup_labels
  WHERE backup_labels.project_id BETWEEN #{start_id} AND #{stop_id}
  AND backup_labels.restore_action = #{CREATE}
    SQL
  end
end