summaryrefslogtreecommitdiff
path: root/lib/gitlab/usage/metrics/names_suggestions/generator.rb
blob: 495811694520fd53ce862182afbeedab30eabfec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# frozen_string_literal: true

module Gitlab
  module Usage
    module Metrics
      module NamesSuggestions
        class Generator < ::Gitlab::UsageData
          # Placeholder returned when no name can be derived and the author must supply one.
          FREE_TEXT_METRIC_NAME = "<please fill metric name>"
          # Placeholder returned for redis-backed counters; it embeds the suggested naming format.
          REDIS_EVENT_METRIC_NAME = "<please fill metric name, suggested format is: {subject}_{verb}{ing|ed}_{object} eg: users_creating_epics or merge_requests_viewed_in_single_file_mode>"
          # Template for the prompt asking for an adjective that describes the given SQL constraints.
          # `%{constraints}` is filled in by `append_constraints_prompt`.
          CONSTRAINTS_PROMPT_TEMPLATE = "<adjective describing: '%{constraints}'>"

          class << self
            # Returns the name suggestion stored under `key_path`.
            #
            # key_path - dot separated String of keys, e.g. "counts.issues".
            #
            # The data returned by `uncached_data` is converted to string keys and
            # then walked one key per path segment; a missing key yields nil.
            def generate(key_path)
              suggestions = uncached_data.deep_stringify_keys
              suggestions.dig(*key_path.split('.'))
            end

            private

            # Produces a name suggestion with the 'count' prefix instead of a number.
            #
            # relation - relation the metric would count.
            # column   - counted column; nil is passed through to `name_suggestion`.
            #
            # The batching keywords (batch, batch_size, start, finish) are accepted
            # but not used by this method.
            def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil)
              name_suggestion(prefix: 'count', relation: relation, column: column)
            end

            # Produces a name suggestion with the 'count_distinct' prefix.
            #
            # relation - relation the metric would count.
            # column   - counted column; nil is passed through to `name_suggestion`.
            #
            # `distinct: :distinct` is forwarded so the generated query counts distinct values.
            # The batching keywords (batch, batch_size, start, finish) are accepted
            # but not used by this method.
            def distinct_count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil)
              name_suggestion(prefix: 'count_distinct', distinct: :distinct, relation: relation, column: column)
            end

            # Returns the redis-event naming placeholder; nothing is counted here.
            # Takes no arguments, and a block given by the caller is never yielded to.
            def redis_usage_counter
              REDIS_EVENT_METRIC_NAME
            end

            # Returns the free-text placeholder. Any positional arguments are accepted
            # and ignored, and a block given by the caller is never yielded to.
            def alt_usage_data(*)
              FREE_TEXT_METRIC_NAME
            end

            # Maps every key of `counter.fallback_totals` to the redis-event naming placeholder.
            #
            # counter - object responding to `fallback_totals`
            #           (assumed to return a Hash - TODO confirm against callers).
            def redis_usage_data_totals(counter)
              totals = counter.fallback_totals
              totals.transform_values { REDIS_EVENT_METRIC_NAME }
            end

            # Produces a name suggestion with the 'sum' prefix.
            #
            # relation - relation the metric would aggregate.
            # column   - summed column.
            # rest     - extra positional arguments; accepted but not used.
            def sum(relation, column, *rest)
              name_suggestion(prefix: 'sum', relation: relation, column: column)
            end

            # Produces a name suggestion with the 'estimate_distinct_count' prefix.
            #
            # relation - relation the metric would count.
            # column   - counted column; nil is passed through to `name_suggestion`.
            # rest     - extra positional arguments; accepted but not used.
            def estimate_batch_distinct_count(relation, column = nil, *rest)
              name_suggestion(prefix: 'estimate_distinct_count', relation: relation, column: column)
            end

            # Builds the name for a metric that adds other metrics together:
            # 'add_' followed by the given names joined with '_and_'.
            def add(*args)
              operands = args.join('_and_')
              "add_#{operands}"
            end

            # Assembles the suggested metric name for an aggregate over `relation`.
            #
            # relation - relation the metric is computed from.
            # column   - counted column (see `arelize_column` for accepted forms).
            # prefix   - leading name part, e.g. 'count'; nil is dropped from the result.
            # distinct - forwarded to `arel_query` when building the aggregate.
            #
            # The result is the non-nil parts joined with '_', with double quotes removed.
            def name_suggestion(relation:, column: nil, prefix: nil, distinct: nil)
              # Any WHERE condition on created_at is removed before the name is derived.
              # rubocop: disable CodeReuse/ActiveRecord
              relation = relation.unscope(where: :created_at)
              # rubocop: enable CodeReuse/ActiveRecord

              counted_attribute = arelize_column(relation, column)
              name_parts = [prefix]

              # Counting the primary key is conceptually the same as counting every record
              # of the relation, so the column is left out to keep the suggestion short:
              # SELECT COUNT(id) FROM issues becomes count_issues, not count_id_from_issues.
              # A nil column is resolved to the primary key by arelize_column.
              name_parts.push(counted_attribute.name, 'from') if counted_attribute != Arel::Table.new(relation.table_name)[relation.primary_key]

              query = arel_query(relation: relation, column: counted_attribute, distinct: distinct)
              where_constraints = parse_constraints(relation: relation, arel: query)

              # For performance reasons some metrics are instrumented on joined relations where
              # the relation in FROM is not the one that owns the counted attribute. The source
              # is therefore taken from the attribute itself, so that
              #   SELECT COUNT(deployments.environment_id) FROM clusters
              #     JOIN deployments ON deployments.cluster_id = cluster.id
              # is named after deployments (joined with clusters) rather than
              # after clusters (joined with deployments).
              source_table = parse_source(relation, counted_attribute)

              append_constraints_prompt(source_table, [where_constraints], name_parts)

              name_parts << source_table
              name_parts.concat(process_joined_relations(source_table, query, relation, where_constraints))
              name_parts.compact.join('_').delete('"')
            end

            # Appends a constraints prompt for `target` to `parts`.
            #
            # target      - String (table name) looked for inside each constraint.
            # constraints - Array of constraint Strings.
            # parts       - Array of name parts; mutated in place.
            #
            # Only constraints containing `target` as a substring are used; they are joined
            # with ' AND ' and inserted into CONSTRAINTS_PROMPT_TEMPLATE.
            # Returns `parts` when a prompt was appended, nil otherwise.
            def append_constraints_prompt(target, constraints, parts)
              matching = constraints.select { |constraint| constraint.include?(target) }
              return if matching.empty?

              parts << format(CONSTRAINTS_PROMPT_TEMPLATE, constraints: matching.join(' AND '))
            end

            # Runs the Constraints relation parser over `arel` and returns the collected value.
            #
            # relation - supplies the database connection used by the parser and the collector.
            # arel     - Arel query to inspect.
            def parse_constraints(relation:, arel:)
              connection = relation.connection
              parser = ::Gitlab::Usage::Metrics::NamesSuggestions::RelationParsers::Constraints.new(connection)

              parser.accept(arel, collector(connection)).value
            end

            # TODO: joins with `USING` keyword
            # Builds the name parts that describe relations joined into the query.
            #
            # actual_source     - table name the counted attribute belongs to.
            # arel              - Arel query the joins are parsed from.
            # relation          - relation listed in FROM; supplies connection and table name.
            # where_constraints - WHERE constraints, forwarded to `collect_join_parts`.
            #
            # Returns an empty Array when the query has no joins.
            def process_joined_relations(actual_source, arel, relation, where_constraints)
              joins = parse_joins(connection: relation.connection, arel: arel)
              return [] if joins.none?

              joined_tables = joins.map { |join| join[:source] }
              joins = extract_joins_targets(joins, [relation.table_name, *joined_tables])
              edges = joins + [{ source: relation.table_name }]

              # When the counted attribute belongs to the relation in FROM, the tree is rooted
              # at that relation and each join is followed from its :target to its :source,
              # so the two keys are swapped. Otherwise the tree is rooted at the joined
              # relation owning the attribute and joins are followed from :source to :target.
              source_key, target_key =
                if actual_source == relation.table_name
                  %i[target source]
                else
                  %i[source target]
                end

              relations = build_relations_tree(edges, actual_source, source_key: source_key, target_key: target_key)

              collect_join_parts(relations: relations[actual_source], joins: joins, wheres: where_constraints)
            end

            # Runs the Joins relation parser over `arel` and returns its result.
            # Callers in this class treat the result as a collection of Hashes
            # with :source and :constraints keys.
            def parse_joins(connection:, arel:)
              parser = ::Gitlab::Usage::Metrics::NamesSuggestions::RelationParsers::Joins.new(connection)
              parser.accept(arel)
            end

            # Detects, for every join, which other table it is joined against.
            #
            # joins   - Array of Hashes with :source (table name) and :constraints (String).
            # sources - Array of all table names taking part in the query.
            #
            # A join condition is recognised when it has the shape
            #   <source>.<..._>id = <other table>.<..._>id   (in either order).
            # When found, the other table is stored under :target and the condition,
            # together with trailing AND/OR keywords, is removed from :constraints.
            #
            # The join Hashes and their :constraints Strings are modified in place;
            # the same Hashes are returned.
            def extract_joins_targets(joins, sources)
              joins.map do |join|
                # Table names are matched literally, so regex metacharacters in them
                # (e.g. the dot of a schema-qualified name) must be escaped.
                other_tables = (sources - [join[:source]])
                  .map { |table| Regexp.escape(table.to_s) }
                  .reject(&:empty?)

                # Without a candidate table the target alternation would be empty and
                # could match a bare ".<column>_id", yielding a blank target.
                next join if other_tables.empty?

                source_regex = /(#{Regexp.escape(join[:source].to_s)})\.(\w+_)*id/i
                target_regex = /(?<target>#{other_tables.join('|')})\.(\w+_)*id/i

                join_cond_regex = /(#{source_regex}\s+=\s+#{target_regex})|(#{target_regex}\s+=\s+#{source_regex})/i
                matched = join_cond_regex.match(join[:constraints])

                if matched
                  join[:target] = matched[:target]
                  join[:constraints].gsub!(/#{join_cond_regex}(\s+(and|or))*/i, '')
                end

                join
              end
            end

            # Recursively arranges joins into a tree rooted at `parent`.
            #
            # joins      - Array of join Hashes.
            # parent     - value identifying the root of the (sub)tree.
            # source_key - key compared against `parent` to find joins attached to it.
            # target_key - key whose value becomes the root of the child subtree.
            #
            # Returns [] when `joins` is blank, otherwise a single-key Hash
            # { parent => [child subtrees] }. Every join attached to `parent` is removed
            # from the list handed to its own subtree, so the recursion terminates.
            def build_relations_tree(joins, parent, source_key: :source, target_key: :target)
              return [] if joins.blank?

              attached = joins.select { |join| join[source_key] == parent }
              subtrees = attached.map do |join|
                build_relations_tree(joins - [join], join[target_key], source_key: source_key, target_key: target_key)
              end

              { parent => subtrees }
            end

            # Walks the relations tree depth first and collects the name parts for joined tables.
            #
            # relations    - Array of subtrees, each a Hash { table => [child subtrees] }
            #                (empty Arrays are skipped).
            # joins        - Array of join Hashes; :constraints of the join whose :source
            #                equals the visited table is offered to the constraints prompt.
            # wheres       - WHERE constraints, offered to the constraints prompt for every table.
            # parts        - accumulator Array; mutated and returned.
            # conjunctions - enumerator of linking words; one word is consumed per invocation
            #                (including recursive ones) and used for all tables visited directly
            #                by that invocation.
            def collect_join_parts(relations:, joins:, wheres:, parts: [], conjunctions: %w[with having including].cycle)
              link_word = conjunctions.next

              relations.flat_map(&:to_a).each do |table, nested|
                matching_join = joins.find { |join| join[:source] == table }
                join_constraints = matching_join&.dig(:constraints)

                parts << "<#{link_word}>"
                append_constraints_prompt(table, [wheres, join_constraints].compact, parts)
                parts << table

                collect_join_parts(relations: nested, joins: joins, wheres: wheres, parts: parts, conjunctions: conjunctions)
              end

              parts
            end

            # Converts `column` into an Arel attribute.
            #
            # relation - supplies the default table name and the primary key.
            # column   - one of:
            #            * Arel::Attribute - returned unchanged
            #            * nil             - the relation's primary key is used
            #            * String / Symbol - "column" (on the relation's table) or
            #                                "table.column"; only the first two dot
            #                                separated segments are used
            #
            # Returns nil for any other kind of `column`.
            def arelize_column(relation, column)
              column = column.to_s if column.is_a?(Symbol)

              case column
              when Arel::Attribute
                column
              when nil
                Arel::Table.new(relation.table_name)[relation.primary_key]
              when String
                table_name, column_name = column.include?('.') ? column.split('.') : [relation.table_name, column]
                Arel::Table.new(table_name)[column_name]
              end
            end

            # Returns the name of the table the counted attribute belongs to,
            # falling back to the relation's own table name when the attribute's
            # relation has no name.
            def parse_source(relation, column)
              attribute_table = column.relation.name
              attribute_table || relation.table_name
            end

            # Builds the Arel collector handed to the constraints parser:
            # a SubstituteBinds collector for `connection` wrapping a fresh SQLString collector.
            def collector(connection)
              sql_string = Arel::Collectors::SQLString.new
              Arel::Collectors::SubstituteBinds.new(connection, sql_string)
            end

            # Builds the Arel query selecting COUNT over `column` for `relation`.
            #
            # relation - relation to select from.
            # column   - Arel::Attribute or column name; defaults to the relation's primary key.
            # distinct - passed to Arel's `count`.
            def arel_query(relation:, column: nil, distinct: nil)
              column ||= relation.primary_key
              # Plain column names are resolved against the relation's own Arel table.
              attribute = column.is_a?(Arel::Attribute) ? column : relation.all.table[column]

              relation.select(attribute.count(distinct)).arel
            end
          end
        end
      end
    end
  end
end