path: root/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb
# frozen_string_literal: true

module Gitlab
  module Usage
    module Metrics
      module Aggregates
        module Sources
          class PostgresHll
            class << self
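              # Estimates the number of distinct elements in the union of the
              # given metrics by merging the HLL buckets persisted in Redis for
              # each metric and returning the estimated distinct count.
              # Raises UnionNotAvailable when no data was recorded for a metric.
              #
              # Example (hypothetical metric names, for illustration only):
              #   calculate_metrics_union(
              #     metric_names: %w[i_quickactions_approve i_quickactions_unapprove],
              #     start_date: 4.weeks.ago.to_date,
              #     end_date: Date.current,
              #     recorded_at: Time.current
              #   )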
              def calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:)
                time_period = start_date && end_date ? (start_date..end_date) : nil

                Array(metric_names).each_with_object(Gitlab::Database::PostgresHll::Buckets.new) do |event, buckets|
                  json = read_aggregated_metric(metric_name: event, time_period: time_period, recorded_at: recorded_at)
                  raise UnionNotAvailable, "Union data not available for #{metric_names}" unless json

                  buckets.merge_hash!(Gitlab::Json.parse(json))
                end.estimated_distinct_count
              end

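              # Serializes the given PostgresHll::Buckets and stores them in Redis
              # under a key scoped to the metric name, time period and recorded_at
              # timestamp, so they can be read back later when the union of several
              # metrics is calculated.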
              def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
                unless data.is_a? ::Gitlab::Database::PostgresHll::Buckets
                  Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
                  return
                end

                # Usage Ping report generation for gitlab.com is a very long-running
                # process. To make sure the saved keys are still available at the end
                # of the report generation, use triple the maximum generation time as
                # the expiration.
                keys_expiration = ::Gitlab::UsageData::MAX_GENERATION_TIME_FOR_SAAS * 3

                Gitlab::Redis::SharedState.with do |redis|
                  redis.set(
                    redis_key(metric_name: metric_name, time_period: time_period&.values&.first, recorded_at: recorded_at_timestamp),
                    data.to_json,
                    ex: keys_expiration
                  )
                end
              rescue ::Redis::CommandError => e
                Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
              end

              private

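              # Reads back the serialized HLL buckets previously stored by
              # save_aggregated_metrics, or nil when no data is present.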
              def read_aggregated_metric(metric_name:, time_period:, recorded_at:)
                Gitlab::Redis::SharedState.with do |redis|
                  redis.get(redis_key(metric_name: metric_name, time_period: time_period, recorded_at: recorded_at))
                end
              end

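              # Builds a Redis key in the form
              # "<metric_name>_<time frame name>-<recorded_at unix timestamp>".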
              def redis_key(metric_name:, time_period:, recorded_at:)
                # Append the timestamp to the end of the key to avoid reading stale
                # keys when the usage ping job is retried.
                "#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at.to_i}"
              end

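              # Maps a time period to one of the predefined time frame names:
              # all-time when the period is blank, 28 days when it spans more
              # than a week, and 7 days otherwise.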
              def time_period_to_human_name(time_period)
                return Gitlab::Utils::UsageData::ALL_TIME_TIME_FRAME_NAME if time_period.blank?

                start_date = time_period.first.to_date
                end_date = time_period.last.to_date

                if (end_date - start_date).to_i > 7
                  Gitlab::Utils::UsageData::TWENTY_EIGHT_DAYS_TIME_FRAME_NAME
                else
                  Gitlab::Utils::UsageData::SEVEN_DAYS_TIME_FRAME_NAME
                end
              end
            end
          end
        end
      end
    end
  end
end