summaryrefslogtreecommitdiff
path: root/spec/lib/gitlab/database/postgres_hll/batch_distinct_counter_spec.rb
blob: 2c550f14a08d3bf19b9c8d27bc47e58700aa7c61 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Gitlab::Database::PostgresHll::BatchDistinctCounter do
  # One below MIN_REQUIRED_BATCH_SIZE (see calculate_batch_size); passed as a
  # batch size that the counter is expected to reject.
  let_it_be(:small_batch_size) { calculate_batch_size(described_class::MIN_REQUIRED_BATCH_SIZE) }

  # Relation and column handed to the counter under test.
  let(:model) { Issue }
  let(:column) { :author_id }

  # Value returned by the stubbed `transaction_open?`; nested contexts override
  # it to simulate running inside an open transaction.
  let(:in_transaction) { false }

  # Two separate users that act as issue authors in the fixtures.
  let_it_be(:user) { create(:user, email: 'email1@domain.com') }
  let_it_be(:another_user) { create(:user, email: 'email2@domain.com') }

  # Returns the given batch size reduced by one.
  #
  # The examples add the result to a starting id to obtain the last id of an
  # inclusive id range, and use it to build a size just under a minimum.
  def calculate_batch_size(batch_size)
    batch_size - 1
  end

  # Stub the connection's transaction state with `in_transaction`, so each
  # context decides whether the counter sees an open transaction.
  before do
    connection = ActiveRecord::Base.connection

    allow(connection).to receive(:transaction_open?).and_return(in_transaction)
  end

  context 'unit test for different counting parameters' do
    # Fixture data for the examples: five issues split across two authors.
    before_all do
      create_list(:issue, 3, author: user)
      create_list(:issue, 2, author: another_user)
    end

    describe '#execute' do
      it 'builds hll buckets' do
        expect(described_class.new(model).execute).to be_an_instance_of(Gitlab::Database::PostgresHll::Buckets)
      end

      it "defaults batch size to #{Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE}" do
        min_id = model.minimum(:id)
        # Last id of the first batch when no batch size is passed: the id range
        # is inclusive, hence the size is reduced by one.
        batch_end_id = min_id + calculate_batch_size(described_class::DEFAULT_BATCH_SIZE)

        expect(model).to receive(:where).with("id" => min_id..batch_end_id).and_call_original

        described_class.new(model).execute
      end

      context 'when a transaction is open' do
        let(:in_transaction) { true }

        it 'raises an error' do
          expect { described_class.new(model, column).execute }.to raise_error('BatchCount can not be run inside a transaction')
        end
      end

      context 'disallowed configurations' do
        it 'raises WRONG_CONFIGURATION_ERROR if start is bigger than finish' do
          expect { described_class.new(model, column).execute(start: 1, finish: 0) }.to raise_error(described_class::WRONG_CONFIGURATION_ERROR)
        end

        it 'raises WRONG_CONFIGURATION_ERROR if data volume exceeds upper limit' do
          # A finish value just past the maximum data volume the counter defines.
          large_finish = described_class::MAX_DATA_VOLUME + 1
          expect { described_class.new(model, column).execute(start: 1, finish: large_finish) }.to raise_error(described_class::WRONG_CONFIGURATION_ERROR)
        end

        it 'raises WRONG_CONFIGURATION_ERROR if batch size is less than min required' do
          expect { described_class.new(model, column).execute(batch_size: small_batch_size) }.to raise_error(described_class::WRONG_CONFIGURATION_ERROR)
        end
      end
    end
  end
end