summaryrefslogtreecommitdiff
path: root/chromium/components/federated_learning/sim_hash.h
blob: 26087e74e9c7badb191064c6db2abeefcae80e55 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_FEDERATED_LEARNING_SIM_HASH_H_
#define COMPONENTS_FEDERATED_LEARNING_SIM_HASH_H_

#include <stddef.h>
#include <stdint.h>

#include <set>
#include <string>
#include <unordered_set>

namespace federated_learning {

// A bit vector with 2^64 addressable positions (one per uint64_t value),
// stored sparsely: only the positions of the bits that are set are kept.
//
// The default constructor, copy constructor and destructor are declared here
// and defined outside this header. No copy-assignment operator is declared, so
// the implicitly generated one is used; no move operations are declared
// either, so moves fall back to copying.
class LargeBitVector {
 public:
  LargeBitVector();
  LargeBitVector(const LargeBitVector&);
  ~LargeBitVector();

  // Marks the bit at position |pos| as set.
  void SetBit(uint64_t pos);

  // Returns the positions of the bits that are set. The container is a
  // std::set, so iterating it yields each position once, in ascending order.
  // NOTE(review): the result is returned by reference, presumably to
  // |positions_of_set_bits_|; if so, it must not be used after this object is
  // destroyed. Confirm against the definition.
  const std::set<uint64_t>& PositionsOfSetBits() const;

 private:
  // Sparse representation of a 2^64 bit vector. Each number in
  // |positions_of_set_bits_| represents the position of a bit that is being
  // set. Positions that are absent from the set are bits that are not set.
  std::set<uint64_t> positions_of_set_bits_;
};

// Sets the two seeds used for generating the random Gaussian values.
// As the name indicates, this is a hook for tests.
// NOTE(review): how |seed1| and |seed2| are consumed, and how long an
// override stays in effect, is decided by the definition, which is not part
// of this header.
void SetSeedsForTesting(uint64_t seed1, uint64_t seed2);

// Computes the SimHash of |input|, a 2^64 bit vector, reducing it to an
// |output_dimensions| bit number that is returned as a uint64_t.
// |output_dimensions| must be greater than 0 and no greater than 64.
// NOTE(review): presumably the result occupies the low |output_dimensions|
// bits of the return value, and behavior for an out-of-range
// |output_dimensions| is not specified here. Confirm against the definition.
uint64_t SimHashBits(const LargeBitVector& input, size_t output_dimensions);

// Computes the SimHash of |input|, an unordered set of strings, reducing it
// to an |output_dimensions| bit number that is returned as a uint64_t.
// |output_dimensions| must be greater than 0 and no greater than 64.
// NOTE(review): presumably each string is first mapped to a bit position and
// the result is then hashed the same way as SimHashBits(). Confirm against
// the definition.
uint64_t SimHashStrings(const std::unordered_set<std::string>& input,
                        size_t output_dimensions);

}  // namespace federated_learning

#endif  // COMPONENTS_FEDERATED_LEARNING_SIM_HASH_H_