blob: 26087e74e9c7badb191064c6db2abeefcae80e55 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_FEDERATED_LEARNING_SIM_HASH_H_
#define COMPONENTS_FEDERATED_LEARNING_SIM_HASH_H_
#include <stdint.h>
#include <set>
#include <string>
#include <unordered_set>
namespace federated_learning {
// A bit vector with 2^64 addressable positions, stored sparsely: only the
// positions of set bits are recorded (see |positions_of_set_bits_|), so the
// storage grows with the number of set bits rather than with the nominal
// size of the vector.
class LargeBitVector {
public:
// The constructors and destructor are only declared here; their definitions
// live outside this header.
LargeBitVector();
LargeBitVector(const LargeBitVector&);
~LargeBitVector();
// Marks the bit at position |pos| as set. |pos| spans the full uint64_t
// range, matching the 2^64 positions of the vector.
// NOTE(review): presumably inserts |pos| into |positions_of_set_bits_|, which
// would make repeated calls with the same |pos| a no-op -- confirm against
// the definition.
void SetBit(uint64_t pos);
// Returns, by const reference, the positions of the bits that are set.
// Because the container is a std::set, iterating it visits the positions in
// ascending order without duplicates.
// NOTE(review): presumably refers to |positions_of_set_bits_|, in which case
// the reference is only valid while this object is alive -- confirm.
const std::set<uint64_t>& PositionsOfSetBits() const;
private:
// Sparse representation of a 2^64 bit vector. Each number in
// |positions_of_set_bits_| represents the position of a bit that is being
// set. Positions that are absent from the set are bits that are not set.
std::set<uint64_t> positions_of_set_bits_;
};
// Sets the two seeds (|seed1|, |seed2|) used for generating the random
// gaussian. As the name indicates, this is meant to be called from tests only.
// NOTE(review): presumably the seeds are shared state that influences later
// SimHashBits() / SimHashStrings() results -- confirm against the definition.
void SetSeedsForTesting(uint64_t seed1, uint64_t seed2);
// SimHashes |input|, a 2^64 bit vector, down to an |output_dimensions| bit
// number, which is returned in a uint64_t.
// |output_dimensions| must be greater than 0 and no greater than 64.
// NOTE(review): what happens when |output_dimensions| is out of range, and
// which bits of the return value hold the hash when |output_dimensions| is
// less than 64, cannot be determined from this header -- confirm against the
// definition.
uint64_t SimHashBits(const LargeBitVector& input, size_t output_dimensions);
// SimHashes |input|, a set of strings, down to an |output_dimensions| bit
// number, which is returned in a uint64_t. Since |input| is an unordered_set,
// each distinct string appears in it at most once and it carries no ordering.
// |output_dimensions| must be greater than 0 and no greater than 64.
// NOTE(review): presumably each string is mapped to a bit position of a
// LargeBitVector that is then hashed like SimHashBits() does -- confirm
// against the definition.
uint64_t SimHashStrings(const std::unordered_set<std::string>& input,
size_t output_dimensions);
} // namespace federated_learning
#endif // COMPONENTS_FEDERATED_LEARNING_SIM_HASH_H_
|