-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfixed_size_set.hpp
More file actions
121 lines (107 loc) · 3.06 KB
/
fixed_size_set.hpp
File metadata and controls
121 lines (107 loc) · 3.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#pragma once
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <forward_list>
#include <mutex>
#include <utility>
#include <vector>
/**
* Hash set for use by multiple threads at once.
*
* The number of buckets is fixed,
* so one should try and allocate an
* appropriate amount beforehand.
*
* @tparam Key The type to store in the set
* @tparam Hash Function-object type for hasing keys
* @tparam KeyEqual Function-object type for checking key equality
*/
template<
class Key,
class Hash = std::hash<Key>,
class KeyEqual = std::equal_to<Key>
> class fixed_size_set {
public:
/**
* Constructs the thread-safe set.
*
* @param bits Number of buckets will be 1<<bits
* @param seed Used in post-hash to make adversarial
* input hard to create
* @param hash Instance to use of the Hash function-object type
* @param key_equal Instance to use of the KeyEqual function-object type
*/
fixed_size_set(
int bits,
uint64_t seed = std::chrono::steady_clock::now().time_since_epoch().count(),
const Hash &hash = Hash(),
const KeyEqual &key_equal = KeyEqual()) :
fixed_random_(seed),
hash_(hash),
key_equal_(key_equal),
bits_(bits),
buckets_(size_t(1)<<bits) {}
/**
* @brief holds a member
* boolean with the name
* second to allow drop in
* replacement for std::set
*/
struct second_holder {
bool second;
operator bool() {
return second;
}
};
/**
* Insert a key in a set if it is not there,
* and return whether it was already there.
*
* @param key The key to insert
* @return true if the element was inserted,
* and false if it was already there.
*/
second_holder emplace(const Key &key) {
size_t index = get_index(key);
auto &[mutex, bucket] = buckets_[index];
std::lock_guard<std::mutex> lock(mutex);
auto already = std::find_if(bucket.begin(), bucket.end(), [&](auto &&in_bucket) {
return key_equal_(in_bucket, key);
});
if (already != bucket.end())
return {false};
bucket.push_front(key);
return {true};
}
private:
const uint64_t fixed_random_;
Hash hash_;
KeyEqual key_equal_;
const int bits_;
std::vector<std::pair<std::mutex, std::forward_list<Key>>> buckets_;
/**
* @brief Post-hash used to to mitigate
* issues from a bad distribution from hash_
*
* @param x hash value with a possibly bad distribution when truncated
* @return a new hash with low-bits hopefully better distributed
*/
static uint64_t splitmix64(uint64_t x) {
// http://xorshift.di.unimi.it/splitmix64.c
x += 0x9e3779b97f4a7c15;
x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;
x = (x ^ (x >> 27)) * 0x94d049bb133111eb;
return x ^ (x >> 31);
}
/**
* @brief maps a key to an index in [0, 2**bits)
*
* @param key the key to find an index for
* @return size_t the index
*/
size_t get_index(const Key &key) {
uint64_t hash = splitmix64(hash_(key) + fixed_random_);
return hash & ((size_t(1) << bits_) - 1);
}
};