Commit d8d8c113 authored by Thibault Allançon's avatar Thibault Allançon

mc: bloom_filter: implement lock-free concurrent filter

* spot/mc/bloom_filter.hh: implementation here
* bench/bitstate/bitstate.hh: update filter usage
parent 50b80924
Pipeline #17687 failed with stage
in 57 minutes and 58 seconds
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#include <spot/kripke/kripke.hh> #include <spot/kripke/kripke.hh>
#include <spot/mc/bloom_filter.hh> #include <spot/mc/bloom_filter.hh>
#include <spot/misc/hashfunc.hh>
using namespace spot; using namespace spot;
...@@ -32,14 +31,11 @@ class bitstate_hashing_stats ...@@ -32,14 +31,11 @@ class bitstate_hashing_stats
public: public:
bitstate_hashing_stats(kripkecube<State, SuccIterator>& sys, unsigned tid, bitstate_hashing_stats(kripkecube<State, SuccIterator>& sys, unsigned tid,
size_t mem_size): size_t mem_size):
sys_(sys), tid_(tid) sys_(sys), tid_(tid), bloom_filter_(mem_size)
{ {
static_assert(spot::is_a_kripkecube_ptr<decltype(&sys), static_assert(spot::is_a_kripkecube_ptr<decltype(&sys),
State, SuccIterator>::value, State, SuccIterator>::value,
"error: does not match the kripkecube requirements"); "error: does not match the kripkecube requirements");
bloom_filter::hash_functions_t hash_functions = {lookup3_hash};
bf_ = std::make_unique<bloom_filter>(mem_size, hash_functions);
} }
void push(State s) void push(State s)
...@@ -54,7 +50,7 @@ public: ...@@ -54,7 +50,7 @@ public:
auto current = todo_.back(); auto current = todo_.back();
todo_.pop_back(); todo_.pop_back();
seen_.erase(current.s); seen_.erase(current.s);
bf_->insert(state_hash_(current.s)); bloom_filter_.insert(state_hash_(current.s));
sys_.recycle(current.it, tid_); sys_.recycle(current.it, tid_);
} }
...@@ -77,7 +73,7 @@ public: ...@@ -77,7 +73,7 @@ public:
todo_.back().it->next(); todo_.back().it->next();
bool marked = seen_.find(next) != seen_.end() || bool marked = seen_.find(next) != seen_.end() ||
bf_->contains(state_hash_(next)); bloom_filter_.contains(state_hash_(next));
if (marked == false) if (marked == false)
push(next); push(next);
} }
...@@ -105,6 +101,5 @@ protected: ...@@ -105,6 +101,5 @@ protected:
std::unordered_set<State, StateHash, StateEqual> seen_; std::unordered_set<State, StateHash, StateEqual> seen_;
std::vector<todo__element> todo_; std::vector<todo__element> todo_;
unsigned int state_number_; unsigned int state_number_;
// TODO: unique_ptr are not thread safe concurrent_bloom_filter bloom_filter_;
std::unique_ptr<bloom_filter> bf_;
}; };
...@@ -19,33 +19,85 @@ ...@@ -19,33 +19,85 @@
#pragma once #pragma once
#include <spot/misc/hashfunc.hh>
#include <atomic>
#include <functional> #include <functional>
/* Lock-free concurrent Bloom Filter implementation */
namespace spot namespace spot
{ {
class bloom_filter class concurrent_bitset
{
public:
static const size_t BITS_PER_ELEMENT = 8 * sizeof(uint32_t);
Please register or sign in to reply
concurrent_bitset(size_t mem_size)
: mem_size_(mem_size)
{
bits_ = new std::atomic<uint32_t>[mem_size]();
Please register or sign in to reply
}
~concurrent_bitset()
{
delete[] bits_;
}
size_t get_size() const
{
return mem_size_;
}
size_t get_index(size_t bit) const
{
return bit / BITS_PER_ELEMENT;
}
size_t get_mask(size_t bit) const
{
return 1L << (bit % BITS_PER_ELEMENT);
}
void set(size_t bit)
{
bits_[get_index(bit)] |= get_mask(bit);
}
bool test(size_t bit) const
{
return bits_[get_index(bit)] & get_mask(bit);
}
private:
std::atomic<uint32_t> *bits_;
size_t mem_size_;
};
class concurrent_bloom_filter
Please register or sign in to reply
{ {
public: public:
// TODO: benchmark std::function overhead
using hash_t = size_t; using hash_t = size_t;
using hash_function_t = std::function<hash_t(hash_t)>; using hash_function_t = std::function<hash_t(hash_t)>;
using hash_functions_t = std::vector<hash_function_t>; using hash_functions_t = std::vector<hash_function_t>;
Please register or sign in to reply
bloom_filter(size_t mem_size, hash_functions_t hash_functions) concurrent_bloom_filter(size_t mem_size, hash_functions_t hash_functions)
: mem_size_(mem_size), hash_functions_(hash_functions) : bitset_(mem_size), hash_functions_(hash_functions)
{ {
if (hash_functions.empty()) if (hash_functions.empty())
throw std::invalid_argument("Bloom filter has no hash functions"); throw std::invalid_argument("Bloom filter has no hash functions");
bitset_.assign(mem_size, false);
} }
// Default internal hash function
concurrent_bloom_filter(size_t mem_size)
: concurrent_bloom_filter(mem_size, {lookup3_hash}) { }
void insert(hash_t elem) void insert(hash_t elem)
{ {
for (const auto& f : hash_functions_) for (const auto& f : hash_functions_)
  • Do we need a cache for already computed values since hashing may be costly? In yes, probably opt for a cache per thread, meaning that known our interface has to take, for every operation a tid

  • Since we are hashing integers I don't think the cost is really noticable and I would prefer to wait on further benchmark and profiling to identify if an internal cache is really needed. ;)

Please register or sign in to reply
{ {
hash_t hash = f(elem) % mem_size_; hash_t hash = f(elem) % bitset_.get_size();
  • ouch ! one call to bitset_.get_size() at the beginning of this class is probably a reasonable memory tradeoff compared to a call for every ELEMENT*NB_HASH_FUN, right?

  • Fixed in 04b98c2b.

Please register or sign in to reply
bitset_[hash] = true; bitset_.set(hash);
} }
} }
...@@ -53,8 +105,8 @@ namespace spot ...@@ -53,8 +105,8 @@ namespace spot
{ {
for (const auto& f : hash_functions_) for (const auto& f : hash_functions_)
{ {
hash_t hash = f(elem) % mem_size_; hash_t hash = f(elem) % bitset_.get_size();
if (bitset_[hash] == false) if (bitset_.test(hash) == false)
  • I understand why you split bit_set in one hand and concurrent_bloom_filter in the other hand. Nonetheless, it appears that we have a dereferencing every time we want and access to bitset (which in turn also dereference... ) i would probably prefer to merge the two. What do you think?

  • Plus the fact that each algorithm will dereference to access concurrent_bloom_filter

  • Fixed in 04b98c2b.

Please register or sign in to reply
return false; return false;
} }
...@@ -62,8 +114,7 @@ namespace spot ...@@ -62,8 +114,7 @@ namespace spot
} }
private: private:
size_t mem_size_; concurrent_bitset bitset_;
hash_functions_t hash_functions_; hash_functions_t hash_functions_;
std::vector<bool> bitset_;
}; };
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment