4 #include "detail/slab_node.h"
5 #include "detail/traverse.h"
7 #include <thrust/device_vector.h>
9 #include <cuda/std/utility>
12 #include "detail/set_kernels.h"
// Template parameter list for the declarations that follow (presumably the
// `set` class template; its class-head line is not visible in this listing).
//   K         - key type; member functions take keys as `const K&` / `const K*`.
//   Allocator - allocator type, defaulting to device_allocator<detail::set_node<K>>.
//   Hash      - hash functor type, defaulting to hash<K>; it is default-constructed
//               and called as Hash{}(key) by the device-side operations.
21 template<
typename K,
typename Allocator = device_allocator<detail::set_node<K>>,
typename Hash = hash<K>>
// Default constructor: delegates to the set(unsigned) constructor with
// n_log_2 = 10.
// LSLAB_HOST is a project macro — presumably expands to __host__; confirm.
27 LSLAB_HOST
set() :
set(10) {
// Host constructor taking the base-2 logarithm of the table size.
// Stores n_log_2 in number_of_buckets_log_2, then allocates and zero-fills a
// lock table of (1 << n_log_2) warp_mutex entries with cudaMalloc/cudaMemset.
// NOTE(review): only part of the body is visible in this listing; other
// statements (e.g. for buckets_array) may exist — confirm against the file.
31 LSLAB_HOST
set(
unsigned n_log_2) : number_of_buckets_log_2(n_log_2) {
// NOTE(review): `1 << n_log_2` is computed as int before widening to size_t,
// so n_log_2 >= 31 overflows; `size_t{1} << n_log_2` would avoid that.
32 size_t size = 1 << n_log_2;
// NOTE(review): the cudaError_t returned by cudaMalloc is not checked.
33 cudaMalloc(&lock_table,
sizeof(
warp_mutex) * size);
// Byte-wise zero fill of every lock entry; the return code is not checked.
35 cudaMemset(lock_table, 0,
sizeof(
warp_mutex) * size);
// Host constructor taking the base-2 logarithm of the table size and an
// allocator. Stores n_log_2, initialises `alloc` from `a`, then allocates and
// zero-fills a lock table of (1 << n_log_2) warp_mutex entries.
// NOTE(review): `a` is a named rvalue reference, so `alloc(a)` selects the
// copy constructor rather than a move — confirm this is intended.
// NOTE(review): only part of the body is visible in this listing; other
// statements (e.g. for buckets_array) may exist — confirm against the file.
42 LSLAB_HOST
set(
unsigned n_log_2, Allocator&& a) : number_of_buckets_log_2(n_log_2), alloc(a) {
// NOTE(review): `1 << n_log_2` is computed as int before widening to size_t,
// so n_log_2 >= 31 overflows.
43 size_t size = 1 << n_log_2;
// NOTE(review): the cudaError_t returned by cudaMalloc is not checked.
44 cudaMalloc(&lock_table,
sizeof(
warp_mutex) * size);
// Byte-wise zero fill of every lock entry; the return code is not checked.
46 cudaMemset(lock_table, 0,
sizeof(
warp_mutex) * size);
// Device-side constructor that adopts existing storage: the member
// initialisers store the given lock table pointer `lt`, bucket array pointer
// `s`, log2 bucket count and allocator; nothing is allocated in the visible
// part of this constructor.
// LSLAB_DEVICE is a project macro — presumably expands to __device__; confirm.
// NOTE(review): `a` is a named rvalue reference, so `alloc(a)` copies.
57 LSLAB_DEVICE
set(
warp_mutex* lt,
detail::set_node<K>* s,
unsigned n_log_2, Allocator&& a) : lock_table(lt), buckets_array(s), number_of_buckets_log_2(n_log_2), alloc(a) {
// Destructor, marked LSLAB_HOST_DEVICE (project macro — presumably
// __host__ __device__; confirm). Its body is not visible in this listing, so
// whether lock_table is released here cannot be told from this excerpt.
61 LSLAB_HOST_DEVICE ~
set() {
// Device-side membership query for a single key; returns bool.
//   key         - key to look up.
//   thread_mask - defaults to true; how it is consumed is on lines not
//                 visible in this listing.
// The visible part hashes the key and reduces the hash to a bucket index.
64 LSLAB_DEVICE
bool contains(
const K& key,
bool thread_mask =
true) {
// Hash with a default-constructed Hash functor.
66 size_t hash = Hash{}(key);
// Keep only the low number_of_buckets_log_2 bits (power-of-two table mask).
// NOTE(review): the mask is built from an int `1 << ...`, so it is only
// well-defined for number_of_buckets_log_2 < 31.
67 hash &= ((1 << number_of_buckets_log_2) - 1);
// Device-side insertion of a single key; returns bool.
//   key         - key to insert.
//   thread_mask - defaults to true and is forwarded to the traversal call.
// `t` and `result` are declared on lines not visible in this listing;
// presumably `result` is the value returned — confirm.
74 LSLAB_DEVICE
bool insert(
const K& key,
bool thread_mask =
true) {
// Bucket index: hash of the key masked to the low number_of_buckets_log_2
// bits. NOTE(review): the mask uses an int `1 << ...` (valid below 31).
77 size_t hash = Hash{}(key) & ((1 << number_of_buckets_log_2) - 1);
// Invoke the traversal functor's templated call operator with the lock table,
// bucket array, key, result slot, allocator, bucket index and thread mask.
79 t.template operator()<K>(lock_table, buckets_array, key, result, alloc,
hash, thread_mask);
// Device-side removal of a single key; returns bool.
//   key         - key to remove.
//   thread_mask - defaults to true and is forwarded to the traversal call.
// `t` and `result` are declared on lines not visible in this listing;
// presumably `result` is the value returned — confirm.
83 LSLAB_DEVICE
bool remove(
const K& key,
bool thread_mask =
true) {
// Bucket index: hash of the key masked to the low number_of_buckets_log_2
// bits. NOTE(review): the mask uses an int `1 << ...` (valid below 31).
86 size_t hash = Hash{}(key) & ((1 << number_of_buckets_log_2) - 1);
// Invoke the traversal functor's templated call operator with the lock table,
// bucket array, key, result slot, allocator, bucket index and thread mask.
88 t.template operator()<K>(lock_table, buckets_array, key, result, alloc,
hash, thread_mask);
/**
 * Host-side bulk membership query.
 *
 * Launches set_kernels::contains_ on `stream` with ceil(size / block_size)
 * blocks of block_size threads, passing `*this`, `keys`, `output` and `size`.
 * The launch is asynchronous and no CUDA error is queried here.
 *
 * @tparam block_size threads per block used for the launch (default 256).
 * @param keys   pointer to the keys; it is handed to the kernel, so it is
 *               presumably device-accessible memory — confirm.
 * @param output pointer handed to the kernel for the results; presumably one
 *               bool per key — confirm against set_kernels::contains_.
 * @param size   number of keys; when zero the function returns without
 *               launching anything.
 * @param stream stream to launch on (0x0 when omitted).
 */
template<int block_size = 256>
LSLAB_HOST void contains(const K* keys, bool* output, size_t size, cudaStream_t stream = 0x0) {
    // A grid of zero blocks is an invalid launch configuration, so an empty
    // batch must never reach the launch below.
    if (size == 0) {
        return;
    }

    // Ceil-division so a trailing partial block is still covered.
    const size_t num_blocks = (size + block_size - 1) / block_size;
    set_kernels::contains_<block_size, this_t, K><<<num_blocks, block_size, 0, stream>>>(*this, keys, output, size);
}
/**
 * Convenience overload of the bulk membership query for thrust vectors.
 *
 * Extracts the raw pointers of `keys` and `output` and forwards them, with
 * keys.size() as the element count, to the pointer-based contains overload.
 * `output` is not resized here.
 *
 * @tparam block_size threads per block forwarded to the pointer overload.
 * @param keys   device vector of keys to look up.
 * @param output device vector whose storage is passed on as the result buffer.
 * @param stream stream forwarded to the pointer overload (0x0 when omitted).
 */
template<int block_size = 256>
LSLAB_HOST void contains(const thrust::device_vector<K>& keys, thrust::device_vector<bool>& output, cudaStream_t stream = 0x0) {
    const K* key_ptr = keys.data().get();
    bool* result_ptr = output.data().get();
    this->template contains<block_size>(key_ptr, result_ptr, keys.size(), stream);
}
/**
 * Host-side bulk insertion.
 *
 * Launches set_kernels::insert_ on `stream` with ceil(size / block_size)
 * blocks of block_size threads, passing `*this`, `keys`, `output` and `size`.
 * The launch is asynchronous and no CUDA error is queried here.
 *
 * @tparam block_size threads per block used for the launch (default 256).
 * @param keys   pointer to the keys; it is handed to the kernel, so it is
 *               presumably device-accessible memory — confirm.
 * @param output pointer handed to the kernel for the results; presumably one
 *               bool per key — confirm against set_kernels::insert_.
 * @param size   number of keys; when zero the function returns without
 *               launching anything.
 * @param stream stream to launch on (0x0 when omitted).
 */
template<int block_size = 256>
LSLAB_HOST void insert(const K* keys, bool* output, size_t size, cudaStream_t stream = 0x0) {
    // A grid of zero blocks is an invalid launch configuration, so an empty
    // batch must never reach the launch below.
    if (size == 0) {
        return;
    }

    // Ceil-division so a trailing partial block is still covered.
    const size_t num_blocks = (size + block_size - 1) / block_size;
    set_kernels::insert_<block_size, this_t, K><<<num_blocks, block_size, 0, stream>>>(*this, keys, output, size);
}
/**
 * Convenience overload of the bulk insertion for thrust vectors.
 *
 * Extracts the raw pointers of `keys` and `output` and forwards them, with
 * keys.size() as the element count, to the pointer-based insert overload.
 * `output` is not resized here.
 *
 * @tparam block_size threads per block forwarded to the pointer overload.
 * @param keys   device vector of keys to insert.
 * @param output device vector whose storage is passed on as the result buffer.
 * @param stream stream forwarded to the pointer overload (0x0 when omitted).
 */
template<int block_size = 256>
LSLAB_HOST void insert(const thrust::device_vector<K>& keys, thrust::device_vector<bool>& output, cudaStream_t stream = 0x0) {
    const K* key_ptr = keys.data().get();
    bool* result_ptr = output.data().get();
    this->template insert<block_size>(key_ptr, result_ptr, keys.size(), stream);
}
/**
 * Host-side bulk removal.
 *
 * Launches set_kernels::remove_ on `stream` with ceil(size / block_size)
 * blocks of block_size threads, passing `*this`, `keys`, `output` and `size`.
 * The launch is asynchronous and no CUDA error is queried here.
 *
 * @tparam block_size threads per block used for the launch (default 256).
 * @param keys   pointer to the keys; it is handed to the kernel, so it is
 *               presumably device-accessible memory — confirm.
 * @param output pointer handed to the kernel for the results; presumably one
 *               bool per key — confirm against set_kernels::remove_.
 * @param size   number of keys; when zero the function returns without
 *               launching anything.
 * @param stream stream to launch on (0x0 when omitted).
 */
template<int block_size = 256>
LSLAB_HOST void remove(const K* keys, bool* output, size_t size, cudaStream_t stream = 0x0) {
    // A grid of zero blocks is an invalid launch configuration, so an empty
    // batch must never reach the launch below.
    if (size == 0) {
        return;
    }

    // Ceil-division so a trailing partial block is still covered.
    const size_t num_blocks = (size + block_size - 1) / block_size;
    set_kernels::remove_<block_size, this_t, K><<<num_blocks, block_size, 0, stream>>>(*this, keys, output, size);
}
/**
 * Convenience overload of the bulk removal for thrust vectors.
 *
 * Extracts the raw pointers of `keys` and `output` and forwards them, with
 * keys.size() as the element count, to the pointer-based remove overload.
 * `output` is not resized here.
 *
 * @tparam block_size threads per block forwarded to the pointer overload.
 * @param keys   device vector of keys to remove.
 * @param output device vector whose storage is passed on as the result buffer.
 * @param stream stream forwarded to the pointer overload (0x0 when omitted).
 */
template<int block_size = 256>
LSLAB_HOST void remove(const thrust::device_vector<K>& keys, thrust::device_vector<bool>& output, cudaStream_t stream = 0x0) {
    const K* key_ptr = keys.data().get();
    bool* result_ptr = output.data().get();
    this->template remove<block_size>(key_ptr, result_ptr, keys.size(), stream);
}
/**
 * Number of buckets in the table.
 *
 * @return 2 raised to number_of_buckets_log_2, as an unsigned value.
 */
LSLAB_HOST_DEVICE unsigned buckets() const {
    // Shift an unsigned one: `1 << 31` on a signed int overflows, whereas the
    // unsigned shift is well-defined for every exponent below the bit width
    // and matches the unsigned return type.
    return 1u << number_of_buckets_log_2;
}
// Base-2 logarithm of the bucket count: buckets() returns 1 shifted left by
// this value, and the device operations mask hashes to this many low bits.
132 unsigned number_of_buckets_log_2;
Definition: traverse.h:22
Definition: warp_mutex.h:13