#include "detail/slab_node.h"
#include "detail/traverse.h"

#include <thrust/device_vector.h>
#include <cuda/std/utility>
/// lslab map for GPU
template<typename K,
         typename V,
         typename Allocator = device_allocator<detail::slab_node<K, V>>,
         typename Hash = hash<K>>
struct map {

    using this_t = map<K, V, Allocator, Hash>; // alias used by the kernel launches below
    LSLAB_HOST map(unsigned n_log_2) : number_of_buckets_log_2(n_log_2) {
        size_t size = 1 << n_log_2;
        cudaMalloc(&lock_table, sizeof(warp_mutex) * size);
        cudaMemset(lock_table, 0, sizeof(warp_mutex) * size);
        // ... (remaining setup elided in this listing)
    }
    LSLAB_HOST map(unsigned n_log_2, Allocator&& a)
        : number_of_buckets_log_2(n_log_2), alloc(a) {
        size_t size = 1 << n_log_2;
        cudaMalloc(&lock_table, sizeof(warp_mutex) * size);
        cudaMemset(lock_table, 0, sizeof(warp_mutex) * size);
        // ... (remaining setup elided in this listing)
    }
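    // Usage sketch (illustration only, not part of the header): constructing the map on the
    // host. The constructor argument is log2 of the bucket count, so 10 gives 1 << 10 = 1024
    // buckets. The enclosing namespace (assumed here to be lslab) and the key/value types are
    // assumptions for the example; the second constructor additionally takes the allocator.
    //
    //   lslab::map<int, int> m(10);   // 1024 buckets, default allocator and hash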
    LSLAB_HOST_DEVICE ~map() {
        // ... (body elided in this listing)
    }
    template<typename Fn>
    LSLAB_DEVICE void find_function(const K& key, Fn&& fn, bool thread_mask = true) {
        size_t hash = Hash{}(key);
        hash &= ((1 << number_of_buckets_log_2) - 1);
        // ... (traversal elided in this listing)
    }
    LSLAB_DEVICE bool get(const K& key, V& value, bool thread_mask = true) {
        // local functor handed to find_function; its remaining members are elided here
        struct {
            LSLAB_DEVICE void operator()(const V& val) {
                // ...
            }
            // ...
        } fn;
        find_function(key, fn, thread_mask);
        // ...
    }
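    // Sketch of the same pattern with a caller-supplied functor (illustration only; the
    // functor name and members are hypothetical, and the assumption is that find_function
    // invokes the functor with the stored value when the key is present):
    //
    //   struct copy_out {
    //       LSLAB_DEVICE void operator()(const int& val) { result = val; found = true; }
    //       int result = 0;
    //       bool found = false;
    //   };
    //
    //   // in device code, with m a map<int, int>:
    //   copy_out fn;
    //   m.find_function(key, fn, true);
    //   if (fn.found) { /* use fn.result */ }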
    template<typename Fn>
    LSLAB_DEVICE void insert_function(const K& key, Fn&& fn, bool thread_mask = true) {
        // ... (t is a traversal helper from detail/traverse.h; its declaration is elided here)
        size_t hash = Hash{}(key) & ((1 << number_of_buckets_log_2) - 1);
        t.template operator()<K, V, Fn>(lock_table, buckets_array, key, std::forward<Fn>(fn), alloc,
                                        hash, thread_mask);
    }
    LSLAB_DEVICE V put(const K& key, const V& value, bool thread_mask = true) {
        // local functor handed to insert_function; it receives a mutable reference to the
        // slot for key (remaining members elided here)
        struct {
            LSLAB_DEVICE void operator()(V& val) {
                // ...
            }
            // ...
        } fn;
        insert_function(key, fn, thread_mask);
        // ...
    }
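    // Sketch of insert_function with a custom functor (illustration only; the functor name is
    // hypothetical, and it assumes a newly inserted slot starts out value-initialized, which
    // this listing does not confirm):
    //
    //   struct add_delta {
    //       LSLAB_DEVICE void operator()(int& val) { val += delta; }  // in-place accumulate
    //       int delta;
    //   };
    //
    //   // in device code, with m a map<int, int>:
    //   add_delta fn{1};
    //   m.insert_function(key, fn, true);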
    template<typename Fn>
    LSLAB_DEVICE bool update_function(const K& key, Fn&& fn, bool thread_mask = true) {
        // ... (body elided in this listing)
    }
    LSLAB_DEVICE cuda::std::pair<bool, V> update(const K& key, const V& value, bool thread_mask = true) {
        // local functor handed to update_function; found and tmp feed the return value below
        struct {
            LSLAB_DEVICE void operator()(V& val) {
                // ...
            }
            bool found;
            V tmp;
            // ...
        } fn;
        update_function(key, fn, thread_mask);
        return {fn.found, fn.tmp};
    }
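    // Usage sketch for the device-side calls above (illustration only; the kernel is
    // hypothetical). The map object can be passed to a kernel by value, as the bulk host
    // methods below do. thread_mask is presumably how a thread opts out of an operation while
    // still taking part in the warp-cooperative call, so threads past the end of the input
    // pass false instead of returning early.
    //
    //   template<typename K, typename V>
    //   __global__ void insert_all(map<K, V> m, const K* keys, const V* vals, size_t n) {
    //       size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
    //       bool active = tid < n;
    //       K key = active ? keys[tid] : K{};
    //       V val = active ? vals[tid] : V{};
    //       m.put(key, val, active);
    //   }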
    template<int block_size = 256>
    LSLAB_HOST void put(cuda::std::pair<K, V>* operations, V* output, size_t size,
                        cudaStream_t stream = 0x0) {
        map_kernels::put_<block_size, this_t, K, V>
            <<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(
                *this, operations, output, size);
    }
    template<int block_size = 256>
    LSLAB_HOST void put(K* operations_keys, V* operations_values, V* output, size_t size,
                        cudaStream_t stream = 0x0) {
        map_kernels::put_<block_size, this_t, K, V>
            <<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(
                *this, operations_keys, operations_values, output, size);
    }
    template<int block_size = 256>
    LSLAB_HOST void get(K* operations, cuda::std::pair<bool, V>* output, size_t size,
                        cudaStream_t stream = 0x0) {
        map_kernels::get_<block_size, this_t, K, V>
            <<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(
                *this, operations, output, size);
    }
    template<int block_size = 256>
    LSLAB_HOST void update(cuda::std::pair<K, V>* operations, cuda::std::pair<bool, V>* output,
                           size_t size, cudaStream_t stream = 0x0) {
        map_kernels::update_<block_size, this_t, K, V>
            <<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(
                *this, operations, output, size);
    }
    LSLAB_HOST_DEVICE unsigned buckets() {
        return 1 << number_of_buckets_log_2;
    }

    // remaining data members (lock_table, buckets_array, alloc) elided in this listing
    unsigned number_of_buckets_log_2;
};