#pragma once

#include "detail/slab_node.h"
#include "detail/traverse.h"
#include "device_allocator.h"
#include <thrust/device_vector.h>
#include <cuda.h>
#include <cuda/std/utility>
#include <utility>
#include "hash.h"
#include "detail/map_kernels.h"

namespace lslab {

/// lslab map for GPU
template<typename K, typename V, typename Allocator = device_allocator<detail::slab_node<K, V>>, typename Hash = hash<K>>
class map {
public:

    // type alias used by the batched host-side operations below
    using this_t = map<K, V, Allocator, Hash>;

    LSLAB_HOST map() : map(10) {}

    LSLAB_HOST map(unsigned n_log_2) : number_of_buckets_log_2(n_log_2) {
        size_t size = 1ull << n_log_2;
        cudaMalloc(&lock_table, sizeof(warp_mutex) * size);
        cudaMemset(lock_table, 0, sizeof(warp_mutex) * size);
        cudaMalloc(&buckets_array, sizeof(detail::slab_node<K, V>) * size);
        cudaMemset(buckets_array, 0, sizeof(detail::slab_node<K, V>) * size);
    }

    LSLAB_HOST map(unsigned n_log_2, Allocator&& a) : number_of_buckets_log_2(n_log_2), alloc(a) {
        size_t size = 1ull << n_log_2;
        cudaMalloc(&lock_table, sizeof(warp_mutex) * size);
        cudaMemset(lock_table, 0, sizeof(warp_mutex) * size);
        cudaMalloc(&buckets_array, sizeof(detail::slab_node<K, V>) * size);
        cudaMemset(buckets_array, 0, sizeof(detail::slab_node<K, V>) * size);
    }

    LSLAB_HOST_DEVICE map(warp_mutex* lt, detail::slab_node<K, V>* s, unsigned n_log_2) : lock_table(lt), buckets_array(s), number_of_buckets_log_2(n_log_2) {}

    LSLAB_DEVICE map(warp_mutex* lt, detail::slab_node<K, V>* s, unsigned n_log_2, Allocator&& a) : lock_table(lt), buckets_array(s), number_of_buckets_log_2(n_log_2), alloc(a) {}

    LSLAB_HOST_DEVICE ~map() {}

    template<typename Fn>
    LSLAB_DEVICE void find_function(const K& key, Fn&& fn, bool thread_mask = true) {
        size_t hash = Hash{}(key);
        hash &= ((1 << number_of_buckets_log_2) - 1);
        detail::traverse<Allocator, detail::OPERATION_TYPE::FIND>{}(lock_table, buckets_array, key, fn, alloc, hash, thread_mask);
    }

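    // find_function() applies a caller-supplied functor to the stored value
    // when the key is present; get() below is a thin wrapper around it. A
    // hypothetical functor (illustrative only, not part of this header) that
    // merely checks for presence:
    //
    //   struct contains_fn {
    //       LSLAB_DEVICE void operator()(const V&) { found = true; }
    //       bool found = false;
    //   };
    //
    //   // inside a kernel: contains_fn fn; m.find_function(key, fn, active);
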
    LSLAB_DEVICE bool get(const K& key, V& value, bool thread_mask = true) {
        struct Fn {
            LSLAB_DEVICE void operator()(const V& val) {
                value = val;
                found = true;
            }
            bool found;
            V& value;
        };

        Fn fn{false, value};

        find_function(key, fn, thread_mask);
        return fn.found;
    }

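    // The thread_mask argument marks whether this thread has a real lookup of
    // its own; threads without one can pass false (the traversal in
    // detail/traverse.h takes the mask into account). A minimal caller sketch
    // (hypothetical kernel, not part of this header):
    //
    //   template<typename Map, typename K, typename V>
    //   __global__ void get_kernel(Map m, const K* keys, V* out, bool* found, size_t n) {
    //       size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
    //       bool active = tid < n;
    //       K key = active ? keys[tid] : K{};
    //       V value{};
    //       bool hit = m.get(key, value, active);
    //       if (active) { out[tid] = value; found[tid] = hit; }
    //   }
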
    template<typename Fn>
    LSLAB_DEVICE void insert_function(const K& key, Fn&& fn, bool thread_mask = true) {
        // assumption: traverse_t is the INSERT counterpart of the FIND/UPDATE
        // traversals used by find_function() and update_function()
        using traverse_t = detail::traverse<Allocator, detail::OPERATION_TYPE::INSERT>;
        traverse_t t;
        size_t hash = Hash{}(key) & ((1 << number_of_buckets_log_2) - 1);
        t.template operator()<K, V, Fn>(lock_table, buckets_array, key, std::forward<Fn>(fn), alloc, hash, thread_mask);
    }

    LSLAB_DEVICE V put(const K& key, const V& value, bool thread_mask = true) {
        struct Fn_put {
            LSLAB_DEVICE void operator()(V& val) {
                tmp = val;
                val = value;
            }
            const V& value;
            V tmp;
        };

        Fn_put fn{value};

        insert_function(key, fn, thread_mask);
        return fn.tmp;
    }

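    // A hypothetical kernel using the per-thread put() (a sketch only; the
    // kernel name and parameters are not part of this header). This is roughly
    // the shape of work that the batched host-side put() below hands to
    // map_kernels::put_:
    //
    //   template<typename Map, typename K, typename V>
    //   __global__ void put_kernel(Map m, const K* keys, const V* vals, V* old_vals, size_t n) {
    //       size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
    //       bool active = tid < n;
    //       K key = active ? keys[tid] : K{};
    //       V val = active ? vals[tid] : V{};
    //       V prev = m.put(key, val, active);  // previous value stored under key
    //       if (active) old_vals[tid] = prev;
    //   }
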
    template<typename Fn>
    LSLAB_DEVICE void update_function(const K& key, Fn&& fn, bool thread_mask = true) {
        detail::traverse<Allocator, detail::OPERATION_TYPE::UPDATE>{}(lock_table, buckets_array, key, fn, alloc, Hash{}(key) & ((1 << number_of_buckets_log_2) - 1), thread_mask);
    }

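    // update_function() hands the stored value to a caller-supplied functor;
    // update() below uses it to overwrite an existing entry and report whether
    // the key was found. A hypothetical read-modify-write functor (illustrative
    // only, not part of this header):
    //
    //   struct increment_fn {
    //       LSLAB_DEVICE void operator()(V& val) { ++val; hit = true; }
    //       bool hit = false;
    //   };
    //
    //   // inside a kernel: increment_fn fn; m.update_function(key, fn, active);
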
    LSLAB_DEVICE cuda::std::pair<bool, V> update(const K& key, const V& value, bool thread_mask = true) {
        struct Fn {
            LSLAB_DEVICE void operator()(V& val) {
                tmp = val;  // capture the previous value before overwriting it
                val = value;
                found = true;
            }
            bool found;
            const V& value;
            V tmp;
        };

        Fn fn{false, value};

        update_function(key, fn, thread_mask);
        return {fn.found, fn.tmp};
    }

    template<int block_size = 256>
    LSLAB_HOST void put(cuda::std::pair<K, V>* operations, V* output, size_t size, cudaStream_t stream = 0x0) {
        map_kernels::put_<block_size, this_t, K, V><<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(*this, operations, output, size);
    }

    template<int block_size = 256>
    LSLAB_HOST void put(K* operations_keys, V* operations_values, V* output, size_t size, cudaStream_t stream = 0x0) {
        map_kernels::put_<block_size, this_t, K, V><<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(*this, operations_keys, operations_values, output, size);
    }

    template<int block_size = 256>
    LSLAB_HOST void get(K* operations, cuda::std::pair<bool, V>* output, size_t size, cudaStream_t stream = 0x0) {
        map_kernels::get_<block_size, this_t, K, V><<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(*this, operations, output, size);
    }

    template<int block_size = 256>
    LSLAB_HOST void update(cuda::std::pair<K, V>* operations, cuda::std::pair<bool, V>* output, size_t size, cudaStream_t stream = 0x0) {
        map_kernels::update_<block_size, this_t, K, V><<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(*this, operations, output, size);
    }

    LSLAB_HOST_DEVICE unsigned buckets() {
        return 1 << number_of_buckets_log_2;
    }

private:
    warp_mutex* lock_table;
    detail::slab_node<K, V>* buckets_array;
    unsigned number_of_buckets_log_2;
    Allocator alloc;
};

} // namespace lslab
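
// Example host-side usage (a minimal sketch; example(), the buffer sizes, and
// the include path are illustrative and not part of the library). Compile with
// nvcc, since the batched calls launch kernels:
//
//   #include <lslab/map.h>
//   #include <thrust/device_vector.h>
//
//   void example() {
//       lslab::map<int, int> m(10);   // 2^10 buckets
//
//       thrust::device_vector<cuda::std::pair<int, int>> ops(1024);
//       thrust::device_vector<int> old_values(1024);
//       // ... fill ops with key/value pairs ...
//
//       // batched insert: one thread per operation, default stream
//       m.put(thrust::raw_pointer_cast(ops.data()),
//             thrust::raw_pointer_cast(old_values.data()), ops.size());
//
//       thrust::device_vector<int> keys(1024);
//       thrust::device_vector<cuda::std::pair<bool, int>> results(1024);
//       // batched lookup: results[i] holds (found, value) for keys[i]
//       m.get(thrust::raw_pointer_cast(keys.data()),
//             thrust::raw_pointer_cast(results.data()), keys.size());
//
//       cudaDeviceSynchronize();
//   }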