LSLab
map.h
#pragma once

#include "detail/slab_node.h"
#include "detail/traverse.h"
#include "device_allocator.h"
#include <thrust/device_vector.h>
#include <cuda.h>
#include <cuda/std/utility>
#include <utility>
#include "hash.h"
#include "warp_mutex.h"

namespace lslab {

template<int block_size, typename map_t, typename K, typename V>
__global__ void put_(map_t map, cuda::std::pair<K, V>* operations, V* output, size_t size);

template<int block_size, typename map_t, typename K, typename V>
__global__ void put_(map_t map, K* operations_keys, V* operations_values, V* output, size_t size);

template<int block_size, typename map_t, typename K, typename V>
__global__ void get_(map_t map, K* operations, cuda::std::pair<bool, V>* output, size_t size);

template<int block_size, typename map_t, typename K, typename V>
__global__ void update_(map_t map, cuda::std::pair<K, V>* operations, cuda::std::pair<bool, V>* output, size_t size);

/// lslab map for GPU
template<typename K, typename V, typename Allocator = device_allocator<detail::slab_node<K, V>>, typename Hash = hash<K>>
class map {
public:

    // Alias used by the host-side bulk operations below.
    using this_t = map<K, V, Allocator, Hash>;

    LSLAB_HOST map() : map(10) {
    }

    LSLAB_HOST map(unsigned n_log_2) : number_of_buckets_log_2(n_log_2) {
        size_t size = 1ull << n_log_2;
        cudaMalloc(&lock_table, sizeof(warp_mutex) * size);
        cudaMemset(lock_table, 0, sizeof(warp_mutex) * size);

        cudaMalloc(&buckets_array, sizeof(detail::slab_node<K, V>) * size);
        cudaMemset(buckets_array, 0, sizeof(detail::slab_node<K, V>) * size);
    }

    LSLAB_HOST map(unsigned n_log_2, Allocator&& a) : number_of_buckets_log_2(n_log_2), alloc(a) {
        size_t size = 1ull << n_log_2;
        cudaMalloc(&lock_table, sizeof(warp_mutex) * size);
        cudaMemset(lock_table, 0, sizeof(warp_mutex) * size);

        cudaMalloc(&buckets_array, sizeof(detail::slab_node<K, V>) * size);
        cudaMemset(buckets_array, 0, sizeof(detail::slab_node<K, V>) * size);
    }

    LSLAB_HOST_DEVICE map(warp_mutex* lt, detail::slab_node<K, V>* s, unsigned n_log_2) : lock_table(lt), buckets_array(s), number_of_buckets_log_2(n_log_2) {
    }

    LSLAB_DEVICE map(warp_mutex* lt, detail::slab_node<K, V>* s, unsigned n_log_2, Allocator&& a) : lock_table(lt), buckets_array(s), number_of_buckets_log_2(n_log_2), alloc(a) {
    }

    // Intentionally empty: the map is passed to kernels by value, so copies must not
    // release the lock table or the bucket array.
    LSLAB_HOST_DEVICE ~map() {
    }

    template<typename Fn>
    LSLAB_DEVICE void find_function(const K& key, Fn&& fn, bool thread_mask = true) {
        size_t hash = Hash{}(key);
        hash &= ((1ull << number_of_buckets_log_2) - 1);

        detail::traverse<Allocator, detail::OPERATION_TYPE::FIND>{}(lock_table, buckets_array, key, fn, alloc, hash, thread_mask);
    }

    LSLAB_DEVICE bool get(const K& key, V& value, bool thread_mask = true) {
        struct Fn {
            LSLAB_DEVICE void operator()(const V& val) {
                value = val;
                found = true;
            }
            bool found;
            V& value;
        };

        Fn fn{false, value};

        find_function(key, fn, thread_mask);
        return fn.found;
    }

    template<typename Fn>
    LSLAB_DEVICE void insert_function(const K& key, Fn&& fn, bool thread_mask = true) {
        // Traversal in insert mode; the OPERATION_TYPE::INSERT name is assumed here,
        // since the original alias is not shown in this listing.
        using traverse_t = detail::traverse<Allocator, detail::OPERATION_TYPE::INSERT>;
        traverse_t t;
        size_t hash = Hash{}(key) & ((1ull << number_of_buckets_log_2) - 1);
        t.template operator()<K, V, Fn>(lock_table, buckets_array, key, std::forward<Fn>(fn), alloc, hash, thread_mask);
    }

    LSLAB_DEVICE V put(const K& key, const V& value, bool thread_mask = true) {
        struct Fn_put {
            // Save the previous value, then overwrite the slot.
            LSLAB_DEVICE void operator()(V& val) {
                tmp = val;
                val = value;
            }
            const V& value;
            V tmp;
        };

        Fn_put fn{value};

        insert_function(key, fn, thread_mask);
        return fn.tmp;
    }

    template<typename Fn>
    LSLAB_DEVICE void update_function(const K& key, Fn&& fn, bool thread_mask = true) {
        detail::traverse<Allocator, detail::OPERATION_TYPE::UPDATE>{}(lock_table, buckets_array, key, fn, alloc, Hash{}(key) & ((1ull << number_of_buckets_log_2) - 1), thread_mask);
    }

    LSLAB_DEVICE cuda::std::pair<bool, V> update(const K& key, const V& value, bool thread_mask = true) {
        struct Fn {
            // Save the previous value before overwriting it, and record that the key was found.
            LSLAB_DEVICE void operator()(V& val) {
                tmp = val;
                val = value;
                found = true;
            }
            bool found;
            const V& value;
            V tmp;
        };

        Fn fn{false, value};

        update_function(key, fn, thread_mask);
        return {fn.found, fn.tmp};
    }

    template<int block_size = 256>
    LSLAB_HOST void put(cuda::std::pair<K, V>* operations, V* output, size_t size, cudaStream_t stream = 0x0) {
        put_<block_size, this_t, K, V><<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(*this, operations, output, size);
    }

    template<int block_size = 256>
    LSLAB_HOST void put(K* operations_keys, V* operations_values, V* output, size_t size, cudaStream_t stream = 0x0) {
        put_<block_size, this_t, K, V><<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(*this, operations_keys, operations_values, output, size);
    }

    template<int block_size = 256>
    LSLAB_HOST void get(K* operations, cuda::std::pair<bool, V>* output, size_t size, cudaStream_t stream = 0x0) {
        get_<block_size, this_t, K, V><<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(*this, operations, output, size);
    }

    template<int block_size = 256>
    LSLAB_HOST void update(cuda::std::pair<K, V>* operations, cuda::std::pair<bool, V>* output, size_t size, cudaStream_t stream = 0x0) {
        update_<block_size, this_t, K, V><<<(size + block_size - 1) / block_size, block_size, 0, stream>>>(*this, operations, output, size);
    }

    LSLAB_HOST_DEVICE unsigned buckets() {
        return 1 << number_of_buckets_log_2;
    }

private:
    warp_mutex* lock_table;
    detail::slab_node<K, V>* buckets_array;
    unsigned number_of_buckets_log_2;
    Allocator alloc;
};
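
// Example (device-side API): a minimal sketch, not part of map.h. It shows a custom
// kernel calling put() and get() directly; the kernel name, the unsigned key/value
// types, and the bounds handling are illustrative. It follows the same pattern as the
// put_/get_ kernels defined below: every thread reaches the call, and threads without
// work pass thread_mask = false.
template<typename map_t>
__global__ void example_put_then_get(map_t m, const unsigned* keys, unsigned* out, size_t n) {
    size_t idx = threadIdx.x + static_cast<size_t>(blockIdx.x) * blockDim.x;
    bool active = idx < n;

    // Out-of-range threads use a placeholder key and thread_mask = false.
    unsigned key = active ? keys[idx] : 0u;

    // Insert key -> 2 * key, then read it back.
    m.put(key, 2u * key, active);

    unsigned value = 0u;
    bool found = m.get(key, value, active);

    if (active)
        out[idx] = found ? value : 0u;
}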

template<int block_size, typename map_t, typename K, typename V>
__global__ void put_(map_t map, cuda::std::pair<K, V>* operations, V* output, size_t size) {

    size_t idx = threadIdx.x + static_cast<size_t>(blockIdx.x) * block_size;

    K key;
    V val;
    if(idx < size) {
        key = operations[idx].first;
        val = operations[idx].second;
    }

    V res = map.put(key, val, idx < size);

    if(idx < size) {
        output[idx] = res;
    }
}

template<int block_size, typename map_t, typename K, typename V>
__global__ void put_(map_t map, K* operations_keys, V* operations_values, V* output, size_t size) {

    size_t idx = threadIdx.x + static_cast<size_t>(blockIdx.x) * block_size;

    K key;
    V val;
    if(idx < size) {
        key = operations_keys[idx];
        val = operations_values[idx];
    }

    V res = map.put(key, val, idx < size);

    if(idx < size) {
        output[idx] = res;
    }
}

template<int block_size, typename map_t, typename K, typename V>
__global__ void get_(map_t map, K* operations, cuda::std::pair<bool, V>* output, size_t size) {

    size_t idx = threadIdx.x + static_cast<size_t>(blockIdx.x) * block_size;

    K key;
    V value;
    if(idx < size) {
        key = operations[idx];
    }

    bool res = map.get(key, value, idx < size);

    if(idx < size) {
        output[idx] = {res, value};
    }
}

template<int block_size, typename map_t, typename K, typename V>
__global__ void update_(map_t map, cuda::std::pair<K, V>* operations, cuda::std::pair<bool, V>* output, size_t size) {

    size_t idx = threadIdx.x + static_cast<size_t>(blockIdx.x) * block_size;

    K key;
    V val;
    if(idx < size) {
        key = operations[idx].first;
        val = operations[idx].second;
    }

    // update() reports whether the key was present and the value it held before the update.
    cuda::std::pair<bool, V> res = map.update(key, val, idx < size);

    if(idx < size) {
        output[idx] = res;
    }
}

} // namespace lslab
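
A minimal host-side usage sketch (not part of map.h): it constructs a map with the default allocator and hash, stages operations in thrust containers, and runs the bulk put and get members shown above. The unsigned key/value types, the sizes, and the local variable names are illustrative.

#include "map.h"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/sequence.h>

int main() {
    lslab::map<unsigned, unsigned> m(10);  // 2^10 = 1024 buckets

    const size_t n = 1 << 12;

    // Stage (key, value) pairs on the host and copy them to the device.
    thrust::host_vector<cuda::std::pair<unsigned, unsigned>> h_ops(n);
    for (size_t i = 0; i < n; ++i)
        h_ops[i] = {static_cast<unsigned>(i), static_cast<unsigned>(2 * i)};
    thrust::device_vector<cuda::std::pair<unsigned, unsigned>> d_ops = h_ops;
    thrust::device_vector<unsigned> d_prev(n);

    // Bulk insert; the output buffer receives the value that put() returns for each key
    // (the previously stored value, if the key already existed).
    m.put(thrust::raw_pointer_cast(d_ops.data()), thrust::raw_pointer_cast(d_prev.data()), n);

    // Bulk lookup of the same keys; each output entry is (found, value).
    thrust::device_vector<unsigned> d_keys(n);
    thrust::sequence(d_keys.begin(), d_keys.end());
    thrust::device_vector<cuda::std::pair<bool, unsigned>> d_found(n);
    m.get(thrust::raw_pointer_cast(d_keys.data()), thrust::raw_pointer_cast(d_found.data()), n);

    cudaDeviceSynchronize();
    return 0;
}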