#include <cuda_runtime.h>

#include <cmath>
#include <cstddef>
#include <iostream>
#include <list>
#include <mutex>
#include <utility>

// gpuAssert(cudaError_t, const char *, int) is used below but not defined
// here; it is assumed to come from the project's CUDA error-check helper.
namespace groupallocator {
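    // Multi-page allocator ("MPA" in the log message below): backs each
    // allocation with whole managed pages and tracks every block for
    // prefetching and cleanup.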
    class MultiPageAllocator {
    public:
        // Class name and constructor signature are reconstructed from the
        // initializer list and the "Allocating in MPA" log message.
        explicit MultiPageAllocator(size_t page_size)
                : PAGE_SIZE(page_size), pagesAllocated(0) {}
        // Destructor reconstructed around the surviving cudaFree call:
        // release every block still registered in mem.
        ~MultiPageAllocator() {
            for (auto &e : mem) {
                gpuAssert(cudaFree((void *) e.first), __FILE__, __LINE__);
            }
        }
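        // allocate() rounds the request up to a whole number of pages and
        // backs it with a single cudaMallocManaged block, so each allocation
        // owns its pages outright.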
        template<class T>
        void allocate(T **ptr, size_t s, bool forceAligned128) {
            // forceAligned128 needs no special handling here: page-granular
            // cudaMallocManaged blocks are already at least 256-byte aligned.
            (void) forceAligned128;
            std::clog << "Allocating in MPA " << __FILE__ << ":" << __LINE__
                      << std::endl;
            size_t pages_needed = (size_t) std::ceil(s / (double) PAGE_SIZE);
            char *c;
            gpuAssert(cudaMallocManaged((void **) &c, pages_needed * PAGE_SIZE),
                      __FILE__, __LINE__);
            *ptr = (T *) c; // hand the new block back to the caller
            std::unique_lock<std::mutex> ul(m); // lock assumed, mirroring getPagesAllocated()
            pagesAllocated += pages_needed;
            mem.push_back({c, pages_needed * PAGE_SIZE});
        }
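        // free() releases the pages backing ptr. Erasing the list entry keeps
        // the destructor from double-freeing; the signature is an assumption
        // chosen to match allocate().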
        template<class T>
        void free(T *ptr) {
            std::unique_lock<std::mutex> ul(m);
            for (auto i = mem.begin(); i != mem.end(); i++) {
                if ((size_t) i->first == (size_t) ptr) {
                    gpuAssert(cudaFree((void *) i->first), __FILE__, __LINE__);
                    mem.erase(i);
                    return;
                }
            }
        }
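        // moveToDevice() prefetches every tracked block to the given device.
        // The migration runs asynchronously on the stream, so callers should
        // synchronize before relying on residency.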
        void moveToDevice(int device, cudaStream_t stream) {
#ifndef DISABLE_PREFETCH
            std::unique_lock<std::mutex> ul(m); // lock assumed, as in free()
            for (auto i = mem.begin(); i != mem.end(); i++) {
                gpuAssert(cudaMemPrefetchAsync(i->first, i->second, device, stream),
                          __FILE__, __LINE__);
            }
#endif
        }
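        // Thread-safe accessor; the getPagesAllocated() wrapper below is
        // reconstructed around the surviving locked body.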
        size_t getPagesAllocated() {
            std::unique_lock<std::mutex> ul(m);
            return pagesAllocated;
        }
        size_t getPageSize() { return PAGE_SIZE; }
    private:
        std::list<std::pair<char *, size_t>> mem; // {base pointer, size in bytes}
        std::mutex m;                             // guards mem and pagesAllocated
        const size_t PAGE_SIZE;
        size_t pagesAllocated;
    };
} // namespace groupallocator
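// Usage sketch (illustrative only; device 0 and the 4 KiB page size are
// assumed choices, not values from this header):
//
//   groupallocator::MultiPageAllocator mpa(4096);
//   int *buf = nullptr;
//   mpa.allocate(&buf, 1024 * sizeof(int), false); // rounds up to one page
//
//   cudaStream_t stream;
//   cudaStreamCreate(&stream);
//   mpa.moveToDevice(0, stream);        // prefetch to device 0
//   cudaStreamSynchronize(stream);      // wait for the migration to settle
//
//   mpa.free(buf);
//   cudaStreamDestroy(stream);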