GPU-Accelerated Cuckoo Filter
Loading...
Searching...
No Matches
helpers.cuh
Go to the documentation of this file.
1#pragma once
2
3#include <cstddef>
4#include <cstdint>
5#include <cstdio>
6#include <cstring>
7#include <iostream>
8
9namespace cuckoogpu::detail {
10
16constexpr bool powerOfTwo(size_t n) {
17 return n != 0 && (n & (n - 1)) == 0;
18}
19
27
33constexpr size_t nextPowerOfTwo(size_t n) {
34 if (powerOfTwo(n))
35 return n;
36
37 n--;
38 n |= n >> 1;
39 n |= n >> 2;
40 n |= n >> 4;
41 n |= n >> 8;
42 n |= n >> 16;
43 n |= n >> 32;
44 n++;
45
46 return n;
47}
48
56template <typename T>
57size_t countOnes(T* data, size_t n) {
58 size_t count = 0;
59 for (size_t i = 0; i < n; ++i) {
60 if (data[i]) {
61 count++;
62 }
63 }
64 return count;
65}
66
80template <typename TagType, typename WordType>
81__host__ __device__ __forceinline__ constexpr WordType getZeroMask(WordType v) {
82 static_assert(sizeof(WordType) == 4 || sizeof(WordType) == 8, "WordType must be 32 or 64 bits");
83
84 if constexpr (sizeof(WordType) == 8) {
85 if constexpr (sizeof(TagType) == 1) {
86 return (v - 0x0101010101010101ULL) & ~v & 0x8080808080808080ULL;
87 } else if constexpr (sizeof(TagType) == 2) {
88 return (v - 0x0001000100010001ULL) & ~v & 0x8000800080008000ULL;
89 } else if constexpr (sizeof(TagType) == 4) {
90 return (v - 0x0000000100000001ULL) & ~v & 0x8000000080000000ULL;
91 } else {
92 return 0;
93 }
94 } else {
95 if constexpr (sizeof(TagType) == 1) {
96 return (v - 0x01010101U) & ~v & 0x80808080U;
97 } else if constexpr (sizeof(TagType) == 2) {
98 return (v - 0x00010001U) & ~v & 0x80008000U;
99 } else if constexpr (sizeof(TagType) == 4) {
100 return (v - 0x00000001U) & ~v & 0x80000000U;
101 } else {
102 return 0;
103 }
104 }
105}
106
115template <typename TagType, typename WordType>
116__host__ __device__ __forceinline__ constexpr bool hasZero(WordType v) {
118}
119
128template <typename TagType, typename WordType>
130 static_assert(sizeof(WordType) == 4 || sizeof(WordType) == 8, "WordType must be 32 or 64 bits");
131
132 if constexpr (sizeof(WordType) == 8) {
133 if constexpr (sizeof(TagType) == 1) {
134 return static_cast<uint64_t>(tag) * 0x0101010101010101ULL;
135 } else if constexpr (sizeof(TagType) == 2) {
136 return static_cast<uint64_t>(tag) * 0x0001000100010001ULL;
137 } else if constexpr (sizeof(TagType) == 4) {
138 return static_cast<uint64_t>(tag) * 0x0000000100000001ULL;
139 } else {
140 return tag;
141 }
142 } else {
143 if constexpr (sizeof(TagType) == 1) {
144 return static_cast<uint32_t>(tag) * 0x01010101U;
145 } else if constexpr (sizeof(TagType) == 2) {
146 return static_cast<uint32_t>(tag) * 0x00010001U;
147 } else if constexpr (sizeof(TagType) == 4) {
148 return static_cast<uint32_t>(tag);
149 } else {
150 return tag;
151 }
152 }
153}
154
155#if __CUDA_ARCH__ >= 1000
156
171template <typename T>
173 static_assert(sizeof(T) == 4 || sizeof(T) == 8, "T must be uint32_t or uint64_t");
174
175 if constexpr (sizeof(T) == 8) {
176 asm volatile("ld.global.nc.v4.u64 {%0, %1, %2, %3}, [%4];"
177 : "=l"(out[0]), "=l"(out[1]), "=l"(out[2]), "=l"(out[3])
178 : "l"(ptr));
179 } else {
180 asm volatile("ld.global.nc.v8.u32 {%0, %1, %2, %3, %4, %5, %6, %7}, [%8];"
181 : "=r"(out[0]),
182 "=r"(out[1]),
183 "=r"(out[2]),
184 "=r"(out[3]),
185 "=r"(out[4]),
186 "=r"(out[5]),
187 "=r"(out[6]),
188 "=r"(out[7])
189 : "l"(ptr));
190 }
191}
192
193#endif
194
198#define SDIV(x, y) (((x) + (y) - 1) / (y))
199
204#define CUDA_CALL(err) \
205 do { \
206 cudaError_t err_ = (err); \
207 if (err_ == cudaSuccess) [[likely]] { \
208 break; \
209 } \
210 printf("%s:%d %s\n", __FILE__, __LINE__, cudaGetErrorString(err_)); \
211 exit(err_); \
212 } while (0)
213
223template <typename Kernel>
238
239} // namespace cuckoogpu::detail
constexpr size_t maxOccupancyGridSize(int32_t blockSize, Kernel kernel, size_t dynamicSMemSize)
Calculates the maximum occupancy grid size for a kernel.
Definition helpers.cuh:224
constexpr size_t nextPowerOfTwo(size_t n)
Calculates the next power of two greater than or equal to n.
Definition helpers.cuh:33
__host__ __device__ __forceinline__ constexpr WordType getZeroMask(WordType v)
Returns a bitmask indicating which slots in a packed word are zero.
Definition helpers.cuh:81
__host__ __device__ __forceinline__ constexpr bool hasZero(WordType v)
Checks if a packed word contains a zero slot.
Definition helpers.cuh:116
__host__ __device__ __forceinline__ uint32_t globalThreadId()
Calculates the global thread ID in a 1D grid.
Definition helpers.cuh:24
__host__ __device__ __forceinline__ constexpr WordType replicateTag(TagType tag)
Replicates a tag value across all slots in a word.
Definition helpers.cuh:129
constexpr bool powerOfTwo(size_t n)
Checks if a number is a power of two.
Definition helpers.cuh:16
size_t countOnes(T *data, size_t n)
Counts the number of non-zero elements in an array.
Definition helpers.cuh:57