GPU-Accelerated Cuckoo Filter
Loading...
Searching...
No Matches
Public Types | Public Member Functions | Static Public Member Functions | Public Attributes | Static Public Attributes | List of all members
cuckoogpu::Filter< Config >::Bucket Struct Reference

Bucket structure that holds the fingerprint and tags for a given bucket. More...

Public Types

using WordType = typename Config::WordType
 

Public Member Functions

__host__ __device__ __forceinline__ TagType extractTag (WordType packed, size_t tagIdx) const
 
__host__ __device__ __forceinline__ WordType replaceTag (WordType packed, size_t tagIdx, TagType newTag) const
 
template<size_t N>
__device__ __forceinline__ void load128Bit (size_t startIdx, WordType(&out)[N]) const
 Loads words using 128-bit vectorized loads into a fixed-size array.
 
__device__ bool contains (TagType tag) const
 Checks if a tag is present in the bucket using vectorized loads.
 

Static Public Member Functions

template<size_t N>
__device__ static __forceinline__ bool checkWords (const WordType(&loaded)[N], WordType replicatedTag)
 Checks an array of loaded words for a matching tag using SWAR.
 

Public Attributes

cuda::std::atomic< WordTypepackedTags [wordCount]
 

Static Public Attributes

static constexpr size_t tagsPerWord = sizeof(WordType) / sizeof(TagType)
 
static constexpr size_t wordCount = bucketSize / tagsPerWord
 

Detailed Description

template<typename Config>
struct cuckoogpu::Filter< Config >::Bucket

Bucket structure that holds the fingerprint and tags for a given bucket.

The bucket is divided into words, where each word contains one or more fingerprints depending on tag size.

This optimisation allows us to avoid having to perform atomic operations on every fingerprint in the bucket, the extra computational overhead is negligible.

For efficiency reasons, the number of fingerprints per word is enforced to be a power of 2, same goes for the total number of fingerprints in the bucket.

Definition at line 228 of file CuckooFilter.cuh.

Member Typedef Documentation

◆ WordType

template<typename Config >
using cuckoogpu::Filter< Config >::Bucket::WordType = typename Config::WordType

Definition at line 231 of file CuckooFilter.cuh.

Member Function Documentation

◆ checkWords()

template<typename Config >
template<size_t N>
__device__ static __forceinline__ bool cuckoogpu::Filter< Config >::Bucket::checkWords ( const WordType(&)  loaded[N],
WordType  replicatedTag 
)
inlinestatic

Checks an array of loaded words for a matching tag using SWAR.

Definition at line 260 of file CuckooFilter.cuh.

260 {
261 _Pragma("unroll")
262 for (size_t j = 0; j < N; ++j) {
263 if (detail::hasZero<TagType, WordType>(loaded[j] ^ replicatedTag)) {
264 return true;
265 }
266 }
267 return false;
268 }
__host__ __device__ __forceinline__ constexpr bool hasZero(WordType v)
Checks if a packed word contains a zero slot.
Definition helpers.cuh:116
Here is the call graph for this function:
Here is the caller graph for this function:

◆ contains()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::Bucket::contains ( TagType  tag) const
inline

Checks if a tag is present in the bucket using vectorized loads.

Automatically selects the best load width based on bucket size and architecture.

Definition at line 294 of file CuckooFilter.cuh.

294 {
295 const WordType replicatedTag = detail::replicateTag<TagType, WordType>(tag);
296
297 // Scalar path
298 if constexpr (wordCount == 1) {
299 const auto packed = reinterpret_cast<const WordType&>(packedTags[0]);
300 return detail::hasZero<TagType, WordType>(packed ^ replicatedTag);
301 }
302
303 const uint32_t startSlot = tag & (bucketSize - 1);
304 const size_t startWordIdx = startSlot / tagsPerWord;
305
306#if __CUDA_ARCH__ >= 1000 && !defined(CUCKOO_FILTER_DISABLE_256BIT_LOADS)
307 // 256-bit load path
308 constexpr size_t wordsPerLoad256 = (sizeof(WordType) == 4) ? 8 : 4;
309 if constexpr (wordCount >= wordsPerLoad256) {
310 constexpr size_t alignMask = wordsPerLoad256 - 1;
311 const size_t startAlignedIdx = startWordIdx & ~alignMask;
312
313 WordType loaded[wordsPerLoad256];
314 for (size_t i = 0; i < wordCount / wordsPerLoad256; i++) {
315 const size_t idx = (startAlignedIdx + i * wordsPerLoad256) & (wordCount - 1);
317 reinterpret_cast<const WordType*>(&packedTags[idx]), loaded
318 );
319 if (checkWords(loaded, replicatedTag)) {
320 return true;
321 }
322 }
323 return false;
324 }
325#endif
326 // 128-bit load path
327 constexpr size_t wordsPerLoad128 = (sizeof(WordType) == 4) ? 4 : 2;
328 if constexpr (wordCount >= wordsPerLoad128) {
329 constexpr size_t alignMask = wordsPerLoad128 - 1;
330 const size_t startAlignedIdx = startWordIdx & ~alignMask;
331
332 WordType loaded[wordsPerLoad128];
333 for (size_t i = 0; i < wordCount / wordsPerLoad128; i++) {
334 const size_t idx = (startAlignedIdx + i * wordsPerLoad128) & (wordCount - 1);
335 load128Bit(idx, loaded);
336 if (checkWords(loaded, replicatedTag)) {
337 return true;
338 }
339 }
340 return false;
341 }
342
343 return false;
344 }
static constexpr size_t bucketSize
typename Config::WordType WordType
static constexpr size_t tagsPerWord
__device__ __forceinline__ void load128Bit(size_t startIdx, WordType(&out)[N]) const
Loads words using 128-bit vectorized loads into a fixed-size array.
static constexpr size_t wordCount
__device__ static __forceinline__ bool checkWords(const WordType(&loaded)[N], WordType replicatedTag)
Checks an array of loaded words for a matching tag using SWAR.
cuda::std::atomic< WordType > packedTags[wordCount]
Here is the call graph for this function:
Here is the caller graph for this function:

◆ extractTag()

template<typename Config >
__host__ __device__ __forceinline__ TagType cuckoogpu::Filter< Config >::Bucket::extractTag ( WordType  packed,
size_t  tagIdx 
) const
inline

Definition at line 244 of file CuckooFilter.cuh.

244 {
245 return static_cast<TagType>((packed >> (tagIdx * bitsPerTag)) & fpMask);
246 }
static constexpr size_t fpMask
typename Config::TagType TagType
static constexpr size_t bitsPerTag
Here is the caller graph for this function:

◆ load128Bit()

template<typename Config >
template<size_t N>
__device__ __forceinline__ void cuckoogpu::Filter< Config >::Bucket::load128Bit ( size_t  startIdx,
WordType(&)  out[N] 
) const
inline

Loads words using 128-bit vectorized loads into a fixed-size array.

Definition at line 274 of file CuckooFilter.cuh.

274 {
275 static_assert(N == 2 || N == 4, "128-bit loads support 2 or 4 words");
276 if constexpr (sizeof(WordType) == 4) {
277 auto vec = __ldg(reinterpret_cast<const uint4*>(&packedTags[startIdx]));
278 out[0] = vec.x;
279 out[1] = vec.y;
280 out[2] = vec.z;
281 out[3] = vec.w;
282 } else {
283 auto vec = __ldg(reinterpret_cast<const ulonglong2*>(&packedTags[startIdx]));
284 out[0] = vec.x;
285 out[1] = vec.y;
286 }
287 }
Here is the caller graph for this function:

◆ replaceTag()

template<typename Config >
__host__ __device__ __forceinline__ WordType cuckoogpu::Filter< Config >::Bucket::replaceTag ( WordType  packed,
size_t  tagIdx,
TagType  newTag 
) const
inline

Definition at line 249 of file CuckooFilter.cuh.

249 {
250 size_t shift = tagIdx * bitsPerTag;
251 WordType cleared = packed & ~(static_cast<WordType>(fpMask) << shift);
252 return cleared | (static_cast<WordType>(newTag) << shift);
253 }
Here is the caller graph for this function:

Member Data Documentation

◆ packedTags

template<typename Config >
cuda::std::atomic<WordType> cuckoogpu::Filter< Config >::Bucket::packedTags[wordCount]

Definition at line 241 of file CuckooFilter.cuh.

◆ tagsPerWord

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::Bucket::tagsPerWord = sizeof(WordType) / sizeof(TagType)
staticconstexpr

Definition at line 233 of file CuckooFilter.cuh.

◆ wordCount

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::Bucket::wordCount = bucketSize / tagsPerWord
staticconstexpr

Definition at line 238 of file CuckooFilter.cuh.


The documentation for this struct was generated from the following file: