GPU-Accelerated Cuckoo Filter
Loading...
Searching...
No Matches
Classes | Public Types | Public Member Functions | Static Public Member Functions | Public Attributes | Static Public Attributes | List of all members
cuckoogpu::Filter< Config > Struct Template Reference

A CUDA-accelerated Cuckoo Filter implementation. More...

Collaboration diagram for cuckoogpu::Filter< Config >:
[legend]

Classes

struct  Bucket
 Bucket structure that holds the fingerprint and tags for a given bucket. More...
 
struct  PackedTag
 This is used by the sorted insert kernel to store the fingerprint and primary bucket index in a compact format that allows you to sort them directly since the bucket index lives in the upper bits. More...
 

Public Types

using T = typename Config::KeyType
 
using TagType = typename Config::TagType
 
using AltBucketPolicy = typename Config::AltBucketPolicy
 
using PackedTagType = typename std::conditional< bitsPerTag<=8, uint32_t, uint64_t >::type
 

Public Member Functions

 Filter (const Filter &)=delete
 
Filteroperator= (const Filter &)=delete
 
 Filter (size_t capacity)
 Constructs a new Cuckoo Filter.
 
 ~Filter ()
 Destroys the Cuckoo Filter.
 
size_t insertMany (const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
 Inserts a batch of keys into the filter.
 
size_t insertManyWithEvictionCounts (const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={})
 Inserts a batch of keys and records per-attempt eviction counts.
 
size_t insertManySorted (const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
 This pre-sorts the input keys based on the primary bucket index to allow for coalesced memory access when you later insert them into the filter.
 
size_t insertManySortedWithEvictionCounts (const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={})
 Inserts a pre-sorted batch of keys and records per-attempt eviction counts.
 
void containsMany (const T *d_keys, const size_t n, bool *d_output, cudaStream_t stream={})
 Checks for the existence of a batch of keys.
 
size_t deleteMany (const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
 Tries to remove a set of keys from the filter.
 
size_t insertMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={})
 Inserts keys from a Thrust device vector.
 
size_t insertMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={})
 Inserts keys from a Thrust device vector (uint8_t output).
 
size_t insertMany (const thrust::device_vector< T > &d_keys, cudaStream_t stream={})
 Inserts keys from a Thrust device vector without outputting results.
 
size_t insertManyWithEvictionCounts (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint32_t > &d_evictionAttempts, thrust::device_vector< uint8_t > *d_output=nullptr, cudaStream_t stream={})
 Inserts keys from a Thrust device vector and records per-attempt eviction counts.
 
size_t insertManySorted (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={})
 Inserts keys from a Thrust device vector, sorting them first.
 
size_t insertManySorted (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={})
 Inserts keys from a Thrust device vector, sorting them first (uint8_t output).
 
size_t insertManySorted (const thrust::device_vector< T > &d_keys, cudaStream_t stream={})
 Inserts keys from a Thrust device vector, sorting them first, without outputting results.
 
size_t insertManySortedWithEvictionCounts (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint32_t > &d_evictionAttempts, thrust::device_vector< uint8_t > *d_output=nullptr, cudaStream_t stream={})
 Inserts keys from a Thrust device vector, sorting first, and records per-attempt eviction counts.
 
void containsMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={})
 Checks for existence of keys in a Thrust device vector.
 
void containsMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={})
 Checks for existence of keys in a Thrust device vector (uint8_t output).
 
size_t deleteMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={})
 Deletes keys in a Thrust device vector.
 
size_t deleteMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={})
 Deletes keys in a Thrust device vector (uint8_t output).
 
size_t deleteMany (const thrust::device_vector< T > &d_keys, cudaStream_t stream={})
 Deletes keys in a Thrust device vector without outputting results.
 
void clear ()
 Clears the filter, removing all items.
 
float loadFactor ()
 Calculates the current load factor of the filter.
 
size_t occupiedSlots ()
 Returns the total number of occupied slots.
 
size_t evictionCount ()
 Returns the total number of evictions performed.
 
void resetEvictionCount ()
 Resets the eviction counter to zero.
 
size_t capacity ()
 Returns the total capacity of the filter.
 
size_t getNumBuckets () const
 Returns the number of buckets in the filter.
 
size_t sizeInBytes () const
 Returns the size of the filter in bytes.
 
size_t countOccupiedSlots ()
 Counts occupied slots by iterating over all buckets on the host.
 
__device__ bool tryRemoveAtBucket (size_t bucketIdx, TagType tag)
 Attempt to remove a single instance of a fingerprint from a bucket.
 
__device__ bool tryInsertAtBucket (size_t bucketIdx, TagType tag)
 Attempts to insert a tag into a specific bucket.
 
__device__ bool insertWithEvictionDFS (TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr)
 Inserts a fingerprint into the filter by evicting existing fingerprints.
 
__device__ bool insertWithEvictionBFS (TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr)
 Inserts a fingerprint using repeated shallow breadth-first attempts.
 
__device__ bool insert (const T &key, uint32_t *evictionAttempts=nullptr)
 Inserts a single key into the filter.
 
__device__ bool contains (const T &key) const
 Checks if a key exists in the filter.
 
__device__ bool remove (const T &key)
 Removes a key from the filter.
 

Static Public Member Functions

template<typename H >
static __host__ __device__ uint64_t hash64 (const H &key)
 
static __host__ __device__ cuda::std::tuple< size_t, size_t, TagType, TagTypegetCandidateBucketsAndFPs (const T &key, size_t numBuckets)
 
static __host__ __device__ size_t getAlternateBucket (size_t bucket, TagType fp, size_t numBuckets)
 Computes the alternate bucket for a fingerprint.
 
static __host__ __device__ cuda::std::tuple< size_t, TagTypegetAlternateBucketWithNewFp (size_t bucket, TagType fp, size_t numBuckets)
 Computes alternate bucket AND updated fingerprint for choice bit policies.
 
static size_t calculateNumBuckets (size_t capacity)
 The number of buckets is enforced to be a power of two in order to allow for efficient modulo on the bucket indices.
 

Public Attributes

size_t numBuckets
 Number of buckets in the filter.
 
Bucketd_buckets
 Pointer to the device memory for the buckets.
 
cuda::std::atomic< size_t > * d_numOccupied {}
 Pointer to the device memory for the occupancy counter.
 
cuda::std::atomic< size_t > * d_numEvictions {}
 Pointer to the device memory for the eviction counter.
 
size_t h_numOccupied = 0
 Number of occupied buckets in the filter.
 

Static Public Attributes

static constexpr size_t bitsPerTag = Config::bitsPerTag
 
static constexpr size_t tagEntryBytes = sizeof(TagType)
 
static constexpr size_t bucketSize = Config::bucketSize
 
static constexpr size_t maxEvictions = Config::maxEvictions
 
static constexpr size_t blockSize = Config::blockSize
 
static constexpr TagType EMPTY = 0
 
static constexpr size_t fpMask = (1ULL << bitsPerTag) - 1
 

Detailed Description

template<typename Config>
struct cuckoogpu::Filter< Config >

A CUDA-accelerated Cuckoo Filter implementation.

This class implements a Cuckoo Filter using CUDA for high-throughput insertion, lookup, and deletion. It supports concurrent operations and uses atomic operations for thread safety within buckets.

Template Parameters
ConfigThe configuration structure defining filter parameters.

Definition at line 144 of file CuckooFilter.cuh.

Member Typedef Documentation

◆ AltBucketPolicy

template<typename Config >
using cuckoogpu::Filter< Config >::AltBucketPolicy = typename Config::AltBucketPolicy

Definition at line 149 of file CuckooFilter.cuh.

◆ PackedTagType

template<typename Config >
using cuckoogpu::Filter< Config >::PackedTagType = typename std::conditional<bitsPerTag <= 8, uint32_t, uint64_t>::type

Definition at line 163 of file CuckooFilter.cuh.

◆ T

template<typename Config >
using cuckoogpu::Filter< Config >::T = typename Config::KeyType

Definition at line 145 of file CuckooFilter.cuh.

◆ TagType

template<typename Config >
using cuckoogpu::Filter< Config >::TagType = typename Config::TagType

Definition at line 148 of file CuckooFilter.cuh.

Constructor & Destructor Documentation

◆ Filter() [1/2]

template<typename Config >
cuckoogpu::Filter< Config >::Filter ( const Filter< Config > &  )
delete

◆ Filter() [2/2]

template<typename Config >
cuckoogpu::Filter< Config >::Filter ( size_t  capacity)
inlineexplicit

Constructs a new Cuckoo Filter.

Allocates device memory for buckets and occupancy counters.

Parameters
capacityDesired capacity (number of items) for the filter.

Definition at line 408 of file CuckooFilter.cuh.

409 CUDA_CALL(cudaMalloc(&d_buckets, numBuckets * sizeof(Bucket)));
410 CUDA_CALL(cudaMalloc(&d_numOccupied, sizeof(cuda::std::atomic<size_t>)));
411#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
412 CUDA_CALL(cudaMalloc(&d_numEvictions, sizeof(cuda::std::atomic<size_t>)));
413#endif
414
415 clear();
416 }
cuda::std::atomic< size_t > * d_numOccupied
Pointer to the device memory for the occupancy counter.
static size_t calculateNumBuckets(size_t capacity)
The number of buckets is enforced to be a power of two in order to allow for efficient modulo on the ...
void clear()
Clears the filter, removing all items.
size_t numBuckets
Number of buckets in the filter.
cuda::std::atomic< size_t > * d_numEvictions
Pointer to the device memory for the eviction counter.
Bucket * d_buckets
Pointer to the device memory for the buckets.
size_t capacity()
Returns the total capacity of the filter.
#define CUDA_CALL(err)
Macro for checking CUDA errors.
Definition helpers.cuh:204
Here is the call graph for this function:

◆ ~Filter()

template<typename Config >
cuckoogpu::Filter< Config >::~Filter ( )
inline

Destroys the Cuckoo Filter.

Frees allocated device memory.

Definition at line 423 of file CuckooFilter.cuh.

423 {
424 if (d_buckets) {
425 CUDA_CALL(cudaFree(d_buckets));
426 }
427 if (d_numOccupied) {
428 CUDA_CALL(cudaFree(d_numOccupied));
429 }
430#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
431 if (d_numEvictions) {
432 CUDA_CALL(cudaFree(d_numEvictions));
433 }
434#endif
435 }

Member Function Documentation

◆ calculateNumBuckets()

template<typename Config >
static size_t cuckoogpu::Filter< Config >::calculateNumBuckets ( size_t  capacity)
inlinestatic

The number of buckets is enforced to be a power of two in order to allow for efficient modulo on the bucket indices.

Definition at line 394 of file CuckooFilter.cuh.

394 {
395 return AltBucketPolicy::calculateNumBuckets(capacity);
396 }
Here is the call graph for this function:

◆ capacity()

template<typename Config >
size_t cuckoogpu::Filter< Config >::capacity ( )
inline

Returns the total capacity of the filter.

Returns
size_t Total number of slots.

Definition at line 1015 of file CuckooFilter.cuh.

1015 {
1016 return numBuckets * bucketSize;
1017 }
static constexpr size_t bucketSize
Here is the caller graph for this function:

◆ clear()

template<typename Config >
void cuckoogpu::Filter< Config >::clear ( )
inline

Clears the filter, removing all items.

Definition at line 958 of file CuckooFilter.cuh.

958 {
959 CUDA_CALL(cudaMemset(d_buckets, 0, numBuckets * sizeof(Bucket)));
960 CUDA_CALL(cudaMemset(d_numOccupied, 0, sizeof(cuda::std::atomic<size_t>)));
961#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
962 CUDA_CALL(cudaMemset(d_numEvictions, 0, sizeof(cuda::std::atomic<size_t>)));
963#endif
964 h_numOccupied = 0;
965 }
size_t h_numOccupied
Number of occupied buckets in the filter.
Here is the caller graph for this function:

◆ contains()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::contains ( const T key) const
inline

Checks if a key exists in the filter.

Parameters
keyThe key to check.
Returns
true if the key is found, false otherwise.

Definition at line 1396 of file CuckooFilter.cuh.

1396 {
1397 auto [i1, i2, fp1, fp2] = getCandidateBucketsAndFPs(key, numBuckets);
1398
1399 // fp1 is for bucket i1, fp2 is for bucket i2
1400 // For non-choice-bit policies, fp1 == fp2
1401 return d_buckets[i1].contains(fp1) || d_buckets[i2].contains(fp2);
1402 }
static __host__ __device__ cuda::std::tuple< size_t, size_t, TagType, TagType > getCandidateBucketsAndFPs(const T &key, size_t numBuckets)
__device__ bool contains(TagType tag) const
Checks if a tag is present in the bucket using vectorized loads.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ containsMany() [1/3]

template<typename Config >
void cuckoogpu::Filter< Config >::containsMany ( const T d_keys,
const size_t  n,
bool *  d_output,
cudaStream_t  stream = {} 
)
inline

Checks for the existence of a batch of keys.

Parameters
d_keysPointer to device memory containing keys to check.
nNumber of keys to check.
d_outputPointer to device memory to store results (true/false).
streamCUDA stream to use for the operation.

Definition at line 631 of file CuckooFilter.cuh.

631 {}) {
632 size_t numBlocks = SDIV(n, blockSize);
634 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_output, n, this);
635
636 CUDA_CALL(cudaStreamSynchronize(stream));
637 }
static constexpr size_t blockSize
#define SDIV(x, y)
Integer division with rounding up (ceiling).
Definition helpers.cuh:198
__host__ __device__ __forceinline__ constexpr bool hasZero(WordType v)
Checks if a packed word contains a zero slot.
Definition helpers.cuh:116
Here is the caller graph for this function:

◆ containsMany() [2/3]

template<typename Config >
void cuckoogpu::Filter< Config >::containsMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< bool > &  d_output,
cudaStream_t  stream = {} 
)
inline

Checks for existence of keys in a Thrust device vector.

Parameters
d_keysVector of keys to check.
d_outputVector to store results (bool). Resized if necessary.
streamCUDA stream.

Definition at line 861 of file CuckooFilter.cuh.

864 {}
865 ) {
866 if (d_output.size() != d_keys.size()) {
867 d_output.resize(d_keys.size());
868 }
870 thrust::raw_pointer_cast(d_keys.data()),
871 d_keys.size(),
872 thrust::raw_pointer_cast(d_output.data()),
873 stream
874 );
875 }
void containsMany(const T *d_keys, const size_t n, bool *d_output, cudaStream_t stream={})
Checks for the existence of a batch of keys.

◆ containsMany() [3/3]

template<typename Config >
void cuckoogpu::Filter< Config >::containsMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint8_t > &  d_output,
cudaStream_t  stream = {} 
)
inline

Checks for existence of keys in a Thrust device vector (uint8_t output).

Parameters
d_keysVector of keys to check.
d_outputVector to store results (uint8_t). Resized if necessary.
streamCUDA stream.

Definition at line 883 of file CuckooFilter.cuh.

886 {}
887 ) {
888 if (d_output.size() != d_keys.size()) {
889 d_output.resize(d_keys.size());
890 }
892 thrust::raw_pointer_cast(d_keys.data()),
893 d_keys.size(),
894 reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output.data())),
895 stream
896 );
897 }

◆ countOccupiedSlots()

template<typename Config >
size_t cuckoogpu::Filter< Config >::countOccupiedSlots ( )
inline

Counts occupied slots by iterating over all buckets on the host.

This is a slow operation used for verification/debugging.

Returns
size_t Actual number of occupied slots.

Definition at line 1042 of file CuckooFilter.cuh.

1042 {
1043 std::vector<Bucket> h_buckets(numBuckets);
1044
1045 CUDA_CALL(cudaMemcpy(
1046 h_buckets.data(), d_buckets, numBuckets * sizeof(Bucket), cudaMemcpyDeviceToHost
1047 ));
1048
1049 size_t occupiedCount = 0;
1050
1051 for (size_t bucketIdx = 0; bucketIdx < numBuckets; ++bucketIdx) {
1052 const Bucket& bucket = h_buckets[bucketIdx];
1053
1054 for (size_t atomicIdx = 0; atomicIdx < Bucket::wordCount; ++atomicIdx) {
1055 uint64_t packed = reinterpret_cast<const uint64_t&>(bucket.packedTags[atomicIdx]);
1056
1057 for (size_t tagIdx = 0; tagIdx < Bucket::tagsPerWord; ++tagIdx) {
1058 auto tag = bucket.extractTag(packed, tagIdx);
1059
1060 if (tag != EMPTY) {
1061 occupiedCount++;
1062 }
1063 }
1064 }
1065 }
1066
1067 return occupiedCount;
1068 }
static constexpr TagType EMPTY
static constexpr size_t tagsPerWord
static constexpr size_t wordCount
Here is the call graph for this function:

◆ deleteMany() [1/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::deleteMany ( const T d_keys,
const size_t  n,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Tries to remove a set of keys from the filter.

If the key is not present, it is ignored.

Parameters
d_keysPointer to the array of keys to remove
nNumber of keys to remove
d_outputOptional pointer to an output array indicating the success of each key removal
streamCUDA stream to use for the operation
Returns
size_t Updated number of occupied slots in the filter

Definition at line 650 of file CuckooFilter.cuh.

654 {}
655 ) {
656 size_t numBlocks = SDIV(n, blockSize);
658 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_output, n, this);
659
660 CUDA_CALL(cudaStreamSynchronize(stream));
661
662 return occupiedSlots();
663 }
size_t occupiedSlots()
Returns the total number of occupied slots.
Here is the caller graph for this function:

◆ deleteMany() [2/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::deleteMany ( const thrust::device_vector< T > &  d_keys,
cudaStream_t  stream = {} 
)
inline

Deletes keys in a Thrust device vector without outputting results.

Parameters
d_keysVector of keys to delete.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 951 of file CuckooFilter.cuh.

951 {}) {
952 return deleteMany(thrust::raw_pointer_cast(d_keys.data()), d_keys.size(), nullptr, stream);
953 }
size_t deleteMany(const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
Tries to remove a set of keys from the filter.

◆ deleteMany() [3/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::deleteMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< bool > &  d_output,
cudaStream_t  stream = {} 
)
inline

Deletes keys in a Thrust device vector.

Parameters
d_keysVector of keys to delete.
d_outputVector to store results (bool). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 906 of file CuckooFilter.cuh.

909 {}
910 ) {
911 if (d_output.size() != d_keys.size()) {
912 d_output.resize(d_keys.size());
913 }
914 return deleteMany(
915 thrust::raw_pointer_cast(d_keys.data()),
916 d_keys.size(),
917 thrust::raw_pointer_cast(d_output.data()),
918 stream
919 );
920 }

◆ deleteMany() [4/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::deleteMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint8_t > &  d_output,
cudaStream_t  stream = {} 
)
inline

Deletes keys in a Thrust device vector (uint8_t output).

Parameters
d_keysVector of keys to delete.
d_outputVector to store results (uint8_t). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 929 of file CuckooFilter.cuh.

932 {}
933 ) {
934 if (d_output.size() != d_keys.size()) {
935 d_output.resize(d_keys.size());
936 }
937 return deleteMany(
938 thrust::raw_pointer_cast(d_keys.data()),
939 d_keys.size(),
940 reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output.data())),
941 stream
942 );
943 }

◆ evictionCount()

template<typename Config >
size_t cuckoogpu::Filter< Config >::evictionCount ( )
inline

Returns the total number of evictions performed.

Only available when CUCKOO_FILTER_COUNT_EVICTIONS is defined.

Returns
size_t Number of evictions.

Definition at line 997 of file CuckooFilter.cuh.

997 {
998 size_t count;
999 CUDA_CALL(cudaMemcpy(&count, d_numEvictions, sizeof(size_t), cudaMemcpyDeviceToHost));
1000 return count;
1001 }

◆ getAlternateBucket()

template<typename Config >
static __host__ __device__ size_t cuckoogpu::Filter< Config >::getAlternateBucket ( size_t  bucket,
TagType  fp,
size_t  numBuckets 
)
inlinestatic

Computes the alternate bucket for a fingerprint.

Definition at line 373 of file CuckooFilter.cuh.

373 {
374 return AltBucketPolicy::getAlternateBucket(bucket, fp, numBuckets);
375 }

◆ getAlternateBucketWithNewFp()

template<typename Config >
static __host__ __device__ cuda::std::tuple< size_t, TagType > cuckoogpu::Filter< Config >::getAlternateBucketWithNewFp ( size_t  bucket,
TagType  fp,
size_t  numBuckets 
)
inlinestatic

Computes alternate bucket AND updated fingerprint for choice bit policies.

For non-choice-bit policies, returns the original fingerprint unchanged.

Definition at line 382 of file CuckooFilter.cuh.

382 {
383 if constexpr (AltBucketPolicy::usesChoiceBit) {
384 return AltBucketPolicy::getAlternateBucketWithNewFp(bucket, fp, numBuckets);
385 } else {
386 return {AltBucketPolicy::getAlternateBucket(bucket, fp, numBuckets), fp};
387 }
388 }
Here is the caller graph for this function:

◆ getCandidateBucketsAndFPs()

template<typename Config >
static __host__ __device__ cuda::std::tuple< size_t, size_t, TagType, TagType > cuckoogpu::Filter< Config >::getCandidateBucketsAndFPs ( const T key,
size_t  numBuckets 
)
inlinestatic

Definition at line 365 of file CuckooFilter.cuh.

365 {
366 return AltBucketPolicy::getCandidateBucketsAndFPs(key, numBuckets);
367 }
Here is the caller graph for this function:

◆ getNumBuckets()

template<typename Config >
size_t cuckoogpu::Filter< Config >::getNumBuckets ( ) const
inline

Returns the number of buckets in the filter.

Returns
size_t Number of buckets.

Definition at line 1023 of file CuckooFilter.cuh.

1023 {
1024 return numBuckets;
1025 }

◆ hash64()

template<typename Config >
template<typename H >
static __host__ __device__ uint64_t cuckoogpu::Filter< Config >::hash64 ( const H &  key)
inlinestatic

Definition at line 360 of file CuckooFilter.cuh.

360 {
361 return AltBucketPolicy::hash64(key);
362 }
Here is the caller graph for this function:

◆ insert()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::insert ( const T key,
uint32_t *  evictionAttempts = nullptr 
)
inline

Inserts a single key into the filter.

Computes candidate buckets and attempts insertion, performing eviction if necessary.

Parameters
keyThe key to insert.
evictionAttemptsOptional pointer to a counter for eviction attempts
Returns
true if insertion succeeded, false if the filter is too full (max evictions reached).

Definition at line 1358 of file CuckooFilter.cuh.

1358 {
1359 auto [i1, i2, fp1, fp2] = getCandidateBucketsAndFPs(key, numBuckets);
1360
1361 // For all policies: fp1 is for bucket i1, fp2 is for bucket i2
1362 // For non-choice-bit policies, fp1 == fp2
1363 if (tryInsertAtBucket(i1, fp1) || tryInsertAtBucket(i2, fp2)) {
1364 return true;
1365 }
1366
1367 // For eviction, use correct fingerprint for the starting bucket
1368 auto startBucket = (fp1 & 1) == 0 ? i1 : i2;
1369 TagType evictFp;
1370
1371 if constexpr (AltBucketPolicy::usesChoiceBit) {
1372 evictFp = (fp1 & 1) == 0 ? fp1 : fp2;
1373 } else {
1374 evictFp = fp1;
1375 }
1376
1378 return insertWithEvictionBFS(evictFp, startBucket, evictionAttempts);
1379 } else if constexpr (Config::evictionPolicy == EvictionPolicy::DFS) {
1380 return insertWithEvictionDFS(evictFp, startBucket, evictionAttempts);
1381 } else {
1382 static_assert(
1385 "Unhandled eviction policy"
1386 );
1387 }
1388 }
__device__ bool tryInsertAtBucket(size_t bucketIdx, TagType tag)
Attempts to insert a tag into a specific bucket.
typename Config::TagType TagType
__device__ bool insertWithEvictionDFS(TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr)
Inserts a fingerprint into the filter by evicting existing fingerprints.
__device__ bool insertWithEvictionBFS(TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr)
Inserts a fingerprint using repeated shallow breadth-first attempts.
@ BFS
Breadth-first search (default)
@ DFS
Pure depth-first search.
static constexpr EvictionPolicy evictionPolicy
Here is the call graph for this function:
Here is the caller graph for this function:

◆ insertMany() [1/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertMany ( const T d_keys,
const size_t  n,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts a batch of keys into the filter.

Parameters
d_keysPointer to device memory containing keys to insert.
nNumber of keys to insert.
d_outputOptional pointer to an output array indicating the success of each key insertion.
streamCUDA stream to use for the operation.
Returns
size_t Total number of occupied slots after insertion.

Definition at line 447 of file CuckooFilter.cuh.

451 {}
452 ) {
453 size_t numBlocks = SDIV(n, blockSize);
455 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_output, n, this, nullptr);
456
457 CUDA_CALL(cudaStreamSynchronize(stream));
458
459 return occupiedSlots();
460 }
Here is the caller graph for this function:

◆ insertMany() [2/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertMany ( const thrust::device_vector< T > &  d_keys,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector without outputting results.

Parameters
d_keysVector of keys to insert.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 717 of file CuckooFilter.cuh.

717 {}) {
718 return insertMany(thrust::raw_pointer_cast(d_keys.data()), d_keys.size(), nullptr, stream);
719 }
size_t insertMany(const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
Inserts a batch of keys into the filter.

◆ insertMany() [3/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< bool > &  d_output,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector.

Parameters
d_keysVector of keys to insert.
d_outputVector to store results (bool). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 672 of file CuckooFilter.cuh.

675 {}
676 ) {
677 if (d_output.size() != d_keys.size()) {
678 d_output.resize(d_keys.size());
679 }
680 return insertMany(
681 thrust::raw_pointer_cast(d_keys.data()),
682 d_keys.size(),
683 thrust::raw_pointer_cast(d_output.data()),
684 stream
685 );
686 }

◆ insertMany() [4/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint8_t > &  d_output,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector (uint8_t output).

Parameters
d_keysVector of keys to insert.
d_outputVector to store results (uint8_t). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 695 of file CuckooFilter.cuh.

698 {}
699 ) {
700 if (d_output.size() != d_keys.size()) {
701 d_output.resize(d_keys.size());
702 }
703 return insertMany(
704 thrust::raw_pointer_cast(d_keys.data()),
705 d_keys.size(),
706 reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output.data())),
707 stream
708 );
709 }

◆ insertManySorted() [1/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySorted ( const T d_keys,
const size_t  n,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

This pre-sorts the input keys based on the primary bucket index to allow for coalesced memory access when you later insert them into the filter.

Parameters
d_keysPointer to device memory array of keys to insert
nNumber of keys to insert
d_outputOptional pointer to an output array indicating the success of each key insertion.
streamCUDA stream to use for the operation.
Returns
size_t Updated number of occupied slots in the filter

Definition at line 502 of file CuckooFilter.cuh.

506 {}
507 ) {
508 PackedTagType* d_packedTags;
509
510 CUDA_CALL(cudaMallocAsync(&d_packedTags, n * sizeof(PackedTagType), stream));
511
512 size_t numBlocks = SDIV(n, blockSize);
513
515 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_packedTags, n, numBuckets);
516
517 void* d_tempStorage = nullptr;
518 size_t tempStorageBytes = 0;
519
520 cub::DeviceRadixSort::SortKeys(
521 d_tempStorage,
522 tempStorageBytes,
523 d_packedTags,
524 d_packedTags,
525 n,
526 0,
527 sizeof(PackedTagType) * 8,
528 stream
529 );
530
531 CUDA_CALL(cudaMallocAsync(&d_tempStorage, tempStorageBytes, stream));
532
533 cub::DeviceRadixSort::SortKeys(
534 d_tempStorage,
535 tempStorageBytes,
536 d_packedTags,
537 d_packedTags,
538 n,
539 0,
540 sizeof(PackedTagType) * 8,
541 stream
542 );
543
544 CUDA_CALL(cudaFreeAsync(d_tempStorage, stream));
545
547 <<<numBlocks, blockSize, 0, stream>>>(d_packedTags, d_output, n, this, nullptr);
548
549 CUDA_CALL(cudaFreeAsync(d_packedTags, stream));
550 CUDA_CALL(cudaStreamSynchronize(stream));
551
552 return occupiedSlots();
553 }
typename std::conditional< bitsPerTag<=8, uint32_t, uint64_t >::type PackedTagType

◆ insertManySorted() [2/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySorted ( const thrust::device_vector< T > &  d_keys,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector, sorting them first, without outputting results.

Parameters
d_keysVector of keys to insert.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 811 of file CuckooFilter.cuh.

811 {}) {
812 return insertManySorted(
813 thrust::raw_pointer_cast(d_keys.data()), d_keys.size(), nullptr, stream
814 );
815 }
size_t insertManySorted(const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
This pre-sorts the input keys based on the primary bucket index to allow for coalesced memory access ...

◆ insertManySorted() [3/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySorted ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< bool > &  d_output,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector, sorting them first.

Parameters
d_keysVector of keys to insert.
d_outputVector to store results (bool). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 765 of file CuckooFilter.cuh.

768 {}
769 ) {
770 if (d_output.size() != d_keys.size()) {
771 d_output.resize(d_keys.size());
772 }
773 return insertManySorted(
774 thrust::raw_pointer_cast(d_keys.data()),
775 d_keys.size(),
776 thrust::raw_pointer_cast(d_output.data()),
777 stream
778 );
779 }

◆ insertManySorted() [4/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySorted ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint8_t > &  d_output,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector, sorting them first (uint8_t output).

Parameters
d_keysVector of keys to insert.
d_outputVector to store results (uint8_t). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 788 of file CuckooFilter.cuh.

791 {}
792 ) {
793 if (d_output.size() != d_keys.size()) {
794 d_output.resize(d_keys.size());
795 }
796 return insertManySorted(
797 thrust::raw_pointer_cast(d_keys.data()),
798 d_keys.size(),
799 reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output.data())),
800 stream
801 );
802 }

◆ insertManySortedWithEvictionCounts() [1/2]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySortedWithEvictionCounts ( const T d_keys,
const size_t  n,
uint32_t *  d_evictionAttempts,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts a pre-sorted batch of keys and records per-attempt eviction counts.

Parameters
d_keysPointer to device memory containing keys to insert.
nNumber of keys to insert.
d_evictionAttemptsDevice output array of size n. Each entry stores the number of evictions performed by the corresponding insertion attempt.
d_outputOptional pointer to an output array indicating per-key insertion success.
streamCUDA stream to use for the operation.
Returns
size_t Updated number of occupied slots in the filter.

Definition at line 567 of file CuckooFilter.cuh.

572 {}
573 ) {
574 PackedTagType* d_packedTags;
575
576 CUDA_CALL(cudaMallocAsync(&d_packedTags, n * sizeof(PackedTagType), stream));
577
578 size_t numBlocks = SDIV(n, blockSize);
579
581 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_packedTags, n, numBuckets);
582
583 void* d_tempStorage = nullptr;
584 size_t tempStorageBytes = 0;
585
586 cub::DeviceRadixSort::SortKeys(
587 d_tempStorage,
588 tempStorageBytes,
589 d_packedTags,
590 d_packedTags,
591 n,
592 0,
593 sizeof(PackedTagType) * 8,
594 stream
595 );
596
597 CUDA_CALL(cudaMallocAsync(&d_tempStorage, tempStorageBytes, stream));
598
599 cub::DeviceRadixSort::SortKeys(
600 d_tempStorage,
601 tempStorageBytes,
602 d_packedTags,
603 d_packedTags,
604 n,
605 0,
606 sizeof(PackedTagType) * 8,
607 stream
608 );
609
610 CUDA_CALL(cudaFreeAsync(d_tempStorage, stream));
611
612 detail::insertKernelSorted<Config><<<numBlocks, blockSize, 0, stream>>>(
613 d_packedTags, d_output, n, this, d_evictionAttempts
614 );
615
616 CUDA_CALL(cudaFreeAsync(d_packedTags, stream));
617 CUDA_CALL(cudaStreamSynchronize(stream));
618
619 return occupiedSlots();
620 }

◆ insertManySortedWithEvictionCounts() [2/2]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySortedWithEvictionCounts ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint32_t > &  d_evictionAttempts,
thrust::device_vector< uint8_t > *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector, sorting first, and records per-attempt eviction counts.

Parameters
d_keysVector of keys to insert.
d_evictionAttemptsVector to store per-key eviction counts. Resized if necessary.
d_outputOptional vector to store per-key insertion success. Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 827 of file CuckooFilter.cuh.

831 {}
832 ) {
833 if (d_evictionAttempts.size() != d_keys.size()) {
834 d_evictionAttempts.resize(d_keys.size());
835 }
836
837 bool* d_outputPtr = nullptr;
838 if (d_output != nullptr) {
839 if (d_output->size() != d_keys.size()) {
840 d_output->resize(d_keys.size());
841 }
842 d_outputPtr = reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output->data()));
843 }
844
846 thrust::raw_pointer_cast(d_keys.data()),
847 d_keys.size(),
848 thrust::raw_pointer_cast(d_evictionAttempts.data()),
849 d_outputPtr,
850 stream
851 );
852 }
size_t insertManySortedWithEvictionCounts(const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={})
Inserts a pre-sorted batch of keys and records per-attempt eviction counts.

◆ insertManyWithEvictionCounts() [1/2]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManyWithEvictionCounts ( const T d_keys,
const size_t  n,
uint32_t *  d_evictionAttempts,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts a batch of keys and records per-attempt eviction counts.

Parameters
d_keysPointer to device memory containing keys to insert.
nNumber of keys to insert.
d_evictionAttemptsDevice output array of size n. Each entry stores the number of evictions performed by the corresponding insertion attempt.
d_outputOptional pointer to an output array indicating per-key insertion success.
streamCUDA stream to use for the operation.
Returns
size_t Total number of occupied slots after insertion.

Definition at line 474 of file CuckooFilter.cuh.

479 {}
480 ) {
481 size_t numBlocks = SDIV(n, blockSize);
483 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_output, n, this, d_evictionAttempts);
484
485 CUDA_CALL(cudaStreamSynchronize(stream));
486
487 return occupiedSlots();
488 }

◆ insertManyWithEvictionCounts() [2/2]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManyWithEvictionCounts ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint32_t > &  d_evictionAttempts,
thrust::device_vector< uint8_t > *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector and records per-attempt eviction counts.

Parameters
d_keysVector of keys to insert.
d_evictionAttemptsVector to store per-key eviction counts. Resized if necessary.
d_outputOptional vector to store per-key insertion success. Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 730 of file CuckooFilter.cuh.

734 {}
735 ) {
736 if (d_evictionAttempts.size() != d_keys.size()) {
737 d_evictionAttempts.resize(d_keys.size());
738 }
739
740 bool* d_outputPtr = nullptr;
741 if (d_output != nullptr) {
742 if (d_output->size() != d_keys.size()) {
743 d_output->resize(d_keys.size());
744 }
745 d_outputPtr = reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output->data()));
746 }
747
749 thrust::raw_pointer_cast(d_keys.data()),
750 d_keys.size(),
751 thrust::raw_pointer_cast(d_evictionAttempts.data()),
752 d_outputPtr,
753 stream
754 );
755 }
size_t insertManyWithEvictionCounts(const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={})
Inserts a batch of keys and records per-attempt eviction counts.

◆ insertWithEvictionBFS()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::insertWithEvictionBFS ( TagType  fp,
size_t  startBucket,
uint32_t *  evictionAttempts = nullptr 
)
inline

Inserts a fingerprint using repeated shallow breadth-first attempts.

Each round scans a handful of candidate eviction slots in the current bucket and tries to place one candidate in its alternate bucket. If no shallow move succeeds, it evicts the last scanned candidate slot from the current bucket and restarts the same BFS round from that evicted tag's alternate bucket. This repeats until insertion succeeds or maxEvictions is reached.

Parameters
fpFingerprint to insert
evictionAttemptsOptional pointer to a counter for eviction attempts
startBucketIndex of the bucket to start the search from
Returns
true if the insertion was successful, false otherwise

Definition at line 1253 of file CuckooFilter.cuh.

1253 {
1254 constexpr size_t numCandidates = std::max(1UL, bucketSize / 2);
1255
1256 TagType currentFp = fp;
1257 size_t currentBucket = startBucket;
1258
1259 size_t evictions = 0;
1260 while (evictions < maxEvictions) {
1261 Bucket& bucket = d_buckets[currentBucket];
1262 size_t restartWord = 0;
1263 size_t restartTagIdx = 0;
1264
1265 for (size_t i = 0; i < numCandidates; ++i) {
1266 size_t evictSlot = (currentFp + i * 0x9E3779B1UL + (evictions + 1) * 0x85EBCA77) &
1267 (bucketSize - 1);
1268 size_t evictWord = evictSlot / Bucket::tagsPerWord;
1269 size_t evictTagIdx = evictSlot & (Bucket::tagsPerWord - 1);
1270 restartWord = evictWord;
1271 restartTagIdx = evictTagIdx;
1272
1273 auto packed = bucket.packedTags[evictWord].load(cuda::memory_order_relaxed);
1274 TagType candidateFp = bucket.extractTag(packed, evictTagIdx);
1275
1276 if (candidateFp == EMPTY) {
1277 if (tryInsertAtBucket(currentBucket, currentFp)) {
1278 return true;
1279 }
1280 continue;
1281 }
1282
1283 auto [altBucket, altFp] =
1284 getAlternateBucketWithNewFp(currentBucket, candidateFp, numBuckets);
1285 if (tryInsertAtBucket(altBucket, altFp)) {
1286 // Successfully inserted the evicted tag at its alternate location
1287 // Now atomically swap in our tag at the original location
1288 auto expected = bucket.packedTags[evictWord].load(cuda::memory_order_relaxed);
1289
1290 // Verify the tag is still there and try to replace it
1291 if (bucket.extractTag(expected, evictTagIdx) == candidateFp) {
1292 auto desired = bucket.replaceTag(expected, evictTagIdx, currentFp);
1293
1294 if (bucket.packedTags[evictWord].compare_exchange_strong(
1295 expected,
1296 desired,
1297 cuda::memory_order_relaxed,
1298 cuda::memory_order_relaxed
1299 )) {
1300#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
1301 d_numEvictions->fetch_add(1, cuda::memory_order_relaxed);
1302 if (evictionAttempts != nullptr) {
1303 (*evictionAttempts)++;
1304 }
1305#endif
1306 return true;
1307 }
1308 }
1309
1310 // Failed to swap, clean up the tag we inserted to avoid duplicates
1311 tryRemoveAtBucket(altBucket, candidateFp);
1312 }
1313 }
1314
1315 // Evict the last scanned candidate and continue from its alternate location.
1316 auto expected = bucket.packedTags[restartWord].load(cuda::memory_order_relaxed);
1317 typename Bucket::WordType desired;
1318 TagType evictedFp;
1319
1320 do {
1321 evictedFp = bucket.extractTag(expected, restartTagIdx);
1322 desired = bucket.replaceTag(expected, restartTagIdx, currentFp);
1323 } while (!bucket.packedTags[restartWord].compare_exchange_strong(
1324 expected, desired, cuda::memory_order_relaxed, cuda::memory_order_relaxed
1325 ));
1326
1327 if (evictedFp == EMPTY) {
1328 return true;
1329 }
1330
1331#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
1332 d_numEvictions->fetch_add(1, cuda::memory_order_relaxed);
1333 if (evictionAttempts != nullptr) {
1334 (*evictionAttempts)++;
1335 }
1336#endif
1337
1338 evictions++;
1339 auto [altBucket, altFp] =
1340 getAlternateBucketWithNewFp(currentBucket, evictedFp, numBuckets);
1341 currentBucket = altBucket;
1342 currentFp = altFp;
1343 }
1344
1345 return false;
1346 }
static constexpr size_t maxEvictions
static __host__ __device__ cuda::std::tuple< size_t, TagType > getAlternateBucketWithNewFp(size_t bucket, TagType fp, size_t numBuckets)
Computes alternate bucket AND updated fingerprint for choice bit policies.
__device__ bool tryRemoveAtBucket(size_t bucketIdx, TagType tag)
Attempt to remove a single instance of a fingerprint from a bucket.
typename Config::WordType WordType
cuda::std::atomic< WordType > packedTags[wordCount]
Here is the call graph for this function:
Here is the caller graph for this function:

◆ insertWithEvictionDFS()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::insertWithEvictionDFS ( TagType  fp,
size_t  startBucket,
uint32_t *  evictionAttempts = nullptr 
)
inline

Inserts a fingerprint into the filter by evicting existing fingerprints.

The thread first picks a pseudo-random target to replace with the new fingerprint. Then it tries to insert the evicted fingerprint into its alternate bucket. This process is repeated until either a fingerprint is inserted into an empty slot or the maximum number of evictions is reached.

Parameters
fpFingerprint to insert
startBucketIndex of the bucket to start the search from
evictionAttemptsOptional pointer to a counter for eviction attempts
Returns
true if the insertion was successful, false otherwise

Definition at line 1196 of file CuckooFilter.cuh.

1196 {
1197 TagType currentFp = fp;
1198 size_t currentBucket = startBucket;
1199
1200 for (size_t evictions = 0; evictions < maxEvictions; ++evictions) {
1201 auto evictSlot = (currentFp + (evictions + 1) * 0x9E3779B1UL) & (bucketSize - 1);
1202
1203 size_t evictWord = evictSlot / Bucket::tagsPerWord;
1204 size_t evictTagIdx = evictSlot & (Bucket::tagsPerWord - 1);
1205
1206 Bucket& bucket = d_buckets[currentBucket];
1207 auto expected = bucket.packedTags[evictWord].load(cuda::memory_order_relaxed);
1208 typename Bucket::WordType desired;
1209 TagType evictedFp;
1210
1211 do {
1212 evictedFp = bucket.extractTag(expected, evictTagIdx);
1213 desired = bucket.replaceTag(expected, evictTagIdx, currentFp);
1214 } while (!bucket.packedTags[evictWord].compare_exchange_strong(
1215 expected, desired, cuda::memory_order_relaxed, cuda::memory_order_relaxed
1216 ));
1217
1218#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
1219 d_numEvictions->fetch_add(1, cuda::memory_order_relaxed);
1220 if (evictionAttempts != nullptr) {
1221 (*evictionAttempts)++;
1222 }
1223#endif
1224
1225 currentFp = evictedFp;
1226 auto [altBucket, newFp] =
1227 getAlternateBucketWithNewFp(currentBucket, evictedFp, numBuckets);
1228 currentBucket = altBucket;
1229 currentFp = newFp;
1230
1231 if (tryInsertAtBucket(currentBucket, currentFp)) {
1232 return true;
1233 }
1234 }
1235 return false;
1236 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ loadFactor()

template<typename Config >
float cuckoogpu::Filter< Config >::loadFactor ( )
inline

Calculates the current load factor of the filter.

Returns
float Load factor (occupied slots / total capacity).

Definition at line 971 of file CuckooFilter.cuh.

971 {
972 return static_cast<float>(occupiedSlots()) / (numBuckets * bucketSize);
973 }
Here is the call graph for this function:

◆ occupiedSlots()

template<typename Config >
size_t cuckoogpu::Filter< Config >::occupiedSlots ( )
inline

Returns the total number of occupied slots.

Retrieves the value from the device counter.

Returns
size_t Number of occupied slots.

Definition at line 982 of file CuckooFilter.cuh.

982 {
983 CUDA_CALL(
984 cudaMemcpy(&h_numOccupied, d_numOccupied, sizeof(size_t), cudaMemcpyDeviceToHost)
985 );
986 return h_numOccupied;
987 }
Here is the caller graph for this function:

◆ operator=()

template<typename Config >
Filter & cuckoogpu::Filter< Config >::operator= ( const Filter< Config > &  )
delete

◆ remove()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::remove ( const T key)
inline

Removes a key from the filter.

Parameters
keyThe key to remove.
Returns
true if the key was found and removed, false otherwise.

Definition at line 1410 of file CuckooFilter.cuh.

1410 {
1411 auto [i1, i2, fp1, fp2] = getCandidateBucketsAndFPs(key, numBuckets);
1412
1413 // fp1 is for bucket i1, fp2 is for bucket i2
1414 // For non-choice-bit policies, fp1 == fp2
1415 return tryRemoveAtBucket(i1, fp1) || tryRemoveAtBucket(i2, fp2);
1416 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ resetEvictionCount()

template<typename Config >
void cuckoogpu::Filter< Config >::resetEvictionCount ( )
inline

Resets the eviction counter to zero.

Definition at line 1006 of file CuckooFilter.cuh.

1006 {
1007 CUDA_CALL(cudaMemset(d_numEvictions, 0, sizeof(cuda::std::atomic<size_t>)));
1008 }

◆ sizeInBytes()

template<typename Config >
size_t cuckoogpu::Filter< Config >::sizeInBytes ( ) const
inline

Returns the size of the filter in bytes.

Returns
size_t Size in bytes.

Definition at line 1031 of file CuckooFilter.cuh.

1031 {
1032 return numBuckets * sizeof(Bucket);
1033 }

◆ tryInsertAtBucket()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::tryInsertAtBucket ( size_t  bucketIdx,
TagType  tag 
)
inline

Attempts to insert a tag into a specific bucket.

Scans the bucket for an empty slot and attempts to atomically place the tag.

Parameters
bucketIdxIndex of the bucket.
tagTag to insert.
Returns
true if insertion succeeded, false if the bucket is full.

Definition at line 1143 of file CuckooFilter.cuh.

1143 {
1144 Bucket& bucket = d_buckets[bucketIdx];
1145 const uint32_t startIdx = tag & (bucketSize - 1);
1146 const size_t startWord = startIdx / Bucket::tagsPerWord;
1147
1148 using WordType = typename Bucket::WordType;
1149
1150 for (size_t i = 0; i < Bucket::wordCount; ++i) {
1151 const size_t currWord = (startWord + i) & (Bucket::wordCount - 1);
1152 auto expected = bucket.packedTags[currWord].load(cuda::memory_order_relaxed);
1153
1154 while (true) {
1155 WordType zeroMask = detail::getZeroMask<TagType, WordType>(expected);
1156
1157 if (zeroMask == 0) {
1158 // No empty slots in this word, move to next
1159 break;
1160 }
1161
1162 // Find position of first empty slot (returns 1-indexed bit position)
1163 int bitPos;
1164 if constexpr (sizeof(WordType) == 4) {
1165 bitPos = __ffs(static_cast<int>(zeroMask)) - 1;
1166 } else {
1167 bitPos = __ffsll(static_cast<long long>(zeroMask)) - 1;
1168 }
1169 size_t j = bitPos / bitsPerTag;
1170
1171 auto desired = bucket.replaceTag(expected, j, tag);
1172
1173 if (bucket.packedTags[currWord].compare_exchange_strong(
1174 expected, desired, cuda::memory_order_relaxed, cuda::memory_order_relaxed
1175 )) {
1176 return true;
1177 }
1178 }
1179 }
1180 return false;
1181 }
static constexpr size_t bitsPerTag
Here is the call graph for this function:
Here is the caller graph for this function:

◆ tryRemoveAtBucket()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::tryRemoveAtBucket ( size_t  bucketIdx,
TagType  tag 
)
inline

Attempt to remove a single instance of a fingerprint from a bucket.

Scans the atomic words that make up the bucket and attempts a CAS on each matching tag position until one removal succeeds. This allows multiple concurrent deleters to remove distinct copies when a bucket contains several identical fingerprints instead of all trying to clear the same slot

The function is lock-free and uses per-word compare-and-swap operations. It does NOT update any global occupancy counter, the caller is responsible for decrementing d_numOccupied if appropriate

Parameters
bucketIdxIndex of the bucket to search.
tagFingerprint value to remove (must not be EMPTY).
Returns
true if a single instance of tag was removed from the bucket; false if no matching tag remained (or another thread removed the last matching instance before this call could succeed).

Definition at line 1088 of file CuckooFilter.cuh.

1088 {
1089 Bucket& bucket = d_buckets[bucketIdx];
1090
1091 const uint32_t startSlot = tag & (bucketSize - 1);
1092 const size_t startWord = startSlot / Bucket::tagsPerWord;
1093
1094 using WordType = typename Bucket::WordType;
1095
1096 for (size_t i = 0; i < Bucket::wordCount; ++i) {
1097 const size_t currIdx = (startWord + i) & (Bucket::wordCount - 1);
1098
1099 while (true) {
1100 auto expected = bucket.packedTags[currIdx].load(cuda::memory_order_relaxed);
1101
1102 WordType matchMask = detail::getZeroMask<TagType, WordType>(
1104 );
1105
1106 if (matchMask == 0) {
1107 // No matching tags in this word
1108 break;
1109 }
1110
1111 // Find position of first matching tag
1112 int bitPos;
1113 if constexpr (sizeof(WordType) == 4) {
1114 bitPos = __ffs(static_cast<int>(matchMask)) - 1;
1115 } else {
1116 bitPos = __ffsll(static_cast<long long>(matchMask)) - 1;
1117 }
1118 size_t tagIdx = bitPos / bitsPerTag;
1119
1120 auto desired = bucket.replaceTag(expected, tagIdx, EMPTY);
1121
1122 if (bucket.packedTags[currIdx].compare_exchange_weak(
1123 expected, desired, cuda::memory_order_relaxed, cuda::memory_order_relaxed
1124 )) {
1125 return true;
1126 }
1127 // CAS failed, retry with updated expected value
1128 }
1129 }
1130
1131 return false;
1132 }
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ bitsPerTag

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::bitsPerTag = Config::bitsPerTag
staticconstexpr

Definition at line 146 of file CuckooFilter.cuh.

◆ blockSize

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::blockSize = Config::blockSize
staticconstexpr

Definition at line 155 of file CuckooFilter.cuh.

◆ bucketSize

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::bucketSize = Config::bucketSize
staticconstexpr

Definition at line 152 of file CuckooFilter.cuh.

◆ d_buckets

template<typename Config >
Bucket* cuckoogpu::Filter< Config >::d_buckets

Pointer to the device memory for the buckets.

Definition at line 348 of file CuckooFilter.cuh.

◆ d_numEvictions

template<typename Config >
cuda::std::atomic<size_t>* cuckoogpu::Filter< Config >::d_numEvictions {}

Pointer to the device memory for the eviction counter.

Definition at line 354 of file CuckooFilter.cuh.

354{};

◆ d_numOccupied

template<typename Config >
cuda::std::atomic<size_t>* cuckoogpu::Filter< Config >::d_numOccupied {}

Pointer to the device memory for the occupancy counter.

Definition at line 350 of file CuckooFilter.cuh.

350{};

◆ EMPTY

template<typename Config >
constexpr TagType cuckoogpu::Filter< Config >::EMPTY = 0
staticconstexpr

Definition at line 213 of file CuckooFilter.cuh.

◆ fpMask

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::fpMask = (1ULL << bitsPerTag) - 1
staticconstexpr

Definition at line 214 of file CuckooFilter.cuh.

◆ h_numOccupied

template<typename Config >
size_t cuckoogpu::Filter< Config >::h_numOccupied = 0

Number of occupied buckets in the filter.

Definition at line 357 of file CuckooFilter.cuh.

◆ maxEvictions

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::maxEvictions = Config::maxEvictions
staticconstexpr

Definition at line 154 of file CuckooFilter.cuh.

◆ numBuckets

template<typename Config >
size_t cuckoogpu::Filter< Config >::numBuckets

Number of buckets in the filter.

Definition at line 347 of file CuckooFilter.cuh.

◆ tagEntryBytes

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::tagEntryBytes = sizeof(TagType)
staticconstexpr

Definition at line 151 of file CuckooFilter.cuh.


The documentation for this struct was generated from the following file: