GPU-Accelerated Cuckoo Filter
Loading...
Searching...
No Matches
Classes | Public Types | Public Member Functions | Static Public Member Functions | Public Attributes | Static Public Attributes | List of all members
cuckoogpu::Filter< Config > Struct Template Reference

A CUDA-accelerated Cuckoo Filter implementation. More...

Collaboration diagram for cuckoogpu::Filter< Config >:
[legend]

Classes

struct  Bucket
 Bucket structure that holds the fingerprint and tags for a given bucket. More...
 
struct  PackedTag
 This is used by the sorted insert kernel to store the fingerprint and primary bucket index in a compact format that allows you to sort them directly since the bucket index lives in the upper bits. More...
 

Public Types

using T = typename Config::KeyType
 
using TagType = typename Config::TagType
 
using AltBucketPolicy = typename Config::AltBucketPolicy
 
using PackedTagType = typename std::conditional< bitsPerTag<=8, uint32_t, uint64_t >::type
 

Public Member Functions

 Filter (const Filter &)=delete
 
Filteroperator= (const Filter &)=delete
 
 Filter (size_t capacity)
 Constructs a new Cuckoo Filter.
 
 ~Filter ()
 Destroys the Cuckoo Filter.
 
size_t insertMany (const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
 Inserts a batch of keys into the filter.
 
size_t insertManyWithEvictionCounts (const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={})
 Inserts a batch of keys and records per-attempt eviction counts.
 
size_t insertManySorted (const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
 This pre-sorts the input keys based on the primary bucket index to allow for coalesced memory access when you later insert them into the filter.
 
size_t insertManySortedWithEvictionCounts (const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={})
 Inserts a pre-sorted batch of keys and records per-attempt eviction counts.
 
void containsMany (const T *d_keys, const size_t n, bool *d_output, cudaStream_t stream={})
 Checks for the existence of a batch of keys.
 
size_t deleteMany (const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
 Tries to remove a set of keys from the filter.
 
size_t insertMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={})
 Inserts keys from a Thrust device vector.
 
size_t insertMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={})
 Inserts keys from a Thrust device vector (uint8_t output).
 
size_t insertMany (const thrust::device_vector< T > &d_keys, cudaStream_t stream={})
 Inserts keys from a Thrust device vector without outputting results.
 
size_t insertManyWithEvictionCounts (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint32_t > &d_evictionAttempts, thrust::device_vector< uint8_t > *d_output=nullptr, cudaStream_t stream={})
 Inserts keys from a Thrust device vector and records per-attempt eviction counts.
 
size_t insertManySorted (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={})
 Inserts keys from a Thrust device vector, sorting them first.
 
size_t insertManySorted (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={})
 Inserts keys from a Thrust device vector, sorting them first (uint8_t output).
 
size_t insertManySorted (const thrust::device_vector< T > &d_keys, cudaStream_t stream={})
 Inserts keys from a Thrust device vector, sorting them first, without outputting results.
 
size_t insertManySortedWithEvictionCounts (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint32_t > &d_evictionAttempts, thrust::device_vector< uint8_t > *d_output=nullptr, cudaStream_t stream={})
 Inserts keys from a Thrust device vector, sorting first, and records per-attempt eviction counts.
 
void containsMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={})
 Checks for existence of keys in a Thrust device vector.
 
void containsMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={})
 Checks for existence of keys in a Thrust device vector (uint8_t output).
 
size_t deleteMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={})
 Deletes keys in a Thrust device vector.
 
size_t deleteMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={})
 Deletes keys in a Thrust device vector (uint8_t output).
 
size_t deleteMany (const thrust::device_vector< T > &d_keys, cudaStream_t stream={})
 Deletes keys in a Thrust device vector without outputting results.
 
void clear ()
 Clears the filter, removing all items.
 
float loadFactor ()
 Calculates the current load factor of the filter.
 
size_t occupiedSlots ()
 Returns the total number of occupied slots.
 
size_t evictionCount ()
 Returns the total number of evictions performed.
 
void resetEvictionCount ()
 Resets the eviction counter to zero.
 
size_t capacity ()
 Returns the total capacity of the filter.
 
size_t getNumBuckets () const
 Returns the number of buckets in the filter.
 
size_t sizeInBytes () const
 Returns the size of the filter in bytes.
 
size_t countOccupiedSlots ()
 Counts occupied slots by iterating over all buckets on the host.
 
__device__ bool tryRemoveAtBucket (size_t bucketIdx, TagType tag)
 Attempt to remove a single instance of a fingerprint from a bucket.
 
__device__ bool tryInsertAtBucket (size_t bucketIdx, TagType tag)
 Attempts to insert a tag into a specific bucket.
 
__device__ bool insertWithEvictionDFS (TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr)
 Inserts a fingerprint into the filter by evicting existing fingerprints.
 
__device__ bool insertWithEvictionBFS (TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr)
 Inserts a fingerprint using repeated shallow breadth-first attempts.
 
__device__ bool insert (const T &key, uint32_t *evictionAttempts=nullptr)
 Inserts a single key into the filter.
 
__device__ bool contains (const T &key) const
 Checks if a key exists in the filter.
 
__device__ bool remove (const T &key)
 Removes a key from the filter.
 

Static Public Member Functions

template<typename H >
static __host__ __device__ uint64_t hash64 (const H &key)
 
static __host__ __device__ cuda::std::tuple< size_t, size_t, TagType, TagTypegetCandidateBucketsAndFPs (const T &key, size_t numBuckets)
 
static __host__ __device__ size_t getAlternateBucket (size_t bucket, TagType fp, size_t numBuckets)
 Computes the alternate bucket for a fingerprint.
 
static __host__ __device__ cuda::std::tuple< size_t, TagTypegetAlternateBucketWithNewFp (size_t bucket, TagType fp, size_t numBuckets)
 Computes alternate bucket AND updated fingerprint for choice bit policies.
 
static size_t calculateNumBuckets (size_t capacity)
 The number of buckets is enforced to be a power of two in order to allow for efficient modulo on the bucket indices.
 

Public Attributes

size_t numBuckets
 Number of buckets in the filter.
 
Bucketd_buckets
 Pointer to the device memory for the buckets.
 
cuda::std::atomic< size_t > * d_numOccupied {}
 Pointer to the device memory for the occupancy counter.
 
cuda::std::atomic< size_t > * d_numEvictions {}
 Pointer to the device memory for the eviction counter.
 
size_t h_numOccupied = 0
 Number of occupied buckets in the filter.
 

Static Public Attributes

static constexpr size_t bitsPerTag = Config::bitsPerTag
 
static constexpr size_t tagEntryBytes = sizeof(TagType)
 
static constexpr size_t bucketSize = Config::bucketSize
 
static constexpr size_t maxEvictions = Config::maxEvictions
 
static constexpr size_t blockSize = Config::blockSize
 
static constexpr TagType EMPTY = 0
 
static constexpr size_t fpMask = (1ULL << bitsPerTag) - 1
 

Detailed Description

template<typename Config>
struct cuckoogpu::Filter< Config >

A CUDA-accelerated Cuckoo Filter implementation.

This class implements a Cuckoo Filter using CUDA for high-throughput insertion, lookup, and deletion. It supports concurrent operations and uses atomic operations for thread safety within buckets.

Template Parameters
ConfigThe configuration structure defining filter parameters.

Definition at line 144 of file CuckooFilter.cuh.

Member Typedef Documentation

◆ AltBucketPolicy

template<typename Config >
using cuckoogpu::Filter< Config >::AltBucketPolicy = typename Config::AltBucketPolicy

Definition at line 149 of file CuckooFilter.cuh.

◆ PackedTagType

template<typename Config >
using cuckoogpu::Filter< Config >::PackedTagType = typename std::conditional<bitsPerTag <= 8, uint32_t, uint64_t>::type

Definition at line 163 of file CuckooFilter.cuh.

◆ T

template<typename Config >
using cuckoogpu::Filter< Config >::T = typename Config::KeyType

Definition at line 145 of file CuckooFilter.cuh.

◆ TagType

template<typename Config >
using cuckoogpu::Filter< Config >::TagType = typename Config::TagType

Definition at line 148 of file CuckooFilter.cuh.

Constructor & Destructor Documentation

◆ Filter() [1/2]

template<typename Config >
cuckoogpu::Filter< Config >::Filter ( const Filter< Config > &  )
delete

◆ Filter() [2/2]

template<typename Config >
cuckoogpu::Filter< Config >::Filter ( size_t  capacity)
inlineexplicit

Constructs a new Cuckoo Filter.

Allocates device memory for buckets and occupancy counters.

Parameters
capacityDesired capacity (number of items) for the filter.

Definition at line 408 of file CuckooFilter.cuh.

409 CUCKOO_CUDA_CALL(cudaMalloc(&d_buckets, numBuckets * sizeof(Bucket)));
410 CUCKOO_CUDA_CALL(cudaMalloc(&d_numOccupied, sizeof(cuda::std::atomic<size_t>)));
411#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
412 CUCKOO_CUDA_CALL(cudaMalloc(&d_numEvictions, sizeof(cuda::std::atomic<size_t>)));
413#endif
414
415 clear();
416 }
cuda::std::atomic< size_t > * d_numOccupied
Pointer to the device memory for the occupancy counter.
static size_t calculateNumBuckets(size_t capacity)
The number of buckets is enforced to be a power of two in order to allow for efficient modulo on the ...
void clear()
Clears the filter, removing all items.
size_t numBuckets
Number of buckets in the filter.
cuda::std::atomic< size_t > * d_numEvictions
Pointer to the device memory for the eviction counter.
Bucket * d_buckets
Pointer to the device memory for the buckets.
size_t capacity()
Returns the total capacity of the filter.
#define CUCKOO_CUDA_CALL(err)
Macro for checking CUDA errors.
Definition helpers.cuh:204
Here is the call graph for this function:

◆ ~Filter()

template<typename Config >
cuckoogpu::Filter< Config >::~Filter ( )
inline

Destroys the Cuckoo Filter.

Frees allocated device memory.

Definition at line 423 of file CuckooFilter.cuh.

423 {
424 if (d_buckets) {
425 CUCKOO_CUDA_CALL(cudaFree(d_buckets));
426 }
427 if (d_numOccupied) {
429 }
430#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
431 if (d_numEvictions) {
433 }
434#endif
435 }

Member Function Documentation

◆ calculateNumBuckets()

template<typename Config >
static size_t cuckoogpu::Filter< Config >::calculateNumBuckets ( size_t  capacity)
inlinestatic

The number of buckets is enforced to be a power of two in order to allow for efficient modulo on the bucket indices.

Definition at line 394 of file CuckooFilter.cuh.

394 {
395 return AltBucketPolicy::calculateNumBuckets(capacity);
396 }
Here is the call graph for this function:

◆ capacity()

template<typename Config >
size_t cuckoogpu::Filter< Config >::capacity ( )
inline

Returns the total capacity of the filter.

Returns
size_t Total number of slots.

Definition at line 1017 of file CuckooFilter.cuh.

1017 {
1018 return numBuckets * bucketSize;
1019 }
static constexpr size_t bucketSize
Here is the caller graph for this function:

◆ clear()

template<typename Config >
void cuckoogpu::Filter< Config >::clear ( )
inline

Clears the filter, removing all items.

Definition at line 958 of file CuckooFilter.cuh.

958 {
959 CUCKOO_CUDA_CALL(cudaMemset(d_buckets, 0, numBuckets * sizeof(Bucket)));
960 CUCKOO_CUDA_CALL(cudaMemset(d_numOccupied, 0, sizeof(cuda::std::atomic<size_t>)));
961#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
962 CUCKOO_CUDA_CALL(cudaMemset(d_numEvictions, 0, sizeof(cuda::std::atomic<size_t>)));
963#endif
964 h_numOccupied = 0;
965 }
size_t h_numOccupied
Number of occupied buckets in the filter.
Here is the caller graph for this function:

◆ contains()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::contains ( const T key) const
inline

Checks if a key exists in the filter.

Parameters
keyThe key to check.
Returns
true if the key is found, false otherwise.

Definition at line 1398 of file CuckooFilter.cuh.

1398 {
1399 auto [i1, i2, fp1, fp2] = getCandidateBucketsAndFPs(key, numBuckets);
1400
1401 // fp1 is for bucket i1, fp2 is for bucket i2
1402 // For non-choice-bit policies, fp1 == fp2
1403 return d_buckets[i1].contains(fp1) || d_buckets[i2].contains(fp2);
1404 }
static __host__ __device__ cuda::std::tuple< size_t, size_t, TagType, TagType > getCandidateBucketsAndFPs(const T &key, size_t numBuckets)
__device__ bool contains(TagType tag) const
Checks if a tag is present in the bucket using vectorized loads.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ containsMany() [1/3]

template<typename Config >
void cuckoogpu::Filter< Config >::containsMany ( const T d_keys,
const size_t  n,
bool *  d_output,
cudaStream_t  stream = {} 
)
inline

Checks for the existence of a batch of keys.

Parameters
d_keysPointer to device memory containing keys to check.
nNumber of keys to check.
d_outputPointer to device memory to store results (true/false).
streamCUDA stream to use for the operation.

Definition at line 631 of file CuckooFilter.cuh.

631 {}) {
632 size_t numBlocks = SDIV(n, blockSize);
634 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_output, n, this);
635
636 CUCKOO_CUDA_CALL(cudaStreamSynchronize(stream));
637 }
static constexpr size_t blockSize
#define SDIV(x, y)
Integer division with rounding up (ceiling).
Definition helpers.cuh:198
__host__ __device__ __forceinline__ constexpr bool hasZero(WordType v)
Checks if a packed word contains a zero slot.
Definition helpers.cuh:116
Here is the caller graph for this function:

◆ containsMany() [2/3]

template<typename Config >
void cuckoogpu::Filter< Config >::containsMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< bool > &  d_output,
cudaStream_t  stream = {} 
)
inline

Checks for existence of keys in a Thrust device vector.

Parameters
d_keysVector of keys to check.
d_outputVector to store results (bool). Resized if necessary.
streamCUDA stream.

Definition at line 861 of file CuckooFilter.cuh.

864 {}
865 ) {
866 if (d_output.size() != d_keys.size()) {
867 d_output.resize(d_keys.size());
868 }
870 thrust::raw_pointer_cast(d_keys.data()),
871 d_keys.size(),
872 thrust::raw_pointer_cast(d_output.data()),
873 stream
874 );
875 }
void containsMany(const T *d_keys, const size_t n, bool *d_output, cudaStream_t stream={})
Checks for the existence of a batch of keys.

◆ containsMany() [3/3]

template<typename Config >
void cuckoogpu::Filter< Config >::containsMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint8_t > &  d_output,
cudaStream_t  stream = {} 
)
inline

Checks for existence of keys in a Thrust device vector (uint8_t output).

Parameters
d_keysVector of keys to check.
d_outputVector to store results (uint8_t). Resized if necessary.
streamCUDA stream.

Definition at line 883 of file CuckooFilter.cuh.

886 {}
887 ) {
888 if (d_output.size() != d_keys.size()) {
889 d_output.resize(d_keys.size());
890 }
892 thrust::raw_pointer_cast(d_keys.data()),
893 d_keys.size(),
894 reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output.data())),
895 stream
896 );
897 }

◆ countOccupiedSlots()

template<typename Config >
size_t cuckoogpu::Filter< Config >::countOccupiedSlots ( )
inline

Counts occupied slots by iterating over all buckets on the host.

This is a slow operation used for verification/debugging.

Returns
size_t Actual number of occupied slots.

Definition at line 1044 of file CuckooFilter.cuh.

1044 {
1045 std::vector<Bucket> h_buckets(numBuckets);
1046
1047 CUCKOO_CUDA_CALL(cudaMemcpy(
1048 h_buckets.data(), d_buckets, numBuckets * sizeof(Bucket), cudaMemcpyDeviceToHost
1049 ));
1050
1051 size_t occupiedCount = 0;
1052
1053 for (size_t bucketIdx = 0; bucketIdx < numBuckets; ++bucketIdx) {
1054 const Bucket& bucket = h_buckets[bucketIdx];
1055
1056 for (size_t atomicIdx = 0; atomicIdx < Bucket::wordCount; ++atomicIdx) {
1057 uint64_t packed = reinterpret_cast<const uint64_t&>(bucket.packedTags[atomicIdx]);
1058
1059 for (size_t tagIdx = 0; tagIdx < Bucket::tagsPerWord; ++tagIdx) {
1060 auto tag = bucket.extractTag(packed, tagIdx);
1061
1062 if (tag != EMPTY) {
1063 occupiedCount++;
1064 }
1065 }
1066 }
1067 }
1068
1069 return occupiedCount;
1070 }
static constexpr TagType EMPTY
static constexpr size_t tagsPerWord
static constexpr size_t wordCount
Here is the call graph for this function:

◆ deleteMany() [1/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::deleteMany ( const T d_keys,
const size_t  n,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Tries to remove a set of keys from the filter.

If the key is not present, it is ignored.

Parameters
d_keysPointer to the array of keys to remove
nNumber of keys to remove
d_outputOptional pointer to an output array indicating the success of each key removal
streamCUDA stream to use for the operation
Returns
size_t Updated number of occupied slots in the filter

Definition at line 650 of file CuckooFilter.cuh.

654 {}
655 ) {
656 size_t numBlocks = SDIV(n, blockSize);
658 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_output, n, this);
659
660 CUCKOO_CUDA_CALL(cudaStreamSynchronize(stream));
661
662 return occupiedSlots();
663 }
size_t occupiedSlots()
Returns the total number of occupied slots.
Here is the caller graph for this function:

◆ deleteMany() [2/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::deleteMany ( const thrust::device_vector< T > &  d_keys,
cudaStream_t  stream = {} 
)
inline

Deletes keys in a Thrust device vector without outputting results.

Parameters
d_keysVector of keys to delete.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 951 of file CuckooFilter.cuh.

951 {}) {
952 return deleteMany(thrust::raw_pointer_cast(d_keys.data()), d_keys.size(), nullptr, stream);
953 }
size_t deleteMany(const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
Tries to remove a set of keys from the filter.

◆ deleteMany() [3/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::deleteMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< bool > &  d_output,
cudaStream_t  stream = {} 
)
inline

Deletes keys in a Thrust device vector.

Parameters
d_keysVector of keys to delete.
d_outputVector to store results (bool). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 906 of file CuckooFilter.cuh.

909 {}
910 ) {
911 if (d_output.size() != d_keys.size()) {
912 d_output.resize(d_keys.size());
913 }
914 return deleteMany(
915 thrust::raw_pointer_cast(d_keys.data()),
916 d_keys.size(),
917 thrust::raw_pointer_cast(d_output.data()),
918 stream
919 );
920 }

◆ deleteMany() [4/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::deleteMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint8_t > &  d_output,
cudaStream_t  stream = {} 
)
inline

Deletes keys in a Thrust device vector (uint8_t output).

Parameters
d_keysVector of keys to delete.
d_outputVector to store results (uint8_t). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 929 of file CuckooFilter.cuh.

932 {}
933 ) {
934 if (d_output.size() != d_keys.size()) {
935 d_output.resize(d_keys.size());
936 }
937 return deleteMany(
938 thrust::raw_pointer_cast(d_keys.data()),
939 d_keys.size(),
940 reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output.data())),
941 stream
942 );
943 }

◆ evictionCount()

template<typename Config >
size_t cuckoogpu::Filter< Config >::evictionCount ( )
inline

Returns the total number of evictions performed.

Only available when CUCKOO_FILTER_COUNT_EVICTIONS is defined.

Returns
size_t Number of evictions.

Definition at line 997 of file CuckooFilter.cuh.

997 {
998 size_t count;
1000 cudaMemcpy(&count, d_numEvictions, sizeof(size_t), cudaMemcpyDeviceToHost)
1001 );
1002 return count;
1003 }

◆ getAlternateBucket()

template<typename Config >
static __host__ __device__ size_t cuckoogpu::Filter< Config >::getAlternateBucket ( size_t  bucket,
TagType  fp,
size_t  numBuckets 
)
inlinestatic

Computes the alternate bucket for a fingerprint.

Definition at line 373 of file CuckooFilter.cuh.

373 {
374 return AltBucketPolicy::getAlternateBucket(bucket, fp, numBuckets);
375 }

◆ getAlternateBucketWithNewFp()

template<typename Config >
static __host__ __device__ cuda::std::tuple< size_t, TagType > cuckoogpu::Filter< Config >::getAlternateBucketWithNewFp ( size_t  bucket,
TagType  fp,
size_t  numBuckets 
)
inlinestatic

Computes alternate bucket AND updated fingerprint for choice bit policies.

For non-choice-bit policies, returns the original fingerprint unchanged.

Definition at line 382 of file CuckooFilter.cuh.

382 {
383 if constexpr (AltBucketPolicy::usesChoiceBit) {
384 return AltBucketPolicy::getAlternateBucketWithNewFp(bucket, fp, numBuckets);
385 } else {
386 return {AltBucketPolicy::getAlternateBucket(bucket, fp, numBuckets), fp};
387 }
388 }
Here is the caller graph for this function:

◆ getCandidateBucketsAndFPs()

template<typename Config >
static __host__ __device__ cuda::std::tuple< size_t, size_t, TagType, TagType > cuckoogpu::Filter< Config >::getCandidateBucketsAndFPs ( const T key,
size_t  numBuckets 
)
inlinestatic

Definition at line 365 of file CuckooFilter.cuh.

365 {
366 return AltBucketPolicy::getCandidateBucketsAndFPs(key, numBuckets);
367 }
Here is the caller graph for this function:

◆ getNumBuckets()

template<typename Config >
size_t cuckoogpu::Filter< Config >::getNumBuckets ( ) const
inline

Returns the number of buckets in the filter.

Returns
size_t Number of buckets.

Definition at line 1025 of file CuckooFilter.cuh.

1025 {
1026 return numBuckets;
1027 }

◆ hash64()

template<typename Config >
template<typename H >
static __host__ __device__ uint64_t cuckoogpu::Filter< Config >::hash64 ( const H &  key)
inlinestatic

Definition at line 360 of file CuckooFilter.cuh.

360 {
361 return AltBucketPolicy::hash64(key);
362 }
Here is the caller graph for this function:

◆ insert()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::insert ( const T key,
uint32_t *  evictionAttempts = nullptr 
)
inline

Inserts a single key into the filter.

Computes candidate buckets and attempts insertion, performing eviction if necessary.

Parameters
keyThe key to insert.
evictionAttemptsOptional pointer to a counter for eviction attempts
Returns
true if insertion succeeded, false if the filter is too full (max evictions reached).

Definition at line 1360 of file CuckooFilter.cuh.

1360 {
1361 auto [i1, i2, fp1, fp2] = getCandidateBucketsAndFPs(key, numBuckets);
1362
1363 // For all policies: fp1 is for bucket i1, fp2 is for bucket i2
1364 // For non-choice-bit policies, fp1 == fp2
1365 if (tryInsertAtBucket(i1, fp1) || tryInsertAtBucket(i2, fp2)) {
1366 return true;
1367 }
1368
1369 // For eviction, use correct fingerprint for the starting bucket
1370 auto startBucket = (fp1 & 1) == 0 ? i1 : i2;
1371 TagType evictFp;
1372
1373 if constexpr (AltBucketPolicy::usesChoiceBit) {
1374 evictFp = (fp1 & 1) == 0 ? fp1 : fp2;
1375 } else {
1376 evictFp = fp1;
1377 }
1378
1380 return insertWithEvictionBFS(evictFp, startBucket, evictionAttempts);
1381 } else if constexpr (Config::evictionPolicy == EvictionPolicy::DFS) {
1382 return insertWithEvictionDFS(evictFp, startBucket, evictionAttempts);
1383 } else {
1384 static_assert(
1387 "Unhandled eviction policy"
1388 );
1389 }
1390 }
__device__ bool tryInsertAtBucket(size_t bucketIdx, TagType tag)
Attempts to insert a tag into a specific bucket.
typename Config::TagType TagType
__device__ bool insertWithEvictionDFS(TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr)
Inserts a fingerprint into the filter by evicting existing fingerprints.
__device__ bool insertWithEvictionBFS(TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr)
Inserts a fingerprint using repeated shallow breadth-first attempts.
@ BFS
Breadth-first search (default)
@ DFS
Pure depth-first search.
static constexpr EvictionPolicy evictionPolicy
Here is the call graph for this function:
Here is the caller graph for this function:

◆ insertMany() [1/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertMany ( const T d_keys,
const size_t  n,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts a batch of keys into the filter.

Parameters
d_keysPointer to device memory containing keys to insert.
nNumber of keys to insert.
d_outputOptional pointer to an output array indicating the success of each key insertion.
streamCUDA stream to use for the operation.
Returns
size_t Total number of occupied slots after insertion.

Definition at line 447 of file CuckooFilter.cuh.

451 {}
452 ) {
453 size_t numBlocks = SDIV(n, blockSize);
455 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_output, n, this, nullptr);
456
457 CUCKOO_CUDA_CALL(cudaStreamSynchronize(stream));
458
459 return occupiedSlots();
460 }
Here is the caller graph for this function:

◆ insertMany() [2/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertMany ( const thrust::device_vector< T > &  d_keys,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector without outputting results.

Parameters
d_keysVector of keys to insert.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 717 of file CuckooFilter.cuh.

717 {}) {
718 return insertMany(thrust::raw_pointer_cast(d_keys.data()), d_keys.size(), nullptr, stream);
719 }
size_t insertMany(const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
Inserts a batch of keys into the filter.

◆ insertMany() [3/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< bool > &  d_output,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector.

Parameters
d_keysVector of keys to insert.
d_outputVector to store results (bool). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 672 of file CuckooFilter.cuh.

675 {}
676 ) {
677 if (d_output.size() != d_keys.size()) {
678 d_output.resize(d_keys.size());
679 }
680 return insertMany(
681 thrust::raw_pointer_cast(d_keys.data()),
682 d_keys.size(),
683 thrust::raw_pointer_cast(d_output.data()),
684 stream
685 );
686 }

◆ insertMany() [4/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertMany ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint8_t > &  d_output,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector (uint8_t output).

Parameters
d_keysVector of keys to insert.
d_outputVector to store results (uint8_t). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 695 of file CuckooFilter.cuh.

698 {}
699 ) {
700 if (d_output.size() != d_keys.size()) {
701 d_output.resize(d_keys.size());
702 }
703 return insertMany(
704 thrust::raw_pointer_cast(d_keys.data()),
705 d_keys.size(),
706 reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output.data())),
707 stream
708 );
709 }

◆ insertManySorted() [1/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySorted ( const T d_keys,
const size_t  n,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

This pre-sorts the input keys based on the primary bucket index to allow for coalesced memory access when you later insert them into the filter.

Parameters
d_keysPointer to device memory array of keys to insert
nNumber of keys to insert
d_outputOptional pointer to an output array indicating the success of each key insertion.
streamCUDA stream to use for the operation.
Returns
size_t Updated number of occupied slots in the filter

Definition at line 502 of file CuckooFilter.cuh.

506 {}
507 ) {
508 PackedTagType* d_packedTags;
509
510 CUCKOO_CUDA_CALL(cudaMallocAsync(&d_packedTags, n * sizeof(PackedTagType), stream));
511
512 size_t numBlocks = SDIV(n, blockSize);
513
515 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_packedTags, n, numBuckets);
516
517 void* d_tempStorage = nullptr;
518 size_t tempStorageBytes = 0;
519
520 cub::DeviceRadixSort::SortKeys(
521 d_tempStorage,
522 tempStorageBytes,
523 d_packedTags,
524 d_packedTags,
525 n,
526 0,
527 sizeof(PackedTagType) * 8,
528 stream
529 );
530
531 CUCKOO_CUDA_CALL(cudaMallocAsync(&d_tempStorage, tempStorageBytes, stream));
532
533 cub::DeviceRadixSort::SortKeys(
534 d_tempStorage,
535 tempStorageBytes,
536 d_packedTags,
537 d_packedTags,
538 n,
539 0,
540 sizeof(PackedTagType) * 8,
541 stream
542 );
543
544 CUCKOO_CUDA_CALL(cudaFreeAsync(d_tempStorage, stream));
545
547 <<<numBlocks, blockSize, 0, stream>>>(d_packedTags, d_output, n, this, nullptr);
548
549 CUCKOO_CUDA_CALL(cudaFreeAsync(d_packedTags, stream));
550 CUCKOO_CUDA_CALL(cudaStreamSynchronize(stream));
551
552 return occupiedSlots();
553 }
typename std::conditional< bitsPerTag<=8, uint32_t, uint64_t >::type PackedTagType

◆ insertManySorted() [2/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySorted ( const thrust::device_vector< T > &  d_keys,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector, sorting them first, without outputting results.

Parameters
d_keysVector of keys to insert.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 811 of file CuckooFilter.cuh.

811 {}) {
812 return insertManySorted(
813 thrust::raw_pointer_cast(d_keys.data()), d_keys.size(), nullptr, stream
814 );
815 }
size_t insertManySorted(const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={})
This pre-sorts the input keys based on the primary bucket index to allow for coalesced memory access ...

◆ insertManySorted() [3/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySorted ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< bool > &  d_output,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector, sorting them first.

Parameters
d_keysVector of keys to insert.
d_outputVector to store results (bool). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 765 of file CuckooFilter.cuh.

768 {}
769 ) {
770 if (d_output.size() != d_keys.size()) {
771 d_output.resize(d_keys.size());
772 }
773 return insertManySorted(
774 thrust::raw_pointer_cast(d_keys.data()),
775 d_keys.size(),
776 thrust::raw_pointer_cast(d_output.data()),
777 stream
778 );
779 }

◆ insertManySorted() [4/4]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySorted ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint8_t > &  d_output,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector, sorting them first (uint8_t output).

Parameters
d_keysVector of keys to insert.
d_outputVector to store results (uint8_t). Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 788 of file CuckooFilter.cuh.

791 {}
792 ) {
793 if (d_output.size() != d_keys.size()) {
794 d_output.resize(d_keys.size());
795 }
796 return insertManySorted(
797 thrust::raw_pointer_cast(d_keys.data()),
798 d_keys.size(),
799 reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output.data())),
800 stream
801 );
802 }

◆ insertManySortedWithEvictionCounts() [1/2]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySortedWithEvictionCounts ( const T d_keys,
const size_t  n,
uint32_t *  d_evictionAttempts,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts a pre-sorted batch of keys and records per-attempt eviction counts.

Parameters
d_keysPointer to device memory containing keys to insert.
nNumber of keys to insert.
d_evictionAttemptsDevice output array of size n. Each entry stores the number of evictions performed by the corresponding insertion attempt.
d_outputOptional pointer to an output array indicating per-key insertion success.
streamCUDA stream to use for the operation.
Returns
size_t Updated number of occupied slots in the filter.

Definition at line 567 of file CuckooFilter.cuh.

572 {}
573 ) {
574 PackedTagType* d_packedTags;
575
576 CUCKOO_CUDA_CALL(cudaMallocAsync(&d_packedTags, n * sizeof(PackedTagType), stream));
577
578 size_t numBlocks = SDIV(n, blockSize);
579
581 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_packedTags, n, numBuckets);
582
583 void* d_tempStorage = nullptr;
584 size_t tempStorageBytes = 0;
585
586 cub::DeviceRadixSort::SortKeys(
587 d_tempStorage,
588 tempStorageBytes,
589 d_packedTags,
590 d_packedTags,
591 n,
592 0,
593 sizeof(PackedTagType) * 8,
594 stream
595 );
596
597 CUCKOO_CUDA_CALL(cudaMallocAsync(&d_tempStorage, tempStorageBytes, stream));
598
599 cub::DeviceRadixSort::SortKeys(
600 d_tempStorage,
601 tempStorageBytes,
602 d_packedTags,
603 d_packedTags,
604 n,
605 0,
606 sizeof(PackedTagType) * 8,
607 stream
608 );
609
610 CUCKOO_CUDA_CALL(cudaFreeAsync(d_tempStorage, stream));
611
612 detail::insertKernelSorted<Config><<<numBlocks, blockSize, 0, stream>>>(
613 d_packedTags, d_output, n, this, d_evictionAttempts
614 );
615
616 CUCKOO_CUDA_CALL(cudaFreeAsync(d_packedTags, stream));
617 CUCKOO_CUDA_CALL(cudaStreamSynchronize(stream));
618
619 return occupiedSlots();
620 }

◆ insertManySortedWithEvictionCounts() [2/2]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManySortedWithEvictionCounts ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint32_t > &  d_evictionAttempts,
thrust::device_vector< uint8_t > *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector, sorting first, and records per-attempt eviction counts.

Parameters
d_keysVector of keys to insert.
d_evictionAttemptsVector to store per-key eviction counts. Resized if necessary.
d_outputOptional vector to store per-key insertion success. Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 827 of file CuckooFilter.cuh.

831 {}
832 ) {
833 if (d_evictionAttempts.size() != d_keys.size()) {
834 d_evictionAttempts.resize(d_keys.size());
835 }
836
837 bool* d_outputPtr = nullptr;
838 if (d_output != nullptr) {
839 if (d_output->size() != d_keys.size()) {
840 d_output->resize(d_keys.size());
841 }
842 d_outputPtr = reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output->data()));
843 }
844
846 thrust::raw_pointer_cast(d_keys.data()),
847 d_keys.size(),
848 thrust::raw_pointer_cast(d_evictionAttempts.data()),
849 d_outputPtr,
850 stream
851 );
852 }
size_t insertManySortedWithEvictionCounts(const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={})
Inserts a pre-sorted batch of keys and records per-attempt eviction counts.

◆ insertManyWithEvictionCounts() [1/2]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManyWithEvictionCounts ( const T d_keys,
const size_t  n,
uint32_t *  d_evictionAttempts,
bool *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts a batch of keys and records per-attempt eviction counts.

Parameters
d_keysPointer to device memory containing keys to insert.
nNumber of keys to insert.
d_evictionAttemptsDevice output array of size n. Each entry stores the number of evictions performed by the corresponding insertion attempt.
d_outputOptional pointer to an output array indicating per-key insertion success.
streamCUDA stream to use for the operation.
Returns
size_t Total number of occupied slots after insertion.

Definition at line 474 of file CuckooFilter.cuh.

479 {}
480 ) {
481 size_t numBlocks = SDIV(n, blockSize);
483 <<<numBlocks, blockSize, 0, stream>>>(d_keys, d_output, n, this, d_evictionAttempts);
484
485 CUCKOO_CUDA_CALL(cudaStreamSynchronize(stream));
486
487 return occupiedSlots();
488 }

◆ insertManyWithEvictionCounts() [2/2]

template<typename Config >
size_t cuckoogpu::Filter< Config >::insertManyWithEvictionCounts ( const thrust::device_vector< T > &  d_keys,
thrust::device_vector< uint32_t > &  d_evictionAttempts,
thrust::device_vector< uint8_t > *  d_output = nullptr,
cudaStream_t  stream = {} 
)
inline

Inserts keys from a Thrust device vector and records per-attempt eviction counts.

Parameters
d_keysVector of keys to insert.
d_evictionAttemptsVector to store per-key eviction counts. Resized if necessary.
d_outputOptional vector to store per-key insertion success. Resized if necessary.
streamCUDA stream.
Returns
size_t Total number of occupied slots.

Definition at line 730 of file CuckooFilter.cuh.

734 {}
735 ) {
736 if (d_evictionAttempts.size() != d_keys.size()) {
737 d_evictionAttempts.resize(d_keys.size());
738 }
739
740 bool* d_outputPtr = nullptr;
741 if (d_output != nullptr) {
742 if (d_output->size() != d_keys.size()) {
743 d_output->resize(d_keys.size());
744 }
745 d_outputPtr = reinterpret_cast<bool*>(thrust::raw_pointer_cast(d_output->data()));
746 }
747
749 thrust::raw_pointer_cast(d_keys.data()),
750 d_keys.size(),
751 thrust::raw_pointer_cast(d_evictionAttempts.data()),
752 d_outputPtr,
753 stream
754 );
755 }
size_t insertManyWithEvictionCounts(const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={})
Inserts a batch of keys and records per-attempt eviction counts.

◆ insertWithEvictionBFS()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::insertWithEvictionBFS ( TagType  fp,
size_t  startBucket,
uint32_t *  evictionAttempts = nullptr 
)
inline

Inserts a fingerprint using repeated shallow breadth-first attempts.

Each round scans a handful of candidate eviction slots in the current bucket and tries to place one candidate in its alternate bucket. If no shallow move succeeds, it evicts the last scanned candidate slot from the current bucket and restarts the same BFS round from that evicted tag's alternate bucket. This repeats until insertion succeeds or maxEvictions is reached.

Parameters
fpFingerprint to insert
evictionAttemptsOptional pointer to a counter for eviction attempts
startBucketIndex of the bucket to start the search from
Returns
true if the insertion was successful, false otherwise

Definition at line 1255 of file CuckooFilter.cuh.

1255 {
1256 constexpr size_t numCandidates = std::max(1UL, bucketSize / 2);
1257
1258 TagType currentFp = fp;
1259 size_t currentBucket = startBucket;
1260
1261 size_t evictions = 0;
1262 while (evictions < maxEvictions) {
1263 Bucket& bucket = d_buckets[currentBucket];
1264 size_t restartWord = 0;
1265 size_t restartTagIdx = 0;
1266
1267 for (size_t i = 0; i < numCandidates; ++i) {
1268 size_t evictSlot = (currentFp + i * 0x9E3779B1UL + (evictions + 1) * 0x85EBCA77) &
1269 (bucketSize - 1);
1270 size_t evictWord = evictSlot / Bucket::tagsPerWord;
1271 size_t evictTagIdx = evictSlot & (Bucket::tagsPerWord - 1);
1272 restartWord = evictWord;
1273 restartTagIdx = evictTagIdx;
1274
1275 auto packed = bucket.packedTags[evictWord].load(cuda::memory_order_relaxed);
1276 TagType candidateFp = bucket.extractTag(packed, evictTagIdx);
1277
1278 if (candidateFp == EMPTY) {
1279 if (tryInsertAtBucket(currentBucket, currentFp)) {
1280 return true;
1281 }
1282 continue;
1283 }
1284
1285 auto [altBucket, altFp] =
1286 getAlternateBucketWithNewFp(currentBucket, candidateFp, numBuckets);
1287 if (tryInsertAtBucket(altBucket, altFp)) {
1288 // Successfully inserted the evicted tag at its alternate location
1289 // Now atomically swap in our tag at the original location
1290 auto expected = bucket.packedTags[evictWord].load(cuda::memory_order_relaxed);
1291
1292 // Verify the tag is still there and try to replace it
1293 if (bucket.extractTag(expected, evictTagIdx) == candidateFp) {
1294 auto desired = bucket.replaceTag(expected, evictTagIdx, currentFp);
1295
1296 if (bucket.packedTags[evictWord].compare_exchange_strong(
1297 expected,
1298 desired,
1299 cuda::memory_order_relaxed,
1300 cuda::memory_order_relaxed
1301 )) {
1302#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
1303 d_numEvictions->fetch_add(1, cuda::memory_order_relaxed);
1304 if (evictionAttempts != nullptr) {
1305 (*evictionAttempts)++;
1306 }
1307#endif
1308 return true;
1309 }
1310 }
1311
1312 // Failed to swap, clean up the tag we inserted to avoid duplicates
1313 tryRemoveAtBucket(altBucket, candidateFp);
1314 }
1315 }
1316
1317 // Evict the last scanned candidate and continue from its alternate location.
1318 auto expected = bucket.packedTags[restartWord].load(cuda::memory_order_relaxed);
1319 typename Bucket::WordType desired;
1320 TagType evictedFp;
1321
1322 do {
1323 evictedFp = bucket.extractTag(expected, restartTagIdx);
1324 desired = bucket.replaceTag(expected, restartTagIdx, currentFp);
1325 } while (!bucket.packedTags[restartWord].compare_exchange_strong(
1326 expected, desired, cuda::memory_order_relaxed, cuda::memory_order_relaxed
1327 ));
1328
1329 if (evictedFp == EMPTY) {
1330 return true;
1331 }
1332
1333#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
1334 d_numEvictions->fetch_add(1, cuda::memory_order_relaxed);
1335 if (evictionAttempts != nullptr) {
1336 (*evictionAttempts)++;
1337 }
1338#endif
1339
1340 evictions++;
1341 auto [altBucket, altFp] =
1342 getAlternateBucketWithNewFp(currentBucket, evictedFp, numBuckets);
1343 currentBucket = altBucket;
1344 currentFp = altFp;
1345 }
1346
1347 return false;
1348 }
static constexpr size_t maxEvictions
static __host__ __device__ cuda::std::tuple< size_t, TagType > getAlternateBucketWithNewFp(size_t bucket, TagType fp, size_t numBuckets)
Computes alternate bucket AND updated fingerprint for choice bit policies.
__device__ bool tryRemoveAtBucket(size_t bucketIdx, TagType tag)
Attempt to remove a single instance of a fingerprint from a bucket.
typename Config::WordType WordType
cuda::std::atomic< WordType > packedTags[wordCount]
Here is the call graph for this function:
Here is the caller graph for this function:

◆ insertWithEvictionDFS()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::insertWithEvictionDFS ( TagType  fp,
size_t  startBucket,
uint32_t *  evictionAttempts = nullptr 
)
inline

Inserts a fingerprint into the filter by evicting existing fingerprints.

The thread first picks a pseudo-random target to replace with the new fingerprint. Then it tries to insert the evicted fingerprint into its alternate bucket. This process is repeated until either a fingerprint is inserted into an empty slot or the maximum number of evictions is reached.

Parameters
fpFingerprint to insert
startBucketIndex of the bucket to start the search from
evictionAttemptsOptional pointer to a counter for eviction attempts
Returns
true if the insertion was successful, false otherwise

Definition at line 1198 of file CuckooFilter.cuh.

1198 {
1199 TagType currentFp = fp;
1200 size_t currentBucket = startBucket;
1201
1202 for (size_t evictions = 0; evictions < maxEvictions; ++evictions) {
1203 auto evictSlot = (currentFp + (evictions + 1) * 0x9E3779B1UL) & (bucketSize - 1);
1204
1205 size_t evictWord = evictSlot / Bucket::tagsPerWord;
1206 size_t evictTagIdx = evictSlot & (Bucket::tagsPerWord - 1);
1207
1208 Bucket& bucket = d_buckets[currentBucket];
1209 auto expected = bucket.packedTags[evictWord].load(cuda::memory_order_relaxed);
1210 typename Bucket::WordType desired;
1211 TagType evictedFp;
1212
1213 do {
1214 evictedFp = bucket.extractTag(expected, evictTagIdx);
1215 desired = bucket.replaceTag(expected, evictTagIdx, currentFp);
1216 } while (!bucket.packedTags[evictWord].compare_exchange_strong(
1217 expected, desired, cuda::memory_order_relaxed, cuda::memory_order_relaxed
1218 ));
1219
1220#ifdef CUCKOO_FILTER_COUNT_EVICTIONS
1221 d_numEvictions->fetch_add(1, cuda::memory_order_relaxed);
1222 if (evictionAttempts != nullptr) {
1223 (*evictionAttempts)++;
1224 }
1225#endif
1226
1227 currentFp = evictedFp;
1228 auto [altBucket, newFp] =
1229 getAlternateBucketWithNewFp(currentBucket, evictedFp, numBuckets);
1230 currentBucket = altBucket;
1231 currentFp = newFp;
1232
1233 if (tryInsertAtBucket(currentBucket, currentFp)) {
1234 return true;
1235 }
1236 }
1237 return false;
1238 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ loadFactor()

template<typename Config >
float cuckoogpu::Filter< Config >::loadFactor ( )
inline

Calculates the current load factor of the filter.

Returns
float Load factor (occupied slots / total capacity).

Definition at line 971 of file CuckooFilter.cuh.

971 {
972 return static_cast<float>(occupiedSlots()) / (numBuckets * bucketSize);
973 }
Here is the call graph for this function:

◆ occupiedSlots()

template<typename Config >
size_t cuckoogpu::Filter< Config >::occupiedSlots ( )
inline

Returns the total number of occupied slots.

Retrieves the value from the device counter.

Returns
size_t Number of occupied slots.

Definition at line 982 of file CuckooFilter.cuh.

982 {
984 cudaMemcpy(&h_numOccupied, d_numOccupied, sizeof(size_t), cudaMemcpyDeviceToHost)
985 );
986 return h_numOccupied;
987 }
Here is the caller graph for this function:

◆ operator=()

template<typename Config >
Filter & cuckoogpu::Filter< Config >::operator= ( const Filter< Config > &  )
delete

◆ remove()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::remove ( const T key)
inline

Removes a key from the filter.

Parameters
keyThe key to remove.
Returns
true if the key was found and removed, false otherwise.

Definition at line 1412 of file CuckooFilter.cuh.

1412 {
1413 auto [i1, i2, fp1, fp2] = getCandidateBucketsAndFPs(key, numBuckets);
1414
1415 // fp1 is for bucket i1, fp2 is for bucket i2
1416 // For non-choice-bit policies, fp1 == fp2
1417 return tryRemoveAtBucket(i1, fp1) || tryRemoveAtBucket(i2, fp2);
1418 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ resetEvictionCount()

template<typename Config >
void cuckoogpu::Filter< Config >::resetEvictionCount ( )
inline

Resets the eviction counter to zero.

Definition at line 1008 of file CuckooFilter.cuh.

1008 {
1009 CUCKOO_CUDA_CALL(cudaMemset(d_numEvictions, 0, sizeof(cuda::std::atomic<size_t>)));
1010 }

◆ sizeInBytes()

template<typename Config >
size_t cuckoogpu::Filter< Config >::sizeInBytes ( ) const
inline

Returns the size of the filter in bytes.

Returns
size_t Size in bytes.

Definition at line 1033 of file CuckooFilter.cuh.

1033 {
1034 return numBuckets * sizeof(Bucket);
1035 }

◆ tryInsertAtBucket()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::tryInsertAtBucket ( size_t  bucketIdx,
TagType  tag 
)
inline

Attempts to insert a tag into a specific bucket.

Scans the bucket for an empty slot and attempts to atomically place the tag.

Parameters
bucketIdxIndex of the bucket.
tagTag to insert.
Returns
true if insertion succeeded, false if the bucket is full.

Definition at line 1145 of file CuckooFilter.cuh.

1145 {
1146 Bucket& bucket = d_buckets[bucketIdx];
1147 const uint32_t startIdx = tag & (bucketSize - 1);
1148 const size_t startWord = startIdx / Bucket::tagsPerWord;
1149
1150 using WordType = typename Bucket::WordType;
1151
1152 for (size_t i = 0; i < Bucket::wordCount; ++i) {
1153 const size_t currWord = (startWord + i) & (Bucket::wordCount - 1);
1154 auto expected = bucket.packedTags[currWord].load(cuda::memory_order_relaxed);
1155
1156 while (true) {
1157 WordType zeroMask = detail::getZeroMask<TagType, WordType>(expected);
1158
1159 if (zeroMask == 0) {
1160 // No empty slots in this word, move to next
1161 break;
1162 }
1163
1164 // Find position of first empty slot (returns 1-indexed bit position)
1165 int bitPos;
1166 if constexpr (sizeof(WordType) == 4) {
1167 bitPos = __ffs(static_cast<int>(zeroMask)) - 1;
1168 } else {
1169 bitPos = __ffsll(static_cast<long long>(zeroMask)) - 1;
1170 }
1171 size_t j = bitPos / bitsPerTag;
1172
1173 auto desired = bucket.replaceTag(expected, j, tag);
1174
1175 if (bucket.packedTags[currWord].compare_exchange_strong(
1176 expected, desired, cuda::memory_order_relaxed, cuda::memory_order_relaxed
1177 )) {
1178 return true;
1179 }
1180 }
1181 }
1182 return false;
1183 }
static constexpr size_t bitsPerTag
Here is the call graph for this function:
Here is the caller graph for this function:

◆ tryRemoveAtBucket()

template<typename Config >
__device__ bool cuckoogpu::Filter< Config >::tryRemoveAtBucket ( size_t  bucketIdx,
TagType  tag 
)
inline

Attempt to remove a single instance of a fingerprint from a bucket.

Scans the atomic words that make up the bucket and attempts a CAS on each matching tag position until one removal succeeds. This allows multiple concurrent deleters to remove distinct copies when a bucket contains several identical fingerprints instead of all trying to clear the same slot

The function is lock-free and uses per-word compare-and-swap operations. It does NOT update any global occupancy counter, the caller is responsible for decrementing d_numOccupied if appropriate

Parameters
bucketIdxIndex of the bucket to search.
tagFingerprint value to remove (must not be EMPTY).
Returns
true if a single instance of tag was removed from the bucket; false if no matching tag remained (or another thread removed the last matching instance before this call could succeed).

Definition at line 1090 of file CuckooFilter.cuh.

1090 {
1091 Bucket& bucket = d_buckets[bucketIdx];
1092
1093 const uint32_t startSlot = tag & (bucketSize - 1);
1094 const size_t startWord = startSlot / Bucket::tagsPerWord;
1095
1096 using WordType = typename Bucket::WordType;
1097
1098 for (size_t i = 0; i < Bucket::wordCount; ++i) {
1099 const size_t currIdx = (startWord + i) & (Bucket::wordCount - 1);
1100
1101 while (true) {
1102 auto expected = bucket.packedTags[currIdx].load(cuda::memory_order_relaxed);
1103
1104 WordType matchMask = detail::getZeroMask<TagType, WordType>(
1106 );
1107
1108 if (matchMask == 0) {
1109 // No matching tags in this word
1110 break;
1111 }
1112
1113 // Find position of first matching tag
1114 int bitPos;
1115 if constexpr (sizeof(WordType) == 4) {
1116 bitPos = __ffs(static_cast<int>(matchMask)) - 1;
1117 } else {
1118 bitPos = __ffsll(static_cast<long long>(matchMask)) - 1;
1119 }
1120 size_t tagIdx = bitPos / bitsPerTag;
1121
1122 auto desired = bucket.replaceTag(expected, tagIdx, EMPTY);
1123
1124 if (bucket.packedTags[currIdx].compare_exchange_weak(
1125 expected, desired, cuda::memory_order_relaxed, cuda::memory_order_relaxed
1126 )) {
1127 return true;
1128 }
1129 // CAS failed, retry with updated expected value
1130 }
1131 }
1132
1133 return false;
1134 }
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ bitsPerTag

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::bitsPerTag = Config::bitsPerTag
staticconstexpr

Definition at line 146 of file CuckooFilter.cuh.

◆ blockSize

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::blockSize = Config::blockSize
staticconstexpr

Definition at line 155 of file CuckooFilter.cuh.

◆ bucketSize

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::bucketSize = Config::bucketSize
staticconstexpr

Definition at line 152 of file CuckooFilter.cuh.

◆ d_buckets

template<typename Config >
Bucket* cuckoogpu::Filter< Config >::d_buckets

Pointer to the device memory for the buckets.

Definition at line 348 of file CuckooFilter.cuh.

◆ d_numEvictions

template<typename Config >
cuda::std::atomic<size_t>* cuckoogpu::Filter< Config >::d_numEvictions {}

Pointer to the device memory for the eviction counter.

Definition at line 354 of file CuckooFilter.cuh.

354{};

◆ d_numOccupied

template<typename Config >
cuda::std::atomic<size_t>* cuckoogpu::Filter< Config >::d_numOccupied {}

Pointer to the device memory for the occupancy counter.

Definition at line 350 of file CuckooFilter.cuh.

350{};

◆ EMPTY

template<typename Config >
constexpr TagType cuckoogpu::Filter< Config >::EMPTY = 0
staticconstexpr

Definition at line 213 of file CuckooFilter.cuh.

◆ fpMask

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::fpMask = (1ULL << bitsPerTag) - 1
staticconstexpr

Definition at line 214 of file CuckooFilter.cuh.

◆ h_numOccupied

template<typename Config >
size_t cuckoogpu::Filter< Config >::h_numOccupied = 0

Number of occupied buckets in the filter.

Definition at line 357 of file CuckooFilter.cuh.

◆ maxEvictions

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::maxEvictions = Config::maxEvictions
staticconstexpr

Definition at line 154 of file CuckooFilter.cuh.

◆ numBuckets

template<typename Config >
size_t cuckoogpu::Filter< Config >::numBuckets

Number of buckets in the filter.

Definition at line 347 of file CuckooFilter.cuh.

◆ tagEntryBytes

template<typename Config >
constexpr size_t cuckoogpu::Filter< Config >::tagEntryBytes = sizeof(TagType)
staticconstexpr

Definition at line 151 of file CuckooFilter.cuh.


The documentation for this struct was generated from the following file: