|
GPU-Accelerated Cuckoo Filter
|
A CUDA-accelerated Cuckoo Filter implementation. More...
Classes | |
| struct | Bucket |
| Bucket structure that holds the fingerprint and tags for a given bucket. More... | |
| struct | PackedTag |
| This is used by the sorted insert kernel to store the fingerprint and primary bucket index in a compact format that allows you to sort them directly since the bucket index lives in the upper bits. More... | |
Public Types | |
| using | T = typename Config::KeyType |
| using | TagType = typename Config::TagType |
| using | AltBucketPolicy = typename Config::AltBucketPolicy |
| using | PackedTagType = typename std::conditional< bitsPerTag<=8, uint32_t, uint64_t >::type |
Public Member Functions | |
| Filter (const Filter &)=delete | |
| Filter & | operator= (const Filter &)=delete |
| Filter (size_t capacity) | |
| Constructs a new Cuckoo Filter. | |
| ~Filter () | |
| Destroys the Cuckoo Filter. | |
| size_t | insertMany (const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={}) |
| Inserts a batch of keys into the filter. | |
| size_t | insertManyWithEvictionCounts (const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={}) |
| Inserts a batch of keys and records per-attempt eviction counts. | |
| size_t | insertManySorted (const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={}) |
| This pre-sorts the input keys based on the primary bucket index to allow for coalesced memory access when you later insert them into the filter. | |
| size_t | insertManySortedWithEvictionCounts (const T *d_keys, const size_t n, uint32_t *d_evictionAttempts, bool *d_output=nullptr, cudaStream_t stream={}) |
| Inserts a pre-sorted batch of keys and records per-attempt eviction counts. | |
| void | containsMany (const T *d_keys, const size_t n, bool *d_output, cudaStream_t stream={}) |
| Checks for the existence of a batch of keys. | |
| size_t | deleteMany (const T *d_keys, const size_t n, bool *d_output=nullptr, cudaStream_t stream={}) |
| Tries to remove a set of keys from the filter. | |
| size_t | insertMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={}) |
| Inserts keys from a Thrust device vector. | |
| size_t | insertMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={}) |
| Inserts keys from a Thrust device vector (uint8_t output). | |
| size_t | insertMany (const thrust::device_vector< T > &d_keys, cudaStream_t stream={}) |
| Inserts keys from a Thrust device vector without outputting results. | |
| size_t | insertManyWithEvictionCounts (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint32_t > &d_evictionAttempts, thrust::device_vector< uint8_t > *d_output=nullptr, cudaStream_t stream={}) |
| Inserts keys from a Thrust device vector and records per-attempt eviction counts. | |
| size_t | insertManySorted (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={}) |
| Inserts keys from a Thrust device vector, sorting them first. | |
| size_t | insertManySorted (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={}) |
| Inserts keys from a Thrust device vector, sorting them first (uint8_t output). | |
| size_t | insertManySorted (const thrust::device_vector< T > &d_keys, cudaStream_t stream={}) |
| Inserts keys from a Thrust device vector, sorting them first, without outputting results. | |
| size_t | insertManySortedWithEvictionCounts (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint32_t > &d_evictionAttempts, thrust::device_vector< uint8_t > *d_output=nullptr, cudaStream_t stream={}) |
| Inserts keys from a Thrust device vector, sorting first, and records per-attempt eviction counts. | |
| void | containsMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={}) |
| Checks for existence of keys in a Thrust device vector. | |
| void | containsMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={}) |
| Checks for existence of keys in a Thrust device vector (uint8_t output). | |
| size_t | deleteMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< bool > &d_output, cudaStream_t stream={}) |
| Deletes keys in a Thrust device vector. | |
| size_t | deleteMany (const thrust::device_vector< T > &d_keys, thrust::device_vector< uint8_t > &d_output, cudaStream_t stream={}) |
| Deletes keys in a Thrust device vector (uint8_t output). | |
| size_t | deleteMany (const thrust::device_vector< T > &d_keys, cudaStream_t stream={}) |
| Deletes keys in a Thrust device vector without outputting results. | |
| void | clear () |
| Clears the filter, removing all items. | |
| float | loadFactor () |
| Calculates the current load factor of the filter. | |
| size_t | occupiedSlots () |
| Returns the total number of occupied slots. | |
| size_t | evictionCount () |
| Returns the total number of evictions performed. | |
| void | resetEvictionCount () |
| Resets the eviction counter to zero. | |
| size_t | capacity () |
| Returns the total capacity of the filter. | |
| size_t | getNumBuckets () const |
| Returns the number of buckets in the filter. | |
| size_t | sizeInBytes () const |
| Returns the size of the filter in bytes. | |
| size_t | countOccupiedSlots () |
| Counts occupied slots by iterating over all buckets on the host. | |
| __device__ bool | tryRemoveAtBucket (size_t bucketIdx, TagType tag) |
| Attempt to remove a single instance of a fingerprint from a bucket. | |
| __device__ bool | tryInsertAtBucket (size_t bucketIdx, TagType tag) |
| Attempts to insert a tag into a specific bucket. | |
| __device__ bool | insertWithEvictionDFS (TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr) |
| Inserts a fingerprint into the filter by evicting existing fingerprints. | |
| __device__ bool | insertWithEvictionBFS (TagType fp, size_t startBucket, uint32_t *evictionAttempts=nullptr) |
| Inserts a fingerprint using repeated shallow breadth-first attempts. | |
| __device__ bool | insert (const T &key, uint32_t *evictionAttempts=nullptr) |
| Inserts a single key into the filter. | |
| __device__ bool | contains (const T &key) const |
| Checks if a key exists in the filter. | |
| __device__ bool | remove (const T &key) |
| Removes a key from the filter. | |
Static Public Member Functions | |
| template<typename H > | |
| static __host__ __device__ uint64_t | hash64 (const H &key) |
| static __host__ __device__ cuda::std::tuple< size_t, size_t, TagType, TagType > | getCandidateBucketsAndFPs (const T &key, size_t numBuckets) |
| static __host__ __device__ size_t | getAlternateBucket (size_t bucket, TagType fp, size_t numBuckets) |
| Computes the alternate bucket for a fingerprint. | |
| static __host__ __device__ cuda::std::tuple< size_t, TagType > | getAlternateBucketWithNewFp (size_t bucket, TagType fp, size_t numBuckets) |
| Computes alternate bucket AND updated fingerprint for choice bit policies. | |
| static size_t | calculateNumBuckets (size_t capacity) |
| The number of buckets is enforced to be a power of two in order to allow for efficient modulo on the bucket indices. | |
Public Attributes | |
| size_t | numBuckets |
| Number of buckets in the filter. | |
| Bucket * | d_buckets |
| Pointer to the device memory for the buckets. | |
| cuda::std::atomic< size_t > * | d_numOccupied {} |
| Pointer to the device memory for the occupancy counter. | |
| cuda::std::atomic< size_t > * | d_numEvictions {} |
| Pointer to the device memory for the eviction counter. | |
| size_t | h_numOccupied = 0 |
| Number of occupied buckets in the filter. | |
Static Public Attributes | |
| static constexpr size_t | bitsPerTag = Config::bitsPerTag |
| static constexpr size_t | tagEntryBytes = sizeof(TagType) |
| static constexpr size_t | bucketSize = Config::bucketSize |
| static constexpr size_t | maxEvictions = Config::maxEvictions |
| static constexpr size_t | blockSize = Config::blockSize |
| static constexpr TagType | EMPTY = 0 |
| static constexpr size_t | fpMask = (1ULL << bitsPerTag) - 1 |
A CUDA-accelerated Cuckoo Filter implementation.
This class implements a Cuckoo Filter using CUDA for high-throughput insertion, lookup, and deletion. It supports concurrent operations and uses atomic operations for thread safety within buckets.
| Config | The configuration structure defining filter parameters. |
Definition at line 144 of file CuckooFilter.cuh.
| using cuckoogpu::Filter< Config >::AltBucketPolicy = typename Config::AltBucketPolicy |
Definition at line 149 of file CuckooFilter.cuh.
| using cuckoogpu::Filter< Config >::PackedTagType = typename std::conditional<bitsPerTag <= 8, uint32_t, uint64_t>::type |
Definition at line 163 of file CuckooFilter.cuh.
| using cuckoogpu::Filter< Config >::T = typename Config::KeyType |
Definition at line 145 of file CuckooFilter.cuh.
| using cuckoogpu::Filter< Config >::TagType = typename Config::TagType |
Definition at line 148 of file CuckooFilter.cuh.
|
delete |
|
inlineexplicit |
Constructs a new Cuckoo Filter.
Allocates device memory for buckets and occupancy counters.
| capacity | Desired capacity (number of items) for the filter. |
Definition at line 408 of file CuckooFilter.cuh.
|
inline |
Destroys the Cuckoo Filter.
Frees allocated device memory.
Definition at line 423 of file CuckooFilter.cuh.
|
inlinestatic |
The number of buckets is enforced to be a power of two in order to allow for efficient modulo on the bucket indices.
Definition at line 394 of file CuckooFilter.cuh.
|
inline |
Returns the total capacity of the filter.
Definition at line 1015 of file CuckooFilter.cuh.
|
inline |
Clears the filter, removing all items.
Definition at line 958 of file CuckooFilter.cuh.
|
inline |
Checks if a key exists in the filter.
| key | The key to check. |
Definition at line 1396 of file CuckooFilter.cuh.
|
inline |
Checks for the existence of a batch of keys.
| d_keys | Pointer to device memory containing keys to check. |
| n | Number of keys to check. |
| d_output | Pointer to device memory to store results (true/false). |
| stream | CUDA stream to use for the operation. |
Definition at line 631 of file CuckooFilter.cuh.
|
inline |
Checks for existence of keys in a Thrust device vector.
| d_keys | Vector of keys to check. |
| d_output | Vector to store results (bool). Resized if necessary. |
| stream | CUDA stream. |
Definition at line 861 of file CuckooFilter.cuh.
|
inline |
Checks for existence of keys in a Thrust device vector (uint8_t output).
| d_keys | Vector of keys to check. |
| d_output | Vector to store results (uint8_t). Resized if necessary. |
| stream | CUDA stream. |
Definition at line 883 of file CuckooFilter.cuh.
|
inline |
Counts occupied slots by iterating over all buckets on the host.
This is a slow operation used for verification/debugging.
Definition at line 1042 of file CuckooFilter.cuh.
|
inline |
Tries to remove a set of keys from the filter.
If the key is not present, it is ignored.
| d_keys | Pointer to the array of keys to remove |
| n | Number of keys to remove |
| d_output | Optional pointer to an output array indicating the success of each key removal |
| stream | CUDA stream to use for the operation |
Definition at line 650 of file CuckooFilter.cuh.
|
inline |
Deletes keys in a Thrust device vector without outputting results.
| d_keys | Vector of keys to delete. |
| stream | CUDA stream. |
Definition at line 951 of file CuckooFilter.cuh.
|
inline |
Deletes keys in a Thrust device vector.
| d_keys | Vector of keys to delete. |
| d_output | Vector to store results (bool). Resized if necessary. |
| stream | CUDA stream. |
Definition at line 906 of file CuckooFilter.cuh.
|
inline |
Deletes keys in a Thrust device vector (uint8_t output).
| d_keys | Vector of keys to delete. |
| d_output | Vector to store results (uint8_t). Resized if necessary. |
| stream | CUDA stream. |
Definition at line 929 of file CuckooFilter.cuh.
|
inline |
Returns the total number of evictions performed.
Only available when CUCKOO_FILTER_COUNT_EVICTIONS is defined.
Definition at line 997 of file CuckooFilter.cuh.
|
inlinestatic |
Computes the alternate bucket for a fingerprint.
Definition at line 373 of file CuckooFilter.cuh.
|
inlinestatic |
Computes alternate bucket AND updated fingerprint for choice bit policies.
For non-choice-bit policies, returns the original fingerprint unchanged.
Definition at line 382 of file CuckooFilter.cuh.
|
inlinestatic |
|
inline |
Returns the number of buckets in the filter.
Definition at line 1023 of file CuckooFilter.cuh.
|
inlinestatic |
Definition at line 360 of file CuckooFilter.cuh.
|
inline |
Inserts a single key into the filter.
Computes candidate buckets and attempts insertion, performing eviction if necessary.
| key | The key to insert. |
| evictionAttempts | Optional pointer to a counter for eviction attempts |
Definition at line 1358 of file CuckooFilter.cuh.
|
inline |
Inserts a batch of keys into the filter.
| d_keys | Pointer to device memory containing keys to insert. |
| n | Number of keys to insert. |
| d_output | Optional pointer to an output array indicating the success of each key insertion. |
| stream | CUDA stream to use for the operation. |
Definition at line 447 of file CuckooFilter.cuh.
|
inline |
Inserts keys from a Thrust device vector without outputting results.
| d_keys | Vector of keys to insert. |
| stream | CUDA stream. |
Definition at line 717 of file CuckooFilter.cuh.
|
inline |
Inserts keys from a Thrust device vector.
| d_keys | Vector of keys to insert. |
| d_output | Vector to store results (bool). Resized if necessary. |
| stream | CUDA stream. |
Definition at line 672 of file CuckooFilter.cuh.
|
inline |
Inserts keys from a Thrust device vector (uint8_t output).
| d_keys | Vector of keys to insert. |
| d_output | Vector to store results (uint8_t). Resized if necessary. |
| stream | CUDA stream. |
Definition at line 695 of file CuckooFilter.cuh.
|
inline |
This pre-sorts the input keys based on the primary bucket index to allow for coalesced memory access when you later insert them into the filter.
| d_keys | Pointer to device memory array of keys to insert |
| n | Number of keys to insert |
| d_output | Optional pointer to an output array indicating the success of each key insertion. |
| stream | CUDA stream to use for the operation. |
Definition at line 502 of file CuckooFilter.cuh.
|
inline |
Inserts keys from a Thrust device vector, sorting them first, without outputting results.
| d_keys | Vector of keys to insert. |
| stream | CUDA stream. |
Definition at line 811 of file CuckooFilter.cuh.
|
inline |
Inserts keys from a Thrust device vector, sorting them first.
| d_keys | Vector of keys to insert. |
| d_output | Vector to store results (bool). Resized if necessary. |
| stream | CUDA stream. |
Definition at line 765 of file CuckooFilter.cuh.
|
inline |
Inserts keys from a Thrust device vector, sorting them first (uint8_t output).
| d_keys | Vector of keys to insert. |
| d_output | Vector to store results (uint8_t). Resized if necessary. |
| stream | CUDA stream. |
Definition at line 788 of file CuckooFilter.cuh.
|
inline |
Inserts a pre-sorted batch of keys and records per-attempt eviction counts.
| d_keys | Pointer to device memory containing keys to insert. |
| n | Number of keys to insert. |
| d_evictionAttempts | Device output array of size n. Each entry stores the number of evictions performed by the corresponding insertion attempt. |
| d_output | Optional pointer to an output array indicating per-key insertion success. |
| stream | CUDA stream to use for the operation. |
Definition at line 567 of file CuckooFilter.cuh.
|
inline |
Inserts keys from a Thrust device vector, sorting first, and records per-attempt eviction counts.
| d_keys | Vector of keys to insert. |
| d_evictionAttempts | Vector to store per-key eviction counts. Resized if necessary. |
| d_output | Optional vector to store per-key insertion success. Resized if necessary. |
| stream | CUDA stream. |
Definition at line 827 of file CuckooFilter.cuh.
|
inline |
Inserts a batch of keys and records per-attempt eviction counts.
| d_keys | Pointer to device memory containing keys to insert. |
| n | Number of keys to insert. |
| d_evictionAttempts | Device output array of size n. Each entry stores the number of evictions performed by the corresponding insertion attempt. |
| d_output | Optional pointer to an output array indicating per-key insertion success. |
| stream | CUDA stream to use for the operation. |
Definition at line 474 of file CuckooFilter.cuh.
|
inline |
Inserts keys from a Thrust device vector and records per-attempt eviction counts.
| d_keys | Vector of keys to insert. |
| d_evictionAttempts | Vector to store per-key eviction counts. Resized if necessary. |
| d_output | Optional vector to store per-key insertion success. Resized if necessary. |
| stream | CUDA stream. |
Definition at line 730 of file CuckooFilter.cuh.
|
inline |
Inserts a fingerprint using repeated shallow breadth-first attempts.
Each round scans a handful of candidate eviction slots in the current bucket and tries to place one candidate in its alternate bucket. If no shallow move succeeds, it evicts the last scanned candidate slot from the current bucket and restarts the same BFS round from that evicted tag's alternate bucket. This repeats until insertion succeeds or maxEvictions is reached.
| fp | Fingerprint to insert |
| evictionAttempts | Optional pointer to a counter for eviction attempts |
| startBucket | Index of the bucket to start the search from |
Definition at line 1253 of file CuckooFilter.cuh.
|
inline |
Inserts a fingerprint into the filter by evicting existing fingerprints.
The thread first picks a pseudo-random target to replace with the new fingerprint. Then it tries to insert the evicted fingerprint into its alternate bucket. This process is repeated until either a fingerprint is inserted into an empty slot or the maximum number of evictions is reached.
| fp | Fingerprint to insert |
| startBucket | Index of the bucket to start the search from |
| evictionAttempts | Optional pointer to a counter for eviction attempts |
Definition at line 1196 of file CuckooFilter.cuh.
|
inline |
Calculates the current load factor of the filter.
Definition at line 971 of file CuckooFilter.cuh.
|
inline |
Returns the total number of occupied slots.
Retrieves the value from the device counter.
Definition at line 982 of file CuckooFilter.cuh.
|
delete |
|
inline |
Removes a key from the filter.
| key | The key to remove. |
Definition at line 1410 of file CuckooFilter.cuh.
|
inline |
Resets the eviction counter to zero.
Definition at line 1006 of file CuckooFilter.cuh.
|
inline |
Returns the size of the filter in bytes.
Definition at line 1031 of file CuckooFilter.cuh.
|
inline |
Attempts to insert a tag into a specific bucket.
Scans the bucket for an empty slot and attempts to atomically place the tag.
| bucketIdx | Index of the bucket. |
| tag | Tag to insert. |
Definition at line 1143 of file CuckooFilter.cuh.
|
inline |
Attempt to remove a single instance of a fingerprint from a bucket.
Scans the atomic words that make up the bucket and attempts a CAS on each matching tag position until one removal succeeds. This allows multiple concurrent deleters to remove distinct copies when a bucket contains several identical fingerprints instead of all trying to clear the same slot
The function is lock-free and uses per-word compare-and-swap operations. It does NOT update any global occupancy counter, the caller is responsible for decrementing d_numOccupied if appropriate
| bucketIdx | Index of the bucket to search. |
| tag | Fingerprint value to remove (must not be EMPTY). |
tag was removed from the bucket; false if no matching tag remained (or another thread removed the last matching instance before this call could succeed). Definition at line 1088 of file CuckooFilter.cuh.
|
staticconstexpr |
Definition at line 146 of file CuckooFilter.cuh.
|
staticconstexpr |
Definition at line 155 of file CuckooFilter.cuh.
|
staticconstexpr |
Definition at line 152 of file CuckooFilter.cuh.
| Bucket* cuckoogpu::Filter< Config >::d_buckets |
Pointer to the device memory for the buckets.
Definition at line 348 of file CuckooFilter.cuh.
| cuda::std::atomic<size_t>* cuckoogpu::Filter< Config >::d_numEvictions {} |
Pointer to the device memory for the eviction counter.
Definition at line 354 of file CuckooFilter.cuh.
| cuda::std::atomic<size_t>* cuckoogpu::Filter< Config >::d_numOccupied {} |
Pointer to the device memory for the occupancy counter.
Definition at line 350 of file CuckooFilter.cuh.
|
staticconstexpr |
Definition at line 213 of file CuckooFilter.cuh.
|
staticconstexpr |
Definition at line 214 of file CuckooFilter.cuh.
| size_t cuckoogpu::Filter< Config >::h_numOccupied = 0 |
Number of occupied buckets in the filter.
Definition at line 357 of file CuckooFilter.cuh.
|
staticconstexpr |
Definition at line 154 of file CuckooFilter.cuh.
| size_t cuckoogpu::Filter< Config >::numBuckets |
Number of buckets in the filter.
Definition at line 347 of file CuckooFilter.cuh.
|
staticconstexpr |
Definition at line 151 of file CuckooFilter.cuh.