|
cuSBF
|
Namespaces | |
| namespace | xxhash |
| XXHash_64 implementation. | |
Classes | |
| struct | BitwiseOr |
| struct | SaltLiteral |
| Compile-time golden-ratio-derived multiplicative salt constants. More... | |
| struct | SaltLiteral< 0 > |
| struct | SaltLiteral< 1 > |
| struct | SaltLiteral< 10 > |
| struct | SaltLiteral< 11 > |
| struct | SaltLiteral< 12 > |
| struct | SaltLiteral< 13 > |
| struct | SaltLiteral< 14 > |
| struct | SaltLiteral< 15 > |
| struct | SaltLiteral< 2 > |
| struct | SaltLiteral< 3 > |
| struct | SaltLiteral< 4 > |
| struct | SaltLiteral< 5 > |
| struct | SaltLiteral< 6 > |
| struct | SaltLiteral< 7 > |
| struct | SaltLiteral< 8 > |
| struct | SaltLiteral< 9 > |
| struct | SequenceKmerInput |
| Kernel input descriptor for a sequence k-mer sweep. More... | |
Functions | |
| template<typename T > | |
| consteval uint64_t | validByteCount () |
| template<typename T > | |
| consteval bool | separatorPositionAlwaysEncodesInvalid (char *input, uint64_t separatorPosition, uint64_t index) |
| Recursively tests whether placing the separator byte at any position in an input of valid bytes always results in an invalid encoding. | |
| template<typename T > | |
| consteval bool | separatorByteAlwaysEncodesInvalid () |
| Tests that for every position in the input, placing the separator byte at that position always results in an invalid encoding. | |
| template<typename Config > | |
| __global__ void | containsSequenceKmersKernel (SequenceKmerInput< Config > input, device_span< const typename Filter< Config >::Shard > shards, device_span< uint8_t > output) |
| CUDA kernel: queries k-mers from a sequence against the filter. | |
| template<typename Config > | |
| __device__ __forceinline__ bool | prepareSequenceHashTiles (const char *sequence, uint64_t blockStartKmer, uint64_t blockKmers, uint8_t *sequenceTile) |
| Cooperatively loads and encodes a tile of symbols into shared memory. | |
| template<typename Config > | |
| __global__ void | insertSequenceKmersKernel (SequenceKmerInput< Config > input, device_span< typename Filter< Config >::Shard > shards) |
| CUDA kernel: inserts k-mers from a sequence into the filter. | |
| template<uint64_t Index> | |
| __host__ __device__ __forceinline__ constexpr uint64_t | multiplicativeSaltLiteral () |
Returns the multiplicative salt constant for hash function Index. | |
| template<typename Config , typename Fn , uint64_t... HashIndices> | |
| __host__ __device__ __forceinline__ void | forEachHashIndexImpl (Fn &&fn, std::index_sequence< HashIndices... >) |
| Implementation helper for forEachHashIndex (fold-expression over an index sequence). | |
| template<typename Config , typename Fn > | |
| __host__ __device__ __forceinline__ void | forEachHashIndex (Fn &&fn) |
Invokes fn for each hash index in [0, Config::hashCount) at compile time. | |
| template<typename Config , uint64_t Length> | |
| __host__ __device__ __forceinline__ constexpr uint64_t | packedWindowMask () |
Returns a bitmask covering Length packed alphabet symbols. | |
| template<typename Config , uint64_t WindowLength, uint64_t K> | |
| __host__ __device__ __forceinline__ constexpr uint64_t | extractPackedSubwindow (uint64_t packedKmer, uint64_t start) |
| Extracts a packed sub-window from a packed k-mer. | |
| __device__ __forceinline__ void | atomicOrWord (uint64_t *ptr, uint64_t value) |
Atomically ORs value into the device word at ptr. | |
| template<typename Config > | |
| __device__ __forceinline__ uint64_t | packedKmerMinimizerHash (uint64_t packedKmer) |
| Computes the minimizer hash for a packed k-mer. | |
| template<typename Config > | |
| __device__ __forceinline__ uint64_t | packedKmerSmerHash (uint64_t packedKmer, uint64_t start) |
Computes the hash for the s-mer at position start within a packed k-mer. | |
| template<typename Config > | |
| __device__ __forceinline__ void | loadShardWords4 (const typename Filter< Config >::Shard *shards, uint64_t shardIndex, uint64_t *w) |
| Loads all four 64-bit words of a shard into a local array. | |
| template<typename Config , uint64_t K> | |
| __device__ __forceinline__ uint64_t | packKmerFromTile (const uint8_t *tile, uint64_t start) |
Packs K symbols from a shared-memory tile into an integer. | |
| template<typename Config , uint64_t K> | |
| __device__ __forceinline__ uint64_t | advancePackedKmer (uint64_t packed, uint8_t newBase) |
| Slides the packed k-mer window forward by one symbol. | |
| template<typename Config > | |
| __device__ __forceinline__ bool | sectorizedContainsPackedKmer (uint64_t packedKmer, const uint64_t *w) |
| Tests whether a packed k-mer is present in a pre-loaded shard. | |
| template<typename Config > | |
| __device__ __forceinline__ bool | kmerIsValid (const uint8_t *tile, uint64_t start) |
| constexpr __host__ __device__ __forceinline__ uint64_t | hash64 (uint64_t key) |
| Fast 64-bit integer hash (non-cryptographic). | |
| constexpr __host__ __device__ __forceinline__ uint64_t | minimizerHash64 (uint64_t key) |
| Fast 64-bit hash sufficient for uniform minimizer selection. | |
Variables | |
| constexpr uint32_t | kContainsSequenceStride = 4 |
| constexpr uint64_t | kInvalidHash = std::numeric_limits<uint64_t>::max() |
| Sentinel hash value indicating "no valid minimizer found". | |
| __device__ __forceinline__ uint64_t cusbf::detail::advancePackedKmer | ( | uint64_t | packed, |
| uint8_t | newBase | ||
| ) |
Slides the packed k-mer window forward by one symbol.
Shifts the existing packed representation left by one symbol, inserts the new symbol in the least-significant position, and masks to K symbols.
| K | k-mer length. |
| packed | Current packed k-mer. |
| newBase | Pre-encoded new symbol. |
Definition at line 1411 of file BloomFilter.cuh.
| __device__ __forceinline__ void cusbf::detail::atomicOrWord | ( | uint64_t * | ptr, |
| uint64_t | value | ||
| ) |
Atomically ORs value into the device word at ptr.
| ptr | Target device word. |
| value | Value to OR in. |
Definition at line 319 of file BloomFilter.cuh.
| __global__ void cusbf::detail::containsSequenceKmersKernel | ( | SequenceKmerInput< Config > | input, |
| device_span< const typename Filter< Config >::Shard > | shards, | ||
| device_span< uint8_t > | output | ||
| ) |
CUDA kernel: queries k-mers from a sequence against the filter.
Each thread processes kStride consecutive k-mers to amortise packing and shard loads. Threads sharing the same shard collaborate via __match_any_sync to load the shard words once and broadcast them.
| input | Sequence descriptor (device span + k-mer count). |
| shards | Device-resident shard array (read-only). |
| output | Per-k-mer result buffer (1 = present, 0 = absent). |
Definition at line 1503 of file BloomFilter.cuh.
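| __host__ __device__ __forceinline__ constexpr uint64_t cusbf::detail::extractPackedSubwindow | ( | uint64_t | packedKmer, |
| uint64_t | start | ||
| ) |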
Extracts a packed sub-window from a packed k-mer.
Extracts WindowLength consecutive bases starting at start from a packed k-mer of length K (MSB = first base).
| WindowLength | Length of the sub-window to extract. |
| K | Length of the full k-mer. |
| packedKmer | Packed k-mer (MSB = first base). |
| start | Zero-based start position. |
Definition at line 307 of file BloomFilter.cuh.
| __host__ __device__ __forceinline__ void cusbf::detail::forEachHashIndex | ( | Fn && | fn | ) |
Invokes fn for each hash index in [0, Config::hashCount) at compile time.
| Config | Filter configuration. |
| Fn | Callable with signature void(std::integral_constant<uint64_t, I>). |
| fn | Callable to invoke for each index. |
Definition at line 271 of file BloomFilter.cuh.
| __host__ __device__ __forceinline__ void cusbf::detail::forEachHashIndexImpl | ( | Fn && | fn, |
| std::index_sequence< HashIndices... > | |||
| ) |
Implementation helper for forEachHashIndex (fold-expression over an index sequence).
Definition at line 259 of file BloomFilter.cuh.
| constexpr __host__ __device__ __forceinline__ uint64_t cusbf::detail::hash64 | ( | uint64_t | key | ) |
Fast 64-bit integer hash (non-cryptographic).
One multiplicative step followed by an xorshift. Used to hash s-mer packed representations for Bloom bit-position selection.
| key | Input value. |
Definition at line 192 of file hashutil.cuh.
| __global__ void cusbf::detail::insertSequenceKmersKernel | ( | SequenceKmerInput< Config > | input, |
| device_span< typename Filter< Config >::Shard > | shards | ||
| ) |
CUDA kernel: inserts k-mers from a sequence into the filter.
Each thread processes one k-mer. Consecutive threads targeting the same shard use cub::WarpReduce::HeadSegmentedReduce to merge bitmasks before the run head issues a minimal number of atomicOr operations.
| input | Sequence descriptor. |
| shards | Device-resident shard array (modified in place). |
Definition at line 1610 of file BloomFilter.cuh.
| __device__ __forceinline__ bool cusbf::detail::kmerIsValid | ( | const uint8_t * | tile, |
| uint64_t | start | ||
| ) |
Definition at line 1447 of file BloomFilter.cuh.
| __device__ __forceinline__ void cusbf::detail::loadShardWords4 | ( | const typename Filter< Config >::Shard * | shards, |
| uint64_t | shardIndex, | ||
| uint64_t * | w | ||
| ) |
Loads all four 64-bit words of a shard into a local array.
On sm_100+ this issues a single 256-bit non-coherent global load; on older architectures it falls back to two 128-bit loads.
| shards | Pointer to the device shard array. |
| shardIndex | Index of the shard to load. |
| w | Output array of (at least) four words. |
Definition at line 1372 of file BloomFilter.cuh.
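| constexpr __host__ __device__ __forceinline__ uint64_t cusbf::detail::minimizerHash64 | ( | uint64_t | key | ) |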
Fast 64-bit hash sufficient for uniform minimizer selection.
A single Knuth multiplicative step, which provides enough uniformity for shard selection without the full avalanche quality of hash64.
| key | Packed m-mer input. |
Definition at line 209 of file hashutil.cuh.
| __host__ __device__ __forceinline__ constexpr uint64_t cusbf::detail::multiplicativeSaltLiteral | ( | ) |
Returns the multiplicative salt constant for hash function Index.
| Index | Salt index in [0, 15]. |
Definition at line 251 of file BloomFilter.cuh.
| __device__ __forceinline__ uint64_t cusbf::detail::packedKmerMinimizerHash | ( | uint64_t | packedKmer | ) |
Computes the minimizer hash for a packed k-mer.
Iterates over all m-mers within the k-mer and returns the minimum hash value, which is used to select the target shard.
| packedKmer | 2-bit packed k-mer. |
Definition at line 1332 of file BloomFilter.cuh.
| __device__ __forceinline__ uint64_t cusbf::detail::packedKmerSmerHash | ( | uint64_t | packedKmer, |
| uint64_t | start | ||
| ) |
Computes the hash for the s-mer at position start within a packed k-mer.
| packedKmer | 2-bit packed k-mer. |
| start | Zero-based start position of the s-mer within the k-mer. |
Definition at line 1353 of file BloomFilter.cuh.
| __host__ __device__ __forceinline__ constexpr uint64_t cusbf::detail::packedWindowMask | ( | ) |
Returns a bitmask covering Length packed alphabet symbols.
Returns UINT64_MAX when the packed window consumes all 64 bits.
| Length | Number of symbols. |
Definition at line 285 of file BloomFilter.cuh.
| __device__ __forceinline__ uint64_t cusbf::detail::packKmerFromTile | ( | const uint8_t * | tile, |
| uint64_t | start | ||
| ) |
Packs K symbols from a shared-memory tile into an integer.
| K | k-mer length. |
| tile | Encoded symbol tile in shared memory. |
| start | Start position within the tile. |
Definition at line 1390 of file BloomFilter.cuh.
| __device__ __forceinline__ bool cusbf::detail::prepareSequenceHashTiles | ( | const char * | sequence, |
| uint64_t | blockStartKmer, | ||
| uint64_t | blockKmers, | ||
| uint8_t * | sequenceTile | ||
| ) |
Cooperatively loads and encodes a tile of symbols into shared memory.
All threads in the block participate. The return value (via __syncthreads_count) is true only if every base in the tile is a valid alphabet symbol.
| sequence | Device-resident sequence pointer. |
| blockStartKmer | Index of the first k-mer assigned to this block. |
| blockKmers | Number of k-mers handled by this block. |
| sequenceTile | Shared-memory output buffer (blockKmers + k - 1 bytes). |
Returns true if no invalid symbols are present in the tile. Definition at line 1472 of file BloomFilter.cuh.
| __device__ __forceinline__ bool cusbf::detail::sectorizedContainsPackedKmer | ( | uint64_t | packedKmer, |
| const uint64_t * | w | ||
| ) |
Tests whether a packed k-mer is present in a pre-loaded shard.
Checks all s-mer hashes across the k-mer against the four shard words. Returns false as soon as any required bit is absent.
| packedKmer | Packed k-mer to query. |
| w | The four pre-loaded shard words. |
Returns true if all required bits are set. Definition at line 1429 of file BloomFilter.cuh.
| consteval bool cusbf::detail::separatorByteAlwaysEncodesInvalid | ( | ) |
Tests that for every position in the input, placing the separator byte at that position always results in an invalid encoding.
This is a necessary condition for the separator to function correctly when concatenating sequences, as it prevents the creation of valid symbols that span across sequence boundaries.
| T | Alphabet type to test. |
Definition at line 67 of file Alphabet.cuh.
| consteval bool cusbf::detail::separatorPositionAlwaysEncodesInvalid | ( | char * | input, |
| uint64_t | separatorPosition, | ||
| uint64_t | index | ||
| ) |
Recursively tests whether placing the separator byte at any position in an input of valid bytes always results in an invalid encoding.
This ensures that the separator cannot be confused with valid symbols when concatenating sequences.
| T | Alphabet type to test. |
| input | Buffer to construct input strings for encoding. Must have length at least T::symbolWidth. |
| separatorPosition | Position at which to place the separator byte in the input. |
| index | Current index being set in the input. Should be called with 0 initially. |
Definition at line 37 of file Alphabet.cuh.
| consteval uint64_t cusbf::detail::validByteCount | ( | ) |
Definition at line 14 of file Alphabet.cuh.
Definition at line 143 of file BloomFilter.cuh.