cuSBF GPU-accelerated sectorized Bloom filter. More...

Classes
struct	Shard
	One 256-bit filter block stored as an array of Config::blockWordCount words. More...

Public Member Functions
	Filter (uint64_t requestedFilterBits)
	Constructs a Filter with at least `requestedFilterBits` bits of storage.

	Filter (const Filter &)=delete

Filter &	operator= (const Filter &)=delete

	Filter (Filter &&)=default

Filter &	operator= (Filter &&)=default

	~Filter ()=default

uint64_t	insertSequence (std::string_view sequence, cuda::stream_ref stream=cudaStream_t{})
	Inserts all valid k-mers from a host-resident sequence.

uint64_t	insertSequenceDevice (device_span< const char > d_sequence, cuda::stream_ref stream=cudaStream_t{})
	Async insert of k-mers from a device-resident sequence.

FastxInsertReport	insertRecordBatch (RecordBatchView batch, cuda::stream_ref stream=cudaStream_t{})
	Inserts a dense host-resident record batch.

FastxInsertReport	insertFastx (std::istream &input, double fillFraction=0.7, cuda::stream_ref stream=cudaStream_t{})
	Inserts all k-mers from a FASTA/FASTQ input stream.

FastxInsertReport	insertFastxFile (std::string_view path, double fillFraction=0.7, cuda::stream_ref stream=cudaStream_t{})
	Inserts all k-mers from a FASTA/FASTQ file via chunked streaming.

void	containsSequenceDevice (device_span< const char > d_sequence, device_span< uint8_t > d_output, cuda::stream_ref stream=cudaStream_t{}) const
	Async query of k-mers from a device-resident sequence.

std::vector< uint8_t >	containsSequence (std::string_view sequence, cuda::stream_ref stream=cudaStream_t{}) const
	Queries all valid k-mers from a host-resident sequence.

FastxQueryReport	queryRecordBatch (RecordBatchView batch, cuda::stream_ref stream=cudaStream_t{}) const
	Queries a dense host-resident record batch and returns aggregate counts.

template<typename Consumer >
FastxQueryReport	queryRecordBatch (RecordBatchView batch, Consumer &&consume, cuda::stream_ref stream=cudaStream_t{}) const
	Queries a dense host-resident record batch and streams per-record results.

FastxQueryReport	queryFastx (std::istream &input, double fillFraction=0.7, cuda::stream_ref stream=cudaStream_t{}) const
	Queries all k-mers from a FASTA/FASTQ input stream via chunked streaming.

FastxQueryReport	queryFastxFile (std::string_view path, double fillFraction=0.7, cuda::stream_ref stream=cudaStream_t{}) const
	Queries all k-mers from a FASTA/FASTQ file via chunked streaming.

template<typename Consumer >
FastxQueryReport	queryFastxRecords (std::istream &input, Consumer &&consume, double fillFraction=0.7, cuda::stream_ref stream=cudaStream_t{}) const
	Queries a FASTA/FASTQ stream and emits one record result per parsed record.

template<typename Consumer >
FastxQueryReport	queryFastxFileRecords (std::string_view path, Consumer &&consume, double fillFraction=0.7, cuda::stream_ref stream=cudaStream_t{}) const
	Queries a FASTA/FASTQ file and emits one record result per parsed record.

FastxDetailedQueryReport	queryFastxDetailed (std::istream &input, double fillFraction=0.7, cuda::stream_ref stream=cudaStream_t{}) const
	Queries all k-mers from a FASTA/FASTQ input stream via chunked streaming and preserves per-record hit vectors.

FastxDetailedQueryReport	queryFastxFileDetailed (std::string_view path, double fillFraction=0.7, cuda::stream_ref stream=cudaStream_t{}) const
	Queries all k-mers from a FASTA/FASTQ file via chunked streaming and preserves per-record hit vectors.

void	clear (cuda::stream_ref stream=cudaStream_t{})
	Resets all filter bits to zero and synchronises the stream.

float	loadFactor () const
	Computes the fraction of set bits in the filter.

uint64_t	filterBits () const
	Returns the total allocated capacity of the filter in bits.

uint64_t	numShards () const
	Returns the number of shards.

Detailed Description

template<typename Config>
class cusbf::Filter< Config >

cuSBF GPU-accelerated sectorized Bloom filter.

Stores a device-resident cuSBF divided into numShards 256-bit shards. Each shard is addressed independently by a minimizer-derived hash, and bits within a shard are set or tested using a set of s-mer-derived hashes.

The filter is not copyable (device memory ownership). Move construction and assignment are supported.

Template Parameters

Config A cusbf::Config specialisation.

Definition at line 338 of file BloomFilter.cuh.

Constructor & Destructor Documentation

◆ Filter() [1/3]

template<typename Config >

cusbf::Filter< Config >::Filter ( uint64_t requestedFilterBits )

inline explicit

Constructs a Filter with at least requestedFilterBits bits of storage.

The actual allocated capacity is rounded up to the next power-of-two number of shards.

Parameters

requestedFilterBits Desired filter capacity in bits.

Definition at line 487 of file BloomFilter.cuh.

        : numShards_(
              cuda::std::bit_ceil(
                  std::max<uint64_t>(
                      1,
                      cuda::ceil_div(requestedFilterBits, Config::filterBlockBits)
                  )
              )
          ),
          filterBits_(numShards_ * Config::filterBlockBits),
          d_shards_(numShards_) {
        clear();
    }

Here is the call graph for this function:

◆ Filter() [2/3]

template<typename Config >

cusbf::Filter< Config >::Filter ( const Filter< Config > & )

delete

◆ Filter() [3/3]

template<typename Config >

cusbf::Filter< Config >::Filter ( Filter< Config > && )

default

◆ ~Filter()

template<typename Config >

cusbf::Filter< Config >::~Filter ( )

default

Member Function Documentation

◆ clear()

template<typename Config >

void cusbf::Filter< Config >::clear ( cuda::stream_ref stream = cudaStream_t{} )

inline

Resets all filter bits to zero and synchronises the stream.

Parameters

stream CUDA stream to use.

Definition at line 823 of file BloomFilter.cuh.

                                                   {}) {
        CUSBF_CUDA_CALL(cudaMemsetAsync(
            thrust::raw_pointer_cast(d_shards_.data()),
            0,
            d_shards_.size() * sizeof(Shard),
            stream.get()
        ));
 
        stream.sync();
    }

Here is the caller graph for this function:

◆ containsSequence()

template<typename Config >

std::vector< uint8_t > cusbf::Filter< Config >::containsSequence	(	std::string_view	sequence,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Queries all valid k-mers from a host-resident sequence.

Copies the sequence to device, queries, copies results back, and synchronises. The returned vector has one byte per k-mer: 1 = present, 0 = absent.

Parameters

sequence	Raw nucleotide sequence.
stream	CUDA stream to use.

Returns: Per-k-mer membership results (empty if sequence length < k).

Definition at line 651 of file BloomFilter.cuh.

                                                                                  {}) const {
        if (recordSymbolCount(sequence.size()) < Config::k) {
            return {};
        }
 
        std::vector<uint8_t> output(recordKmerCount(sequence.size()));
 
        const auto d_sequence = stagedSequenceView({sequence.data(), sequence.size()}, stream);
        ensureResultCapacity(output.size());
        launchContainsSequence(
            d_sequence,
            device_span<uint8_t>{thrust::raw_pointer_cast(d_resultBuffer_.data()), output.size()},
            stream
        );
        CUSBF_CUDA_CALL(cudaMemcpyAsync(
            output.data(),
            thrust::raw_pointer_cast(d_resultBuffer_.data()),
            output.size() * sizeof(uint8_t),
            cudaMemcpyDeviceToHost,
            stream.get()
        ));
 
        stream.sync();
        return output;
    }

◆ containsSequenceDevice()

template<typename Config >

void cusbf::Filter< Config >::containsSequenceDevice	(	device_span< const char >	d_sequence,
		device_span< uint8_t >	d_output,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Async query of k-mers from a device-resident sequence.

Does not synchronise the stream. Results are written to d_output (one byte per k-mer: 1 = present, 0 = absent).

Parameters

d_sequence	Device-resident nucleotide sequence.
d_output	Per-k-mer result buffer (must hold kmerCount() bytes).
stream	CUDA stream to use.

Definition at line 627 of file BloomFilter.cuh.

                                            {}
    ) const {
        if (sequenceKmerCount(d_sequence) == 0) {
            return;
        }
 
        launchContainsSequence(d_sequence, d_output, stream);
    }

◆ filterBits()

template<typename Config >

uint64_t cusbf::Filter< Config >::filterBits ( ) const

inline

Returns the total allocated capacity of the filter in bits.

Definition at line 855 of file BloomFilter.cuh.

                                              {
        return filterBits_;
    }

◆ insertFastx()

template<typename Config >

FastxInsertReport cusbf::Filter< Config >::insertFastx	(	std::istream &	input,
		double	fillFraction = `0.7`,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)

inline

Inserts all k-mers from a FASTA/FASTQ input stream.

Reads records in streaming fashion, accumulating them until the concatenated sequence approaches fillFraction of free GPU memory, then inserts each chunk independently.

Parameters

input	Input stream containing FASTA or FASTQ records.
fillFraction	Fraction of free GPU memory to fill per chunk (default 0.7).
stream	CUDA stream to use.

Returns: Report summarising records indexed, bases processed, and k-mers inserted.

Definition at line 595 of file BloomFilter.cuh.

                                            {}
    ) {
        return insertFastxStream(input, "<stream>", fillFraction, stream);
    }

◆ insertFastxFile()

template<typename Config >

FastxInsertReport cusbf::Filter< Config >::insertFastxFile	(	std::string_view	path,
		double	fillFraction = `0.7`,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)

inline

Inserts all k-mers from a FASTA/FASTQ file via chunked streaming.

See also: insertFastx

Definition at line 608 of file BloomFilter.cuh.

                                            {}
    ) {
        auto input = detail::openFastxFile(path);
        return insertFastxStream(*input, path, fillFraction, stream);
    }

◆ insertRecordBatch()

template<typename Config >

FastxInsertReport cusbf::Filter< Config >::insertRecordBatch	(	RecordBatchView	batch,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)

inline

Inserts a dense host-resident record batch.

batch.sequence stores the raw record payloads back-to-back without separators. batch.records stores ordered, non-overlapping byte ranges into that dense buffer. The filter injects alphabet separators between records internally, so callers do not need to materialise separator bytes themselves.

Synchronises before returning.

Parameters

batch	Dense record batch to insert.
stream	CUDA stream to use.

Returns: Report summarising records indexed, bases processed, and k-mers inserted.

Definition at line 569 of file BloomFilter.cuh.

                                                                                 {}) {
        const PreparedRecordBatch prepared = prepareRecordBatch(batch);
        FastxInsertReport report;
        report.recordsIndexed = prepared.records.size();
        for (const PreparedRecordRange& record : prepared.records) {
            report.indexedBases += record.size;
            report.insertedKmers += record.validKmers;
        }
        if (!prepared.sequence.empty()) {
            (void)insertSequence(prepared.sequence, stream);
        }
        return report;
    }

◆ insertSequence()

template<typename Config >

uint64_t cusbf::Filter< Config >::insertSequence	(	std::string_view	sequence,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)

inline

Inserts all valid k-mers from a host-resident sequence.

Copies the sequence to device, launches the insert kernel, and synchronises before returning. K-mers containing characters outside {A,C,G,T,a,c,g,t} are skipped.

Parameters

sequence	Raw nucleotide sequence.
stream	CUDA stream to use (default: null stream).

Returns: Number of k-mers attempted (sequences shorter than k yield 0).

Definition at line 519 of file BloomFilter.cuh.

                                                                                {}) {
        if (recordSymbolCount(sequence.size()) < Config::k) {
            return 0;
        }
 
        const uint64_t totalKmers = recordKmerCount(sequence.size());
        const auto d_sequence = stagedSequenceView({sequence.data(), sequence.size()}, stream);
        launchInsertSequence(d_sequence, stream);
        stream.sync();
        return totalKmers;
    }

◆ insertSequenceDevice()

template<typename Config >

uint64_t cusbf::Filter< Config >::insertSequenceDevice	(	device_span< const char >	d_sequence,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)

inline

Async insert of k-mers from a device-resident sequence.

Does not synchronise the stream; the caller is responsible for ordering relative to downstream operations.

Parameters

d_sequence	Device-resident nucleotide sequence.
stream	CUDA stream to use.

Returns: Number of k-mers attempted.

Definition at line 541 of file BloomFilter.cuh.

                                            {}
    ) {
        const uint64_t totalKmers = sequenceKmerCount(d_sequence);
        if (totalKmers == 0) {
            return 0;
        }
 
        launchInsertSequence(d_sequence, stream);
        return totalKmers;
    }

◆ loadFactor()

template<typename Config >

float cusbf::Filter< Config >::loadFactor ( ) const

inline

Computes the fraction of set bits in the filter.

Returns: Load factor in [0, 1].

Definition at line 839 of file BloomFilter.cuh.

                                           {
        const auto* wordsBegin =
            reinterpret_cast<const uint64_t*>(thrust::raw_pointer_cast(d_shards_.data()));
        const uint64_t totalWords = numShards_ * Config::blockWordCount;
        const uint64_t setBits = thrust::transform_reduce(
            thrust::device,
            wordsBegin,
            wordsBegin + totalWords,
            [] __device__(uint64_t w) -> uint64_t { return cuda::std::popcount(w); },
            uint64_t{0},
            cuda::std::plus<uint64_t>()
        );
        return static_cast<float>(setBits) / static_cast<float>(filterBits_);
    }

◆ numShards()

template<typename Config >

uint64_t cusbf::Filter< Config >::numShards ( ) const

inline

Returns the number of shards.

Definition at line 860 of file BloomFilter.cuh.

                                             {
        return numShards_;
    }

◆ operator=() [1/2]

template<typename Config >

Filter & cusbf::Filter< Config >::operator= ( const Filter< Config > & )

delete

◆ operator=() [2/2]

template<typename Config >

Filter & cusbf::Filter< Config >::operator= ( Filter< Config > && )

default

◆ queryFastx()

template<typename Config >

FastxQueryReport cusbf::Filter< Config >::queryFastx	(	std::istream &	input,
		double	fillFraction = `0.7`,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Queries all k-mers from a FASTA/FASTQ input stream via chunked streaming.

See also: insertFastx

Definition at line 723 of file BloomFilter.cuh.

                                            {}
    ) const {
        return queryFastxStream(input, "<stream>", fillFraction, stream);
    }

◆ queryFastxDetailed()

template<typename Config >

FastxDetailedQueryReport cusbf::Filter< Config >::queryFastxDetailed	(	std::istream &	input,
		double	fillFraction = `0.7`,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Queries all k-mers from a FASTA/FASTQ input stream via chunked streaming and preserves per-record hit vectors.

The returned report keeps aggregate counts plus one detailed result per record, in source order. Each detailed hit vector contains one byte per k-mer window: 1 = present, 0 = absent. Windows containing invalid symbols remain in the vector as 0 and are excluded from queriedKmers.

See also: queryFastx

Definition at line 795 of file BloomFilter.cuh.

                                            {}
    ) const {
        return queryFastxDetailedStream(input, "<stream>", fillFraction, stream);
    }

◆ queryFastxFile()

template<typename Config >

FastxQueryReport cusbf::Filter< Config >::queryFastxFile	(	std::string_view	path,
		double	fillFraction = `0.7`,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Queries all k-mers from a FASTA/FASTQ file via chunked streaming.

See also: queryFastx

Definition at line 736 of file BloomFilter.cuh.

                                            {}
    ) const {
        auto input = detail::openFastxFile(path);
        return queryFastxStream(*input, path, fillFraction, stream);
    }

◆ queryFastxFileDetailed()

template<typename Config >

FastxDetailedQueryReport cusbf::Filter< Config >::queryFastxFileDetailed	(	std::string_view	path,
		double	fillFraction = `0.7`,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Queries all k-mers from a FASTA/FASTQ file via chunked streaming and preserves per-record hit vectors.

See also: queryFastxDetailed

Definition at line 809 of file BloomFilter.cuh.

                                            {}
    ) const {
        auto input = detail::openFastxFile(path);
        return queryFastxDetailedStream(*input, path, fillFraction, stream);
    }

◆ queryFastxFileRecords()

template<typename Config >

template<typename Consumer >

FastxQueryReport cusbf::Filter< Config >::queryFastxFileRecords	(	std::string_view	path,
		Consumer &&	consume,
		double	fillFraction = `0.7`,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Queries a FASTA/FASTQ file and emits one record result per parsed record.

See also: queryFastxRecords

Definition at line 774 of file BloomFilter.cuh.

                                            {}
    ) const {
        auto input = detail::openFastxFile(path);
        return queryFastxRecordsStream(*input, path, consume, fillFraction, stream);
    }

◆ queryFastxRecords()

template<typename Config >

template<typename Consumer >

FastxQueryReport cusbf::Filter< Config >::queryFastxRecords	(	std::istream &	input,
		Consumer &&	consume,
		double	fillFraction = `0.7`,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Queries a FASTA/FASTQ stream and emits one record result per parsed record.

The callback receives record headers, record sequences, aggregate counts, and the per-window hit span for each record as soon as its chunk has been processed. The hit span remains valid only for the duration of the callback.

Parameters

input	Input stream containing FASTA or FASTQ records.
consume	Per-record callback.
fillFraction	Fraction of free GPU memory to fill per chunk (default 0.7).
stream	CUDA stream to use.

Returns: Aggregate query summary for the whole stream.

Definition at line 759 of file BloomFilter.cuh.

                                            {}
    ) const {
        return queryFastxRecordsStream(input, "<stream>", consume, fillFraction, stream);
    }

◆ queryRecordBatch() [1/2]

template<typename Config >

template<typename Consumer >

FastxQueryReport cusbf::Filter< Config >::queryRecordBatch	(	RecordBatchView	batch,
		Consumer &&	consume,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Queries a dense host-resident record batch and streams per-record results.

The callback receives one RecordQueryView per input record in source order. The hit span remains valid only for the duration of the callback.

Synchronises before returning.

Parameters

batch	Dense record batch to query.
consume	Per-record callback.
stream	CUDA stream to use.

Returns: Aggregate query summary for the whole batch.

Definition at line 710 of file BloomFilter.cuh.

                                            {}
    ) const {
        return queryPreparedRecordBatch(prepareRecordBatch(batch), batch.sequence, consume, stream);
    }

◆ queryRecordBatch() [2/2]

template<typename Config >

FastxQueryReport cusbf::Filter< Config >::queryRecordBatch	(	RecordBatchView	batch,
		cuda::stream_ref	stream = `cudaStream_t{}`
	)		const

inline

Queries a dense host-resident record batch and returns aggregate counts.

batch.sequence stores raw record payloads back-to-back without separators. batch.records stores ordered, non-overlapping byte ranges into that dense buffer. The filter injects alphabet separators between records internally, so cross-record k-mers are never formed.

Synchronises before returning.

Parameters

batch	Dense record batch to query.
stream	CUDA stream to use.

Returns: Aggregate query summary for the whole batch.

Definition at line 692 of file BloomFilter.cuh.

                                                                                {}) const {
        return queryRecordBatch(batch, [](const RecordQueryView&) {}, stream);
    }

The documentation for this class was generated from the following file:

BloomFilter.cuh

Classes

Public Member Functions

Detailed Description

Constructor & Destructor Documentation

◆ Filter() [1/3]

◆ Filter() [2/3]

◆ Filter() [3/3]

◆ ~Filter()

Member Function Documentation

◆ clear()

◆ containsSequence()

◆ containsSequenceDevice()

◆ filterBits()

◆ insertFastx()

◆ insertFastxFile()

◆ insertRecordBatch()

◆ insertSequence()

◆ insertSequenceDevice()

◆ loadFactor()

◆ numShards()

◆ operator=() [1/2]

◆ operator=() [2/2]

◆ queryFastx()

◆ queryFastxDetailed()

◆ queryFastxFile()

◆ queryFastxFileDetailed()

◆ queryFastxFileRecords()

◆ queryFastxRecords()

◆ queryRecordBatch() [1/2]

◆ queryRecordBatch() [2/2]