3#include <cuda/std/__bit/integral.h>
4#include <cuda_runtime.h>
16 while (T::validBytes[count] !=
'\0') {
38 if (index == T::symbolWidth) {
39 return T::encode(input) ==
static_cast<uint8_t
>(T::invalidSymbol);
42 if (index == separatorPosition) {
43 input[index] =
static_cast<char>(T::separator);
69 char input[T::symbolWidth]{};
95concept Alphabet =
requires(
const char* input) {
96 { T::symbolCount } -> std::convertible_to<uint64_t>;
97 { T::symbolWidth } -> std::convertible_to<uint64_t>;
98 { T::invalidSymbol } -> std::convertible_to<uint8_t>;
99 { T::separator } -> std::convertible_to<uint8_t>;
100 { T::validBytes } -> std::convertible_to<const char*>;
101 { T::encode(input) } -> std::same_as<uint8_t>;
103 requires T::symbolCount > 0 && T::symbolCount <= 255;
104 requires T::symbolWidth > 0;
120 [[nodiscard]]
constexpr __host__ __device__ __forceinline__
static uint8_t
encode(
123 const auto byte =
static_cast<uint8_t
>(input[0]);
124 const uint8_t upper =
byte & 0xDFu;
125 const uint8_t x = (
byte >> 1u) & 3u;
126 const uint8_t valid = (upper ==
'A') | (upper ==
'C') | (upper ==
'G') | (upper ==
'T');
127 const uint8_t mask = -valid;
145 [[nodiscard]]
constexpr __host__ __device__ __forceinline__
static uint8_t
encode(
152 const uint8_t packed = (a << 4u) | (b << 2u) | c;
153 const uint8_t mask = -valid;
172 static constexpr char validBytes[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
174 [[nodiscard]]
constexpr __host__ __device__ __forceinline__
static uint8_t
encode(
177 const auto byte =
static_cast<uint8_t
>(input[0]);
178 const uint8_t upper =
byte & 0xDFu;
179 const uint8_t letterIndex = upper -
'A';
180 const uint8_t valid = letterIndex < 26;
181 const uint8_t mask = -valid;
Concept for alphabet-like types used to encode bytes as symbol indices.
consteval uint64_t validByteCount()
consteval bool separatorByteAlwaysEncodesInvalid()
Tests that for every position in the input, placing the separator byte at that position always results in the invalid symbol.
consteval bool separatorPositionAlwaysEncodesInvalid(char *input, uint64_t separatorPosition, uint64_t index)
Recursively tests whether placing the separator byte at any position in an input of valid bytes always encodes to the invalid symbol.
An alphabet for encoding DNA sequences, consisting of the symbols A, C, G, and T.
static constexpr uint8_t separator
constexpr __host__ __device__ static __forceinline__ uint8_t encode(const char *input)
static constexpr uint64_t symbolWidth
static constexpr uint8_t invalidSymbol
static constexpr uint64_t symbolCount
static constexpr char validBytes[]
An alphabet that encodes non-overlapping DNA triplets as single symbols.
static constexpr uint8_t separator
static constexpr uint64_t symbolCount
static constexpr uint8_t invalidSymbol
static constexpr char validBytes[]
static constexpr uint64_t symbolWidth
constexpr __host__ __device__ static __forceinline__ uint8_t encode(const char *input)
An alphabet for encoding protein sequences, consisting of the 20 standard amino acids plus common ambiguity codes.
static constexpr uint64_t symbolWidth
constexpr __host__ __device__ static __forceinline__ uint8_t encode(const char *input)
static constexpr uint8_t separator
static constexpr uint8_t invalidSymbol
static constexpr char validBytes[]
static constexpr uint64_t symbolCount