SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <array>
16 #include <cassert>
17 #include <concepts>
18 #include <utility>
19 
26 
27 namespace seqan3::detail
28 {
29 
32 template <simd::simd_concept simd_t, size_t... I>
33 constexpr simd_t fill_impl(typename simd_traits<simd_t>::scalar_type const scalar, std::index_sequence<I...>) noexcept
34 {
35  return simd_t{((void)I, scalar)...};
36 }
37 
40 template <simd::simd_concept simd_t, typename scalar_t, scalar_t... I>
41 constexpr simd_t iota_impl(scalar_t const offset, std::integer_sequence<scalar_t, I...>)
42 {
43  return simd_t{static_cast<scalar_t>(offset + I)...};
44 }
45 
60 template <size_t divisor, simd_concept simd_t>
61 constexpr simd_t extract_impl(simd_t const & src, uint8_t const mask)
62 {
63  simd_t dst{};
64  constexpr size_t chunk = simd_traits<simd_t>::length / divisor;
65  size_t offset = chunk * mask;
66  for (size_t i = 0; i < chunk; ++i)
67  dst[i] = src[i + offset];
68 
69  return dst;
70 }
71 
80 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
81 constexpr target_simd_t upcast_signed(source_simd_t const & src)
82 {
83  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
84  "Target vector has different byte size.");
85 
86  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
87  return upcast_signed_sse4<target_simd_t>(src);
88  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
89  return upcast_signed_avx2<target_simd_t>(src);
90  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
91  return upcast_signed_avx512<target_simd_t>(src);
92  else
93  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
94 }
95 
104 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
105 constexpr target_simd_t upcast_unsigned(source_simd_t const & src)
106 {
107  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
108  "Target vector has different byte size.");
109 
110  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
111  return upcast_unsigned_sse4<target_simd_t>(src);
112  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
113  return upcast_unsigned_avx2<target_simd_t>(src);
114  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
115  return upcast_unsigned_avx512<target_simd_t>(src);
116  else
117  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
118 }
119 
142 template <uint8_t index, simd::simd_concept simd_t>
143 constexpr simd_t extract_half(simd_t const & src)
144 {
145  static_assert(index < 2, "The index must be in the range of [0, 1]");
146 
147  return detail::extract_impl<2>(src, index);
148 }
149 
151 template <uint8_t index, simd::simd_concept simd_t>
152  requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
153 constexpr simd_t extract_half(simd_t const & src)
154 {
155  static_assert(index < 2, "The index must be in the range of [0, 1]");
156 
157  if constexpr (simd_traits<simd_t>::length < 2) // In case there are less elements available return unchanged value.
158  return src;
159  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
160  return detail::extract_half_sse4<index>(src);
161  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
162  return detail::extract_half_avx2<index>(src);
163  else if constexpr (simd_traits<simd_t>::max_length == 64) // AVX512
164  return detail::extract_half_avx512<index>(src);
165  else // Anything else
166  return detail::extract_impl<2>(src, index);
167 }
169 
192 template <uint8_t index, simd::simd_concept simd_t>
193 constexpr simd_t extract_quarter(simd_t const & src)
194 {
195  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
196 
197  return detail::extract_impl<4>(src, index);
198 }
199 
201 template <uint8_t index, simd::simd_concept simd_t>
202  requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
203 constexpr simd_t extract_quarter(simd_t const & src)
204 {
205  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
206 
207  if constexpr (simd_traits<simd_t>::length < 4) // In case there are less elements available return unchanged value.
208  return src;
209  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
210  return detail::extract_quarter_sse4<index>(src);
211  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
212  return detail::extract_quarter_avx2<index>(src);
213 #if defined(__AVX512DQ__)
214  else if constexpr (simd_traits<simd_t>::max_length == 64) // AVX512
215  return detail::extract_quarter_avx512<index>(src);
216 #endif // defined(__AVX512DQ__)
217  else // Anything else
218  return detail::extract_impl<4>(src, index);
219 }
221 
244 template <uint8_t index, simd::simd_concept simd_t>
245 constexpr simd_t extract_eighth(simd_t const & src)
246 {
247  return detail::extract_impl<8>(src, index);
248 }
249 
251 template <uint8_t index, simd::simd_concept simd_t>
252  requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
253 constexpr simd_t extract_eighth(simd_t const & src)
254 {
255  static_assert(index < 8, "The index must be in the range of [0, 1, 2, 3, 4, 5, 6, 7]");
256 
257  if constexpr (simd_traits<simd_t>::length < 8) // In case there are less elements available return unchanged value.
258  return src;
259  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
260  return detail::extract_eighth_sse4<index>(src);
261  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
262  return detail::extract_eighth_avx2<index>(src);
263 #if defined(__AVX512DQ__)
264  else if constexpr (simd_traits<simd_t>::max_length == 64) // AVX512
265  return detail::extract_eighth_avx512<index>(src);
266 #endif // defined(__AVX512DQ__)
267  else // Anything else
268  return detail::extract_impl<8>(src, index);
269 }
271 
273 template <simd::simd_concept simd_t>
274 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
275 {
277 
278  for (size_t i = 0; i < matrix.size(); ++i)
279  for (size_t j = 0; j < matrix.size(); ++j)
280  tmp[j][i] = matrix[i][j];
281 
282  std::swap(tmp, matrix);
283 }
285 } // namespace seqan3::detail
286 
287 namespace seqan3
288 {
289 
290 inline namespace simd
291 {
292 
302 template <simd::simd_concept simd_t>
303 constexpr simd_t fill(typename simd_traits<simd_t>::scalar_type const scalar) noexcept
304 {
305  constexpr size_t length = simd_traits<simd_t>::length;
306  return detail::fill_impl<simd_t>(scalar, std::make_index_sequence<length>{});
307 }
308 
318 template <simd::simd_concept simd_t>
319 constexpr simd_t iota(typename simd_traits<simd_t>::scalar_type const offset)
320 {
321  constexpr size_t length = simd_traits<simd_t>::length;
322  using scalar_type = typename simd_traits<simd_t>::scalar_type;
323  return detail::iota_impl<simd_t>(offset, std::make_integer_sequence<scalar_type, length>{});
324 }
325 
335 template <simd::simd_concept simd_t>
336 constexpr simd_t load(void const * mem_addr)
337 {
338  assert(mem_addr != nullptr);
339  simd_t tmp{};
340 
341  for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)
342  tmp[i] = *(static_cast<typename simd_traits<simd_t>::scalar_type const *>(mem_addr) + i);
343 
344  return tmp;
345 }
346 
348 template <simd::simd_concept simd_t>
349  requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
350 constexpr simd_t load(void const * mem_addr)
351 {
352  assert(mem_addr != nullptr);
353 
354  if constexpr (simd_traits<simd_t>::max_length == 16)
355  return detail::load_sse4<simd_t>(mem_addr);
356  else if constexpr (simd_traits<simd_t>::max_length == 32)
357  return detail::load_avx2<simd_t>(mem_addr);
358  else if constexpr (simd_traits<simd_t>::max_length == 64)
359  return detail::load_avx512<simd_t>(mem_addr);
360  else
361  static_assert(simd_traits<simd_t>::max_length >= 16 && simd_traits<simd_t>::max_length <= 64,
362  "Unsupported simd type.");
363 }
365 
376 template <simd::simd_concept simd_t>
377 constexpr void store(void * mem_addr, simd_t const & simd_vec)
378 {
379  assert(mem_addr != nullptr);
380  using scalar_t = typename simd_traits<simd_t>::scalar_type;
381 
382  for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)
383  *(static_cast<scalar_t *>(mem_addr) + i) = simd_vec[i];
384 }
385 
387 template <simd::simd_concept simd_t>
388  requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
389 constexpr void store(void * mem_addr, simd_t const & simd_vec)
390 {
391  assert(mem_addr != nullptr);
392 
393  if constexpr (simd_traits<simd_t>::max_length == 16)
394  detail::store_sse4<simd_t>(mem_addr, simd_vec);
395  else if constexpr (simd_traits<simd_t>::max_length == 32)
396  detail::store_avx2<simd_t>(mem_addr, simd_vec);
397  else if constexpr (simd_traits<simd_t>::max_length == 64)
398  detail::store_avx512<simd_t>(mem_addr, simd_vec);
399  else
400  static_assert(simd_traits<simd_t>::max_length >= 16 && simd_traits<simd_t>::max_length <= 64,
401  "Unsupported simd type.");
402 }
404 
422 template <simd::simd_concept simd_t>
423 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
424 {
425  detail::transpose(matrix);
426 }
427 
429 // Implementation for seqan builtin simd.
430 template <simd::simd_concept simd_t>
431  requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
432  && (simd_traits<simd_t>::max_length == simd_traits<simd_t>::length)
433 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
434 {
435  if constexpr (simd_traits<simd_t>::length == 16) // SSE4 implementation
436  detail::transpose_matrix_sse4(matrix);
437  else if constexpr (simd_traits<simd_t>::length == 32) // AVX2 implementation
438  detail::transpose_matrix_avx2(matrix);
439 #if defined(__AVX512BW__) // Requires byte-word extension of AVX512 instruction set.
440  else if constexpr (simd_traits<simd_t>::length == 64) // AVX512 implementation
441  detail::transpose_matrix_avx512(matrix);
442 #endif // defined(__AVX512BW__)
443  else
444  detail::transpose(matrix);
445 }
447 
456 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
457 constexpr target_simd_t upcast(source_simd_t const & src)
458 {
459  static_assert(
460  simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
461  "The length of the target simd type must be greater or equal than the length of the source simd type.");
462 
463  target_simd_t tmp{};
464  for (unsigned i = 0; i < simd_traits<target_simd_t>::length; ++i)
465  tmp[i] = static_cast<typename simd_traits<target_simd_t>::scalar_type>(src[i]);
466 
467  return tmp;
468 }
469 
471 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
472  requires detail::is_builtin_simd_v<target_simd_t> && detail::is_builtin_simd_v<source_simd_t>
473  && detail::is_native_builtin_simd_v<source_simd_t>
474 constexpr target_simd_t upcast(source_simd_t const & src)
475 {
476  static_assert(
477  simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
478  "The length of the target simd type must be greater or equal than the length of the source simd type.");
479 
480  if constexpr (simd_traits<source_simd_t>::length == simd_traits<target_simd_t>::length)
481  {
482  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
483  "Target vector has a different byte size.");
484  return reinterpret_cast<target_simd_t>(src); // Same packing so we do not cast.
485  }
486  else if constexpr (std::signed_integral<typename simd_traits<source_simd_t>::scalar_type>)
487  {
488  return detail::upcast_signed<target_simd_t>(src);
489  }
490  else
491  {
492  static_assert(std::unsigned_integral<typename simd_traits<source_simd_t>::scalar_type>,
493  "Expected unsigned scalar type.");
494  return detail::upcast_unsigned<target_simd_t>(src);
495  }
496 }
498 
499 } // namespace simd
500 
501 } // namespace seqan3
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...
The <concepts> header from C++20's standard library.
T fill(T... args)
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr auto chunk
Divide a range in chunks.
Definition: chunk.hpp:835
T iota(T... args)
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
Provides specific algorithm implementations for AVX2 instruction set.
Provides specific algorithm implementations for AVX512 instruction set.
Provides specific algorithm implementations for SSE4 instruction set.
Provides seqan3::simd::simd_traits.
T swap(T... args)
Provides seqan3::simd::simd_concept.