SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
simd_matrix_scoring_scheme.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <concepts>
16 
23 
24 namespace seqan3::detail
25 {
26 
56 template <simd_concept simd_score_t, semialphabet alphabet_t, typename alignment_t>
57  requires (std::same_as<alignment_t, align_cfg::method_local> || std::same_as<alignment_t, align_cfg::method_global>)
58 class simd_matrix_scoring_scheme
59 {
60 private:
62  using scalar_type = typename simd_traits<simd_score_t>::scalar_type;
64  using simd_score_profile_type = simd_score_t;
66  using simd_alphabet_ranks_type = simd_score_t;
67 
68  static_assert(seqan3::alphabet_size<alphabet_t> <= std::numeric_limits<scalar_type>::max(),
69  "The selected simd scalar type is not large enough to represent the given alphabet including an "
70  "additional padding symbol!");
71 
72  static_assert(seqan3::alphabet_size<alphabet_t> < std::numeric_limits<scalar_type>::max(),
73  "The selected simd scalar type is not large enough to represent the given alphabet including an "
74  "additional padding symbol!");
75 
77  static constexpr bool is_global = std::same_as<alignment_t, align_cfg::method_global>;
79  static constexpr size_t index_offset = seqan3::alphabet_size<alphabet_t> + 1; // scheme is extended by one.
81  static constexpr scalar_type score_for_padding_symbol = (is_global) ? 1 : -1;
82 
84  std::vector<scalar_type> scoring_scheme_data{};
85 
86 public:
88  static constexpr scalar_type padding_symbol = static_cast<scalar_type>(seqan3::alphabet_size<alphabet_t>);
89 
93  constexpr simd_matrix_scoring_scheme() = default;
94  constexpr simd_matrix_scoring_scheme(simd_matrix_scoring_scheme const &) = default;
95  constexpr simd_matrix_scoring_scheme(simd_matrix_scoring_scheme &&) = default;
96  constexpr simd_matrix_scoring_scheme & operator=(simd_matrix_scoring_scheme const &) = default;
97  constexpr simd_matrix_scoring_scheme & operator=(simd_matrix_scoring_scheme &&) = default;
98  ~simd_matrix_scoring_scheme() = default;
99 
101  template <typename scoring_scheme_t>
102  constexpr explicit simd_matrix_scoring_scheme(scoring_scheme_t const & scoring_scheme)
103  {
104  initialise_from_scalar_scoring_scheme(scoring_scheme);
105  }
106 
108  template <typename scoring_scheme_t>
109  constexpr simd_matrix_scoring_scheme & operator=(scoring_scheme_t const & scoring_scheme)
110  {
111  initialise_from_scalar_scoring_scheme(scoring_scheme);
112  return *this;
113  }
115 
140  constexpr simd_score_t score(simd_score_profile_type const & score_profile,
141  simd_alphabet_ranks_type const & ranks) const noexcept
142  {
143  simd_score_t const matrix_index = score_profile + ranks; // Compute the matrix indices for the lookup.
144  simd_score_t result{};
145 
146  for (size_t idx = 0; idx < simd_traits<simd_score_t>::length; ++idx)
147  result[idx] = scoring_scheme_data.data()[matrix_index[idx]];
148 
149  return result;
150  }
152 
154  constexpr scalar_type padding_match_score() const noexcept
155  {
156  return score_for_padding_symbol;
157  }
158 
169  constexpr simd_score_profile_type make_score_profile(simd_alphabet_ranks_type const & ranks) const noexcept
170  {
171  return ranks * simd::fill<simd_score_t>(index_offset);
172  }
173 
174 private:
183  template <typename scoring_scheme_t>
185  constexpr void initialise_from_scalar_scoring_scheme(scoring_scheme_t const & scoring_scheme)
186  {
187  using score_t = decltype(std::declval<scoring_scheme_t const &>().score(alphabet_t{}, alphabet_t{}));
188 
189  // Helper function to check if the matrix score is in the value range representable by the selected simd vector.
190  [[maybe_unused]] auto check_score_range = [&]([[maybe_unused]] score_t score)
191  {
192  // Note only if the size of the scalar type of the simd vector is smaller than the size of the score type
193  // of the original scoring scheme, the score might exceed the valid value range of the scalar type. In this
194  // case an exception will be thrown.
195  if constexpr (sizeof(scalar_type) < sizeof(score_t))
196  {
197  constexpr score_t max_score_value = static_cast<score_t>(std::numeric_limits<scalar_type>::max());
198  constexpr score_t min_score_value = static_cast<score_t>(std::numeric_limits<scalar_type>::lowest());
199 
200  if (score > max_score_value || score < min_score_value)
201  throw std::invalid_argument{"The selected scoring scheme score overflows "
202  "for the selected scalar type of the simd type."};
203  }
204  };
205 
206  // For the global alignment we extend the alphabet by one symbol to handle sequences with different size.
207  scoring_scheme_data.resize(index_offset * index_offset, score_for_padding_symbol);
208 
209  // Convert the scoring matrix into a linear vector to allow gather operations later on.
210  using alphabet_size_t = std::remove_const_t<decltype(seqan3::alphabet_size<alphabet_t>)>;
211  auto data_it = scoring_scheme_data.begin();
212  for (alphabet_size_t lhs_rank = 0; lhs_rank < seqan3::alphabet_size<alphabet_t>; ++lhs_rank)
213  {
214  for (alphabet_size_t rhs_rank = 0; rhs_rank < seqan3::alphabet_size<alphabet_t>; ++rhs_rank, ++data_it)
215  {
216  score_t tmp_score = scoring_scheme.score(seqan3::assign_rank_to(lhs_rank, alphabet_t{}),
217  seqan3::assign_rank_to(rhs_rank, alphabet_t{}));
218 
219  check_score_range(tmp_score);
220  *data_it = tmp_score;
221  }
222  ++data_it; // skip one for the padded symbol.
223  }
224  }
225 };
226 
227 } // namespace seqan3::detail
Provides algorithms to modify seqan3::simd::simd_type.
Provides global and local alignment configurations.
Core alphabet concept and free function/type trait wrappers.
Adaptions of concepts from the Cereal library.
The <concepts> header from C++20's standard library.
constexpr auto assign_rank_to
Assign a rank to an alphabet object.
Definition: alphabet/concept.hpp:293
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
A concept that requires that type be able to score two letters.
T lowest(T... args)
T max(T... args)
Provides seqan3::scoring_scheme_for.
Provides seqan3::simd::simd_concept.