SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
pairwise_alignment_algorithm_banded.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <concepts>
16 #include <ranges>
17 
20 
21 namespace seqan3::detail
22 {
23 
29 template <typename alignment_configuration_t, typename... policies_t>
30  requires is_type_specialisation_of_v<alignment_configuration_t, configuration>
31 class pairwise_alignment_algorithm_banded :
32  protected pairwise_alignment_algorithm<alignment_configuration_t, policies_t...>
33 {
34 protected:
36  using base_algorithm_t = pairwise_alignment_algorithm<alignment_configuration_t, policies_t...>;
37 
38  // Import types from base class.
39  using typename base_algorithm_t::alignment_result_type;
40  using typename base_algorithm_t::score_type;
41  using typename base_algorithm_t::traits_type;
42 
43  static_assert(!std::same_as<alignment_result_type, empty_type>, "Alignment result type was not configured.");
44  static_assert(traits_type::is_banded, "Alignment configuration must have band configured.");
45 
46 public:
50  pairwise_alignment_algorithm_banded() = default;
51  pairwise_alignment_algorithm_banded(pairwise_alignment_algorithm_banded const &) = default;
52  pairwise_alignment_algorithm_banded(pairwise_alignment_algorithm_banded &&) = default;
53  pairwise_alignment_algorithm_banded &
54  operator=(pairwise_alignment_algorithm_banded const &) = default;
55  pairwise_alignment_algorithm_banded & operator=(pairwise_alignment_algorithm_banded &&) = default;
56  ~pairwise_alignment_algorithm_banded() = default;
57 
67  pairwise_alignment_algorithm_banded(alignment_configuration_t const & config) : base_algorithm_t(config)
68  {}
70 
75  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
76  requires std::invocable<callback_t, alignment_result_type>
77  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
78  {
79  using std::get;
80 
81  for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
82  {
83  size_t sequence1_size = std::ranges::distance(get<0>(sequence_pair));
84  size_t const sequence2_size = std::ranges::distance(get<1>(sequence_pair));
85 
86  auto && [alignment_matrix, index_matrix] =
87  this->acquire_matrices(sequence1_size, sequence2_size, this->lowest_viable_score());
88 
89  // Initialise the cell updater with the dimensions of the regular matrix.
90  this->compare_and_set_optimum.set_target_indices(row_index_type{sequence2_size},
91  column_index_type{sequence1_size});
92 
93  // Shrink the first sequence if the band ends before its actual end.
94  sequence1_size = std::min(sequence1_size, this->upper_diagonal + sequence2_size);
95 
96  using sequence1_difference_t = std::ranges::range_difference_t<decltype(get<0>(sequence_pair))>;
97 
98  compute_matrix(std::views::take(get<0>(sequence_pair), static_cast<sequence1_difference_t>(sequence1_size)),
99  get<1>(sequence_pair),
100  alignment_matrix,
101  index_matrix);
102  this->make_result_and_invoke(std::forward<decltype(sequence_pair)>(sequence_pair),
103  std::move(idx),
104  this->optimal_score,
105  this->optimal_coordinate,
106  alignment_matrix,
107  callback);
108  }
109  }
110 
112  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
113  requires traits_type::is_vectorised && std::invocable<callback_t, alignment_result_type>
114  auto operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
115  {
116  using simd_collection_t = std::vector<score_type, aligned_allocator<score_type, alignof(score_type)>>;
117  using original_score_t = typename traits_type::original_score_type;
118 
119  // Extract the batch of sequences for the first and the second sequence.
120  auto seq1_collection = indexed_sequence_pairs | views::elements<0> | views::elements<0>;
121  auto seq2_collection = indexed_sequence_pairs | views::elements<0> | views::elements<1>;
122 
123  this->initialise_tracker(seq1_collection, seq2_collection);
124 
125  // Convert batch of sequences to sequence of simd vectors.
126  thread_local simd_collection_t simd_seq1_collection{};
127  thread_local simd_collection_t simd_seq2_collection{};
128 
129  this->convert_batch_of_sequences_to_simd_vector(simd_seq1_collection,
130  seq1_collection,
131  this->scoring_scheme.padding_symbol);
132  this->convert_batch_of_sequences_to_simd_vector(simd_seq2_collection,
133  seq2_collection,
134  this->scoring_scheme.padding_symbol);
135 
136  size_t const sequence1_size = std::ranges::distance(simd_seq1_collection);
137  size_t const sequence2_size = std::ranges::distance(simd_seq2_collection);
138 
139  auto && [alignment_matrix, index_matrix] =
140  this->acquire_matrices(sequence1_size, sequence2_size, this->lowest_viable_score());
141 
142  compute_matrix(simd_seq1_collection, simd_seq2_collection, alignment_matrix, index_matrix);
143 
144  size_t index = 0;
145  for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
146  {
147  original_score_t score = this->optimal_score[index]
148  - (this->padding_offsets[index] * this->scoring_scheme.padding_match_score());
149  matrix_coordinate coordinate{row_index_type{size_t{this->optimal_coordinate.row[index]}},
150  column_index_type{size_t{this->optimal_coordinate.col[index]}}};
151  this->make_result_and_invoke(std::forward<decltype(sequence_pair)>(sequence_pair),
152  std::move(idx),
153  std::move(score),
154  std::move(coordinate),
155  alignment_matrix,
156  callback);
157  ++index;
158  }
159  }
161 
162 protected:
215  template <std::ranges::forward_range sequence1_t,
216  std::ranges::forward_range sequence2_t,
217  std::ranges::input_range alignment_matrix_t,
218  std::ranges::input_range index_matrix_t>
219  requires std::ranges::forward_range<std::ranges::range_reference_t<alignment_matrix_t>>
220  && std::ranges::forward_range<std::ranges::range_reference_t<index_matrix_t>>
221  void compute_matrix(sequence1_t && sequence1,
222  sequence2_t && sequence2,
223  alignment_matrix_t && alignment_matrix,
224  index_matrix_t && index_matrix)
225  {
226  // ---------------------------------------------------------------------
227  // Initialisation phase: allocate memory and initialise first column.
228  // ---------------------------------------------------------------------
229 
230  this->reset_optimum(); // Reset the tracker for the new alignment computation.
231 
232  auto alignment_matrix_it = alignment_matrix.begin();
233  auto indexed_matrix_it = index_matrix.begin();
234 
235  using row_index_t = std::ranges::range_difference_t<sequence2_t>; // row_size = |sequence2| + 1
236  using column_index_t = std::ranges::range_difference_t<sequence1_t>; // column_size = |sequence1| + 1
237 
238  row_index_t row_size = std::max<int32_t>(0, -this->lower_diagonal);
239  column_index_t const column_size = std::max<int32_t>(0, this->upper_diagonal);
240  this->initialise_column(*alignment_matrix_it, *indexed_matrix_it, std::views::take(sequence2, row_size));
241 
242  // ---------------------------------------------------------------------
243  // 1st recursion phase: band intersects with the first row.
244  // ---------------------------------------------------------------------
245 
246  for (auto alphabet1 : std::views::take(sequence1, column_size))
247  {
248  this->compute_column(*++alignment_matrix_it,
249  *++indexed_matrix_it,
250  alphabet1,
251  std::views::take(sequence2, ++row_size));
252  }
253 
254  // ---------------------------------------------------------------------
255  // 2nd recursion phase: iterate until the end of the matrix.
256  // ---------------------------------------------------------------------
257 
258  row_index_t first_row_index = 0u;
259  for (auto alphabet1 : std::views::drop(sequence1, column_size))
260  {
261  compute_band_column(*++alignment_matrix_it,
262  std::views::drop(*++indexed_matrix_it, first_row_index + 1),
263  alphabet1,
264  views::slice(sequence2, first_row_index, ++row_size));
265  ++first_row_index;
266  }
267 
268  // ---------------------------------------------------------------------
269  // Final phase: track score of last column
270  // ---------------------------------------------------------------------
271 
272  auto alignment_column = *alignment_matrix_it;
273  auto cell_index_column = std::views::drop(*indexed_matrix_it, first_row_index);
274 
275  auto alignment_column_it = alignment_column.begin();
276  auto cell_index_column_it = cell_index_column.begin();
277 
278  this->track_last_column_cell(*alignment_column_it, *cell_index_column_it);
279 
280  for (row_index_t last_row = std::min<row_index_t>(std::ranges::distance(sequence2), row_size);
281  first_row_index < last_row;
282  ++first_row_index)
283  this->track_last_column_cell(*++alignment_column_it, *++cell_index_column_it);
284 
285  this->track_final_cell(*alignment_column_it, *cell_index_column_it);
286  }
287 
339  template <std::ranges::forward_range alignment_column_t,
340  std::ranges::input_range cell_index_column_t,
341  typename alphabet1_t,
342  std::ranges::input_range sequence2_t>
343  void compute_band_column(alignment_column_t && alignment_column,
344  cell_index_column_t && cell_index_column,
345  alphabet1_t const & alphabet1,
346  sequence2_t && sequence2)
347  {
348  // ---------------------------------------------------------------------
349  // Initial phase: prepare column and initialise first cell
350  // ---------------------------------------------------------------------
351 
352  auto current_alignment_column_it = alignment_column.begin();
353  auto cell_index_column_it = cell_index_column.begin();
354 
355  // Points to the last valid cell in the column.
356  decltype(current_alignment_column_it) next_alignment_column_it{current_alignment_column_it};
357  auto cell = *current_alignment_column_it;
358  cell = this->track_cell(
359  this->initialise_band_first_cell(cell.best_score(),
360  *++next_alignment_column_it,
361  this->scoring_scheme.score(alphabet1, *std::ranges::begin(sequence2))),
362  *cell_index_column_it);
363 
364  // ---------------------------------------------------------------------
365  // Iteration phase: iterate over column and compute each cell
366  // ---------------------------------------------------------------------
367 
368  for (auto && alphabet2 : sequence2 | std::views::drop(1))
369  {
370  current_alignment_column_it = next_alignment_column_it;
371  auto cell = *current_alignment_column_it;
372  cell = this->track_cell(this->compute_inner_cell(cell.best_score(),
373  *++next_alignment_column_it,
374  this->scoring_scheme.score(alphabet1, alphabet2)),
375  *++cell_index_column_it);
376  }
377 
378  // ---------------------------------------------------------------------
379  // Final phase: track last cell
380  // ---------------------------------------------------------------------
381 
382  this->track_last_row_cell(*current_alignment_column_it, *cell_index_column_it);
383  }
384 };
385 
386 } // namespace seqan3::detail
The <concepts> header from C++20's standard library.
T forward(T... args)
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:178
matrix_index< size_t > matrix_coordinate
A coordinate type to access an element inside of a two-dimensional matrix.
Definition: matrix_coordinate.hpp:178
T min(T... args)
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:415
Provides seqan3::detail::pairwise_alignment_algorithm.
The <ranges> header from C++20's standard library.
Provides seqan3::views::slice.