SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
debug_matrix.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <iomanip>
16 
25 
26 namespace seqan3::detail
27 {
28 
60 template <matrix matrix_t, typename first_sequence_t = std::nullopt_t, typename second_sequence_t = std::nullopt_t>
61 class debug_matrix
62 {
63 protected:
//!\brief True if a first sequence was supplied, i.e. the decayed `first_sequence_t` is not `std::nullopt_t`.
65  static constexpr bool has_first_sequence = !std::is_same_v<std::decay_t<first_sequence_t>, std::nullopt_t>;
//!\brief True if a second sequence was supplied, i.e. the decayed `second_sequence_t` is not `std::nullopt_t`.
67  static constexpr bool has_second_sequence = !std::is_same_v<std::decay_t<second_sequence_t>, std::nullopt_t>;
//!\brief The `value_type` of the wrapped matrix (reference qualifiers of `matrix_t` are removed first).
69  using entry_t = typename std::remove_reference_t<matrix_t>::value_type;
//!\brief True if the wrapped matrix stores `trace_directions`, i.e. it is a traceback matrix.
71  static constexpr bool is_traceback_matrix = std::is_same_v<std::decay_t<entry_t>, trace_directions>;
//!\brief True if the entries of the wrapped matrix are already a specialisation of `std::optional`.
74  static constexpr bool is_optional_score = is_type_specialisation_of_v<entry_t, std::optional>;
75 
76 public:
//!\brief Entries are handed out by value: the "reference" type is `value_type` itself.
// NOTE(review): the `value_type` alias is not visible in this listing - confirm it is declared just above.
83  using reference = value_type;
//!\brief Same as `reference`; this is the return type of `at()`.
85  using const_reference = reference;
//!\brief The `size_type` of the wrapped matrix (reference qualifiers of `matrix_t` are removed first).
87  using size_type = typename std::remove_reference_t<matrix_t>::size_type;
89 
// All special member functions are compiler-generated; the class adds no custom copy/move/destroy logic.
//!\brief Defaulted default constructor.
93  debug_matrix() = default;
//!\brief Defaulted copy constructor.
94  debug_matrix(debug_matrix const &) = default;
//!\brief Defaulted move constructor.
95  debug_matrix(debug_matrix &&) = default;
//!\brief Defaulted copy assignment.
96  debug_matrix & operator=(debug_matrix const &) = default;
//!\brief Defaulted move assignment.
97  debug_matrix & operator=(debug_matrix &&) = default;
//!\brief Defaulted destructor.
98  ~debug_matrix() = default;
99 
/*!\brief Construct from a matrix only.
 * \param[in] matrix The matrix to wrap; forwarded as `matrix_t`.
 *
 * Delegates to the three-argument constructor and passes `std::nullopt` for both sequences.
 * NOTE(review): this single-argument constructor is not `explicit`, so it also acts as an implicit conversion.
 */
103  debug_matrix(matrix_t matrix) : debug_matrix(std::forward<matrix_t>(matrix), std::nullopt, std::nullopt)
104  {}
105 
111  debug_matrix(matrix_t matrix, first_sequence_t first_sequence, second_sequence_t second_sequence) :
112  _matrix{std::forward<matrix_t>(matrix)},
113  _first_sequence{std::forward<first_sequence_t>(first_sequence)},
114  _second_sequence{std::forward<second_sequence_t>(second_sequence)}
115  {
116  if constexpr (has_first_sequence)
117  {
118  assert(_matrix.cols() <= _first_sequence.size() + 1u);
119  }
120 
121  if constexpr (has_second_sequence)
122  {
123  assert(_matrix.rows() <= _second_sequence.size() + 1u);
124  }
125  }
127 
129  size_t rows() const noexcept
130  {
131  if (!_transpose)
132  return _rows.value_or(_matrix.rows());
133  else
134  return _cols.value_or(_matrix.cols());
135  }
136 
138  size_t cols() const noexcept
139  {
140  if (!_transpose)
141  return _cols.value_or(_matrix.cols());
142  else
143  return _rows.value_or(_matrix.rows());
144  }
145 
147  first_sequence_t const & first_sequence() const noexcept
148  {
149  if (!_transpose)
150  return _first_sequence;
151  else
152  return _second_sequence;
153  }
154 
156  second_sequence_t const & second_sequence() const noexcept
157  {
158  if (!_transpose)
159  return _second_sequence;
160  else
161  return _first_sequence;
162  }
163 
165  const_reference at(matrix_coordinate const & coordinate) const noexcept
166  {
167  size_t row = coordinate.row;
168  size_t col = coordinate.col;
169 
170  assert(row < rows() && col < cols());
171 
172  row_index_type const _row{!_transpose ? row : col};
173  column_index_type const _col{!_transpose ? col : row};
174  row_index_type const _mask_row{_transpose == _transpose_mask ? row : col};
175  column_index_type const _mask_col{_transpose == _transpose_mask ? col : row};
176 
177  if (!_masking_matrix.has_value() || _masking_matrix.value().at({_mask_row, _mask_col}))
178  {
179  entry_t const & entry = _matrix.at({_row, _col});
180 
181  if (!is_traceback_matrix || !_transpose)
182  return entry;
183 
184  if constexpr (is_traceback_matrix)
185  {
186  trace_directions reverse{};
187  if ((entry & trace_directions::left) == trace_directions::left)
188  reverse |= trace_directions::up;
189  if ((entry & trace_directions::up) == trace_directions::up)
190  reverse |= trace_directions::left;
191  if ((entry & trace_directions::diagonal) == trace_directions::diagonal)
192  reverse |= trace_directions::diagonal;
193  return reverse;
194  }
195  }
196 
197  if constexpr (is_traceback_matrix)
198  return trace_directions::none;
199  else
200  return std::nullopt;
201  }
202 
209  debug_matrix & mask_matrix(row_wise_matrix<bool> masking_matrix) noexcept
210  {
211  assert(masking_matrix.rows() == rows());
212  assert(masking_matrix.cols() == cols());
213  _transpose_mask = _transpose;
214  _masking_matrix = std::move(masking_matrix);
215  return *this;
216  }
217 
222  debug_matrix & mask_matrix(std::vector<bool> masking_vector) noexcept
223  {
224  return mask_matrix(row_wise_matrix<bool>{number_rows{rows()}, number_cols{cols()}, std::move(masking_vector)});
225  }
226 
232  debug_matrix & sub_matrix(size_t const new_rows, size_t const new_cols) noexcept
233  {
234  assert(new_rows <= rows());
235  assert(new_cols <= cols());
236  if (!_transpose)
237  {
238  _rows = new_rows;
239  _cols = new_cols;
240  }
241  else
242  {
243  _rows = new_cols;
244  _cols = new_rows;
245  }
246  return *this;
247  }
248 
252  debug_matrix & transpose_matrix() noexcept
253  {
254  _transpose = !_transpose;
255  return *this;
256  }
257 
258 protected:
//!\brief Bundle of the symbols used when printing a matrix; declared here and defined further down in the class.
260  struct format_type; // forward declaration
262 
263 public:
273  template <typename ostream_t>
274  void stream_matrix(ostream_t & cout, fmtflags2 const flags) const noexcept
275  {
276  format_type const & symbols = (flags & fmtflags2::utf8) == fmtflags2::utf8 ? unicode : csv;
277  size_t const column_width =
278  this->column_width.has_value() ? this->column_width.value() : auto_column_width(flags);
279 
280  auto char_first_sequence = [&]([[maybe_unused]] size_t const i) -> std::string
281  {
282  if constexpr (!has_first_sequence)
283  return " ";
284  else
285  return as_string(first_sequence()[i], flags);
286  };
287 
288  auto char_second_sequence = [&]([[maybe_unused]] size_t const i) -> std::string
289  {
290  if constexpr (!has_second_sequence)
291  return " ";
292  else
293  return as_string(second_sequence()[i], flags);
294  };
295 
296  auto print_cell = [&](std::string const & symbol)
297  {
298  // deal with unicode chars that mess up std::setw
299  size_t const length_bytes = symbol.size();
300  size_t const length = unicode_str_length(symbol);
301  size_t const offset = length_bytes - length;
302 
303  cout << std::left << std::setw(column_width + offset) << symbol << symbols.col_sep;
304  };
305 
306  auto print_first_cell = [&](std::string const & symbol)
307  {
308  cout << symbol << symbols.col_sep;
309  };
310 
311  // |_|d|a|t|a|b|a|s|e|
312  auto print_first_row = [&]
313  {
314  print_first_cell(" ");
315  print_cell(symbols.epsilon);
316 
317  for (size_t col = 0; col < cols() - 1; ++col)
318  print_cell(char_first_sequence(col));
319 
320  cout << "\n";
321  };
322 
323  // |-|-|-|-|-|-|-|-|-|
324  auto print_divider = [&]
325  {
326  cout << " " << symbols.row_col_sep;
327  for (size_t col = 0; col < cols(); ++col)
328  {
329  for (size_t i = 0; i < column_width; ++i)
330  cout << symbols.row_sep;
331 
332  cout << symbols.row_col_sep;
333  }
334  cout << "\n";
335  };
336 
337  print_first_row();
338  for (size_t row = 0; row < rows(); ++row)
339  {
340  if (symbols.row_sep[0] != '\0')
341  print_divider();
342 
343  // one query letter + one row of scores / traces
344  if (row == 0)
345  print_first_cell(symbols.epsilon);
346  else
347  print_first_cell(char_second_sequence(row - 1));
348 
349  for (size_t col = 0; col < cols(); ++col)
350  print_cell(entry_at({row_index_type{row}, column_index_type{col}}, flags));
351 
352  cout << "\n";
353  }
354  }
355 
357  size_t auto_column_width(fmtflags2 const flags) const noexcept
358  {
359  size_t col_width = 1;
360  for (size_t row = 0; row < rows(); ++row)
361  for (size_t col = 0; col < cols(); ++col)
362  col_width =
363  std::max(col_width,
364  unicode_str_length(entry_at({row_index_type{row}, column_index_type{col}}, flags)));
365 
366  return col_width;
367  }
368 
369 protected:
371  std::string entry_at(matrix_coordinate const coordinate, fmtflags2 flags) const noexcept
372  {
373  format_type const & symbols = (flags & fmtflags2::utf8) == fmtflags2::utf8 ? unicode : csv;
374 
375  value_type const & entry = at(coordinate);
376  if (!is_traceback_matrix && entry == matrix_inf<value_type>)
377  return symbols.inf;
378 
379  return as_string(entry, flags);
380  }
381 
383  template <typename value_type>
384  static std::string as_string(value_type && entry, fmtflags2 const flags) noexcept
385  {
386  std::stringstream strstream;
387  debug_stream_type stream{strstream};
388  stream << flags << entry;
389  return strstream.str();
390  }
391 
/*!\brief Count the characters (not the bytes) of a UTF-8 encoded string.
 * \param[in] str The string to measure.
 * \returns The number of characters found by walking the string lead byte by lead byte.
 *
 * The lead byte determines how many bytes a character occupies: `110xxxxx` -> 2, `1110xxxx` -> 3,
 * `11110xxx` -> 4, everything else -> 1. The walk is index based, so a multi-byte sequence that is cut off
 * at the end of the string simply ends the loop instead of advancing an iterator beyond `end()`.
 */
static size_t unicode_str_length(std::string const & str) noexcept
{
    size_t const byte_count = str.size();
    size_t length = 0u;

    for (size_t pos = 0u; pos < byte_count; ++length)
    {
        uint8_t const lead = static_cast<uint8_t>(str[pos]);

        if ((lead & 0b11100000) == 0b11000000)
            pos += 2u;
        else if ((lead & 0b11110000) == 0b11100000)
            pos += 3u;
        else if ((lead & 0b11111000) == 0b11110000)
            pos += 4u;
        else
            pos += 1u;
    }

    return length;
}
409 
//!\brief The set of symbols used to print a matrix.
struct format_type
{
    //!\brief Symbol for the empty prefix; printed as the first column header and as the first row label.
    char const * epsilon = nullptr;
    //!\brief Separator printed after every cell.
    char const * col_sep = nullptr;
    //!\brief Symbol repeated to draw a divider line between rows; an empty string disables divider lines.
    char const * row_sep = nullptr;
    //!\brief Symbol printed in a divider line where it crosses a column separator.
    char const * row_col_sep = nullptr;
    //!\brief Symbol printed for entries that equal the matrix "infinity" value.
    char const * inf = nullptr;
};
424 
//!\brief Symbols for plain output: blank epsilon, `;` after each cell, no divider lines, empty infinity symbol.
426  static constexpr format_type csv{" ", ";", "", "", ""};
//!\brief Symbols for UTF-8 output with box-drawing separators; selected when `fmtflags2::utf8` is set.
428  static constexpr format_type unicode{"ε", "║", "═", "╬", "∞"};
429 
430 public:
//!\brief Width of a printed cell; if unset, `stream_matrix()` falls back to `auto_column_width()`.
432  std::optional<size_t> column_width{std::nullopt};
433 
434 protected:
//!\brief The wrapped matrix (held as `matrix_t`, which may be a reference type).
436  matrix_t _matrix;
//!\brief The sequence labelling the columns of the untransposed matrix.
438  first_sequence_t _first_sequence;
//!\brief The sequence labelling the rows of the untransposed matrix.
440  second_sequence_t _second_sequence;
//!\brief Row limit set by `sub_matrix()`, in the orientation of the wrapped matrix; unset means "all rows".
442  std::optional<size_t> _rows{};
//!\brief Column limit set by `sub_matrix()`, in the orientation of the wrapped matrix; unset means "all columns".
444  std::optional<size_t> _cols{};
//!\brief Optional mask set by `mask_matrix()`; cells whose mask entry is `false` are reported as empty by `at()`.
446  std::optional<row_wise_matrix<bool>> _masking_matrix{};
//!\brief Whether the view is currently transposed; toggled by `transpose_matrix()`.
448  bool _transpose{};
//!\brief The value `_transpose` had when the mask was set; used by `at()` to address the mask correctly.
450  bool _transpose_mask{};
451 };
452 
//!\brief Deduction guide for construction from a matrix only; `matrix_t` is deduced from a forwarding
//!       reference, so an lvalue argument yields a reference type for `matrix_t`.
458 template <matrix matrix_t>
459 debug_matrix(matrix_t &&) -> debug_matrix<matrix_t>;
460 
//!\brief Deduction guide for construction from a matrix and two sequences; all three types are deduced
//!       from forwarding references in the same way.
463 template <matrix matrix_t, typename first_sequence_t, typename second_sequence_t>
464 debug_matrix(matrix_t &&, first_sequence_t &&, second_sequence_t &&)
465  -> debug_matrix<matrix_t, first_sequence_t, second_sequence_t>;
467 
468 } // namespace seqan3::detail
469 
470 namespace seqan3
471 {
482 template <typename char_t, detail::matrix alignment_matrix_t>
483 inline debug_stream_type<char_t> & operator<<(debug_stream_type<char_t> & s, alignment_matrix_t && matrix)
484 {
485  detail::debug_matrix debug{std::forward<alignment_matrix_t>(matrix)};
486 
487  std::stringstream sstream{};
488  debug.stream_matrix(sstream, s.flags2());
489  s << sstream.str();
490  return s;
491 }
492 
/*!\brief Overload for matrices that are also input ranges.
 * \param[in,out] s      The debug stream to print to.
 * \param[in]     matrix The matrix to print.
 * \returns The result of streaming a `detail::debug_matrix` wrapper of `matrix` into `s`.
 *
 * Constrained on `std::ranges::input_range`, `detail::debug_stream_range_guard` and `detail::matrix`.
 * NOTE(review): presumably exists so such types are printed as a matrix rather than as a generic range -
 * confirm against the range overloads of the debug stream.
 */
494 template <typename char_t, std::ranges::input_range alignment_matrix_t>
495  requires detail::debug_stream_range_guard<alignment_matrix_t> && detail::matrix<alignment_matrix_t>
496 inline debug_stream_type<char_t> & operator<<(debug_stream_type<char_t> & s, alignment_matrix_t && matrix)
497 {
// Wrap the matrix and forward to the operator<< overload for `detail::matrix` types.
498  return s << detail::debug_matrix{std::forward<alignment_matrix_t>(matrix)};
499 }
500 
501 } // namespace seqan3
A "pretty printer" for most SeqAn data structures and related types.
Definition: debug_stream_type.hpp:78
Provides seqan3::debug_stream and related types.
Provides seqan3::debug_stream and related types.
T forward(T... args)
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
fmtflags2
Flags that change the behaviour of the seqan3::debug_stream.
Definition: debug_stream_type.hpp:31
@ utf8
Enables use of non-ASCII UTF8 characters in formatted output.
Definition: debug_stream_type.hpp:33
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
T left(T... args)
Provides seqan3::detail::matrix.
matrix_index< size_t > matrix_coordinate
A coordinate type to access an element inside of a two-dimensional matrix.
Definition: matrix_coordinate.hpp:178
T max(T... args)
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
requires detail::debug_stream_range_guard< alignment_matrix_t > &&detail::matrix< alignment_matrix_t > debug_stream_type< char_t > & operator<<(debug_stream_type< char_t > &s, alignment_matrix_t &&matrix)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: debug_matrix.hpp:496
SeqAn specific customisations in the standard namespace.
Provides seqan3::debug_stream and related types.
Provides seqan3::debug_stream and related types.
T reverse(T... args)
Provides seqan3::detail::row_wise_matrix.
T setw(T... args)
T str(T... args)
Provides type traits for working with templates.
Provides the declaration of seqan3::detail::trace_directions.