SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
gap_decorator.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
14 #pragma once
15 
16 #include <algorithm>
17 #include <limits>
18 #include <ranges>
19 #include <set>
20 #include <tuple>
21 #include <type_traits>
22 
28 
29 namespace seqan3
30 {
31 
77 template <std::ranges::viewable_range inner_type>
78  requires std::ranges::random_access_range<inner_type> && std::ranges::sized_range<inner_type>
79  && (std::is_const_v<std::remove_reference_t<inner_type>> || std::ranges::view<inner_type>)
81 {
82 private:
83  // Declaration of class's iterator types; for the definition see below.
84  template <bool = true>
85  class basic_iterator;
86 
88  using iterator = basic_iterator<true>;
91  using const_iterator = iterator;
92 
94  using ungapped_view_type = decltype(views::type_reduce(std::declval<inner_type &&>()));
95 
96 public:
105 
111 
118 
123  using size_type = std::ranges::range_size_t<inner_type>;
124 
129  using difference_type = std::ranges::range_difference_t<inner_type>;
131 
136  using unaligned_sequence_type = inner_type;
137 
148  gap_decorator() = default;
149  gap_decorator(gap_decorator const &) = default;
150  gap_decorator & operator=(gap_decorator const &) = default;
151  gap_decorator(gap_decorator && rhs) = default;
152  gap_decorator & operator=(gap_decorator && rhs) = default;
153  ~gap_decorator() = default;
154 
159  template <typename other_range_t>
160  requires (!std::same_as<other_range_t, gap_decorator>)
161  && std::same_as<std::remove_cvref_t<other_range_t>, std::remove_cvref_t<inner_type>>
162  && std::ranges::viewable_range<other_range_t> // at end, otherwise it competes with the move ctor
163  gap_decorator(other_range_t && range) : ungapped_view{views::type_reduce(std::forward<inner_type>(range))}
164  {
165  } // TODO (@smehringer) only works for copyable views. Has to be changed once views are not required to be copyable anymore.
166  // !\}
167 
181  size_type size() const
182  {
183  if (anchors.size())
184  return anchors.rbegin()->second + ungapped_view.size();
185 
186  return ungapped_view.size();
187  }
188 
204  iterator insert_gap(const_iterator const it, size_type const count = 1)
205  {
206  if (!count) // [[unlikely]]
207  return it;
208 
209  size_type const pos = it - begin();
210  assert(pos <= size());
211 
212  set_iterator_type it_set = anchors.upper_bound(anchor_gap_t{pos, bound_dummy});
213 
214  if (it_set == anchors.begin()) // will also catch if anchors is empty since begin() == end()
215  {
216  anchors.emplace_hint(anchors.begin(), anchor_gap_t{pos, count});
217  }
218  else // there are gaps before pos
219  {
220  --it_set;
221  auto gap_len{it_set->second};
222  if (it_set != anchors.begin())
223  gap_len -= (*(std::prev(it_set))).second;
224 
225  if (it_set->first + gap_len >= pos) // extend existing gap
226  {
227  anchor_gap_t gap{it_set->first, it_set->second + count};
228  it_set = anchors.erase(it_set);
229  anchors.insert(it_set, gap);
230  }
231  else // insert new gap
232  {
233  anchor_gap_t gap{pos, it_set->second + count};
234  ++it_set;
235  anchors.insert(it_set, gap);
236  }
237  }
238 
239  // post-processing: reverse update of succeeding gaps
240  rupdate(pos, count);
241  return iterator{*this, pos};
242  }
243 
257  iterator erase_gap(const_iterator const it)
258  {
259  // check if [it, it+gap_len[ covers [first, last[
260  if ((*it) != gap{}) // [[unlikely]]
261  throw gap_erase_failure("The range to be erased does not correspond to a consecutive gap.");
262 
263  return erase_gap(it, std::next(it));
264  }
265 
281  iterator erase_gap(const_iterator const first, const_iterator const last)
282  {
283  size_type const pos1 = first - begin();
284  size_type const pos2 = last - begin();
285  set_iterator_type it = anchors.upper_bound(anchor_gap_t{pos1, bound_dummy}); // first element greater than pos1
286 
287  if (it == anchors.begin())
288  throw gap_erase_failure{"There is no gap to erase in range [" + std::to_string(pos1) + ","
289  + std::to_string(pos2) + "]."};
290 
291  --it;
292  size_type const gap_len = gap_length(it);
293 
294  // check if [it, it+gap_len[ covers [first, last[
295  if ((it->first + gap_len) < pos2) // [[unlikely]]
296  {
297  throw gap_erase_failure{"The range to be erased does not correspond to a consecutive gap."};
298  }
299  // case 1: complete gap is deleted
300  else if (gap_len == pos2 - pos1)
301  {
302  it = anchors.erase(it);
303  }
304  // case 2: gap to be deleted in tail or larger than 1 (equiv. to shift tail left, i.e. pos remains unchanged)
305  else
306  {
307  anchor_gap_t gap{it->first, it->second - pos2 + pos1};
308  it = anchors.erase(it);
309  it = anchors.insert(it, gap); // amortized constant because of hint
310  ++it; // update node after the current
311  }
312 
313  // post-processing: forward update of succeeding gaps
314  update(it, pos2 - pos1);
315 
316  return iterator{*this, pos1};
317  }
318 
326  template <typename unaligned_sequence_t> // generic template to use forwarding reference
327  requires std::assignable_from<gap_decorator &, unaligned_sequence_t>
328  friend void assign_unaligned(gap_decorator & dec, unaligned_sequence_t && unaligned)
329  {
330  dec = unaligned;
331  }
333 
352  const_iterator begin() const noexcept
353  {
354  return iterator{*this};
355  }
356 
358  const_iterator cbegin() const noexcept
359  {
360  return const_iterator{*this};
361  }
362 
378  const_iterator end() const noexcept
379  {
380  return iterator{*this, size()};
381  }
382 
384  const_iterator cend() const noexcept
385  {
386  return const_iterator{*this, size()};
387  }
389 
407  reference at(size_type const i)
408  {
409  if (i >= size()) // [[unlikely]]
410  throw std::out_of_range{"Trying to access element behind the last in gap_decorator."};
411  return (*this)[i];
412  }
413 
415  const_reference at(size_type const i) const
416  {
417  if (i >= size()) // [[unlikely]]
418  throw std::out_of_range{"Trying to access element behind the last in gap_decorator."};
419  return (*this)[i];
420  }
421 
434  reference operator[](size_type const i) const
435  {
436  return *iterator{*this, i};
437  }
439 
463  friend bool operator==(gap_decorator const & lhs, gap_decorator const & rhs)
464  {
465  if (lhs.size() == rhs.size() && lhs.anchors == rhs.anchors
466  && std::ranges::equal(lhs.ungapped_view, rhs.ungapped_view))
467  {
468  return true;
469  }
470 
471  return false;
472  }
473 
478  friend bool operator!=(gap_decorator const & lhs, gap_decorator const & rhs)
479  {
480  return !(lhs == rhs);
481  }
482 
487  friend bool operator<(gap_decorator const & lhs, gap_decorator const & rhs)
488  {
489  auto lit = lhs.begin();
490  auto rit = rhs.begin();
491 
492  while (lit != lhs.end() && rit != rhs.end() && *lit == *rit)
493  ++lit, ++rit;
494 
495  if (rit == rhs.end())
496  return false; // lhs == rhs, or rhs prefix of lhs
497  else if (lit == lhs.end())
498  return true; // lhs prefix of rhs
499 
500  return *lit < *rit;
501  }
502 
507  friend bool operator<=(gap_decorator const & lhs, gap_decorator const & rhs)
508  {
509  auto lit = lhs.begin();
510  auto rit = rhs.begin();
511 
512  while (lit != lhs.end() && rit != rhs.end() && *lit == *rit)
513  ++lit, ++rit;
514 
515  if (lit == lhs.end())
516  return true; // lhs == rhs, or lhs prefix of rhs
517  else if (rit == rhs.end())
518  return false; // rhs prefix of lhs
519 
520  return *lit < *rit;
521  }
522 
527  friend bool operator>(gap_decorator const & lhs, gap_decorator const & rhs)
528  {
529  return !(lhs <= rhs);
530  }
531 
536  friend bool operator>=(gap_decorator const & lhs, gap_decorator const & rhs)
537  {
538  return !(lhs < rhs);
539  }
541 
542 private:
544  using anchor_gap_t = typename std::pair<size_t, size_t>;
545 
547  using anchor_set_type = std::set<anchor_gap_t>;
548 
550  using set_iterator_type = typename anchor_set_type::iterator;
551 
553  static constexpr size_t bound_dummy{std::numeric_limits<size_t>::max()};
554 
567  size_type gap_length(set_iterator_type it) const
568  {
569  return (it == anchors.begin()) ? it->second : it->second - (*std::prev(it)).second;
570  }
571 
584  void rupdate(size_type const pos, size_type const offset)
585  {
586  for (auto it = std::prev(anchors.end(), 1); it->first > pos;)
587  {
588  anchors.emplace_hint(it, anchor_gap_t{it->first + offset, it->second + offset});
589  anchors.erase(*it--);
590  }
591  }
592 
605  void update(set_iterator_type it, size_type const offset)
606  {
607  while (it != anchors.end())
608  {
609  anchor_gap_t gap{it->first - offset, it->second - offset};
610  it = anchors.erase(it);
611  it = anchors.insert(it, gap);
612  ++it;
613  }
614  }
615 
617  ungapped_view_type ungapped_view{};
618 
620  anchor_set_type anchors{};
621 };
622 
630 template <std::ranges::viewable_range urng_t>
632 gap_decorator(urng_t && range) -> gap_decorator<std::remove_reference_t<urng_t> const &>;
633 
638 template <std::ranges::view urng_t>
639 gap_decorator(urng_t range) -> gap_decorator<urng_t>;
641 
658 template <std::ranges::viewable_range inner_type>
659  requires std::ranges::random_access_range<inner_type> && std::ranges::sized_range<inner_type>
660  && (std::is_const_v<std::remove_reference_t<inner_type>> || std::ranges::view<inner_type>)
661 template <bool>
662 class gap_decorator<inner_type>::basic_iterator
663 {
664 protected:
666  typename std::add_pointer_t<gap_decorator const> host{nullptr};
668  typename gap_decorator::size_type pos{0u};
670  int64_t ungapped_view_pos{0}; // must be signed because we need this value to be -1 in case of leading gaps.
673  typename gap_decorator::size_type left_gap_end{0};
676  typename gap_decorator::set_iterator_type anchor_set_it{};
678  bool is_at_gap{true};
679 
681  void jump(typename gap_decorator::size_type const new_pos)
682  {
683  assert(new_pos <= host->size());
684  pos = new_pos;
685 
686  anchor_set_it = host->anchors.upper_bound(anchor_gap_t{pos, host->bound_dummy});
687  ungapped_view_pos = pos;
688 
689  if (anchor_set_it != host->anchors.begin())
690  {
691  typename gap_decorator::set_iterator_type prev{std::prev(anchor_set_it)};
692  size_type gap_len{prev->second};
693 
694  if (prev != host->anchors.begin())
695  gap_len -= std::prev(prev)->second;
696 
697  ungapped_view_pos -= prev->second;
698  left_gap_end = prev->first + gap_len;
699  }
700 
701  if (ungapped_view_pos != static_cast<int64_t>(host->ungapped_view.size()) && pos >= left_gap_end
702  && (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first))
703  is_at_gap = false;
704  else
705  is_at_gap = true;
706  }
707 
708 public:
713  using difference_type = typename gap_decorator::difference_type;
715  using value_type = typename gap_decorator::value_type;
717  using reference = typename gap_decorator::const_reference;
719  using pointer = value_type *;
721  using iterator_category = std::bidirectional_iterator_tag;
723 
727  basic_iterator() = default;
728  basic_iterator(basic_iterator const &) = default;
729  basic_iterator & operator=(basic_iterator const &) = default;
730  basic_iterator(basic_iterator &&) = default;
731  basic_iterator & operator=(basic_iterator &&) = default;
732  ~basic_iterator() = default;
733 
735  explicit basic_iterator(gap_decorator const & host_) : host(&host_), anchor_set_it{host_.anchors.begin()}
736  {
737  if (host_.anchors.size() && (*host_.anchors.begin()).first == 0) // there are gaps at the very front
738  {
739  --ungapped_view_pos; // set ungapped_view_pos to -1 so operator++ works without an extra if-branch.
740  left_gap_end = anchor_set_it->second;
741  ++anchor_set_it;
742  }
743  else
744  {
745  is_at_gap = false;
746  }
747  }
748 
750  basic_iterator(gap_decorator const & host_, typename gap_decorator::size_type const pos_) : host(&host_)
751  {
752  jump(pos_); // random access to pos
753  }
755 
760  basic_iterator & operator++()
761  {
762  assert(host); // host is set
763  ++pos;
764 
765  if (pos < left_gap_end) // we stay within the preceding gap stretch
766  return *this;
767 
768  if (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first)
769  { // proceed within the view since we are right of the previous gap but didn't arrive at the right gap yet
770  ++ungapped_view_pos;
771  if (ungapped_view_pos != static_cast<int64_t>(host->ungapped_view.size()))
772  is_at_gap = false;
773  }
774  else
775  { // we arrived at the right gap and have to update the variables. ungapped_view_pos remains unchanged.
776  left_gap_end = anchor_set_it->first + anchor_set_it->second
777  - ((anchor_set_it != host->anchors.begin()) ? (std::prev(anchor_set_it))->second : 0);
778  ++anchor_set_it;
779  is_at_gap = true;
780 
781  if (left_gap_end == host->size()) // very last gap
782  ++ungapped_view_pos;
783  }
784 
785  return *this;
786  }
787 
789  basic_iterator operator++(int)
790  {
791  basic_iterator cpy{*this};
792  ++(*this);
793  return cpy;
794  }
795 
797  basic_iterator & operator+=(difference_type const skip)
798  {
799  this->jump(this->pos + skip);
800  return *this;
801  }
802 
804  basic_iterator operator+(difference_type const skip) const
805  {
806  return basic_iterator{*(this->host), this->pos + skip};
807  }
808 
810  friend basic_iterator operator+(difference_type const skip, basic_iterator const & it)
811  {
812  return it + skip;
813  }
814 
816  basic_iterator & operator--()
817  {
818  assert(host); // host is set
819  --pos;
820 
821  if (pos < left_gap_end)
822  { // there was no gap before but we arrive at the left gap and have to update the variables.
823  (anchor_set_it != host->anchors.begin()) ? --anchor_set_it : anchor_set_it;
824 
825  if (anchor_set_it != host->anchors.begin())
826  {
827  auto prev = std::prev(anchor_set_it);
828  left_gap_end =
829  prev->first + prev->second - ((prev != host->anchors.begin()) ? std::prev(prev)->second : 0);
830  }
831  else // [[unlikely]]
832  {
833  left_gap_end = 0;
834  }
835  is_at_gap = true;
836  }
837  else if (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first)
838  { // we are neither at the left nor right gap
839  --ungapped_view_pos;
840  is_at_gap = false;
841  }
842  // else -> no op (we are still within the right gap stretch)
843 
844  return *this;
845  }
846 
848  basic_iterator operator--(int)
849  {
850  basic_iterator cpy{*this};
851  --(*this);
852  return cpy;
853  }
854 
856  basic_iterator & operator-=(difference_type const skip)
857  {
858  this->jump(this->pos - skip);
859  return *this;
860  }
861 
863  basic_iterator operator-(difference_type const skip) const
864  {
865  return basic_iterator{*(this->host), this->pos - skip};
866  }
867 
869  friend basic_iterator operator-(difference_type const skip, basic_iterator const & it)
870  {
871  return it - skip;
872  }
873 
875  difference_type operator-(basic_iterator const lhs) const noexcept
876  {
877  return static_cast<difference_type>(this->pos - lhs.pos);
878  }
880 
885  reference operator*() const
886  {
887  return (is_at_gap) ? reference{gap{}} : reference{host->ungapped_view[ungapped_view_pos]};
888  }
889 
891  reference operator[](difference_type const n) const
892  {
893  return *(*this + n);
894  }
896 
903  friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
904  {
905  return lhs.pos == rhs.pos;
906  }
907 
909  friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
910  {
911  return lhs.pos != rhs.pos;
912  }
913 
915  friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
916  {
917  return lhs.pos < rhs.pos;
918  }
919 
921  friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
922  {
923  return lhs.pos > rhs.pos;
924  }
925 
927  friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
928  {
929  return lhs.pos <= rhs.pos;
930  }
931 
933  friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
934  {
935  return lhs.pos >= rhs.pos;
936  }
938 };
939 
940 } // namespace seqan3
Includes customized exception types for the alignment module .
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:81
gap_decorator & operator=(gap_decorator &&rhs)=default
Defaulted.
gap_decorator(gap_decorator &&rhs)=default
Defaulted.
std::ranges::range_difference_t< inner_type > difference_type
The difference type of the underlying sequence.
Definition: gap_decorator.hpp:129
inner_type unaligned_sequence_type
The underlying ungapped range type.
Definition: gap_decorator.hpp:136
~gap_decorator()=default
Defaulted.
gap_decorator(gap_decorator const &)=default
Defaulted.
reference const_reference
const_reference type equals reference type equals value type because the underlying sequence must not...
Definition: gap_decorator.hpp:117
gap_decorator & operator=(gap_decorator const &)=default
Defaulted.
gapped< std::ranges::range_value_t< inner_type > > value_type
The variant type of the alphabet type and gap symbol type (see seqan3::gapped).
Definition: gap_decorator.hpp:104
value_type reference
Use the value type as reference type because the underlying sequence must not be modified.
Definition: gap_decorator.hpp:110
gap_decorator()=default
Default constructor.
std::ranges::range_size_t< inner_type > size_type
The size_type of the underlying sequence.
Definition: gap_decorator.hpp:123
requires(!std::same_as< other_range_t, gap_decorator >) &&std
Construct with the ungapped range type.
Definition: gap_decorator.hpp:160
The alphabet of a gap character '-'.
Definition: gap.hpp:39
T end(T... args)
Provides seqan3::gap.
Provides seqan3::gapped.
alphabet_variant< alphabet_t, gap > gapped
Extends a given alphabet with a gap character.
Definition: gapped.hpp:41
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: type_pack/traits.hpp:164
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
constexpr auto type_reduce
A view adaptor that behaves like std::views::all, but type erases certain ranges.
Definition: type_reduce.hpp:150
T max(T... args)
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
gap_decorator(urng_t range) -> gap_decorator< urng_t >
Views always deduce to their respective type because they are copied.
SeqAn specific customisations in the standard namespace.
T next(T... args)
T operator!=(T... args)
T prev(T... args)
The <ranges> header from C++20's standard library.
T size(T... args)
T to_string(T... args)
Provides seqan3::views::type_reduce.