SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
misc_input.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
#include <algorithm>
#include <array>
#include <cassert>
#include <concepts>
#include <filesystem>
#include <functional>
#include <iostream>
#include <memory>
#include <ranges>
#include <span>
#include <string>
#include <tuple>
23 
24 #if defined(SEQAN3_HAS_BZIP2)
25 # include <seqan3/contrib/stream/bz2_istream.hpp>
26 #endif
27 #if defined(SEQAN3_HAS_ZLIB)
28 # include <seqan3/contrib/stream/bgzf_istream.hpp>
30 # include <seqan3/contrib/stream/gz_istream.hpp>
31 #endif
33 #include <seqan3/io/exception.hpp>
35 
36 namespace seqan3::detail
37 {
38 
44 template <std::ranges::forward_range ref_t, std::ranges::forward_range query_t>
45 inline bool starts_with(ref_t && reference, query_t && query)
46  requires std::equality_comparable_with<std::ranges::range_reference_t<ref_t>,
47  std::ranges::range_reference_t<query_t>>
48 {
49  auto rit = std::ranges::begin(reference);
50  auto rend = std::ranges::end(reference);
51 
52  auto qit = std::ranges::begin(query);
53  auto qend = std::ranges::end(query);
54 
55  while (true)
56  {
57  if (qit == qend)
58  return true;
59 
60  if (rit == rend)
61  return false;
62 
63  if (*qit != *rit)
64  return false;
65 
66  ++qit;
67  ++rit;
68  }
69 }
70 
78 template <builtin_character char_t>
79 inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream, std::filesystem::path & filename)
81 {
82  assert(primary_stream.good());
83 
84  // don't assume ownership
85  constexpr auto stream_deleter_noop = [](std::basic_istream<char_t> *) {};
86  // assume ownership
87  [[maybe_unused]] constexpr auto stream_deleter_default = [](std::basic_istream<char_t> * ptr)
88  {
89  delete ptr;
90  };
91 
92  // extract "magic header"
93  std::istreambuf_iterator<char_t> it{primary_stream};
94  std::array<char, bgzf_compression::magic_header.size()> magic_number{}; // Largest magic header from bgzf
95  size_t read_chars = 0;
96  for (; read_chars < magic_number.size(); ++read_chars)
97  {
99  break;
100 
101  magic_number[read_chars] = *it;
102  ++it;
103  }
104 
105  // unget all read chars.
106  for (size_t i = 0; i < read_chars; ++i)
107  primary_stream.unget();
108 
109  std::string extension{};
110  if (filename.has_extension())
111  extension = filename.extension().string().substr(1);
112 
113  // tests whether the given extension matches with one of the given compression tags.
114  [[maybe_unused]] auto contains_extension = [](auto compression_tag, auto const & extension) constexpr
115  {
116  return std::ranges::find(decltype(compression_tag)::file_extensions, extension)
117  != std::ranges::end(decltype(compression_tag)::file_extensions);
118  };
119 
120  // set return value appropriately
121  if (read_chars == magic_number.size() && bgzf_compression::validate_header(std::span{magic_number})) // BGZF
122  {
123 #if defined(SEQAN3_HAS_ZLIB)
124  if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
125  filename.replace_extension();
126 
127  return {new contrib::basic_bgzf_istream<char_t>{primary_stream}, stream_deleter_default};
128 #else
129  throw file_open_error{"Trying to read from a bgzf file, but no ZLIB available."};
130 #endif
131  }
132  else if (starts_with(magic_number, gz_compression::magic_header)) // GZIP
133  {
134 #if defined(SEQAN3_HAS_ZLIB)
135  if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
136  filename.replace_extension();
137 
138  return {new contrib::basic_gz_istream<char_t>{primary_stream}, stream_deleter_default};
139 #else
140  throw file_open_error{"Trying to read from a gzipped file, but no ZLIB available."};
141 #endif
142  }
143  else if (starts_with(magic_number, bz2_compression::magic_header)) // BZip2
144  {
145 #if defined(SEQAN3_HAS_BZIP2)
146  if (contains_extension(bz2_compression{}, extension))
147  filename.replace_extension();
148 
149  return {new contrib::basic_bz2_istream<char_t>{primary_stream}, stream_deleter_default};
150 #else
151  throw file_open_error{"Trying to read from a bzipped file, but no libbz2 available."};
152 #endif
153  }
154  else if (starts_with(magic_number, zstd_compression::magic_header)) // ZStd
155  {
156  throw file_open_error{"Trying to read from a zst'ed file, but SeqAn does not yet support this."};
157  }
158 
159  return {&primary_stream, stream_deleter_noop};
160 }
161 
163 template <builtin_character char_t>
164 inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream)
165 {
167  return make_secondary_istream(primary_stream, p);
168 }
169 
170 } // namespace seqan3::detail
Provides stream compression utilities.
The <concepts> header from C++20's standard library.
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
constexpr ptrdiff_t find
Get the index of the first occurrence of a type in a pack.
Definition: type_pack/traits.hpp:182
Provides exceptions used in the I/O module.
Provides seqan3::detail::magic_header.
The <ranges> header from C++20's standard library.
T rend(T... args)
T size(T... args)
Provides std::span from the C++20 standard library.
Provides concepts that do not have equivalents in C++20.