Horizon
input_adapters.hpp
1 #pragma once
2 
3 #include <array> // array
4 #include <cstddef> // size_t
5 #include <cstdio> //FILE *
6 #include <cstring> // strlen
7 #include <istream> // istream
8 #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
9 #include <memory> // shared_ptr, make_shared, addressof
10 #include <numeric> // accumulate
11 #include <string> // string, char_traits
12 #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
13 #include <utility> // pair, declval
14 
15 #include <nlohmann/detail/iterators/iterator_traits.hpp>
16 #include <nlohmann/detail/macro_scope.hpp>
17 
18 namespace nlohmann
19 {
20 namespace detail
21 {
/// the supported input formats
enum class input_format_t
{
    json,
    cbor,
    msgpack,
    ubjson,
    bson
};
24 
26 // input adapters //
28 
34 {
35  public:
36  using char_type = char;
37 
38  JSON_HEDLEY_NON_NULL(2)
39  explicit file_input_adapter(std::FILE* f) noexcept
40  : m_file(f)
41  {}
42 
43  // make class move-only
44  file_input_adapter(const file_input_adapter&) = delete;
46  file_input_adapter& operator=(const file_input_adapter&) = delete;
47  file_input_adapter& operator=(file_input_adapter&&) = delete;
48 
49  std::char_traits<char>::int_type get_character() noexcept
50  {
51  return std::fgetc(m_file);
52  }
53 
54  private:
56  std::FILE* m_file;
57 };
58 
59 
70 {
71  public:
72  using char_type = char;
73 
75  {
76  // clear stream flags; we use underlying streambuf I/O, do not
77  // maintain ifstream flags, except eof
78  if (is != nullptr)
79  {
80  is->clear(is->rdstate() & std::ios::eofbit);
81  }
82  }
83 
84  explicit input_stream_adapter(std::istream& i)
85  : is(&i), sb(i.rdbuf())
86  {}
87 
88  // delete because of pointer members
90  input_stream_adapter& operator=(input_stream_adapter&) = delete;
91  input_stream_adapter& operator=(input_stream_adapter&& rhs) = delete;
92 
93  input_stream_adapter(input_stream_adapter&& rhs) noexcept : is(rhs.is), sb(rhs.sb)
94  {
95  rhs.is = nullptr;
96  rhs.sb = nullptr;
97  }
98 
99  // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
100  // ensure that std::char_traits<char>::eof() and the character 0xFF do not
101  // end up as the same value, eg. 0xFFFFFFFF.
102  std::char_traits<char>::int_type get_character()
103  {
104  auto res = sb->sbumpc();
105  // set eof manually, as we don't use the istream interface.
106  if (JSON_HEDLEY_UNLIKELY(res == EOF))
107  {
108  is->clear(is->rdstate() | std::ios::eofbit);
109  }
110  return res;
111  }
112 
113  private:
115  std::istream* is = nullptr;
116  std::streambuf* sb = nullptr;
117 };
118 
119 // General-purpose iterator-based adapter. It might not be as fast as
120 // theoretically possible for some containers, but it is extremely versatile.
121 template<typename IteratorType>
123 {
124  public:
125  using char_type = typename std::iterator_traits<IteratorType>::value_type;
126 
127  iterator_input_adapter(IteratorType first, IteratorType last)
128  : current(std::move(first)), end(std::move(last)) {}
129 
130  typename std::char_traits<char_type>::int_type get_character()
131  {
132  if (JSON_HEDLEY_LIKELY(current != end))
133  {
134  auto result = std::char_traits<char_type>::to_int_type(*current);
135  std::advance(current, 1);
136  return result;
137  }
138  else
139  {
140  return std::char_traits<char_type>::eof();
141  }
142  }
143 
144  private:
145  IteratorType current;
146  IteratorType end;
147 
148  template<typename BaseInputAdapter, size_t T>
149  friend struct wide_string_input_helper;
150 
151  bool empty() const
152  {
153  return current == end;
154  }
155 
156 };
157 
158 
159 template<typename BaseInputAdapter, size_t T>
161 
162 template<typename BaseInputAdapter>
163 struct wide_string_input_helper<BaseInputAdapter, 4>
164 {
165  // UTF-32
166  static void fill_buffer(BaseInputAdapter& input,
167  std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
168  size_t& utf8_bytes_index,
169  size_t& utf8_bytes_filled)
170  {
171  utf8_bytes_index = 0;
172 
173  if (JSON_HEDLEY_UNLIKELY(input.empty()))
174  {
175  utf8_bytes[0] = std::char_traits<char>::eof();
176  utf8_bytes_filled = 1;
177  }
178  else
179  {
180  // get the current character
181  const auto wc = input.get_character();
182 
183  // UTF-32 to UTF-8 encoding
184  if (wc < 0x80)
185  {
186  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
187  utf8_bytes_filled = 1;
188  }
189  else if (wc <= 0x7FF)
190  {
191  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u) & 0x1Fu));
192  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
193  utf8_bytes_filled = 2;
194  }
195  else if (wc <= 0xFFFF)
196  {
197  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u) & 0x0Fu));
198  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
199  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
200  utf8_bytes_filled = 3;
201  }
202  else if (wc <= 0x10FFFF)
203  {
204  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | ((static_cast<unsigned int>(wc) >> 18u) & 0x07u));
205  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 12u) & 0x3Fu));
206  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
207  utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
208  utf8_bytes_filled = 4;
209  }
210  else
211  {
212  // unknown character
213  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
214  utf8_bytes_filled = 1;
215  }
216  }
217  }
218 };
219 
220 template<typename BaseInputAdapter>
221 struct wide_string_input_helper<BaseInputAdapter, 2>
222 {
223  // UTF-16
224  static void fill_buffer(BaseInputAdapter& input,
225  std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
226  size_t& utf8_bytes_index,
227  size_t& utf8_bytes_filled)
228  {
229  utf8_bytes_index = 0;
230 
231  if (JSON_HEDLEY_UNLIKELY(input.empty()))
232  {
233  utf8_bytes[0] = std::char_traits<char>::eof();
234  utf8_bytes_filled = 1;
235  }
236  else
237  {
238  // get the current character
239  const auto wc = input.get_character();
240 
241  // UTF-16 to UTF-8 encoding
242  if (wc < 0x80)
243  {
244  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
245  utf8_bytes_filled = 1;
246  }
247  else if (wc <= 0x7FF)
248  {
249  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u)));
250  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
251  utf8_bytes_filled = 2;
252  }
253  else if (0xD800 > wc || wc >= 0xE000)
254  {
255  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u)));
256  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
257  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
258  utf8_bytes_filled = 3;
259  }
260  else
261  {
262  if (JSON_HEDLEY_UNLIKELY(!input.empty()))
263  {
264  const auto wc2 = static_cast<unsigned int>(input.get_character());
265  const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
266  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
267  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
268  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
269  utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
270  utf8_bytes_filled = 4;
271  }
272  else
273  {
274  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
275  utf8_bytes_filled = 1;
276  }
277  }
278  }
279  }
280 };
281 
282 // Wraps another input adapter to convert wide character types into individual bytes.
283 template<typename BaseInputAdapter, typename WideCharType>
285 {
286  public:
287  using char_type = char;
288 
289  wide_string_input_adapter(BaseInputAdapter base)
290  : base_adapter(base) {}
291 
292  typename std::char_traits<char>::int_type get_character() noexcept
293  {
294  // check if buffer needs to be filled
295  if (utf8_bytes_index == utf8_bytes_filled)
296  {
297  fill_buffer<sizeof(WideCharType)>();
298 
299  JSON_ASSERT(utf8_bytes_filled > 0);
300  JSON_ASSERT(utf8_bytes_index == 0);
301  }
302 
303  // use buffer
304  JSON_ASSERT(utf8_bytes_filled > 0);
305  JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled);
306  return utf8_bytes[utf8_bytes_index++];
307  }
308 
309  private:
310  BaseInputAdapter base_adapter;
311 
312  template<size_t T>
313  void fill_buffer()
314  {
315  wide_string_input_helper<BaseInputAdapter, T>::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
316  }
317 
319  std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
320 
322  std::size_t utf8_bytes_index = 0;
324  std::size_t utf8_bytes_filled = 0;
325 };
326 
327 
328 template<typename IteratorType, typename Enable = void>
330 {
331  using iterator_type = IteratorType;
332  using char_type = typename std::iterator_traits<iterator_type>::value_type;
334 
335  static adapter_type create(IteratorType first, IteratorType last)
336  {
337  return adapter_type(std::move(first), std::move(last));
338  }
339 };
340 
341 template<typename T>
343 {
344  using value_type = typename std::iterator_traits<T>::value_type;
345  enum
346  {
347  value = sizeof(value_type) > 1
348  };
349 };
350 
351 template<typename IteratorType>
352 struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
353 {
354  using iterator_type = IteratorType;
355  using char_type = typename std::iterator_traits<iterator_type>::value_type;
358 
359  static adapter_type create(IteratorType first, IteratorType last)
360  {
361  return adapter_type(base_adapter_type(std::move(first), std::move(last)));
362  }
363 };
364 
365 // General purpose iterator-based input
366 template<typename IteratorType>
367 typename iterator_input_adapter_factory<IteratorType>::adapter_type input_adapter(IteratorType first, IteratorType last)
368 {
370  return factory_type::create(first, last);
371 }
372 
373 // Convenience shorthand from container to iterator
// Creates an adapter over the whole container by forwarding its begin/end
// iterators to the iterator-pair overload.
template<typename ContainerType>
auto input_adapter(const ContainerType& container) -> decltype(input_adapter(begin(container), end(container)))
{
    // Enable ADL: fall back to std::begin/std::end when the container's
    // namespace provides no begin/end of its own
    using std::begin;
    using std::end;

    auto first = begin(container);
    auto last = end(container);
    return input_adapter(std::move(first), std::move(last));
}
383 
384 // Special cases with fast paths
385 inline file_input_adapter input_adapter(std::FILE* file)
386 {
387  return file_input_adapter(file);
388 }
389 
390 inline input_stream_adapter input_adapter(std::istream& stream)
391 {
392  return input_stream_adapter(stream);
393 }
394 
395 inline input_stream_adapter input_adapter(std::istream&& stream)
396 {
397  return input_stream_adapter(stream);
398 }
399 
400 using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval<const char*>(), std::declval<const char*>()));
401 
402 // Null-delimited strings, and the like.
403 template < typename CharT,
404  typename std::enable_if <
405  std::is_pointer<CharT>::value&&
406  !std::is_array<CharT>::value&&
407  std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
408  sizeof(typename std::remove_pointer<CharT>::type) == 1,
409  int >::type = 0 >
410 contiguous_bytes_input_adapter input_adapter(CharT b)
411 {
412  auto length = std::strlen(reinterpret_cast<const char*>(b));
413  const auto* ptr = reinterpret_cast<const char*>(b);
414  return input_adapter(ptr, ptr + length);
415 }
416 
// Built-in arrays: the adapter spans all N elements of the array.
template<typename T, std::size_t N>
auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N))
{
    T* const first = array;
    return input_adapter(first, first + N);
}
422 
423 // This class only handles inputs of input_buffer_adapter type.
424 // It's required so that expressions like {ptr, len} can be implicitly converted
425 // to the correct adapter.
427 {
428  public:
429  template < typename CharT,
430  typename std::enable_if <
431  std::is_pointer<CharT>::value&&
432  std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
433  sizeof(typename std::remove_pointer<CharT>::type) == 1,
434  int >::type = 0 >
435  span_input_adapter(CharT b, std::size_t l)
436  : ia(reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(b) + l) {}
437 
438  template<class IteratorType,
439  typename std::enable_if<
440  std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
441  int>::type = 0>
442  span_input_adapter(IteratorType first, IteratorType last)
443  : ia(input_adapter(first, last)) {}
444 
445  contiguous_bytes_input_adapter&& get()
446  {
447  return std::move(ia);
448  }
449 
450  private:
451  contiguous_bytes_input_adapter ia;
452 };
453 } // namespace detail
454 } // namespace nlohmann
a class to store JSON values
Definition: json.hpp:170
Definition: input_adapters.hpp:34
Definition: input_adapters.hpp:70
Definition: input_adapters.hpp:123
Definition: input_adapters.hpp:427
Definition: input_adapters.hpp:285
@ value
the parser finished reading a JSON value
input_format_t
the supported input formats
Definition: input_adapters.hpp:23
namespace for Niels Lohmann
Definition: adl_serializer.hpp:9
Definition: input_adapters.hpp:343
Definition: input_adapters.hpp:330
Definition: input_adapters.hpp:160