libstdc++
regex.tcc
Go to the documentation of this file.
1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2019 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33_GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35namespace __detail
36{
37 // Result of merging regex_match and regex_search.
38 //
39 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40 // the other one if possible, for test purpose).
41 //
42 // That __match_mode is true means regex_match, else regex_search.
43 template<typename _BiIter, typename _Alloc,
44 typename _CharT, typename _TraitsT,
45 _RegexExecutorPolicy __policy,
46 bool __match_mode>
47 bool
48 __regex_algo_impl(_BiIter __s,
49 _BiIter __e,
50 match_results<_BiIter, _Alloc>& __m,
51 const basic_regex<_CharT, _TraitsT>& __re,
53 {
54 if (__re._M_automaton == nullptr)
55 return false;
56
57 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58 __m._M_begin = __s;
59 __m._M_resize(__re._M_automaton->_M_sub_count());
60 for (auto& __it : __res)
61 __it.matched = false;
62
63 bool __ret;
64 if ((__re.flags() & regex_constants::__polynomial)
65 || (__policy == _RegexExecutorPolicy::_S_alternate
66 && !__re._M_automaton->_M_has_backref))
67 {
68 _Executor<_BiIter, _Alloc, _TraitsT, false>
69 __executor(__s, __e, __m, __re, __flags);
70 if (__match_mode)
71 __ret = __executor._M_match();
72 else
73 __ret = __executor._M_search();
74 }
75 else
76 {
77 _Executor<_BiIter, _Alloc, _TraitsT, true>
78 __executor(__s, __e, __m, __re, __flags);
79 if (__match_mode)
80 __ret = __executor._M_match();
81 else
82 __ret = __executor._M_search();
83 }
84 if (__ret)
85 {
86 for (auto& __it : __res)
87 if (!__it.matched)
88 __it.first = __it.second = __e;
89 auto& __pre = __m._M_prefix();
90 auto& __suf = __m._M_suffix();
91 if (__match_mode)
92 {
93 __pre.matched = false;
94 __pre.first = __s;
95 __pre.second = __s;
96 __suf.matched = false;
97 __suf.first = __e;
98 __suf.second = __e;
99 }
100 else
101 {
102 __pre.first = __s;
103 __pre.second = __res[0].first;
104 __pre.matched = (__pre.first != __pre.second);
105 __suf.first = __res[0].second;
106 __suf.second = __e;
107 __suf.matched = (__suf.first != __suf.second);
108 }
109 }
110 else
111 {
112 __m._M_resize(0);
113 for (auto& __it : __res)
114 {
115 __it.matched = false;
116 __it.first = __it.second = __e;
117 }
118 }
119 return __ret;
120 }
121}
122
123 template<typename _Ch_type>
124 template<typename _Fwd_iter>
125 typename regex_traits<_Ch_type>::string_type
127 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
128 {
129 typedef std::ctype<char_type> __ctype_type;
130 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
131
132 static const char* __collatenames[] =
133 {
134 "NUL",
135 "SOH",
136 "STX",
137 "ETX",
138 "EOT",
139 "ENQ",
140 "ACK",
141 "alert",
142 "backspace",
143 "tab",
144 "newline",
145 "vertical-tab",
146 "form-feed",
147 "carriage-return",
148 "SO",
149 "SI",
150 "DLE",
151 "DC1",
152 "DC2",
153 "DC3",
154 "DC4",
155 "NAK",
156 "SYN",
157 "ETB",
158 "CAN",
159 "EM",
160 "SUB",
161 "ESC",
162 "IS4",
163 "IS3",
164 "IS2",
165 "IS1",
166 "space",
167 "exclamation-mark",
168 "quotation-mark",
169 "number-sign",
170 "dollar-sign",
171 "percent-sign",
172 "ampersand",
173 "apostrophe",
174 "left-parenthesis",
175 "right-parenthesis",
176 "asterisk",
177 "plus-sign",
178 "comma",
179 "hyphen",
180 "period",
181 "slash",
182 "zero",
183 "one",
184 "two",
185 "three",
186 "four",
187 "five",
188 "six",
189 "seven",
190 "eight",
191 "nine",
192 "colon",
193 "semicolon",
194 "less-than-sign",
195 "equals-sign",
196 "greater-than-sign",
197 "question-mark",
198 "commercial-at",
199 "A",
200 "B",
201 "C",
202 "D",
203 "E",
204 "F",
205 "G",
206 "H",
207 "I",
208 "J",
209 "K",
210 "L",
211 "M",
212 "N",
213 "O",
214 "P",
215 "Q",
216 "R",
217 "S",
218 "T",
219 "U",
220 "V",
221 "W",
222 "X",
223 "Y",
224 "Z",
225 "left-square-bracket",
226 "backslash",
227 "right-square-bracket",
228 "circumflex",
229 "underscore",
230 "grave-accent",
231 "a",
232 "b",
233 "c",
234 "d",
235 "e",
236 "f",
237 "g",
238 "h",
239 "i",
240 "j",
241 "k",
242 "l",
243 "m",
244 "n",
245 "o",
246 "p",
247 "q",
248 "r",
249 "s",
250 "t",
251 "u",
252 "v",
253 "w",
254 "x",
255 "y",
256 "z",
257 "left-curly-bracket",
258 "vertical-line",
259 "right-curly-bracket",
260 "tilde",
261 "DEL",
262 };
263
264 string __s;
265 for (; __first != __last; ++__first)
266 __s += __fctyp.narrow(*__first, 0);
267
268 for (const auto& __it : __collatenames)
269 if (__s == __it)
270 return string_type(1, __fctyp.widen(
271 static_cast<char>(&__it - __collatenames)));
272
273 // TODO Add digraph support:
274 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
276 return string_type();
277 }
278
279 template<typename _Ch_type>
280 template<typename _Fwd_iter>
281 typename regex_traits<_Ch_type>::char_class_type
283 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
284 {
285 typedef std::ctype<char_type> __ctype_type;
286 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
287
288 // Mappings from class name to class mask.
289 static const pair<const char*, char_class_type> __classnames[] =
290 {
291 {"d", ctype_base::digit},
292 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
293 {"s", ctype_base::space},
294 {"alnum", ctype_base::alnum},
295 {"alpha", ctype_base::alpha},
296 {"blank", ctype_base::blank},
297 {"cntrl", ctype_base::cntrl},
298 {"digit", ctype_base::digit},
299 {"graph", ctype_base::graph},
300 {"lower", ctype_base::lower},
301 {"print", ctype_base::print},
302 {"punct", ctype_base::punct},
303 {"space", ctype_base::space},
304 {"upper", ctype_base::upper},
305 {"xdigit", ctype_base::xdigit},
306 };
307
308 string __s;
309 for (; __first != __last; ++__first)
310 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
311
312 for (const auto& __it : __classnames)
313 if (__s == __it.first)
314 {
315 if (__icase
316 && ((__it.second
317 & (ctype_base::lower | ctype_base::upper)) != 0))
318 return ctype_base::alpha;
319 return __it.second;
320 }
321 return 0;
322 }
323
324 template<typename _Ch_type>
325 bool
327 isctype(_Ch_type __c, char_class_type __f) const
328 {
329 typedef std::ctype<char_type> __ctype_type;
330 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
331
332 return __fctyp.is(__f._M_base, __c)
333 // [[:w:]]
334 || ((__f._M_extended & _RegexMask::_S_under)
335 && __c == __fctyp.widen('_'));
336 }
337
338 template<typename _Ch_type>
339 int
341 value(_Ch_type __ch, int __radix) const
342 {
344 long __v;
345 if (__radix == 8)
346 __is >> std::oct;
347 else if (__radix == 16)
348 __is >> std::hex;
349 __is >> __v;
350 return __is.fail() ? -1 : __v;
351 }
352
353 template<typename _Bi_iter, typename _Alloc>
354 template<typename _Out_iter>
356 format(_Out_iter __out,
357 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
358 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
359 match_flag_type __flags) const
360 {
361 __glibcxx_assert( ready() );
363 typedef std::ctype<char_type> __ctype_type;
364 const __ctype_type&
365 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
366
367 auto __output = [&](size_t __idx)
368 {
369 auto& __sub = (*this)[__idx];
370 if (__sub.matched)
371 __out = std::copy(__sub.first, __sub.second, __out);
372 };
373
374 if (__flags & regex_constants::format_sed)
375 {
376 bool __escaping = false;
377 for (; __fmt_first != __fmt_last; __fmt_first++)
378 {
379 if (__escaping)
380 {
381 __escaping = false;
382 if (__fctyp.is(__ctype_type::digit, *__fmt_first))
383 __output(__traits.value(*__fmt_first, 10));
384 else
385 *__out++ = *__fmt_first;
386 continue;
387 }
388 if (*__fmt_first == '\\')
389 {
390 __escaping = true;
391 continue;
392 }
393 if (*__fmt_first == '&')
394 {
395 __output(0);
396 continue;
397 }
398 *__out++ = *__fmt_first;
399 }
400 if (__escaping)
401 *__out++ = '\\';
402 }
403 else
404 {
405 while (1)
406 {
407 auto __next = std::find(__fmt_first, __fmt_last, '$');
408 if (__next == __fmt_last)
409 break;
410
411 __out = std::copy(__fmt_first, __next, __out);
412
413 auto __eat = [&](char __ch) -> bool
414 {
415 if (*__next == __ch)
416 {
417 ++__next;
418 return true;
419 }
420 return false;
421 };
422
423 if (++__next == __fmt_last)
424 *__out++ = '$';
425 else if (__eat('$'))
426 *__out++ = '$';
427 else if (__eat('&'))
428 __output(0);
429 else if (__eat('`'))
430 {
431 auto& __sub = _M_prefix();
432 if (__sub.matched)
433 __out = std::copy(__sub.first, __sub.second, __out);
434 }
435 else if (__eat('\''))
436 {
437 auto& __sub = _M_suffix();
438 if (__sub.matched)
439 __out = std::copy(__sub.first, __sub.second, __out);
440 }
441 else if (__fctyp.is(__ctype_type::digit, *__next))
442 {
443 long __num = __traits.value(*__next, 10);
444 if (++__next != __fmt_last
445 && __fctyp.is(__ctype_type::digit, *__next))
446 {
447 __num *= 10;
448 __num += __traits.value(*__next++, 10);
449 }
450 if (0 <= __num && __num < this->size())
451 __output(__num);
452 }
453 else
454 *__out++ = '$';
455 __fmt_first = __next;
456 }
457 __out = std::copy(__fmt_first, __fmt_last, __out);
458 }
459 return __out;
460 }
461
462 template<typename _Out_iter, typename _Bi_iter,
463 typename _Rx_traits, typename _Ch_type>
464 _Out_iter
465 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
467 const _Ch_type* __fmt,
469 {
471 _IterT __i(__first, __last, __e, __flags);
472 _IterT __end;
473 if (__i == __end)
474 {
475 if (!(__flags & regex_constants::format_no_copy))
476 __out = std::copy(__first, __last, __out);
477 }
478 else
479 {
480 sub_match<_Bi_iter> __last;
481 auto __len = char_traits<_Ch_type>::length(__fmt);
482 for (; __i != __end; ++__i)
483 {
484 if (!(__flags & regex_constants::format_no_copy))
485 __out = std::copy(__i->prefix().first, __i->prefix().second,
486 __out);
487 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
488 __last = __i->suffix();
490 break;
491 }
492 if (!(__flags & regex_constants::format_no_copy))
493 __out = std::copy(__last.first, __last.second, __out);
494 }
495 return __out;
496 }
497
498 template<typename _Bi_iter,
499 typename _Ch_type,
500 typename _Rx_traits>
501 bool
503 operator==(const regex_iterator& __rhs) const noexcept
504 {
505 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
506 return true;
507 return _M_pregex == __rhs._M_pregex
508 && _M_begin == __rhs._M_begin
509 && _M_end == __rhs._M_end
510 && _M_flags == __rhs._M_flags
511 && _M_match[0] == __rhs._M_match[0];
512 }
513
514 template<typename _Bi_iter,
515 typename _Ch_type,
516 typename _Rx_traits>
520 {
521 // In all cases in which the call to regex_search returns true,
522 // match.prefix().first shall be equal to the previous value of
523 // match[0].second, and for each index i in the half-open range
524 // [0, match.size()) for which match[i].matched is true,
525 // match[i].position() shall return distance(begin, match[i].first).
526 // [28.12.1.4.5]
527 if (_M_match[0].matched)
528 {
529 auto __start = _M_match[0].second;
530 auto __prefix_first = _M_match[0].second;
531 if (_M_match[0].first == _M_match[0].second)
532 {
533 if (__start == _M_end)
534 {
535 _M_pregex = nullptr;
536 return *this;
537 }
538 else
539 {
540 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
541 _M_flags
544 {
545 __glibcxx_assert(_M_match[0].matched);
546 auto& __prefix = _M_match._M_prefix();
547 __prefix.first = __prefix_first;
548 __prefix.matched = __prefix.first != __prefix.second;
549 // [28.12.1.4.5]
550 _M_match._M_begin = _M_begin;
551 return *this;
552 }
553 else
554 ++__start;
555 }
556 }
558 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
559 {
560 __glibcxx_assert(_M_match[0].matched);
561 auto& __prefix = _M_match._M_prefix();
562 __prefix.first = __prefix_first;
563 __prefix.matched = __prefix.first != __prefix.second;
564 // [28.12.1.4.5]
565 _M_match._M_begin = _M_begin;
566 }
567 else
568 _M_pregex = nullptr;
569 }
570 return *this;
571 }
572
573 template<typename _Bi_iter,
574 typename _Ch_type,
575 typename _Rx_traits>
579 {
580 _M_position = __rhs._M_position;
581 _M_subs = __rhs._M_subs;
582 _M_n = __rhs._M_n;
583 _M_suffix = __rhs._M_suffix;
584 _M_has_m1 = __rhs._M_has_m1;
585 _M_normalize_result();
586 return *this;
587 }
588
589 template<typename _Bi_iter,
590 typename _Ch_type,
591 typename _Rx_traits>
592 bool
594 operator==(const regex_token_iterator& __rhs) const
595 {
596 if (_M_end_of_seq() && __rhs._M_end_of_seq())
597 return true;
598 if (_M_suffix.matched && __rhs._M_suffix.matched
599 && _M_suffix == __rhs._M_suffix)
600 return true;
601 if (_M_end_of_seq() || _M_suffix.matched
602 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
603 return false;
604 return _M_position == __rhs._M_position
605 && _M_n == __rhs._M_n
606 && _M_subs == __rhs._M_subs;
607 }
608
609 template<typename _Bi_iter,
610 typename _Ch_type,
611 typename _Rx_traits>
615 {
616 _Position __prev = _M_position;
617 if (_M_suffix.matched)
618 *this = regex_token_iterator();
619 else if (_M_n + 1 < _M_subs.size())
620 {
621 _M_n++;
622 _M_result = &_M_current_match();
623 }
624 else
625 {
626 _M_n = 0;
627 ++_M_position;
628 if (_M_position != _Position())
629 _M_result = &_M_current_match();
630 else if (_M_has_m1 && __prev->suffix().length() != 0)
631 {
632 _M_suffix.matched = true;
633 _M_suffix.first = __prev->suffix().first;
634 _M_suffix.second = __prev->suffix().second;
635 _M_result = &_M_suffix;
636 }
637 else
638 *this = regex_token_iterator();
639 }
640 return *this;
641 }
642
643 template<typename _Bi_iter,
644 typename _Ch_type,
645 typename _Rx_traits>
646 void
648 _M_init(_Bi_iter __a, _Bi_iter __b)
649 {
650 _M_has_m1 = false;
651 for (auto __it : _M_subs)
652 if (__it == -1)
653 {
654 _M_has_m1 = true;
655 break;
656 }
657 if (_M_position != _Position())
658 _M_result = &_M_current_match();
659 else if (_M_has_m1)
660 {
661 _M_suffix.matched = true;
662 _M_suffix.first = __a;
663 _M_suffix.second = __b;
664 _M_result = &_M_suffix;
665 }
666 else
667 _M_result = nullptr;
668 }
669
670_GLIBCXX_END_NAMESPACE_VERSION
671} // namespace
_Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const basic_string< _Ch_type, _St, _Sa > &__fmt, regex_constants::match_flag_type __flags=regex_constants::match_default)
Search for a regular expression within a range multiple times, and replace the matched parts through filling a format string.
Definition: regex.h:2365
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2217
ISO C++ entities toplevel namespace is std.
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:1036
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:1044
_GLIBCXX17_INLINE constexpr match_flag_type format_first_only
_GLIBCXX17_INLINE constexpr match_flag_type format_sed
_GLIBCXX17_INLINE constexpr match_flag_type match_continuous
_GLIBCXX17_INLINE constexpr syntax_option_type __polynomial
_GLIBCXX17_INLINE constexpr match_flag_type format_no_copy
_GLIBCXX17_INLINE constexpr match_flag_type match_not_null
match_flag_type
This is a bitmask type indicating regex matching rules.
_GLIBCXX17_INLINE constexpr match_flag_type match_prev_avail
bool fail() const
Fast error checking.
Definition: basic_ios.h:201
Controlling input for std::string.
Definition: sstream:393
Managing sequences of characters and character-like objects.
Basis for explicit traits specializations.
Definition: char_traits.h:285
Primary class template ctype facet.
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
Describes aspects of a regular expression.
Definition: regex.h:81
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition: regex.tcc:341
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition: regex.tcc:127
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition: regex.h:370
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition: regex.tcc:327
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition: regex.tcc:283
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
Definition: regex.tcc:503
regex_iterator & operator++()
Increments a regex_iterator.
Definition: regex.tcc:519
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition: regex.tcc:594
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition: regex.tcc:578
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition: regex.tcc:614
Struct holding two objects of arbitrary type.
Definition: stl_pair.h:210
_T1 first
first is a copy of the first object
Definition: stl_pair.h:214
_T2 second
second is a copy of the second object
Definition: stl_pair.h:215