Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6554 serge 1
// class template regex -*- C++ -*-
2
 
3
// Copyright (C) 2013-2015 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the
7
// terms of the GNU General Public License as published by the
8
// Free Software Foundation; either version 3, or (at your option)
9
// any later version.
10
 
11
// This library is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
// GNU General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// <http://www.gnu.org/licenses/>.
24
 
25
/**
26
 *  @file bits/regex_scanner.h
27
 *  This is an internal header file, included by other library headers.
28
 *  Do not attempt to use it directly. @headername{regex}
29
 */
30
 
31
namespace std _GLIBCXX_VISIBILITY(default)
32
{
33
namespace __detail
34
{
35
_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
 
37
  /**
38
   * @addtogroup regex-detail
39
   * @{
40
   */
41
 
42
  struct _ScannerBase
43
  {
44
  public:
45
    /// Token types returned from the scanner.
46
    enum _TokenT
47
    {
48
      _S_token_anychar,
49
      _S_token_ord_char,
50
      _S_token_oct_num,
51
      _S_token_hex_num,
52
      _S_token_backref,
53
      _S_token_subexpr_begin,
54
      _S_token_subexpr_no_group_begin,
55
      _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
56
      _S_token_subexpr_end,
57
      _S_token_bracket_begin,
58
      _S_token_bracket_neg_begin,
59
      _S_token_bracket_end,
60
      _S_token_interval_begin,
61
      _S_token_interval_end,
62
      _S_token_quoted_class,
63
      _S_token_char_class_name,
64
      _S_token_collsymbol,
65
      _S_token_equiv_class_name,
66
      _S_token_opt,
67
      _S_token_or,
68
      _S_token_closure0,
69
      _S_token_closure1,
70
      _S_token_line_begin,
71
      _S_token_line_end,
72
      _S_token_word_bound, // neg if _M_value[0] == 'n'
73
      _S_token_comma,
74
      _S_token_dup_count,
75
      _S_token_eof,
76
      _S_token_unknown
77
    };
78
 
79
  protected:
80
    typedef regex_constants::syntax_option_type _FlagT;
81
 
82
    enum _StateT
83
    {
84
      _S_state_normal,
85
      _S_state_in_brace,
86
      _S_state_in_bracket,
87
    };
88
 
89
  protected:
90
    _ScannerBase(_FlagT __flags)
91
    : _M_state(_S_state_normal),
92
    _M_flags(__flags),
93
    _M_escape_tbl(_M_is_ecma()
94
		  ? _M_ecma_escape_tbl
95
		  : _M_awk_escape_tbl),
96
    _M_spec_char(_M_is_ecma()
97
		 ? _M_ecma_spec_char
98
		 : _M_flags & regex_constants::basic
99
		 ? _M_basic_spec_char
100
		 : _M_flags & regex_constants::extended
101
		 ? _M_extended_spec_char
102
		 : _M_flags & regex_constants::grep
103
		 ?  ".[\\*^$\n"
104
		 : _M_flags & regex_constants::egrep
105
		 ? ".[\\()*+?{|^$\n"
106
		 : _M_flags & regex_constants::awk
107
		 ? _M_extended_spec_char
108
		 : nullptr),
109
    _M_at_bracket_start(false)
110
    { __glibcxx_assert(_M_spec_char); }
111
 
112
  protected:
113
    const char*
114
    _M_find_escape(char __c)
115
    {
116
      auto __it = _M_escape_tbl;
117
      for (; __it->first != '\0'; ++__it)
118
	if (__it->first == __c)
119
	  return &__it->second;
120
      return nullptr;
121
    }
122
 
123
    bool
124
    _M_is_ecma() const
125
    { return _M_flags & regex_constants::ECMAScript; }
126
 
127
    bool
128
    _M_is_basic() const
129
    { return _M_flags & (regex_constants::basic | regex_constants::grep); }
130
 
131
    bool
132
    _M_is_extended() const
133
    {
134
      return _M_flags & (regex_constants::extended
135
			 | regex_constants::egrep
136
			 | regex_constants::awk);
137
    }
138
 
139
    bool
140
    _M_is_grep() const
141
    { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
142
 
143
    bool
144
    _M_is_awk() const
145
    { return _M_flags & regex_constants::awk; }
146
 
147
  protected:
148
    // TODO: Make them static in the next abi change.
149
    const std::pair _M_token_tbl[9] =
150
      {
151
	{'^', _S_token_line_begin},
152
	{'$', _S_token_line_end},
153
	{'.', _S_token_anychar},
154
	{'*', _S_token_closure0},
155
	{'+', _S_token_closure1},
156
	{'?', _S_token_opt},
157
	{'|', _S_token_or},
158
	{'\n', _S_token_or}, // grep and egrep
159
	{'\0', _S_token_or},
160
      };
161
    const std::pair _M_ecma_escape_tbl[8] =
162
      {
163
	{'0', '\0'},
164
	{'b', '\b'},
165
	{'f', '\f'},
166
	{'n', '\n'},
167
	{'r', '\r'},
168
	{'t', '\t'},
169
	{'v', '\v'},
170
	{'\0', '\0'},
171
      };
172
    const std::pair _M_awk_escape_tbl[11] =
173
      {
174
	{'"', '"'},
175
	{'/', '/'},
176
	{'\\', '\\'},
177
	{'a', '\a'},
178
	{'b', '\b'},
179
	{'f', '\f'},
180
	{'n', '\n'},
181
	{'r', '\r'},
182
	{'t', '\t'},
183
	{'v', '\v'},
184
	{'\0', '\0'},
185
      };
186
    const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
187
    const char* _M_basic_spec_char = ".[\\*^$";
188
    const char* _M_extended_spec_char = ".[\\()*+?{|^$";
189
 
190
    _StateT                       _M_state;
191
    _FlagT                        _M_flags;
192
    _TokenT                       _M_token;
193
    const std::pair*  _M_escape_tbl;
194
    const char*                   _M_spec_char;
195
    bool                          _M_at_bracket_start;
196
  };
197
 
198
  /**
199
   * @brief Scans an input range for regex tokens.
200
   *
201
   * The %_Scanner class interprets the regular expression pattern in
202
   * the input range passed to its constructor as a sequence of parse
203
   * tokens passed to the regular expression compiler.  The sequence
204
   * of tokens provided depends on the flag settings passed to the
205
   * constructor: different regular expression grammars will interpret
206
   * the same input pattern in syntactically different ways.
207
   */
208
  template
209
    class _Scanner
210
    : public _ScannerBase
211
    {
212
    public:
213
      typedef const _CharT*                                       _IterT;
214
      typedef std::basic_string<_CharT>                           _StringT;
215
      typedef regex_constants::syntax_option_type                 _FlagT;
216
      typedef const std::ctype<_CharT>                            _CtypeT;
217
 
218
      _Scanner(_IterT __begin, _IterT __end,
219
	       _FlagT __flags, std::locale __loc);
220
 
221
      void
222
      _M_advance();
223
 
224
      _TokenT
225
      _M_get_token() const
226
      { return _M_token; }
227
 
228
      const _StringT&
229
      _M_get_value() const
230
      { return _M_value; }
231
 
232
#ifdef _GLIBCXX_DEBUG
233
      std::ostream&
234
      _M_print(std::ostream&);
235
#endif
236
 
237
    private:
238
      void
239
      _M_scan_normal();
240
 
241
      void
242
      _M_scan_in_bracket();
243
 
244
      void
245
      _M_scan_in_brace();
246
 
247
      void
248
      _M_eat_escape_ecma();
249
 
250
      void
251
      _M_eat_escape_posix();
252
 
253
      void
254
      _M_eat_escape_awk();
255
 
256
      void
257
      _M_eat_class(char);
258
 
259
      _IterT                        _M_current;
260
      _IterT                        _M_end;
261
      _CtypeT&                      _M_ctype;
262
      _StringT                      _M_value;
263
      void (_Scanner::* _M_eat_escape)();
264
    };
265
 
266
 //@} regex-detail
267
_GLIBCXX_END_NAMESPACE_VERSION
268
} // namespace __detail
269
} // namespace std
270
 
271
#include <bits/regex_scanner.tcc>