Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6554 serge 1
// class template regex -*- C++ -*-
2
 
3
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the
7
// terms of the GNU General Public License as published by the
8
// Free Software Foundation; either version 3, or (at your option)
9
// any later version.
10
 
11
// This library is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
// GNU General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// .
24
 
25
/**
26
 *  @file bits/regex_nfa.h
27
 *  This is an internal header file, included by other library headers.
28
 *  Do not attempt to use it directly. @headername{regex}
29
 */
30
 
31
namespace std _GLIBCXX_VISIBILITY(default)
32
{
33
namespace __detail
34
{
35
_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
 
37
  /**
38
   * @addtogroup regex-detail
39
   * @{
40
   */
41
 
42
  /// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
43
  class _Automaton
44
  {
45
  public:
46
    typedef unsigned int _SizeT;
47
 
48
  public:
49
    virtual
50
    ~_Automaton() { }
51
 
52
    virtual _SizeT
53
    _M_sub_count() const = 0;
54
 
55
#ifdef _GLIBCXX_DEBUG
56
    virtual std::ostream&
57
    _M_dot(std::ostream& __ostr) const = 0;
58
#endif
59
  };
60
 
61
  /// Generic shared pointer to an automaton.
62
  typedef std::shared_ptr<_Automaton> _AutomatonPtr;
63
 
64
  /// Operation codes that define the type of transitions within the base NFA
65
  /// that represents the regular expression.
66
  enum _Opcode
67
  {
68
      _S_opcode_unknown       =   0,
69
      _S_opcode_alternative   =   1,
70
      _S_opcode_subexpr_begin =   4,
71
      _S_opcode_subexpr_end   =   5,
72
      _S_opcode_match         = 100,
73
      _S_opcode_accept        = 255
74
  };
75
 
76
  /// Provides a generic facade for a templated match_results.
77
  struct _Results
78
  {
79
    virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
80
    virtual void _M_set_matched(int __i, bool __is_matched) = 0;
81
  };
82
 
83
  /// Tags current state (for subexpr begin/end).
84
  typedef std::function _Tagger;
85
 
86
  /// Start state tag.
87
  template
88
    struct _StartTagger
89
    {
90
      explicit
91
      _StartTagger(int __i)
92
      : _M_index(__i)
93
      { }
94
 
95
      void
96
      operator()(const _PatternCursor& __pc, _Results& __r)
97
      { __r._M_set_pos(_M_index, 0, __pc); }
98
 
99
      int       _M_index;
100
    };
101
 
102
  /// End state tag.
103
  template
104
    struct _EndTagger
105
    {
106
      explicit
107
      _EndTagger(int __i)
108
      : _M_index(__i)
109
      { }
110
 
111
      void
112
      operator()(const _PatternCursor& __pc, _Results& __r)
113
      { __r._M_set_pos(_M_index, 1, __pc); }
114
 
115
      int       _M_index;
116
      _FwdIterT _M_pos;
117
    };
118
 
119
  /// Indicates if current state matches cursor current.
120
  typedef std::function _Matcher;
121
 
122
  /// Matches any character
123
  inline bool
124
  _AnyMatcher(const _PatternCursor&)
125
  { return true; }
126
 
127
  /// Matches a single character
128
  template
129
    struct _CharMatcher
130
    {
131
      typedef typename _TraitsT::char_type char_type;
132
 
133
      explicit
134
      _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
135
      : _M_traits(__t), _M_c(_M_traits.translate(__c))
136
      { }
137
 
138
      bool
139
      operator()(const _PatternCursor& __pc) const
140
      {
141
	typedef const _SpecializedCursor<_InIterT>& _CursorT;
142
	_CursorT __c = static_cast<_CursorT>(__pc);
143
	return _M_traits.translate(__c._M_current()) == _M_c;
144
      }
145
 
146
      const _TraitsT& _M_traits;
147
      char_type       _M_c;
148
    };
149
 
150
  /// Matches a character range (bracket expression)
151
  template
152
    struct _RangeMatcher
153
    {
154
      typedef typename _TraitsT::char_type _CharT;
155
      typedef std::basic_string<_CharT>    _StringT;
156
 
157
      explicit
158
      _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
159
      : _M_traits(__t), _M_is_non_matching(__is_non_matching)
160
      { }
161
 
162
      bool
163
      operator()(const _PatternCursor& __pc) const
164
      {
165
	typedef const _SpecializedCursor<_InIterT>& _CursorT;
166
	_CursorT __c = static_cast<_CursorT>(__pc);
167
	return true;
168
      }
169
 
170
      void
171
      _M_add_char(_CharT __c)
172
      { }
173
 
174
      void
175
      _M_add_collating_element(const _StringT& __s)
176
      { }
177
 
178
      void
179
      _M_add_equivalence_class(const _StringT& __s)
180
      { }
181
 
182
      void
183
      _M_add_character_class(const _StringT& __s)
184
      { }
185
 
186
      void
187
      _M_make_range()
188
      { }
189
 
190
      const _TraitsT& _M_traits;
191
      bool            _M_is_non_matching;
192
    };
193
 
194
  /// Identifies a state in the NFA.
195
  typedef int _StateIdT;
196
 
197
  /// The special case in which a state identifier is not an index.
198
  static const _StateIdT _S_invalid_state_id  = -1;
199
 
200
 
201
  /**
202
   * @brief struct _State
203
   *
204
   * An individual state in an NFA
205
   *
206
   * In this case a "state" is an entry in the NFA definition coupled
207
   * with its outgoing transition(s).  All states have a single outgoing
208
   * transition, except for accepting states (which have no outgoing
209
   * transitions) and alt states, which have two outgoing transitions.
210
   */
211
  struct _State
212
  {
213
    typedef int  _OpcodeT;
214
 
215
    _OpcodeT     _M_opcode;    // type of outgoing transition
216
    _StateIdT    _M_next;      // outgoing transition
217
    _StateIdT    _M_alt;       // for _S_opcode_alternative
218
    unsigned int _M_subexpr;   // for _S_opcode_subexpr_*
219
    _Tagger      _M_tagger;    // for _S_opcode_subexpr_*
220
    _Matcher     _M_matches;   // for _S_opcode_match
221
 
222
    explicit _State(_OpcodeT __opcode)
223
    : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
224
    { }
225
 
226
    _State(const _Matcher& __m)
227
    : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
228
    { }
229
 
230
    _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
231
    : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
232
      _M_tagger(__t)
233
    { }
234
 
235
    _State(_StateIdT __next, _StateIdT __alt)
236
    : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
237
    { }
238
 
239
#ifdef _GLIBCXX_DEBUG
240
    std::ostream&
241
    _M_print(std::ostream& ostr) const;
242
 
243
    // Prints graphviz dot commands for state.
244
    std::ostream&
245
    _M_dot(std::ostream& __ostr, _StateIdT __id) const;
246
#endif
247
  };
248
 
249
 
250
  /// The Grep Matcher works on sets of states.  Here are sets of states.
251
  typedef std::set<_StateIdT> _StateSet;
252
 
253
  /**
254
   * @brief struct _Nfa
255
   *
256
   * A collection of all states making up an NFA.
257
   *
258
   * An NFA is a 4-tuple M = (K, S, s, F), where
259
   *    K is a finite set of states,
260
   *    S is the alphabet of the NFA,
261
   *    s is the initial state,
262
   *    F is a set of final (accepting) states.
263
   *
264
   * This NFA class is templated on S, a type that will hold values of the
265
   * underlying alphabet (without regard to semantics of that alphabet).  The
266
   * other elements of the tuple are generated during construction of the NFA
267
   * and are available through accessor member functions.
268
   */
269
  class _Nfa
270
  : public _Automaton, public std::vector<_State>
271
  {
272
  public:
273
    typedef _State                              _StateT;
274
    typedef unsigned int                        _SizeT;
275
    typedef regex_constants::syntax_option_type _FlagT;
276
 
277
    _Nfa(_FlagT __f)
278
    : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
279
    { }
280
 
281
    ~_Nfa()
282
    { }
283
 
284
    _FlagT
285
    _M_options() const
286
    { return _M_flags; }
287
 
288
    _StateIdT
289
    _M_start() const
290
    { return _M_start_state; }
291
 
292
    const _StateSet&
293
    _M_final_states() const
294
    { return _M_accepting_states; }
295
 
296
    _SizeT
297
    _M_sub_count() const
298
    { return _M_subexpr_count; }
299
 
300
    _StateIdT
301
    _M_insert_accept()
302
    {
303
      this->push_back(_StateT(_S_opcode_accept));
304
      _M_accepting_states.insert(this->size()-1);
305
      return this->size()-1;
306
    }
307
 
308
    _StateIdT
309
    _M_insert_alt(_StateIdT __next, _StateIdT __alt)
310
    {
311
      this->push_back(_StateT(__next, __alt));
312
      return this->size()-1;
313
    }
314
 
315
    _StateIdT
316
    _M_insert_matcher(_Matcher __m)
317
    {
318
      this->push_back(_StateT(__m));
319
      return this->size()-1;
320
    }
321
 
322
    _StateIdT
323
    _M_insert_subexpr_begin(const _Tagger& __t)
324
    {
325
      this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++,
326
			      __t));
327
      return this->size()-1;
328
    }
329
 
330
    _StateIdT
331
    _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
332
    {
333
      this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
334
      return this->size()-1;
335
    }
336
 
337
#ifdef _GLIBCXX_DEBUG
338
    std::ostream&
339
    _M_dot(std::ostream& __ostr) const;
340
#endif
341
 
342
  private:
343
    _FlagT     _M_flags;
344
    _StateIdT  _M_start_state;
345
    _StateSet  _M_accepting_states;
346
    _SizeT     _M_subexpr_count;
347
  };
348
 
349
  /// Describes a sequence of one or more %_State, its current start
350
  /// and end(s).  This structure contains fragments of an NFA during
351
  /// construction.
352
  class _StateSeq
353
  {
354
  public:
355
    // Constructs a single-node sequence
356
    _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
357
    : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
358
    { }
359
    // Constructs a split sequence from two other sequencces
360
    _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
361
    : _M_nfa(__e1._M_nfa),
362
      _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
363
      _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
364
    { }
365
 
366
    // Constructs a split sequence from a single sequence
367
    _StateSeq(const _StateSeq& __e, _StateIdT __id)
368
    : _M_nfa(__e._M_nfa),
369
      _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
370
      _M_end1(__id), _M_end2(__e._M_end1)
371
    { }
372
 
373
    // Constructs a copy of a %_StateSeq
374
    _StateSeq(const _StateSeq& __rhs)
375
    : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
376
      _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
377
    { }
378
 
379
 
380
    _StateSeq& operator=(const _StateSeq& __rhs);
381
 
382
    _StateIdT
383
    _M_front() const
384
    { return _M_start; }
385
 
386
    // Extends a sequence by one.
387
    void
388
    _M_push_back(_StateIdT __id);
389
 
390
    // Extends and maybe joins a sequence.
391
    void
392
    _M_append(_StateIdT __id);
393
 
394
    void
395
    _M_append(_StateSeq& __rhs);
396
 
397
    // Clones an entire sequence.
398
    _StateIdT
399
    _M_clone();
400
 
401
  private:
402
    _Nfa&     _M_nfa;
403
    _StateIdT _M_start;
404
    _StateIdT _M_end1;
405
    _StateIdT _M_end2;
406
 
407
  };
408
 
409
 //@} regex-detail
410
_GLIBCXX_END_NAMESPACE_VERSION
411
} // namespace __detail
412
} // namespace std
413
 
414
#include 
415