Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. // class template regex -*- C++ -*-
  2.  
  3. // Copyright (C) 2013-2015 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library.  This library is free
  6. // software; you can redistribute it and/or modify it under the
  7. // terms of the GNU General Public License as published by the
  8. // Free Software Foundation; either version 3, or (at your option)
  9. // any later version.
  10.  
  11. // This library is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. // GNU General Public License for more details.
  15.  
  16. // Under Section 7 of GPL version 3, you are granted additional
  17. // permissions described in the GCC Runtime Library Exception, version
  18. // 3.1, as published by the Free Software Foundation.
  19.  
  20. // You should have received a copy of the GNU General Public License and
  21. // a copy of the GCC Runtime Library Exception along with this program;
  22. // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23. // <http://www.gnu.org/licenses/>.
  24.  
  25. /**
  26.  *  @file bits/regex_automaton.h
  27.  *  This is an internal header file, included by other library headers.
  28.  *  Do not attempt to use it directly. @headername{regex}
  29.  */
  30.  
  31. // This macro defines the maximal state number a NFA can have.
  32. #ifndef _GLIBCXX_REGEX_STATE_LIMIT
  33. #define _GLIBCXX_REGEX_STATE_LIMIT 100000
  34. #endif
  35.  
  36. namespace std _GLIBCXX_VISIBILITY(default)
  37. {
  38. namespace __detail
  39. {
  40. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  41.  
  42.   /**
  43.    *  @defgroup regex-detail Base and Implementation Classes
  44.    *  @ingroup regex
  45.    *  @{
  46.    */
  47.  
  48.   typedef long _StateIdT;
  49.   static const _StateIdT _S_invalid_state_id  = -1;
  50.  
  51.   template<typename _CharT>
  52.     using _Matcher = std::function<bool (_CharT)>;
  53.  
  54.   /// Operation codes that define the type of transitions within the base NFA
  55.   /// that represents the regular expression.
  56.   enum _Opcode : int
  57.   {
  58.       _S_opcode_unknown,
  59.       _S_opcode_alternative,
  60.       _S_opcode_repeat,
  61.       _S_opcode_backref,
  62.       _S_opcode_line_begin_assertion,
  63.       _S_opcode_line_end_assertion,
  64.       _S_opcode_word_boundary,
  65.       _S_opcode_subexpr_lookahead,
  66.       _S_opcode_subexpr_begin,
  67.       _S_opcode_subexpr_end,
  68.       _S_opcode_dummy,
  69.       _S_opcode_match,
  70.       _S_opcode_accept,
  71.   };
  72.  
  73.   struct _State_base
  74.   {
  75.     _Opcode      _M_opcode;           // type of outgoing transition
  76.     _StateIdT    _M_next;             // outgoing transition
  77.     union // Since they are mutually exclusive.
  78.     {
  79.       size_t _M_subexpr;        // for _S_opcode_subexpr_*
  80.       size_t _M_backref_index;  // for _S_opcode_backref
  81.       struct
  82.       {
  83.         // for _S_opcode_alternative, _S_opcode_repeat and
  84.         // _S_opcode_subexpr_lookahead
  85.         _StateIdT  _M_alt;
  86.         // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
  87.         // quantifiers (ungreedy if set true)
  88.         bool       _M_neg;
  89.       };
  90.     };
  91.  
  92.     explicit _State_base(_Opcode __opcode)
  93.     : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
  94.     { }
  95.  
  96.   protected:
  97.     ~_State_base() = default;
  98.  
  99.   public:
  100. #ifdef _GLIBCXX_DEBUG
  101.     std::ostream&
  102.     _M_print(std::ostream& ostr) const;
  103.  
  104.     // Prints graphviz dot commands for state.
  105.     std::ostream&
  106.     _M_dot(std::ostream& __ostr, _StateIdT __id) const;
  107. #endif
  108.   };
  109.  
  110.   template<typename _TraitsT>
  111.     struct _State : _State_base
  112.     {
  113.       typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
  114.  
  115.       _MatcherT      _M_matches;        // for _S_opcode_match
  116.  
  117.       explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
  118.     };
  119.  
  120.   struct _NFA_base
  121.   {
  122.     typedef size_t                              _SizeT;
  123.     typedef regex_constants::syntax_option_type _FlagT;
  124.  
  125.     explicit
  126.     _NFA_base(_FlagT __f)
  127.     : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
  128.     _M_has_backref(false)
  129.     { }
  130.  
  131.     _NFA_base(_NFA_base&&) = default;
  132.  
  133.   protected:
  134.     ~_NFA_base() = default;
  135.  
  136.   public:
  137.     _FlagT
  138.     _M_options() const
  139.     { return _M_flags; }
  140.  
  141.     _StateIdT
  142.     _M_start() const
  143.     { return _M_start_state; }
  144.  
  145.     _SizeT
  146.     _M_sub_count() const
  147.     { return _M_subexpr_count; }
  148.  
  149.     std::vector<size_t>       _M_paren_stack;
  150.     _FlagT                    _M_flags;
  151.     _StateIdT                 _M_start_state;
  152.     _SizeT                    _M_subexpr_count;
  153.     bool                      _M_has_backref;
  154.   };
  155.  
  156.   template<typename _TraitsT>
  157.     struct _NFA
  158.     : _NFA_base, std::vector<_State<_TraitsT>>
  159.     {
  160.       typedef _State<_TraitsT>                          _StateT;
  161.       typedef _Matcher<typename _TraitsT::char_type>    _MatcherT;
  162.  
  163.       _NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags)
  164.       : _NFA_base(__flags)
  165.       { _M_traits.imbue(__loc); }
  166.  
  167.       // for performance reasons _NFA objects should only be moved not copied
  168.       _NFA(const _NFA&) = delete;
  169.       _NFA(_NFA&&) = default;
  170.  
  171.       _StateIdT
  172.       _M_insert_accept()
  173.       {
  174.         auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
  175.         return __ret;
  176.       }
  177.  
  178.       _StateIdT
  179.       _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
  180.       {
  181.         _StateT __tmp(_S_opcode_alternative);
  182.         // It labels every quantifier to make greedy comparison easier in BFS
  183.         // approach.
  184.         __tmp._M_next = __next;
  185.         __tmp._M_alt = __alt;
  186.         return _M_insert_state(std::move(__tmp));
  187.       }
  188.  
  189.       _StateIdT
  190.       _M_insert_repeat(_StateIdT __next, _StateIdT __alt, bool __neg)
  191.       {
  192.         _StateT __tmp(_S_opcode_repeat);
  193.         // It labels every quantifier to make greedy comparison easier in BFS
  194.         // approach.
  195.         __tmp._M_next = __next;
  196.         __tmp._M_alt = __alt;
  197.         __tmp._M_neg = __neg;
  198.         return _M_insert_state(std::move(__tmp));
  199.       }
  200.  
  201.       _StateIdT
  202.       _M_insert_matcher(_MatcherT __m)
  203.       {
  204.         _StateT __tmp(_S_opcode_match);
  205.         __tmp._M_matches = std::move(__m);
  206.         return _M_insert_state(std::move(__tmp));
  207.       }
  208.  
  209.       _StateIdT
  210.       _M_insert_subexpr_begin()
  211.       {
  212.         auto __id = this->_M_subexpr_count++;
  213.         this->_M_paren_stack.push_back(__id);
  214.         _StateT __tmp(_S_opcode_subexpr_begin);
  215.         __tmp._M_subexpr = __id;
  216.         return _M_insert_state(std::move(__tmp));
  217.       }
  218.  
  219.       _StateIdT
  220.       _M_insert_subexpr_end()
  221.       {
  222.         _StateT __tmp(_S_opcode_subexpr_end);
  223.         __tmp._M_subexpr = this->_M_paren_stack.back();
  224.         this->_M_paren_stack.pop_back();
  225.         return _M_insert_state(std::move(__tmp));
  226.       }
  227.  
  228.       _StateIdT
  229.       _M_insert_backref(size_t __index);
  230.  
  231.       _StateIdT
  232.       _M_insert_line_begin()
  233.       { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
  234.  
  235.       _StateIdT
  236.       _M_insert_line_end()
  237.       { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
  238.  
  239.       _StateIdT
  240.       _M_insert_word_bound(bool __neg)
  241.       {
  242.         _StateT __tmp(_S_opcode_word_boundary);
  243.         __tmp._M_neg = __neg;
  244.         return _M_insert_state(std::move(__tmp));
  245.       }
  246.  
  247.       _StateIdT
  248.       _M_insert_lookahead(_StateIdT __alt, bool __neg)
  249.       {
  250.         _StateT __tmp(_S_opcode_subexpr_lookahead);
  251.         __tmp._M_alt = __alt;
  252.         __tmp._M_neg = __neg;
  253.         return _M_insert_state(std::move(__tmp));
  254.       }
  255.  
  256.       _StateIdT
  257.       _M_insert_dummy()
  258.       { return _M_insert_state(_StateT(_S_opcode_dummy)); }
  259.  
  260.       _StateIdT
  261.       _M_insert_state(_StateT __s)
  262.       {
  263.         this->push_back(std::move(__s));
  264.         if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT)
  265.           __throw_regex_error(regex_constants::error_space);
  266.         return this->size()-1;
  267.       }
  268.  
  269.       // Eliminate dummy node in this NFA to make it compact.
  270.       void
  271.       _M_eliminate_dummy();
  272.  
  273. #ifdef _GLIBCXX_DEBUG
  274.       std::ostream&
  275.       _M_dot(std::ostream& __ostr) const;
  276. #endif
  277.     public:
  278.       _TraitsT                  _M_traits;
  279.     };
  280.  
  281.   /// Describes a sequence of one or more %_State, its current start
  282.   /// and end(s).  This structure contains fragments of an NFA during
  283.   /// construction.
  284.   template<typename _TraitsT>
  285.     class _StateSeq
  286.     {
  287.     public:
  288.       typedef _NFA<_TraitsT> _RegexT;
  289.  
  290.     public:
  291.       _StateSeq(_RegexT& __nfa, _StateIdT __s)
  292.       : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
  293.       { }
  294.  
  295.       _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
  296.       : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
  297.       { }
  298.  
  299.       // Append a state on *this and change *this to the new sequence.
  300.       void
  301.       _M_append(_StateIdT __id)
  302.       {
  303.         _M_nfa[_M_end]._M_next = __id;
  304.         _M_end = __id;
  305.       }
  306.  
  307.       // Append a sequence on *this and change *this to the new sequence.
  308.       void
  309.       _M_append(const _StateSeq& __s)
  310.       {
  311.         _M_nfa[_M_end]._M_next = __s._M_start;
  312.         _M_end = __s._M_end;
  313.       }
  314.  
  315.       // Clones an entire sequence.
  316.       _StateSeq
  317.       _M_clone();
  318.  
  319.     public:
  320.       _RegexT&  _M_nfa;
  321.       _StateIdT _M_start;
  322.       _StateIdT _M_end;
  323.     };
  324.  
  325.  //@} regex-detail
  326. _GLIBCXX_END_NAMESPACE_VERSION
  327. } // namespace __detail
  328. } // namespace std
  329.  
  330. #include <bits/regex_automaton.tcc>
  331.