Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. // class template regex -*- C++ -*-
  2.  
  3. // Copyright (C) 2010-2013 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library.  This library is free
  6. // software; you can redistribute it and/or modify it under the
  7. // terms of the GNU General Public License as published by the
  8. // Free Software Foundation; either version 3, or (at your option)
  9. // any later version.
  10.  
  11. // This library is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. // GNU General Public License for more details.
  15.  
  16. // Under Section 7 of GPL version 3, you are granted additional
  17. // permissions described in the GCC Runtime Library Exception, version
  18. // 3.1, as published by the Free Software Foundation.
  19.  
  20. // You should have received a copy of the GNU General Public License and
  21. // a copy of the GCC Runtime Library Exception along with this program;
  22. // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23. // <http://www.gnu.org/licenses/>.
  24.  
  25. /**
  26.  *  @file bits/regex_nfa.h
  27.  *  This is an internal header file, included by other library headers.
  28.  *  Do not attempt to use it directly. @headername{regex}
  29.  */
  30.  
  31. namespace std _GLIBCXX_VISIBILITY(default)
  32. {
  33. namespace __detail
  34. {
  35. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  36.  
  37.   /**
  38.    * @addtogroup regex-detail
  39.    * @{
  40.    */
  41.  
  42.   /// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
  43.   class _Automaton
  44.   {
  45.   public:
  46.     typedef unsigned int _SizeT;
  47.  
  48.   public:
  49.     virtual
  50.     ~_Automaton() { }
  51.  
  52.     virtual _SizeT
  53.     _M_sub_count() const = 0;
  54.  
  55. #ifdef _GLIBCXX_DEBUG
  56.     virtual std::ostream&
  57.     _M_dot(std::ostream& __ostr) const = 0;
  58. #endif
  59.   };
  60.  
  61.   /// Generic shared pointer to an automaton.  
  62.   typedef std::shared_ptr<_Automaton> _AutomatonPtr;
  63.  
  64.   /// Operation codes that define the type of transitions within the base NFA
  65.   /// that represents the regular expression.
  66.   enum _Opcode
  67.   {
  68.       _S_opcode_unknown       =   0,
  69.       _S_opcode_alternative   =   1,
  70.       _S_opcode_subexpr_begin =   4,
  71.       _S_opcode_subexpr_end   =   5,
  72.       _S_opcode_match         = 100,
  73.       _S_opcode_accept        = 255
  74.   };
  75.  
  76.   /// Provides a generic facade for a templated match_results.
  77.   struct _Results
  78.   {
  79.     virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
  80.     virtual void _M_set_matched(int __i, bool __is_matched) = 0;
  81.   };
  82.  
  83.   /// Tags current state (for subexpr begin/end).
  84.   typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
  85.  
  86.   /// Start state tag.
  87.   template<typename _FwdIterT, typename _TraitsT>
  88.     struct _StartTagger
  89.     {
  90.       explicit
  91.       _StartTagger(int __i)
  92.       : _M_index(__i)
  93.       { }
  94.  
  95.       void
  96.       operator()(const _PatternCursor& __pc, _Results& __r)
  97.       { __r._M_set_pos(_M_index, 0, __pc); }
  98.  
  99.       int       _M_index;
  100.     };
  101.  
  102.   /// End state tag.
  103.   template<typename _FwdIterT, typename _TraitsT>
  104.     struct _EndTagger
  105.     {
  106.       explicit
  107.       _EndTagger(int __i)
  108.       : _M_index(__i)
  109.       { }
  110.  
  111.       void
  112.       operator()(const _PatternCursor& __pc, _Results& __r)
  113.       { __r._M_set_pos(_M_index, 1, __pc); }
  114.  
  115.       int       _M_index;
  116.       _FwdIterT _M_pos;
  117.     };
  118.  
  119.   /// Indicates if current state matches cursor current.
  120.   typedef std::function<bool (const _PatternCursor&)> _Matcher;
  121.  
  122.   /// Matches any character
  123.   inline bool
  124.   _AnyMatcher(const _PatternCursor&)
  125.   { return true; }
  126.  
  127.   /// Matches a single character
  128.   template<typename _InIterT, typename _TraitsT>
  129.     struct _CharMatcher
  130.     {
  131.       typedef typename _TraitsT::char_type char_type;
  132.  
  133.       explicit
  134.       _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
  135.       : _M_traits(__t), _M_c(_M_traits.translate(__c))
  136.       { }
  137.  
  138.       bool
  139.       operator()(const _PatternCursor& __pc) const
  140.       {
  141.         typedef const _SpecializedCursor<_InIterT>& _CursorT;
  142.         _CursorT __c = static_cast<_CursorT>(__pc);
  143.         return _M_traits.translate(__c._M_current()) == _M_c;
  144.       }
  145.  
  146.       const _TraitsT& _M_traits;
  147.       char_type       _M_c;
  148.     };
  149.  
  150.   /// Matches a character range (bracket expression)
  151.   template<typename _InIterT, typename _TraitsT>
  152.     struct _RangeMatcher
  153.     {
  154.       typedef typename _TraitsT::char_type _CharT;
  155.       typedef std::basic_string<_CharT>    _StringT;
  156.  
  157.       explicit
  158.       _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
  159.       : _M_traits(__t), _M_is_non_matching(__is_non_matching)
  160.       { }
  161.  
  162.       bool
  163.       operator()(const _PatternCursor& __pc) const
  164.       {
  165.         typedef const _SpecializedCursor<_InIterT>& _CursorT;
  166.         _CursorT __c = static_cast<_CursorT>(__pc);
  167.         return true;
  168.       }
  169.  
  170.       void
  171.       _M_add_char(_CharT __c)
  172.       { }
  173.  
  174.       void
  175.       _M_add_collating_element(const _StringT& __s)
  176.       { }
  177.  
  178.       void
  179.       _M_add_equivalence_class(const _StringT& __s)
  180.       { }
  181.  
  182.       void
  183.       _M_add_character_class(const _StringT& __s)
  184.       { }
  185.  
  186.       void
  187.       _M_make_range()
  188.       { }
  189.  
  190.       const _TraitsT& _M_traits;
  191.       bool            _M_is_non_matching;
  192.     };
  193.  
  194.   /// Identifies a state in the NFA.
  195.   typedef int _StateIdT;
  196.  
  197.   /// The special case in which a state identifier is not an index.
  198.   static const _StateIdT _S_invalid_state_id  = -1;
  199.  
  200.  
  201.   /**
  202.    * @brief struct _State
  203.    *
  204.    * An individual state in an NFA
  205.    *
  206.    * In this case a "state" is an entry in the NFA definition coupled
  207.    * with its outgoing transition(s).  All states have a single outgoing
  208.    * transition, except for accepting states (which have no outgoing
  209.    * transitions) and alt states, which have two outgoing transitions.
  210.    */
  211.   struct _State
  212.   {
  213.     typedef int  _OpcodeT;
  214.  
  215.     _OpcodeT     _M_opcode;    // type of outgoing transition
  216.     _StateIdT    _M_next;      // outgoing transition
  217.     _StateIdT    _M_alt;       // for _S_opcode_alternative
  218.     unsigned int _M_subexpr;   // for _S_opcode_subexpr_*
  219.     _Tagger      _M_tagger;    // for _S_opcode_subexpr_*
  220.     _Matcher     _M_matches;   // for _S_opcode_match
  221.  
  222.     explicit _State(_OpcodeT __opcode)
  223.     : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
  224.     { }
  225.  
  226.     _State(const _Matcher& __m)
  227.     : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
  228.     { }
  229.  
  230.     _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
  231.     : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
  232.       _M_tagger(__t)
  233.     { }
  234.  
  235.     _State(_StateIdT __next, _StateIdT __alt)
  236.     : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
  237.     { }
  238.  
  239. #ifdef _GLIBCXX_DEBUG
  240.     std::ostream&
  241.     _M_print(std::ostream& ostr) const;
  242.  
  243.     // Prints graphviz dot commands for state.
  244.     std::ostream&
  245.     _M_dot(std::ostream& __ostr, _StateIdT __id) const;
  246. #endif
  247.   };
  248.  
  249.  
  250.   /// The Grep Matcher works on sets of states.  Here are sets of states.
  251.   typedef std::set<_StateIdT> _StateSet;
  252.  
  253.   /**
  254.    * @brief struct _Nfa
  255.    *
  256.    * A collection of all states making up an NFA.
  257.    *
  258.    * An NFA is a 4-tuple M = (K, S, s, F), where
  259.    *    K is a finite set of states,
  260.    *    S is the alphabet of the NFA,
  261.    *    s is the initial state,
  262.    *    F is a set of final (accepting) states.
  263.    *
  264.    * This NFA class is templated on S, a type that will hold values of the
  265.    * underlying alphabet (without regard to semantics of that alphabet).  The
  266.    * other elements of the tuple are generated during construction of the NFA
  267.    * and are available through accessor member functions.
  268.    */
  269.   class _Nfa
  270.   : public _Automaton, public std::vector<_State>
  271.   {
  272.   public:
  273.     typedef _State                              _StateT;
  274.     typedef unsigned int                        _SizeT;
  275.     typedef regex_constants::syntax_option_type _FlagT;
  276.  
  277.     _Nfa(_FlagT __f)
  278.     : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
  279.     { }
  280.  
  281.     ~_Nfa()
  282.     { }
  283.  
  284.     _FlagT
  285.     _M_options() const
  286.     { return _M_flags; }
  287.  
  288.     _StateIdT
  289.     _M_start() const
  290.     { return _M_start_state; }
  291.  
  292.     const _StateSet&
  293.     _M_final_states() const
  294.     { return _M_accepting_states; }
  295.  
  296.     _SizeT
  297.     _M_sub_count() const
  298.     { return _M_subexpr_count; }
  299.  
  300.     _StateIdT
  301.     _M_insert_accept()
  302.     {
  303.       this->push_back(_StateT(_S_opcode_accept));
  304.       _M_accepting_states.insert(this->size()-1);
  305.       return this->size()-1;
  306.     }
  307.  
  308.     _StateIdT
  309.     _M_insert_alt(_StateIdT __next, _StateIdT __alt)
  310.     {
  311.       this->push_back(_StateT(__next, __alt));
  312.       return this->size()-1;
  313.     }
  314.  
  315.     _StateIdT
  316.     _M_insert_matcher(_Matcher __m)
  317.     {
  318.       this->push_back(_StateT(__m));
  319.       return this->size()-1;
  320.     }
  321.  
  322.     _StateIdT
  323.     _M_insert_subexpr_begin(const _Tagger& __t)
  324.     {
  325.       this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++,
  326.                               __t));
  327.       return this->size()-1;
  328.     }
  329.  
  330.     _StateIdT
  331.     _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
  332.     {
  333.       this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
  334.       return this->size()-1;
  335.     }
  336.  
  337. #ifdef _GLIBCXX_DEBUG
  338.     std::ostream&
  339.     _M_dot(std::ostream& __ostr) const;
  340. #endif
  341.  
  342.   private:
  343.     _FlagT     _M_flags;
  344.     _StateIdT  _M_start_state;
  345.     _StateSet  _M_accepting_states;
  346.     _SizeT     _M_subexpr_count;
  347.   };
  348.  
  349.   /// Describes a sequence of one or more %_State, its current start
  350.   /// and end(s).  This structure contains fragments of an NFA during
  351.   /// construction.
  352.   class _StateSeq
  353.   {
  354.   public:
  355.     // Constructs a single-node sequence
  356.     _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
  357.     : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
  358.     { }
  359.     // Constructs a split sequence from two other sequencces
  360.     _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
  361.     : _M_nfa(__e1._M_nfa),
  362.       _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
  363.       _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
  364.     { }
  365.  
  366.     // Constructs a split sequence from a single sequence
  367.     _StateSeq(const _StateSeq& __e, _StateIdT __id)
  368.     : _M_nfa(__e._M_nfa),
  369.       _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
  370.       _M_end1(__id), _M_end2(__e._M_end1)
  371.     { }
  372.  
  373.     // Constructs a copy of a %_StateSeq
  374.     _StateSeq(const _StateSeq& __rhs)
  375.     : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
  376.       _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
  377.     { }
  378.  
  379.  
  380.     _StateSeq& operator=(const _StateSeq& __rhs);
  381.  
  382.     _StateIdT
  383.     _M_front() const
  384.     { return _M_start; }
  385.  
  386.     // Extends a sequence by one.
  387.     void
  388.     _M_push_back(_StateIdT __id);
  389.  
  390.     // Extends and maybe joins a sequence.
  391.     void
  392.     _M_append(_StateIdT __id);
  393.  
  394.     void
  395.     _M_append(_StateSeq& __rhs);
  396.  
  397.     // Clones an entire sequence.
  398.     _StateIdT
  399.     _M_clone();
  400.  
  401.   private:
  402.     _Nfa&     _M_nfa;
  403.     _StateIdT _M_start;
  404.     _StateIdT _M_end1;
  405.     _StateIdT _M_end2;
  406.  
  407.   };
  408.  
  409.  //@} regex-detail
  410. _GLIBCXX_END_NAMESPACE_VERSION
  411. } // namespace __detail
  412. } // namespace std
  413.  
  414. #include <bits/regex_nfa.tcc>
  415.  
  416.