Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. // Locale support (codecvt) -*- C++ -*-
  2.  
  3. // Copyright (C) 2000-2015 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library.  This library is free
  6. // software; you can redistribute it and/or modify it under the
  7. // terms of the GNU General Public License as published by the
  8. // Free Software Foundation; either version 3, or (at your option)
  9. // any later version.
  10.  
  11. // This library is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. // GNU General Public License for more details.
  15.  
  16. // Under Section 7 of GPL version 3, you are granted additional
  17. // permissions described in the GCC Runtime Library Exception, version
  18. // 3.1, as published by the Free Software Foundation.
  19.  
  20. // You should have received a copy of the GNU General Public License and
  21. // a copy of the GCC Runtime Library Exception along with this program;
  22. // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23. // <http://www.gnu.org/licenses/>.
  24.  
  25. //
  26. // ISO C++ 14882: 22.2.1.5 Template class codecvt
  27. //
  28.  
  29. // Written by Benjamin Kosnik <bkoz@redhat.com>
  30.  
  31. /** @file ext/codecvt_specializations.h
  32.  *  This file is a GNU extension to the Standard C++ Library.
  33.  */
  34.  
  35. #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
  36. #define _EXT_CODECVT_SPECIALIZATIONS_H 1
  37.  
  38. #include <bits/c++config.h>
  39. #include <locale>
  40. #include <iconv.h>
  41.  
  42. namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
  43. {
  44. _GLIBCXX_BEGIN_NAMESPACE_CXX11
  45. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  46.  
  47.   /// Extension to use iconv for dealing with character encodings.
  48.   // This includes conversions and comparisons between various character
  49.   // sets.  This object encapsulates data that may need to be shared between
  50.   // char_traits, codecvt and ctype.
  class encoding_state
  {
  public:
    // Types:
    // NB: A conversion descriptor subsumes and enhances the
    // functionality of a simple state type such as mbstate_t.
    typedef iconv_t     descriptor_type;

  protected:
    // Name of internal character set encoding.
    std::string         _M_int_enc;

    // Name of external character set encoding.
    std::string         _M_ext_enc;

    // Conversion descriptor from external encoding to internal encoding.
    descriptor_type     _M_in_desc;

    // Conversion descriptor from internal encoding to external encoding.
    descriptor_type     _M_out_desc;

    // The byte-order marker for the external encoding, if necessary.
    int                 _M_ext_bom;

    // The byte-order marker for the internal encoding, if necessary.
    int                 _M_int_bom;

    // Number of external bytes needed to construct one complete
    // character in the internal encoding.
    // NB: -1 indicates variable, or stateful, encodings.
    int                 _M_bytes;

  public:
    // Creates a state with no encoding names and no open descriptors;
    // good() returns false for such an object.
    explicit
    encoding_state()
    : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
    { }

    // Creates a state converting between the internal encoding __int and
    // the external encoding __ext, and opens both iconv descriptors.
    // Throws (via std::__throw_runtime_error) if either descriptor cannot
    // be opened; no descriptor is left open in that case.
    explicit
    encoding_state(const char* __int, const char* __ext,
                   int __ibom = 0, int __ebom = 0, int __bytes = 1)
    : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
      _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
    { init(); }

    // 21.1.2 traits typedefs
    // p4
    // typedef STATE_T state_type
    // requires: state_type shall meet the requirements of
    // CopyConstructible types (20.1.3)
    // NB: This does not preserve the actual state of the conversion
    // descriptor member, but it does duplicate the encoding
    // information.
    encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
    { construct(__obj); }

    // Need assignment operator as well.
    // Like the copy constructor, this copies the encoding information and
    // opens fresh descriptors rather than sharing those of __obj.
    encoding_state&
    operator=(const encoding_state& __obj)
    {
      construct(__obj);
      return *this;
    }

    ~encoding_state()
    { destroy(); }

    // True only when both descriptors are open, i.e. neither is zero nor
    // the iconv_open failure value (iconv_t)(-1).
    bool
    good() const throw()
    {
      const descriptor_type __err = (iconv_t)(-1);
      bool __test = _M_in_desc && _M_in_desc != __err;
      __test &=  _M_out_desc && _M_out_desc != __err;
      return __test;
    }

    int
    character_ratio() const
    { return _M_bytes; }

    const std::string
    internal_encoding() const
    { return _M_int_enc; }

    int
    internal_bom() const
    { return _M_int_bom; }

    const std::string
    external_encoding() const
    { return _M_ext_enc; }

    int
    external_bom() const
    { return _M_ext_bom; }

    const descriptor_type&
    in_descriptor() const
    { return _M_in_desc; }

    const descriptor_type&
    out_descriptor() const
    { return _M_out_desc; }

  protected:
    // Opens whichever descriptors are still zero, provided both encoding
    // names are non-empty.  On failure every descriptor is released and
    // reset to zero before throwing: when init() runs from a constructor
    // the destructor will not be invoked, so anything left open here
    // would leak.
    void
    init()
    {
      const descriptor_type __err = (iconv_t)(-1);
      const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
      if (!_M_in_desc && __have_encodings)
        {
          _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
          if (_M_in_desc == __err)
            {
              destroy();
              std::__throw_runtime_error(__N("encoding_state::_M_init "
                                    "creating iconv input descriptor failed"));
            }
        }
      if (!_M_out_desc && __have_encodings)
        {
          _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
          if (_M_out_desc == __err)
            {
              // Also closes the input descriptor opened just above.
              destroy();
              std::__throw_runtime_error(__N("encoding_state::_M_init "
                                  "creating iconv output descriptor failed"));
            }
        }
    }

    // Replaces this object's encoding information with a copy of __obj's
    // and opens new descriptors for it.
    void
    construct(const encoding_state& __obj)
    {
      destroy();
      _M_int_enc = __obj._M_int_enc;
      _M_ext_enc = __obj._M_ext_enc;
      _M_ext_bom = __obj._M_ext_bom;
      _M_int_bom = __obj._M_int_bom;
      _M_bytes = __obj._M_bytes;
      init();
    }

    // Closes any open descriptor and resets both descriptors to zero.
    // The reset is unconditional so that a descriptor holding the failure
    // value (iconv_t)(-1) does not prevent a later init() from reopening.
    void
    destroy() throw()
    {
      const descriptor_type __err = (iconv_t)(-1);
      if (_M_in_desc && _M_in_desc != __err)
        iconv_close(_M_in_desc);
      _M_in_desc = 0;
      if (_M_out_desc && _M_out_desc != __err)
        iconv_close(_M_out_desc);
      _M_out_desc = 0;
    }
  };
  205.  
  206.   /// encoding_char_traits
  207.   // Custom traits type with encoding_state for the state type, and the
  208.   // associated fpos<encoding_state> for the position type, all other
  209.   // bits equivalent to the required char_traits instantiations.
  210.   template<typename _CharT>
  211.     struct encoding_char_traits
  212.     : public std::char_traits<_CharT>
  213.     {
  214.       typedef encoding_state                            state_type;
  215.       typedef typename std::fpos<state_type>            pos_type;
  216.     };
  217.  
  218. _GLIBCXX_END_NAMESPACE_VERSION
  219. _GLIBCXX_END_NAMESPACE_CXX11
  220. } // namespace
  221.  
  222.  
  223. namespace std _GLIBCXX_VISIBILITY(default)
  224. {
  225. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  226.  
  227.   using __gnu_cxx::encoding_state;
  228.  
  229.   /// codecvt<InternT, _ExternT, encoding_state> specialization.
  230.   // This partial specialization takes advantage of iconv to provide
  231.   // code conversions between a large number of character encodings.
  232.   template<typename _InternT, typename _ExternT>
  233.     class codecvt<_InternT, _ExternT, encoding_state>
  234.     : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
  235.     {
  236.     public:      
  237.       // Types:
  238.       typedef codecvt_base::result                      result;
  239.       typedef _InternT                                  intern_type;
  240.       typedef _ExternT                                  extern_type;
  241.       typedef __gnu_cxx::encoding_state                 state_type;
  242.       typedef state_type::descriptor_type               descriptor_type;
  243.  
  244.       // Data Members:
  245.       static locale::id                 id;
  246.  
  247.       explicit
  248.       codecvt(size_t __refs = 0)
  249.       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
  250.       { }
  251.  
  252.       explicit
  253.       codecvt(state_type& __enc, size_t __refs = 0)
  254.       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
  255.       { }
  256.  
  257.      protected:
  258.       virtual
  259.       ~codecvt() { }
  260.  
  261.       virtual result
  262.       do_out(state_type& __state, const intern_type* __from,
  263.              const intern_type* __from_end, const intern_type*& __from_next,
  264.              extern_type* __to, extern_type* __to_end,
  265.              extern_type*& __to_next) const;
  266.  
  267.       virtual result
  268.       do_unshift(state_type& __state, extern_type* __to,
  269.                  extern_type* __to_end, extern_type*& __to_next) const;
  270.  
  271.       virtual result
  272.       do_in(state_type& __state, const extern_type* __from,
  273.             const extern_type* __from_end, const extern_type*& __from_next,
  274.             intern_type* __to, intern_type* __to_end,
  275.             intern_type*& __to_next) const;
  276.  
  277.       virtual int
  278.       do_encoding() const throw();
  279.  
  280.       virtual bool
  281.       do_always_noconv() const throw();
  282.  
  283.       virtual int
  284.       do_length(state_type&, const extern_type* __from,
  285.                 const extern_type* __end, size_t __max) const;
  286.  
  287.       virtual int
  288.       do_max_length() const throw();
  289.     };
  290.  
  291.   template<typename _InternT, typename _ExternT>
  292.     locale::id
  293.     codecvt<_InternT, _ExternT, encoding_state>::id;
  294.  
  295.   // This adaptor works around the signature problems of the second
  296.   // argument to iconv():  SUSv2 and others use 'const char**', but glibc 2.2
  297.   // uses 'char**', which matches the POSIX 1003.1-2001 standard.
  298.   // Using this adaptor, g++ will do the work for us.
  // Calls __func (normally iconv) with __inbuf converted to whatever
  // pointer type _Tp the function declares for its second parameter, so
  // the same call site compiles against both the 'const char**' and the
  // 'char**' flavours of the iconv() prototype.  Returns __func's result
  // unchanged.
  template<typename _Tp>
    inline size_t
    __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                    iconv_t __cd, char** __inbuf, size_t* __inbytes,
                    char** __outbuf, size_t* __outbytes)
    {
      // _Tp is deduced from the function pointer; the C-style cast adds or
      // keeps constness as required.
      _Tp __src = (_Tp)__inbuf;
      return (*__func)(__cd, __src, __inbytes, __outbuf, __outbytes);
    }
  305.  
  306.   template<typename _InternT, typename _ExternT>
  307.     codecvt_base::result
  308.     codecvt<_InternT, _ExternT, encoding_state>::
  309.     do_out(state_type& __state, const intern_type* __from,
  310.            const intern_type* __from_end, const intern_type*& __from_next,
  311.            extern_type* __to, extern_type* __to_end,
  312.            extern_type*& __to_next) const
  313.     {
  314.       result __ret = codecvt_base::error;
  315.       if (__state.good())
  316.         {
  317.           const descriptor_type& __desc = __state.out_descriptor();
  318.           const size_t __fmultiple = sizeof(intern_type);
  319.           size_t __fbytes = __fmultiple * (__from_end - __from);
  320.           const size_t __tmultiple = sizeof(extern_type);
  321.           size_t __tbytes = __tmultiple * (__to_end - __to);
  322.          
  323.           // Argument list for iconv specifies a byte sequence. Thus,
  324.           // all to/from arrays must be brutally casted to char*.
  325.           char* __cto = reinterpret_cast<char*>(__to);
  326.           char* __cfrom;
  327.           size_t __conv;
  328.  
  329.           // Some encodings need a byte order marker as the first item
  330.           // in the byte stream, to designate endian-ness. The default
  331.           // value for the byte order marker is NULL, so if this is
  332.           // the case, it's not necessary and we can just go on our
  333.           // merry way.
  334.           int __int_bom = __state.internal_bom();
  335.           if (__int_bom)
  336.             {    
  337.               size_t __size = __from_end - __from;
  338.               intern_type* __cfixed = static_cast<intern_type*>
  339.                 (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
  340.               __cfixed[0] = static_cast<intern_type>(__int_bom);
  341.               char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
  342.               __cfrom = reinterpret_cast<char*>(__cfixed);
  343.               __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
  344.                                         &__fbytes, &__cto, &__tbytes);
  345.             }
  346.           else
  347.             {
  348.               intern_type* __cfixed = const_cast<intern_type*>(__from);
  349.               __cfrom = reinterpret_cast<char*>(__cfixed);
  350.               __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
  351.                                        &__cto, &__tbytes);
  352.             }
  353.  
  354.           if (__conv != size_t(-1))
  355.             {
  356.               __from_next = reinterpret_cast<const intern_type*>(__cfrom);
  357.               __to_next = reinterpret_cast<extern_type*>(__cto);
  358.               __ret = codecvt_base::ok;
  359.             }
  360.           else
  361.             {
  362.               if (__fbytes < __fmultiple * (__from_end - __from))
  363.                 {
  364.                   __from_next = reinterpret_cast<const intern_type*>(__cfrom);
  365.                   __to_next = reinterpret_cast<extern_type*>(__cto);
  366.                   __ret = codecvt_base::partial;
  367.                 }
  368.               else
  369.                 __ret = codecvt_base::error;
  370.             }
  371.         }
  372.       return __ret;
  373.     }
  374.  
  375.   template<typename _InternT, typename _ExternT>
  376.     codecvt_base::result
  377.     codecvt<_InternT, _ExternT, encoding_state>::
  378.     do_unshift(state_type& __state, extern_type* __to,
  379.                extern_type* __to_end, extern_type*& __to_next) const
  380.     {
  381.       result __ret = codecvt_base::error;
  382.       if (__state.good())
  383.         {
  384.           const descriptor_type& __desc = __state.in_descriptor();
  385.           const size_t __tmultiple = sizeof(intern_type);
  386.           size_t __tlen = __tmultiple * (__to_end - __to);
  387.          
  388.           // Argument list for iconv specifies a byte sequence. Thus,
  389.           // all to/from arrays must be brutally casted to char*.
  390.           char* __cto = reinterpret_cast<char*>(__to);
  391.           size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
  392.                                           &__cto, &__tlen);
  393.          
  394.           if (__conv != size_t(-1))
  395.             {
  396.               __to_next = reinterpret_cast<extern_type*>(__cto);
  397.               if (__tlen == __tmultiple * (__to_end - __to))
  398.                 __ret = codecvt_base::noconv;
  399.               else if (__tlen == 0)
  400.                 __ret = codecvt_base::ok;
  401.               else
  402.                 __ret = codecvt_base::partial;
  403.             }
  404.           else
  405.             __ret = codecvt_base::error;
  406.         }
  407.       return __ret;
  408.     }
  409.    
  410.   template<typename _InternT, typename _ExternT>
  411.     codecvt_base::result
  412.     codecvt<_InternT, _ExternT, encoding_state>::
  413.     do_in(state_type& __state, const extern_type* __from,
  414.           const extern_type* __from_end, const extern_type*& __from_next,
  415.           intern_type* __to, intern_type* __to_end,
  416.           intern_type*& __to_next) const
  417.     {
  418.       result __ret = codecvt_base::error;
  419.       if (__state.good())
  420.         {
  421.           const descriptor_type& __desc = __state.in_descriptor();
  422.           const size_t __fmultiple = sizeof(extern_type);
  423.           size_t __flen = __fmultiple * (__from_end - __from);
  424.           const size_t __tmultiple = sizeof(intern_type);
  425.           size_t __tlen = __tmultiple * (__to_end - __to);
  426.          
  427.           // Argument list for iconv specifies a byte sequence. Thus,
  428.           // all to/from arrays must be brutally casted to char*.
  429.           char* __cto = reinterpret_cast<char*>(__to);
  430.           char* __cfrom;
  431.           size_t __conv;
  432.  
  433.           // Some encodings need a byte order marker as the first item
  434.           // in the byte stream, to designate endian-ness. The default
  435.           // value for the byte order marker is NULL, so if this is
  436.           // the case, it's not necessary and we can just go on our
  437.           // merry way.
  438.           int __ext_bom = __state.external_bom();
  439.           if (__ext_bom)
  440.             {    
  441.               size_t __size = __from_end - __from;
  442.               extern_type* __cfixed =  static_cast<extern_type*>
  443.                 (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
  444.               __cfixed[0] = static_cast<extern_type>(__ext_bom);
  445.               char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
  446.               __cfrom = reinterpret_cast<char*>(__cfixed);
  447.               __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
  448.                                        &__flen, &__cto, &__tlen);
  449.             }
  450.           else
  451.             {
  452.               extern_type* __cfixed = const_cast<extern_type*>(__from);
  453.               __cfrom = reinterpret_cast<char*>(__cfixed);
  454.               __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
  455.                                        &__flen, &__cto, &__tlen);
  456.             }
  457.  
  458.          
  459.           if (__conv != size_t(-1))
  460.             {
  461.               __from_next = reinterpret_cast<const extern_type*>(__cfrom);
  462.               __to_next = reinterpret_cast<intern_type*>(__cto);
  463.               __ret = codecvt_base::ok;
  464.             }
  465.           else
  466.             {
  467.               if (__flen < static_cast<size_t>(__from_end - __from))
  468.                 {
  469.                   __from_next = reinterpret_cast<const extern_type*>(__cfrom);
  470.                   __to_next = reinterpret_cast<intern_type*>(__cto);
  471.                   __ret = codecvt_base::partial;
  472.                 }
  473.               else
  474.                 __ret = codecvt_base::error;
  475.             }
  476.         }
  477.       return __ret;
  478.     }
  479.  
  480.   template<typename _InternT, typename _ExternT>
  481.     int
  482.     codecvt<_InternT, _ExternT, encoding_state>::
  483.     do_encoding() const throw()
  484.     {
  485.       int __ret = 0;
  486.       if (sizeof(_ExternT) <= sizeof(_InternT))
  487.         __ret = sizeof(_InternT) / sizeof(_ExternT);
  488.       return __ret;
  489.     }
  490.  
  491.   template<typename _InternT, typename _ExternT>
  492.     bool
  493.     codecvt<_InternT, _ExternT, encoding_state>::
  494.     do_always_noconv() const throw()
  495.     { return false; }
  496.  
  497.   template<typename _InternT, typename _ExternT>
  498.     int
  499.     codecvt<_InternT, _ExternT, encoding_state>::
  500.     do_length(state_type&, const extern_type* __from,
  501.               const extern_type* __end, size_t __max) const
  502.     { return std::min(__max, static_cast<size_t>(__end - __from)); }
  503.  
  504.   // _GLIBCXX_RESOLVE_LIB_DEFECTS
  505.   // 74.  Garbled text for codecvt::do_max_length
  506.   template<typename _InternT, typename _ExternT>
  507.     int
  508.     codecvt<_InternT, _ExternT, encoding_state>::
  509.     do_max_length() const throw()
  510.     { return 1; }
  511.  
  512. _GLIBCXX_END_NAMESPACE_VERSION
  513. } // namespace
  514.  
  515. #endif
  516.