Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. // Locale support (codecvt) -*- C++ -*-
  2.  
  3. // Copyright (C) 2000-2013 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library.  This library is free
  6. // software; you can redistribute it and/or modify it under the
  7. // terms of the GNU General Public License as published by the
  8. // Free Software Foundation; either version 3, or (at your option)
  9. // any later version.
  10.  
  11. // This library is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. // GNU General Public License for more details.
  15.  
  16. // Under Section 7 of GPL version 3, you are granted additional
  17. // permissions described in the GCC Runtime Library Exception, version
  18. // 3.1, as published by the Free Software Foundation.
  19.  
  20. // You should have received a copy of the GNU General Public License and
  21. // a copy of the GCC Runtime Library Exception along with this program;
  22. // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23. // <http://www.gnu.org/licenses/>.
  24.  
  25. //
  26. // ISO C++ 14882: 22.2.1.5 Template class codecvt
  27. //
  28.  
  29. // Written by Benjamin Kosnik <bkoz@redhat.com>
  30.  
  31. /** @file ext/codecvt_specializations.h
  32.  *  This file is a GNU extension to the Standard C++ Library.
  33.  */
  34.  
  35. #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
  36. #define _EXT_CODECVT_SPECIALIZATIONS_H 1
  37.  
  38. #include <bits/c++config.h>
  39. #include <locale>
  40. #include <iconv.h>
  41.  
  42. namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
  43. {
  44. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  45.  
  /// Extension to use iconv for dealing with character encodings.
  // This includes conversions and comparisons between various character
  // sets.  This object encapsulates data that may need to be shared between
  // char_traits, codecvt and ctype.
  //
  // Each object stores the names of an internal and an external encoding
  // together with one iconv descriptor per conversion direction.  The
  // descriptors are opened by init() and released by destroy(); a stored
  // descriptor value of 0 means "not open".
  class encoding_state
  {
  public:
    // Types:
    // NB: A conversion descriptor subsumes and enhances the
    // functionality of a simple state type such as mbstate_t.
    typedef iconv_t     descriptor_type;

  protected:
    // Name of internal character set encoding.
    std::string         _M_int_enc;

    // Name of external character set encoding.
    std::string         _M_ext_enc;

    // Conversion descriptor between external encoding to internal encoding.
    descriptor_type     _M_in_desc;

    // Conversion descriptor between internal encoding to external encoding.
    descriptor_type     _M_out_desc;

    // The byte-order marker for the external encoding, if necessary.
    int                 _M_ext_bom;

    // The byte-order marker for the internal encoding, if necessary.
    int                 _M_int_bom;

    // Number of external bytes needed to construct one complete
    // character in the internal encoding.
    // NB: -1 indicates variable, or stateful, encodings.
    int                 _M_bytes;

  public:
    // Creates a state with no encoding names and no open descriptors;
    // good() is false for such an object.
    explicit
    encoding_state()
    : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
    { }

    // Creates a state for converting between the internal encoding __int
    // and the external encoding __ext and opens both descriptors.
    // Failure to open a descriptor is reported by init() through
    // std::__throw_runtime_error.
    explicit
    encoding_state(const char* __int, const char* __ext,
                   int __ibom = 0, int __ebom = 0, int __bytes = 1)
    : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
      _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
    { init(); }

    // 21.1.2 traits typedefs
    // p4
    // typedef STATE_T state_type
    // requires: state_type shall meet the requirements of
    // CopyConstructible types (20.1.3)
    // NB: This does not preserve the actual state of the conversion
    // descriptor member, but it does duplicate the encoding
    // information.
    encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
    { construct(__obj); }

    // Need assignment operator as well.
    // Self-assignment is a no-op so that it does not close and reopen
    // (and thereby reset) the descriptors of a live object.
    encoding_state&
    operator=(const encoding_state& __obj)
    {
      if (this != &__obj)
        construct(__obj);
      return *this;
    }

    ~encoding_state()
    { destroy(); }

    // True when both descriptors are open, i.e. neither is 0 nor the
    // iconv error value (iconv_t)(-1).
    bool
    good() const throw()
    {
      const descriptor_type __err = (iconv_t)(-1);
      bool __test = _M_in_desc && _M_in_desc != __err;
      __test &=  _M_out_desc && _M_out_desc != __err;
      return __test;
    }

    int
    character_ratio() const
    { return _M_bytes; }

    const std::string
    internal_encoding() const
    { return _M_int_enc; }

    int
    internal_bom() const
    { return _M_int_bom; }

    const std::string
    external_encoding() const
    { return _M_ext_enc; }

    int
    external_bom() const
    { return _M_ext_bom; }

    const descriptor_type&
    in_descriptor() const
    { return _M_in_desc; }

    const descriptor_type&
    out_descriptor() const
    { return _M_out_desc; }

  protected:
    // Opens whichever descriptor is not open yet, provided both encoding
    // names are non-empty.  If an iconv_open call fails, every descriptor
    // held by this object is released and reset to 0 before the error is
    // reported.  That way nothing leaks when this runs inside a
    // constructor (where the destructor will not be invoked), and a later
    // init() can try again instead of being blocked by a stale error value.
    void
    init()
    {
      const descriptor_type __err = (iconv_t)(-1);
      const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
      if ((!_M_in_desc || _M_in_desc == __err) && __have_encodings)
        {
          const descriptor_type __cd = iconv_open(_M_int_enc.c_str(),
                                                  _M_ext_enc.c_str());
          if (__cd == __err)
            {
              destroy();
              std::__throw_runtime_error(__N("encoding_state::_M_init "
                                    "creating iconv input descriptor failed"));
            }
          _M_in_desc = __cd;
        }
      if ((!_M_out_desc || _M_out_desc == __err) && __have_encodings)
        {
          const descriptor_type __cd = iconv_open(_M_ext_enc.c_str(),
                                                  _M_int_enc.c_str());
          if (__cd == __err)
            {
              destroy();
              std::__throw_runtime_error(__N("encoding_state::_M_init "
                                  "creating iconv output descriptor failed"));
            }
          _M_out_desc = __cd;
        }
    }

    // Replaces this object's encoding information with a copy of __obj's
    // and opens fresh descriptors for it.  The descriptors themselves are
    // never shared with __obj.
    void
    construct(const encoding_state& __obj)
    {
      destroy();
      _M_int_enc = __obj._M_int_enc;
      _M_ext_enc = __obj._M_ext_enc;
      _M_ext_bom = __obj._M_ext_bom;
      _M_int_bom = __obj._M_int_bom;
      _M_bytes = __obj._M_bytes;
      init();
    }

    // Closes any open descriptor and unconditionally resets both members
    // to 0, so an error value is never left behind.
    void
    destroy() throw()
    {
      const descriptor_type __err = (iconv_t)(-1);
      if (_M_in_desc && _M_in_desc != __err)
        iconv_close(_M_in_desc);
      _M_in_desc = 0;
      if (_M_out_desc && _M_out_desc != __err)
        iconv_close(_M_out_desc);
      _M_out_desc = 0;
    }
  };
  204.  
  205.   /// encoding_char_traits
  206.   // Custom traits type with encoding_state for the state type, and the
  207.   // associated fpos<encoding_state> for the position type, all other
  208.   // bits equivalent to the required char_traits instantiations.
  209.   template<typename _CharT>
  210.     struct encoding_char_traits : public std::char_traits<_CharT>
  211.     {
  212.       typedef encoding_state                            state_type;
  213.       typedef typename std::fpos<state_type>            pos_type;
  214.     };
  215.  
  216. _GLIBCXX_END_NAMESPACE_VERSION
  217. } // namespace
  218.  
  219.  
  220. namespace std _GLIBCXX_VISIBILITY(default)
  221. {
  222. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  223.  
  224.   using __gnu_cxx::encoding_state;
  225.  
  226.   /// codecvt<InternT, _ExternT, encoding_state> specialization.
  227.   // This partial specialization takes advantage of iconv to provide
  228.   // code conversions between a large number of character encodings.
  229.   template<typename _InternT, typename _ExternT>
  230.     class codecvt<_InternT, _ExternT, encoding_state>
  231.     : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
  232.     {
  233.     public:      
  234.       // Types:
  235.       typedef codecvt_base::result                      result;
  236.       typedef _InternT                                  intern_type;
  237.       typedef _ExternT                                  extern_type;
  238.       typedef __gnu_cxx::encoding_state                 state_type;
  239.       typedef state_type::descriptor_type               descriptor_type;
  240.  
  241.       // Data Members:
  242.       static locale::id                 id;
  243.  
  244.       explicit
  245.       codecvt(size_t __refs = 0)
  246.       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
  247.       { }
  248.  
  249.       explicit
  250.       codecvt(state_type& __enc, size_t __refs = 0)
  251.       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
  252.       { }
  253.  
  254.      protected:
  255.       virtual
  256.       ~codecvt() { }
  257.  
  258.       virtual result
  259.       do_out(state_type& __state, const intern_type* __from,
  260.              const intern_type* __from_end, const intern_type*& __from_next,
  261.              extern_type* __to, extern_type* __to_end,
  262.              extern_type*& __to_next) const;
  263.  
  264.       virtual result
  265.       do_unshift(state_type& __state, extern_type* __to,
  266.                  extern_type* __to_end, extern_type*& __to_next) const;
  267.  
  268.       virtual result
  269.       do_in(state_type& __state, const extern_type* __from,
  270.             const extern_type* __from_end, const extern_type*& __from_next,
  271.             intern_type* __to, intern_type* __to_end,
  272.             intern_type*& __to_next) const;
  273.  
  274.       virtual int
  275.       do_encoding() const throw();
  276.  
  277.       virtual bool
  278.       do_always_noconv() const throw();
  279.  
  280.       virtual int
  281.       do_length(state_type&, const extern_type* __from,
  282.                 const extern_type* __end, size_t __max) const;
  283.  
  284.       virtual int
  285.       do_max_length() const throw();
  286.     };
  287.  
  288.   template<typename _InternT, typename _ExternT>
  289.     locale::id
  290.     codecvt<_InternT, _ExternT, encoding_state>::id;
  291.  
  // iconv() is declared with two different types for its second argument:
  // SUSv2 and others use 'const char**', while glibc 2.2 uses 'char**',
  // which matches the POSIX 1003.1-2001 standard.  This adaptor lets the
  // compiler deduce the expected type (_Tp) from the function pointer it
  // is given and casts the caller's 'char**' input buffer to that type
  // before forwarding all arguments unchanged.
  template<typename _Tp>
    inline size_t
    __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                    iconv_t __cd, char** __inbuf, size_t* __inbytes,
                    char** __outbuf, size_t* __outbytes)
    {
      // A C-style cast is deliberate: depending on _Tp it is either an
      // identity conversion or adds the const qualification.
      _Tp __adapted_inbuf = (_Tp)__inbuf;
      return __func(__cd, __adapted_inbuf, __inbytes, __outbuf, __outbytes);
    }
  302.  
  303.   template<typename _InternT, typename _ExternT>
  304.     codecvt_base::result
  305.     codecvt<_InternT, _ExternT, encoding_state>::
  306.     do_out(state_type& __state, const intern_type* __from,
  307.            const intern_type* __from_end, const intern_type*& __from_next,
  308.            extern_type* __to, extern_type* __to_end,
  309.            extern_type*& __to_next) const
  310.     {
  311.       result __ret = codecvt_base::error;
  312.       if (__state.good())
  313.         {
  314.           const descriptor_type& __desc = __state.out_descriptor();
  315.           const size_t __fmultiple = sizeof(intern_type);
  316.           size_t __fbytes = __fmultiple * (__from_end - __from);
  317.           const size_t __tmultiple = sizeof(extern_type);
  318.           size_t __tbytes = __tmultiple * (__to_end - __to);
  319.          
  320.           // Argument list for iconv specifies a byte sequence. Thus,
  321.           // all to/from arrays must be brutally casted to char*.
  322.           char* __cto = reinterpret_cast<char*>(__to);
  323.           char* __cfrom;
  324.           size_t __conv;
  325.  
  326.           // Some encodings need a byte order marker as the first item
  327.           // in the byte stream, to designate endian-ness. The default
  328.           // value for the byte order marker is NULL, so if this is
  329.           // the case, it's not necessary and we can just go on our
  330.           // merry way.
  331.           int __int_bom = __state.internal_bom();
  332.           if (__int_bom)
  333.             {    
  334.               size_t __size = __from_end - __from;
  335.               intern_type* __cfixed = static_cast<intern_type*>
  336.                 (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
  337.               __cfixed[0] = static_cast<intern_type>(__int_bom);
  338.               char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
  339.               __cfrom = reinterpret_cast<char*>(__cfixed);
  340.               __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
  341.                                         &__fbytes, &__cto, &__tbytes);
  342.             }
  343.           else
  344.             {
  345.               intern_type* __cfixed = const_cast<intern_type*>(__from);
  346.               __cfrom = reinterpret_cast<char*>(__cfixed);
  347.               __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
  348.                                        &__cto, &__tbytes);
  349.             }
  350.  
  351.           if (__conv != size_t(-1))
  352.             {
  353.               __from_next = reinterpret_cast<const intern_type*>(__cfrom);
  354.               __to_next = reinterpret_cast<extern_type*>(__cto);
  355.               __ret = codecvt_base::ok;
  356.             }
  357.           else
  358.             {
  359.               if (__fbytes < __fmultiple * (__from_end - __from))
  360.                 {
  361.                   __from_next = reinterpret_cast<const intern_type*>(__cfrom);
  362.                   __to_next = reinterpret_cast<extern_type*>(__cto);
  363.                   __ret = codecvt_base::partial;
  364.                 }
  365.               else
  366.                 __ret = codecvt_base::error;
  367.             }
  368.         }
  369.       return __ret;
  370.     }
  371.  
  372.   template<typename _InternT, typename _ExternT>
  373.     codecvt_base::result
  374.     codecvt<_InternT, _ExternT, encoding_state>::
  375.     do_unshift(state_type& __state, extern_type* __to,
  376.                extern_type* __to_end, extern_type*& __to_next) const
  377.     {
  378.       result __ret = codecvt_base::error;
  379.       if (__state.good())
  380.         {
  381.           const descriptor_type& __desc = __state.in_descriptor();
  382.           const size_t __tmultiple = sizeof(intern_type);
  383.           size_t __tlen = __tmultiple * (__to_end - __to);
  384.          
  385.           // Argument list for iconv specifies a byte sequence. Thus,
  386.           // all to/from arrays must be brutally casted to char*.
  387.           char* __cto = reinterpret_cast<char*>(__to);
  388.           size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
  389.                                           &__cto, &__tlen);
  390.          
  391.           if (__conv != size_t(-1))
  392.             {
  393.               __to_next = reinterpret_cast<extern_type*>(__cto);
  394.               if (__tlen == __tmultiple * (__to_end - __to))
  395.                 __ret = codecvt_base::noconv;
  396.               else if (__tlen == 0)
  397.                 __ret = codecvt_base::ok;
  398.               else
  399.                 __ret = codecvt_base::partial;
  400.             }
  401.           else
  402.             __ret = codecvt_base::error;
  403.         }
  404.       return __ret;
  405.     }
  406.    
  407.   template<typename _InternT, typename _ExternT>
  408.     codecvt_base::result
  409.     codecvt<_InternT, _ExternT, encoding_state>::
  410.     do_in(state_type& __state, const extern_type* __from,
  411.           const extern_type* __from_end, const extern_type*& __from_next,
  412.           intern_type* __to, intern_type* __to_end,
  413.           intern_type*& __to_next) const
  414.     {
  415.       result __ret = codecvt_base::error;
  416.       if (__state.good())
  417.         {
  418.           const descriptor_type& __desc = __state.in_descriptor();
  419.           const size_t __fmultiple = sizeof(extern_type);
  420.           size_t __flen = __fmultiple * (__from_end - __from);
  421.           const size_t __tmultiple = sizeof(intern_type);
  422.           size_t __tlen = __tmultiple * (__to_end - __to);
  423.          
  424.           // Argument list for iconv specifies a byte sequence. Thus,
  425.           // all to/from arrays must be brutally casted to char*.
  426.           char* __cto = reinterpret_cast<char*>(__to);
  427.           char* __cfrom;
  428.           size_t __conv;
  429.  
  430.           // Some encodings need a byte order marker as the first item
  431.           // in the byte stream, to designate endian-ness. The default
  432.           // value for the byte order marker is NULL, so if this is
  433.           // the case, it's not necessary and we can just go on our
  434.           // merry way.
  435.           int __ext_bom = __state.external_bom();
  436.           if (__ext_bom)
  437.             {    
  438.               size_t __size = __from_end - __from;
  439.               extern_type* __cfixed =  static_cast<extern_type*>
  440.                 (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
  441.               __cfixed[0] = static_cast<extern_type>(__ext_bom);
  442.               char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
  443.               __cfrom = reinterpret_cast<char*>(__cfixed);
  444.               __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
  445.                                        &__flen, &__cto, &__tlen);
  446.             }
  447.           else
  448.             {
  449.               extern_type* __cfixed = const_cast<extern_type*>(__from);
  450.               __cfrom = reinterpret_cast<char*>(__cfixed);
  451.               __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
  452.                                        &__flen, &__cto, &__tlen);
  453.             }
  454.  
  455.          
  456.           if (__conv != size_t(-1))
  457.             {
  458.               __from_next = reinterpret_cast<const extern_type*>(__cfrom);
  459.               __to_next = reinterpret_cast<intern_type*>(__cto);
  460.               __ret = codecvt_base::ok;
  461.             }
  462.           else
  463.             {
  464.               if (__flen < static_cast<size_t>(__from_end - __from))
  465.                 {
  466.                   __from_next = reinterpret_cast<const extern_type*>(__cfrom);
  467.                   __to_next = reinterpret_cast<intern_type*>(__cto);
  468.                   __ret = codecvt_base::partial;
  469.                 }
  470.               else
  471.                 __ret = codecvt_base::error;
  472.             }
  473.         }
  474.       return __ret;
  475.     }
  476.  
  477.   template<typename _InternT, typename _ExternT>
  478.     int
  479.     codecvt<_InternT, _ExternT, encoding_state>::
  480.     do_encoding() const throw()
  481.     {
  482.       int __ret = 0;
  483.       if (sizeof(_ExternT) <= sizeof(_InternT))
  484.         __ret = sizeof(_InternT) / sizeof(_ExternT);
  485.       return __ret;
  486.     }
  487.  
  488.   template<typename _InternT, typename _ExternT>
  489.     bool
  490.     codecvt<_InternT, _ExternT, encoding_state>::
  491.     do_always_noconv() const throw()
  492.     { return false; }
  493.  
  494.   template<typename _InternT, typename _ExternT>
  495.     int
  496.     codecvt<_InternT, _ExternT, encoding_state>::
  497.     do_length(state_type&, const extern_type* __from,
  498.               const extern_type* __end, size_t __max) const
  499.     { return std::min(__max, static_cast<size_t>(__end - __from)); }
  500.  
  501.   // _GLIBCXX_RESOLVE_LIB_DEFECTS
  502.   // 74.  Garbled text for codecvt::do_max_length
  503.   template<typename _InternT, typename _ExternT>
  504.     int
  505.     codecvt<_InternT, _ExternT, encoding_state>::
  506.     do_max_length() const throw()
  507.     { return 1; }
  508.  
  509. _GLIBCXX_END_NAMESPACE_VERSION
  510. } // namespace
  511.  
  512. #endif
  513.