Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6554 serge 1
// Locale support (codecvt) -*- C++ -*-
2
 
3
// Copyright (C) 2000-2015 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the
7
// terms of the GNU General Public License as published by the
8
// Free Software Foundation; either version 3, or (at your option)
9
// any later version.
10
 
11
// This library is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
// GNU General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// <http://www.gnu.org/licenses/>.
24
 
25
//
26
// ISO C++ 14882: 22.2.1.5 Template class codecvt
27
//
28
 
29
// Written by Benjamin Kosnik 
30
 
31
/** @file ext/codecvt_specializations.h
32
 *  This file is a GNU extension to the Standard C++ Library.
33
 */
34
 
35
#ifndef _EXT_CODECVT_SPECIALIZATIONS_H
36
#define _EXT_CODECVT_SPECIALIZATIONS_H 1
37
 
38
#include <bits/c++config.h>
#include <locale>
#include <iconv.h>
41
 
42
namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
43
{
44
_GLIBCXX_BEGIN_NAMESPACE_CXX11
45
_GLIBCXX_BEGIN_NAMESPACE_VERSION
46
 
47
  /// Extension to use iconv for dealing with character encodings.
  // This includes conversions and comparisons between various character
  // sets.  This object encapsulates data that may need to be shared between
  // char_traits, codecvt and ctype.
  //
  // An encoding_state holds the names of an internal and an external
  // encoding plus one iconv conversion descriptor per direction.  The
  // descriptors are opened by init() once both names are non-empty and
  // are closed by destroy().  Copying duplicates the encoding
  // information and opens fresh descriptors; it does not carry over the
  // conversion state of the source object's descriptors.
  class encoding_state
  {
  public:
    // Types:
    // NB: A conversion descriptor subsumes and enhances the
    // functionality of a simple state type such as mbstate_t.
    typedef iconv_t	descriptor_type;

  protected:
    // Name of internal character set encoding.
    std::string		_M_int_enc;

    // Name of external character set encoding.
    std::string		_M_ext_enc;

    // Conversion descriptor between external encoding to internal encoding.
    // Zero means "not open".
    descriptor_type	_M_in_desc;

    // Conversion descriptor between internal encoding to external encoding.
    // Zero means "not open".
    descriptor_type	_M_out_desc;

    // The byte-order marker for the external encoding, if necessary.
    int			_M_ext_bom;

    // The byte-order marker for the internal encoding, if necessary.
    int			_M_int_bom;

    // Number of external bytes needed to construct one complete
    // character in the internal encoding.
    // NB: -1 indicates variable, or stateful, encodings.
    int			_M_bytes;

  public:
    // Creates a state with no encodings and no descriptors; good()
    // is false for such an object.
    explicit
    encoding_state()
    : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
    { }

    // Creates a state converting between the internal encoding __int
    // and the external encoding __ext, with optional byte-order
    // markers and character ratio.  Throws std::runtime_error (via
    // init()) if either descriptor cannot be opened.
    explicit
    encoding_state(const char* __int, const char* __ext,
		   int __ibom = 0, int __ebom = 0, int __bytes = 1)
    : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
      _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
    { init(); }

    // 21.1.2 traits typedefs
    // p4
    // typedef STATE_T state_type
    // requires: state_type shall meet the requirements of
    // CopyConstructible types (20.1.3)
    // NB: This does not preserve the actual state of the conversion
    // descriptor member, but it does duplicate the encoding
    // information.
    encoding_state(const encoding_state& __obj)
    : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
    { construct(__obj); }

    // Need assignment operator as well.
    // Self-assignment is a no-op so that it neither resets the
    // conversion state nor risks a throwing re-open.
    encoding_state&
    operator=(const encoding_state& __obj)
    {
      if (this != &__obj)
	construct(__obj);
      return *this;
    }

    ~encoding_state()
    { destroy(); }

    // True when both descriptors are open and valid.
    bool
    good() const throw()
    {
      // iconv_open reports failure with (iconv_t)-1; the C-style cast
      // is kept because iconv_t is an opaque, platform-defined type.
      const descriptor_type __err = (iconv_t)(-1);
      bool __test = _M_in_desc && _M_in_desc != __err;
      __test &=  _M_out_desc && _M_out_desc != __err;
      return __test;
    }

    int
    character_ratio() const
    { return _M_bytes; }

    const std::string
    internal_encoding() const
    { return _M_int_enc; }

    int
    internal_bom() const
    { return _M_int_bom; }

    const std::string
    external_encoding() const
    { return _M_ext_enc; }

    int
    external_bom() const
    { return _M_ext_bom; }

    const descriptor_type&
    in_descriptor() const
    { return _M_in_desc; }

    const descriptor_type&
    out_descriptor() const
    { return _M_out_desc; }

  protected:
    // Opens whichever descriptor is not yet open, provided both
    // encoding names are non-empty.  On failure every descriptor held
    // by this object is released and reset before the exception is
    // thrown: when init() is called from a constructor the destructor
    // never runs, so anything left open here would leak, and a stale
    // error value would stop a later init() from retrying.
    void
    init()
    {
      const descriptor_type __err = (iconv_t)(-1);
      const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
      if (!_M_in_desc && __have_encodings)
	{
	  _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
	  if (_M_in_desc == __err)
	    {
	      destroy();
	      std::__throw_runtime_error(__N("encoding_state::_M_init "
				    "creating iconv input descriptor failed"));
	    }
	}
      if (!_M_out_desc && __have_encodings)
	{
	  _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
	  if (_M_out_desc == __err)
	    {
	      destroy();
	      std::__throw_runtime_error(__N("encoding_state::_M_init "
				  "creating iconv output descriptor failed"));
	    }
	}
    }

    // Replaces this object's contents with a copy of __obj's encoding
    // information and opens new descriptors for it.
    void
    construct(const encoding_state& __obj)
    {
      destroy();
      _M_int_enc = __obj._M_int_enc;
      _M_ext_enc = __obj._M_ext_enc;
      _M_ext_bom = __obj._M_ext_bom;
      _M_int_bom = __obj._M_int_bom;
      _M_bytes = __obj._M_bytes;
      init();
    }

    // Closes any open descriptor and marks both as "not open".  The
    // reset is unconditional so that a descriptor holding the iconv
    // error value does not survive and block re-initialisation.
    void
    destroy() throw()
    {
      const descriptor_type __err = (iconv_t)(-1);
      if (_M_in_desc && _M_in_desc != __err)
	iconv_close(_M_in_desc);
      _M_in_desc = 0;
      if (_M_out_desc && _M_out_desc != __err)
	iconv_close(_M_out_desc);
      _M_out_desc = 0;
    }
  };
205
 
206
  /// encoding_char_traits
207
  // Custom traits type with encoding_state for the state type, and the
208
  // associated fpos for the position type, all other
209
  // bits equivalent to the required char_traits instantiations.
210
  template
211
    struct encoding_char_traits
212
    : public std::char_traits<_CharT>
213
    {
214
      typedef encoding_state				state_type;
215
      typedef typename std::fpos		pos_type;
216
    };
217
 
218
_GLIBCXX_END_NAMESPACE_VERSION
219
_GLIBCXX_END_NAMESPACE_CXX11
220
} // namespace
221
 
222
 
223
namespace std _GLIBCXX_VISIBILITY(default)
224
{
225
_GLIBCXX_BEGIN_NAMESPACE_VERSION
226
 
227
  using __gnu_cxx::encoding_state;
228
 
229
  /// codecvt specialization.
230
  // This partial specialization takes advantage of iconv to provide
231
  // code conversions between a large number of character encodings.
232
  template
233
    class codecvt<_InternT, _ExternT, encoding_state>
234
    : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
235
    {
236
    public:
237
      // Types:
238
      typedef codecvt_base::result			result;
239
      typedef _InternT 					intern_type;
240
      typedef _ExternT 					extern_type;
241
      typedef __gnu_cxx::encoding_state 		state_type;
242
      typedef state_type::descriptor_type 		descriptor_type;
243
 
244
      // Data Members:
245
      static locale::id 		id;
246
 
247
      explicit
248
      codecvt(size_t __refs = 0)
249
      : __codecvt_abstract_base(__refs)
250
      { }
251
 
252
      explicit
253
      codecvt(state_type& __enc, size_t __refs = 0)
254
      : __codecvt_abstract_base(__refs)
255
      { }
256
 
257
     protected:
258
      virtual
259
      ~codecvt() { }
260
 
261
      virtual result
262
      do_out(state_type& __state, const intern_type* __from,
263
	     const intern_type* __from_end, const intern_type*& __from_next,
264
	     extern_type* __to, extern_type* __to_end,
265
	     extern_type*& __to_next) const;
266
 
267
      virtual result
268
      do_unshift(state_type& __state, extern_type* __to,
269
		 extern_type* __to_end, extern_type*& __to_next) const;
270
 
271
      virtual result
272
      do_in(state_type& __state, const extern_type* __from,
273
	    const extern_type* __from_end, const extern_type*& __from_next,
274
	    intern_type* __to, intern_type* __to_end,
275
	    intern_type*& __to_next) const;
276
 
277
      virtual int
278
      do_encoding() const throw();
279
 
280
      virtual bool
281
      do_always_noconv() const throw();
282
 
283
      virtual int
284
      do_length(state_type&, const extern_type* __from,
285
		const extern_type* __end, size_t __max) const;
286
 
287
      virtual int
288
      do_max_length() const throw();
289
    };
290
 
291
  template
292
    locale::id
293
    codecvt<_InternT, _ExternT, encoding_state>::id;
294
 
295
  // This adaptor works around the signature problems of the second
  // argument to iconv():  SUSv2 and others use 'const char**', but glibc 2.2
  // uses 'char**', which matches the POSIX 1003.1-2001 standard.
  // Using this adaptor, g++ will do the work for us.
  //
  // _Tp is deduced from the type of __func's second parameter, and
  // __inbuf is converted to it.  A C-style cast is used on purpose: it
  // is the one spelling that compiles both when _Tp is 'char**' and
  // when it is 'const char**'.  All other arguments are forwarded
  // unchanged and __func's result is returned as is.
  template<typename _Tp>
    inline size_t
    __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                    iconv_t __cd, char** __inbuf, size_t* __inbytes,
                    char** __outbuf, size_t* __outbytes)
    { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); }
305
 
306
  template
307
    codecvt_base::result
308
    codecvt<_InternT, _ExternT, encoding_state>::
309
    do_out(state_type& __state, const intern_type* __from,
310
	   const intern_type* __from_end, const intern_type*& __from_next,
311
	   extern_type* __to, extern_type* __to_end,
312
	   extern_type*& __to_next) const
313
    {
314
      result __ret = codecvt_base::error;
315
      if (__state.good())
316
	{
317
	  const descriptor_type& __desc = __state.out_descriptor();
318
	  const size_t __fmultiple = sizeof(intern_type);
319
	  size_t __fbytes = __fmultiple * (__from_end - __from);
320
	  const size_t __tmultiple = sizeof(extern_type);
321
	  size_t __tbytes = __tmultiple * (__to_end - __to);
322
 
323
	  // Argument list for iconv specifies a byte sequence. Thus,
324
	  // all to/from arrays must be brutally casted to char*.
325
	  char* __cto = reinterpret_cast(__to);
326
	  char* __cfrom;
327
	  size_t __conv;
328
 
329
	  // Some encodings need a byte order marker as the first item
330
	  // in the byte stream, to designate endian-ness. The default
331
	  // value for the byte order marker is NULL, so if this is
332
	  // the case, it's not necessary and we can just go on our
333
	  // merry way.
334
	  int __int_bom = __state.internal_bom();
335
	  if (__int_bom)
336
	    {
337
	      size_t __size = __from_end - __from;
338
	      intern_type* __cfixed = static_cast
339
		(__builtin_alloca(sizeof(intern_type) * (__size + 1)));
340
	      __cfixed[0] = static_cast(__int_bom);
341
	      char_traits::copy(__cfixed + 1, __from, __size);
342
	      __cfrom = reinterpret_cast(__cfixed);
343
	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
344
                                        &__fbytes, &__cto, &__tbytes);
345
	    }
346
	  else
347
	    {
348
	      intern_type* __cfixed = const_cast(__from);
349
	      __cfrom = reinterpret_cast(__cfixed);
350
	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
351
				       &__cto, &__tbytes);
352
	    }
353
 
354
	  if (__conv != size_t(-1))
355
	    {
356
	      __from_next = reinterpret_cast(__cfrom);
357
	      __to_next = reinterpret_cast(__cto);
358
	      __ret = codecvt_base::ok;
359
	    }
360
	  else
361
	    {
362
	      if (__fbytes < __fmultiple * (__from_end - __from))
363
		{
364
		  __from_next = reinterpret_cast(__cfrom);
365
		  __to_next = reinterpret_cast(__cto);
366
		  __ret = codecvt_base::partial;
367
		}
368
	      else
369
		__ret = codecvt_base::error;
370
	    }
371
	}
372
      return __ret;
373
    }
374
 
375
  template
376
    codecvt_base::result
377
    codecvt<_InternT, _ExternT, encoding_state>::
378
    do_unshift(state_type& __state, extern_type* __to,
379
	       extern_type* __to_end, extern_type*& __to_next) const
380
    {
381
      result __ret = codecvt_base::error;
382
      if (__state.good())
383
	{
384
	  const descriptor_type& __desc = __state.in_descriptor();
385
	  const size_t __tmultiple = sizeof(intern_type);
386
	  size_t __tlen = __tmultiple * (__to_end - __to);
387
 
388
	  // Argument list for iconv specifies a byte sequence. Thus,
389
	  // all to/from arrays must be brutally casted to char*.
390
	  char* __cto = reinterpret_cast(__to);
391
	  size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
392
                                          &__cto, &__tlen);
393
 
394
	  if (__conv != size_t(-1))
395
	    {
396
	      __to_next = reinterpret_cast(__cto);
397
	      if (__tlen == __tmultiple * (__to_end - __to))
398
		__ret = codecvt_base::noconv;
399
	      else if (__tlen == 0)
400
		__ret = codecvt_base::ok;
401
	      else
402
		__ret = codecvt_base::partial;
403
	    }
404
	  else
405
	    __ret = codecvt_base::error;
406
	}
407
      return __ret;
408
    }
409
 
410
  template
411
    codecvt_base::result
412
    codecvt<_InternT, _ExternT, encoding_state>::
413
    do_in(state_type& __state, const extern_type* __from,
414
	  const extern_type* __from_end, const extern_type*& __from_next,
415
	  intern_type* __to, intern_type* __to_end,
416
	  intern_type*& __to_next) const
417
    {
418
      result __ret = codecvt_base::error;
419
      if (__state.good())
420
	{
421
	  const descriptor_type& __desc = __state.in_descriptor();
422
	  const size_t __fmultiple = sizeof(extern_type);
423
	  size_t __flen = __fmultiple * (__from_end - __from);
424
	  const size_t __tmultiple = sizeof(intern_type);
425
	  size_t __tlen = __tmultiple * (__to_end - __to);
426
 
427
	  // Argument list for iconv specifies a byte sequence. Thus,
428
	  // all to/from arrays must be brutally casted to char*.
429
	  char* __cto = reinterpret_cast(__to);
430
	  char* __cfrom;
431
	  size_t __conv;
432
 
433
	  // Some encodings need a byte order marker as the first item
434
	  // in the byte stream, to designate endian-ness. The default
435
	  // value for the byte order marker is NULL, so if this is
436
	  // the case, it's not necessary and we can just go on our
437
	  // merry way.
438
	  int __ext_bom = __state.external_bom();
439
	  if (__ext_bom)
440
	    {
441
	      size_t __size = __from_end - __from;
442
	      extern_type* __cfixed =  static_cast
443
		(__builtin_alloca(sizeof(extern_type) * (__size + 1)));
444
	      __cfixed[0] = static_cast(__ext_bom);
445
	      char_traits::copy(__cfixed + 1, __from, __size);
446
	      __cfrom = reinterpret_cast(__cfixed);
447
	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
448
                                       &__flen, &__cto, &__tlen);
449
	    }
450
	  else
451
	    {
452
	      extern_type* __cfixed = const_cast(__from);
453
	      __cfrom = reinterpret_cast(__cfixed);
454
	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
455
                                       &__flen, &__cto, &__tlen);
456
	    }
457
 
458
 
459
	  if (__conv != size_t(-1))
460
	    {
461
	      __from_next = reinterpret_cast(__cfrom);
462
	      __to_next = reinterpret_cast(__cto);
463
	      __ret = codecvt_base::ok;
464
	    }
465
	  else
466
	    {
467
	      if (__flen < static_cast(__from_end - __from))
468
		{
469
		  __from_next = reinterpret_cast(__cfrom);
470
		  __to_next = reinterpret_cast(__cto);
471
		  __ret = codecvt_base::partial;
472
		}
473
	      else
474
		__ret = codecvt_base::error;
475
	    }
476
	}
477
      return __ret;
478
    }
479
 
480
  template
481
    int
482
    codecvt<_InternT, _ExternT, encoding_state>::
483
    do_encoding() const throw()
484
    {
485
      int __ret = 0;
486
      if (sizeof(_ExternT) <= sizeof(_InternT))
487
	__ret = sizeof(_InternT) / sizeof(_ExternT);
488
      return __ret;
489
    }
490
 
491
  template
492
    bool
493
    codecvt<_InternT, _ExternT, encoding_state>::
494
    do_always_noconv() const throw()
495
    { return false; }
496
 
497
  template
498
    int
499
    codecvt<_InternT, _ExternT, encoding_state>::
500
    do_length(state_type&, const extern_type* __from,
501
	      const extern_type* __end, size_t __max) const
502
    { return std::min(__max, static_cast(__end - __from)); }
503
 
504
  // _GLIBCXX_RESOLVE_LIB_DEFECTS
505
  // 74.  Garbled text for codecvt::do_max_length
506
  template
507
    int
508
    codecvt<_InternT, _ExternT, encoding_state>::
509
    do_max_length() const throw()
510
    { return 1; }
511
 
512
_GLIBCXX_END_NAMESPACE_VERSION
513
} // namespace
514
 
515
#endif