Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6554 serge 1
// Locale support (codecvt) -*- C++ -*-
2
 
3
// Copyright (C) 2015 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the
7
// terms of the GNU General Public License as published by the
8
// Free Software Foundation; either version 3, or (at your option)
9
// any later version.
10
 
11
// This library is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
// GNU General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// <http://www.gnu.org/licenses/>.
24
 
25
#include <codecvt>
#include <cstring>		// std::memcpy, std::memcmp
#include <algorithm>	// std::min, std::max
28
 
29
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
30
namespace std _GLIBCXX_VISIBILITY(default)
31
{
32
_GLIBCXX_BEGIN_NAMESPACE_VERSION
33
 
34
namespace
35
{
36
  // Largest code point that fits in a single UTF-16 code unit.
  const char32_t max_single_utf16_unit = 0xFFFF;

  // Default upper bound on code points (U+10FFFF).
  const char32_t max_code_point = 0x10FFFF;

  // Sentinel values returned by the read_*_code_point functions.
  // The functions below rely on maxcode < incomplete_mb_character
  // (which is enforced by the codecvt_utf* classes on construction).
  const char32_t incomplete_mb_character = char32_t(-2);
  const char32_t invalid_mb_sequence = char32_t(-1);
45
 
46
  // A half-open range [next, end) of code units.  The conversion
  // helpers advance next as they consume input or produce output.
  template<typename Elem>
    struct range
    {
      Elem* next;	// current position
      Elem* end;	// one past the last element

      // The element at the current position.
      Elem operator*() const { return *next; }

      // Advance the current position by one element.
      range& operator++() { ++next; return *this; }

      // Number of elements between next and end.
      size_t size() const { return end - next; }
    };
58
 
59
  // Multibyte sequences can have "header" consisting of Byte Order Mark.
  // Each array is sized to exactly the bytes of its BOM, so that passing
  // one to a helper that deduces the array length copies or compares
  // only the BOM itself.
  const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
  const unsigned char utf16_bom[2] = { 0xFE, 0xFF };
  const unsigned char utf16le_bom[2] = { 0xFF, 0xFE };
63
 
64
  template
65
    inline bool
66
    write_bom(range& to, const unsigned char (&bom)[N])
67
    {
68
      if (to.size() < N)
69
	return false;
70
      memcpy(to.next, bom, N);
71
      to.next += N;
72
      return true;
73
    }
74
 
75
  // If generate_header is set in mode write out UTF-8 BOM.
76
  bool
77
  write_utf8_bom(range& to, codecvt_mode mode)
78
  {
79
    if (mode & generate_header)
80
      return write_bom(to, utf8_bom);
81
    return true;
82
  }
83
 
84
  // If generate_header is set in mode write out the UTF-16 BOM indicated
85
  // by whether little_endian is set in mode.
86
  bool
87
  write_utf16_bom(range& to, codecvt_mode mode)
88
  {
89
    if (mode & generate_header)
90
    {
91
      if (!to.size())
92
	return false;
93
      auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom;
94
      std::memcpy(to.next, bom, 2);
95
      ++to.next;
96
    }
97
    return true;
98
  }
99
 
100
  template
101
    inline bool
102
    read_bom(range& from, const unsigned char (&bom)[N])
103
    {
104
      if (from.size() >= N && !memcmp(from.next, bom, N))
105
	{
106
	  from.next += N;
107
	  return true;
108
	}
109
      return false;
110
    }
111
 
112
  // If consume_header is set in mode update from.next to after any BOM.
113
  void
114
  read_utf8_bom(range& from, codecvt_mode mode)
115
  {
116
    if (mode & consume_header)
117
      read_bom(from, utf8_bom);
118
  }
119
 
120
  // If consume_header is set in mode update from.next to after any BOM.
121
  // Return little_endian iff the UTF-16LE BOM was present.
122
  codecvt_mode
123
  read_utf16_bom(range& from, codecvt_mode mode)
124
  {
125
    if (mode & consume_header && from.size())
126
      {
127
	if (*from.next == 0xFEFF)
128
	  ++from.next;
129
	else if (*from.next == 0xFFFE)
130
	  {
131
	    ++from.next;
132
	    return little_endian;
133
	  }
134
      }
135
    return {};
136
  }
137
 
138
  // Read a codepoint from a UTF-8 multibyte sequence.
139
  // Updates from.next if the codepoint is not greater than maxcode.
140
  // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
141
  char32_t
142
  read_utf8_code_point(range& from, unsigned long maxcode)
143
  {
144
    const size_t avail = from.size();
145
    if (avail == 0)
146
      return incomplete_mb_character;
147
    unsigned char c1 = from.next[0];
148
    // https://en.wikipedia.org/wiki/UTF-8#Sample_code
149
    if (c1 < 0x80)
150
    {
151
      ++from.next;
152
      return c1;
153
    }
154
    else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
155
      return invalid_mb_sequence;
156
    else if (c1 < 0xE0) // 2-byte sequence
157
    {
158
      if (avail < 2)
159
	return incomplete_mb_character;
160
      unsigned char c2 = from.next[1];
161
      if ((c2 & 0xC0) != 0x80)
162
	return invalid_mb_sequence;
163
      char32_t c = (c1 << 6) + c2 - 0x3080;
164
      if (c <= maxcode)
165
	from.next += 2;
166
      return c;
167
    }
168
    else if (c1 < 0xF0) // 3-byte sequence
169
    {
170
      if (avail < 3)
171
	return incomplete_mb_character;
172
      unsigned char c2 = from.next[1];
173
      if ((c2 & 0xC0) != 0x80)
174
	return invalid_mb_sequence;
175
      if (c1 == 0xE0 && c2 < 0xA0) // overlong
176
	return invalid_mb_sequence;
177
      unsigned char c3 = from.next[2];
178
      if ((c3 & 0xC0) != 0x80)
179
	return invalid_mb_sequence;
180
      char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
181
      if (c <= maxcode)
182
	from.next += 3;
183
      return c;
184
    }
185
    else if (c1 < 0xF5) // 4-byte sequence
186
    {
187
      if (avail < 4)
188
	return incomplete_mb_character;
189
      unsigned char c2 = from.next[1];
190
      if ((c2 & 0xC0) != 0x80)
191
	return invalid_mb_sequence;
192
      if (c1 == 0xF0 && c2 < 0x90) // overlong
193
	return invalid_mb_sequence;
194
      if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
195
      return invalid_mb_sequence;
196
      unsigned char c3 = from.next[2];
197
      if ((c3 & 0xC0) != 0x80)
198
	return invalid_mb_sequence;
199
      unsigned char c4 = from.next[3];
200
      if ((c4 & 0xC0) != 0x80)
201
	return invalid_mb_sequence;
202
      char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
203
      if (c <= maxcode)
204
	from.next += 4;
205
      return c;
206
    }
207
    else // > U+10FFFF
208
      return invalid_mb_sequence;
209
  }
210
 
211
  bool
212
  write_utf8_code_point(range& to, char32_t code_point)
213
  {
214
    if (code_point < 0x80)
215
      {
216
	if (to.size() < 1)
217
	  return false;
218
	*to.next++ = code_point;
219
      }
220
    else if (code_point <= 0x7FF)
221
      {
222
	if (to.size() < 2)
223
	  return false;
224
	*to.next++ = (code_point >> 6) + 0xC0;
225
	*to.next++ = (code_point & 0x3F) + 0x80;
226
      }
227
    else if (code_point <= 0xFFFF)
228
      {
229
	if (to.size() < 3)
230
	  return false;
231
	*to.next++ = (code_point >> 12) + 0xE0;
232
	*to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
233
	*to.next++ = (code_point & 0x3F) + 0x80;
234
      }
235
    else if (code_point <= 0x10FFFF)
236
      {
237
	if (to.size() < 4)
238
	  return false;
239
	*to.next++ = (code_point >> 18) + 0xF0;
240
	*to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
241
	*to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
242
	*to.next++ = (code_point & 0x3F) + 0x80;
243
      }
244
    else
245
      return false;
246
    return true;
247
  }
248
 
249
  // Convert a code unit between the host's byte order and the byte
  // order selected by mode (little-endian if little_endian is set,
  // big-endian otherwise).  The unit is byte-swapped exactly when the
  // two orders differ, so the same call converts in either direction.
  inline char16_t
  adjust_byte_order(char16_t c, codecvt_mode mode)
  {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    return (mode & little_endian) ? __builtin_bswap16(c) : c;
#else
    return (mode & little_endian) ? c : __builtin_bswap16(c);
#endif
  }
258
 
259
  // Return true if c is a high-surrogate (aka leading) code point,
  // i.e. lies in [0xD800, 0xDBFF].
  inline bool
  is_high_surrogate(char32_t c)
  {
    return c >= 0xD800 && c <= 0xDBFF;
  }
265
 
266
  // Return true if c is a low-surrogate (aka trailing) code point,
  // i.e. lies in [0xDC00, 0xDFFF].
  inline bool
  is_low_surrogate(char32_t c)
  {
    return c >= 0xDC00 && c <= 0xDFFF;
  }
272
 
273
  // Combine a high and a low surrogate into the code point they encode.
  // 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000, i.e. it removes both
  // surrogate bases and adds the 0x10000 offset in a single subtraction.
  // The arguments are not checked here.
  inline char32_t
  surrogate_pair_to_code_point(char32_t high, char32_t low)
  {
    return (high << 10) + low - 0x35FDC00;
  }
278
 
279
  // Read a codepoint from a UTF-16 multibyte sequence.
280
  // The sequence's endianness is indicated by (mode & little_endian).
281
  // Updates from.next if the codepoint is not greater than maxcode.
282
  // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
283
  char32_t
284
  read_utf16_code_point(range& from, unsigned long maxcode,
285
			codecvt_mode mode)
286
  {
287
    const size_t avail = from.size();
288
    if (avail == 0)
289
      return incomplete_mb_character;
290
    int inc = 1;
291
    char32_t c = adjust_byte_order(from.next[0], mode);
292
    if (is_high_surrogate(c))
293
      {
294
	if (avail < 2)
295
	  return incomplete_mb_character;
296
	const char16_t c2 = adjust_byte_order(from.next[1], mode);
297
	if (is_low_surrogate(c2))
298
	  {
299
	    c = surrogate_pair_to_code_point(c, c2);
300
	    inc = 2;
301
	  }
302
	else
303
	  return invalid_mb_sequence;
304
      }
305
    else if (is_low_surrogate(c))
306
      return invalid_mb_sequence;
307
    if (c <= maxcode)
308
      from.next += inc;
309
    return c;
310
  }
311
 
312
  template
313
  bool
314
  write_utf16_code_point(range& to, char32_t codepoint, codecvt_mode mode)
315
  {
316
    static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
317
 
318
    if (codepoint < max_single_utf16_unit)
319
      {
320
	if (to.size() > 0)
321
	  {
322
	    *to.next = adjust_byte_order(codepoint, mode);
323
	    ++to.next;
324
	    return true;
325
	  }
326
      }
327
    else if (to.size() > 1)
328
      {
329
	// Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
330
	const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
331
	char16_t lead = LEAD_OFFSET + (codepoint >> 10);
332
	char16_t trail = 0xDC00 + (codepoint & 0x3FF);
333
	to.next[0] = adjust_byte_order(lead, mode);
334
	to.next[1] = adjust_byte_order(trail, mode);
335
	to.next += 2;
336
	return true;
337
      }
338
    return false;
339
  }
340
 
341
  // utf8 -> ucs4
342
  codecvt_base::result
343
  ucs4_in(range& from, range& to,
344
          unsigned long maxcode = max_code_point, codecvt_mode mode = {})
345
  {
346
    read_utf8_bom(from, mode);
347
    while (from.size() && to.size())
348
      {
349
	const char32_t codepoint = read_utf8_code_point(from, maxcode);
350
	if (codepoint == incomplete_mb_character)
351
	  return codecvt_base::partial;
352
	if (codepoint > maxcode)
353
	  return codecvt_base::error;
354
	*to.next++ = codepoint;
355
      }
356
    return from.size() ? codecvt_base::partial : codecvt_base::ok;
357
  }
358
 
359
  // ucs4 -> utf8
360
  codecvt_base::result
361
  ucs4_out(range& from, range& to,
362
           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
363
  {
364
    if (!write_utf8_bom(to, mode))
365
      return codecvt_base::partial;
366
    while (from.size())
367
      {
368
	const char32_t c = from.next[0];
369
	if (c > maxcode)
370
	  return codecvt_base::error;
371
	if (!write_utf8_code_point(to, c))
372
	  return codecvt_base::partial;
373
	++from.next;
374
      }
375
    return codecvt_base::ok;
376
  }
377
 
378
  // utf16 -> ucs4
379
  codecvt_base::result
380
  ucs4_in(range& from, range& to,
381
          unsigned long maxcode = max_code_point, codecvt_mode mode = {})
382
  {
383
    if (read_utf16_bom(from, mode) == little_endian)
384
      mode = codecvt_mode(mode & little_endian);
385
    while (from.size() && to.size())
386
      {
387
	const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
388
	if (codepoint == incomplete_mb_character)
389
	  return codecvt_base::partial;
390
	if (codepoint > maxcode)
391
	  return codecvt_base::error;
392
	*to.next++ = codepoint;
393
      }
394
    return from.size() ? codecvt_base::partial : codecvt_base::ok;
395
  }
396
 
397
  // ucs4 -> utf16
398
  codecvt_base::result
399
  ucs4_out(range& from, range& to,
400
           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
401
  {
402
    if (!write_utf16_bom(to, mode))
403
      return codecvt_base::partial;
404
    while (from.size())
405
      {
406
	const char32_t c = from.next[0];
407
	if (c > maxcode)
408
	  return codecvt_base::error;
409
	if (!write_utf16_code_point(to, c, mode))
410
	  return codecvt_base::partial;
411
	++from.next;
412
      }
413
    return codecvt_base::ok;
414
  }
415
 
416
  // utf8 -> utf16
417
  template
418
  codecvt_base::result
419
  utf16_in(range& from, range& to,
420
           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
421
  {
422
    read_utf8_bom(from, mode);
423
    while (from.size() && to.size())
424
      {
425
	const char* const first = from.next;
426
	const char32_t codepoint = read_utf8_code_point(from, maxcode);
427
	if (codepoint == incomplete_mb_character)
428
	  return codecvt_base::partial;
429
	if (codepoint > maxcode)
430
	  return codecvt_base::error;
431
	if (!write_utf16_code_point(to, codepoint, mode))
432
	  {
433
	    from.next = first;
434
	    return codecvt_base::partial;
435
	  }
436
      }
437
    return codecvt_base::ok;
438
  }
439
 
440
  // utf16 -> utf8
441
  template
442
  codecvt_base::result
443
  utf16_out(range& from, range& to,
444
            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
445
  {
446
    if (!write_utf8_bom(to, mode))
447
      return codecvt_base::partial;
448
    while (from.size())
449
      {
450
	char32_t c = from.next[0];
451
	int inc = 1;
452
	if (is_high_surrogate(c))
453
	  {
454
	    if (from.size() < 2)
455
	      return codecvt_base::ok; // stop converting at this point
456
 
457
	    const char32_t c2 = from.next[1];
458
	    if (is_low_surrogate(c2))
459
	      {
460
		c = surrogate_pair_to_code_point(c, c2);
461
		inc = 2;
462
	      }
463
	    else
464
	      return codecvt_base::error;
465
	  }
466
	else if (is_low_surrogate(c))
467
	  return codecvt_base::error;
468
	if (c > maxcode)
469
	  return codecvt_base::error;
470
	if (!write_utf8_code_point(to, c))
471
	  return codecvt_base::partial;
472
	from.next += inc;
473
      }
474
    return codecvt_base::ok;
475
  }
476
 
477
  // return pos such that [begin,pos) is valid UTF-16 string no longer than max
478
  const char*
479
  utf16_span(const char* begin, const char* end, size_t max,
480
	     char32_t maxcode = max_code_point, codecvt_mode mode = {})
481
  {
482
    range from{ begin, end };
483
    read_utf8_bom(from, mode);
484
    size_t count = 0;
485
    while (count+1 < max)
486
      {
487
	char32_t c = read_utf8_code_point(from, maxcode);
488
	if (c > maxcode)
489
	  return from.next;
490
	else if (c > max_single_utf16_unit)
491
	  ++count;
492
	++count;
493
      }
494
    if (count+1 == max) // take one more character if it fits in a single unit
495
      read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
496
    return from.next;
497
  }
498
 
499
  // utf8 -> ucs2
500
  codecvt_base::result
501
  ucs2_in(range& from, range& to,
502
	  char32_t maxcode = max_code_point, codecvt_mode mode = {})
503
  {
504
    return utf16_in(from, to, std::max(max_single_utf16_unit, maxcode), mode);
505
  }
506
 
507
  // ucs2 -> utf8
508
  codecvt_base::result
509
  ucs2_out(range& from, range& to,
510
	   char32_t maxcode = max_code_point, codecvt_mode mode = {})
511
  {
512
    return utf16_out(from, to, std::max(max_single_utf16_unit, maxcode), mode);
513
  }
514
 
515
  // ucs2 -> utf16
516
  codecvt_base::result
517
  ucs2_out(range& from, range& to,
518
	   char32_t maxcode = max_code_point, codecvt_mode mode = {})
519
  {
520
    if (!write_utf16_bom(to, mode))
521
      return codecvt_base::partial;
522
    while (from.size() && to.size())
523
      {
524
	char16_t c = from.next[0];
525
	if (is_high_surrogate(c))
526
	  return codecvt_base::error;
527
	if (c > maxcode)
528
	  return codecvt_base::error;
529
	*to.next++ = adjust_byte_order(c, mode);
530
	++from.next;
531
      }
532
    return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
533
  }
534
 
535
  // utf16 -> ucs2
536
  codecvt_base::result
537
  ucs2_in(range& from, range& to,
538
	  char32_t maxcode = max_code_point, codecvt_mode mode = {})
539
  {
540
    if (read_utf16_bom(from, mode) == little_endian)
541
      mode = codecvt_mode(mode & little_endian);
542
    maxcode = std::max(max_single_utf16_unit, maxcode);
543
    while (from.size() && to.size())
544
      {
545
	const char32_t c = read_utf16_code_point(from, maxcode, mode);
546
	if (c == incomplete_mb_character)
547
	  return codecvt_base::partial;
548
	if (c > maxcode)
549
	  return codecvt_base::error;
550
	*to.next++ = c;
551
      }
552
    return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
553
  }
554
 
555
  const char16_t*
556
  ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
557
            char32_t maxcode, codecvt_mode mode)
558
  {
559
    range from{ begin, end };
560
    if (read_utf16_bom(from, mode) == little_endian)
561
      mode = codecvt_mode(mode & little_endian);
562
    maxcode = std::max(max_single_utf16_unit, maxcode);
563
    char32_t c = 0;
564
    while (max-- && c <= maxcode)
565
      c = read_utf16_code_point(from, maxcode, mode);
566
    return from.next;
567
  }
568
 
569
  const char*
570
  ucs2_span(const char* begin, const char* end, size_t max,
571
            char32_t maxcode, codecvt_mode mode)
572
  {
573
    range from{ begin, end };
574
    read_utf8_bom(from, mode);
575
    maxcode = std::max(max_single_utf16_unit, maxcode);
576
    char32_t c = 0;
577
    while (max-- && c <= maxcode)
578
      c = read_utf8_code_point(from, maxcode);
579
    return from.next;
580
  }
581
 
582
  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
583
  const char*
584
  ucs4_span(const char* begin, const char* end, size_t max,
585
            char32_t maxcode = max_code_point, codecvt_mode mode = {})
586
  {
587
    range from{ begin, end };
588
    read_utf8_bom(from, mode);
589
    char32_t c = 0;
590
    while (max-- && c <= maxcode)
591
      c = read_utf8_code_point(from, maxcode);
592
    return from.next;
593
  }
594
 
595
  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
596
  const char16_t*
597
  ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
598
            char32_t maxcode = max_code_point, codecvt_mode mode = {})
599
  {
600
    range from{ begin, end };
601
    if (read_utf16_bom(from, mode) == little_endian)
602
      mode = codecvt_mode(mode & little_endian);
603
    char32_t c = 0;
604
    while (max-- && c <= maxcode)
605
      c = read_utf16_code_point(from, maxcode, mode);
606
    return from.next;
607
  }
608
}
609
 
610
// Define members of codecvt specialization.
611
// Converts from UTF-8 to UTF-16.
612
 
613
locale::id codecvt::id;
614
 
615
codecvt::~codecvt() { }
616
 
617
codecvt_base::result
618
codecvt::
619
do_out(state_type&,
620
       const intern_type* __from,
621
       const intern_type* __from_end, const intern_type*& __from_next,
622
       extern_type* __to, extern_type* __to_end,
623
       extern_type*& __to_next) const
624
{
625
  range from{ __from, __from_end };
626
  range to{ __to, __to_end };
627
  auto res = utf16_out(from, to);
628
  __from_next = from.next;
629
  __to_next = to.next;
630
  return res;
631
}
632
 
633
codecvt_base::result
634
codecvt::
635
do_unshift(state_type&, extern_type* __to, extern_type*,
636
	   extern_type*& __to_next) const
637
{
638
  __to_next = __to;
639
  return noconv; // we don't use mbstate_t for the unicode facets
640
}
641
 
642
codecvt_base::result
643
codecvt::
644
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
645
      const extern_type*& __from_next,
646
      intern_type* __to, intern_type* __to_end,
647
      intern_type*& __to_next) const
648
{
649
  range from{ __from, __from_end };
650
  range to{ __to, __to_end };
651
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
652
  codecvt_mode mode = {};
653
#else
654
  codecvt_mode mode = little_endian;
655
#endif
656
  auto res = utf16_in(from, to, max_code_point, mode);
657
  __from_next = from.next;
658
  __to_next = to.next;
659
  return res;
660
}
661
 
662
int
663
codecvt::do_encoding() const throw()
664
{ return 0; }
665
 
666
bool
667
codecvt::do_always_noconv() const throw()
668
{ return false; }
669
 
670
int
671
codecvt::
672
do_length(state_type&, const extern_type* __from,
673
	  const extern_type* __end, size_t __max) const
674
{
675
  __end = utf16_span(__from, __end, __max);
676
  return __end - __from;
677
}
678
 
679
int
680
codecvt::do_max_length() const throw()
681
{
682
  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
683
  // whereas 4 byte sequences require two 16-bit code units.
684
  return 3;
685
}
686
 
687
// Define members of codecvt specialization.
688
// Converts from UTF-8 to UTF-32 (aka UCS-4).
689
 
690
locale::id codecvt::id;
691
 
692
codecvt::~codecvt() { }
693
 
694
codecvt_base::result
695
codecvt::
696
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
697
       const intern_type*& __from_next,
698
       extern_type* __to, extern_type* __to_end,
699
       extern_type*& __to_next) const
700
{
701
  range from{ __from, __from_end };
702
  range to{ __to, __to_end };
703
  auto res = ucs4_out(from, to);
704
  __from_next = from.next;
705
  __to_next = to.next;
706
  return res;
707
}
708
 
709
codecvt_base::result
710
codecvt::
711
do_unshift(state_type&, extern_type* __to, extern_type*,
712
	   extern_type*& __to_next) const
713
{
714
  __to_next = __to;
715
  return noconv;
716
}
717
 
718
codecvt_base::result
719
codecvt::
720
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
721
      const extern_type*& __from_next,
722
      intern_type* __to, intern_type* __to_end,
723
      intern_type*& __to_next) const
724
{
725
  range from{ __from, __from_end };
726
  range to{ __to, __to_end };
727
  auto res = ucs4_in(from, to);
728
  __from_next = from.next;
729
  __to_next = to.next;
730
  return res;
731
}
732
 
733
int
734
codecvt::do_encoding() const throw()
735
{ return 0; }
736
 
737
bool
738
codecvt::do_always_noconv() const throw()
739
{ return false; }
740
 
741
int
742
codecvt::
743
do_length(state_type&, const extern_type* __from,
744
	  const extern_type* __end, size_t __max) const
745
{
746
  __end = ucs4_span(__from, __end, __max);
747
  return __end - __from;
748
}
749
 
750
int
751
codecvt::do_max_length() const throw()
752
{ return 4; }
753
 
754
// Define members of codecvt_utf8 base class implementation.
755
// Converts from UTF-8 to UCS-2.
756
 
757
__codecvt_utf8_base::~__codecvt_utf8_base() { }
758
 
759
codecvt_base::result
760
__codecvt_utf8_base::
761
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
762
       const intern_type*& __from_next,
763
       extern_type* __to, extern_type* __to_end,
764
       extern_type*& __to_next) const
765
{
766
  range from{ __from, __from_end };
767
  range to{ __to, __to_end };
768
  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
769
  __from_next = from.next;
770
  __to_next = to.next;
771
  return res;
772
}
773
 
774
codecvt_base::result
775
__codecvt_utf8_base::
776
do_unshift(state_type&, extern_type* __to, extern_type*,
777
	   extern_type*& __to_next) const
778
{
779
  __to_next = __to;
780
  return noconv;
781
}
782
 
783
codecvt_base::result
784
__codecvt_utf8_base::
785
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
786
      const extern_type*& __from_next,
787
      intern_type* __to, intern_type* __to_end,
788
      intern_type*& __to_next) const
789
{
790
  range from{ __from, __from_end };
791
  range to{ __to, __to_end };
792
  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
793
#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
794
  mode = codecvt_mode(mode | little_endian);
795
#endif
796
  auto res = ucs2_in(from, to, _M_maxcode, mode);
797
  __from_next = from.next;
798
  __to_next = to.next;
799
  return res;
800
}
801
 
802
int
803
__codecvt_utf8_base::do_encoding() const throw()
804
{ return 0; }
805
 
806
bool
807
__codecvt_utf8_base::do_always_noconv() const throw()
808
{ return false; }
809
 
810
int
811
__codecvt_utf8_base::
812
do_length(state_type&, const extern_type* __from,
813
	  const extern_type* __end, size_t __max) const
814
{
815
  __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
816
  return __end - __from;
817
}
818
 
819
int
820
__codecvt_utf8_base::do_max_length() const throw()
821
{ return 3; }
822
 
823
// Define members of codecvt_utf8 base class implementation.
824
// Converts from UTF-8 to UTF-32 (aka UCS-4).
825
 
826
__codecvt_utf8_base::~__codecvt_utf8_base() { }
827
 
828
codecvt_base::result
829
__codecvt_utf8_base::
830
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
831
       const intern_type*& __from_next,
832
       extern_type* __to, extern_type* __to_end,
833
       extern_type*& __to_next) const
834
{
835
  range from{ __from, __from_end };
836
  range to{ __to, __to_end };
837
  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
838
  __from_next = from.next;
839
  __to_next = to.next;
840
  return res;
841
}
842
 
843
codecvt_base::result
844
__codecvt_utf8_base::
845
do_unshift(state_type&, extern_type* __to, extern_type*,
846
	   extern_type*& __to_next) const
847
{
848
  __to_next = __to;
849
  return noconv;
850
}
851
 
852
codecvt_base::result
853
__codecvt_utf8_base::
854
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
855
      const extern_type*& __from_next,
856
      intern_type* __to, intern_type* __to_end,
857
      intern_type*& __to_next) const
858
{
859
  range from{ __from, __from_end };
860
  range to{ __to, __to_end };
861
  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
862
  __from_next = from.next;
863
  __to_next = to.next;
864
  return res;
865
}
866
 
867
int
868
__codecvt_utf8_base::do_encoding() const throw()
869
{ return 0; }
870
 
871
bool
872
__codecvt_utf8_base::do_always_noconv() const throw()
873
{ return false; }
874
 
875
int
876
__codecvt_utf8_base::
877
do_length(state_type&, const extern_type* __from,
878
	  const extern_type* __end, size_t __max) const
879
{
880
  __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
881
  return __end - __from;
882
}
883
 
884
int
885
__codecvt_utf8_base::do_max_length() const throw()
886
{ return 4; }
887
 
888
#ifdef _GLIBCXX_USE_WCHAR_T
889
// Define members of codecvt_utf8 base class implementation.
890
// Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
891
 
892
__codecvt_utf8_base::~__codecvt_utf8_base() { }
893
 
894
codecvt_base::result
895
__codecvt_utf8_base::
896
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
897
       const intern_type*& __from_next,
898
       extern_type* __to, extern_type* __to_end,
899
       extern_type*& __to_next) const
900
{
901
  range to{ __to, __to_end };
902
#if __SIZEOF_WCHAR_T__ == 2
903
  range from{
904
    reinterpret_cast(__from),
905
    reinterpret_cast(__from_end)
906
  };
907
  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
908
#elif __SIZEOF_WCHAR_T__ == 4
909
  range from{
910
    reinterpret_cast(__from),
911
    reinterpret_cast(__from_end)
912
  };
913
  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
914
#else
915
  return codecvt_base::error;
916
#endif
917
  __from_next = reinterpret_cast(from.next);
918
  __to_next = to.next;
919
  return res;
920
}
921
 
922
codecvt_base::result
923
__codecvt_utf8_base::
924
do_unshift(state_type&, extern_type* __to, extern_type*,
925
	   extern_type*& __to_next) const
926
{
927
  __to_next = __to;
928
  return noconv;
929
}
930
 
931
codecvt_base::result
932
__codecvt_utf8_base::
933
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
934
      const extern_type*& __from_next,
935
      intern_type* __to, intern_type* __to_end,
936
      intern_type*& __to_next) const
937
{
938
  range from{ __from, __from_end };
939
#if __SIZEOF_WCHAR_T__ == 2
940
  range to{
941
    reinterpret_cast(__to),
942
    reinterpret_cast(__to_end)
943
  };
944
  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
945
#elif __SIZEOF_WCHAR_T__ == 4
946
  range to{
947
    reinterpret_cast(__to),
948
    reinterpret_cast(__to_end)
949
  };
950
  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
951
#else
952
  return codecvt_base::error;
953
#endif
954
  __from_next = from.next;
955
  __to_next = reinterpret_cast(to.next);
956
  return res;
957
}
958
 
959
int
960
__codecvt_utf8_base::do_encoding() const throw()
961
{ return 0; }
962
 
963
bool
964
__codecvt_utf8_base::do_always_noconv() const throw()
965
{ return false; }
966
 
967
int
968
__codecvt_utf8_base::
969
do_length(state_type&, const extern_type* __from,
970
	  const extern_type* __end, size_t __max) const
971
{
972
#if __SIZEOF_WCHAR_T__ == 2
973
  __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
974
#elif __SIZEOF_WCHAR_T__ == 4
975
  __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
976
#else
977
  __end = __from;
978
#endif
979
  return __end - __from;
980
}
981
 
982
int
983
__codecvt_utf8_base::do_max_length() const throw()
984
{ return 4; }
985
#endif
986
 
987
// Define members of codecvt_utf16 base class implementation.
988
// Converts from UTF-16 to UCS-2.
989
 
990
__codecvt_utf16_base::~__codecvt_utf16_base() { }
991
 
992
codecvt_base::result
993
__codecvt_utf16_base::
994
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
995
       const intern_type*& __from_next,
996
       extern_type* __to, extern_type* __to_end,
997
       extern_type*& __to_next) const
998
{
999
  range from{ __from, __from_end };
1000
  range to{
1001
    reinterpret_cast(__to),
1002
    reinterpret_cast(__to_end)
1003
  };
1004
  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1005
  __from_next = from.next;
1006
  __to_next = reinterpret_cast(to.next);
1007
  return res;
1008
}
1009
 
1010
codecvt_base::result
1011
__codecvt_utf16_base::
1012
do_unshift(state_type&, extern_type* __to, extern_type*,
1013
	   extern_type*& __to_next) const
1014
{
1015
  __to_next = __to;
1016
  return noconv;
1017
}
1018
 
1019
codecvt_base::result
1020
__codecvt_utf16_base::
1021
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1022
      const extern_type*& __from_next,
1023
      intern_type* __to, intern_type* __to_end,
1024
      intern_type*& __to_next) const
1025
{
1026
  range from{
1027
    reinterpret_cast(__from),
1028
    reinterpret_cast(__from_end)
1029
  };
1030
  range to{ __to, __to_end };
1031
  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1032
  __from_next = reinterpret_cast(from.next);
1033
  __to_next = to.next;
1034
  return res;
1035
}
1036
 
1037
int
1038
__codecvt_utf16_base::do_encoding() const throw()
1039
{ return 1; }
1040
 
1041
bool
1042
__codecvt_utf16_base::do_always_noconv() const throw()
1043
{ return false; }
1044
 
1045
int
1046
__codecvt_utf16_base::
1047
do_length(state_type&, const extern_type* __from,
1048
	  const extern_type* __end, size_t __max) const
1049
{
1050
  auto next = reinterpret_cast(__from);
1051
  next = ucs2_span(next, reinterpret_cast(__end), __max,
1052
		   _M_maxcode, _M_mode);
1053
  return reinterpret_cast(next) - __from;
1054
}
1055
 
1056
int
1057
__codecvt_utf16_base::do_max_length() const throw()
1058
{ return 3; }
1059
 
1060
// Define members of codecvt_utf16<char32_t> base class implementation.
// Converts from UTF-16 to UTF-32 (aka UCS-4).
1062
 
1063
// Out-of-line destructor definition; the body is intentionally empty.
__codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
1064
 
1065
// Converts the char32_t characters in [__from, __from_end) to the external
// byte range [__to, __to_end) by delegating to ucs4_out.
// On return __from_next and __to_next hold the positions at which ucs4_out
// left the source and destination ranges; the result of ucs4_out is returned.
codecvt_base::result
__codecvt_utf16_base<char32_t>::
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
       const intern_type*& __from_next,
       extern_type* __to, extern_type* __to_end,
       extern_type*& __to_next) const
{
  range<const char32_t> from{ __from, __from_end };
  // The output bytes are written by ucs4_out as 16-bit code units.
  range<char16_t> to{
    reinterpret_cast<char16_t*>(__to),
    reinterpret_cast<char16_t*>(__to_end)
  };
  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
  __from_next = from.next;
  // Translate the code-unit position back into a byte pointer.
  __to_next = reinterpret_cast<char*>(to.next);
  return res;
}
1082
 
1083
// Writes nothing to the output: the output position is reported unchanged
// (__to_next = __to) and noconv is returned.
codecvt_base::result
__codecvt_utf16_base<char32_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
           extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
1091
 
1092
// Converts the external byte range [__from, __from_end) into char32_t
// characters stored in [__to, __to_end) by delegating to ucs4_in.
// On return __from_next and __to_next hold the positions at which ucs4_in
// left the source and destination ranges; the result of ucs4_in is returned.
codecvt_base::result
__codecvt_utf16_base<char32_t>::
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
      const extern_type*& __from_next,
      intern_type* __to, intern_type* __to_end,
      intern_type*& __to_next) const
{
  // The external bytes are handed to ucs4_in as 16-bit code units.
  range<const char16_t> from{
    reinterpret_cast<const char16_t*>(__from),
    reinterpret_cast<const char16_t*>(__from_end)
  };
  range<char32_t> to{ __to, __to_end };
  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
  // Translate the code-unit position back into a byte pointer.
  __from_next = reinterpret_cast<const char*>(from.next);
  __to_next = to.next;
  return res;
}
1109
 
1110
int
1111
__codecvt_utf16_base::do_encoding() const throw()
1112
{ return 0; }
1113
 
1114
bool
1115
__codecvt_utf16_base::do_always_noconv() const throw()
1116
{ return false; }
1117
 
1118
int
1119
__codecvt_utf16_base::
1120
do_length(state_type&, const extern_type* __from,
1121
	  const extern_type* __end, size_t __max) const
1122
{
1123
  auto next = reinterpret_cast(__from);
1124
  next = ucs4_span(next, reinterpret_cast(__end), __max,
1125
		   _M_maxcode, _M_mode);
1126
  return reinterpret_cast(next) - __from;
1127
}
1128
 
1129
int
1130
__codecvt_utf16_base::do_max_length() const throw()
1131
{ return 4; }
1132
 
1133
#ifdef _GLIBCXX_USE_WCHAR_T
1134
// Define members of codecvt_utf16<wchar_t> base class implementation.
// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1136
 
1137
// Out-of-line destructor definition; the body is intentionally empty.
__codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
1138
 
1139
// Converts the wchar_t characters in [__from, __from_end) to the external
// byte range [__to, __to_end).  The wide characters are treated as char16_t
// (ucs2_out) or char32_t (ucs4_out) depending on __SIZEOF_WCHAR_T__.
// On return __from_next and __to_next hold the positions at which the helper
// left the source and destination ranges, and the helper's result is
// returned.  For any other wchar_t size codecvt_base::error is returned and
// the "next" pointers are left untouched.
codecvt_base::result
__codecvt_utf16_base<wchar_t>::
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
       const intern_type*& __from_next,
       extern_type* __to, extern_type* __to_end,
       extern_type*& __to_next) const
{
#if __SIZEOF_WCHAR_T__ == 2 || __SIZEOF_WCHAR_T__ == 4
  // This is the UTF-16 facet, so the output bytes are written as 16-bit
  // code units, exactly as the char16_t and char32_t specializations do.
  // Passing the byte range uncast would select the helper overload that
  // takes a range of char instead.
  range<char16_t> to{
    reinterpret_cast<char16_t*>(__to),
    reinterpret_cast<char16_t*>(__to_end)
  };
# if __SIZEOF_WCHAR_T__ == 2
  range<const char16_t> from{
    reinterpret_cast<const char16_t*>(__from),
    reinterpret_cast<const char16_t*>(__from_end)
  };
  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
# else
  range<const char32_t> from{
    reinterpret_cast<const char32_t*>(__from),
    reinterpret_cast<const char32_t*>(__from_end)
  };
  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
# endif
  __from_next = reinterpret_cast<const wchar_t*>(from.next);
  __to_next = reinterpret_cast<char*>(to.next);
  return res;
#else
  // Unsupported wchar_t width: nothing below the return may refer to the
  // ranges, which are not declared in this configuration.
  return codecvt_base::error;
#endif
}
1166
 
1167
// Writes nothing to the output: the output position is reported unchanged
// (__to_next = __to) and noconv is returned.
codecvt_base::result
__codecvt_utf16_base<wchar_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
           extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
1175
 
1176
// Converts the external byte range [__from, __from_end) into wchar_t
// characters stored in [__to, __to_end).  The wide characters are treated as
// char16_t (ucs2_in) or char32_t (ucs4_in) depending on __SIZEOF_WCHAR_T__.
// On return __from_next and __to_next hold the positions at which the helper
// left the source and destination ranges, and the helper's result is
// returned.  For any other wchar_t size codecvt_base::error is returned and
// the "next" pointers are left untouched.
codecvt_base::result
__codecvt_utf16_base<wchar_t>::
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
      const extern_type*& __from_next,
      intern_type* __to, intern_type* __to_end,
      intern_type*& __to_next) const
{
#if __SIZEOF_WCHAR_T__ == 2 || __SIZEOF_WCHAR_T__ == 4
  // This is the UTF-16 facet, so the input bytes are read as 16-bit code
  // units, exactly as the char16_t and char32_t specializations do.
  // Passing the byte range uncast would select the helper overload that
  // takes a range of char instead.
  range<const char16_t> from{
    reinterpret_cast<const char16_t*>(__from),
    reinterpret_cast<const char16_t*>(__from_end)
  };
# if __SIZEOF_WCHAR_T__ == 2
  range<char16_t> to{
    reinterpret_cast<char16_t*>(__to),
    reinterpret_cast<char16_t*>(__to_end)
  };
  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
# else
  range<char32_t> to{
    reinterpret_cast<char32_t*>(__to),
    reinterpret_cast<char32_t*>(__to_end)
  };
  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
# endif
  __from_next = reinterpret_cast<const char*>(from.next);
  __to_next = reinterpret_cast<wchar_t*>(to.next);
  return res;
#else
  // Unsupported wchar_t width: nothing below the return may refer to the
  // ranges, which are not declared in this configuration.
  return codecvt_base::error;
#endif
}
1203
 
1204
int
1205
__codecvt_utf16_base::do_encoding() const throw()
1206
{ return 0; }
1207
 
1208
bool
1209
__codecvt_utf16_base::do_always_noconv() const throw()
1210
{ return false; }
1211
 
1212
int
1213
__codecvt_utf16_base::
1214
do_length(state_type&, const extern_type* __from,
1215
	  const extern_type* __end, size_t __max) const
1216
{
1217
  auto next = reinterpret_cast(__from);
1218
#if __SIZEOF_WCHAR_T__ == 2
1219
  next = ucs2_span(next, reinterpret_cast(__end), __max,
1220
		   _M_maxcode, _M_mode);
1221
#elif __SIZEOF_WCHAR_T__ == 4
1222
  next = ucs4_span(next, reinterpret_cast(__end), __max,
1223
		   _M_maxcode, _M_mode);
1224
#endif
1225
  return reinterpret_cast(next) - __from;
1226
}
1227
 
1228
int
1229
__codecvt_utf16_base::do_max_length() const throw()
1230
{ return 4; }
1231
#endif
1232
 
1233
// Define members of codecvt_utf8_utf16<char16_t> base class implementation.
// Converts from UTF-8 to UTF-16.
1235
 
1236
// Out-of-line destructor definition; the body is intentionally empty.
__codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
1237
 
1238
// Converts the char16_t code units in [__from, __from_end) to the external
// char range [__to, __to_end) by delegating to utf16_out.
// On return __from_next and __to_next hold the positions at which utf16_out
// left the source and destination ranges; its result is returned.
codecvt_base::result
__codecvt_utf8_utf16_base<char16_t>::
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
       const intern_type*& __from_next,
       extern_type* __to, extern_type* __to_end,
       extern_type*& __to_next) const
{
  range<const char16_t> from{ __from, __from_end };
  range<char> to{ __to, __to_end };
  auto res = utf16_out(from, to, _M_maxcode, _M_mode);
  __from_next = from.next;
  __to_next = to.next;
  return res;
}
1252
 
1253
// Writes nothing to the output: the output position is reported unchanged
// (__to_next = __to) and noconv is returned.
codecvt_base::result
__codecvt_utf8_utf16_base<char16_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
           extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
1261
 
1262
// Converts the external char range [__from, __from_end) into char16_t code
// units stored in [__to, __to_end) by delegating to utf16_in.
// On return __from_next and __to_next hold the positions at which utf16_in
// left the source and destination ranges; its result is returned.
codecvt_base::result
__codecvt_utf8_utf16_base<char16_t>::
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
      const extern_type*& __from_next,
      intern_type* __to, intern_type* __to_end,
      intern_type*& __to_next) const
{
  range<const char> from{ __from, __from_end };
  range<char16_t> to{ __to, __to_end };
  // Only the header flags of the user's mode are forwarded; the
  // little_endian flag is replaced by one derived from the host byte order.
  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
  mode = codecvt_mode(mode | little_endian);
#endif
  auto res = utf16_in(from, to, _M_maxcode, mode);
  __from_next = from.next;
  __to_next = to.next;
  return res;
}
1280
 
1281
int
1282
__codecvt_utf8_utf16_base::do_encoding() const throw()
1283
{ return 0; }
1284
 
1285
bool
1286
__codecvt_utf8_utf16_base::do_always_noconv() const throw()
1287
{ return false; }
1288
 
1289
int
1290
__codecvt_utf8_utf16_base::
1291
do_length(state_type&, const extern_type* __from,
1292
	  const extern_type* __end, size_t __max) const
1293
{
1294
  __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1295
  return __end - __from;
1296
}
1297
 
1298
int
1299
__codecvt_utf8_utf16_base::do_max_length() const throw()
1300
{
1301
  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1302
  // whereas 4 byte sequences require two 16-bit code units.
1303
  return 3;
1304
}
1305
 
1306
// Define members of codecvt_utf8_utf16<char32_t> base class implementation.
// Converts from UTF-8 to UTF-16.
1308
 
1309
// Out-of-line destructor definition; the body is intentionally empty.
__codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
1310
 
1311
// Converts the char32_t elements in [__from, __from_end) to the external
// char range [__to, __to_end) by delegating to utf16_out.
// On return __from_next and __to_next hold the positions at which utf16_out
// left the source and destination ranges; its result is returned.
codecvt_base::result
__codecvt_utf8_utf16_base<char32_t>::
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
       const intern_type*& __from_next,
       extern_type* __to, extern_type* __to_end,
       extern_type*& __to_next) const
{
  range<const char32_t> from{ __from, __from_end };
  range<char> to{ __to, __to_end };
  auto res = utf16_out(from, to, _M_maxcode, _M_mode);
  __from_next = from.next;
  __to_next = to.next;
  return res;
}
1325
 
1326
// Writes nothing to the output: the output position is reported unchanged
// (__to_next = __to) and noconv is returned.
codecvt_base::result
__codecvt_utf8_utf16_base<char32_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
           extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
1334
 
1335
// Converts the external char range [__from, __from_end) into UTF-16 code
// units stored in the char32_t range [__to, __to_end) by delegating to
// utf16_in.  On return __from_next and __to_next hold the positions at which
// utf16_in left the source and destination ranges; its result is returned.
codecvt_base::result
__codecvt_utf8_utf16_base<char32_t>::
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
      const extern_type*& __from_next,
      intern_type* __to, intern_type* __to_end,
      intern_type*& __to_next) const
{
  range<const char> from{ __from, __from_end };
  range<char32_t> to{ __to, __to_end };
  // Use the same mode handling as the char16_t specialization of this
  // class: forward only the header flags and derive little_endian from the
  // host byte order instead of from the user's mode.
  // NOTE(review): presumably this keeps the code units written to the
  // internal buffer in host byte order -- confirm against utf16_in.
  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
  mode = codecvt_mode(mode | little_endian);
#endif
  auto res = utf16_in(from, to, _M_maxcode, mode);
  __from_next = from.next;
  __to_next = to.next;
  return res;
}
1349
 
1350
int
1351
__codecvt_utf8_utf16_base::do_encoding() const throw()
1352
{ return 0; }
1353
 
1354
bool
1355
__codecvt_utf8_utf16_base::do_always_noconv() const throw()
1356
{ return false; }
1357
 
1358
int
1359
__codecvt_utf8_utf16_base::
1360
do_length(state_type&, const extern_type* __from,
1361
	  const extern_type* __end, size_t __max) const
1362
{
1363
  __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1364
  return __end - __from;
1365
}
1366
 
1367
int
1368
__codecvt_utf8_utf16_base::do_max_length() const throw()
1369
{
1370
  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1371
  // whereas 4 byte sequences require two 16-bit code units.
1372
  return 3;
1373
}
1374
 
1375
#ifdef _GLIBCXX_USE_WCHAR_T
1376
// Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
// Converts from UTF-8 to UTF-16.
1378
 
1379
// Out-of-line destructor definition; the body is intentionally empty.
__codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
1380
 
1381
// Converts the wchar_t elements in [__from, __from_end) to the external
// char range [__to, __to_end) by delegating to utf16_out.
// On return __from_next and __to_next hold the positions at which utf16_out
// left the source and destination ranges; its result is returned.
codecvt_base::result
__codecvt_utf8_utf16_base<wchar_t>::
do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
       const intern_type*& __from_next,
       extern_type* __to, extern_type* __to_end,
       extern_type*& __to_next) const
{
  range<const wchar_t> from{ __from, __from_end };
  range<char> to{ __to, __to_end };
  auto res = utf16_out(from, to, _M_maxcode, _M_mode);
  __from_next = from.next;
  __to_next = to.next;
  return res;
}
1395
 
1396
// Writes nothing to the output: the output position is reported unchanged
// (__to_next = __to) and noconv is returned.
codecvt_base::result
__codecvt_utf8_utf16_base<wchar_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
           extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
1404
 
1405
// Converts the external char range [__from, __from_end) into UTF-16 code
// units stored in the wchar_t range [__to, __to_end) by delegating to
// utf16_in.  On return __from_next and __to_next hold the positions at which
// utf16_in left the source and destination ranges; its result is returned.
codecvt_base::result
__codecvt_utf8_utf16_base<wchar_t>::
do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
      const extern_type*& __from_next,
      intern_type* __to, intern_type* __to_end,
      intern_type*& __to_next) const
{
  range<const char> from{ __from, __from_end };
  range<wchar_t> to{ __to, __to_end };
  // Use the same mode handling as the char16_t specialization of this
  // class: forward only the header flags and derive little_endian from the
  // host byte order instead of from the user's mode.
  // NOTE(review): presumably this keeps the code units written to the
  // internal buffer in host byte order -- confirm against utf16_in.
  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
  mode = codecvt_mode(mode | little_endian);
#endif
  auto res = utf16_in(from, to, _M_maxcode, mode);
  __from_next = from.next;
  __to_next = to.next;
  return res;
}
1419
 
1420
int
1421
__codecvt_utf8_utf16_base::do_encoding() const throw()
1422
{ return 0; }
1423
 
1424
bool
1425
__codecvt_utf8_utf16_base::do_always_noconv() const throw()
1426
{ return false; }
1427
 
1428
int
1429
__codecvt_utf8_utf16_base::
1430
do_length(state_type&, const extern_type* __from,
1431
	  const extern_type* __end, size_t __max) const
1432
{
1433
  __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1434
  return __end - __from;
1435
}
1436
 
1437
int
1438
__codecvt_utf8_utf16_base::do_max_length() const throw()
1439
{
1440
  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1441
  // whereas 4 byte sequences require two 16-bit code units.
1442
  return 3;
1443
}
1444
#endif
1445
 
1446
inline template class __codecvt_abstract_base;
1447
inline template class __codecvt_abstract_base;
1448
template class codecvt_byname;
1449
template class codecvt_byname;
1450
 
1451
_GLIBCXX_END_NAMESPACE_VERSION
1452
}
1453
#endif // _GLIBCXX_USE_C99_STDINT_TR1