Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. // Locale support (codecvt) -*- C++ -*-
  2.  
  3. // Copyright (C) 2015 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library.  This library is free
  6. // software; you can redistribute it and/or modify it under the
  7. // terms of the GNU General Public License as published by the
  8. // Free Software Foundation; either version 3, or (at your option)
  9. // any later version.
  10.  
  11. // This library is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. // GNU General Public License for more details.
  15.  
  16. // Under Section 7 of GPL version 3, you are granted additional
  17. // permissions described in the GCC Runtime Library Exception, version
  18. // 3.1, as published by the Free Software Foundation.
  19.  
  20. // You should have received a copy of the GNU General Public License and
  21. // a copy of the GCC Runtime Library Exception along with this program;
  22. // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23. // <http://www.gnu.org/licenses/>.
  24.  
  25. #include <codecvt>
  26. #include <cstring>              // std::memcpy, std::memcmp
  27. #include <bits/stl_algobase.h>  // std::max
  28.  
  29. #ifdef _GLIBCXX_USE_C99_STDINT_TR1
  30. namespace std _GLIBCXX_VISIBILITY(default)
  31. {
  32. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  33.  
  34. namespace
  35. {
  // Highest code point that one UTF-16 code unit can represent.
  const char32_t max_single_utf16_unit = 0xFFFFu;

  // Default upper bound (maxcode) for the converters in this file.
  const char32_t max_code_point = 0x10FFFFu;

  // Sentinel results of the read_*_code_point helpers.  They must compare
  // greater than every maxcode, i.e. maxcode < incomplete_mb_character
  // (the codecvt_utf* classes enforce this on construction).
  const char32_t incomplete_mb_character = static_cast<char32_t>(-2);
  const char32_t invalid_mb_sequence = static_cast<char32_t>(-1);
  45.  
  // Minimal cursor over the half-open buffer [next, end) of Elem objects.
  // Kept as an aggregate so it can be brace-initialised from two pointers.
  template<typename Elem>
    struct range
    {
      Elem* next;   // current position
      Elem* end;    // one past the last element

      // Element at the current position (no bounds check).
      Elem operator*() const
      { return next[0]; }

      // Step the current position forward by one element.
      range& operator++()
      {
        next += 1;
        return *this;
      }

      // Number of elements left between next and end.
      size_t size() const
      { return static_cast<size_t>(end - next); }
    };
  58.  
  // Byte Order Marks that may prefix a multibyte sequence, stored as raw
  // bytes in stream order.  The UTF-16 marks are exactly two bytes long; the
  // arrays are sized to match so that sizeof() and the array-reference
  // helpers (write_bom/read_bom) never pick up trailing zero bytes.
  const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
  const unsigned char utf16_bom[2] = { 0xFE, 0xFF };     // big-endian mark
  const unsigned char utf16le_bom[2] = { 0xFF, 0xFE };   // little-endian mark
  63.  
  64.   template<size_t N>
  65.     inline bool
  66.     write_bom(range<char>& to, const unsigned char (&bom)[N])
  67.     {
  68.       if (to.size() < N)
  69.         return false;
  70.       memcpy(to.next, bom, N);
  71.       to.next += N;
  72.       return true;
  73.     }
  74.  
  75.   // If generate_header is set in mode write out UTF-8 BOM.
  76.   bool
  77.   write_utf8_bom(range<char>& to, codecvt_mode mode)
  78.   {
  79.     if (mode & generate_header)
  80.       return write_bom(to, utf8_bom);
  81.     return true;
  82.   }
  83.  
  84.   // If generate_header is set in mode write out the UTF-16 BOM indicated
  85.   // by whether little_endian is set in mode.
  86.   bool
  87.   write_utf16_bom(range<char16_t>& to, codecvt_mode mode)
  88.   {
  89.     if (mode & generate_header)
  90.     {
  91.       if (!to.size())
  92.         return false;
  93.       auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom;
  94.       std::memcpy(to.next, bom, 2);
  95.       ++to.next;
  96.     }
  97.     return true;
  98.   }
  99.  
  100.   template<size_t N>
  101.     inline bool
  102.     read_bom(range<const char>& from, const unsigned char (&bom)[N])
  103.     {
  104.       if (from.size() >= N && !memcmp(from.next, bom, N))
  105.         {
  106.           from.next += N;
  107.           return true;
  108.         }
  109.       return false;
  110.     }
  111.  
  112.   // If consume_header is set in mode update from.next to after any BOM.
  113.   void
  114.   read_utf8_bom(range<const char>& from, codecvt_mode mode)
  115.   {
  116.     if (mode & consume_header)
  117.       read_bom(from, utf8_bom);
  118.   }
  119.  
  120.   // If consume_header is set in mode update from.next to after any BOM.
  121.   // Return little_endian iff the UTF-16LE BOM was present.
  122.   codecvt_mode
  123.   read_utf16_bom(range<const char16_t>& from, codecvt_mode mode)
  124.   {
  125.     if (mode & consume_header && from.size())
  126.       {
  127.         if (*from.next == 0xFEFF)
  128.           ++from.next;
  129.         else if (*from.next == 0xFFFE)
  130.           {
  131.             ++from.next;
  132.             return little_endian;
  133.           }
  134.       }
  135.     return {};
  136.   }
  137.  
  138.   // Read a codepoint from a UTF-8 multibyte sequence.
  139.   // Updates from.next if the codepoint is not greater than maxcode.
  140.   // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
  141.   char32_t
  142.   read_utf8_code_point(range<const char>& from, unsigned long maxcode)
  143.   {
  144.     const size_t avail = from.size();
  145.     if (avail == 0)
  146.       return incomplete_mb_character;
  147.     unsigned char c1 = from.next[0];
  148.     // https://en.wikipedia.org/wiki/UTF-8#Sample_code
  149.     if (c1 < 0x80)
  150.     {
  151.       ++from.next;
  152.       return c1;
  153.     }
  154.     else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
  155.       return invalid_mb_sequence;
  156.     else if (c1 < 0xE0) // 2-byte sequence
  157.     {
  158.       if (avail < 2)
  159.         return incomplete_mb_character;
  160.       unsigned char c2 = from.next[1];
  161.       if ((c2 & 0xC0) != 0x80)
  162.         return invalid_mb_sequence;
  163.       char32_t c = (c1 << 6) + c2 - 0x3080;
  164.       if (c <= maxcode)
  165.         from.next += 2;
  166.       return c;
  167.     }
  168.     else if (c1 < 0xF0) // 3-byte sequence
  169.     {
  170.       if (avail < 3)
  171.         return incomplete_mb_character;
  172.       unsigned char c2 = from.next[1];
  173.       if ((c2 & 0xC0) != 0x80)
  174.         return invalid_mb_sequence;
  175.       if (c1 == 0xE0 && c2 < 0xA0) // overlong
  176.         return invalid_mb_sequence;
  177.       unsigned char c3 = from.next[2];
  178.       if ((c3 & 0xC0) != 0x80)
  179.         return invalid_mb_sequence;
  180.       char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
  181.       if (c <= maxcode)
  182.         from.next += 3;
  183.       return c;
  184.     }
  185.     else if (c1 < 0xF5) // 4-byte sequence
  186.     {
  187.       if (avail < 4)
  188.         return incomplete_mb_character;
  189.       unsigned char c2 = from.next[1];
  190.       if ((c2 & 0xC0) != 0x80)
  191.         return invalid_mb_sequence;
  192.       if (c1 == 0xF0 && c2 < 0x90) // overlong
  193.         return invalid_mb_sequence;
  194.       if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
  195.       return invalid_mb_sequence;
  196.       unsigned char c3 = from.next[2];
  197.       if ((c3 & 0xC0) != 0x80)
  198.         return invalid_mb_sequence;
  199.       unsigned char c4 = from.next[3];
  200.       if ((c4 & 0xC0) != 0x80)
  201.         return invalid_mb_sequence;
  202.       char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
  203.       if (c <= maxcode)
  204.         from.next += 4;
  205.       return c;
  206.     }
  207.     else // > U+10FFFF
  208.       return invalid_mb_sequence;
  209.   }
  210.  
  211.   bool
  212.   write_utf8_code_point(range<char>& to, char32_t code_point)
  213.   {
  214.     if (code_point < 0x80)
  215.       {
  216.         if (to.size() < 1)
  217.           return false;
  218.         *to.next++ = code_point;
  219.       }
  220.     else if (code_point <= 0x7FF)
  221.       {
  222.         if (to.size() < 2)
  223.           return false;
  224.         *to.next++ = (code_point >> 6) + 0xC0;
  225.         *to.next++ = (code_point & 0x3F) + 0x80;
  226.       }
  227.     else if (code_point <= 0xFFFF)
  228.       {
  229.         if (to.size() < 3)
  230.           return false;
  231.         *to.next++ = (code_point >> 12) + 0xE0;
  232.         *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
  233.         *to.next++ = (code_point & 0x3F) + 0x80;
  234.       }
  235.     else if (code_point <= 0x10FFFF)
  236.       {
  237.         if (to.size() < 4)
  238.           return false;
  239.         *to.next++ = (code_point >> 18) + 0xF0;
  240.         *to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
  241.         *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
  242.         *to.next++ = (code_point & 0x3F) + 0x80;
  243.       }
  244.     else
  245.       return false;
  246.     return true;
  247.   }
  248.  
  // Convert a 16-bit unit between host byte order and the byte order chosen
  // by mode (little-endian if little_endian is set, big-endian otherwise).
  // The unit is byte-swapped exactly when the two orders differ.
  inline char16_t
  adjust_byte_order(char16_t c, codecvt_mode mode)
  {
    const bool wants_little = (mode & little_endian) != 0;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    const bool host_is_little = false;
#else
    const bool host_is_little = true;
#endif
    if (wants_little == host_is_little)
      return c;
    return __builtin_bswap16(c);
  }
  258.  
  // True for a high (leading) surrogate, i.e. a value in U+D800..U+DBFF.
  inline bool
  is_high_surrogate(char32_t c)
  {
    // All 1024 high surrogates share the same bits above the low ten.
    return (c >> 10) == (0xD800 >> 10);
  }
  265.  
  // True for a low (trailing) surrogate, i.e. a value in U+DC00..U+DFFF.
  inline bool
  is_low_surrogate(char32_t c)
  {
    // All 1024 low surrogates share the same bits above the low ten.
    return (c >> 10) == (0xDC00 >> 10);
  }
  272.  
  // Combine a high/low surrogate pair into the supplementary-plane code
  // point it encodes.  The arguments are not validated here.
  inline char32_t
  surrogate_pair_to_code_point(char32_t high, char32_t low)
  {
    const char32_t upper_bits = (high - 0xD800) << 10;
    const char32_t lower_bits = low - 0xDC00;
    return 0x10000 + upper_bits + lower_bits;
  }
  278.  
  279.   // Read a codepoint from a UTF-16 multibyte sequence.
  280.   // The sequence's endianness is indicated by (mode & little_endian).
  281.   // Updates from.next if the codepoint is not greater than maxcode.
  282.   // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
  283.   char32_t
  284.   read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode,
  285.                         codecvt_mode mode)
  286.   {
  287.     const size_t avail = from.size();
  288.     if (avail == 0)
  289.       return incomplete_mb_character;
  290.     int inc = 1;
  291.     char32_t c = adjust_byte_order(from.next[0], mode);
  292.     if (is_high_surrogate(c))
  293.       {
  294.         if (avail < 2)
  295.           return incomplete_mb_character;
  296.         const char16_t c2 = adjust_byte_order(from.next[1], mode);
  297.         if (is_low_surrogate(c2))
  298.           {
  299.             c = surrogate_pair_to_code_point(c, c2);
  300.             inc = 2;
  301.           }
  302.         else
  303.           return invalid_mb_sequence;
  304.       }
  305.     else if (is_low_surrogate(c))
  306.       return invalid_mb_sequence;
  307.     if (c <= maxcode)
  308.       from.next += inc;
  309.     return c;
  310.   }
  311.  
  312.   template<typename C>
  313.   bool
  314.   write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode)
  315.   {
  316.     static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
  317.  
  318.     if (codepoint < max_single_utf16_unit)
  319.       {
  320.         if (to.size() > 0)
  321.           {
  322.             *to.next = adjust_byte_order(codepoint, mode);
  323.             ++to.next;
  324.             return true;
  325.           }
  326.       }
  327.     else if (to.size() > 1)
  328.       {
  329.         // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
  330.         const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
  331.         char16_t lead = LEAD_OFFSET + (codepoint >> 10);
  332.         char16_t trail = 0xDC00 + (codepoint & 0x3FF);
  333.         to.next[0] = adjust_byte_order(lead, mode);
  334.         to.next[1] = adjust_byte_order(trail, mode);
  335.         to.next += 2;
  336.         return true;
  337.       }
  338.     return false;
  339.   }
  340.  
  341.   // utf8 -> ucs4
  342.   codecvt_base::result
  343.   ucs4_in(range<const char>& from, range<char32_t>& to,
  344.           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
  345.   {
  346.     read_utf8_bom(from, mode);
  347.     while (from.size() && to.size())
  348.       {
  349.         const char32_t codepoint = read_utf8_code_point(from, maxcode);
  350.         if (codepoint == incomplete_mb_character)
  351.           return codecvt_base::partial;
  352.         if (codepoint > maxcode)
  353.           return codecvt_base::error;
  354.         *to.next++ = codepoint;
  355.       }
  356.     return from.size() ? codecvt_base::partial : codecvt_base::ok;
  357.   }
  358.  
  359.   // ucs4 -> utf8
  360.   codecvt_base::result
  361.   ucs4_out(range<const char32_t>& from, range<char>& to,
  362.            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
  363.   {
  364.     if (!write_utf8_bom(to, mode))
  365.       return codecvt_base::partial;
  366.     while (from.size())
  367.       {
  368.         const char32_t c = from.next[0];
  369.         if (c > maxcode)
  370.           return codecvt_base::error;
  371.         if (!write_utf8_code_point(to, c))
  372.           return codecvt_base::partial;
  373.         ++from.next;
  374.       }
  375.     return codecvt_base::ok;
  376.   }
  377.  
  378.   // utf16 -> ucs4
  379.   codecvt_base::result
  380.   ucs4_in(range<const char16_t>& from, range<char32_t>& to,
  381.           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
  382.   {
  383.     if (read_utf16_bom(from, mode) == little_endian)
  384.       mode = codecvt_mode(mode & little_endian);
  385.     while (from.size() && to.size())
  386.       {
  387.         const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
  388.         if (codepoint == incomplete_mb_character)
  389.           return codecvt_base::partial;
  390.         if (codepoint > maxcode)
  391.           return codecvt_base::error;
  392.         *to.next++ = codepoint;
  393.       }
  394.     return from.size() ? codecvt_base::partial : codecvt_base::ok;
  395.   }
  396.  
  397.   // ucs4 -> utf16
  398.   codecvt_base::result
  399.   ucs4_out(range<const char32_t>& from, range<char16_t>& to,
  400.            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
  401.   {
  402.     if (!write_utf16_bom(to, mode))
  403.       return codecvt_base::partial;
  404.     while (from.size())
  405.       {
  406.         const char32_t c = from.next[0];
  407.         if (c > maxcode)
  408.           return codecvt_base::error;
  409.         if (!write_utf16_code_point(to, c, mode))
  410.           return codecvt_base::partial;
  411.         ++from.next;
  412.       }
  413.     return codecvt_base::ok;
  414.   }
  415.  
  416.   // utf8 -> utf16
  417.   template<typename C>
  418.   codecvt_base::result
  419.   utf16_in(range<const char>& from, range<C>& to,
  420.            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
  421.   {
  422.     read_utf8_bom(from, mode);
  423.     while (from.size() && to.size())
  424.       {
  425.         const char* const first = from.next;
  426.         const char32_t codepoint = read_utf8_code_point(from, maxcode);
  427.         if (codepoint == incomplete_mb_character)
  428.           return codecvt_base::partial;
  429.         if (codepoint > maxcode)
  430.           return codecvt_base::error;
  431.         if (!write_utf16_code_point(to, codepoint, mode))
  432.           {
  433.             from.next = first;
  434.             return codecvt_base::partial;
  435.           }
  436.       }
  437.     return codecvt_base::ok;
  438.   }
  439.  
  440.   // utf16 -> utf8
  441.   template<typename C>
  442.   codecvt_base::result
  443.   utf16_out(range<const C>& from, range<char>& to,
  444.             unsigned long maxcode = max_code_point, codecvt_mode mode = {})
  445.   {
  446.     if (!write_utf8_bom(to, mode))
  447.       return codecvt_base::partial;
  448.     while (from.size())
  449.       {
  450.         char32_t c = from.next[0];
  451.         int inc = 1;
  452.         if (is_high_surrogate(c))
  453.           {
  454.             if (from.size() < 2)
  455.               return codecvt_base::ok; // stop converting at this point
  456.  
  457.             const char32_t c2 = from.next[1];
  458.             if (is_low_surrogate(c2))
  459.               {
  460.                 c = surrogate_pair_to_code_point(c, c2);
  461.                 inc = 2;
  462.               }
  463.             else
  464.               return codecvt_base::error;
  465.           }
  466.         else if (is_low_surrogate(c))
  467.           return codecvt_base::error;
  468.         if (c > maxcode)
  469.           return codecvt_base::error;
  470.         if (!write_utf8_code_point(to, c))
  471.           return codecvt_base::partial;
  472.         from.next += inc;
  473.       }
  474.     return codecvt_base::ok;
  475.   }
  476.  
  477.   // return pos such that [begin,pos) is valid UTF-16 string no longer than max
  478.   const char*
  479.   utf16_span(const char* begin, const char* end, size_t max,
  480.              char32_t maxcode = max_code_point, codecvt_mode mode = {})
  481.   {
  482.     range<const char> from{ begin, end };
  483.     read_utf8_bom(from, mode);
  484.     size_t count = 0;
  485.     while (count+1 < max)
  486.       {
  487.         char32_t c = read_utf8_code_point(from, maxcode);
  488.         if (c > maxcode)
  489.           return from.next;
  490.         else if (c > max_single_utf16_unit)
  491.           ++count;
  492.         ++count;
  493.       }
  494.     if (count+1 == max) // take one more character if it fits in a single unit
  495.       read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
  496.     return from.next;
  497.   }
  498.  
  499.   // utf8 -> ucs2
  500.   codecvt_base::result
  501.   ucs2_in(range<const char>& from, range<char16_t>& to,
  502.           char32_t maxcode = max_code_point, codecvt_mode mode = {})
  503.   {
  504.     return utf16_in(from, to, std::max(max_single_utf16_unit, maxcode), mode);
  505.   }
  506.  
  507.   // ucs2 -> utf8
  508.   codecvt_base::result
  509.   ucs2_out(range<const char16_t>& from, range<char>& to,
  510.            char32_t maxcode = max_code_point, codecvt_mode mode = {})
  511.   {
  512.     return utf16_out(from, to, std::max(max_single_utf16_unit, maxcode), mode);
  513.   }
  514.  
  515.   // ucs2 -> utf16
  516.   codecvt_base::result
  517.   ucs2_out(range<const char16_t>& from, range<char16_t>& to,
  518.            char32_t maxcode = max_code_point, codecvt_mode mode = {})
  519.   {
  520.     if (!write_utf16_bom(to, mode))
  521.       return codecvt_base::partial;
  522.     while (from.size() && to.size())
  523.       {
  524.         char16_t c = from.next[0];
  525.         if (is_high_surrogate(c))
  526.           return codecvt_base::error;
  527.         if (c > maxcode)
  528.           return codecvt_base::error;
  529.         *to.next++ = adjust_byte_order(c, mode);
  530.         ++from.next;
  531.       }
  532.     return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
  533.   }
  534.  
  535.   // utf16 -> ucs2
  536.   codecvt_base::result
  537.   ucs2_in(range<const char16_t>& from, range<char16_t>& to,
  538.           char32_t maxcode = max_code_point, codecvt_mode mode = {})
  539.   {
  540.     if (read_utf16_bom(from, mode) == little_endian)
  541.       mode = codecvt_mode(mode & little_endian);
  542.     maxcode = std::max(max_single_utf16_unit, maxcode);
  543.     while (from.size() && to.size())
  544.       {
  545.         const char32_t c = read_utf16_code_point(from, maxcode, mode);
  546.         if (c == incomplete_mb_character)
  547.           return codecvt_base::partial;
  548.         if (c > maxcode)
  549.           return codecvt_base::error;
  550.         *to.next++ = c;
  551.       }
  552.     return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
  553.   }
  554.  
  555.   const char16_t*
  556.   ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
  557.             char32_t maxcode, codecvt_mode mode)
  558.   {
  559.     range<const char16_t> from{ begin, end };
  560.     if (read_utf16_bom(from, mode) == little_endian)
  561.       mode = codecvt_mode(mode & little_endian);
  562.     maxcode = std::max(max_single_utf16_unit, maxcode);
  563.     char32_t c = 0;
  564.     while (max-- && c <= maxcode)
  565.       c = read_utf16_code_point(from, maxcode, mode);
  566.     return from.next;
  567.   }
  568.  
  569.   const char*
  570.   ucs2_span(const char* begin, const char* end, size_t max,
  571.             char32_t maxcode, codecvt_mode mode)
  572.   {
  573.     range<const char> from{ begin, end };
  574.     read_utf8_bom(from, mode);
  575.     maxcode = std::max(max_single_utf16_unit, maxcode);
  576.     char32_t c = 0;
  577.     while (max-- && c <= maxcode)
  578.       c = read_utf8_code_point(from, maxcode);
  579.     return from.next;
  580.   }
  581.  
  582.   // return pos such that [begin,pos) is valid UCS-4 string no longer than max
  583.   const char*
  584.   ucs4_span(const char* begin, const char* end, size_t max,
  585.             char32_t maxcode = max_code_point, codecvt_mode mode = {})
  586.   {
  587.     range<const char> from{ begin, end };
  588.     read_utf8_bom(from, mode);
  589.     char32_t c = 0;
  590.     while (max-- && c <= maxcode)
  591.       c = read_utf8_code_point(from, maxcode);
  592.     return from.next;
  593.   }
  594.  
  595.   // return pos such that [begin,pos) is valid UCS-4 string no longer than max
  596.   const char16_t*
  597.   ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
  598.             char32_t maxcode = max_code_point, codecvt_mode mode = {})
  599.   {
  600.     range<const char16_t> from{ begin, end };
  601.     if (read_utf16_bom(from, mode) == little_endian)
  602.       mode = codecvt_mode(mode & little_endian);
  603.     char32_t c = 0;
  604.     while (max-- && c <= maxcode)
  605.       c = read_utf16_code_point(from, maxcode, mode);
  606.     return from.next;
  607.   }
  608. }
  609.  
  610. // Define members of codecvt<char16_t, char, mbstate_t> specialization.
  611. // Converts from UTF-8 to UTF-16.
  612.  
  613. locale::id codecvt<char16_t, char, mbstate_t>::id;
  614.  
  615. codecvt<char16_t, char, mbstate_t>::~codecvt() { }
  616.  
  617. codecvt_base::result
  618. codecvt<char16_t, char, mbstate_t>::
  619. do_out(state_type&,
  620.        const intern_type* __from,
  621.        const intern_type* __from_end, const intern_type*& __from_next,
  622.        extern_type* __to, extern_type* __to_end,
  623.        extern_type*& __to_next) const
  624. {
  625.   range<const char16_t> from{ __from, __from_end };
  626.   range<char> to{ __to, __to_end };
  627.   auto res = utf16_out(from, to);
  628.   __from_next = from.next;
  629.   __to_next = to.next;
  630.   return res;
  631. }
  632.  
  633. codecvt_base::result
  634. codecvt<char16_t, char, mbstate_t>::
  635. do_unshift(state_type&, extern_type* __to, extern_type*,
  636.            extern_type*& __to_next) const
  637. {
  638.   __to_next = __to;
  639.   return noconv; // we don't use mbstate_t for the unicode facets
  640. }
  641.  
  642. codecvt_base::result
  643. codecvt<char16_t, char, mbstate_t>::
  644. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  645.       const extern_type*& __from_next,
  646.       intern_type* __to, intern_type* __to_end,
  647.       intern_type*& __to_next) const
  648. {
  649.   range<const char> from{ __from, __from_end };
  650.   range<char16_t> to{ __to, __to_end };
  651. #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  652.   codecvt_mode mode = {};
  653. #else
  654.   codecvt_mode mode = little_endian;
  655. #endif
  656.   auto res = utf16_in(from, to, max_code_point, mode);
  657.   __from_next = from.next;
  658.   __to_next = to.next;
  659.   return res;
  660. }
  661.  
  662. int
  663. codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
  664. { return 0; }
  665.  
  666. bool
  667. codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
  668. { return false; }
  669.  
  670. int
  671. codecvt<char16_t, char, mbstate_t>::
  672. do_length(state_type&, const extern_type* __from,
  673.           const extern_type* __end, size_t __max) const
  674. {
  675.   __end = utf16_span(__from, __end, __max);
  676.   return __end - __from;
  677. }
  678.  
  679. int
  680. codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
  681. {
  682.   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
  683.   // whereas 4 byte sequences require two 16-bit code units.
  684.   return 3;
  685. }
  686.  
  687. // Define members of codecvt<char32_t, char, mbstate_t> specialization.
  688. // Converts from UTF-8 to UTF-32 (aka UCS-4).
  689.  
  690. locale::id codecvt<char32_t, char, mbstate_t>::id;
  691.  
  692. codecvt<char32_t, char, mbstate_t>::~codecvt() { }
  693.  
  694. codecvt_base::result
  695. codecvt<char32_t, char, mbstate_t>::
  696. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  697.        const intern_type*& __from_next,
  698.        extern_type* __to, extern_type* __to_end,
  699.        extern_type*& __to_next) const
  700. {
  701.   range<const char32_t> from{ __from, __from_end };
  702.   range<char> to{ __to, __to_end };
  703.   auto res = ucs4_out(from, to);
  704.   __from_next = from.next;
  705.   __to_next = to.next;
  706.   return res;
  707. }
  708.  
  709. codecvt_base::result
  710. codecvt<char32_t, char, mbstate_t>::
  711. do_unshift(state_type&, extern_type* __to, extern_type*,
  712.            extern_type*& __to_next) const
  713. {
  714.   __to_next = __to;
  715.   return noconv;
  716. }
  717.  
  718. codecvt_base::result
  719. codecvt<char32_t, char, mbstate_t>::
  720. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  721.       const extern_type*& __from_next,
  722.       intern_type* __to, intern_type* __to_end,
  723.       intern_type*& __to_next) const
  724. {
  725.   range<const char> from{ __from, __from_end };
  726.   range<char32_t> to{ __to, __to_end };
  727.   auto res = ucs4_in(from, to);
  728.   __from_next = from.next;
  729.   __to_next = to.next;
  730.   return res;
  731. }
  732.  
  733. int
  734. codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
  735. { return 0; }
  736.  
  737. bool
  738. codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
  739. { return false; }
  740.  
  741. int
  742. codecvt<char32_t, char, mbstate_t>::
  743. do_length(state_type&, const extern_type* __from,
  744.           const extern_type* __end, size_t __max) const
  745. {
  746.   __end = ucs4_span(__from, __end, __max);
  747.   return __end - __from;
  748. }
  749.  
  750. int
  751. codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
  752. { return 4; }
  753.  
  754. // Define members of codecvt_utf8<char16_t> base class implementation.
  755. // Converts from UTF-8 to UCS-2.
  756.  
  757. __codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
  758.  
  759. codecvt_base::result
  760. __codecvt_utf8_base<char16_t>::
  761. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  762.        const intern_type*& __from_next,
  763.        extern_type* __to, extern_type* __to_end,
  764.        extern_type*& __to_next) const
  765. {
  766.   range<const char16_t> from{ __from, __from_end };
  767.   range<char> to{ __to, __to_end };
  768.   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
  769.   __from_next = from.next;
  770.   __to_next = to.next;
  771.   return res;
  772. }
  773.  
  774. codecvt_base::result
  775. __codecvt_utf8_base<char16_t>::
  776. do_unshift(state_type&, extern_type* __to, extern_type*,
  777.            extern_type*& __to_next) const
  778. {
  779.   __to_next = __to;
  780.   return noconv;
  781. }
  782.  
  783. codecvt_base::result
  784. __codecvt_utf8_base<char16_t>::
  785. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  786.       const extern_type*& __from_next,
  787.       intern_type* __to, intern_type* __to_end,
  788.       intern_type*& __to_next) const
  789. {
  790.   range<const char> from{ __from, __from_end };
  791.   range<char16_t> to{ __to, __to_end };
  792.   codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
  793. #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
  794.   mode = codecvt_mode(mode | little_endian);
  795. #endif
  796.   auto res = ucs2_in(from, to, _M_maxcode, mode);
  797.   __from_next = from.next;
  798.   __to_next = to.next;
  799.   return res;
  800. }
  801.  
  802. int
  803. __codecvt_utf8_base<char16_t>::do_encoding() const throw()
  804. { return 0; }
  805.  
  806. bool
  807. __codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
  808. { return false; }
  809.  
  810. int
  811. __codecvt_utf8_base<char16_t>::
  812. do_length(state_type&, const extern_type* __from,
  813.           const extern_type* __end, size_t __max) const
  814. {
  815.   __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
  816.   return __end - __from;
  817. }
  818.  
  819. int
  820. __codecvt_utf8_base<char16_t>::do_max_length() const throw()
  821. { return 3; }
  822.  
  823. // Define members of codecvt_utf8<char32_t> base class implementation.
  824. // Converts from UTF-8 to UTF-32 (aka UCS-4).
  825.  
  826. __codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
  827.  
  828. codecvt_base::result
  829. __codecvt_utf8_base<char32_t>::
  830. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  831.        const intern_type*& __from_next,
  832.        extern_type* __to, extern_type* __to_end,
  833.        extern_type*& __to_next) const
  834. {
  835.   range<const char32_t> from{ __from, __from_end };
  836.   range<char> to{ __to, __to_end };
  837.   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
  838.   __from_next = from.next;
  839.   __to_next = to.next;
  840.   return res;
  841. }
  842.  
  843. codecvt_base::result
  844. __codecvt_utf8_base<char32_t>::
  845. do_unshift(state_type&, extern_type* __to, extern_type*,
  846.            extern_type*& __to_next) const
  847. {
  848.   __to_next = __to;
  849.   return noconv;
  850. }
  851.  
  852. codecvt_base::result
  853. __codecvt_utf8_base<char32_t>::
  854. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  855.       const extern_type*& __from_next,
  856.       intern_type* __to, intern_type* __to_end,
  857.       intern_type*& __to_next) const
  858. {
  859.   range<const char> from{ __from, __from_end };
  860.   range<char32_t> to{ __to, __to_end };
  861.   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
  862.   __from_next = from.next;
  863.   __to_next = to.next;
  864.   return res;
  865. }
  866.  
  867. int
  868. __codecvt_utf8_base<char32_t>::do_encoding() const throw()
  869. { return 0; }
  870.  
  871. bool
  872. __codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
  873. { return false; }
  874.  
  875. int
  876. __codecvt_utf8_base<char32_t>::
  877. do_length(state_type&, const extern_type* __from,
  878.           const extern_type* __end, size_t __max) const
  879. {
  880.   __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
  881.   return __end - __from;
  882. }
  883.  
  884. int
  885. __codecvt_utf8_base<char32_t>::do_max_length() const throw()
  886. { return 4; }
  887.  
  888. #ifdef _GLIBCXX_USE_WCHAR_T
  889. // Define members of codecvt_utf8<wchar_t> base class implementation.
  890. // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
  891.  
  892. __codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
  893.  
  894. codecvt_base::result
  895. __codecvt_utf8_base<wchar_t>::
  896. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  897.        const intern_type*& __from_next,
  898.        extern_type* __to, extern_type* __to_end,
  899.        extern_type*& __to_next) const
  900. {
  901.   range<char> to{ __to, __to_end };
  902. #if __SIZEOF_WCHAR_T__ == 2
  903.   range<const char16_t> from{
  904.     reinterpret_cast<const char16_t*>(__from),
  905.     reinterpret_cast<const char16_t*>(__from_end)
  906.   };
  907.   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
  908. #elif __SIZEOF_WCHAR_T__ == 4
  909.   range<const char32_t> from{
  910.     reinterpret_cast<const char32_t*>(__from),
  911.     reinterpret_cast<const char32_t*>(__from_end)
  912.   };
  913.   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
  914. #else
  915.   return codecvt_base::error;
  916. #endif
  917.   __from_next = reinterpret_cast<const wchar_t*>(from.next);
  918.   __to_next = to.next;
  919.   return res;
  920. }
  921.  
  922. codecvt_base::result
  923. __codecvt_utf8_base<wchar_t>::
  924. do_unshift(state_type&, extern_type* __to, extern_type*,
  925.            extern_type*& __to_next) const
  926. {
  927.   __to_next = __to;
  928.   return noconv;
  929. }
  930.  
  931. codecvt_base::result
  932. __codecvt_utf8_base<wchar_t>::
  933. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  934.       const extern_type*& __from_next,
  935.       intern_type* __to, intern_type* __to_end,
  936.       intern_type*& __to_next) const
  937. {
  938.   range<const char> from{ __from, __from_end };
  939. #if __SIZEOF_WCHAR_T__ == 2
  940.   range<char16_t> to{
  941.     reinterpret_cast<char16_t*>(__to),
  942.     reinterpret_cast<char16_t*>(__to_end)
  943.   };
  944.   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
  945. #elif __SIZEOF_WCHAR_T__ == 4
  946.   range<char32_t> to{
  947.     reinterpret_cast<char32_t*>(__to),
  948.     reinterpret_cast<char32_t*>(__to_end)
  949.   };
  950.   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
  951. #else
  952.   return codecvt_base::error;
  953. #endif
  954.   __from_next = from.next;
  955.   __to_next = reinterpret_cast<wchar_t*>(to.next);
  956.   return res;
  957. }
  958.  
  959. int
  960. __codecvt_utf8_base<wchar_t>::do_encoding() const throw()
  961. { return 0; }
  962.  
  963. bool
  964. __codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
  965. { return false; }
  966.  
  967. int
  968. __codecvt_utf8_base<wchar_t>::
  969. do_length(state_type&, const extern_type* __from,
  970.           const extern_type* __end, size_t __max) const
  971. {
  972. #if __SIZEOF_WCHAR_T__ == 2
  973.   __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
  974. #elif __SIZEOF_WCHAR_T__ == 4
  975.   __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
  976. #else
  977.   __end = __from;
  978. #endif
  979.   return __end - __from;
  980. }
  981.  
  982. int
  983. __codecvt_utf8_base<wchar_t>::do_max_length() const throw()
  984. { return 4; }
  985. #endif
  986.  
  987. // Define members of codecvt_utf16<char16_t> base class implementation.
  988. // Converts from UTF-16 to UCS-2.
  989.  
  990. __codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
  991.  
  992. codecvt_base::result
  993. __codecvt_utf16_base<char16_t>::
  994. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  995.        const intern_type*& __from_next,
  996.        extern_type* __to, extern_type* __to_end,
  997.        extern_type*& __to_next) const
  998. {
  999.   range<const char16_t> from{ __from, __from_end };
  1000.   range<char16_t> to{
  1001.     reinterpret_cast<char16_t*>(__to),
  1002.     reinterpret_cast<char16_t*>(__to_end)
  1003.   };
  1004.   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
  1005.   __from_next = from.next;
  1006.   __to_next = reinterpret_cast<char*>(to.next);
  1007.   return res;
  1008. }
  1009.  
  1010. codecvt_base::result
  1011. __codecvt_utf16_base<char16_t>::
  1012. do_unshift(state_type&, extern_type* __to, extern_type*,
  1013.            extern_type*& __to_next) const
  1014. {
  1015.   __to_next = __to;
  1016.   return noconv;
  1017. }
  1018.  
  1019. codecvt_base::result
  1020. __codecvt_utf16_base<char16_t>::
  1021. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  1022.       const extern_type*& __from_next,
  1023.       intern_type* __to, intern_type* __to_end,
  1024.       intern_type*& __to_next) const
  1025. {
  1026.   range<const char16_t> from{
  1027.     reinterpret_cast<const char16_t*>(__from),
  1028.     reinterpret_cast<const char16_t*>(__from_end)
  1029.   };
  1030.   range<char16_t> to{ __to, __to_end };
  1031.   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
  1032.   __from_next = reinterpret_cast<const char*>(from.next);
  1033.   __to_next = to.next;
  1034.   return res;
  1035. }
  1036.  
  1037. int
  1038. __codecvt_utf16_base<char16_t>::do_encoding() const throw()
  1039. { return 1; }
  1040.  
  1041. bool
  1042. __codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
  1043. { return false; }
  1044.  
  1045. int
  1046. __codecvt_utf16_base<char16_t>::
  1047. do_length(state_type&, const extern_type* __from,
  1048.           const extern_type* __end, size_t __max) const
  1049. {
  1050.   auto next = reinterpret_cast<const char16_t*>(__from);
  1051.   next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
  1052.                    _M_maxcode, _M_mode);
  1053.   return reinterpret_cast<const char*>(next) - __from;
  1054. }
  1055.  
  1056. int
  1057. __codecvt_utf16_base<char16_t>::do_max_length() const throw()
  1058. { return 3; }
  1059.  
  1060. // Define members of codecvt_utf16<char32_t> base class implementation.
  1061. // Converts from UTF-16 to UTF-32 (aka UCS-4).
  1062.  
  1063. __codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
  1064.  
  1065. codecvt_base::result
  1066. __codecvt_utf16_base<char32_t>::
  1067. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  1068.        const intern_type*& __from_next,
  1069.        extern_type* __to, extern_type* __to_end,
  1070.        extern_type*& __to_next) const
  1071. {
  1072.   range<const char32_t> from{ __from, __from_end };
  1073.   range<char16_t> to{
  1074.     reinterpret_cast<char16_t*>(__to),
  1075.     reinterpret_cast<char16_t*>(__to_end)
  1076.   };
  1077.   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
  1078.   __from_next = from.next;
  1079.   __to_next = reinterpret_cast<char*>(to.next);
  1080.   return res;
  1081. }
  1082.  
  1083. codecvt_base::result
  1084. __codecvt_utf16_base<char32_t>::
  1085. do_unshift(state_type&, extern_type* __to, extern_type*,
  1086.            extern_type*& __to_next) const
  1087. {
  1088.   __to_next = __to;
  1089.   return noconv;
  1090. }
  1091.  
  1092. codecvt_base::result
  1093. __codecvt_utf16_base<char32_t>::
  1094. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  1095.       const extern_type*& __from_next,
  1096.       intern_type* __to, intern_type* __to_end,
  1097.       intern_type*& __to_next) const
  1098. {
  1099.   range<const char16_t> from{
  1100.     reinterpret_cast<const char16_t*>(__from),
  1101.     reinterpret_cast<const char16_t*>(__from_end)
  1102.   };
  1103.   range<char32_t> to{ __to, __to_end };
  1104.   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
  1105.   __from_next = reinterpret_cast<const char*>(from.next);
  1106.   __to_next = to.next;
  1107.   return res;
  1108. }
  1109.  
  1110. int
  1111. __codecvt_utf16_base<char32_t>::do_encoding() const throw()
  1112. { return 0; }
  1113.  
  1114. bool
  1115. __codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
  1116. { return false; }
  1117.  
  1118. int
  1119. __codecvt_utf16_base<char32_t>::
  1120. do_length(state_type&, const extern_type* __from,
  1121.           const extern_type* __end, size_t __max) const
  1122. {
  1123.   auto next = reinterpret_cast<const char16_t*>(__from);
  1124.   next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
  1125.                    _M_maxcode, _M_mode);
  1126.   return reinterpret_cast<const char*>(next) - __from;
  1127. }
  1128.  
  1129. int
  1130. __codecvt_utf16_base<char32_t>::do_max_length() const throw()
  1131. { return 4; }
  1132.  
  1133. #ifdef _GLIBCXX_USE_WCHAR_T
  1134. // Define members of codecvt_utf16<wchar_t> base class implementation.
// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
  1136.  
  1137. __codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
  1138.  
  1139. codecvt_base::result
  1140. __codecvt_utf16_base<wchar_t>::
  1141. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  1142.        const intern_type*& __from_next,
  1143.        extern_type* __to, extern_type* __to_end,
  1144.        extern_type*& __to_next) const
  1145. {
  1146.   range<char> to{ __to, __to_end };
  1147. #if __SIZEOF_WCHAR_T__ == 2
  1148.   range<const char16_t> from{
  1149.     reinterpret_cast<const char16_t*>(__from),
  1150.     reinterpret_cast<const char16_t*>(__from_end)
  1151.   };
  1152.   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
  1153. #elif __SIZEOF_WCHAR_T__ == 4
  1154.   range<const char32_t> from{
  1155.     reinterpret_cast<const char32_t*>(__from),
  1156.     reinterpret_cast<const char32_t*>(__from_end)
  1157.   };
  1158.   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
  1159. #else
  1160.   return codecvt_base::error;
  1161. #endif
  1162.   __from_next = reinterpret_cast<const wchar_t*>(from.next);
  1163.   __to_next = to.next;
  1164.   return res;
  1165. }
  1166.  
  1167. codecvt_base::result
  1168. __codecvt_utf16_base<wchar_t>::
  1169. do_unshift(state_type&, extern_type* __to, extern_type*,
  1170.            extern_type*& __to_next) const
  1171. {
  1172.   __to_next = __to;
  1173.   return noconv;
  1174. }
  1175.  
  1176. codecvt_base::result
  1177. __codecvt_utf16_base<wchar_t>::
  1178. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  1179.       const extern_type*& __from_next,
  1180.       intern_type* __to, intern_type* __to_end,
  1181.       intern_type*& __to_next) const
  1182. {
  1183.   range<const char> from{ __from, __from_end };
  1184. #if __SIZEOF_WCHAR_T__ == 2
  1185.   range<char16_t> to{
  1186.     reinterpret_cast<char16_t*>(__to),
  1187.     reinterpret_cast<char16_t*>(__to_end)
  1188.   };
  1189.   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
  1190. #elif __SIZEOF_WCHAR_T__ == 4
  1191.   range<char32_t> to{
  1192.     reinterpret_cast<char32_t*>(__to),
  1193.     reinterpret_cast<char32_t*>(__to_end)
  1194.   };
  1195.   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
  1196. #else
  1197.   return codecvt_base::error;
  1198. #endif
  1199.   __from_next = from.next;
  1200.   __to_next = reinterpret_cast<wchar_t*>(to.next);
  1201.   return res;
  1202. }
  1203.  
  1204. int
  1205. __codecvt_utf16_base<wchar_t>::do_encoding() const throw()
  1206. { return 0; }
  1207.  
  1208. bool
  1209. __codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
  1210. { return false; }
  1211.  
  1212. int
  1213. __codecvt_utf16_base<wchar_t>::
  1214. do_length(state_type&, const extern_type* __from,
  1215.           const extern_type* __end, size_t __max) const
  1216. {
  1217.   auto next = reinterpret_cast<const char16_t*>(__from);
  1218. #if __SIZEOF_WCHAR_T__ == 2
  1219.   next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
  1220.                    _M_maxcode, _M_mode);
  1221. #elif __SIZEOF_WCHAR_T__ == 4
  1222.   next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
  1223.                    _M_maxcode, _M_mode);
  1224. #endif
  1225.   return reinterpret_cast<const char*>(next) - __from;
  1226. }
  1227.  
  1228. int
  1229. __codecvt_utf16_base<wchar_t>::do_max_length() const throw()
  1230. { return 4; }
  1231. #endif
  1232.  
  1233. // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
  1234. // Converts from UTF-8 to UTF-16.
  1235.  
  1236. __codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
  1237.  
  1238. codecvt_base::result
  1239. __codecvt_utf8_utf16_base<char16_t>::
  1240. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  1241.        const intern_type*& __from_next,
  1242.        extern_type* __to, extern_type* __to_end,
  1243.        extern_type*& __to_next) const
  1244. {
  1245.   range<const char16_t> from{ __from, __from_end };
  1246.   range<char> to{ __to, __to_end };
  1247.   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
  1248.   __from_next = from.next;
  1249.   __to_next = to.next;
  1250.   return res;
  1251. }
  1252.  
  1253. codecvt_base::result
  1254. __codecvt_utf8_utf16_base<char16_t>::
  1255. do_unshift(state_type&, extern_type* __to, extern_type*,
  1256.            extern_type*& __to_next) const
  1257. {
  1258.   __to_next = __to;
  1259.   return noconv;
  1260. }
  1261.  
  1262. codecvt_base::result
  1263. __codecvt_utf8_utf16_base<char16_t>::
  1264. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  1265.       const extern_type*& __from_next,
  1266.       intern_type* __to, intern_type* __to_end,
  1267.       intern_type*& __to_next) const
  1268. {
  1269.   range<const char> from{ __from, __from_end };
  1270.   range<char16_t> to{ __to, __to_end };
  1271.   codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
  1272. #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
  1273.   mode = codecvt_mode(mode | little_endian);
  1274. #endif
  1275.   auto res = utf16_in(from, to, _M_maxcode, mode);
  1276.   __from_next = from.next;
  1277.   __to_next = to.next;
  1278.   return res;
  1279. }
  1280.  
  1281. int
  1282. __codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
  1283. { return 0; }
  1284.  
  1285. bool
  1286. __codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
  1287. { return false; }
  1288.  
  1289. int
  1290. __codecvt_utf8_utf16_base<char16_t>::
  1291. do_length(state_type&, const extern_type* __from,
  1292.           const extern_type* __end, size_t __max) const
  1293. {
  1294.   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
  1295.   return __end - __from;
  1296. }
  1297.  
  1298. int
  1299. __codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
  1300. {
  1301.   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
  1302.   // whereas 4 byte sequences require two 16-bit code units.
  1303.   return 3;
  1304. }
  1305.  
  1306. // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
  1307. // Converts from UTF-8 to UTF-16.
  1308.  
  1309. __codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
  1310.  
  1311. codecvt_base::result
  1312. __codecvt_utf8_utf16_base<char32_t>::
  1313. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  1314.        const intern_type*& __from_next,
  1315.        extern_type* __to, extern_type* __to_end,
  1316.        extern_type*& __to_next) const
  1317. {
  1318.   range<const char32_t> from{ __from, __from_end };
  1319.   range<char> to{ __to, __to_end };
  1320.   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
  1321.   __from_next = from.next;
  1322.   __to_next = to.next;
  1323.   return res;
  1324. }
  1325.  
  1326. codecvt_base::result
  1327. __codecvt_utf8_utf16_base<char32_t>::
  1328. do_unshift(state_type&, extern_type* __to, extern_type*,
  1329.            extern_type*& __to_next) const
  1330. {
  1331.   __to_next = __to;
  1332.   return noconv;
  1333. }
  1334.  
  1335. codecvt_base::result
  1336. __codecvt_utf8_utf16_base<char32_t>::
  1337. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  1338.       const extern_type*& __from_next,
  1339.       intern_type* __to, intern_type* __to_end,
  1340.       intern_type*& __to_next) const
  1341. {
  1342.   range<const char> from{ __from, __from_end };
  1343.   range<char32_t> to{ __to, __to_end };
  1344.   auto res = utf16_in(from, to, _M_maxcode, _M_mode);
  1345.   __from_next = from.next;
  1346.   __to_next = to.next;
  1347.   return res;
  1348. }
  1349.  
  1350. int
  1351. __codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
  1352. { return 0; }
  1353.  
  1354. bool
  1355. __codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
  1356. { return false; }
  1357.  
  1358. int
  1359. __codecvt_utf8_utf16_base<char32_t>::
  1360. do_length(state_type&, const extern_type* __from,
  1361.           const extern_type* __end, size_t __max) const
  1362. {
  1363.   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
  1364.   return __end - __from;
  1365. }
  1366.  
  1367. int
  1368. __codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
  1369. {
  1370.   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
  1371.   // whereas 4 byte sequences require two 16-bit code units.
  1372.   return 3;
  1373. }
  1374.  
  1375. #ifdef _GLIBCXX_USE_WCHAR_T
  1376. // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
  1377. // Converts from UTF-8 to UTF-16.
  1378.  
  1379. __codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
  1380.  
  1381. codecvt_base::result
  1382. __codecvt_utf8_utf16_base<wchar_t>::
  1383. do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
  1384.        const intern_type*& __from_next,
  1385.        extern_type* __to, extern_type* __to_end,
  1386.        extern_type*& __to_next) const
  1387. {
  1388.   range<const wchar_t> from{ __from, __from_end };
  1389.   range<char> to{ __to, __to_end };
  1390.   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
  1391.   __from_next = from.next;
  1392.   __to_next = to.next;
  1393.   return res;
  1394. }
  1395.  
  1396. codecvt_base::result
  1397. __codecvt_utf8_utf16_base<wchar_t>::
  1398. do_unshift(state_type&, extern_type* __to, extern_type*,
  1399.            extern_type*& __to_next) const
  1400. {
  1401.   __to_next = __to;
  1402.   return noconv;
  1403. }
  1404.  
  1405. codecvt_base::result
  1406. __codecvt_utf8_utf16_base<wchar_t>::
  1407. do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
  1408.       const extern_type*& __from_next,
  1409.       intern_type* __to, intern_type* __to_end,
  1410.       intern_type*& __to_next) const
  1411. {
  1412.   range<const char> from{ __from, __from_end };
  1413.   range<wchar_t> to{ __to, __to_end };
  1414.   auto res = utf16_in(from, to, _M_maxcode, _M_mode);
  1415.   __from_next = from.next;
  1416.   __to_next = to.next;
  1417.   return res;
  1418. }
  1419.  
  1420. int
  1421. __codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
  1422. { return 0; }
  1423.  
  1424. bool
  1425. __codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
  1426. { return false; }
  1427.  
  1428. int
  1429. __codecvt_utf8_utf16_base<wchar_t>::
  1430. do_length(state_type&, const extern_type* __from,
  1431.           const extern_type* __end, size_t __max) const
  1432. {
  1433.   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
  1434.   return __end - __from;
  1435. }
  1436.  
  1437. int
  1438. __codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
  1439. {
  1440.   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
  1441.   // whereas 4 byte sequences require two 16-bit code units.
  1442.   return 3;
  1443. }
  1444. #endif
  1445.  
// Explicit instantiations for the char16_t and char32_t facets.
//
// "inline template class" is a GNU extension to explicit instantiation;
// per the GCC manual it instantiates only the compiler support data for
// the class (such as its vtable) and none of its members.  The two
// codecvt_byname lines are ordinary explicit instantiation definitions.
inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
template class codecvt_byname<char16_t, char, mbstate_t>;
template class codecvt_byname<char32_t, char, mbstate_t>;
  1450.  
  1451. _GLIBCXX_END_NAMESPACE_VERSION
  1452. }
  1453. #endif // _GLIBCXX_USE_C99_STDINT_TR1
  1454.