Subversion Repositories Kolibri OS

Rev

Rev 4874 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4921 Serge 1
#include 
2
#include 
3
#include 
4
#include 
5
#include 
6
#include "mbctype.h"
7
#include "local.h"
8
 
9
int (*__wctomb) (struct _reent *, char *, wchar_t, const char *charset,
10
		 mbstate_t *)
11
#ifdef __CYGWIN__
12
   /* Cygwin starts up in UTF-8 mode. */
13
    = __utf8_wctomb;
14
#else
15
    = __ascii_wctomb;
16
#endif
17
 
18
int
19
_DEFUN (_wctomb_r, (r, s, wchar, state),
20
        struct _reent *r     _AND
21
        char          *s     _AND
22
        wchar_t        _wchar _AND
23
        mbstate_t     *state)
24
{
25
  return __wctomb (r, s, _wchar, __locale_charset (), state);
26
}
27
 
28
int
29
_DEFUN (__ascii_wctomb, (r, s, wchar, charset, state),
30
        struct _reent *r       _AND
31
        char          *s       _AND
32
        wchar_t        _wchar  _AND
33
	const char    *charset _AND
34
        mbstate_t     *state)
35
{
36
  /* Avoids compiler warnings about comparisons that are always false
37
     due to limited range when sizeof(wchar_t) is 2 but sizeof(wint_t)
38
     is 4, as is the case on cygwin.  */
39
  wint_t wchar = _wchar;
40
 
41
  if (s == NULL)
42
    return 0;
43
 
44
#ifdef __CYGWIN__
45
  if ((size_t)wchar >= 0x80)
46
#else
47
  if ((size_t)wchar >= 0x100)
48
#endif
49
    {
50
      r->_errno = EILSEQ;
51
      return -1;
52
    }
53
 
54
  *s = (char) wchar;
55
  return 1;
56
}
57
 
58
#ifdef _MB_CAPABLE
59
/* for some conversions, we use the __count field as a place to store a state value */
60
#define __state __count
61
 
62
int
63
_DEFUN (__utf8_wctomb, (r, s, wchar, charset, state),
64
        struct _reent *r       _AND
65
        char          *s       _AND
66
        wchar_t        _wchar  _AND
67
	const char    *charset _AND
68
        mbstate_t     *state)
69
{
70
  wint_t wchar = _wchar;
71
  int ret = 0;
72
 
73
  if (s == NULL)
74
    return 0; /* UTF-8 encoding is not state-dependent */
75
 
76
  if (sizeof (wchar_t) == 2 && state->__count == -4
77
      && (wchar < 0xdc00 || wchar >= 0xdfff))
78
    {
79
      /* There's a leftover lone high surrogate.  Write out the CESU-8 value
80
	 of the surrogate and proceed to convert the given character.  Note
81
	 to return extra 3 bytes. */
82
      wchar_t tmp;
83
      tmp = (state->__value.__wchb[0] << 16 | state->__value.__wchb[1] << 8)
84
	    - (0x10000 >> 10 | 0xd80d);
85
      *s++ = 0xe0 | ((tmp & 0xf000) >> 12);
86
      *s++ = 0x80 | ((tmp &  0xfc0) >> 6);
87
      *s++ = 0x80 |  (tmp &   0x3f);
88
      state->__count = 0;
89
      ret = 3;
90
    }
91
  if (wchar <= 0x7f)
92
    {
93
      *s = wchar;
94
      return ret + 1;
95
    }
96
  if (wchar >= 0x80 && wchar <= 0x7ff)
97
    {
98
      *s++ = 0xc0 | ((wchar & 0x7c0) >> 6);
99
      *s   = 0x80 |  (wchar &  0x3f);
100
      return ret + 2;
101
    }
102
  if (wchar >= 0x800 && wchar <= 0xffff)
103
    {
104
      /* No UTF-16 surrogate handling in UCS-4 */
105
      if (sizeof (wchar_t) == 2 && wchar >= 0xd800 && wchar <= 0xdfff)
106
	{
107
	  wint_t tmp;
108
	  if (wchar <= 0xdbff)
109
	    {
110
	      /* First half of a surrogate pair.  Store the state and
111
	         return ret + 0. */
112
	      tmp = ((wchar & 0x3ff) << 10) + 0x10000;
113
	      state->__value.__wchb[0] = (tmp >> 16) & 0xff;
114
	      state->__value.__wchb[1] = (tmp >> 8) & 0xff;
115
	      state->__count = -4;
116
	      *s = (0xf0 | ((tmp & 0x1c0000) >> 18));
117
	      return ret;
118
	    }
119
	  if (state->__count == -4)
120
	    {
121
	      /* Second half of a surrogate pair.  Reconstruct the full
122
		 Unicode value and return the trailing three bytes of the
123
		 UTF-8 character. */
124
	      tmp = (state->__value.__wchb[0] << 16)
125
		    | (state->__value.__wchb[1] << 8)
126
		    | (wchar & 0x3ff);
127
	      state->__count = 0;
128
	      *s++ = 0xf0 | ((tmp & 0x1c0000) >> 18);
129
	      *s++ = 0x80 | ((tmp &  0x3f000) >> 12);
130
	      *s++ = 0x80 | ((tmp &    0xfc0) >> 6);
131
	      *s   = 0x80 |  (tmp &     0x3f);
132
	      return 4;
133
	    }
134
	  /* Otherwise translate into CESU-8 value. */
135
	}
136
      *s++ = 0xe0 | ((wchar & 0xf000) >> 12);
137
      *s++ = 0x80 | ((wchar &  0xfc0) >> 6);
138
      *s   = 0x80 |  (wchar &   0x3f);
139
      return ret + 3;
140
    }
141
  if (wchar >= 0x10000 && wchar <= 0x10ffff)
142
    {
143
      *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18);
144
      *s++ = 0x80 | ((wchar &  0x3f000) >> 12);
145
      *s++ = 0x80 | ((wchar &    0xfc0) >> 6);
146
      *s   = 0x80 |  (wchar &     0x3f);
147
      return 4;
148
    }
149
 
150
  r->_errno = EILSEQ;
151
  return -1;
152
}
153
 
154
/* Cygwin defines its own doublebyte charset conversion functions
155
   because the underlying OS requires wchar_t == UTF-16. */
156
#ifndef __CYGWIN__
157
int
158
_DEFUN (__sjis_wctomb, (r, s, wchar, charset, state),
159
        struct _reent *r       _AND
160
        char          *s       _AND
161
        wchar_t        _wchar  _AND
162
	const char    *charset _AND
163
        mbstate_t     *state)
164
{
165
  wint_t wchar = _wchar;
166
 
167
  unsigned char char2 = (unsigned char)wchar;
168
  unsigned char char1 = (unsigned char)(wchar >> 8);
169
 
170
  if (s == NULL)
171
    return 0;  /* not state-dependent */
172
 
173
  if (char1 != 0x00)
174
    {
175
    /* first byte is non-zero..validate multi-byte char */
176
      if (_issjis1(char1) && _issjis2(char2))
177
	{
178
	  *s++ = (char)char1;
179
	  *s = (char)char2;
180
	  return 2;
181
	}
182
      else
183
	{
184
	  r->_errno = EILSEQ;
185
	  return -1;
186
	}
187
    }
188
  *s = (char) wchar;
189
  return 1;
190
}
191
 
192
int
193
_DEFUN (__eucjp_wctomb, (r, s, wchar, charset, state),
194
        struct _reent *r       _AND
195
        char          *s       _AND
196
        wchar_t        _wchar  _AND
197
	const char    *charset _AND
198
        mbstate_t     *state)
199
{
200
  wint_t wchar = _wchar;
201
  unsigned char char2 = (unsigned char)wchar;
202
  unsigned char char1 = (unsigned char)(wchar >> 8);
203
 
204
  if (s == NULL)
205
    return 0;  /* not state-dependent */
206
 
207
  if (char1 != 0x00)
208
    {
209
    /* first byte is non-zero..validate multi-byte char */
210
      if (_iseucjp1 (char1) && _iseucjp2 (char2))
211
	{
212
	  *s++ = (char)char1;
213
	  *s = (char)char2;
214
	  return 2;
215
	}
216
      else if (_iseucjp2 (char1) && _iseucjp2 (char2 | 0x80))
217
	{
218
	  *s++ = (char)0x8f;
219
	  *s++ = (char)char1;
220
	  *s = (char)(char2 | 0x80);
221
	  return 3;
222
	}
223
      else
224
	{
225
	  r->_errno = EILSEQ;
226
	  return -1;
227
	}
228
    }
229
  *s = (char) wchar;
230
  return 1;
231
}
232
 
233
int
234
_DEFUN (__jis_wctomb, (r, s, wchar, charset, state),
235
        struct _reent *r       _AND
236
        char          *s       _AND
237
        wchar_t        _wchar  _AND
238
	const char    *charset _AND
239
        mbstate_t     *state)
240
{
241
  wint_t wchar = _wchar;
242
  int cnt = 0;
243
  unsigned char char2 = (unsigned char)wchar;
244
  unsigned char char1 = (unsigned char)(wchar >> 8);
245
 
246
  if (s == NULL)
247
    return 1;  /* state-dependent */
248
 
249
  if (char1 != 0x00)
250
    {
251
    /* first byte is non-zero..validate multi-byte char */
252
      if (_isjis (char1) && _isjis (char2))
253
	{
254
	  if (state->__state == 0)
255
	    {
256
	      /* must switch from ASCII to JIS state */
257
	      state->__state = 1;
258
	      *s++ = ESC_CHAR;
259
	      *s++ = '$';
260
	      *s++ = 'B';
261
	      cnt = 3;
262
	    }
263
	  *s++ = (char)char1;
264
	  *s = (char)char2;
265
	  return cnt + 2;
266
	}
267
      r->_errno = EILSEQ;
268
      return -1;
269
    }
270
  if (state->__state != 0)
271
    {
272
      /* must switch from JIS to ASCII state */
273
      state->__state = 0;
274
      *s++ = ESC_CHAR;
275
      *s++ = '(';
276
      *s++ = 'B';
277
      cnt = 3;
278
    }
279
  *s = (char)char2;
280
  return cnt + 1;
281
}
282
#endif /* !__CYGWIN__ */
283
 
284
#ifdef _MB_EXTENDED_CHARSETS_ISO
285
int
286
_DEFUN (__iso_wctomb, (r, s, wchar, charset, state),
287
        struct _reent *r       _AND
288
        char          *s       _AND
289
        wchar_t        _wchar  _AND
290
	const char    *charset _AND
291
        mbstate_t     *state)
292
{
293
  wint_t wchar = _wchar;
294
 
295
  if (s == NULL)
296
    return 0;
297
 
298
  /* wchars <= 0x9f translate to all ISO charsets directly. */
299
  if (wchar >= 0xa0)
300
    {
301
      int iso_idx = __iso_8859_index (charset + 9);
302
      if (iso_idx >= 0)
303
	{
304
	  unsigned char mb;
305
 
306
	  if (s == NULL)
307
	    return 0;
308
 
309
	  for (mb = 0; mb < 0x60; ++mb)
310
	    if (__iso_8859_conv[iso_idx][mb] == wchar)
311
	      {
312
		*s = (char) (mb + 0xa0);
313
		return 1;
314
	      }
315
	  r->_errno = EILSEQ;
316
	  return -1;
317
	}
318
    }
319
 
320
  if ((size_t)wchar >= 0x100)
321
    {
322
      r->_errno = EILSEQ;
323
      return -1;
324
    }
325
 
326
  *s = (char) wchar;
327
  return 1;
328
}
329
#endif /* _MB_EXTENDED_CHARSETS_ISO */
330
 
331
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
332
int
333
_DEFUN (__cp_wctomb, (r, s, wchar, charset, state),
334
        struct _reent *r       _AND
335
        char          *s       _AND
336
        wchar_t        _wchar  _AND
337
	const char    *charset _AND
338
        mbstate_t     *state)
339
{
340
  wint_t wchar = _wchar;
341
 
342
  if (s == NULL)
343
    return 0;
344
 
345
  if (wchar >= 0x80)
346
    {
347
      int cp_idx = __cp_index (charset + 2);
348
      if (cp_idx >= 0)
349
	{
350
	  unsigned char mb;
351
 
352
	  if (s == NULL)
353
	    return 0;
354
 
355
	  for (mb = 0; mb < 0x80; ++mb)
356
	    if (__cp_conv[cp_idx][mb] == wchar)
357
	      {
358
		*s = (char) (mb + 0x80);
359
		return 1;
360
	      }
361
	  r->_errno = EILSEQ;
362
	  return -1;
363
	}
364
    }
365
 
366
  if ((size_t)wchar >= 0x100)
367
    {
368
      r->_errno = EILSEQ;
369
      return -1;
370
    }
371
 
372
  *s = (char) wchar;
373
  return 1;
374
}
375
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
376
#endif /* _MB_CAPABLE */