Subversion Repositories Kolibri OS

Rev

Rev 4874 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
FUNCTION
3
<<setlocale>>, <<localeconv>>---select or query locale
4
 
5
INDEX
6
	setlocale
7
INDEX
8
	localeconv
9
INDEX
10
	_setlocale_r
11
INDEX
12
	_localeconv_r
13
 
14
ANSI_SYNOPSIS
15
	#include <locale.h>
16
	char *setlocale(int <[category]>, const char *<[locale]>);
17
	lconv *localeconv(void);
18
 
19
	char *_setlocale_r(void *<[reent]>,
20
                        int <[category]>, const char *<[locale]>);
21
	lconv *_localeconv_r(void *<[reent]>);
22
 
23
TRAD_SYNOPSIS
24
	#include <locale.h>
25
	char *setlocale(<[category]>, <[locale]>)
26
	int <[category]>;
27
	char *<[locale]>;
28
 
29
	lconv *localeconv();
30
 
31
	char *_setlocale_r(<[reent]>, <[category]>, <[locale]>)
32
	char *<[reent]>;
33
	int <[category]>;
34
	char *<[locale]>;
35
 
36
	lconv *_localeconv_r(<[reent]>);
37
	char *<[reent]>;
38
 
39
DESCRIPTION
40
<<setlocale>> is the facility defined by ANSI C to condition the
41
execution environment for international collating and formatting
42
information; <<localeconv>> reports on the settings of the current
43
locale.
44
 
45
This is a minimal implementation, supporting only the required <<"POSIX">>
46
and <<"C">> values for <[locale]>; strings representing other locales are not
47
honored unless _MB_CAPABLE is defined.
48
 
49
If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
50
the form
51
 
52
  language[_TERRITORY][.charset][@@modifier]
53
 
54
<<"language">> is a two character string per ISO 639, or, if not available
55
for a given language, a three character string per ISO 639-3.
56
<<"TERRITORY">> is a country code per ISO 3166.  For <<"charset">> and
57
<<"modifier">> see below.
58
 
59
Additionally to the POSIX specifier, the following extension is supported
60
for backward compatibility with older implementations using newlib:
61
<<"C-charset">>.
62
Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
63
to specify language neutral locales while using other charsets than ASCII,
64
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
65
but uses the UTF-8 charset.
66
 
67
The following charsets are recognized:
68
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
69
<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
70
1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
71
857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
72
1257, 1258].
73
 
74
Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
75
are equivalent.  Charset names with dashes can also be written without
76
dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>.  <<"EUCJP">> and
77
<<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
78
 
79
Full support for all of the above charsets requires that newlib has been
80
built with multibyte support and support for all ISO and Windows Codepage.
81
Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
82
only newlib for Cygwin is built with full charset support by default.
83
Under Cygwin, this implementation additionally supports the charsets
84
<<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>.  Cygwin
85
does not support <<"JIS">>.
86
 
87
Cygwin additionally supports locales from the file
88
/usr/share/locale/locale.alias.
89
 
90
(<<"">> is also accepted; if given, the settings are read from the
91
corresponding LC_* environment variables and $LANG according to POSIX rules.)
92
 
93
This implementation also supports the modifier <<"cjknarrow">>, which
94
affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
95
from the "CJK Ambiguous Width" category of characters described at
96
http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
97
of 1 for singlebyte charsets and a width of 2 for multibyte charsets
98
other than UTF-8. For UTF-8, their width depends on the language specifier:
99
it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
100
and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
101
independent of charset and language.
102
 
103
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
104
pointer to the string representing the current locale.  The acceptable
105
values for <[category]> are defined in `<<locale.h>>' as macros
106
beginning with <<"LC_">>.
107
 
108
<<localeconv>> returns a pointer to a structure (also defined in
109
`<<locale.h>>') describing the locale-specific conventions currently
110
in effect.
111
 
112
<<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of
113
<<localeconv>> and <<setlocale>> respectively.  The extra argument
114
<[reent]> is a pointer to a reentrancy structure.
115
 
116
RETURNS
117
A successful call to <<setlocale>> returns a pointer to a string
118
associated with the specified category for the new locale.  The string
119
returned by <<setlocale>> is such that a subsequent call using that
120
string will restore that category (or all categories in case of LC_ALL),
121
to that state.  The application shall not modify the string returned
122
which may be overwritten by a subsequent call to <<setlocale>>.
123
On error, <<setlocale>> returns <<NULL>>.
124
 
125
<<localeconv>> returns a pointer to a structure of type <<lconv>>,
126
which describes the formatting and collating conventions in effect (in
127
this implementation, always those of the C locale).
128
 
129
PORTABILITY
130
ANSI C requires <<setlocale>>, but the only locale required across all
131
implementations is the C locale.
132
 
133
NOTES
134
There is no ISO-8859-12 codepage.  It's also refused by this implementation.
135
 
136
No supporting OS subroutines are required.
137
*/
138
 
139
/* Parts of this code are originally taken from FreeBSD. */
140
/*
141
 * Copyright (c) 1996 - 2002 FreeBSD Project
142
 * Copyright (c) 1991, 1993
143
 *      The Regents of the University of California.  All rights reserved.
144
 *
145
 * This code is derived from software contributed to Berkeley by
146
 * Paul Borman at Krystal Technologies.
147
 *
148
 * Redistribution and use in source and binary forms, with or without
149
 * modification, are permitted provided that the following conditions
150
 * are met:
151
 * 1. Redistributions of source code must retain the above copyright
152
 *    notice, this list of conditions and the following disclaimer.
153
 * 2. Redistributions in binary form must reproduce the above copyright
154
 *    notice, this list of conditions and the following disclaimer in the
155
 *    documentation and/or other materials provided with the distribution.
156
 * 4. Neither the name of the University nor the names of its contributors
157
 *    may be used to endorse or promote products derived from this software
158
 *    without specific prior written permission.
159
 *
160
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
161
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
162
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
163
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
164
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
165
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
166
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
167
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
168
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
169
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
170
 * SUCH DAMAGE.
171
 */
172
 
173
#include 
174
#include 
175
#include 
176
#include 
177
#include 
178
#include 
179
#include 
180
#include 
181
#include "lmessages.h"
182
#include "lmonetary.h"
183
#include "lnumeric.h"
184
#include "lctype.h"
185
#include "timelocal.h"
186
#include "../stdlib/local.h"
187
 
188
#define _LC_LAST      7
189
#define ENCODING_LEN 31
190
 
4921 Serge 191
#ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
192
int __EXPORT __mb_cur_max = 6;
193
#else
4349 Serge 194
int __EXPORT __mb_cur_max = 1;
4921 Serge 195
#endif
4349 Serge 196
 
197
int __nlocale_changed = 0;
198
int __mlocale_changed = 0;
199
char *_PathLocale = NULL;
200
 
201
static
202
struct lconv lconv =
203
{
204
  ".", "", "", "", "", "", "", "", "", "",
205
  CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
206
  CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
207
  CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
208
  CHAR_MAX, CHAR_MAX
209
};
210
 
211
#ifdef _MB_CAPABLE
212
/*
213
 * Category names for getenv()
214
 */
215
static char *categories[_LC_LAST] = {
216
  "LC_ALL",
217
  "LC_COLLATE",
218
  "LC_CTYPE",
219
  "LC_MONETARY",
220
  "LC_NUMERIC",
221
  "LC_TIME",
222
  "LC_MESSAGES",
223
};
224
 
225
/*
226
 * Default locale per POSIX.  Can be overridden on a per-target base.
227
 */
228
#ifndef DEFAULT_LOCALE
229
#define DEFAULT_LOCALE	"C"
230
#endif
231
/*
232
 * This variable can be changed by any outside mechanism.  This allows,
233
 * for instance, to load the default locale from a file.
234
 */
235
char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;
236
 
237
/*
238
 * Current locales for each category
239
 */
240
static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
241
    "C",
242
    "C",
243
#ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
244
    "C.UTF-8",
245
#else
246
    "C",
247
#endif
248
    "C",
249
    "C",
250
    "C",
251
    "C",
252
};
253
 
254
/*
255
 * The locales we are going to try and load
256
 */
257
static char new_categories[_LC_LAST][ENCODING_LEN + 1];
258
static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
259
 
260
static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];
261
static char *currentlocale(void);
262
static char *loadlocale(struct _reent *, int);
263
static const char *__get_locale_env(struct _reent *, int);
264
 
265
#endif /* _MB_CAPABLE */
266
 
267
#ifdef __CYGWIN__
268
static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8";
269
#else
270
static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
271
#endif
272
static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
273
static int lc_ctype_cjk_lang = 0;
274
 
275
char *
276
_DEFUN(_setlocale_r, (p, category, locale),
277
       struct _reent *p _AND
278
       int category _AND
279
       _CONST char *locale)
280
{
281
#ifndef _MB_CAPABLE
282
  if (locale)
283
    {
284
      if (strcmp (locale, "POSIX") && strcmp (locale, "C")
285
	  && strcmp (locale, ""))
286
        return NULL;
287
    }
288
  return "C";
289
#else /* !_MB_CAPABLE */
290
  int i, j, len, saverr;
291
  const char *env, *r;
292
 
293
  if (category < LC_ALL || category >= _LC_LAST)
294
    {
295
      p->_errno = EINVAL;
296
      return NULL;
297
    }
298
 
299
  if (locale == NULL)
300
    return category != LC_ALL ? current_categories[category] : currentlocale();
301
 
302
  /*
303
   * Default to the current locale for everything.
304
   */
305
  for (i = 1; i < _LC_LAST; ++i)
306
    strcpy (new_categories[i], current_categories[i]);
307
 
308
  /*
309
   * Now go fill up new_categories from the locale argument
310
   */
311
  if (!*locale)
312
    {
313
      if (category == LC_ALL)
314
	{
315
	  for (i = 1; i < _LC_LAST; ++i)
316
	    {
317
	      env = __get_locale_env (p, i);
318
	      if (strlen (env) > ENCODING_LEN)
319
		{
320
		  p->_errno = EINVAL;
321
		  return NULL;
322
		}
323
	      strcpy (new_categories[i], env);
324
	    }
325
	}
326
      else
327
	{
328
	  env = __get_locale_env (p, category);
329
	  if (strlen (env) > ENCODING_LEN)
330
	    {
331
	      p->_errno = EINVAL;
332
	      return NULL;
333
	    }
334
	  strcpy (new_categories[category], env);
335
	}
336
    }
337
  else if (category != LC_ALL)
338
    {
339
      if (strlen (locale) > ENCODING_LEN)
340
	{
341
	  p->_errno = EINVAL;
342
	  return NULL;
343
	}
344
      strcpy (new_categories[category], locale);
345
    }
346
  else
347
    {
348
      if ((r = strchr (locale, '/')) == NULL)
349
	{
350
	  if (strlen (locale) > ENCODING_LEN)
351
	    {
352
	      p->_errno = EINVAL;
353
	      return NULL;
354
	    }
355
	  for (i = 1; i < _LC_LAST; ++i)
356
	    strcpy (new_categories[i], locale);
357
	}
358
      else
359
	{
360
	  for (i = 1; r[1] == '/'; ++r)
361
	    ;
362
	  if (!r[1])
363
	    {
364
	      p->_errno = EINVAL;
365
	      return NULL;  /* Hmm, just slashes... */
366
	    }
367
	  do
368
	    {
369
	      if (i == _LC_LAST)
370
		break;  /* Too many slashes... */
371
	      if ((len = r - locale) > ENCODING_LEN)
372
		{
373
		  p->_errno = EINVAL;
374
		  return NULL;
375
		}
376
	      strlcpy (new_categories[i], locale, len + 1);
377
	      i++;
378
	      while (*r == '/')
379
		r++;
380
	      locale = r;
381
	      while (*r && *r != '/')
382
		r++;
383
	    }
384
	  while (*locale);
385
	  while (i < _LC_LAST)
386
	    {
387
	      strcpy (new_categories[i], new_categories[i-1]);
388
	      i++;
389
	    }
390
	}
391
    }
392
 
393
  if (category != LC_ALL)
394
    return loadlocale (p, category);
395
 
396
  for (i = 1; i < _LC_LAST; ++i)
397
    {
398
      strcpy (saved_categories[i], current_categories[i]);
399
      if (loadlocale (p, i) == NULL)
400
	{
401
	  saverr = p->_errno;
402
	  for (j = 1; j < i; j++)
403
	    {
404
	      strcpy (new_categories[j], saved_categories[j]);
405
	      if (loadlocale (p, j) == NULL)
406
		{
407
		  strcpy (new_categories[j], "C");
408
		  loadlocale (p, j);
409
		}
410
	    }
411
	  p->_errno = saverr;
412
	  return NULL;
413
	}
414
    }
415
  return currentlocale ();
416
#endif /* !_MB_CAPABLE */
417
}
418
 
419
#ifdef _MB_CAPABLE
420
static char *
421
currentlocale()
422
{
423
        int i;
424
 
425
        (void)strcpy(current_locale_string, current_categories[1]);
426
 
427
        for (i = 2; i < _LC_LAST; ++i)
428
                if (strcmp(current_categories[1], current_categories[i])) {
429
                        for (i = 2; i < _LC_LAST; ++i) {
430
                                (void)strcat(current_locale_string, "/");
431
                                (void)strcat(current_locale_string,
432
                                             current_categories[i]);
433
                        }
434
                        break;
435
                }
436
        return (current_locale_string);
437
}
438
#endif /* _MB_CAPABLE */
439
 
440
#ifdef _MB_CAPABLE
441
#ifdef __CYGWIN__
442
extern void __set_charset_from_locale (const char *locale, char *charset);
443
extern char *__set_locale_from_locale_alias (const char *, char *);
444
extern int __collate_load_locale (const char *, void *, const char *);
445
#endif /* __CYGWIN__ */
446
 
447
extern void __set_ctype (const char *charset);
448
 
449
static char *
450
loadlocale(struct _reent *p, int category)
451
{
452
  /* At this point a full-featured system would just load the locale
453
     specific data from the locale files.
454
     What we do here for now is to check the incoming string for correctness.
455
     The string must be in one of the allowed locale strings, either
456
     one in POSIX-style, or one in the old newlib style to maintain
457
     backward compatibility.  If the local string is correct, the charset
458
     is extracted and stored in lc_ctype_charset or lc_message_charset
459
     dependent on the cateogry. */
460
  char *locale = NULL;
461
  char charset[ENCODING_LEN + 1];
462
  unsigned long val;
463
  char *end, *c = NULL;
464
  int mbc_max;
465
  int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
466
  int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
467
		   const char *, mbstate_t *);
468
  int cjknarrow = 0;
469
 
470
  /* Avoid doing everything twice if nothing has changed. */
471
  if (!strcmp (new_categories[category], current_categories[category]))
472
    return current_categories[category];
473
 
474
#ifdef __CYGWIN__
475
  /* This additional code handles the case that the incoming locale string
476
     is not valid.  If so, it calls the function __set_locale_from_locale_alias,
477
     which is only available on Cygwin right now.  The function reads the
478
     file /usr/share/locale/locale.alias.  The file contains locale aliases
479
     and their replacement locale.  For instance, the alias "french" is
480
     translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
481
     "th_TH.TIS-620".  If successful, the function returns with a pointer
482
     to the second argument, which is a buffer in which the replacement locale
483
     gets stored.  Otherwise the function returns NULL. */
484
  char tmp_locale[ENCODING_LEN + 1];
485
  int ret = 0;
486
 
487
restart:
488
  if (!locale)
489
    locale = new_categories[category];
490
  else if (locale != tmp_locale)
491
    {
492
      locale = __set_locale_from_locale_alias (locale, tmp_locale);
493
      if (!locale)
494
	return NULL;
495
    }
496
# define FAIL	goto restart
497
#else
498
  locale = new_categories[category];
499
# define FAIL	return NULL
500
#endif
501
 
502
  /* "POSIX" is translated to "C", as on Linux. */
503
  if (!strcmp (locale, "POSIX"))
504
    strcpy (locale, "C");
505
  if (!strcmp (locale, "C"))				/* Default "C" locale */
506
    strcpy (charset, "ASCII");
507
  else if (locale[0] == 'C'
508
	   && (locale[1] == '-'		/* Old newlib style */
509
	       || locale[1] == '.'))	/* Extension for the C locale to allow
510
					   specifying different charsets while
511
					   sticking to the C locale in terms
512
					   of sort order, etc.  Proposed in
513
					   the Debian project. */
514
    {
515
      char *chp;
516
 
517
      c = locale + 2;
518
      strcpy (charset, c);
519
      if ((chp = strchr (charset, '@')))
520
        /* Strip off modifier */
521
        *chp = '\0';
522
      c += strlen (charset);
523
    }
524
  else							/* POSIX style */
525
    {
526
      c = locale;
527
 
528
      /* Don't use ctype macros here, they might be localized. */
529
      /* Language */
530
      if (c[0] < 'a' || c[0] > 'z'
531
	  || c[1] < 'a' || c[1] > 'z')
532
	FAIL;
533
      c += 2;
534
      /* Allow three character Language per ISO 639-3 */
535
      if (c[0] >= 'a' && c[0] <= 'z')
536
      	++c;
537
      if (c[0] == '_')
538
        {
539
	  /* Territory */
540
	  ++c;
541
	  if (c[0] < 'A' || c[0] > 'Z'
542
	      || c[1] < 'A' || c[1] > 'Z')
543
	    FAIL;
544
	  c += 2;
545
	}
546
      if (c[0] == '.')
547
	{
548
	  /* Charset */
549
	  char *chp;
550
 
551
	  ++c;
552
	  strcpy (charset, c);
553
	  if ((chp = strchr (charset, '@')))
554
	    /* Strip off modifier */
555
	    *chp = '\0';
556
	  c += strlen (charset);
557
	}
558
      else if (c[0] == '\0' || c[0] == '@')
559
	/* End of string or just a modifier */
560
#ifdef __CYGWIN__
561
	/* The Cygwin-only function __set_charset_from_locale checks
562
	   for the default charset which is connected to the given locale.
563
	   The function uses Windows functions in turn so it can't be easily
564
	   adapted to other targets.  However, if any other target provides
565
	   equivalent functionality, preferrably using the same function name
566
	   it would be sufficient to change the guarding #ifdef. */
567
	__set_charset_from_locale (locale, charset);
568
#else
569
	strcpy (charset, "ISO-8859-1");
570
#endif
571
      else
572
	/* Invalid string */
573
      	FAIL;
574
    }
575
  if (c && c[0] == '@')
576
	{
577
	  /* Modifier */
578
	  /* Only one modifier is recognized right now.  "cjknarrow" is used
579
	     to modify the behaviour of wcwidth() for East Asian languages.
580
	     For details see the comment at the end of this function. */
581
	  if (!strcmp (c + 1, "cjknarrow"))
582
	    cjknarrow = 1;
583
	}
584
  /* We only support this subset of charsets. */
585
  switch (charset[0])
586
    {
587
    case 'U':
588
    case 'u':
589
      if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
590
	FAIL;
591
      strcpy (charset, "UTF-8");
592
      mbc_max = 6;
593
      l_wctomb = __utf8_wctomb;
594
      l_mbtowc = __utf8_mbtowc;
595
    break;
596
#ifndef __CYGWIN__
597
    /* Cygwin does not support JIS at all. */
598
    case 'J':
599
    case 'j':
600
      if (strcasecmp (charset, "JIS"))
601
	FAIL;
602
      strcpy (charset, "JIS");
603
      mbc_max = 8;
604
      l_wctomb = __jis_wctomb;
605
      l_mbtowc = __jis_mbtowc;
606
    break;
607
#endif /* !__CYGWIN__ */
608
    case 'E':
609
    case 'e':
610
      if (strncasecmp (charset, "EUC", 3))
611
	FAIL;
612
      c = charset + 3;
613
      if (*c == '-')
614
	++c;
615
      if (!strcasecmp (c, "JP"))
616
	{
617
	  strcpy (charset, "EUCJP");
618
	  mbc_max = 3;
619
	  l_wctomb = __eucjp_wctomb;
620
	  l_mbtowc = __eucjp_mbtowc;
621
	}
622
#ifdef __CYGWIN__
623
      /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
624
      	 implementation requires Windows support. */
625
      else if (!strcasecmp (c, "KR"))
626
	{
627
	  strcpy (charset, "EUCKR");
628
	  mbc_max = 2;
629
	  l_wctomb = __kr_wctomb;
630
	  l_mbtowc = __kr_mbtowc;
631
	}
632
      else if (!strcasecmp (c, "CN"))
633
	{
634
	  strcpy (charset, "EUCCN");
635
	  mbc_max = 2;
636
	  l_wctomb = __gbk_wctomb;
637
	  l_mbtowc = __gbk_mbtowc;
638
	}
639
#endif /* __CYGWIN__ */
640
      else
641
	FAIL;
642
    break;
643
    case 'S':
644
    case 's':
645
      if (strcasecmp (charset, "SJIS"))
646
	FAIL;
647
      strcpy (charset, "SJIS");
648
      mbc_max = 2;
649
      l_wctomb = __sjis_wctomb;
650
      l_mbtowc = __sjis_mbtowc;
651
    break;
652
    case 'I':
653
    case 'i':
654
      /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
655
         ISO-8859-12.  This code also recognizes the aliases without dashes. */
656
      if (strncasecmp (charset, "ISO", 3))
657
	FAIL;
658
      c = charset + 3;
659
      if (*c == '-')
660
	++c;
661
      if (strncasecmp (c, "8859", 4))
662
	FAIL;
663
      c += 4;
664
      if (*c == '-')
665
	++c;
666
      val = _strtol_r (p, c, &end, 10);
667
      if (val < 1 || val > 16 || val == 12 || *end)
668
	FAIL;
669
      strcpy (charset, "ISO-8859-");
670
      c = charset + 9;
671
      if (val > 10)
672
      	*c++ = '1';
673
      *c++ = val % 10 + '0';
674
      *c = '\0';
675
      mbc_max = 1;
676
#ifdef _MB_EXTENDED_CHARSETS_ISO
677
      l_wctomb = __iso_wctomb;
678
      l_mbtowc = __iso_mbtowc;
679
#else /* !_MB_EXTENDED_CHARSETS_ISO */
680
      l_wctomb = __ascii_wctomb;
681
      l_mbtowc = __ascii_mbtowc;
682
#endif /* _MB_EXTENDED_CHARSETS_ISO */
683
    break;
684
    case 'C':
685
    case 'c':
686
      if (charset[1] != 'P' && charset[1] != 'p')
687
	FAIL;
688
      strncpy (charset, "CP", 2);
689
      val = _strtol_r (p, charset + 2, &end, 10);
690
      if (*end)
691
	FAIL;
692
      switch (val)
693
	{
694
	case 437:
695
	case 720:
696
	case 737:
697
	case 775:
698
	case 850:
699
	case 852:
700
	case 855:
701
	case 857:
702
	case 858:
703
	case 862:
704
	case 866:
705
	case 874:
706
	case 1125:
707
	case 1250:
708
	case 1251:
709
	case 1252:
710
	case 1253:
711
	case 1254:
712
	case 1255:
713
	case 1256:
714
	case 1257:
715
	case 1258:
716
	  mbc_max = 1;
717
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
718
	  l_wctomb = __cp_wctomb;
719
	  l_mbtowc = __cp_mbtowc;
720
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
721
	  l_wctomb = __ascii_wctomb;
722
	  l_mbtowc = __ascii_mbtowc;
723
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
724
	  break;
725
	case 932:
726
	  mbc_max = 2;
727
	  l_wctomb = __sjis_wctomb;
728
	  l_mbtowc = __sjis_mbtowc;
729
	  break;
730
	default:
731
	  FAIL;
732
	}
733
    break;
734
    case 'K':
735
    case 'k':
736
      /* KOI8-R, KOI8-U and the aliases without dash */
737
      if (strncasecmp (charset, "KOI8", 4))
738
	FAIL;
739
      c = charset + 4;
740
      if (*c == '-')
741
	++c;
742
      if (*c == 'R' || *c == 'r')
743
	strcpy (charset, "CP20866");
744
      else if (*c == 'U' || *c == 'u')
745
	strcpy (charset, "CP21866");
746
      else
747
	FAIL;
748
      mbc_max = 1;
749
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
750
      l_wctomb = __cp_wctomb;
751
      l_mbtowc = __cp_mbtowc;
752
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
753
      l_wctomb = __ascii_wctomb;
754
      l_mbtowc = __ascii_mbtowc;
755
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
756
      break;
757
    case 'A':
758
    case 'a':
759
      if (strcasecmp (charset, "ASCII"))
760
	FAIL;
761
      strcpy (charset, "ASCII");
762
      mbc_max = 1;
763
      l_wctomb = __ascii_wctomb;
764
      l_mbtowc = __ascii_mbtowc;
765
      break;
766
    case 'G':
767
    case 'g':
768
#ifdef __CYGWIN__
769
      /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
770
	 requires Windows support. */
771
      if (!strcasecmp (charset, "GBK")
772
	  || !strcasecmp (charset, "GB2312"))
773
      	{
774
	  strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
775
      mbc_max = 2;
776
      l_wctomb = __gbk_wctomb;
777
      l_mbtowc = __gbk_mbtowc;
778
	}
779
      else
780
#endif /* __CYGWIN__ */
781
      /* GEORGIAN-PS and the alias without dash */
782
      if (!strncasecmp (charset, "GEORGIAN", 8))
783
	{
784
	  c = charset + 8;
785
	  if (*c == '-')
786
	    ++c;
787
	  if (strcasecmp (c, "PS"))
788
	    FAIL;
789
	  strcpy (charset, "CP101");
790
	  mbc_max = 1;
791
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
792
	  l_wctomb = __cp_wctomb;
793
	  l_mbtowc = __cp_mbtowc;
794
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
795
	  l_wctomb = __ascii_wctomb;
796
	  l_mbtowc = __ascii_mbtowc;
797
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
798
	}
799
      else
800
	FAIL;
801
      break;
802
    case 'P':
803
    case 'p':
804
      /* PT154 */
805
      if (strcasecmp (charset, "PT154"))
806
	FAIL;
807
      strcpy (charset, "CP102");
808
      mbc_max = 1;
809
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
810
      l_wctomb = __cp_wctomb;
811
      l_mbtowc = __cp_mbtowc;
812
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
813
      l_wctomb = __ascii_wctomb;
814
      l_mbtowc = __ascii_mbtowc;
815
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
816
      break;
817
    case 'T':
818
    case 't':
819
      if (strncasecmp (charset, "TIS", 3))
820
      	FAIL;
821
      c = charset + 3;
822
      if (*c == '-')
823
	++c;
824
      if (strcasecmp (c, "620"))
825
      	FAIL;
826
      strcpy (charset, "CP874");
827
      mbc_max = 1;
828
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
829
      l_wctomb = __cp_wctomb;
830
      l_mbtowc = __cp_mbtowc;
831
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
832
      l_wctomb = __ascii_wctomb;
833
      l_mbtowc = __ascii_mbtowc;
834
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
835
      break;
836
#ifdef __CYGWIN__
837
    /* Newlib does not provide Big5 and Cygwin's implementation
838
       requires Windows support. */
839
    case 'B':
840
    case 'b':
841
      if (strcasecmp (charset, "BIG5"))
842
      	FAIL;
843
      strcpy (charset, "BIG5");
844
      mbc_max = 2;
845
      l_wctomb = __big5_wctomb;
846
      l_mbtowc = __big5_mbtowc;
847
      break;
848
#endif /* __CYGWIN__ */
849
    default:
850
      FAIL;
851
    }
852
  switch (category)
853
    {
854
    case LC_CTYPE:
855
      strcpy (lc_ctype_charset, charset);
856
      __mb_cur_max = mbc_max;
857
      __wctomb = l_wctomb;
858
      __mbtowc = l_mbtowc;
859
      __set_ctype (charset);
860
      /* Determine the width for the "CJK Ambiguous Width" category of
861
         characters. This is used in wcwidth(). Assume single width for
862
         single-byte charsets, and double width for multi-byte charsets
863
         other than UTF-8. For UTF-8, use double width for the East Asian
864
         languages ("ja", "ko", "zh"), and single width for everything else.
865
         Single width can also be forced with the "@cjknarrow" modifier. */
866
      lc_ctype_cjk_lang = !cjknarrow
867
			  && mbc_max > 1
868
			  && (charset[0] != 'U'
869
			      || strncmp (locale, "ja", 2) == 0
870
			      || strncmp (locale, "ko", 2) == 0
871
			      || strncmp (locale, "zh", 2) == 0);
872
#ifdef __HAVE_LOCALE_INFO__
873
      ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
874
#endif /* __HAVE_LOCALE_INFO__ */
875
      break;
876
    case LC_MESSAGES:
877
      strcpy (lc_message_charset, charset);
878
#ifdef __HAVE_LOCALE_INFO__
879
      ret = __messages_load_locale (locale, (void *) l_wctomb, charset);
880
      if (!ret)
881
#endif /* __HAVE_LOCALE_INFO__ */
882
      break;
883
#ifdef __HAVE_LOCALE_INFO__
884
#ifdef __CYGWIN__
885
  /* Right now only Cygwin supports a __collate_load_locale function at all. */
886
    case LC_COLLATE:
887
      ret = __collate_load_locale (locale, (void *) l_mbtowc, charset);
888
      break;
889
#endif
890
    case LC_MONETARY:
891
      ret = __monetary_load_locale (locale, (void *) l_wctomb, charset);
892
      break;
893
    case LC_NUMERIC:
894
      ret = __numeric_load_locale (locale, (void *) l_wctomb, charset);
895
      break;
896
    case LC_TIME:
897
      ret = __time_load_locale (locale, (void *) l_wctomb, charset);
898
      break;
899
#endif /* __HAVE_LOCALE_INFO__ */
900
    default:
901
      break;
902
    }
903
#ifdef __HAVE_LOCALE_INFO__
904
  if (ret)
905
    FAIL;
906
#endif /* __HAVE_LOCALE_INFO__ */
907
  return strcpy(current_categories[category], new_categories[category]);
908
}
909
 
910
static const char *
911
__get_locale_env(struct _reent *p, int category)
912
{
913
  const char *env;
914
 
915
  /* 1. check LC_ALL. */
916
  env = _getenv_r (p, categories[0]);
917
 
918
  /* 2. check LC_* */
919
  if (env == NULL || !*env)
920
    env = _getenv_r (p, categories[category]);
921
 
922
  /* 3. check LANG */
923
  if (env == NULL || !*env)
924
    env = _getenv_r (p, "LANG");
925
 
926
  /* 4. if none is set, fall to default locale */
927
  if (env == NULL || !*env)
928
    env = __default_locale;
929
 
930
  return env;
931
}
932
#endif /* _MB_CAPABLE */
933
 
934
char *
935
_DEFUN_VOID(__locale_charset)
936
{
937
#if 0//def __HAVE_LOCALE_INFO__
938
  return __get_current_ctype_locale ()->codeset;
939
#else
940
  return lc_ctype_charset;
941
#endif
942
}
943
 
944
int
945
_DEFUN_VOID(__locale_mb_cur_max)
946
{
947
#if 0//def __HAVE_LOCALE_INFO__
948
  return __get_current_ctype_locale ()->mb_cur_max[0];
949
#else
950
  return __mb_cur_max;
951
#endif
952
}
953
 
954
 
955
char *
956
_DEFUN_VOID(__locale_msgcharset)
957
{
958
#ifdef __HAVE_LOCALE_INFO__
959
  return (char *) __get_current_messages_locale ()->codeset;
960
#else
961
  return lc_message_charset;
962
#endif
963
}
964
 
965
int
966
_DEFUN_VOID(__locale_cjk_lang)
967
{
968
  return lc_ctype_cjk_lang;
969
}
970
 
971
struct lconv *
972
_DEFUN(_localeconv_r, (data),
973
      struct _reent *data)
974
{
975
#ifdef __HAVE_LOCALE_INFO__
976
  if (__nlocale_changed)
977
    {
978
      struct lc_numeric_T *n = __get_current_numeric_locale ();
979
      lconv.decimal_point = (char *) n->decimal_point;
980
      lconv.thousands_sep = (char *) n->thousands_sep;
981
      lconv.grouping = (char *) n->grouping;
982
      __nlocale_changed = 0;
983
    }
984
  if (__mlocale_changed)
985
    {
986
      struct lc_monetary_T *m = __get_current_monetary_locale ();
987
      lconv.int_curr_symbol = (char *) m->int_curr_symbol;
988
      lconv.currency_symbol = (char *) m->currency_symbol;
989
      lconv.mon_decimal_point = (char *) m->mon_decimal_point;
990
      lconv.mon_thousands_sep = (char *) m->mon_thousands_sep;
991
      lconv.mon_grouping = (char *) m->mon_grouping;
992
      lconv.positive_sign = (char *) m->positive_sign;
993
      lconv.negative_sign = (char *) m->negative_sign;
994
      lconv.int_frac_digits = m->int_frac_digits[0];
995
      lconv.frac_digits = m->frac_digits[0];
996
      lconv.p_cs_precedes = m->p_cs_precedes[0];
997
      lconv.p_sep_by_space = m->p_sep_by_space[0];
998
      lconv.n_cs_precedes = m->n_cs_precedes[0];
999
      lconv.n_sep_by_space = m->n_sep_by_space[0];
1000
      lconv.p_sign_posn = m->p_sign_posn[0];
1001
      lconv.n_sign_posn = m->n_sign_posn[0];
1002
#ifdef __HAVE_LOCALE_INFO_EXTENDED__
1003
      lconv.int_p_cs_precedes = m->int_p_cs_precedes[0];
1004
      lconv.int_p_sep_by_space = m->int_p_sep_by_space[0];
1005
      lconv.int_n_cs_precedes = m->int_n_cs_precedes[0];
1006
      lconv.int_n_sep_by_space = m->int_n_sep_by_space[0];
1007
      lconv.int_n_sign_posn = m->int_n_sign_posn[0];
1008
      lconv.int_p_sign_posn = m->int_p_sign_posn[0];
1009
#else /* !__HAVE_LOCALE_INFO_EXTENDED__ */
1010
      lconv.int_p_cs_precedes = m->p_cs_precedes[0];
1011
      lconv.int_p_sep_by_space = m->p_sep_by_space[0];
1012
      lconv.int_n_cs_precedes = m->n_cs_precedes[0];
1013
      lconv.int_n_sep_by_space = m->n_sep_by_space[0];
1014
      lconv.int_n_sign_posn = m->n_sign_posn[0];
1015
      lconv.int_p_sign_posn = m->p_sign_posn[0];
1016
#endif /* !__HAVE_LOCALE_INFO_EXTENDED__ */
1017
      __mlocale_changed = 0;
1018
    }
1019
#endif /* __HAVE_LOCALE_INFO__ */
1020
  return (struct lconv *) &lconv;
1021
}
1022
 
1023
#ifndef _REENT_ONLY
1024
 
1025
#ifndef __CYGWIN__
1026
/* Cygwin provides its own version of setlocale to perform some more
1027
   initialization work.  It calls _setlocale_r, though. */
1028
char *
1029
_DEFUN(setlocale, (category, locale),
1030
       int category _AND
1031
       _CONST char *locale)
1032
{
1033
  return _setlocale_r (_REENT, category, locale);
1034
}
1035
#endif /* __CYGWIN__ */
1036
 
1037
struct lconv *
1038
_DEFUN_VOID(localeconv)
1039
{
1040
  return _localeconv_r (_REENT);
1041
}
1042
 
1043
#endif