Subversion Repositories Kolibri OS

Rev

Rev 1693 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
1693 serge 1
/*
2
FUNCTION
3
<<setlocale>>, <<localeconv>>---select or query locale
4
 
5
INDEX
6
	setlocale
7
INDEX
8
	localeconv
9
INDEX
10
	_setlocale_r
11
INDEX
12
	_localeconv_r
13
 
14
ANSI_SYNOPSIS
15
	#include <locale.h>
16
	char *setlocale(int <[category]>, const char *<[locale]>);
17
	lconv *localeconv(void);
18
 
19
	char *_setlocale_r(void *<[reent]>,
20
                        int <[category]>, const char *<[locale]>);
21
	lconv *_localeconv_r(void *<[reent]>);
22
 
23
TRAD_SYNOPSIS
24
	#include <locale.h>
25
	char *setlocale(<[category]>, <[locale]>)
26
	int <[category]>;
27
	char *<[locale]>;
28
 
29
	lconv *localeconv();
30
 
31
	char *_setlocale_r(<[reent]>, <[category]>, <[locale]>)
32
	char *<[reent]>;
33
	int <[category]>;
34
	char *<[locale]>;
35
 
36
	lconv *_localeconv_r(<[reent]>);
37
	char *<[reent]>;
38
 
39
DESCRIPTION
40
<<setlocale>> is the facility defined by ANSI C to condition the
41
execution environment for international collating and formatting
42
information; <<localeconv>> reports on the settings of the current
43
locale.
44
 
45
This is a minimal implementation, supporting only the required <<"POSIX">>
46
and <<"C">> values for <[locale]>; strings representing other locales are not
47
honored unless _MB_CAPABLE is defined.
48
 
49
If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
50
the form
51
 
52
  language[_TERRITORY][.charset][@@modifier]
53
 
54
<<"language">> is a two character string per ISO 639, or, if not available
55
for a given language, a three character string per ISO 639-3.
56
<<"TERRITORY">> is a country code per ISO 3166.  For <<"charset">> and
57
<<"modifier">> see below.
58
 
59
Additionally to the POSIX specifier, the following extension is supported
60
for backward compatibility with older implementations using newlib:
61
<<"C-charset">>.
62
Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
63
to specify language neutral locales while using other charsets than ASCII,
64
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
65
but uses the UTF-8 charset.
66
 
67
The following charsets are recognized:
68
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
69
<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
70
1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
71
857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
72
1257, 1258].
73
 
74
Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
75
are equivalent.  Charset names with dashes can also be written without
76
dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>.  <<"EUCJP">> and
77
<<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
78
 
79
Full support for all of the above charsets requires that newlib has been
80
built with multibyte support and support for all ISO and Windows Codepage.
81
Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
82
only newlib for Cygwin is built with full charset support by default.
83
Under Cygwin, this implementation additionally supports the charsets
84
<<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>.  Cygwin
85
does not support <<"JIS">>.
86
 
87
Cygwin additionally supports locales from the file
88
/usr/share/locale/locale.alias.
89
 
90
(<<"">> is also accepted; if given, the settings are read from the
91
corresponding LC_* environment variables and $LANG according to POSIX rules.)
92
 
3065 serge 93
This implementation also supports the modifier <<"cjknarrow">>, which
94
affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
95
from the "CJK Ambiguous Width" category of characters described at
96
http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
97
of 1 for singlebyte charsets and a width of 2 for multibyte charsets
98
other than UTF-8. For UTF-8, their width depends on the language specifier:
99
it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
100
and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
101
independent of charset and language.
1693 serge 102
 
103
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
104
pointer to the string representing the current locale.  The acceptable
105
values for <[category]> are defined in `<<locale.h>>' as macros
106
beginning with <<"LC_">>.
107
 
108
<<localeconv>> returns a pointer to a structure (also defined in
109
`<<locale.h>>') describing the locale-specific conventions currently
110
in effect.
111
 
112
<<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of
113
<<localeconv>> and <<setlocale>> respectively.  The extra argument
114
<[reent]> is a pointer to a reentrancy structure.
115
 
116
RETURNS
117
A successful call to <<setlocale>> returns a pointer to a string
118
associated with the specified category for the new locale.  The string
119
returned by <<setlocale>> is such that a subsequent call using that
120
string will restore that category (or all categories in case of LC_ALL),
121
to that state.  The application shall not modify the string returned
122
which may be overwritten by a subsequent call to <<setlocale>>.
123
On error, <<setlocale>> returns <<NULL>>.
124
 
125
<<localeconv>> returns a pointer to a structure of type <<lconv>>,
126
which describes the formatting and collating conventions in effect (in
127
this implementation, always those of the C locale).
128
 
129
PORTABILITY
130
ANSI C requires <<setlocale>>, but the only locale required across all
131
implementations is the C locale.
132
 
133
NOTES
134
There is no ISO-8859-12 codepage.  It's also refused by this implementation.
135
 
136
No supporting OS subroutines are required.
137
*/
138
 
139
/* Parts of this code are originally taken from FreeBSD. */
140
/*
141
 * Copyright (c) 1996 - 2002 FreeBSD Project
142
 * Copyright (c) 1991, 1993
143
 *      The Regents of the University of California.  All rights reserved.
144
 *
145
 * This code is derived from software contributed to Berkeley by
146
 * Paul Borman at Krystal Technologies.
147
 *
148
 * Redistribution and use in source and binary forms, with or without
149
 * modification, are permitted provided that the following conditions
150
 * are met:
151
 * 1. Redistributions of source code must retain the above copyright
152
 *    notice, this list of conditions and the following disclaimer.
153
 * 2. Redistributions in binary form must reproduce the above copyright
154
 *    notice, this list of conditions and the following disclaimer in the
155
 *    documentation and/or other materials provided with the distribution.
156
 * 4. Neither the name of the University nor the names of its contributors
157
 *    may be used to endorse or promote products derived from this software
158
 *    without specific prior written permission.
159
 *
160
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
161
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
162
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
163
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
164
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
165
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
166
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
167
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
168
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
169
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
170
 * SUCH DAMAGE.
171
 */
172
 
173
#include 
174
#include 
175
#include 
176
#include 
177
#include 
178
#include 
179
#include 
180
#include 
181
#include "lmessages.h"
182
#include "lmonetary.h"
183
#include "lnumeric.h"
184
#include "lctype.h"
3065 serge 185
#include "timelocal.h"
1693 serge 186
#include "../stdlib/local.h"
187
 
188
#define _LC_LAST      7
189
#define ENCODING_LEN 31
190
 
191
int __EXPORT __mb_cur_max = 1;
192
 
193
int __nlocale_changed = 0;
194
int __mlocale_changed = 0;
195
char *_PathLocale = NULL;
196
 
197
/* Conventions returned by _localeconv_r().  The initializer is
   positional: the first string member is ".", the nine following string
   members are empty, and the fourteen char members are all CHAR_MAX. */
static struct lconv lconv = {
  ".", "", "", "", "",
  "", "", "", "", "",
  CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
  CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX
};
206
 
207
#ifdef _MB_CAPABLE
/*
 * Category names for getenv(), indexed by category number.
 */
static char *categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};

/*
 * Default locale per POSIX.  Can be overridden on a per-target base.
 */
#ifndef DEFAULT_LOCALE
#define DEFAULT_LOCALE	"C"
#endif
/*
 * This variable can be changed by any outside mechanism.  This allows,
 * for instance, to load the default locale from a file.
 */
char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;

/*
 * Current locales for each category (same indexing as categories[]).
 */
static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
    "C",
    "C",
#ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */
    "C.UTF-8",
#else
    "C",
#endif
    "C",
    "C",
    "C",
    "C",
};

/*
 * The locales we are going to try and load
 */
static char new_categories[_LC_LAST][ENCODING_LEN + 1];
/* Snapshot of current_categories taken by _setlocale_r() so that a
   failed LC_ALL change can be rolled back. */
static char saved_categories[_LC_LAST][ENCODING_LEN + 1];

/* Result buffer of currentlocale(): room for every category name plus
   one '/' separator each. */
static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];
static char *currentlocale(void);
static char *loadlocale(struct _reent *, int);
static const char *__get_locale_env(struct _reent *, int);

#endif /* _MB_CAPABLE */
262
 
3065 serge 263
#ifdef __CYGWIN__
1693 serge 264
static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8";
265
#else
266
static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
3065 serge 267
#endif
1693 serge 268
static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
269
static int lc_ctype_cjk_lang = 0;
270
 
271
char *
272
_DEFUN(_setlocale_r, (p, category, locale),
273
       struct _reent *p _AND
274
       int category _AND
275
       _CONST char *locale)
276
{
277
#ifndef _MB_CAPABLE
278
  if (locale)
279
    {
280
      if (strcmp (locale, "POSIX") && strcmp (locale, "C")
281
	  && strcmp (locale, ""))
282
        return NULL;
283
    }
284
  return "C";
285
#else /* !_MB_CAPABLE */
286
  int i, j, len, saverr;
287
  const char *env, *r;
288
 
289
  if (category < LC_ALL || category >= _LC_LAST)
290
    {
291
      p->_errno = EINVAL;
292
      return NULL;
293
    }
294
 
295
  if (locale == NULL)
296
    return category != LC_ALL ? current_categories[category] : currentlocale();
297
 
298
  /*
299
   * Default to the current locale for everything.
300
   */
301
  for (i = 1; i < _LC_LAST; ++i)
302
    strcpy (new_categories[i], current_categories[i]);
303
 
304
  /*
305
   * Now go fill up new_categories from the locale argument
306
   */
307
  if (!*locale)
308
    {
309
      if (category == LC_ALL)
310
	{
311
	  for (i = 1; i < _LC_LAST; ++i)
312
	    {
313
	      env = __get_locale_env (p, i);
314
	      if (strlen (env) > ENCODING_LEN)
315
		{
316
		  p->_errno = EINVAL;
317
		  return NULL;
318
		}
319
	      strcpy (new_categories[i], env);
320
	    }
321
	}
322
      else
323
	{
324
	  env = __get_locale_env (p, category);
325
	  if (strlen (env) > ENCODING_LEN)
326
	    {
327
	      p->_errno = EINVAL;
328
	      return NULL;
329
	    }
330
	  strcpy (new_categories[category], env);
331
	}
332
    }
333
  else if (category != LC_ALL)
334
    {
335
      if (strlen (locale) > ENCODING_LEN)
336
	{
337
	  p->_errno = EINVAL;
338
	  return NULL;
339
	}
340
      strcpy (new_categories[category], locale);
341
    }
342
  else
343
    {
344
      if ((r = strchr (locale, '/')) == NULL)
345
	{
346
	  if (strlen (locale) > ENCODING_LEN)
347
	    {
348
	      p->_errno = EINVAL;
349
	      return NULL;
350
	    }
351
	  for (i = 1; i < _LC_LAST; ++i)
352
	    strcpy (new_categories[i], locale);
353
	}
354
      else
355
	{
356
	  for (i = 1; r[1] == '/'; ++r)
357
	    ;
358
	  if (!r[1])
359
	    {
360
	      p->_errno = EINVAL;
361
	      return NULL;  /* Hmm, just slashes... */
362
	    }
363
	  do
364
	    {
365
	      if (i == _LC_LAST)
366
		break;  /* Too many slashes... */
367
	      if ((len = r - locale) > ENCODING_LEN)
368
		{
369
		  p->_errno = EINVAL;
370
		  return NULL;
371
		}
372
	      strlcpy (new_categories[i], locale, len + 1);
373
	      i++;
374
	      while (*r == '/')
375
		r++;
376
	      locale = r;
377
	      while (*r && *r != '/')
378
		r++;
379
	    }
380
	  while (*locale);
381
	  while (i < _LC_LAST)
382
	    {
383
	      strcpy (new_categories[i], new_categories[i-1]);
384
	      i++;
385
	    }
386
	}
387
    }
388
 
389
  if (category != LC_ALL)
390
    return loadlocale (p, category);
391
 
392
  for (i = 1; i < _LC_LAST; ++i)
393
    {
394
      strcpy (saved_categories[i], current_categories[i]);
395
      if (loadlocale (p, i) == NULL)
396
	{
397
	  saverr = p->_errno;
398
	  for (j = 1; j < i; j++)
399
	    {
400
	      strcpy (new_categories[j], saved_categories[j]);
401
	      if (loadlocale (p, j) == NULL)
402
		{
403
		  strcpy (new_categories[j], "C");
404
		  loadlocale (p, j);
405
		}
406
	    }
407
	  p->_errno = saverr;
408
	  return NULL;
409
	}
410
    }
411
  return currentlocale ();
412
#endif /* !_MB_CAPABLE */
413
}
414
 
415
#ifdef _MB_CAPABLE
/* Build the LC_ALL result string in current_locale_string and return it.
   When categories 1 .. _LC_LAST-1 all carry the same name, the string is
   just that name; otherwise it is every category name in index order,
   joined with '/'.  The buffer is static, so each call overwrites the
   previous result.  */
static char *
currentlocale(void)
{
  int i;

  (void) strcpy (current_locale_string, current_categories[1]);

  /* Look for the first category that differs from category 1. */
  for (i = 2; i < _LC_LAST; ++i)
    if (strcmp (current_categories[1], current_categories[i]))
      break;

  /* At least one differs: spell out all remaining categories. */
  if (i < _LC_LAST)
    for (i = 2; i < _LC_LAST; ++i)
      {
	(void) strcat (current_locale_string, "/");
	(void) strcat (current_locale_string, current_categories[i]);
      }

  return current_locale_string;
}
#endif /* _MB_CAPABLE */
435
 
436
#ifdef _MB_CAPABLE
#ifdef __CYGWIN__
extern void __set_charset_from_locale (const char *locale, char *charset);
extern char *__set_locale_from_locale_alias (const char *, char *);
extern int __collate_load_locale (const char *, void *, const char *);
#endif /* __CYGWIN__ */

extern void __set_ctype (const char *charset);

/* Validate new_categories[CATEGORY] and make it the current locale of
   that category.

   At this point a full-featured system would just load the locale
   specific data from the locale files.
   What we do here for now is to check the incoming string for correctness.
   The string must be in one of the allowed locale strings, either
   one in POSIX-style, or one in the old newlib style to maintain
   backward compatibility.  If the locale string is correct, the charset
   is extracted and stored in lc_ctype_charset or lc_message_charset
   dependent on the category.

   Returns current_categories[CATEGORY] on success (also when the
   requested name equals the current one, in which case nothing is done),
   or NULL when the name is not accepted.  */
static char *
loadlocale(struct _reent *p, int category)
{
  char *locale = NULL;
  char charset[ENCODING_LEN + 1];
  unsigned long val;
  char *end, *c = NULL;
  int mbc_max;
  int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
  int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
		   const char *, mbstate_t *);
  int cjknarrow = 0;
#ifdef __CYGWIN__
  /* Receives the replacement locale found in locale.alias. */
  char tmp_locale[ENCODING_LEN + 1];
#endif
#if defined (__CYGWIN__) || defined (__HAVE_LOCALE_INFO__)
  /* Result of the __*_load_locale calls; non-zero means failure. */
  int ret = 0;
#endif

  /* Avoid doing everything twice if nothing has changed. */
  if (!strcmp (new_categories[category], current_categories[category]))
    return current_categories[category];

#ifdef __CYGWIN__
  /* This additional code handles the case that the incoming locale string
     is not valid.  If so, it calls the function __set_locale_from_locale_alias,
     which is only available on Cygwin right now.  The function reads the
     file /usr/share/locale/locale.alias.  The file contains locale aliases
     and their replacement locale.  For instance, the alias "french" is
     translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
     "th_TH.TIS-620".  If successful, the function returns with a pointer
     to the second argument, which is a buffer in which the replacement locale
     gets stored.  Otherwise the function returns NULL. */
restart:
  /* Every pass parses from scratch; do not carry a modifier pointer or
     flag over from a failed first pass. */
  c = NULL;
  cjknarrow = 0;
  if (!locale)
    locale = new_categories[category];
  else if (locale != tmp_locale)
    {
      locale = __set_locale_from_locale_alias (locale, tmp_locale);
      if (!locale)
	return NULL;
    }
  else
    /* The alias replacement was rejected as well.  Give up instead of
       re-parsing the same string forever. */
    return NULL;
# define FAIL	goto restart
#else
  locale = new_categories[category];
# define FAIL	return NULL
#endif

  /* "POSIX" is translated to "C", as on Linux. */
  if (!strcmp (locale, "POSIX"))
    strcpy (locale, "C");
  if (!strcmp (locale, "C"))				/* Default "C" locale */
    strcpy (charset, "ASCII");
  else if (locale[0] == 'C'
	   && (locale[1] == '-'		/* Old newlib style */
	       || locale[1] == '.'))	/* Extension for the C locale to allow
					   specifying different charsets while
					   sticking to the C locale in terms
					   of sort order, etc.  Proposed in
					   the Debian project. */
    {
      char *chp;

      c = locale + 2;
      strcpy (charset, c);
      if ((chp = strchr (charset, '@')))
        /* Strip off modifier */
        *chp = '\0';
      /* Leave c on the '@' of the modifier, or on the terminating NUL. */
      c += strlen (charset);
    }
  else							/* POSIX style */
    {
      c = locale;

      /* Don't use ctype macros here, they might be localized. */
      /* Language */
      if (c[0] < 'a' || c[0] > 'z'
	  || c[1] < 'a' || c[1] > 'z')
	FAIL;
      c += 2;
      /* Allow three character Language per ISO 639-3 */
      if (c[0] >= 'a' && c[0] <= 'z')
      	++c;
      if (c[0] == '_')
        {
	  /* Territory */
	  ++c;
	  if (c[0] < 'A' || c[0] > 'Z'
	      || c[1] < 'A' || c[1] > 'Z')
	    FAIL;
	  c += 2;
	}
      if (c[0] == '.')
	{
	  /* Charset */
	  char *chp;

	  ++c;
	  strcpy (charset, c);
	  if ((chp = strchr (charset, '@')))
	    /* Strip off modifier */
	    *chp = '\0';
	  c += strlen (charset);
	}
      else if (c[0] == '\0' || c[0] == '@')
	/* End of string or just a modifier */
#ifdef __CYGWIN__
	/* The Cygwin-only function __set_charset_from_locale checks
	   for the default charset which is connected to the given locale.
	   The function uses Windows functions in turn so it can't be easily
	   adapted to other targets.  However, if any other target provides
	   equivalent functionality, preferably using the same function name
	   it would be sufficient to change the guarding #ifdef. */
	__set_charset_from_locale (locale, charset);
#else
	strcpy (charset, "ISO-8859-1");
#endif
      else
	/* Invalid string */
      	FAIL;
    }
  if (c && c[0] == '@')
    {
      /* Modifier */
      /* Only one modifier is recognized right now.  "cjknarrow" is used
	 to modify the behaviour of wcwidth() for East Asian languages.
	 For details see the comment in the LC_CTYPE case below. */
      if (!strcmp (c + 1, "cjknarrow"))
	cjknarrow = 1;
    }
  /* We only support this subset of charsets.  Each accepted spelling is
     rewritten in place to one canonical name.  From here on c is reused
     as a scratch pointer into charset. */
  switch (charset[0])
    {
    case 'U':
    case 'u':
      if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
	FAIL;
      strcpy (charset, "UTF-8");
      mbc_max = 6;
      l_wctomb = __utf8_wctomb;
      l_mbtowc = __utf8_mbtowc;
    break;
#ifndef __CYGWIN__
    /* Cygwin does not support JIS at all. */
    case 'J':
    case 'j':
      if (strcasecmp (charset, "JIS"))
	FAIL;
      strcpy (charset, "JIS");
      mbc_max = 8;
      l_wctomb = __jis_wctomb;
      l_mbtowc = __jis_mbtowc;
    break;
#endif /* !__CYGWIN__ */
    case 'E':
    case 'e':
      if (strncasecmp (charset, "EUC", 3))
	FAIL;
      c = charset + 3;
      if (*c == '-')
	++c;
      if (!strcasecmp (c, "JP"))
	{
	  strcpy (charset, "EUCJP");
	  mbc_max = 3;
	  l_wctomb = __eucjp_wctomb;
	  l_mbtowc = __eucjp_mbtowc;
	}
#ifdef __CYGWIN__
      /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
      	 implementation requires Windows support. */
      else if (!strcasecmp (c, "KR"))
	{
	  strcpy (charset, "EUCKR");
	  mbc_max = 2;
	  l_wctomb = __kr_wctomb;
	  l_mbtowc = __kr_mbtowc;
	}
      else if (!strcasecmp (c, "CN"))
	{
	  strcpy (charset, "EUCCN");
	  mbc_max = 2;
	  l_wctomb = __gbk_wctomb;
	  l_mbtowc = __gbk_mbtowc;
	}
#endif /* __CYGWIN__ */
      else
	FAIL;
    break;
    case 'S':
    case 's':
      if (strcasecmp (charset, "SJIS"))
	FAIL;
      strcpy (charset, "SJIS");
      mbc_max = 2;
      l_wctomb = __sjis_wctomb;
      l_mbtowc = __sjis_mbtowc;
    break;
    case 'I':
    case 'i':
      /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
         ISO-8859-12.  This code also recognizes the aliases without dashes. */
      if (strncasecmp (charset, "ISO", 3))
	FAIL;
      c = charset + 3;
      if (*c == '-')
	++c;
      if (strncasecmp (c, "8859", 4))
	FAIL;
      c += 4;
      if (*c == '-')
	++c;
      val = _strtol_r (p, c, &end, 10);
      if (val < 1 || val > 16 || val == 12 || *end)
	FAIL;
      strcpy (charset, "ISO-8859-");
      c = charset + 9;
      /* Two-digit part numbers are 10..16.  The test must include 10
	 itself, otherwise ISO-8859-10 is written out as "ISO-8859-0". */
      if (val >= 10)
      	*c++ = '1';
      *c++ = val % 10 + '0';
      *c = '\0';
      mbc_max = 1;
#ifdef _MB_EXTENDED_CHARSETS_ISO
      l_wctomb = __iso_wctomb;
      l_mbtowc = __iso_mbtowc;
#else /* !_MB_EXTENDED_CHARSETS_ISO */
      l_wctomb = __ascii_wctomb;
      l_mbtowc = __ascii_mbtowc;
#endif /* _MB_EXTENDED_CHARSETS_ISO */
    break;
    case 'C':
    case 'c':
      if (charset[1] != 'P' && charset[1] != 'p')
	FAIL;
      /* Upper-case the two-letter prefix only; the digits stay as is. */
      strncpy (charset, "CP", 2);
      val = _strtol_r (p, charset + 2, &end, 10);
      if (*end)
	FAIL;
      switch (val)
	{
	case 437:
	case 720:
	case 737:
	case 775:
	case 850:
	case 852:
	case 855:
	case 857:
	case 858:
	case 862:
	case 866:
	case 874:
	case 1125:
	case 1250:
	case 1251:
	case 1252:
	case 1253:
	case 1254:
	case 1255:
	case 1256:
	case 1257:
	case 1258:
	  mbc_max = 1;
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
	  l_wctomb = __cp_wctomb;
	  l_mbtowc = __cp_mbtowc;
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
	  l_wctomb = __ascii_wctomb;
	  l_mbtowc = __ascii_mbtowc;
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
	  break;
	case 932:
	  mbc_max = 2;
	  l_wctomb = __sjis_wctomb;
	  l_mbtowc = __sjis_mbtowc;
	  break;
	default:
	  FAIL;
	}
    break;
    case 'K':
    case 'k':
      /* KOI8-R, KOI8-U and the aliases without dash */
      if (strncasecmp (charset, "KOI8", 4))
	FAIL;
      c = charset + 4;
      if (*c == '-')
	++c;
      /* NOTE(review): only the first character after "KOI8[-]" is
	 examined, so trailing characters are accepted -- confirm whether
	 that is intended. */
      if (*c == 'R' || *c == 'r')
	strcpy (charset, "CP20866");
      else if (*c == 'U' || *c == 'u')
	strcpy (charset, "CP21866");
      else
	FAIL;
      mbc_max = 1;
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
      l_wctomb = __cp_wctomb;
      l_mbtowc = __cp_mbtowc;
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
      l_wctomb = __ascii_wctomb;
      l_mbtowc = __ascii_mbtowc;
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
      break;
    case 'A':
    case 'a':
      if (strcasecmp (charset, "ASCII"))
	FAIL;
      strcpy (charset, "ASCII");
      mbc_max = 1;
      l_wctomb = __ascii_wctomb;
      l_mbtowc = __ascii_mbtowc;
      break;
    case 'G':
    case 'g':
#ifdef __CYGWIN__
      /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
	 requires Windows support. */
      if (!strcasecmp (charset, "GBK")
	  || !strcasecmp (charset, "GB2312"))
      	{
	  strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
	  mbc_max = 2;
	  l_wctomb = __gbk_wctomb;
	  l_mbtowc = __gbk_mbtowc;
	}
      else
#endif /* __CYGWIN__ */
      /* GEORGIAN-PS and the alias without dash */
      if (!strncasecmp (charset, "GEORGIAN", 8))
	{
	  c = charset + 8;
	  if (*c == '-')
	    ++c;
	  if (strcasecmp (c, "PS"))
	    FAIL;
	  strcpy (charset, "CP101");
	  mbc_max = 1;
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
	  l_wctomb = __cp_wctomb;
	  l_mbtowc = __cp_mbtowc;
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
	  l_wctomb = __ascii_wctomb;
	  l_mbtowc = __ascii_mbtowc;
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
	}
      else
	FAIL;
      break;
    case 'P':
    case 'p':
      /* PT154 */
      if (strcasecmp (charset, "PT154"))
	FAIL;
      strcpy (charset, "CP102");
      mbc_max = 1;
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
      l_wctomb = __cp_wctomb;
      l_mbtowc = __cp_mbtowc;
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
      l_wctomb = __ascii_wctomb;
      l_mbtowc = __ascii_mbtowc;
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
      break;
    case 'T':
    case 't':
      if (strncasecmp (charset, "TIS", 3))
      	FAIL;
      c = charset + 3;
      if (*c == '-')
	++c;
      if (strcasecmp (c, "620"))
      	FAIL;
      strcpy (charset, "CP874");
      mbc_max = 1;
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
      l_wctomb = __cp_wctomb;
      l_mbtowc = __cp_mbtowc;
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
      l_wctomb = __ascii_wctomb;
      l_mbtowc = __ascii_mbtowc;
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
      break;
#ifdef __CYGWIN__
    /* Newlib does not provide Big5 and Cygwin's implementation
       requires Windows support. */
    case 'B':
    case 'b':
      if (strcasecmp (charset, "BIG5"))
      	FAIL;
      strcpy (charset, "BIG5");
      mbc_max = 2;
      l_wctomb = __big5_wctomb;
      l_mbtowc = __big5_mbtowc;
      break;
#endif /* __CYGWIN__ */
    default:
      FAIL;
    }
  switch (category)
    {
    case LC_CTYPE:
      strcpy (lc_ctype_charset, charset);
      __mb_cur_max = mbc_max;
      __wctomb = l_wctomb;
      __mbtowc = l_mbtowc;
      __set_ctype (charset);
      /* Determine the width for the "CJK Ambiguous Width" category of
         characters. This is used in wcwidth(). Assume single width for
         single-byte charsets, and double width for multi-byte charsets
         other than UTF-8. For UTF-8, use double width for the East Asian
         languages ("ja", "ko", "zh"), and single width for everything else.
         Single width can also be forced with the "@cjknarrow" modifier. */
      lc_ctype_cjk_lang = !cjknarrow
			  && mbc_max > 1
			  && (charset[0] != 'U'
			      || strncmp (locale, "ja", 2) == 0
			      || strncmp (locale, "ko", 2) == 0
			      || strncmp (locale, "zh", 2) == 0);
#ifdef __HAVE_LOCALE_INFO__
      ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max);
#endif /* __HAVE_LOCALE_INFO__ */
      break;
    case LC_MESSAGES:
      /* Record the charset only when the message data loaded (always,
	 when there is no locale info to load).  The break must not be
	 conditional: a failed load would otherwise fall through into
	 the next category's case. */
#ifdef __HAVE_LOCALE_INFO__
      ret = __messages_load_locale (locale, (void *) l_wctomb, charset);
      if (!ret)
#endif /* __HAVE_LOCALE_INFO__ */
	strcpy (lc_message_charset, charset);
      break;
#ifdef __HAVE_LOCALE_INFO__
#ifdef __CYGWIN__
  /* Right now only Cygwin supports a __collate_load_locale function at all. */
    case LC_COLLATE:
      ret = __collate_load_locale (locale, (void *) l_mbtowc, charset);
      break;
#endif
    case LC_MONETARY:
      ret = __monetary_load_locale (locale, (void *) l_wctomb, charset);
      break;
    case LC_NUMERIC:
      ret = __numeric_load_locale (locale, (void *) l_wctomb, charset);
      break;
    case LC_TIME:
      ret = __time_load_locale (locale, (void *) l_wctomb, charset);
      break;
#endif /* __HAVE_LOCALE_INFO__ */
    default:
      break;
    }
#ifdef __HAVE_LOCALE_INFO__
  if (ret)
    FAIL;
#endif /* __HAVE_LOCALE_INFO__ */
  return strcpy(current_categories[category], new_categories[category]);
}

/* Pick the locale name for CATEGORY from the environment.  The first
   variable that is set and non-empty wins, in this order: LC_ALL, the
   category's own LC_* variable, LANG.  If none qualifies,
   __default_locale is returned, so the result is never NULL.  */
static const char *
__get_locale_env(struct _reent *p, int category)
{
  const char *env;

  /* 1. check LC_ALL. */
  env = _getenv_r (p, categories[0]);

  /* 2. check LC_* */
  if (env == NULL || !*env)
    env = _getenv_r (p, categories[category]);

  /* 3. check LANG */
  if (env == NULL || !*env)
    env = _getenv_r (p, "LANG");

  /* 4. if none is set, fall to default locale */
  if (env == NULL || !*env)
    env = __default_locale;

  return env;
}
#endif /* _MB_CAPABLE */
929
 
930
char *
931
_DEFUN_VOID(__locale_charset)
932
{
933
#if 0//def __HAVE_LOCALE_INFO__
934
  return __get_current_ctype_locale ()->codeset;
935
#else
936
  return lc_ctype_charset;
937
#endif
938
}
939
 
940
int
941
_DEFUN_VOID(__locale_mb_cur_max)
942
{
943
#if 0//def __HAVE_LOCALE_INFO__
944
  return __get_current_ctype_locale ()->mb_cur_max[0];
945
#else
946
  return __mb_cur_max;
947
#endif
948
}
949
 
950
 
951
char *
952
_DEFUN_VOID(__locale_msgcharset)
953
{
954
#ifdef __HAVE_LOCALE_INFO__
3065 serge 955
  return (char *) __get_current_messages_locale ()->codeset;
1693 serge 956
#else
957
  return lc_message_charset;
958
#endif
959
}
960
 
961
int
962
_DEFUN_VOID(__locale_cjk_lang)
963
{
964
  return lc_ctype_cjk_lang;
965
}
966
 
967
struct lconv *
968
_DEFUN(_localeconv_r, (data),
969
      struct _reent *data)
970
{
971
#ifdef __HAVE_LOCALE_INFO__
972
  if (__nlocale_changed)
973
    {
974
      struct lc_numeric_T *n = __get_current_numeric_locale ();
3065 serge 975
      lconv.decimal_point = (char *) n->decimal_point;
976
      lconv.thousands_sep = (char *) n->thousands_sep;
977
      lconv.grouping = (char *) n->grouping;
1693 serge 978
      __nlocale_changed = 0;
979
    }
980
  if (__mlocale_changed)
981
    {
982
      struct lc_monetary_T *m = __get_current_monetary_locale ();
3065 serge 983
      lconv.int_curr_symbol = (char *) m->int_curr_symbol;
984
      lconv.currency_symbol = (char *) m->currency_symbol;
985
      lconv.mon_decimal_point = (char *) m->mon_decimal_point;
986
      lconv.mon_thousands_sep = (char *) m->mon_thousands_sep;
987
      lconv.mon_grouping = (char *) m->mon_grouping;
988
      lconv.positive_sign = (char *) m->positive_sign;
989
      lconv.negative_sign = (char *) m->negative_sign;
1693 serge 990
      lconv.int_frac_digits = m->int_frac_digits[0];
991
      lconv.frac_digits = m->frac_digits[0];
992
      lconv.p_cs_precedes = m->p_cs_precedes[0];
993
      lconv.p_sep_by_space = m->p_sep_by_space[0];
994
      lconv.n_cs_precedes = m->n_cs_precedes[0];
995
      lconv.n_sep_by_space = m->n_sep_by_space[0];
996
      lconv.p_sign_posn = m->p_sign_posn[0];
997
      lconv.n_sign_posn = m->n_sign_posn[0];
998
#ifdef __HAVE_LOCALE_INFO_EXTENDED__
999
      lconv.int_p_cs_precedes = m->int_p_cs_precedes[0];
1000
      lconv.int_p_sep_by_space = m->int_p_sep_by_space[0];
1001
      lconv.int_n_cs_precedes = m->int_n_cs_precedes[0];
1002
      lconv.int_n_sep_by_space = m->int_n_sep_by_space[0];
1003
      lconv.int_n_sign_posn = m->int_n_sign_posn[0];
1004
      lconv.int_p_sign_posn = m->int_p_sign_posn[0];
1005
#else /* !__HAVE_LOCALE_INFO_EXTENDED__ */
1006
      lconv.int_p_cs_precedes = m->p_cs_precedes[0];
1007
      lconv.int_p_sep_by_space = m->p_sep_by_space[0];
1008
      lconv.int_n_cs_precedes = m->n_cs_precedes[0];
1009
      lconv.int_n_sep_by_space = m->n_sep_by_space[0];
1010
      lconv.int_n_sign_posn = m->n_sign_posn[0];
1011
      lconv.int_p_sign_posn = m->p_sign_posn[0];
1012
#endif /* !__HAVE_LOCALE_INFO_EXTENDED__ */
1013
      __mlocale_changed = 0;
1014
    }
1015
#endif /* __HAVE_LOCALE_INFO__ */
1016
  return (struct lconv *) &lconv;
1017
}
1018
 
1019
#ifndef _REENT_ONLY
1020
 
1021
#ifndef __CYGWIN__
1022
/* Cygwin provides its own version of setlocale to perform some more
1023
   initialization work.  It calls _setlocale_r, though. */
1024
char *
1025
_DEFUN(setlocale, (category, locale),
1026
       int category _AND
1027
       _CONST char *locale)
1028
{
1029
  return _setlocale_r (_REENT, category, locale);
1030
}
1031
#endif /* __CYGWIN__ */
1032
 
1033
struct lconv *
1034
_DEFUN_VOID(localeconv)
1035
{
1036
  return _localeconv_r (_REENT);
1037
}
1038
 
1039
#endif