Rev 4921 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | FUNCTION |
||
3 | <<setlocale>>, <<localeconv>>---select or query locale |
||
4 | |||
5 | INDEX |
||
6 | setlocale |
||
7 | INDEX |
||
8 | localeconv |
||
9 | INDEX |
||
10 | _setlocale_r |
||
11 | INDEX |
||
12 | _localeconv_r |
||
13 | |||
14 | ANSI_SYNOPSIS |
||
15 | #include <locale.h> |
||
16 | char *setlocale(int <[category]>, const char *<[locale]>); |
||
17 | lconv *localeconv(void); |
||
18 | |||
19 | char *_setlocale_r(void *<[reent]>, |
||
20 | int <[category]>, const char *<[locale]>); |
||
21 | lconv *_localeconv_r(void *<[reent]>); |
||
22 | |||
23 | TRAD_SYNOPSIS |
||
24 | #include <locale.h> |
||
25 | char *setlocale(<[category]>, <[locale]>) |
||
26 | int <[category]>; |
||
27 | char *<[locale]>; |
||
28 | |||
29 | lconv *localeconv(); |
||
30 | |||
31 | char *_setlocale_r(<[reent]>, <[category]>, <[locale]>) |
||
32 | char *<[reent]>; |
||
33 | int <[category]>; |
||
34 | char *<[locale]>; |
||
35 | |||
36 | lconv *_localeconv_r(<[reent]>); |
||
37 | char *<[reent]>; |
||
38 | |||
39 | DESCRIPTION |
||
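40 | <<setlocale>> is the facility defined by ANSI C to condition the |
||
41 | execution environment for international collating and formatting |
||
42 | information; <<localeconv>> reports on the settings of the current |
||
43 | locale. |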
||
44 | |||
45 | This is a minimal implementation, supporting only the required <<"POSIX">> |
||
46 | and <<"C">> values for <[locale]>; strings representing other locales are not |
||
47 | honored unless _MB_CAPABLE is defined. |
||
48 | |||
49 | If _MB_CAPABLE is defined, POSIX locale strings are allowed, following |
||
50 | the form |
||
51 | |||
52 | language[_TERRITORY][.charset][@@modifier] |
||
53 | |||
54 | <<"language">> is a two character string per ISO 639, or, if not available |
||
55 | for a given language, a three character string per ISO 639-3. |
||
56 | <<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and |
||
57 | <<"modifier">> see below. |
||
58 | |||
59 | Additionally to the POSIX specifier, the following extension is supported |
||
60 | for backward compatibility with older implementations using newlib: |
||
61 | <<"C-charset">>. |
||
62 | Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow |
||
63 | to specify language neutral locales while using other charsets than ASCII, |
||
64 | for instance <<"C.UTF-8">>, which keeps all settings as in the C locale, |
||
65 | but uses the UTF-8 charset. |
||
66 | |||
67 | The following charsets are recognized: |
||
68 | <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>, |
||
69 | <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with |
||
70 | 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855, |
||
71 | 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, |
||
72 | 1257, 1258]. |
||
73 | |||
74 | Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">> |
||
75 | are equivalent. Charset names with dashes can also be written without |
||
76 | dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and |
||
77 | <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>. |
||
78 | |||
79 | Full support for all of the above charsets requires that newlib has been |
||
80 | built with multibyte support and support for all ISO and Windows Codepage. |
||
81 | Otherwise all singlebyte charsets are simply mapped to ASCII. Right now, |
||
82 | only newlib for Cygwin is built with full charset support by default. |
||
83 | Under Cygwin, this implementation additionally supports the charsets |
||
84 | <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>. Cygwin |
||
85 | does not support <<"JIS">>. |
||
86 | |||
87 | Cygwin additionally supports locales from the file |
||
88 | /usr/share/locale/locale.alias. |
||
89 | |||
90 | (<<"">> is also accepted; if given, the settings are read from the |
||
6099 | serge | 91 | corresponding LC_* environment variables and $LANG according to POSIX rules.) |
4349 | Serge | 92 | |
93 | This implementation also supports the modifier <<"cjknarrow">>, which |
||
94 | affects how the functions <<wcwidth>> and <<wcswidth>> handle characters |
||
95 | from the "CJK Ambiguous Width" category of characters described at |
||
96 | http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width |
||
97 | of 1 for singlebyte charsets and a width of 2 for multibyte charsets |
||
98 | other than UTF-8. For UTF-8, their width depends on the language specifier: |
||
99 | it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean), |
||
100 | and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1, |
||
101 | independent of charset and language. |
||
102 | |||
103 | If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a |
||
104 | pointer to the string representing the current locale. The acceptable |
||
105 | values for <[category]> are defined in `<<locale.h>>' as macros |
||
106 | beginning with <<"LC_">>. |
||
107 | |||
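108 | <<localeconv>> returns a pointer to a structure (also defined in |
||
109 | `<<locale.h>>') describing the locale-specific conventions currently |
||
110 | in effect. |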
||
111 | |||
112 | <<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of |
||
113 | <<localeconv>> and <<setlocale>>. The extra argument |
||
114 | <[reent]> is a pointer to a reentrancy structure. |
||
115 | |||
116 | RETURNS |
||
117 | A successful call to <<setlocale>> returns a pointer to a string |
||
118 | associated with the specified category for the new locale. The string |
||
119 | returned by <<setlocale>> is such that a subsequent call using that |
||
120 | string will restore that category (or all categories in case of LC_ALL), |
||
121 | to that state. The application shall not modify the string returned |
||
122 | which may be overwritten by a subsequent call to <<setlocale>>. |
||
123 | On error, <<setlocale>> returns <<NULL>>. |
||
124 | |||
125 | <<localeconv>> returns a pointer to a structure of type <<lconv>>, |
||
126 | which describes the formatting and collating conventions in effect (in |
||
127 | this implementation, always those of the C locale). |
||
128 | |||
129 | PORTABILITY |
||
130 | ANSI C requires <<setlocale>>, but the only locale required across all |
||
131 | implementations is the C locale. |
||
132 | |||
133 | NOTES |
||
134 | There is no ISO-8859-12 codepage. It's also refused by this implementation. |
||
135 | |||
136 | No supporting OS subroutines are required. |
||
137 | */ |
||
138 | |||
139 | /* Parts of this code are originally taken from FreeBSD. */ |
||
140 | /* |
||
141 | * Copyright (c) 1996 - 2002 FreeBSD Project |
||
142 | * Copyright (c) 1991, 1993 |
||
143 | * The Regents of the University of California. All rights reserved. |
||
144 | * |
||
145 | * This code is derived from software contributed to Berkeley by |
||
146 | * Paul Borman at Krystal Technologies. |
||
147 | * |
||
148 | * Redistribution and use in source and binary forms, with or without |
||
149 | * modification, are permitted provided that the following conditions |
||
150 | * are met: |
||
151 | * 1. Redistributions of source code must retain the above copyright |
||
152 | * notice, this list of conditions and the following disclaimer. |
||
153 | * 2. Redistributions in binary form must reproduce the above copyright |
||
154 | * notice, this list of conditions and the following disclaimer in the |
||
155 | * documentation and/or other materials provided with the distribution. |
||
156 | * 4. Neither the name of the University nor the names of its contributors |
||
157 | * may be used to endorse or promote products derived from this software |
||
158 | * without specific prior written permission. |
||
159 | * |
||
160 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
||
161 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||
162 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||
163 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
||
164 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||
165 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||
166 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||
167 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||
168 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||
169 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||
170 | * SUCH DAMAGE. |
||
171 | */ |
||
172 | |||
173 | #include |
||
174 | #include |
||
175 | #include |
||
176 | #include |
||
177 | #include |
||
178 | #include |
||
179 | #include |
||
180 | #include |
||
181 | #include "lmessages.h" |
||
182 | #include "lmonetary.h" |
||
183 | #include "lnumeric.h" |
||
184 | #include "lctype.h" |
||
185 | #include "timelocal.h" |
||
186 | #include "../stdlib/local.h" |
||
187 | |||
188 | #define _LC_LAST 7 |
||
189 | #define ENCODING_LEN 31 |
||
190 | |||
4921 | Serge | 191 | #ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */ |
192 | int __EXPORT __mb_cur_max = 6; |
||
193 | #else |
||
4349 | Serge | 194 | int __EXPORT __mb_cur_max = 1; |
4921 | Serge | 195 | #endif |
4349 | Serge | 196 | |
197 | int __nlocale_changed = 0; |
||
198 | int __mlocale_changed = 0; |
||
199 | char *_PathLocale = NULL; |
||
200 | |||
201 | static |
||
202 | struct lconv lconv = |
||
203 | { |
||
204 | ".", "", "", "", "", "", "", "", "", "", |
||
205 | CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, |
||
206 | CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, |
||
207 | CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, |
||
208 | CHAR_MAX, CHAR_MAX |
||
209 | }; |
||
210 | |||
211 | #ifdef _MB_CAPABLE |
||
212 | /* |
||
213 | * Category names for getenv() |
||
214 | */ |
||
215 | static char *categories[_LC_LAST] = { |
||
216 | "LC_ALL", |
||
217 | "LC_COLLATE", |
||
218 | "LC_CTYPE", |
||
219 | "LC_MONETARY", |
||
220 | "LC_NUMERIC", |
||
221 | "LC_TIME", |
||
222 | "LC_MESSAGES", |
||
223 | }; |
||
224 | |||
225 | /* |
||
226 | * Default locale per POSIX. Can be overridden on a per-target base. |
||
227 | */ |
||
228 | #ifndef DEFAULT_LOCALE |
||
229 | #define DEFAULT_LOCALE "C" |
||
230 | #endif |
||
231 | /* |
||
232 | * This variable can be changed by any outside mechanism. This allows, |
||
233 | * for instance, to load the default locale from a file. |
||
234 | */ |
||
235 | char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE; |
||
236 | |||
237 | /* |
||
238 | * Current locales for each category |
||
239 | */ |
||
240 | static char current_categories[_LC_LAST][ENCODING_LEN + 1] = { |
||
241 | "C", |
||
242 | "C", |
||
243 | #ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */ |
||
244 | "C.UTF-8", |
||
245 | #else |
||
246 | "C", |
||
247 | #endif |
||
248 | "C", |
||
249 | "C", |
||
250 | "C", |
||
251 | "C", |
||
252 | }; |
||
253 | |||
254 | /* |
||
255 | * The locales we are going to try and load |
||
256 | */ |
||
257 | static char new_categories[_LC_LAST][ENCODING_LEN + 1]; |
||
258 | static char saved_categories[_LC_LAST][ENCODING_LEN + 1]; |
||
259 | |||
260 | static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)]; |
||
261 | static char *currentlocale(void); |
||
262 | static char *loadlocale(struct _reent *, int); |
||
263 | static const char *__get_locale_env(struct _reent *, int); |
||
264 | |||
265 | #endif /* _MB_CAPABLE */ |
||
266 | |||
267 | #ifdef __CYGWIN__ |
||
268 | static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8"; |
||
269 | #else |
||
270 | static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII"; |
||
271 | #endif |
||
272 | static char lc_message_charset[ENCODING_LEN + 1] = "ASCII"; |
||
273 | static int lc_ctype_cjk_lang = 0; |
||
274 | |||
275 | char * |
||
276 | _DEFUN(_setlocale_r, (p, category, locale), |
||
277 | struct _reent *p _AND |
||
278 | int category _AND |
||
279 | _CONST char *locale) |
||
280 | { |
||
281 | #ifndef _MB_CAPABLE |
||
282 | if (locale) |
||
283 | { |
||
284 | if (strcmp (locale, "POSIX") && strcmp (locale, "C") |
||
285 | && strcmp (locale, "")) |
||
286 | return NULL; |
||
287 | } |
||
288 | return "C"; |
||
289 | #else /* !_MB_CAPABLE */ |
||
290 | int i, j, len, saverr; |
||
291 | const char *env, *r; |
||
292 | |||
293 | if (category < LC_ALL || category >= _LC_LAST) |
||
294 | { |
||
295 | p->_errno = EINVAL; |
||
296 | return NULL; |
||
297 | } |
||
298 | |||
299 | if (locale == NULL) |
||
300 | return category != LC_ALL ? current_categories[category] : currentlocale(); |
||
301 | |||
302 | /* |
||
303 | * Default to the current locale for everything. |
||
304 | */ |
||
305 | for (i = 1; i < _LC_LAST; ++i) |
||
306 | strcpy (new_categories[i], current_categories[i]); |
||
307 | |||
308 | /* |
||
309 | * Now go fill up new_categories from the locale argument |
||
310 | */ |
||
311 | if (!*locale) |
||
312 | { |
||
313 | if (category == LC_ALL) |
||
314 | { |
||
315 | for (i = 1; i < _LC_LAST; ++i) |
||
316 | { |
||
317 | env = __get_locale_env (p, i); |
||
318 | if (strlen (env) > ENCODING_LEN) |
||
319 | { |
||
320 | p->_errno = EINVAL; |
||
321 | return NULL; |
||
322 | } |
||
323 | strcpy (new_categories[i], env); |
||
324 | } |
||
325 | } |
||
326 | else |
||
327 | { |
||
328 | env = __get_locale_env (p, category); |
||
329 | if (strlen (env) > ENCODING_LEN) |
||
330 | { |
||
331 | p->_errno = EINVAL; |
||
332 | return NULL; |
||
333 | } |
||
334 | strcpy (new_categories[category], env); |
||
335 | } |
||
336 | } |
||
337 | else if (category != LC_ALL) |
||
338 | { |
||
339 | if (strlen (locale) > ENCODING_LEN) |
||
340 | { |
||
341 | p->_errno = EINVAL; |
||
342 | return NULL; |
||
343 | } |
||
344 | strcpy (new_categories[category], locale); |
||
345 | } |
||
346 | else |
||
347 | { |
||
348 | if ((r = strchr (locale, '/')) == NULL) |
||
349 | { |
||
350 | if (strlen (locale) > ENCODING_LEN) |
||
351 | { |
||
352 | p->_errno = EINVAL; |
||
353 | return NULL; |
||
354 | } |
||
355 | for (i = 1; i < _LC_LAST; ++i) |
||
356 | strcpy (new_categories[i], locale); |
||
357 | } |
||
358 | else |
||
359 | { |
||
360 | for (i = 1; r[1] == '/'; ++r) |
||
361 | ; |
||
362 | if (!r[1]) |
||
363 | { |
||
364 | p->_errno = EINVAL; |
||
365 | return NULL; /* Hmm, just slashes... */ |
||
366 | } |
||
367 | do |
||
368 | { |
||
369 | if (i == _LC_LAST) |
||
370 | break; /* Too many slashes... */ |
||
371 | if ((len = r - locale) > ENCODING_LEN) |
||
372 | { |
||
373 | p->_errno = EINVAL; |
||
374 | return NULL; |
||
375 | } |
||
376 | strlcpy (new_categories[i], locale, len + 1); |
||
377 | i++; |
||
378 | while (*r == '/') |
||
379 | r++; |
||
380 | locale = r; |
||
381 | while (*r && *r != '/') |
||
382 | r++; |
||
383 | } |
||
384 | while (*locale); |
||
385 | while (i < _LC_LAST) |
||
386 | { |
||
387 | strcpy (new_categories[i], new_categories[i-1]); |
||
388 | i++; |
||
389 | } |
||
390 | } |
||
391 | } |
||
392 | |||
393 | if (category != LC_ALL) |
||
394 | return loadlocale (p, category); |
||
395 | |||
396 | for (i = 1; i < _LC_LAST; ++i) |
||
397 | { |
||
398 | strcpy (saved_categories[i], current_categories[i]); |
||
399 | if (loadlocale (p, i) == NULL) |
||
400 | { |
||
401 | saverr = p->_errno; |
||
402 | for (j = 1; j < i; j++) |
||
403 | { |
||
404 | strcpy (new_categories[j], saved_categories[j]); |
||
405 | if (loadlocale (p, j) == NULL) |
||
406 | { |
||
407 | strcpy (new_categories[j], "C"); |
||
408 | loadlocale (p, j); |
||
409 | } |
||
410 | } |
||
411 | p->_errno = saverr; |
||
412 | return NULL; |
||
413 | } |
||
414 | } |
||
415 | return currentlocale (); |
||
416 | #endif /* !_MB_CAPABLE */ |
||
417 | } |
||
418 | |||
419 | #ifdef _MB_CAPABLE |
||
420 | static char * |
||
421 | currentlocale() |
||
422 | { |
||
423 | int i; |
||
424 | |||
425 | (void)strcpy(current_locale_string, current_categories[1]); |
||
426 | |||
427 | for (i = 2; i < _LC_LAST; ++i) |
||
428 | if (strcmp(current_categories[1], current_categories[i])) { |
||
429 | for (i = 2; i < _LC_LAST; ++i) { |
||
430 | (void)strcat(current_locale_string, "/"); |
||
431 | (void)strcat(current_locale_string, |
||
432 | current_categories[i]); |
||
433 | } |
||
434 | break; |
||
435 | } |
||
436 | return (current_locale_string); |
||
437 | } |
||
438 | #endif /* _MB_CAPABLE */ |
||
439 | |||
440 | #ifdef _MB_CAPABLE |
||
441 | #ifdef __CYGWIN__ |
||
442 | extern void __set_charset_from_locale (const char *locale, char *charset); |
||
443 | extern char *__set_locale_from_locale_alias (const char *, char *); |
||
444 | extern int __collate_load_locale (const char *, void *, const char *); |
||
445 | #endif /* __CYGWIN__ */ |
||
446 | |||
447 | extern void __set_ctype (const char *charset); |
||
448 | |||
449 | static char * |
||
450 | loadlocale(struct _reent *p, int category) |
||
451 | { |
||
452 | /* At this point a full-featured system would just load the locale |
||
453 | specific data from the locale files. |
||
454 | What we do here for now is to check the incoming string for correctness. |
||
455 | The string must be in one of the allowed locale strings, either |
||
456 | one in POSIX-style, or one in the old newlib style to maintain |
||
457 | backward compatibility. If the local string is correct, the charset |
||
458 | is extracted and stored in lc_ctype_charset or lc_message_charset |
||
459 | dependent on the cateogry. */ |
||
460 | char *locale = NULL; |
||
461 | char charset[ENCODING_LEN + 1]; |
||
462 | unsigned long val; |
||
463 | char *end, *c = NULL; |
||
464 | int mbc_max; |
||
465 | int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *); |
||
466 | int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t, |
||
467 | const char *, mbstate_t *); |
||
468 | int cjknarrow = 0; |
||
469 | |||
470 | /* Avoid doing everything twice if nothing has changed. */ |
||
471 | if (!strcmp (new_categories[category], current_categories[category])) |
||
472 | return current_categories[category]; |
||
473 | |||
474 | #ifdef __CYGWIN__ |
||
475 | /* This additional code handles the case that the incoming locale string |
||
476 | is not valid. If so, it calls the function __set_locale_from_locale_alias, |
||
477 | which is only available on Cygwin right now. The function reads the |
||
478 | file /usr/share/locale/locale.alias. The file contains locale aliases |
||
479 | and their replacement locale. For instance, the alias "french" is |
||
480 | translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to |
||
481 | "th_TH.TIS-620". If successful, the function returns with a pointer |
||
482 | to the second argument, which is a buffer in which the replacement locale |
||
483 | gets stored. Otherwise the function returns NULL. */ |
||
484 | char tmp_locale[ENCODING_LEN + 1]; |
||
485 | int ret = 0; |
||
486 | |||
487 | restart: |
||
488 | if (!locale) |
||
489 | locale = new_categories[category]; |
||
490 | else if (locale != tmp_locale) |
||
491 | { |
||
492 | locale = __set_locale_from_locale_alias (locale, tmp_locale); |
||
493 | if (!locale) |
||
494 | return NULL; |
||
495 | } |
||
496 | # define FAIL goto restart |
||
497 | #else |
||
498 | locale = new_categories[category]; |
||
499 | # define FAIL return NULL |
||
500 | #endif |
||
6099 | serge | 501 | |
4349 | Serge | 502 | /* "POSIX" is translated to "C", as on Linux. */ |
503 | if (!strcmp (locale, "POSIX")) |
||
504 | strcpy (locale, "C"); |
||
505 | if (!strcmp (locale, "C")) /* Default "C" locale */ |
||
506 | strcpy (charset, "ASCII"); |
||
507 | else if (locale[0] == 'C' |
||
508 | && (locale[1] == '-' /* Old newlib style */ |
||
509 | || locale[1] == '.')) /* Extension for the C locale to allow |
||
510 | specifying different charsets while |
||
511 | sticking to the C locale in terms |
||
512 | of sort order, etc. Proposed in |
||
513 | the Debian project. */ |
||
514 | { |
||
515 | char *chp; |
||
516 | |||
517 | c = locale + 2; |
||
518 | strcpy (charset, c); |
||
519 | if ((chp = strchr (charset, '@'))) |
||
520 | /* Strip off modifier */ |
||
521 | *chp = '\0'; |
||
522 | c += strlen (charset); |
||
523 | } |
||
524 | else /* POSIX style */ |
||
525 | { |
||
526 | c = locale; |
||
527 | |||
528 | /* Don't use ctype macros here, they might be localized. */ |
||
529 | /* Language */ |
||
530 | if (c[0] < 'a' || c[0] > 'z' |
||
531 | || c[1] < 'a' || c[1] > 'z') |
||
532 | FAIL; |
||
533 | c += 2; |
||
534 | /* Allow three character Language per ISO 639-3 */ |
||
535 | if (c[0] >= 'a' && c[0] <= 'z') |
||
536 | ++c; |
||
537 | if (c[0] == '_') |
||
538 | { |
||
539 | /* Territory */ |
||
540 | ++c; |
||
541 | if (c[0] < 'A' || c[0] > 'Z' |
||
542 | || c[1] < 'A' || c[1] > 'Z') |
||
543 | FAIL; |
||
544 | c += 2; |
||
545 | } |
||
546 | if (c[0] == '.') |
||
547 | { |
||
548 | /* Charset */ |
||
549 | char *chp; |
||
550 | |||
551 | ++c; |
||
552 | strcpy (charset, c); |
||
553 | if ((chp = strchr (charset, '@'))) |
||
554 | /* Strip off modifier */ |
||
555 | *chp = '\0'; |
||
556 | c += strlen (charset); |
||
557 | } |
||
558 | else if (c[0] == '\0' || c[0] == '@') |
||
559 | /* End of string or just a modifier */ |
||
560 | #ifdef __CYGWIN__ |
||
561 | /* The Cygwin-only function __set_charset_from_locale checks |
||
562 | for the default charset which is connected to the given locale. |
||
563 | The function uses Windows functions in turn so it can't be easily |
||
564 | adapted to other targets. However, if any other target provides |
||
565 | equivalent functionality, preferrably using the same function name |
||
566 | it would be sufficient to change the guarding #ifdef. */ |
||
567 | __set_charset_from_locale (locale, charset); |
||
568 | #else |
||
569 | strcpy (charset, "ISO-8859-1"); |
||
570 | #endif |
||
571 | else |
||
572 | /* Invalid string */ |
||
573 | FAIL; |
||
574 | } |
||
575 | if (c && c[0] == '@') |
||
6099 | serge | 576 | { |
577 | /* Modifier */ |
||
578 | /* Only one modifier is recognized right now. "cjknarrow" is used |
||
579 | to modify the behaviour of wcwidth() for East Asian languages. |
||
580 | For details see the comment at the end of this function. */ |
||
581 | if (!strcmp (c + 1, "cjknarrow")) |
||
582 | cjknarrow = 1; |
||
583 | } |
||
4349 | Serge | 584 | /* We only support this subset of charsets. */ |
585 | switch (charset[0]) |
||
586 | { |
||
587 | case 'U': |
||
588 | case 'u': |
||
589 | if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8")) |
||
590 | FAIL; |
||
591 | strcpy (charset, "UTF-8"); |
||
592 | mbc_max = 6; |
||
593 | l_wctomb = __utf8_wctomb; |
||
594 | l_mbtowc = __utf8_mbtowc; |
||
595 | break; |
||
596 | #ifndef __CYGWIN__ |
||
597 | /* Cygwin does not support JIS at all. */ |
||
598 | case 'J': |
||
599 | case 'j': |
||
600 | if (strcasecmp (charset, "JIS")) |
||
601 | FAIL; |
||
602 | strcpy (charset, "JIS"); |
||
603 | mbc_max = 8; |
||
604 | l_wctomb = __jis_wctomb; |
||
605 | l_mbtowc = __jis_mbtowc; |
||
606 | break; |
||
607 | #endif /* !__CYGWIN__ */ |
||
608 | case 'E': |
||
609 | case 'e': |
||
610 | if (strncasecmp (charset, "EUC", 3)) |
||
611 | FAIL; |
||
612 | c = charset + 3; |
||
613 | if (*c == '-') |
||
614 | ++c; |
||
615 | if (!strcasecmp (c, "JP")) |
||
616 | { |
||
617 | strcpy (charset, "EUCJP"); |
||
618 | mbc_max = 3; |
||
619 | l_wctomb = __eucjp_wctomb; |
||
620 | l_mbtowc = __eucjp_mbtowc; |
||
621 | } |
||
622 | #ifdef __CYGWIN__ |
||
623 | /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's |
||
624 | implementation requires Windows support. */ |
||
625 | else if (!strcasecmp (c, "KR")) |
||
626 | { |
||
627 | strcpy (charset, "EUCKR"); |
||
628 | mbc_max = 2; |
||
629 | l_wctomb = __kr_wctomb; |
||
630 | l_mbtowc = __kr_mbtowc; |
||
631 | } |
||
632 | else if (!strcasecmp (c, "CN")) |
||
633 | { |
||
634 | strcpy (charset, "EUCCN"); |
||
635 | mbc_max = 2; |
||
636 | l_wctomb = __gbk_wctomb; |
||
637 | l_mbtowc = __gbk_mbtowc; |
||
638 | } |
||
639 | #endif /* __CYGWIN__ */ |
||
640 | else |
||
641 | FAIL; |
||
642 | break; |
||
643 | case 'S': |
||
644 | case 's': |
||
645 | if (strcasecmp (charset, "SJIS")) |
||
646 | FAIL; |
||
647 | strcpy (charset, "SJIS"); |
||
648 | mbc_max = 2; |
||
649 | l_wctomb = __sjis_wctomb; |
||
650 | l_mbtowc = __sjis_mbtowc; |
||
651 | break; |
||
652 | case 'I': |
||
653 | case 'i': |
||
654 | /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for |
||
655 | ISO-8859-12. This code also recognizes the aliases without dashes. */ |
||
656 | if (strncasecmp (charset, "ISO", 3)) |
||
657 | FAIL; |
||
658 | c = charset + 3; |
||
659 | if (*c == '-') |
||
660 | ++c; |
||
661 | if (strncasecmp (c, "8859", 4)) |
||
662 | FAIL; |
||
663 | c += 4; |
||
664 | if (*c == '-') |
||
665 | ++c; |
||
666 | val = _strtol_r (p, c, &end, 10); |
||
667 | if (val < 1 || val > 16 || val == 12 || *end) |
||
668 | FAIL; |
||
669 | strcpy (charset, "ISO-8859-"); |
||
670 | c = charset + 9; |
||
671 | if (val > 10) |
||
672 | *c++ = '1'; |
||
673 | *c++ = val % 10 + '0'; |
||
674 | *c = '\0'; |
||
675 | mbc_max = 1; |
||
676 | #ifdef _MB_EXTENDED_CHARSETS_ISO |
||
677 | l_wctomb = __iso_wctomb; |
||
678 | l_mbtowc = __iso_mbtowc; |
||
679 | #else /* !_MB_EXTENDED_CHARSETS_ISO */ |
||
680 | l_wctomb = __ascii_wctomb; |
||
681 | l_mbtowc = __ascii_mbtowc; |
||
682 | #endif /* _MB_EXTENDED_CHARSETS_ISO */ |
||
683 | break; |
||
684 | case 'C': |
||
685 | case 'c': |
||
686 | if (charset[1] != 'P' && charset[1] != 'p') |
||
687 | FAIL; |
||
688 | strncpy (charset, "CP", 2); |
||
689 | val = _strtol_r (p, charset + 2, &end, 10); |
||
690 | if (*end) |
||
691 | FAIL; |
||
692 | switch (val) |
||
693 | { |
||
694 | case 437: |
||
695 | case 720: |
||
696 | case 737: |
||
697 | case 775: |
||
698 | case 850: |
||
699 | case 852: |
||
700 | case 855: |
||
701 | case 857: |
||
702 | case 858: |
||
703 | case 862: |
||
704 | case 866: |
||
705 | case 874: |
||
706 | case 1125: |
||
707 | case 1250: |
||
708 | case 1251: |
||
709 | case 1252: |
||
710 | case 1253: |
||
711 | case 1254: |
||
712 | case 1255: |
||
713 | case 1256: |
||
714 | case 1257: |
||
715 | case 1258: |
||
716 | mbc_max = 1; |
||
717 | #ifdef _MB_EXTENDED_CHARSETS_WINDOWS |
||
718 | l_wctomb = __cp_wctomb; |
||
719 | l_mbtowc = __cp_mbtowc; |
||
720 | #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ |
||
721 | l_wctomb = __ascii_wctomb; |
||
722 | l_mbtowc = __ascii_mbtowc; |
||
723 | #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ |
||
724 | break; |
||
725 | case 932: |
||
726 | mbc_max = 2; |
||
727 | l_wctomb = __sjis_wctomb; |
||
728 | l_mbtowc = __sjis_mbtowc; |
||
729 | break; |
||
730 | default: |
||
731 | FAIL; |
||
732 | } |
||
733 | break; |
||
734 | case 'K': |
||
735 | case 'k': |
||
736 | /* KOI8-R, KOI8-U and the aliases without dash */ |
||
737 | if (strncasecmp (charset, "KOI8", 4)) |
||
738 | FAIL; |
||
739 | c = charset + 4; |
||
740 | if (*c == '-') |
||
741 | ++c; |
||
742 | if (*c == 'R' || *c == 'r') |
||
743 | strcpy (charset, "CP20866"); |
||
744 | else if (*c == 'U' || *c == 'u') |
||
745 | strcpy (charset, "CP21866"); |
||
746 | else |
||
747 | FAIL; |
||
748 | mbc_max = 1; |
||
749 | #ifdef _MB_EXTENDED_CHARSETS_WINDOWS |
||
750 | l_wctomb = __cp_wctomb; |
||
751 | l_mbtowc = __cp_mbtowc; |
||
752 | #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ |
||
753 | l_wctomb = __ascii_wctomb; |
||
754 | l_mbtowc = __ascii_mbtowc; |
||
755 | #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ |
||
756 | break; |
||
757 | case 'A': |
||
758 | case 'a': |
||
759 | if (strcasecmp (charset, "ASCII")) |
||
760 | FAIL; |
||
761 | strcpy (charset, "ASCII"); |
||
762 | mbc_max = 1; |
||
763 | l_wctomb = __ascii_wctomb; |
||
764 | l_mbtowc = __ascii_mbtowc; |
||
765 | break; |
||
766 | case 'G': |
||
767 | case 'g': |
||
768 | #ifdef __CYGWIN__ |
||
769 | /* Newlib does not provide GBK/GB2312 and Cygwin's implementation |
||
770 | requires Windows support. */ |
||
771 | if (!strcasecmp (charset, "GBK") |
||
772 | || !strcasecmp (charset, "GB2312")) |
||
773 | { |
||
774 | strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK"); |
||
6099 | serge | 775 | mbc_max = 2; |
776 | l_wctomb = __gbk_wctomb; |
||
777 | l_mbtowc = __gbk_mbtowc; |
||
4349 | Serge | 778 | } |
779 | else |
||
780 | #endif /* __CYGWIN__ */ |
||
781 | /* GEORGIAN-PS and the alias without dash */ |
||
782 | if (!strncasecmp (charset, "GEORGIAN", 8)) |
||
783 | { |
||
784 | c = charset + 8; |
||
785 | if (*c == '-') |
||
786 | ++c; |
||
787 | if (strcasecmp (c, "PS")) |
||
788 | FAIL; |
||
789 | strcpy (charset, "CP101"); |
||
790 | mbc_max = 1; |
||
791 | #ifdef _MB_EXTENDED_CHARSETS_WINDOWS |
||
792 | l_wctomb = __cp_wctomb; |
||
793 | l_mbtowc = __cp_mbtowc; |
||
794 | #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ |
||
795 | l_wctomb = __ascii_wctomb; |
||
796 | l_mbtowc = __ascii_mbtowc; |
||
797 | #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ |
||
798 | } |
||
799 | else |
||
800 | FAIL; |
||
801 | break; |
||
802 | case 'P': |
||
803 | case 'p': |
||
804 | /* PT154 */ |
||
805 | if (strcasecmp (charset, "PT154")) |
||
806 | FAIL; |
||
807 | strcpy (charset, "CP102"); |
||
808 | mbc_max = 1; |
||
809 | #ifdef _MB_EXTENDED_CHARSETS_WINDOWS |
||
810 | l_wctomb = __cp_wctomb; |
||
811 | l_mbtowc = __cp_mbtowc; |
||
812 | #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ |
||
813 | l_wctomb = __ascii_wctomb; |
||
814 | l_mbtowc = __ascii_mbtowc; |
||
815 | #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ |
||
816 | break; |
||
817 | case 'T': |
||
818 | case 't': |
||
819 | if (strncasecmp (charset, "TIS", 3)) |
||
820 | FAIL; |
||
821 | c = charset + 3; |
||
822 | if (*c == '-') |
||
823 | ++c; |
||
824 | if (strcasecmp (c, "620")) |
||
825 | FAIL; |
||
826 | strcpy (charset, "CP874"); |
||
827 | mbc_max = 1; |
||
828 | #ifdef _MB_EXTENDED_CHARSETS_WINDOWS |
||
829 | l_wctomb = __cp_wctomb; |
||
830 | l_mbtowc = __cp_mbtowc; |
||
831 | #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ |
||
832 | l_wctomb = __ascii_wctomb; |
||
833 | l_mbtowc = __ascii_mbtowc; |
||
834 | #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ |
||
835 | break; |
||
836 | #ifdef __CYGWIN__ |
||
837 | /* Newlib does not provide Big5 and Cygwin's implementation |
||
838 | requires Windows support. */ |
||
839 | case 'B': |
||
840 | case 'b': |
||
841 | if (strcasecmp (charset, "BIG5")) |
||
842 | FAIL; |
||
843 | strcpy (charset, "BIG5"); |
||
844 | mbc_max = 2; |
||
845 | l_wctomb = __big5_wctomb; |
||
846 | l_mbtowc = __big5_mbtowc; |
||
847 | break; |
||
848 | #endif /* __CYGWIN__ */ |
||
849 | default: |
||
850 | FAIL; |
||
851 | } |
||
852 | switch (category) |
||
853 | { |
||
854 | case LC_CTYPE: |
||
855 | strcpy (lc_ctype_charset, charset); |
||
856 | __mb_cur_max = mbc_max; |
||
857 | __wctomb = l_wctomb; |
||
858 | __mbtowc = l_mbtowc; |
||
859 | __set_ctype (charset); |
||
860 | /* Determine the width for the "CJK Ambiguous Width" category of |
||
861 | characters. This is used in wcwidth(). Assume single width for |
||
862 | single-byte charsets, and double width for multi-byte charsets |
||
863 | other than UTF-8. For UTF-8, use double width for the East Asian |
||
864 | languages ("ja", "ko", "zh"), and single width for everything else. |
||
865 | Single width can also be forced with the "@cjknarrow" modifier. */ |
||
866 | lc_ctype_cjk_lang = !cjknarrow |
||
867 | && mbc_max > 1 |
||
868 | && (charset[0] != 'U' |
||
869 | || strncmp (locale, "ja", 2) == 0 |
||
870 | || strncmp (locale, "ko", 2) == 0 |
||
871 | || strncmp (locale, "zh", 2) == 0); |
||
872 | #ifdef __HAVE_LOCALE_INFO__ |
||
873 | ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max); |
||
874 | #endif /* __HAVE_LOCALE_INFO__ */ |
||
875 | break; |
||
876 | case LC_MESSAGES: |
||
877 | strcpy (lc_message_charset, charset); |
||
878 | #ifdef __HAVE_LOCALE_INFO__ |
||
879 | ret = __messages_load_locale (locale, (void *) l_wctomb, charset); |
||
880 | if (!ret) |
||
881 | #endif /* __HAVE_LOCALE_INFO__ */ |
||
882 | break; |
||
883 | #ifdef __HAVE_LOCALE_INFO__ |
||
884 | #ifdef __CYGWIN__ |
||
885 | /* Right now only Cygwin supports a __collate_load_locale function at all. */ |
||
886 | case LC_COLLATE: |
||
887 | ret = __collate_load_locale (locale, (void *) l_mbtowc, charset); |
||
888 | break; |
||
889 | #endif |
||
890 | case LC_MONETARY: |
||
891 | ret = __monetary_load_locale (locale, (void *) l_wctomb, charset); |
||
892 | break; |
||
893 | case LC_NUMERIC: |
||
894 | ret = __numeric_load_locale (locale, (void *) l_wctomb, charset); |
||
895 | break; |
||
896 | case LC_TIME: |
||
897 | ret = __time_load_locale (locale, (void *) l_wctomb, charset); |
||
898 | break; |
||
899 | #endif /* __HAVE_LOCALE_INFO__ */ |
||
900 | default: |
||
901 | break; |
||
902 | } |
||
903 | #ifdef __HAVE_LOCALE_INFO__ |
||
904 | if (ret) |
||
905 | FAIL; |
||
906 | #endif /* __HAVE_LOCALE_INFO__ */ |
||
907 | return strcpy(current_categories[category], new_categories[category]); |
||
908 | } |
||
909 | |||
910 | static const char * |
||
911 | __get_locale_env(struct _reent *p, int category) |
||
912 | { |
||
913 | const char *env; |
||
914 | |||
915 | /* 1. check LC_ALL. */ |
||
916 | env = _getenv_r (p, categories[0]); |
||
917 | |||
918 | /* 2. check LC_* */ |
||
919 | if (env == NULL || !*env) |
||
920 | env = _getenv_r (p, categories[category]); |
||
921 | |||
922 | /* 3. check LANG */ |
||
923 | if (env == NULL || !*env) |
||
924 | env = _getenv_r (p, "LANG"); |
||
925 | |||
926 | /* 4. if none is set, fall to default locale */ |
||
927 | if (env == NULL || !*env) |
||
928 | env = __default_locale; |
||
929 | |||
930 | return env; |
||
931 | } |
||
932 | #endif /* _MB_CAPABLE */ |
||
933 | |||
934 | char * |
||
935 | _DEFUN_VOID(__locale_charset) |
||
936 | { |
||
937 | #if 0//def __HAVE_LOCALE_INFO__ |
||
938 | return __get_current_ctype_locale ()->codeset; |
||
939 | #else |
||
940 | return lc_ctype_charset; |
||
941 | #endif |
||
942 | } |
||
943 | |||
944 | int |
||
945 | _DEFUN_VOID(__locale_mb_cur_max) |
||
946 | { |
||
947 | #if 0//def __HAVE_LOCALE_INFO__ |
||
948 | return __get_current_ctype_locale ()->mb_cur_max[0]; |
||
949 | #else |
||
950 | return __mb_cur_max; |
||
951 | #endif |
||
952 | } |
||
953 | |||
954 | |||
955 | char * |
||
956 | _DEFUN_VOID(__locale_msgcharset) |
||
957 | { |
||
958 | #ifdef __HAVE_LOCALE_INFO__ |
||
959 | return (char *) __get_current_messages_locale ()->codeset; |
||
960 | #else |
||
961 | return lc_message_charset; |
||
962 | #endif |
||
963 | } |
||
964 | |||
965 | int |
||
966 | _DEFUN_VOID(__locale_cjk_lang) |
||
967 | { |
||
968 | return lc_ctype_cjk_lang; |
||
969 | } |
||
970 | |||
971 | struct lconv * |
||
972 | _DEFUN(_localeconv_r, (data), |
||
973 | struct _reent *data) |
||
974 | { |
||
975 | #ifdef __HAVE_LOCALE_INFO__ |
||
976 | if (__nlocale_changed) |
||
977 | { |
||
978 | struct lc_numeric_T *n = __get_current_numeric_locale (); |
||
979 | lconv.decimal_point = (char *) n->decimal_point; |
||
980 | lconv.thousands_sep = (char *) n->thousands_sep; |
||
981 | lconv.grouping = (char *) n->grouping; |
||
982 | __nlocale_changed = 0; |
||
983 | } |
||
984 | if (__mlocale_changed) |
||
985 | { |
||
986 | struct lc_monetary_T *m = __get_current_monetary_locale (); |
||
987 | lconv.int_curr_symbol = (char *) m->int_curr_symbol; |
||
988 | lconv.currency_symbol = (char *) m->currency_symbol; |
||
989 | lconv.mon_decimal_point = (char *) m->mon_decimal_point; |
||
990 | lconv.mon_thousands_sep = (char *) m->mon_thousands_sep; |
||
991 | lconv.mon_grouping = (char *) m->mon_grouping; |
||
992 | lconv.positive_sign = (char *) m->positive_sign; |
||
993 | lconv.negative_sign = (char *) m->negative_sign; |
||
994 | lconv.int_frac_digits = m->int_frac_digits[0]; |
||
995 | lconv.frac_digits = m->frac_digits[0]; |
||
996 | lconv.p_cs_precedes = m->p_cs_precedes[0]; |
||
997 | lconv.p_sep_by_space = m->p_sep_by_space[0]; |
||
998 | lconv.n_cs_precedes = m->n_cs_precedes[0]; |
||
999 | lconv.n_sep_by_space = m->n_sep_by_space[0]; |
||
1000 | lconv.p_sign_posn = m->p_sign_posn[0]; |
||
1001 | lconv.n_sign_posn = m->n_sign_posn[0]; |
||
1002 | #ifdef __HAVE_LOCALE_INFO_EXTENDED__ |
||
1003 | lconv.int_p_cs_precedes = m->int_p_cs_precedes[0]; |
||
1004 | lconv.int_p_sep_by_space = m->int_p_sep_by_space[0]; |
||
1005 | lconv.int_n_cs_precedes = m->int_n_cs_precedes[0]; |
||
1006 | lconv.int_n_sep_by_space = m->int_n_sep_by_space[0]; |
||
1007 | lconv.int_n_sign_posn = m->int_n_sign_posn[0]; |
||
1008 | lconv.int_p_sign_posn = m->int_p_sign_posn[0]; |
||
1009 | #else /* !__HAVE_LOCALE_INFO_EXTENDED__ */ |
||
1010 | lconv.int_p_cs_precedes = m->p_cs_precedes[0]; |
||
1011 | lconv.int_p_sep_by_space = m->p_sep_by_space[0]; |
||
1012 | lconv.int_n_cs_precedes = m->n_cs_precedes[0]; |
||
1013 | lconv.int_n_sep_by_space = m->n_sep_by_space[0]; |
||
1014 | lconv.int_n_sign_posn = m->n_sign_posn[0]; |
||
1015 | lconv.int_p_sign_posn = m->p_sign_posn[0]; |
||
1016 | #endif /* !__HAVE_LOCALE_INFO_EXTENDED__ */ |
||
1017 | __mlocale_changed = 0; |
||
1018 | } |
||
1019 | #endif /* __HAVE_LOCALE_INFO__ */ |
||
1020 | return (struct lconv *) &lconv; |
||
1021 | } |
||
1022 | |||
1023 | #ifndef _REENT_ONLY |
||
1024 | |||
1025 | #ifndef __CYGWIN__ |
||
1026 | /* Cygwin provides its own version of setlocale to perform some more |
||
1027 | initialization work. It calls _setlocale_r, though. */ |
||
1028 | char * |
||
1029 | _DEFUN(setlocale, (category, locale), |
||
1030 | int category _AND |
||
1031 | _CONST char *locale) |
||
1032 | { |
||
1033 | return _setlocale_r (_REENT, category, locale); |
||
1034 | } |
||
1035 | #endif /* __CYGWIN__ */ |
||
1036 | |||
1037 | struct lconv * |
||
1038 | _DEFUN_VOID(localeconv) |
||
1039 | { |
||
1040 | return _localeconv_r (_REENT); |
||
1041 | } |
||
1042 | |||
1043 | #endif>>>=>>>>>>>>>>>> |