Rev 1693 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 1693 | Rev 3065 | ||
---|---|---|---|
Line 88... | Line 88... | ||
88 | /usr/share/locale/locale.alias. |
88 | /usr/share/locale/locale.alias. |
Line 89... | Line 89... | ||
89 | 89 | ||
90 | (<<"">> is also accepted; if given, the settings are read from the |
90 | (<<"">> is also accepted; if given, the settings are read from the |
Line 91... | Line 91... | ||
91 | corresponding LC_* environment variables and $LANG according to POSIX rules. |
91 | corresponding LC_* environment variables and $LANG according to POSIX rules. |
92 | - | ||
93 | This implementation also supports a single modifier, <<"cjknarrow">>. |
- | |
94 | Any other modifier is ignored. <<"cjknarrow">>, in conjunction with one |
92 | |
95 | of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies |
93 | This implementation also supports the modifier <<"cjknarrow">>, which |
96 | how the functions <<wcwidth>> and <<wcswidth>> handle characters from |
94 | affects how the functions <<wcwidth>> and <<wcswidth>> handle characters |
97 | the "CJK Ambiguous Width" character class described in |
95 | from the "CJK Ambiguous Width" category of characters described at |
98 | http://www.unicode.org/unicode/reports/tr11/. Usually these characters |
96 | http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width |
99 | have a width of 1, unless you specify one of the aforementioned |
97 | of 1 for singlebyte charsets and a width of 2 for multibyte charsets |
100 | languages, in which case these characters have a width of 2. By |
98 | other than UTF-8. For UTF-8, their width depends on the language specifier: |
- | 99 | it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean), |
|
Line 101... | Line 100... | ||
101 | specifying the <<"cjknarrow">> modifier, these characters will have a |
100 | and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1, |
102 | width of one in the languages <<"ja">>, <<"ko">>, and <<"zh">> as well. |
101 | independent of charset and language. |
103 | 102 | ||
104 | If you use < |
103 | If you use < |
Line 181... | Line 180... | ||
181 | #include |
180 | #include |
182 | #include "lmessages.h" |
181 | #include "lmessages.h" |
183 | #include "lmonetary.h" |
182 | #include "lmonetary.h" |
184 | #include "lnumeric.h" |
183 | #include "lnumeric.h" |
185 | #include "lctype.h" |
184 | #include "lctype.h" |
- | 185 | #include "timelocal.h" |
|
186 | #include "../stdlib/local.h" |
186 | #include "../stdlib/local.h" |
Line 187... | Line 187... | ||
187 | 187 | ||
188 | #define _LC_LAST 7 |
188 | #define _LC_LAST 7 |
Line 234... | Line 234... | ||
234 | * Current locales for each category |
234 | * Current locales for each category |
235 | */ |
235 | */ |
236 | static char current_categories[_LC_LAST][ENCODING_LEN + 1] = { |
236 | static char current_categories[_LC_LAST][ENCODING_LEN + 1] = { |
237 | "C", |
237 | "C", |
238 | "C", |
238 | "C", |
- | 239 | #ifdef __CYGWIN__ /* Cygwin starts with LC_CTYPE set to "C.UTF-8". */ |
|
- | 240 | "C.UTF-8", |
|
- | 241 | #else |
|
239 | "C", |
242 | "C", |
- | 243 | #endif |
|
240 | "C", |
244 | "C", |
241 | "C", |
245 | "C", |
242 | "C", |
246 | "C", |
243 | "C", |
247 | "C", |
244 | }; |
248 | }; |
Line 254... | Line 258... | ||
254 | static char *loadlocale(struct _reent *, int); |
258 | static char *loadlocale(struct _reent *, int); |
255 | static const char *__get_locale_env(struct _reent *, int); |
259 | static const char *__get_locale_env(struct _reent *, int); |
Line 256... | Line 260... | ||
256 | 260 | ||
Line 257... | Line 261... | ||
257 | #endif /* _MB_CAPABLE */ |
261 | #endif /* _MB_CAPABLE */ |
258 | 262 | ||
259 | #if 0 /*def __CYGWIN__ TODO: temporarily(?) disable C == UTF-8 */ |
- | |
260 | static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8"; |
263 | #ifdef __CYGWIN__ |
261 | static char lc_message_charset[ENCODING_LEN + 1] = "UTF-8"; |
264 | static char lc_ctype_charset[ENCODING_LEN + 1] = "UTF-8"; |
262 | #else |
- | |
263 | static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII"; |
265 | #else |
- | 266 | static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII"; |
|
264 | static char lc_message_charset[ENCODING_LEN + 1] = "ASCII"; |
267 | #endif |
Line 265... | Line 268... | ||
265 | #endif |
268 | static char lc_message_charset[ENCODING_LEN + 1] = "ASCII"; |
266 | static int lc_ctype_cjk_lang = 0; |
269 | static int lc_ctype_cjk_lang = 0; |
267 | 270 | ||
Line 431... | Line 434... | ||
431 | #endif /* _MB_CAPABLE */ |
434 | #endif /* _MB_CAPABLE */ |
Line 432... | Line 435... | ||
432 | 435 | ||
433 | #ifdef _MB_CAPABLE |
436 | #ifdef _MB_CAPABLE |
434 | #ifdef __CYGWIN__ |
437 | #ifdef __CYGWIN__ |
435 | extern void __set_charset_from_locale (const char *locale, char *charset); |
438 | extern void __set_charset_from_locale (const char *locale, char *charset); |
436 | extern int __set_locale_from_locale_alias (const char *, char *); |
439 | extern char *__set_locale_from_locale_alias (const char *, char *); |
437 | extern int __collate_load_locale (const char *, void *, const char *); |
440 | extern int __collate_load_locale (const char *, void *, const char *); |
Line 438... | Line 441... | ||
438 | #endif /* __CYGWIN__ */ |
441 | #endif /* __CYGWIN__ */ |
Line 451... | Line 454... | ||
451 | is extracted and stored in lc_ctype_charset or lc_message_charset |
454 | is extracted and stored in lc_ctype_charset or lc_message_charset |
452 | dependent on the category. */ |
455 | dependent on the category. */ |
453 | char *locale = NULL; |
456 | char *locale = NULL; |
454 | char charset[ENCODING_LEN + 1]; |
457 | char charset[ENCODING_LEN + 1]; |
455 | unsigned long val; |
458 | unsigned long val; |
456 | char *end, *c; |
459 | char *end, *c = NULL; |
457 | int mbc_max; |
460 | int mbc_max; |
458 | int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *); |
461 | int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *); |
459 | int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t, |
462 | int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t, |
460 | const char *, mbstate_t *); |
463 | const char *, mbstate_t *); |
461 | int cjknarrow = 0; |
464 | int cjknarrow = 0; |
Line 494... | Line 497... | ||
494 | 497 | ||
495 | /* "POSIX" is translated to "C", as on Linux. */ |
498 | /* "POSIX" is translated to "C", as on Linux. */ |
496 | if (!strcmp (locale, "POSIX")) |
499 | if (!strcmp (locale, "POSIX")) |
497 | strcpy (locale, "C"); |
500 | strcpy (locale, "C"); |
498 | if (!strcmp (locale, "C")) /* Default "C" locale */ |
- | |
499 | #if 0 /*def __CYGWIN__ TODO: temporarily(?) disable C == UTF-8 */ |
- | |
500 | strcpy (charset, "UTF-8"); |
- | |
501 | #else |
501 | if (!strcmp (locale, "C")) /* Default "C" locale */ |
502 | strcpy (charset, "ASCII"); |
- | |
503 | #endif |
502 | strcpy (charset, "ASCII"); |
504 | else if (locale[0] == 'C' |
503 | else if (locale[0] == 'C' |
505 | && (locale[1] == '-' /* Old newlib style */ |
504 | && (locale[1] == '-' /* Old newlib style */ |
506 | || locale[1] == '.')) /* Extension for the C locale to allow |
505 | || locale[1] == '.')) /* Extension for the C locale to allow |
507 | specifying different charsets while |
506 | specifying different charsets while |
508 | sticking to the C locale in terms |
507 | sticking to the C locale in terms |
509 | of sort order, etc. Proposed in |
508 | of sort order, etc. Proposed in |
- | 509 | the Debian project. */ |
|
- | 510 | { |
|
- | 511 | char *chp; |
|
- | 512 | ||
510 | the Debian project. */ |
513 | c = locale + 2; |
- | 514 | strcpy (charset, c); |
|
- | 515 | if ((chp = strchr (charset, '@'))) |
|
- | 516 | /* Strip off modifier */ |
|
- | 517 | *chp = '\0'; |
|
- | 518 | c += strlen (charset); |
|
511 | strcpy (charset, locale + 2); |
519 | } |
512 | else /* POSIX style */ |
520 | else /* POSIX style */ |
513 | { |
521 | { |
Line 514... | Line 522... | ||
514 | c = locale; |
522 | c = locale; |
Line 557... | Line 565... | ||
557 | strcpy (charset, "ISO-8859-1"); |
565 | strcpy (charset, "ISO-8859-1"); |
558 | #endif |
566 | #endif |
559 | else |
567 | else |
560 | /* Invalid string */ |
568 | /* Invalid string */ |
561 | FAIL; |
569 | FAIL; |
- | 570 | } |
|
562 | if (c[0] == '@') |
571 | if (c && c[0] == '@') |
563 | { |
572 | { |
564 | /* Modifier */ |
573 | /* Modifier */ |
565 | /* Only one modifier is recognized right now. "cjknarrow" is used |
574 | /* Only one modifier is recognized right now. "cjknarrow" is used |
566 | to modify the behaviour of wcwidth() for East Asian languages. |
575 | to modify the behaviour of wcwidth() for East Asian languages. |
567 | For details see the comment at the end of this function. */ |
576 | For details see the comment at the end of this function. */ |
568 | if (!strcmp (c + 1, "cjknarrow")) |
577 | if (!strcmp (c + 1, "cjknarrow")) |
569 | cjknarrow = 1; |
578 | cjknarrow = 1; |
570 | } |
579 | } |
571 | } |
- | |
572 | /* We only support this subset of charsets. */ |
580 | /* We only support this subset of charsets. */ |
573 | switch (charset[0]) |
581 | switch (charset[0]) |
574 | { |
582 | { |
575 | case 'U': |
583 | case 'U': |
576 | case 'u': |
584 | case 'u': |
Line 843... | Line 851... | ||
843 | strcpy (lc_ctype_charset, charset); |
851 | strcpy (lc_ctype_charset, charset); |
844 | __mb_cur_max = mbc_max; |
852 | __mb_cur_max = mbc_max; |
845 | __wctomb = l_wctomb; |
853 | __wctomb = l_wctomb; |
846 | __mbtowc = l_mbtowc; |
854 | __mbtowc = l_mbtowc; |
847 | __set_ctype (charset); |
855 | __set_ctype (charset); |
848 | /* Check for the language part of the locale specifier. In case |
856 | /* Determine the width for the "CJK Ambiguous Width" category of |
849 | of "ja", "ko", or "zh", assume the use of CJK fonts, unless the |
857 | characters. This is used in wcwidth(). Assume single width for |
850 | "@cjknarrow" modifier has been specified. |
858 | single-byte charsets, and double width for multi-byte charsets |
851 | The result is stored in lc_ctype_cjk_lang and tested in wcwidth() |
859 | other than UTF-8. For UTF-8, use double width for the East Asian |
852 | to figure out the width to return (1 or 2) for the "CJK Ambiguous |
860 | languages ("ja", "ko", "zh"), and single width for everything else. |
853 | Width" category of characters. */ |
861 | Single width can also be forced with the "@cjknarrow" modifier. */ |
854 | lc_ctype_cjk_lang = !cjknarrow |
862 | lc_ctype_cjk_lang = !cjknarrow |
- | 863 | && mbc_max > 1 |
|
- | 864 | && (charset[0] != 'U' |
|
855 | && ((strncmp (locale, "ja", 2) == 0 |
865 | || strncmp (locale, "ja", 2) == 0 |
856 | || strncmp (locale, "ko", 2) == 0 |
866 | || strncmp (locale, "ko", 2) == 0 |
857 | || strncmp (locale, "zh", 2) == 0)); |
867 | || strncmp (locale, "zh", 2) == 0); |
858 | #ifdef __HAVE_LOCALE_INFO__ |
868 | #ifdef __HAVE_LOCALE_INFO__ |
859 | ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max); |
869 | ret = __ctype_load_locale (locale, (void *) l_wctomb, charset, mbc_max); |
860 | #endif /* __HAVE_LOCALE_INFO__ */ |
870 | #endif /* __HAVE_LOCALE_INFO__ */ |
861 | break; |
871 | break; |
862 | case LC_MESSAGES: |
872 | case LC_MESSAGES: |
Line 940... | Line 950... | ||
940 | 950 | ||
941 | char * |
951 | char * |
942 | _DEFUN_VOID(__locale_msgcharset) |
952 | _DEFUN_VOID(__locale_msgcharset) |
943 | { |
953 | { |
944 | #ifdef __HAVE_LOCALE_INFO__ |
954 | #ifdef __HAVE_LOCALE_INFO__ |
945 | return __get_current_messages_locale ()->codeset; |
955 | return (char *) __get_current_messages_locale ()->codeset; |
946 | #else |
956 | #else |
947 | return lc_message_charset; |
957 | return lc_message_charset; |
948 | #endif |
958 | #endif |
Line 960... | Line 970... | ||
960 | { |
970 | { |
961 | #ifdef __HAVE_LOCALE_INFO__ |
971 | #ifdef __HAVE_LOCALE_INFO__ |
962 | if (__nlocale_changed) |
972 | if (__nlocale_changed) |
963 | { |
973 | { |
964 | struct lc_numeric_T *n = __get_current_numeric_locale (); |
974 | struct lc_numeric_T *n = __get_current_numeric_locale (); |
965 | lconv.decimal_point = n->decimal_point; |
975 | lconv.decimal_point = (char *) n->decimal_point; |
966 | lconv.thousands_sep = n->thousands_sep; |
976 | lconv.thousands_sep = (char *) n->thousands_sep; |
967 | lconv.grouping = n->grouping; |
977 | lconv.grouping = (char *) n->grouping; |
968 | __nlocale_changed = 0; |
978 | __nlocale_changed = 0; |
969 | } |
979 | } |
970 | if (__mlocale_changed) |
980 | if (__mlocale_changed) |
971 | { |
981 | { |
972 | struct lc_monetary_T *m = __get_current_monetary_locale (); |
982 | struct lc_monetary_T *m = __get_current_monetary_locale (); |
973 | lconv.int_curr_symbol = m->int_curr_symbol; |
983 | lconv.int_curr_symbol = (char *) m->int_curr_symbol; |
974 | lconv.currency_symbol = m->currency_symbol; |
984 | lconv.currency_symbol = (char *) m->currency_symbol; |
975 | lconv.mon_decimal_point = m->mon_decimal_point; |
985 | lconv.mon_decimal_point = (char *) m->mon_decimal_point; |
976 | lconv.mon_thousands_sep = m->mon_thousands_sep; |
986 | lconv.mon_thousands_sep = (char *) m->mon_thousands_sep; |
977 | lconv.mon_grouping = m->mon_grouping; |
987 | lconv.mon_grouping = (char *) m->mon_grouping; |
978 | lconv.positive_sign = m->positive_sign; |
988 | lconv.positive_sign = (char *) m->positive_sign; |
979 | lconv.negative_sign = m->negative_sign; |
989 | lconv.negative_sign = (char *) m->negative_sign; |
980 | lconv.int_frac_digits = m->int_frac_digits[0]; |
990 | lconv.int_frac_digits = m->int_frac_digits[0]; |
981 | lconv.frac_digits = m->frac_digits[0]; |
991 | lconv.frac_digits = m->frac_digits[0]; |
982 | lconv.p_cs_precedes = m->p_cs_precedes[0]; |
992 | lconv.p_cs_precedes = m->p_cs_precedes[0]; |
983 | lconv.p_sep_by_space = m->p_sep_by_space[0]; |
993 | lconv.p_sep_by_space = m->p_sep_by_space[0]; |
984 | lconv.n_cs_precedes = m->n_cs_precedes[0]; |
994 | lconv.n_cs_precedes = m->n_cs_precedes[0]; |